extzstd 0.3.2 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/contrib/zstd/CHANGELOG +225 -1
- data/contrib/zstd/CONTRIBUTING.md +158 -75
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +106 -69
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +64 -36
- data/contrib/zstd/SECURITY.md +15 -0
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +117 -199
- data/contrib/zstd/lib/README.md +37 -7
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +80 -86
- data/contrib/zstd/lib/common/compiler.h +225 -63
- data/contrib/zstd/lib/common/cpu.h +37 -1
- data/contrib/zstd/lib/common/debug.c +7 -1
- data/contrib/zstd/lib/common/debug.h +21 -12
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +93 -5
- data/contrib/zstd/lib/common/fse.h +12 -87
- data/contrib/zstd/lib/common/fse_decompress.c +37 -117
- data/contrib/zstd/lib/common/huf.h +97 -172
- data/contrib/zstd/lib/common/mem.h +58 -58
- data/contrib/zstd/lib/common/pool.c +38 -17
- data/contrib/zstd/lib/common/pool.h +10 -4
- data/contrib/zstd/lib/common/portability_macros.h +158 -0
- data/contrib/zstd/lib/common/threading.c +74 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +6 -814
- data/contrib/zstd/lib/common/xxhash.h +6930 -195
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +68 -154
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +75 -155
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +810 -259
- data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
- data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
- data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
- data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
- data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
- data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
- data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
- data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
- data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +237 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +1030 -332
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +26 -7
- data/ext/extzstd.c +51 -24
- data/ext/extzstd.h +33 -6
- data/ext/extzstd_stream.c +74 -31
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +17 -7
- data/contrib/zstd/appveyor.yml +0 -292
- data/ext/depend +0 -2
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -33,6 +33,12 @@
|
|
|
33
33
|
*/
|
|
34
34
|
|
|
35
35
|
|
|
36
|
+
/* Streaming state is used to inform allocation of the literal buffer */
|
|
37
|
+
typedef enum {
|
|
38
|
+
not_streaming = 0,
|
|
39
|
+
is_streaming = 1
|
|
40
|
+
} streaming_operation;
|
|
41
|
+
|
|
36
42
|
/* ZSTD_decompressBlock_internal() :
|
|
37
43
|
* decompress block, starting at `src`,
|
|
38
44
|
* into destination buffer `dst`.
|
|
@@ -41,7 +47,7 @@
|
|
|
41
47
|
*/
|
|
42
48
|
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
43
49
|
void* dst, size_t dstCapacity,
|
|
44
|
-
const void* src, size_t srcSize, const
|
|
50
|
+
const void* src, size_t srcSize, const streaming_operation streaming);
|
|
45
51
|
|
|
46
52
|
/* ZSTD_buildFSETable() :
|
|
47
53
|
* generate FSE decoding table for one symbol (ll, ml or off)
|
|
@@ -54,9 +60,14 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
54
60
|
*/
|
|
55
61
|
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
56
62
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
57
|
-
const U32* baseValue, const
|
|
63
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
58
64
|
unsigned tableLog, void* wksp, size_t wkspSize,
|
|
59
65
|
int bmi2);
|
|
60
66
|
|
|
67
|
+
/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
|
|
68
|
+
size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
|
|
69
|
+
void* dst, size_t dstCapacity,
|
|
70
|
+
const void* src, size_t srcSize);
|
|
71
|
+
|
|
61
72
|
|
|
62
73
|
#endif /* ZSTD_DEC_BLOCK_H */
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
* Dependencies
|
|
21
21
|
*********************************************************/
|
|
22
22
|
#include "../common/mem.h" /* BYTE, U16, U32 */
|
|
23
|
-
#include "../common/zstd_internal.h" /*
|
|
23
|
+
#include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
|
|
@@ -40,7 +40,7 @@ static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
|
|
|
40
40
|
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
|
41
41
|
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
|
42
42
|
|
|
43
|
-
static UNUSED_ATTR const
|
|
43
|
+
static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
|
|
44
44
|
0, 1, 2, 3, 4, 5, 6, 7,
|
|
45
45
|
8, 9, 10, 11, 12, 13, 14, 15,
|
|
46
46
|
16, 17, 18, 19, 20, 21, 22, 23,
|
|
@@ -75,12 +75,13 @@ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
|
|
|
75
75
|
|
|
76
76
|
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
|
|
77
77
|
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
|
|
78
|
+
#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
|
|
78
79
|
|
|
79
80
|
typedef struct {
|
|
80
81
|
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
|
|
81
82
|
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
|
|
82
83
|
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
|
83
|
-
HUF_DTable hufTable[HUF_DTABLE_SIZE(
|
|
84
|
+
HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
|
|
84
85
|
U32 rep[ZSTD_REP_NUM];
|
|
85
86
|
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
|
|
86
87
|
} ZSTD_entropyDTables_t;
|
|
@@ -99,6 +100,29 @@ typedef enum {
|
|
|
99
100
|
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
|
100
101
|
} ZSTD_dictUses_e;
|
|
101
102
|
|
|
103
|
+
/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
|
|
104
|
+
typedef struct {
|
|
105
|
+
const ZSTD_DDict** ddictPtrTable;
|
|
106
|
+
size_t ddictPtrTableSize;
|
|
107
|
+
size_t ddictPtrCount;
|
|
108
|
+
} ZSTD_DDictHashSet;
|
|
109
|
+
|
|
110
|
+
#ifndef ZSTD_DECODER_INTERNAL_BUFFER
|
|
111
|
+
# define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
|
|
112
|
+
#endif
|
|
113
|
+
|
|
114
|
+
#define ZSTD_LBMIN 64
|
|
115
|
+
#define ZSTD_LBMAX (128 << 10)
|
|
116
|
+
|
|
117
|
+
/* extra buffer, compensates when dst is not large enough to store litBuffer */
|
|
118
|
+
#define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
|
|
119
|
+
|
|
120
|
+
typedef enum {
|
|
121
|
+
ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
|
|
122
|
+
ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
|
|
123
|
+
ZSTD_split = 2 /* Split between litExtraBuffer and dst */
|
|
124
|
+
} ZSTD_litLocation_e;
|
|
125
|
+
|
|
102
126
|
struct ZSTD_DCtx_s
|
|
103
127
|
{
|
|
104
128
|
const ZSTD_seqSymbol* LLTptr;
|
|
@@ -113,6 +137,7 @@ struct ZSTD_DCtx_s
|
|
|
113
137
|
const void* dictEnd; /* end of previous segment */
|
|
114
138
|
size_t expected;
|
|
115
139
|
ZSTD_frameHeader fParams;
|
|
140
|
+
U64 processedCSize;
|
|
116
141
|
U64 decodedSize;
|
|
117
142
|
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
|
|
118
143
|
ZSTD_dStage stage;
|
|
@@ -128,7 +153,10 @@ struct ZSTD_DCtx_s
|
|
|
128
153
|
size_t litSize;
|
|
129
154
|
size_t rleSize;
|
|
130
155
|
size_t staticSize;
|
|
156
|
+
int isFrameDecompression;
|
|
157
|
+
#if DYNAMIC_BMI2 != 0
|
|
131
158
|
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
|
|
159
|
+
#endif
|
|
132
160
|
|
|
133
161
|
/* dictionary */
|
|
134
162
|
ZSTD_DDict* ddictLocal;
|
|
@@ -136,6 +164,10 @@ struct ZSTD_DCtx_s
|
|
|
136
164
|
U32 dictID;
|
|
137
165
|
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
|
|
138
166
|
ZSTD_dictUses_e dictUses;
|
|
167
|
+
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
|
|
168
|
+
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
|
|
169
|
+
int disableHufAsm;
|
|
170
|
+
int maxBlockSizeParam;
|
|
139
171
|
|
|
140
172
|
/* streaming */
|
|
141
173
|
ZSTD_dStreamStage streamStage;
|
|
@@ -148,16 +180,21 @@ struct ZSTD_DCtx_s
|
|
|
148
180
|
size_t outStart;
|
|
149
181
|
size_t outEnd;
|
|
150
182
|
size_t lhSize;
|
|
183
|
+
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
|
151
184
|
void* legacyContext;
|
|
152
185
|
U32 previousLegacyVersion;
|
|
153
186
|
U32 legacyVersion;
|
|
187
|
+
#endif
|
|
154
188
|
U32 hostageByte;
|
|
155
189
|
int noForwardProgress;
|
|
156
190
|
ZSTD_bufferMode_e outBufferMode;
|
|
157
191
|
ZSTD_outBuffer expectedOutBuffer;
|
|
158
192
|
|
|
159
193
|
/* workspace */
|
|
160
|
-
BYTE litBuffer
|
|
194
|
+
BYTE* litBuffer;
|
|
195
|
+
const BYTE* litBufferEnd;
|
|
196
|
+
ZSTD_litLocation_e litBufferLocation;
|
|
197
|
+
BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
|
|
161
198
|
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
|
|
162
199
|
|
|
163
200
|
size_t oversizedDuration;
|
|
@@ -166,8 +203,21 @@ struct ZSTD_DCtx_s
|
|
|
166
203
|
void const* dictContentBeginForFuzzing;
|
|
167
204
|
void const* dictContentEndForFuzzing;
|
|
168
205
|
#endif
|
|
206
|
+
|
|
207
|
+
/* Tracing */
|
|
208
|
+
#if ZSTD_TRACE
|
|
209
|
+
ZSTD_TraceCtx traceCtx;
|
|
210
|
+
#endif
|
|
169
211
|
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
|
|
170
212
|
|
|
213
|
+
MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
|
|
214
|
+
#if DYNAMIC_BMI2 != 0
|
|
215
|
+
return dctx->bmi2;
|
|
216
|
+
#else
|
|
217
|
+
(void)dctx;
|
|
218
|
+
return 0;
|
|
219
|
+
#endif
|
|
220
|
+
}
|
|
171
221
|
|
|
172
222
|
/*-*******************************************************
|
|
173
223
|
* Shared internal functions
|
|
@@ -184,7 +234,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
|
|
184
234
|
* If yes, do nothing (continue on current segment).
|
|
185
235
|
* If not, classify previous segment as "external dictionary", and start a new segment.
|
|
186
236
|
* This function cannot fail. */
|
|
187
|
-
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
|
|
237
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
|
|
188
238
|
|
|
189
239
|
|
|
190
240
|
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
***************************************/
|
|
16
16
|
#define ZBUFF_STATIC_LINKING_ONLY
|
|
17
17
|
#include "zbuff.h"
|
|
18
|
+
#include "../common/error_private.h"
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
/*-***********************************************************
|
|
@@ -73,13 +74,32 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
|
|
|
73
74
|
ZSTD_parameters params, unsigned long long pledgedSrcSize)
|
|
74
75
|
{
|
|
75
76
|
if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* preserve "0 == unknown" behavior */
|
|
76
|
-
|
|
77
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only), "");
|
|
78
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setPledgedSrcSize(zbc, pledgedSrcSize), "");
|
|
79
|
+
|
|
80
|
+
FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
|
|
81
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_windowLog, params.cParams.windowLog), "");
|
|
82
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_hashLog, params.cParams.hashLog), "");
|
|
83
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_chainLog, params.cParams.chainLog), "");
|
|
84
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_searchLog, params.cParams.searchLog), "");
|
|
85
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_minMatch, params.cParams.minMatch), "");
|
|
86
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_targetLength, params.cParams.targetLength), "");
|
|
87
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_strategy, params.cParams.strategy), "");
|
|
88
|
+
|
|
89
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_contentSizeFlag, params.fParams.contentSizeFlag), "");
|
|
90
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_checksumFlag, params.fParams.checksumFlag), "");
|
|
91
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_dictIDFlag, params.fParams.noDictIDFlag), "");
|
|
92
|
+
|
|
93
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), "");
|
|
94
|
+
return 0;
|
|
77
95
|
}
|
|
78
96
|
|
|
79
|
-
|
|
80
97
|
size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
|
|
81
98
|
{
|
|
82
|
-
|
|
99
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only), "");
|
|
100
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_compressionLevel, compressionLevel), "");
|
|
101
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), "");
|
|
102
|
+
return 0;
|
|
83
103
|
}
|
|
84
104
|
|
|
85
105
|
size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -13,6 +13,8 @@
|
|
|
13
13
|
/* *************************************
|
|
14
14
|
* Dependencies
|
|
15
15
|
***************************************/
|
|
16
|
+
#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* suppress warning on ZSTD_initDStream_usingDict */
|
|
17
|
+
#include "../zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
|
|
16
18
|
#define ZBUFF_STATIC_LINKING_ONLY
|
|
17
19
|
#include "zbuff.h"
|
|
18
20
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -26,19 +26,28 @@
|
|
|
26
26
|
#include <string.h> /* memset */
|
|
27
27
|
#include <time.h> /* clock */
|
|
28
28
|
|
|
29
|
-
#include "../common/mem.h" /* read */
|
|
30
|
-
#include "../common/pool.h"
|
|
31
|
-
#include "../common/threading.h"
|
|
32
|
-
#include "cover.h"
|
|
33
|
-
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
34
29
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
35
|
-
#define ZDICT_STATIC_LINKING_ONLY
|
|
30
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
|
36
31
|
#endif
|
|
37
|
-
|
|
32
|
+
|
|
33
|
+
#include "../common/mem.h" /* read */
|
|
34
|
+
#include "../common/pool.h" /* POOL_ctx */
|
|
35
|
+
#include "../common/threading.h" /* ZSTD_pthread_mutex_t */
|
|
36
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
37
|
+
#include "../common/bits.h" /* ZSTD_highbit32 */
|
|
38
|
+
#include "../zdict.h"
|
|
39
|
+
#include "cover.h"
|
|
38
40
|
|
|
39
41
|
/*-*************************************
|
|
40
42
|
* Constants
|
|
41
43
|
***************************************/
|
|
44
|
+
/**
|
|
45
|
+
* There are 32bit indexes used to ref samples, so limit samples size to 4GB
|
|
46
|
+
* on 64bit builds.
|
|
47
|
+
* For 32bit builds we choose 1 GB.
|
|
48
|
+
* Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
|
|
49
|
+
* contiguous buffer, so 1GB is already a high limit.
|
|
50
|
+
*/
|
|
42
51
|
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
|
43
52
|
#define COVER_DEFAULT_SPLITPOINT 1.0
|
|
44
53
|
|
|
@@ -46,7 +55,7 @@
|
|
|
46
55
|
* Console display
|
|
47
56
|
***************************************/
|
|
48
57
|
#ifndef LOCALDISPLAYLEVEL
|
|
49
|
-
static int g_displayLevel =
|
|
58
|
+
static int g_displayLevel = 0;
|
|
50
59
|
#endif
|
|
51
60
|
#undef DISPLAY
|
|
52
61
|
#define DISPLAY(...) \
|
|
@@ -69,7 +78,7 @@ static clock_t g_time = 0;
|
|
|
69
78
|
#undef LOCALDISPLAYUPDATE
|
|
70
79
|
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
|
71
80
|
if (displayLevel >= l) { \
|
|
72
|
-
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) {
|
|
81
|
+
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
|
|
73
82
|
g_time = clock(); \
|
|
74
83
|
DISPLAY(__VA_ARGS__); \
|
|
75
84
|
} \
|
|
@@ -292,9 +301,10 @@ static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
|
|
|
292
301
|
* Returns the first pointer in [first, last) whose element does not compare
|
|
293
302
|
* less than value. If no such element exists it returns last.
|
|
294
303
|
*/
|
|
295
|
-
static const size_t *COVER_lower_bound(const size_t
|
|
304
|
+
static const size_t *COVER_lower_bound(const size_t* first, const size_t* last,
|
|
296
305
|
size_t value) {
|
|
297
|
-
size_t count = last - first;
|
|
306
|
+
size_t count = (size_t)(last - first);
|
|
307
|
+
assert(last >= first);
|
|
298
308
|
while (count != 0) {
|
|
299
309
|
size_t step = count / 2;
|
|
300
310
|
const size_t *ptr = first;
|
|
@@ -533,14 +543,15 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
|
|
|
533
543
|
|
|
534
544
|
/**
|
|
535
545
|
* Prepare a context for dictionary building.
|
|
536
|
-
* The context is only dependent on the parameter `d` and can used multiple
|
|
546
|
+
* The context is only dependent on the parameter `d` and can be used multiple
|
|
537
547
|
* times.
|
|
538
548
|
* Returns 0 on success or error code on error.
|
|
539
549
|
* The context must be destroyed with `COVER_ctx_destroy()`.
|
|
540
550
|
*/
|
|
541
551
|
static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
542
552
|
const size_t *samplesSizes, unsigned nbSamples,
|
|
543
|
-
unsigned d, double splitPoint)
|
|
553
|
+
unsigned d, double splitPoint)
|
|
554
|
+
{
|
|
544
555
|
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
|
545
556
|
const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
|
|
546
557
|
/* Split samples into testing and training sets */
|
|
@@ -638,7 +649,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
|
638
649
|
|
|
639
650
|
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
|
|
640
651
|
{
|
|
641
|
-
const double ratio = (double)nbDmers / maxDictSize;
|
|
652
|
+
const double ratio = (double)nbDmers / (double)maxDictSize;
|
|
642
653
|
if (ratio >= 10) {
|
|
643
654
|
return;
|
|
644
655
|
}
|
|
@@ -724,7 +735,7 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
|
|
724
735
|
return tail;
|
|
725
736
|
}
|
|
726
737
|
|
|
727
|
-
|
|
738
|
+
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover(
|
|
728
739
|
void *dictBuffer, size_t dictBufferCapacity,
|
|
729
740
|
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
|
730
741
|
ZDICT_cover_params_t parameters)
|
|
@@ -734,7 +745,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
|
734
745
|
COVER_map_t activeDmers;
|
|
735
746
|
parameters.splitPoint = 1.0;
|
|
736
747
|
/* Initialize global data */
|
|
737
|
-
g_displayLevel = parameters.zParams.notificationLevel;
|
|
748
|
+
g_displayLevel = (int)parameters.zParams.notificationLevel;
|
|
738
749
|
/* Checks */
|
|
739
750
|
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
|
|
740
751
|
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
|
@@ -898,8 +909,10 @@ void COVER_best_start(COVER_best_t *best) {
|
|
|
898
909
|
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
|
899
910
|
* If this dictionary is the best so far save it and its parameters.
|
|
900
911
|
*/
|
|
901
|
-
void COVER_best_finish(COVER_best_t
|
|
902
|
-
|
|
912
|
+
void COVER_best_finish(COVER_best_t* best,
|
|
913
|
+
ZDICT_cover_params_t parameters,
|
|
914
|
+
COVER_dictSelection_t selection)
|
|
915
|
+
{
|
|
903
916
|
void* dict = selection.dictContent;
|
|
904
917
|
size_t compressedSize = selection.totalCompressedSize;
|
|
905
918
|
size_t dictSize = selection.dictSize;
|
|
@@ -942,9 +955,17 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
|
|
|
942
955
|
}
|
|
943
956
|
}
|
|
944
957
|
|
|
958
|
+
static COVER_dictSelection_t setDictSelection(BYTE* buf, size_t s, size_t csz)
|
|
959
|
+
{
|
|
960
|
+
COVER_dictSelection_t ds;
|
|
961
|
+
ds.dictContent = buf;
|
|
962
|
+
ds.dictSize = s;
|
|
963
|
+
ds.totalCompressedSize = csz;
|
|
964
|
+
return ds;
|
|
965
|
+
}
|
|
966
|
+
|
|
945
967
|
COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
|
|
946
|
-
|
|
947
|
-
return selection;
|
|
968
|
+
return setDictSelection(NULL, 0, error);
|
|
948
969
|
}
|
|
949
970
|
|
|
950
971
|
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
|
|
@@ -963,8 +984,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
|
|
|
963
984
|
size_t largestCompressed = 0;
|
|
964
985
|
BYTE* customDictContentEnd = customDictContent + dictContentSize;
|
|
965
986
|
|
|
966
|
-
BYTE
|
|
967
|
-
BYTE
|
|
987
|
+
BYTE* largestDictbuffer = (BYTE*)malloc(dictBufferCapacity);
|
|
988
|
+
BYTE* candidateDictBuffer = (BYTE*)malloc(dictBufferCapacity);
|
|
968
989
|
double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
|
|
969
990
|
|
|
970
991
|
if (!largestDictbuffer || !candidateDictBuffer) {
|
|
@@ -997,9 +1018,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
|
|
|
997
1018
|
}
|
|
998
1019
|
|
|
999
1020
|
if (params.shrinkDict == 0) {
|
|
1000
|
-
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
|
|
1001
1021
|
free(candidateDictBuffer);
|
|
1002
|
-
return
|
|
1022
|
+
return setDictSelection(largestDictbuffer, dictContentSize, totalCompressedSize);
|
|
1003
1023
|
}
|
|
1004
1024
|
|
|
1005
1025
|
largestDict = dictContentSize;
|
|
@@ -1031,20 +1051,16 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
|
|
|
1031
1051
|
return COVER_dictSelectionError(totalCompressedSize);
|
|
1032
1052
|
}
|
|
1033
1053
|
|
|
1034
|
-
if (totalCompressedSize <= largestCompressed * regressionTolerance) {
|
|
1035
|
-
COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
|
|
1054
|
+
if ((double)totalCompressedSize <= (double)largestCompressed * regressionTolerance) {
|
|
1036
1055
|
free(largestDictbuffer);
|
|
1037
|
-
return
|
|
1056
|
+
return setDictSelection( candidateDictBuffer, dictContentSize, totalCompressedSize );
|
|
1038
1057
|
}
|
|
1039
1058
|
dictContentSize *= 2;
|
|
1040
1059
|
}
|
|
1041
1060
|
dictContentSize = largestDict;
|
|
1042
1061
|
totalCompressedSize = largestCompressed;
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
free(candidateDictBuffer);
|
|
1046
|
-
return selection;
|
|
1047
|
-
}
|
|
1062
|
+
free(candidateDictBuffer);
|
|
1063
|
+
return setDictSelection( largestDictbuffer, dictContentSize, totalCompressedSize );
|
|
1048
1064
|
}
|
|
1049
1065
|
|
|
1050
1066
|
/**
|
|
@@ -1062,18 +1078,19 @@ typedef struct COVER_tryParameters_data_s {
|
|
|
1062
1078
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
|
1063
1079
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
|
1064
1080
|
*/
|
|
1065
|
-
static void COVER_tryParameters(void *opaque)
|
|
1081
|
+
static void COVER_tryParameters(void *opaque)
|
|
1082
|
+
{
|
|
1066
1083
|
/* Save parameters as local variables */
|
|
1067
|
-
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t
|
|
1084
|
+
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
|
|
1068
1085
|
const COVER_ctx_t *const ctx = data->ctx;
|
|
1069
1086
|
const ZDICT_cover_params_t parameters = data->parameters;
|
|
1070
1087
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
|
1071
1088
|
size_t totalCompressedSize = ERROR(GENERIC);
|
|
1072
1089
|
/* Allocate space for hash table, dict, and freqs */
|
|
1073
1090
|
COVER_map_t activeDmers;
|
|
1074
|
-
BYTE
|
|
1091
|
+
BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
|
|
1075
1092
|
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
|
1076
|
-
U32
|
|
1093
|
+
U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
|
|
1077
1094
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
|
1078
1095
|
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
|
1079
1096
|
goto _cleanup;
|
|
@@ -1103,15 +1120,14 @@ _cleanup:
|
|
|
1103
1120
|
free(data);
|
|
1104
1121
|
COVER_map_destroy(&activeDmers);
|
|
1105
1122
|
COVER_dictSelectionFree(selection);
|
|
1106
|
-
|
|
1107
|
-
free(freqs);
|
|
1108
|
-
}
|
|
1123
|
+
free(freqs);
|
|
1109
1124
|
}
|
|
1110
1125
|
|
|
1111
|
-
|
|
1112
|
-
void
|
|
1113
|
-
const size_t
|
|
1114
|
-
ZDICT_cover_params_t
|
|
1126
|
+
ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1127
|
+
void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
|
|
1128
|
+
const size_t* samplesSizes, unsigned nbSamples,
|
|
1129
|
+
ZDICT_cover_params_t* parameters)
|
|
1130
|
+
{
|
|
1115
1131
|
/* constants */
|
|
1116
1132
|
const unsigned nbThreads = parameters->nbThreads;
|
|
1117
1133
|
const double splitPoint =
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -8,18 +8,13 @@
|
|
|
8
8
|
* You may select, at your option, one of the above-listed licenses.
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
-
#include <stdio.h> /* fprintf */
|
|
12
|
-
#include <stdlib.h> /* malloc, free, qsort */
|
|
13
|
-
#include <string.h> /* memset */
|
|
14
|
-
#include <time.h> /* clock */
|
|
15
|
-
#include "../common/mem.h" /* read */
|
|
16
|
-
#include "../common/pool.h"
|
|
17
|
-
#include "../common/threading.h"
|
|
18
|
-
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
19
11
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
20
|
-
#define ZDICT_STATIC_LINKING_ONLY
|
|
12
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
|
21
13
|
#endif
|
|
22
|
-
|
|
14
|
+
|
|
15
|
+
#include "../common/threading.h" /* ZSTD_pthread_mutex_t */
|
|
16
|
+
#include "../common/mem.h" /* U32, BYTE */
|
|
17
|
+
#include "../zdict.h"
|
|
23
18
|
|
|
24
19
|
/**
|
|
25
20
|
* COVER_best_t is used for two purposes:
|
|
@@ -1576,7 +1576,7 @@ note:
|
|
|
1576
1576
|
/* Construct the inverse suffix array of type B* suffixes using trsort. */
|
|
1577
1577
|
trsort(ISAb, SA, m, 1);
|
|
1578
1578
|
|
|
1579
|
-
/* Set the sorted order of
|
|
1579
|
+
/* Set the sorted order of type B* suffixes. */
|
|
1580
1580
|
for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
|
|
1581
1581
|
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
|
|
1582
1582
|
if(0 <= i) {
|