extzstd 0.3.2 → 0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/contrib/zstd/CHANGELOG +225 -1
- data/contrib/zstd/CONTRIBUTING.md +158 -75
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +106 -69
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +64 -36
- data/contrib/zstd/SECURITY.md +15 -0
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +117 -199
- data/contrib/zstd/lib/README.md +37 -7
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +80 -86
- data/contrib/zstd/lib/common/compiler.h +225 -63
- data/contrib/zstd/lib/common/cpu.h +37 -1
- data/contrib/zstd/lib/common/debug.c +7 -1
- data/contrib/zstd/lib/common/debug.h +21 -12
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +93 -5
- data/contrib/zstd/lib/common/fse.h +12 -87
- data/contrib/zstd/lib/common/fse_decompress.c +37 -117
- data/contrib/zstd/lib/common/huf.h +97 -172
- data/contrib/zstd/lib/common/mem.h +58 -58
- data/contrib/zstd/lib/common/pool.c +38 -17
- data/contrib/zstd/lib/common/pool.h +10 -4
- data/contrib/zstd/lib/common/portability_macros.h +158 -0
- data/contrib/zstd/lib/common/threading.c +74 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +6 -814
- data/contrib/zstd/lib/common/xxhash.h +6930 -195
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +68 -154
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +75 -155
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +810 -259
- data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
- data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
- data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
- data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
- data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
- data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
- data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
- data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
- data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +237 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +1030 -332
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +26 -7
- data/ext/extzstd.c +51 -24
- data/ext/extzstd.h +33 -6
- data/ext/extzstd_stream.c +74 -31
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +17 -7
- data/contrib/zstd/appveyor.yml +0 -292
- data/ext/depend +0 -2
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -33,6 +33,12 @@
|
|
33
33
|
*/
|
34
34
|
|
35
35
|
|
36
|
+
/* Streaming state is used to inform allocation of the literal buffer */
|
37
|
+
typedef enum {
|
38
|
+
not_streaming = 0,
|
39
|
+
is_streaming = 1
|
40
|
+
} streaming_operation;
|
41
|
+
|
36
42
|
/* ZSTD_decompressBlock_internal() :
|
37
43
|
* decompress block, starting at `src`,
|
38
44
|
* into destination buffer `dst`.
|
@@ -41,7 +47,7 @@
|
|
41
47
|
*/
|
42
48
|
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
43
49
|
void* dst, size_t dstCapacity,
|
44
|
-
const void* src, size_t srcSize, const
|
50
|
+
const void* src, size_t srcSize, const streaming_operation streaming);
|
45
51
|
|
46
52
|
/* ZSTD_buildFSETable() :
|
47
53
|
* generate FSE decoding table for one symbol (ll, ml or off)
|
@@ -54,9 +60,14 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
54
60
|
*/
|
55
61
|
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
56
62
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
57
|
-
const U32* baseValue, const
|
63
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
58
64
|
unsigned tableLog, void* wksp, size_t wkspSize,
|
59
65
|
int bmi2);
|
60
66
|
|
67
|
+
/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
|
68
|
+
size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
|
69
|
+
void* dst, size_t dstCapacity,
|
70
|
+
const void* src, size_t srcSize);
|
71
|
+
|
61
72
|
|
62
73
|
#endif /* ZSTD_DEC_BLOCK_H */
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -20,7 +20,7 @@
|
|
20
20
|
* Dependencies
|
21
21
|
*********************************************************/
|
22
22
|
#include "../common/mem.h" /* BYTE, U16, U32 */
|
23
|
-
#include "../common/zstd_internal.h" /*
|
23
|
+
#include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
|
24
24
|
|
25
25
|
|
26
26
|
|
@@ -40,7 +40,7 @@ static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
|
|
40
40
|
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
41
41
|
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
42
42
|
|
43
|
-
static UNUSED_ATTR const
|
43
|
+
static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
|
44
44
|
0, 1, 2, 3, 4, 5, 6, 7,
|
45
45
|
8, 9, 10, 11, 12, 13, 14, 15,
|
46
46
|
16, 17, 18, 19, 20, 21, 22, 23,
|
@@ -75,12 +75,13 @@ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
|
|
75
75
|
|
76
76
|
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
|
77
77
|
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
|
78
|
+
#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
|
78
79
|
|
79
80
|
typedef struct {
|
80
81
|
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
|
81
82
|
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
|
82
83
|
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
83
|
-
HUF_DTable hufTable[HUF_DTABLE_SIZE(
|
84
|
+
HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
|
84
85
|
U32 rep[ZSTD_REP_NUM];
|
85
86
|
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
|
86
87
|
} ZSTD_entropyDTables_t;
|
@@ -99,6 +100,29 @@ typedef enum {
|
|
99
100
|
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
100
101
|
} ZSTD_dictUses_e;
|
101
102
|
|
103
|
+
/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
|
104
|
+
typedef struct {
|
105
|
+
const ZSTD_DDict** ddictPtrTable;
|
106
|
+
size_t ddictPtrTableSize;
|
107
|
+
size_t ddictPtrCount;
|
108
|
+
} ZSTD_DDictHashSet;
|
109
|
+
|
110
|
+
#ifndef ZSTD_DECODER_INTERNAL_BUFFER
|
111
|
+
# define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
|
112
|
+
#endif
|
113
|
+
|
114
|
+
#define ZSTD_LBMIN 64
|
115
|
+
#define ZSTD_LBMAX (128 << 10)
|
116
|
+
|
117
|
+
/* extra buffer, compensates when dst is not large enough to store litBuffer */
|
118
|
+
#define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
|
119
|
+
|
120
|
+
typedef enum {
|
121
|
+
ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
|
122
|
+
ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
|
123
|
+
ZSTD_split = 2 /* Split between litExtraBuffer and dst */
|
124
|
+
} ZSTD_litLocation_e;
|
125
|
+
|
102
126
|
struct ZSTD_DCtx_s
|
103
127
|
{
|
104
128
|
const ZSTD_seqSymbol* LLTptr;
|
@@ -113,6 +137,7 @@ struct ZSTD_DCtx_s
|
|
113
137
|
const void* dictEnd; /* end of previous segment */
|
114
138
|
size_t expected;
|
115
139
|
ZSTD_frameHeader fParams;
|
140
|
+
U64 processedCSize;
|
116
141
|
U64 decodedSize;
|
117
142
|
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
|
118
143
|
ZSTD_dStage stage;
|
@@ -128,7 +153,10 @@ struct ZSTD_DCtx_s
|
|
128
153
|
size_t litSize;
|
129
154
|
size_t rleSize;
|
130
155
|
size_t staticSize;
|
156
|
+
int isFrameDecompression;
|
157
|
+
#if DYNAMIC_BMI2 != 0
|
131
158
|
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
|
159
|
+
#endif
|
132
160
|
|
133
161
|
/* dictionary */
|
134
162
|
ZSTD_DDict* ddictLocal;
|
@@ -136,6 +164,10 @@ struct ZSTD_DCtx_s
|
|
136
164
|
U32 dictID;
|
137
165
|
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
|
138
166
|
ZSTD_dictUses_e dictUses;
|
167
|
+
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
|
168
|
+
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
|
169
|
+
int disableHufAsm;
|
170
|
+
int maxBlockSizeParam;
|
139
171
|
|
140
172
|
/* streaming */
|
141
173
|
ZSTD_dStreamStage streamStage;
|
@@ -148,16 +180,21 @@ struct ZSTD_DCtx_s
|
|
148
180
|
size_t outStart;
|
149
181
|
size_t outEnd;
|
150
182
|
size_t lhSize;
|
183
|
+
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
151
184
|
void* legacyContext;
|
152
185
|
U32 previousLegacyVersion;
|
153
186
|
U32 legacyVersion;
|
187
|
+
#endif
|
154
188
|
U32 hostageByte;
|
155
189
|
int noForwardProgress;
|
156
190
|
ZSTD_bufferMode_e outBufferMode;
|
157
191
|
ZSTD_outBuffer expectedOutBuffer;
|
158
192
|
|
159
193
|
/* workspace */
|
160
|
-
BYTE litBuffer
|
194
|
+
BYTE* litBuffer;
|
195
|
+
const BYTE* litBufferEnd;
|
196
|
+
ZSTD_litLocation_e litBufferLocation;
|
197
|
+
BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
|
161
198
|
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
|
162
199
|
|
163
200
|
size_t oversizedDuration;
|
@@ -166,8 +203,21 @@ struct ZSTD_DCtx_s
|
|
166
203
|
void const* dictContentBeginForFuzzing;
|
167
204
|
void const* dictContentEndForFuzzing;
|
168
205
|
#endif
|
206
|
+
|
207
|
+
/* Tracing */
|
208
|
+
#if ZSTD_TRACE
|
209
|
+
ZSTD_TraceCtx traceCtx;
|
210
|
+
#endif
|
169
211
|
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
|
170
212
|
|
213
|
+
MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
|
214
|
+
#if DYNAMIC_BMI2 != 0
|
215
|
+
return dctx->bmi2;
|
216
|
+
#else
|
217
|
+
(void)dctx;
|
218
|
+
return 0;
|
219
|
+
#endif
|
220
|
+
}
|
171
221
|
|
172
222
|
/*-*******************************************************
|
173
223
|
* Shared internal functions
|
@@ -184,7 +234,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
|
184
234
|
* If yes, do nothing (continue on current segment).
|
185
235
|
* If not, classify previous segment as "external dictionary", and start a new segment.
|
186
236
|
* This function cannot fail. */
|
187
|
-
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
|
237
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
|
188
238
|
|
189
239
|
|
190
240
|
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -15,6 +15,7 @@
|
|
15
15
|
***************************************/
|
16
16
|
#define ZBUFF_STATIC_LINKING_ONLY
|
17
17
|
#include "zbuff.h"
|
18
|
+
#include "../common/error_private.h"
|
18
19
|
|
19
20
|
|
20
21
|
/*-***********************************************************
|
@@ -73,13 +74,32 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
|
|
73
74
|
ZSTD_parameters params, unsigned long long pledgedSrcSize)
|
74
75
|
{
|
75
76
|
if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* preserve "0 == unknown" behavior */
|
76
|
-
|
77
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only), "");
|
78
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setPledgedSrcSize(zbc, pledgedSrcSize), "");
|
79
|
+
|
80
|
+
FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
|
81
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_windowLog, params.cParams.windowLog), "");
|
82
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_hashLog, params.cParams.hashLog), "");
|
83
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_chainLog, params.cParams.chainLog), "");
|
84
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_searchLog, params.cParams.searchLog), "");
|
85
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_minMatch, params.cParams.minMatch), "");
|
86
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_targetLength, params.cParams.targetLength), "");
|
87
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_strategy, params.cParams.strategy), "");
|
88
|
+
|
89
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_contentSizeFlag, params.fParams.contentSizeFlag), "");
|
90
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_checksumFlag, params.fParams.checksumFlag), "");
|
91
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_dictIDFlag, params.fParams.noDictIDFlag), "");
|
92
|
+
|
93
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), "");
|
94
|
+
return 0;
|
77
95
|
}
|
78
96
|
|
79
|
-
|
80
97
|
size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
|
81
98
|
{
|
82
|
-
|
99
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only), "");
|
100
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_compressionLevel, compressionLevel), "");
|
101
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), "");
|
102
|
+
return 0;
|
83
103
|
}
|
84
104
|
|
85
105
|
size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -13,6 +13,8 @@
|
|
13
13
|
/* *************************************
|
14
14
|
* Dependencies
|
15
15
|
***************************************/
|
16
|
+
#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* suppress warning on ZSTD_initDStream_usingDict */
|
17
|
+
#include "../zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
|
16
18
|
#define ZBUFF_STATIC_LINKING_ONLY
|
17
19
|
#include "zbuff.h"
|
18
20
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -26,19 +26,28 @@
|
|
26
26
|
#include <string.h> /* memset */
|
27
27
|
#include <time.h> /* clock */
|
28
28
|
|
29
|
-
#include "../common/mem.h" /* read */
|
30
|
-
#include "../common/pool.h"
|
31
|
-
#include "../common/threading.h"
|
32
|
-
#include "cover.h"
|
33
|
-
#include "../common/zstd_internal.h" /* includes zstd.h */
|
34
29
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
35
|
-
#define ZDICT_STATIC_LINKING_ONLY
|
30
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
36
31
|
#endif
|
37
|
-
|
32
|
+
|
33
|
+
#include "../common/mem.h" /* read */
|
34
|
+
#include "../common/pool.h" /* POOL_ctx */
|
35
|
+
#include "../common/threading.h" /* ZSTD_pthread_mutex_t */
|
36
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
37
|
+
#include "../common/bits.h" /* ZSTD_highbit32 */
|
38
|
+
#include "../zdict.h"
|
39
|
+
#include "cover.h"
|
38
40
|
|
39
41
|
/*-*************************************
|
40
42
|
* Constants
|
41
43
|
***************************************/
|
44
|
+
/**
|
45
|
+
* There are 32bit indexes used to ref samples, so limit samples size to 4GB
|
46
|
+
* on 64bit builds.
|
47
|
+
* For 32bit builds we choose 1 GB.
|
48
|
+
* Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
|
49
|
+
* contiguous buffer, so 1GB is already a high limit.
|
50
|
+
*/
|
42
51
|
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
43
52
|
#define COVER_DEFAULT_SPLITPOINT 1.0
|
44
53
|
|
@@ -46,7 +55,7 @@
|
|
46
55
|
* Console display
|
47
56
|
***************************************/
|
48
57
|
#ifndef LOCALDISPLAYLEVEL
|
49
|
-
static int g_displayLevel =
|
58
|
+
static int g_displayLevel = 0;
|
50
59
|
#endif
|
51
60
|
#undef DISPLAY
|
52
61
|
#define DISPLAY(...) \
|
@@ -69,7 +78,7 @@ static clock_t g_time = 0;
|
|
69
78
|
#undef LOCALDISPLAYUPDATE
|
70
79
|
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
71
80
|
if (displayLevel >= l) { \
|
72
|
-
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) {
|
81
|
+
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
|
73
82
|
g_time = clock(); \
|
74
83
|
DISPLAY(__VA_ARGS__); \
|
75
84
|
} \
|
@@ -292,9 +301,10 @@ static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
|
|
292
301
|
* Returns the first pointer in [first, last) whose element does not compare
|
293
302
|
* less than value. If no such element exists it returns last.
|
294
303
|
*/
|
295
|
-
static const size_t *COVER_lower_bound(const size_t
|
304
|
+
static const size_t *COVER_lower_bound(const size_t* first, const size_t* last,
|
296
305
|
size_t value) {
|
297
|
-
size_t count = last - first;
|
306
|
+
size_t count = (size_t)(last - first);
|
307
|
+
assert(last >= first);
|
298
308
|
while (count != 0) {
|
299
309
|
size_t step = count / 2;
|
300
310
|
const size_t *ptr = first;
|
@@ -533,14 +543,15 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
|
|
533
543
|
|
534
544
|
/**
|
535
545
|
* Prepare a context for dictionary building.
|
536
|
-
* The context is only dependent on the parameter `d` and can used multiple
|
546
|
+
* The context is only dependent on the parameter `d` and can be used multiple
|
537
547
|
* times.
|
538
548
|
* Returns 0 on success or error code on error.
|
539
549
|
* The context must be destroyed with `COVER_ctx_destroy()`.
|
540
550
|
*/
|
541
551
|
static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
542
552
|
const size_t *samplesSizes, unsigned nbSamples,
|
543
|
-
unsigned d, double splitPoint)
|
553
|
+
unsigned d, double splitPoint)
|
554
|
+
{
|
544
555
|
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
545
556
|
const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
|
546
557
|
/* Split samples into testing and training sets */
|
@@ -638,7 +649,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
638
649
|
|
639
650
|
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
|
640
651
|
{
|
641
|
-
const double ratio = (double)nbDmers / maxDictSize;
|
652
|
+
const double ratio = (double)nbDmers / (double)maxDictSize;
|
642
653
|
if (ratio >= 10) {
|
643
654
|
return;
|
644
655
|
}
|
@@ -724,7 +735,7 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
|
724
735
|
return tail;
|
725
736
|
}
|
726
737
|
|
727
|
-
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
738
|
+
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover(
|
728
739
|
void *dictBuffer, size_t dictBufferCapacity,
|
729
740
|
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
730
741
|
ZDICT_cover_params_t parameters)
|
@@ -734,7 +745,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
734
745
|
COVER_map_t activeDmers;
|
735
746
|
parameters.splitPoint = 1.0;
|
736
747
|
/* Initialize global data */
|
737
|
-
g_displayLevel = parameters.zParams.notificationLevel;
|
748
|
+
g_displayLevel = (int)parameters.zParams.notificationLevel;
|
738
749
|
/* Checks */
|
739
750
|
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
|
740
751
|
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
@@ -898,8 +909,10 @@ void COVER_best_start(COVER_best_t *best) {
|
|
898
909
|
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
899
910
|
* If this dictionary is the best so far save it and its parameters.
|
900
911
|
*/
|
901
|
-
void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
|
902
|
-
|
912
|
+
void COVER_best_finish(COVER_best_t* best,
|
913
|
+
ZDICT_cover_params_t parameters,
|
914
|
+
COVER_dictSelection_t selection)
|
915
|
+
{
|
903
916
|
void* dict = selection.dictContent;
|
904
917
|
size_t compressedSize = selection.totalCompressedSize;
|
905
918
|
size_t dictSize = selection.dictSize;
|
@@ -942,9 +955,17 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
|
|
942
955
|
}
|
943
956
|
}
|
944
957
|
|
958
|
+
static COVER_dictSelection_t setDictSelection(BYTE* buf, size_t s, size_t csz)
|
959
|
+
{
|
960
|
+
COVER_dictSelection_t ds;
|
961
|
+
ds.dictContent = buf;
|
962
|
+
ds.dictSize = s;
|
963
|
+
ds.totalCompressedSize = csz;
|
964
|
+
return ds;
|
965
|
+
}
|
966
|
+
|
945
967
|
COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
|
946
|
-
|
947
|
-
return selection;
|
968
|
+
return setDictSelection(NULL, 0, error);
|
948
969
|
}
|
949
970
|
|
950
971
|
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
|
@@ -963,8 +984,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
|
|
963
984
|
size_t largestCompressed = 0;
|
964
985
|
BYTE* customDictContentEnd = customDictContent + dictContentSize;
|
965
986
|
|
966
|
-
BYTE
|
967
|
-
BYTE
|
987
|
+
BYTE* largestDictbuffer = (BYTE*)malloc(dictBufferCapacity);
|
988
|
+
BYTE* candidateDictBuffer = (BYTE*)malloc(dictBufferCapacity);
|
968
989
|
double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
|
969
990
|
|
970
991
|
if (!largestDictbuffer || !candidateDictBuffer) {
|
@@ -997,9 +1018,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
|
|
997
1018
|
}
|
998
1019
|
|
999
1020
|
if (params.shrinkDict == 0) {
|
1000
|
-
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
|
1001
1021
|
free(candidateDictBuffer);
|
1002
|
-
return
|
1022
|
+
return setDictSelection(largestDictbuffer, dictContentSize, totalCompressedSize);
|
1003
1023
|
}
|
1004
1024
|
|
1005
1025
|
largestDict = dictContentSize;
|
@@ -1031,20 +1051,16 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
|
|
1031
1051
|
return COVER_dictSelectionError(totalCompressedSize);
|
1032
1052
|
}
|
1033
1053
|
|
1034
|
-
if (totalCompressedSize <= largestCompressed * regressionTolerance) {
|
1035
|
-
COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
|
1054
|
+
if ((double)totalCompressedSize <= (double)largestCompressed * regressionTolerance) {
|
1036
1055
|
free(largestDictbuffer);
|
1037
|
-
return
|
1056
|
+
return setDictSelection( candidateDictBuffer, dictContentSize, totalCompressedSize );
|
1038
1057
|
}
|
1039
1058
|
dictContentSize *= 2;
|
1040
1059
|
}
|
1041
1060
|
dictContentSize = largestDict;
|
1042
1061
|
totalCompressedSize = largestCompressed;
|
1043
|
-
|
1044
|
-
|
1045
|
-
free(candidateDictBuffer);
|
1046
|
-
return selection;
|
1047
|
-
}
|
1062
|
+
free(candidateDictBuffer);
|
1063
|
+
return setDictSelection( largestDictbuffer, dictContentSize, totalCompressedSize );
|
1048
1064
|
}
|
1049
1065
|
|
1050
1066
|
/**
|
@@ -1062,18 +1078,19 @@ typedef struct COVER_tryParameters_data_s {
|
|
1062
1078
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
1063
1079
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
1064
1080
|
*/
|
1065
|
-
static void COVER_tryParameters(void *opaque)
|
1081
|
+
static void COVER_tryParameters(void *opaque)
|
1082
|
+
{
|
1066
1083
|
/* Save parameters as local variables */
|
1067
|
-
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t
|
1084
|
+
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
|
1068
1085
|
const COVER_ctx_t *const ctx = data->ctx;
|
1069
1086
|
const ZDICT_cover_params_t parameters = data->parameters;
|
1070
1087
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
1071
1088
|
size_t totalCompressedSize = ERROR(GENERIC);
|
1072
1089
|
/* Allocate space for hash table, dict, and freqs */
|
1073
1090
|
COVER_map_t activeDmers;
|
1074
|
-
BYTE
|
1091
|
+
BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
|
1075
1092
|
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
1076
|
-
U32
|
1093
|
+
U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
|
1077
1094
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
1078
1095
|
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
1079
1096
|
goto _cleanup;
|
@@ -1103,15 +1120,14 @@ _cleanup:
|
|
1103
1120
|
free(data);
|
1104
1121
|
COVER_map_destroy(&activeDmers);
|
1105
1122
|
COVER_dictSelectionFree(selection);
|
1106
|
-
|
1107
|
-
free(freqs);
|
1108
|
-
}
|
1123
|
+
free(freqs);
|
1109
1124
|
}
|
1110
1125
|
|
1111
|
-
|
1112
|
-
void
|
1113
|
-
const size_t
|
1114
|
-
ZDICT_cover_params_t
|
1126
|
+
ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
1127
|
+
void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
|
1128
|
+
const size_t* samplesSizes, unsigned nbSamples,
|
1129
|
+
ZDICT_cover_params_t* parameters)
|
1130
|
+
{
|
1115
1131
|
/* constants */
|
1116
1132
|
const unsigned nbThreads = parameters->nbThreads;
|
1117
1133
|
const double splitPoint =
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -8,18 +8,13 @@
|
|
8
8
|
* You may select, at your option, one of the above-listed licenses.
|
9
9
|
*/
|
10
10
|
|
11
|
-
#include <stdio.h> /* fprintf */
|
12
|
-
#include <stdlib.h> /* malloc, free, qsort */
|
13
|
-
#include <string.h> /* memset */
|
14
|
-
#include <time.h> /* clock */
|
15
|
-
#include "../common/mem.h" /* read */
|
16
|
-
#include "../common/pool.h"
|
17
|
-
#include "../common/threading.h"
|
18
|
-
#include "../common/zstd_internal.h" /* includes zstd.h */
|
19
11
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
20
|
-
#define ZDICT_STATIC_LINKING_ONLY
|
12
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
21
13
|
#endif
|
22
|
-
|
14
|
+
|
15
|
+
#include "../common/threading.h" /* ZSTD_pthread_mutex_t */
|
16
|
+
#include "../common/mem.h" /* U32, BYTE */
|
17
|
+
#include "../zdict.h"
|
23
18
|
|
24
19
|
/**
|
25
20
|
* COVER_best_t is used for two purposes:
|
@@ -1576,7 +1576,7 @@ note:
|
|
1576
1576
|
/* Construct the inverse suffix array of type B* suffixes using trsort. */
|
1577
1577
|
trsort(ISAb, SA, m, 1);
|
1578
1578
|
|
1579
|
-
/* Set the sorted order of
|
1579
|
+
/* Set the sorted order of type B* suffixes. */
|
1580
1580
|
for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
|
1581
1581
|
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
|
1582
1582
|
if(0 <= i) {
|