zstd-ruby 1.4.1.0 → 1.5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +8 -0
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/libzstd/BUCK +5 -7
- data/ext/zstdruby/libzstd/Makefile +304 -113
- data/ext/zstdruby/libzstd/README.md +83 -20
- data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
- data/ext/zstdruby/libzstd/common/compiler.h +150 -8
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +8 -4
- data/ext/zstdruby/libzstd/common/fse.h +50 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -55
- data/ext/zstdruby/libzstd/common/huf.h +43 -39
- data/ext/zstdruby/libzstd/common/mem.h +69 -25
- data/ext/zstdruby/libzstd/common/pool.c +30 -20
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/threading.c +51 -4
- data/ext/zstdruby/libzstd/common/threading.h +36 -4
- data/ext/zstdruby/libzstd/common/xxhash.c +40 -92
- data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +230 -111
- data/ext/zstdruby/libzstd/common/zstd_trace.h +154 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +332 -193
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3614 -1696
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +546 -86
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +441 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +572 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +662 -0
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +43 -41
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +85 -80
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1184 -111
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +333 -208
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +228 -129
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +151 -440
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +395 -276
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +630 -231
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +606 -380
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +39 -9
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -46
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -31
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +53 -30
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +24 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +17 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +17 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +25 -11
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +43 -32
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +27 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +32 -20
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
- data/ext/zstdruby/libzstd/zstd.h +740 -153
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
- data/lib/zstd-ruby/version.rb +1 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +21 -10
- data/.travis.yml +0 -14
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -15,9 +15,9 @@
|
|
|
15
15
|
/*-*******************************************************
|
|
16
16
|
* Dependencies
|
|
17
17
|
*********************************************************/
|
|
18
|
-
#include <stddef.h> /* size_t */
|
|
19
|
-
#include "zstd.h" /* DCtx, and some public functions */
|
|
20
|
-
#include "zstd_internal.h" /* blockProperties_t, and some public functions */
|
|
18
|
+
#include "../common/zstd_deps.h" /* size_t */
|
|
19
|
+
#include "../zstd.h" /* DCtx, and some public functions */
|
|
20
|
+
#include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */
|
|
21
21
|
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
|
|
22
22
|
|
|
23
23
|
|
|
@@ -48,12 +48,15 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
48
48
|
* this function must be called with valid parameters only
|
|
49
49
|
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
|
|
50
50
|
* in which case it cannot fail.
|
|
51
|
+
* The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
|
|
52
|
+
* defined in zstd_decompress_internal.h.
|
|
51
53
|
* Internal use only.
|
|
52
54
|
*/
|
|
53
55
|
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
54
56
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
55
57
|
const U32* baseValue, const U32* nbAdditionalBits,
|
|
56
|
-
unsigned tableLog);
|
|
58
|
+
unsigned tableLog, void* wksp, size_t wkspSize,
|
|
59
|
+
int bmi2);
|
|
57
60
|
|
|
58
61
|
|
|
59
62
|
#endif /* ZSTD_DEC_BLOCK_H */
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -19,34 +19,34 @@
|
|
|
19
19
|
/*-*******************************************************
|
|
20
20
|
* Dependencies
|
|
21
21
|
*********************************************************/
|
|
22
|
-
#include "mem.h" /* BYTE, U16, U32 */
|
|
23
|
-
#include "zstd_internal.h" /* ZSTD_seqSymbol */
|
|
22
|
+
#include "../common/mem.h" /* BYTE, U16, U32 */
|
|
23
|
+
#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
/*-*******************************************************
|
|
28
28
|
* Constants
|
|
29
29
|
*********************************************************/
|
|
30
|
-
static const U32 LL_base[MaxLL+1] = {
|
|
30
|
+
static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
|
|
31
31
|
0, 1, 2, 3, 4, 5, 6, 7,
|
|
32
32
|
8, 9, 10, 11, 12, 13, 14, 15,
|
|
33
33
|
16, 18, 20, 22, 24, 28, 32, 40,
|
|
34
34
|
48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
|
|
35
35
|
0x2000, 0x4000, 0x8000, 0x10000 };
|
|
36
36
|
|
|
37
|
-
static const U32 OF_base[MaxOff+1] = {
|
|
37
|
+
static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
|
|
38
38
|
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
|
|
39
39
|
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
|
|
40
40
|
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
|
41
41
|
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
|
42
42
|
|
|
43
|
-
static const U32 OF_bits[MaxOff+1] = {
|
|
43
|
+
static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
|
|
44
44
|
0, 1, 2, 3, 4, 5, 6, 7,
|
|
45
45
|
8, 9, 10, 11, 12, 13, 14, 15,
|
|
46
46
|
16, 17, 18, 19, 20, 21, 22, 23,
|
|
47
47
|
24, 25, 26, 27, 28, 29, 30, 31 };
|
|
48
48
|
|
|
49
|
-
static const U32 ML_base[MaxML+1] = {
|
|
49
|
+
static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
|
|
50
50
|
3, 4, 5, 6, 7, 8, 9, 10,
|
|
51
51
|
11, 12, 13, 14, 15, 16, 17, 18,
|
|
52
52
|
19, 20, 21, 22, 23, 24, 25, 26,
|
|
@@ -73,12 +73,16 @@ static const U32 ML_base[MaxML+1] = {
|
|
|
73
73
|
|
|
74
74
|
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
|
|
75
75
|
|
|
76
|
+
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
|
|
77
|
+
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
|
|
78
|
+
|
|
76
79
|
typedef struct {
|
|
77
80
|
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
|
|
78
81
|
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
|
|
79
82
|
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
|
80
83
|
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
|
|
81
84
|
U32 rep[ZSTD_REP_NUM];
|
|
85
|
+
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
|
|
82
86
|
} ZSTD_entropyDTables_t;
|
|
83
87
|
|
|
84
88
|
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
|
|
@@ -95,6 +99,13 @@ typedef enum {
|
|
|
95
99
|
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
|
96
100
|
} ZSTD_dictUses_e;
|
|
97
101
|
|
|
102
|
+
/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
|
|
103
|
+
typedef struct {
|
|
104
|
+
const ZSTD_DDict** ddictPtrTable;
|
|
105
|
+
size_t ddictPtrTableSize;
|
|
106
|
+
size_t ddictPtrCount;
|
|
107
|
+
} ZSTD_DDictHashSet;
|
|
108
|
+
|
|
98
109
|
struct ZSTD_DCtx_s
|
|
99
110
|
{
|
|
100
111
|
const ZSTD_seqSymbol* LLTptr;
|
|
@@ -109,6 +120,7 @@ struct ZSTD_DCtx_s
|
|
|
109
120
|
const void* dictEnd; /* end of previous segment */
|
|
110
121
|
size_t expected;
|
|
111
122
|
ZSTD_frameHeader fParams;
|
|
123
|
+
U64 processedCSize;
|
|
112
124
|
U64 decodedSize;
|
|
113
125
|
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
|
|
114
126
|
ZSTD_dStage stage;
|
|
@@ -117,6 +129,8 @@ struct ZSTD_DCtx_s
|
|
|
117
129
|
XXH64_state_t xxhState;
|
|
118
130
|
size_t headerSize;
|
|
119
131
|
ZSTD_format_e format;
|
|
132
|
+
ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
|
|
133
|
+
U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
|
|
120
134
|
const BYTE* litPtr;
|
|
121
135
|
ZSTD_customMem customMem;
|
|
122
136
|
size_t litSize;
|
|
@@ -130,6 +144,8 @@ struct ZSTD_DCtx_s
|
|
|
130
144
|
U32 dictID;
|
|
131
145
|
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
|
|
132
146
|
ZSTD_dictUses_e dictUses;
|
|
147
|
+
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
|
|
148
|
+
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
|
|
133
149
|
|
|
134
150
|
/* streaming */
|
|
135
151
|
ZSTD_dStreamStage streamStage;
|
|
@@ -147,10 +163,24 @@ struct ZSTD_DCtx_s
|
|
|
147
163
|
U32 legacyVersion;
|
|
148
164
|
U32 hostageByte;
|
|
149
165
|
int noForwardProgress;
|
|
166
|
+
ZSTD_bufferMode_e outBufferMode;
|
|
167
|
+
ZSTD_outBuffer expectedOutBuffer;
|
|
150
168
|
|
|
151
169
|
/* workspace */
|
|
152
170
|
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
|
|
153
171
|
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
|
|
172
|
+
|
|
173
|
+
size_t oversizedDuration;
|
|
174
|
+
|
|
175
|
+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
|
176
|
+
void const* dictContentBeginForFuzzing;
|
|
177
|
+
void const* dictContentEndForFuzzing;
|
|
178
|
+
#endif
|
|
179
|
+
|
|
180
|
+
/* Tracing */
|
|
181
|
+
#if ZSTD_TRACE
|
|
182
|
+
ZSTD_TraceCtx traceCtx;
|
|
183
|
+
#endif
|
|
154
184
|
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
|
|
155
185
|
|
|
156
186
|
|
|
@@ -160,7 +190,7 @@ struct ZSTD_DCtx_s
|
|
|
160
190
|
|
|
161
191
|
/*! ZSTD_loadDEntropy() :
|
|
162
192
|
* dict : must point at beginning of a valid zstd dictionary.
|
|
163
|
-
* @return : size of entropy tables
|
|
193
|
+
* @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
|
|
164
194
|
size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
|
165
195
|
const void* const dict, size_t const dictSize);
|
|
166
196
|
|
|
@@ -169,7 +199,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
|
|
169
199
|
* If yes, do nothing (continue on current segment).
|
|
170
200
|
* If not, classify previous segment as "external dictionary", and start a new segment.
|
|
171
201
|
* This function cannot fail. */
|
|
172
|
-
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
|
|
202
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
|
|
173
203
|
|
|
174
204
|
|
|
175
205
|
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -28,7 +28,7 @@ extern "C" {
|
|
|
28
28
|
* Dependencies
|
|
29
29
|
***************************************/
|
|
30
30
|
#include <stddef.h> /* size_t */
|
|
31
|
-
#include "zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
|
|
31
|
+
#include "../zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
/* ***************************************************************
|
|
@@ -36,16 +36,17 @@ extern "C" {
|
|
|
36
36
|
*****************************************************************/
|
|
37
37
|
/* Deprecation warnings */
|
|
38
38
|
/* Should these warnings be a problem,
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
39
|
+
* it is generally possible to disable them,
|
|
40
|
+
* typically with -Wno-deprecated-declarations for gcc
|
|
41
|
+
* or _CRT_SECURE_NO_WARNINGS in Visual.
|
|
42
|
+
* Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS
|
|
43
|
+
*/
|
|
43
44
|
#ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS
|
|
44
45
|
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
|
|
45
46
|
#else
|
|
46
47
|
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
|
47
48
|
# define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
|
|
48
|
-
# elif (defined(
|
|
49
|
+
# elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__)
|
|
49
50
|
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
|
|
50
51
|
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
51
52
|
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
|
|
@@ -185,7 +186,7 @@ ZBUFF_DEPRECATED("use ZSTD_DStreamOutSize") size_t ZBUFF_recommendedDOutSize(voi
|
|
|
185
186
|
|
|
186
187
|
/*--- Dependency ---*/
|
|
187
188
|
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_customMem */
|
|
188
|
-
#include "zstd.h"
|
|
189
|
+
#include "../zstd.h"
|
|
189
190
|
|
|
190
191
|
|
|
191
192
|
/*--- Custom memory allocator ---*/
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
/*-*************************************
|
|
12
12
|
* Dependencies
|
|
13
13
|
***************************************/
|
|
14
|
-
#include "error_private.h"
|
|
14
|
+
#include "../common/error_private.h"
|
|
15
15
|
#include "zbuff.h"
|
|
16
16
|
|
|
17
17
|
/*-****************************************
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -26,47 +26,57 @@
|
|
|
26
26
|
#include <string.h> /* memset */
|
|
27
27
|
#include <time.h> /* clock */
|
|
28
28
|
|
|
29
|
-
#include "mem.h" /* read */
|
|
30
|
-
#include "pool.h"
|
|
31
|
-
#include "threading.h"
|
|
32
|
-
#include "cover.h"
|
|
33
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
|
34
29
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
35
|
-
#define ZDICT_STATIC_LINKING_ONLY
|
|
30
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
|
36
31
|
#endif
|
|
37
|
-
|
|
32
|
+
|
|
33
|
+
#include "../common/mem.h" /* read */
|
|
34
|
+
#include "../common/pool.h"
|
|
35
|
+
#include "../common/threading.h"
|
|
36
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
37
|
+
#include "../zdict.h"
|
|
38
|
+
#include "cover.h"
|
|
38
39
|
|
|
39
40
|
/*-*************************************
|
|
40
41
|
* Constants
|
|
41
42
|
***************************************/
|
|
42
43
|
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
|
43
|
-
#define
|
|
44
|
+
#define COVER_DEFAULT_SPLITPOINT 1.0
|
|
44
45
|
|
|
45
46
|
/*-*************************************
|
|
46
47
|
* Console display
|
|
47
48
|
***************************************/
|
|
49
|
+
#ifndef LOCALDISPLAYLEVEL
|
|
48
50
|
static int g_displayLevel = 2;
|
|
51
|
+
#endif
|
|
52
|
+
#undef DISPLAY
|
|
49
53
|
#define DISPLAY(...) \
|
|
50
54
|
{ \
|
|
51
55
|
fprintf(stderr, __VA_ARGS__); \
|
|
52
56
|
fflush(stderr); \
|
|
53
57
|
}
|
|
58
|
+
#undef LOCALDISPLAYLEVEL
|
|
54
59
|
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
|
|
55
60
|
if (displayLevel >= l) { \
|
|
56
61
|
DISPLAY(__VA_ARGS__); \
|
|
57
62
|
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
|
63
|
+
#undef DISPLAYLEVEL
|
|
58
64
|
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
|
|
59
65
|
|
|
66
|
+
#ifndef LOCALDISPLAYUPDATE
|
|
67
|
+
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
|
68
|
+
static clock_t g_time = 0;
|
|
69
|
+
#endif
|
|
70
|
+
#undef LOCALDISPLAYUPDATE
|
|
60
71
|
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
|
61
72
|
if (displayLevel >= l) { \
|
|
62
|
-
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
|
|
73
|
+
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
|
|
63
74
|
g_time = clock(); \
|
|
64
75
|
DISPLAY(__VA_ARGS__); \
|
|
65
76
|
} \
|
|
66
77
|
}
|
|
78
|
+
#undef DISPLAYUPDATE
|
|
67
79
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
|
68
|
-
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
|
69
|
-
static clock_t g_time = 0;
|
|
70
80
|
|
|
71
81
|
/*-*************************************
|
|
72
82
|
* Hash table
|
|
@@ -120,9 +130,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) {
|
|
|
120
130
|
/**
|
|
121
131
|
* Internal hash function
|
|
122
132
|
*/
|
|
123
|
-
static const U32
|
|
133
|
+
static const U32 COVER_prime4bytes = 2654435761U;
|
|
124
134
|
static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
|
|
125
|
-
return (key *
|
|
135
|
+
return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
|
|
126
136
|
}
|
|
127
137
|
|
|
128
138
|
/**
|
|
@@ -215,7 +225,7 @@ typedef struct {
|
|
|
215
225
|
} COVER_ctx_t;
|
|
216
226
|
|
|
217
227
|
/* We need a global context for qsort... */
|
|
218
|
-
static COVER_ctx_t *
|
|
228
|
+
static COVER_ctx_t *g_coverCtx = NULL;
|
|
219
229
|
|
|
220
230
|
/*-*************************************
|
|
221
231
|
* Helper functions
|
|
@@ -258,11 +268,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
|
|
|
258
268
|
|
|
259
269
|
/**
|
|
260
270
|
* Same as COVER_cmp() except ties are broken by pointer value
|
|
261
|
-
* NOTE:
|
|
271
|
+
* NOTE: g_coverCtx must be set to call this function. A global is required because
|
|
262
272
|
* qsort doesn't take an opaque pointer.
|
|
263
273
|
*/
|
|
264
|
-
static int COVER_strict_cmp(const void *lp, const void *rp) {
|
|
265
|
-
int result = COVER_cmp(
|
|
274
|
+
static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
|
|
275
|
+
int result = COVER_cmp(g_coverCtx, lp, rp);
|
|
266
276
|
if (result == 0) {
|
|
267
277
|
result = lp < rp ? -1 : 1;
|
|
268
278
|
}
|
|
@@ -271,8 +281,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
|
|
|
271
281
|
/**
|
|
272
282
|
* Faster version for d <= 8.
|
|
273
283
|
*/
|
|
274
|
-
static int COVER_strict_cmp8(const void *lp, const void *rp) {
|
|
275
|
-
int result = COVER_cmp8(
|
|
284
|
+
static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
|
|
285
|
+
int result = COVER_cmp8(g_coverCtx, lp, rp);
|
|
276
286
|
if (result == 0) {
|
|
277
287
|
result = lp < rp ? -1 : 1;
|
|
278
288
|
}
|
|
@@ -603,7 +613,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
|
603
613
|
/* qsort doesn't take an opaque pointer, so pass as a global.
|
|
604
614
|
* On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
|
|
605
615
|
*/
|
|
606
|
-
|
|
616
|
+
g_coverCtx = ctx;
|
|
607
617
|
#if defined(__OpenBSD__)
|
|
608
618
|
mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
|
|
609
619
|
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
|
|
@@ -638,8 +648,8 @@ void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLeve
|
|
|
638
648
|
"compared to the source size %u! "
|
|
639
649
|
"size(source)/size(dictionary) = %f, but it should be >= "
|
|
640
650
|
"10! This may lead to a subpar dictionary! We recommend "
|
|
641
|
-
"training on sources at least 10x, and
|
|
642
|
-
"size of the dictionary
|
|
651
|
+
"training on sources at least 10x, and preferably 100x "
|
|
652
|
+
"the size of the dictionary! \n", (U32)maxDictSize,
|
|
643
653
|
(U32)nbDmers, ratio);
|
|
644
654
|
}
|
|
645
655
|
|
|
@@ -919,13 +929,12 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
|
|
|
919
929
|
}
|
|
920
930
|
}
|
|
921
931
|
/* Save the dictionary, parameters, and size */
|
|
922
|
-
if (
|
|
923
|
-
|
|
932
|
+
if (dict) {
|
|
933
|
+
memcpy(best->dict, dict, dictSize);
|
|
934
|
+
best->dictSize = dictSize;
|
|
935
|
+
best->parameters = parameters;
|
|
936
|
+
best->compressedSize = compressedSize;
|
|
924
937
|
}
|
|
925
|
-
memcpy(best->dict, dict, dictSize);
|
|
926
|
-
best->dictSize = dictSize;
|
|
927
|
-
best->parameters = parameters;
|
|
928
|
-
best->compressedSize = compressedSize;
|
|
929
938
|
}
|
|
930
939
|
if (liveJobs == 0) {
|
|
931
940
|
ZSTD_pthread_cond_broadcast(&best->cond);
|
|
@@ -947,7 +956,7 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
|
|
|
947
956
|
free(selection.dictContent);
|
|
948
957
|
}
|
|
949
958
|
|
|
950
|
-
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
959
|
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
|
|
951
960
|
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
|
952
961
|
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
|
|
953
962
|
|
|
@@ -955,8 +964,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
|
955
964
|
size_t largestCompressed = 0;
|
|
956
965
|
BYTE* customDictContentEnd = customDictContent + dictContentSize;
|
|
957
966
|
|
|
958
|
-
BYTE * largestDictbuffer = (BYTE *)malloc(
|
|
959
|
-
BYTE * candidateDictBuffer = (BYTE *)malloc(
|
|
967
|
+
BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
|
|
968
|
+
BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
|
|
960
969
|
double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
|
|
961
970
|
|
|
962
971
|
if (!largestDictbuffer || !candidateDictBuffer) {
|
|
@@ -968,7 +977,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
|
968
977
|
/* Initial dictionary size and compressed size */
|
|
969
978
|
memcpy(largestDictbuffer, customDictContent, dictContentSize);
|
|
970
979
|
dictContentSize = ZDICT_finalizeDictionary(
|
|
971
|
-
largestDictbuffer,
|
|
980
|
+
largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
|
|
972
981
|
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
|
973
982
|
|
|
974
983
|
if (ZDICT_isError(dictContentSize)) {
|
|
@@ -1002,7 +1011,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
|
1002
1011
|
while (dictContentSize < largestDict) {
|
|
1003
1012
|
memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
|
|
1004
1013
|
dictContentSize = ZDICT_finalizeDictionary(
|
|
1005
|
-
candidateDictBuffer,
|
|
1014
|
+
candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
|
|
1006
1015
|
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
|
1007
1016
|
|
|
1008
1017
|
if (ZDICT_isError(dictContentSize)) {
|
|
@@ -1054,18 +1063,19 @@ typedef struct COVER_tryParameters_data_s {
|
|
|
1054
1063
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
|
1055
1064
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
|
1056
1065
|
*/
|
|
1057
|
-
static void COVER_tryParameters(void *opaque) {
|
|
1066
|
+
static void COVER_tryParameters(void *opaque)
|
|
1067
|
+
{
|
|
1058
1068
|
/* Save parameters as local variables */
|
|
1059
|
-
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
|
|
1069
|
+
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
|
|
1060
1070
|
const COVER_ctx_t *const ctx = data->ctx;
|
|
1061
1071
|
const ZDICT_cover_params_t parameters = data->parameters;
|
|
1062
1072
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
|
1063
1073
|
size_t totalCompressedSize = ERROR(GENERIC);
|
|
1064
1074
|
/* Allocate space for hash table, dict, and freqs */
|
|
1065
1075
|
COVER_map_t activeDmers;
|
|
1066
|
-
BYTE
|
|
1076
|
+
BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
|
|
1067
1077
|
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
|
1068
|
-
U32
|
|
1078
|
+
U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
|
|
1069
1079
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
|
1070
1080
|
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
|
1071
1081
|
goto _cleanup;
|
|
@@ -1080,7 +1090,7 @@ static void COVER_tryParameters(void *opaque) {
|
|
|
1080
1090
|
{
|
|
1081
1091
|
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
|
|
1082
1092
|
dictBufferCapacity, parameters);
|
|
1083
|
-
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
|
|
1093
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
|
|
1084
1094
|
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
|
1085
1095
|
totalCompressedSize);
|
|
1086
1096
|
|
|
@@ -1095,19 +1105,18 @@ _cleanup:
|
|
|
1095
1105
|
free(data);
|
|
1096
1106
|
COVER_map_destroy(&activeDmers);
|
|
1097
1107
|
COVER_dictSelectionFree(selection);
|
|
1098
|
-
|
|
1099
|
-
free(freqs);
|
|
1100
|
-
}
|
|
1108
|
+
free(freqs);
|
|
1101
1109
|
}
|
|
1102
1110
|
|
|
1103
1111
|
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1104
|
-
void
|
|
1105
|
-
const size_t
|
|
1106
|
-
ZDICT_cover_params_t
|
|
1112
|
+
void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
|
|
1113
|
+
const size_t* samplesSizes, unsigned nbSamples,
|
|
1114
|
+
ZDICT_cover_params_t* parameters)
|
|
1115
|
+
{
|
|
1107
1116
|
/* constants */
|
|
1108
1117
|
const unsigned nbThreads = parameters->nbThreads;
|
|
1109
1118
|
const double splitPoint =
|
|
1110
|
-
parameters->splitPoint <= 0.0 ?
|
|
1119
|
+
parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
|
|
1111
1120
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
|
1112
1121
|
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
|
1113
1122
|
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|