zstd-ruby 1.4.0.0 → 1.4.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/libzstd/Makefile +274 -107
- data/ext/zstdruby/libzstd/README.md +75 -16
- data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
- data/ext/zstdruby/libzstd/common/compiler.h +154 -5
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +7 -3
- data/ext/zstdruby/libzstd/common/fse.h +50 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
- data/ext/zstdruby/libzstd/common/huf.h +41 -38
- data/ext/zstdruby/libzstd/common/mem.h +68 -22
- data/ext/zstdruby/libzstd/common/pool.c +30 -20
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/threading.c +51 -4
- data/ext/zstdruby/libzstd/common/threading.h +36 -4
- data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
- data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
- data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
- data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
- data/ext/zstdruby/libzstd/zstd.h +655 -118
- data/lib/zstd-ruby/version.rb +1 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +20 -10
- data/.travis.yml +0 -14
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -15,9 +15,9 @@
|
|
|
15
15
|
/*-*******************************************************
|
|
16
16
|
* Dependencies
|
|
17
17
|
*********************************************************/
|
|
18
|
-
#include
|
|
19
|
-
#include "zstd.h" /* DCtx, and some public functions */
|
|
20
|
-
#include "zstd_internal.h" /* blockProperties_t, and some public functions */
|
|
18
|
+
#include "../common/zstd_deps.h" /* size_t */
|
|
19
|
+
#include "../zstd.h" /* DCtx, and some public functions */
|
|
20
|
+
#include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */
|
|
21
21
|
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
|
|
22
22
|
|
|
23
23
|
|
|
@@ -48,12 +48,15 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
48
48
|
* this function must be called with valid parameters only
|
|
49
49
|
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
|
|
50
50
|
* in which case it cannot fail.
|
|
51
|
+
* The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
|
|
52
|
+
* defined in zstd_decompress_internal.h.
|
|
51
53
|
* Internal use only.
|
|
52
54
|
*/
|
|
53
55
|
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
54
56
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
55
57
|
const U32* baseValue, const U32* nbAdditionalBits,
|
|
56
|
-
unsigned tableLog
|
|
58
|
+
unsigned tableLog, void* wksp, size_t wkspSize,
|
|
59
|
+
int bmi2);
|
|
57
60
|
|
|
58
61
|
|
|
59
62
|
#endif /* ZSTD_DEC_BLOCK_H */
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -19,34 +19,35 @@
|
|
|
19
19
|
/*-*******************************************************
|
|
20
20
|
* Dependencies
|
|
21
21
|
*********************************************************/
|
|
22
|
-
#include "mem.h" /* BYTE, U16, U32 */
|
|
23
|
-
#include "zstd_internal.h" /* ZSTD_seqSymbol */
|
|
22
|
+
#include "../common/mem.h" /* BYTE, U16, U32 */
|
|
23
|
+
#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
|
|
24
|
+
#include "../common/zstd_trace.h" /* ZSTD_TraceCtx */
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
/*-*******************************************************
|
|
28
29
|
* Constants
|
|
29
30
|
*********************************************************/
|
|
30
|
-
static const U32 LL_base[MaxLL+1] = {
|
|
31
|
+
static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
|
|
31
32
|
0, 1, 2, 3, 4, 5, 6, 7,
|
|
32
33
|
8, 9, 10, 11, 12, 13, 14, 15,
|
|
33
34
|
16, 18, 20, 22, 24, 28, 32, 40,
|
|
34
35
|
48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
|
|
35
36
|
0x2000, 0x4000, 0x8000, 0x10000 };
|
|
36
37
|
|
|
37
|
-
static const U32 OF_base[MaxOff+1] = {
|
|
38
|
+
static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
|
|
38
39
|
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
|
|
39
40
|
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
|
|
40
41
|
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
|
41
42
|
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
|
42
43
|
|
|
43
|
-
static const U32 OF_bits[MaxOff+1] = {
|
|
44
|
+
static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
|
|
44
45
|
0, 1, 2, 3, 4, 5, 6, 7,
|
|
45
46
|
8, 9, 10, 11, 12, 13, 14, 15,
|
|
46
47
|
16, 17, 18, 19, 20, 21, 22, 23,
|
|
47
48
|
24, 25, 26, 27, 28, 29, 30, 31 };
|
|
48
49
|
|
|
49
|
-
static const U32 ML_base[MaxML+1] = {
|
|
50
|
+
static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
|
|
50
51
|
3, 4, 5, 6, 7, 8, 9, 10,
|
|
51
52
|
11, 12, 13, 14, 15, 16, 17, 18,
|
|
52
53
|
19, 20, 21, 22, 23, 24, 25, 26,
|
|
@@ -73,12 +74,16 @@ static const U32 ML_base[MaxML+1] = {
|
|
|
73
74
|
|
|
74
75
|
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
|
|
75
76
|
|
|
77
|
+
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
|
|
78
|
+
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
|
|
79
|
+
|
|
76
80
|
typedef struct {
|
|
77
81
|
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
|
|
78
82
|
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
|
|
79
83
|
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
|
80
84
|
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
|
|
81
85
|
U32 rep[ZSTD_REP_NUM];
|
|
86
|
+
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
|
|
82
87
|
} ZSTD_entropyDTables_t;
|
|
83
88
|
|
|
84
89
|
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
|
|
@@ -95,6 +100,13 @@ typedef enum {
|
|
|
95
100
|
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
|
96
101
|
} ZSTD_dictUses_e;
|
|
97
102
|
|
|
103
|
+
/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
|
|
104
|
+
typedef struct {
|
|
105
|
+
const ZSTD_DDict** ddictPtrTable;
|
|
106
|
+
size_t ddictPtrTableSize;
|
|
107
|
+
size_t ddictPtrCount;
|
|
108
|
+
} ZSTD_DDictHashSet;
|
|
109
|
+
|
|
98
110
|
struct ZSTD_DCtx_s
|
|
99
111
|
{
|
|
100
112
|
const ZSTD_seqSymbol* LLTptr;
|
|
@@ -109,6 +121,7 @@ struct ZSTD_DCtx_s
|
|
|
109
121
|
const void* dictEnd; /* end of previous segment */
|
|
110
122
|
size_t expected;
|
|
111
123
|
ZSTD_frameHeader fParams;
|
|
124
|
+
U64 processedCSize;
|
|
112
125
|
U64 decodedSize;
|
|
113
126
|
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
|
|
114
127
|
ZSTD_dStage stage;
|
|
@@ -117,6 +130,8 @@ struct ZSTD_DCtx_s
|
|
|
117
130
|
XXH64_state_t xxhState;
|
|
118
131
|
size_t headerSize;
|
|
119
132
|
ZSTD_format_e format;
|
|
133
|
+
ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
|
|
134
|
+
U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
|
|
120
135
|
const BYTE* litPtr;
|
|
121
136
|
ZSTD_customMem customMem;
|
|
122
137
|
size_t litSize;
|
|
@@ -130,6 +145,8 @@ struct ZSTD_DCtx_s
|
|
|
130
145
|
U32 dictID;
|
|
131
146
|
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
|
|
132
147
|
ZSTD_dictUses_e dictUses;
|
|
148
|
+
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
|
|
149
|
+
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
|
|
133
150
|
|
|
134
151
|
/* streaming */
|
|
135
152
|
ZSTD_dStreamStage streamStage;
|
|
@@ -147,10 +164,24 @@ struct ZSTD_DCtx_s
|
|
|
147
164
|
U32 legacyVersion;
|
|
148
165
|
U32 hostageByte;
|
|
149
166
|
int noForwardProgress;
|
|
167
|
+
ZSTD_bufferMode_e outBufferMode;
|
|
168
|
+
ZSTD_outBuffer expectedOutBuffer;
|
|
150
169
|
|
|
151
170
|
/* workspace */
|
|
152
171
|
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
|
|
153
172
|
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
|
|
173
|
+
|
|
174
|
+
size_t oversizedDuration;
|
|
175
|
+
|
|
176
|
+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
|
177
|
+
void const* dictContentBeginForFuzzing;
|
|
178
|
+
void const* dictContentEndForFuzzing;
|
|
179
|
+
#endif
|
|
180
|
+
|
|
181
|
+
/* Tracing */
|
|
182
|
+
#if ZSTD_TRACE
|
|
183
|
+
ZSTD_TraceCtx traceCtx;
|
|
184
|
+
#endif
|
|
154
185
|
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
|
|
155
186
|
|
|
156
187
|
|
|
@@ -160,7 +191,7 @@ struct ZSTD_DCtx_s
|
|
|
160
191
|
|
|
161
192
|
/*! ZSTD_loadDEntropy() :
|
|
162
193
|
* dict : must point at beginning of a valid zstd dictionary.
|
|
163
|
-
* @return : size of entropy tables
|
|
194
|
+
* @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
|
|
164
195
|
size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
|
165
196
|
const void* const dict, size_t const dictSize);
|
|
166
197
|
|
|
@@ -169,7 +200,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
|
|
169
200
|
* If yes, do nothing (continue on current segment).
|
|
170
201
|
* If not, classify previous segment as "external dictionary", and start a new segment.
|
|
171
202
|
* This function cannot fail. */
|
|
172
|
-
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
|
|
203
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
|
|
173
204
|
|
|
174
205
|
|
|
175
206
|
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -28,7 +28,7 @@ extern "C" {
|
|
|
28
28
|
* Dependencies
|
|
29
29
|
***************************************/
|
|
30
30
|
#include <stddef.h> /* size_t */
|
|
31
|
-
#include "zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
|
|
31
|
+
#include "../zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
/* ***************************************************************
|
|
@@ -36,16 +36,17 @@ extern "C" {
|
|
|
36
36
|
*****************************************************************/
|
|
37
37
|
/* Deprecation warnings */
|
|
38
38
|
/* Should these warnings be a problem,
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
39
|
+
* it is generally possible to disable them,
|
|
40
|
+
* typically with -Wno-deprecated-declarations for gcc
|
|
41
|
+
* or _CRT_SECURE_NO_WARNINGS in Visual.
|
|
42
|
+
* Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS
|
|
43
|
+
*/
|
|
43
44
|
#ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS
|
|
44
45
|
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
|
|
45
46
|
#else
|
|
46
47
|
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
|
47
48
|
# define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
|
|
48
|
-
# elif (defined(
|
|
49
|
+
# elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__)
|
|
49
50
|
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
|
|
50
51
|
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
51
52
|
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
|
|
@@ -185,7 +186,7 @@ ZBUFF_DEPRECATED("use ZSTD_DStreamOutSize") size_t ZBUFF_recommendedDOutSize(voi
|
|
|
185
186
|
|
|
186
187
|
/*--- Dependency ---*/
|
|
187
188
|
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_customMem */
|
|
188
|
-
#include "zstd.h"
|
|
189
|
+
#include "../zstd.h"
|
|
189
190
|
|
|
190
191
|
|
|
191
192
|
/*--- Custom memory allocator ---*/
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
/*-*************************************
|
|
12
12
|
* Dependencies
|
|
13
13
|
***************************************/
|
|
14
|
-
#include "error_private.h"
|
|
14
|
+
#include "../common/error_private.h"
|
|
15
15
|
#include "zbuff.h"
|
|
16
16
|
|
|
17
17
|
/*-****************************************
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -26,11 +26,11 @@
|
|
|
26
26
|
#include <string.h> /* memset */
|
|
27
27
|
#include <time.h> /* clock */
|
|
28
28
|
|
|
29
|
-
#include "mem.h" /* read */
|
|
30
|
-
#include "pool.h"
|
|
31
|
-
#include "threading.h"
|
|
29
|
+
#include "../common/mem.h" /* read */
|
|
30
|
+
#include "../common/pool.h"
|
|
31
|
+
#include "../common/threading.h"
|
|
32
32
|
#include "cover.h"
|
|
33
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
|
33
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
34
34
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
35
35
|
#define ZDICT_STATIC_LINKING_ONLY
|
|
36
36
|
#endif
|
|
@@ -40,33 +40,42 @@
|
|
|
40
40
|
* Constants
|
|
41
41
|
***************************************/
|
|
42
42
|
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
|
43
|
-
#define
|
|
43
|
+
#define COVER_DEFAULT_SPLITPOINT 1.0
|
|
44
44
|
|
|
45
45
|
/*-*************************************
|
|
46
46
|
* Console display
|
|
47
47
|
***************************************/
|
|
48
|
+
#ifndef LOCALDISPLAYLEVEL
|
|
48
49
|
static int g_displayLevel = 2;
|
|
50
|
+
#endif
|
|
51
|
+
#undef DISPLAY
|
|
49
52
|
#define DISPLAY(...) \
|
|
50
53
|
{ \
|
|
51
54
|
fprintf(stderr, __VA_ARGS__); \
|
|
52
55
|
fflush(stderr); \
|
|
53
56
|
}
|
|
57
|
+
#undef LOCALDISPLAYLEVEL
|
|
54
58
|
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
|
|
55
59
|
if (displayLevel >= l) { \
|
|
56
60
|
DISPLAY(__VA_ARGS__); \
|
|
57
61
|
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
|
62
|
+
#undef DISPLAYLEVEL
|
|
58
63
|
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
|
|
59
64
|
|
|
65
|
+
#ifndef LOCALDISPLAYUPDATE
|
|
66
|
+
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
|
67
|
+
static clock_t g_time = 0;
|
|
68
|
+
#endif
|
|
69
|
+
#undef LOCALDISPLAYUPDATE
|
|
60
70
|
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
|
61
71
|
if (displayLevel >= l) { \
|
|
62
|
-
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
|
|
72
|
+
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
|
|
63
73
|
g_time = clock(); \
|
|
64
74
|
DISPLAY(__VA_ARGS__); \
|
|
65
75
|
} \
|
|
66
76
|
}
|
|
77
|
+
#undef DISPLAYUPDATE
|
|
67
78
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
|
68
|
-
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
|
69
|
-
static clock_t g_time = 0;
|
|
70
79
|
|
|
71
80
|
/*-*************************************
|
|
72
81
|
* Hash table
|
|
@@ -120,9 +129,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) {
|
|
|
120
129
|
/**
|
|
121
130
|
* Internal hash function
|
|
122
131
|
*/
|
|
123
|
-
static const U32
|
|
132
|
+
static const U32 COVER_prime4bytes = 2654435761U;
|
|
124
133
|
static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
|
|
125
|
-
return (key *
|
|
134
|
+
return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
|
|
126
135
|
}
|
|
127
136
|
|
|
128
137
|
/**
|
|
@@ -215,7 +224,7 @@ typedef struct {
|
|
|
215
224
|
} COVER_ctx_t;
|
|
216
225
|
|
|
217
226
|
/* We need a global context for qsort... */
|
|
218
|
-
static COVER_ctx_t *
|
|
227
|
+
static COVER_ctx_t *g_coverCtx = NULL;
|
|
219
228
|
|
|
220
229
|
/*-*************************************
|
|
221
230
|
* Helper functions
|
|
@@ -258,11 +267,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
|
|
|
258
267
|
|
|
259
268
|
/**
|
|
260
269
|
* Same as COVER_cmp() except ties are broken by pointer value
|
|
261
|
-
* NOTE:
|
|
270
|
+
* NOTE: g_coverCtx must be set to call this function. A global is required because
|
|
262
271
|
* qsort doesn't take an opaque pointer.
|
|
263
272
|
*/
|
|
264
|
-
static int COVER_strict_cmp(const void *lp, const void *rp) {
|
|
265
|
-
int result = COVER_cmp(
|
|
273
|
+
static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
|
|
274
|
+
int result = COVER_cmp(g_coverCtx, lp, rp);
|
|
266
275
|
if (result == 0) {
|
|
267
276
|
result = lp < rp ? -1 : 1;
|
|
268
277
|
}
|
|
@@ -271,8 +280,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
|
|
|
271
280
|
/**
|
|
272
281
|
* Faster version for d <= 8.
|
|
273
282
|
*/
|
|
274
|
-
static int COVER_strict_cmp8(const void *lp, const void *rp) {
|
|
275
|
-
int result = COVER_cmp8(
|
|
283
|
+
static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
|
|
284
|
+
int result = COVER_cmp8(g_coverCtx, lp, rp);
|
|
276
285
|
if (result == 0) {
|
|
277
286
|
result = lp < rp ? -1 : 1;
|
|
278
287
|
}
|
|
@@ -526,10 +535,10 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
|
|
|
526
535
|
* Prepare a context for dictionary building.
|
|
527
536
|
* The context is only dependent on the parameter `d` and can used multiple
|
|
528
537
|
* times.
|
|
529
|
-
* Returns
|
|
538
|
+
* Returns 0 on success or error code on error.
|
|
530
539
|
* The context must be destroyed with `COVER_ctx_destroy()`.
|
|
531
540
|
*/
|
|
532
|
-
static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
541
|
+
static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
533
542
|
const size_t *samplesSizes, unsigned nbSamples,
|
|
534
543
|
unsigned d, double splitPoint) {
|
|
535
544
|
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
|
@@ -544,17 +553,17 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
|
544
553
|
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
|
|
545
554
|
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
|
|
546
555
|
(unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
|
|
547
|
-
return
|
|
556
|
+
return ERROR(srcSize_wrong);
|
|
548
557
|
}
|
|
549
558
|
/* Check if there are at least 5 training samples */
|
|
550
559
|
if (nbTrainSamples < 5) {
|
|
551
560
|
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
|
|
552
|
-
return
|
|
561
|
+
return ERROR(srcSize_wrong);
|
|
553
562
|
}
|
|
554
563
|
/* Check if there's testing sample */
|
|
555
564
|
if (nbTestSamples < 1) {
|
|
556
565
|
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
|
|
557
|
-
return
|
|
566
|
+
return ERROR(srcSize_wrong);
|
|
558
567
|
}
|
|
559
568
|
/* Zero the context */
|
|
560
569
|
memset(ctx, 0, sizeof(*ctx));
|
|
@@ -577,7 +586,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
|
577
586
|
if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
|
|
578
587
|
DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
|
|
579
588
|
COVER_ctx_destroy(ctx);
|
|
580
|
-
return
|
|
589
|
+
return ERROR(memory_allocation);
|
|
581
590
|
}
|
|
582
591
|
ctx->freqs = NULL;
|
|
583
592
|
ctx->d = d;
|
|
@@ -603,7 +612,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
|
603
612
|
/* qsort doesn't take an opaque pointer, so pass as a global.
|
|
604
613
|
* On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
|
|
605
614
|
*/
|
|
606
|
-
|
|
615
|
+
g_coverCtx = ctx;
|
|
607
616
|
#if defined(__OpenBSD__)
|
|
608
617
|
mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
|
|
609
618
|
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
|
|
@@ -624,7 +633,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
|
624
633
|
(ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
|
|
625
634
|
ctx->freqs = ctx->suffix;
|
|
626
635
|
ctx->suffix = NULL;
|
|
627
|
-
return
|
|
636
|
+
return 0;
|
|
628
637
|
}
|
|
629
638
|
|
|
630
639
|
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
|
|
@@ -638,8 +647,8 @@ void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLeve
|
|
|
638
647
|
"compared to the source size %u! "
|
|
639
648
|
"size(source)/size(dictionary) = %f, but it should be >= "
|
|
640
649
|
"10! This may lead to a subpar dictionary! We recommend "
|
|
641
|
-
"training on sources at least 10x, and
|
|
642
|
-
"size of the dictionary
|
|
650
|
+
"training on sources at least 10x, and preferably 100x "
|
|
651
|
+
"the size of the dictionary! \n", (U32)maxDictSize,
|
|
643
652
|
(U32)nbDmers, ratio);
|
|
644
653
|
}
|
|
645
654
|
|
|
@@ -729,11 +738,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
|
729
738
|
/* Checks */
|
|
730
739
|
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
|
|
731
740
|
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
|
732
|
-
return ERROR(
|
|
741
|
+
return ERROR(parameter_outOfBound);
|
|
733
742
|
}
|
|
734
743
|
if (nbSamples == 0) {
|
|
735
744
|
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
|
|
736
|
-
return ERROR(
|
|
745
|
+
return ERROR(srcSize_wrong);
|
|
737
746
|
}
|
|
738
747
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
|
739
748
|
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
|
|
@@ -741,15 +750,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
|
741
750
|
return ERROR(dstSize_tooSmall);
|
|
742
751
|
}
|
|
743
752
|
/* Initialize context and activeDmers */
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
753
|
+
{
|
|
754
|
+
size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
|
755
|
+
parameters.d, parameters.splitPoint);
|
|
756
|
+
if (ZSTD_isError(initVal)) {
|
|
757
|
+
return initVal;
|
|
758
|
+
}
|
|
747
759
|
}
|
|
748
760
|
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
|
|
749
761
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
|
750
762
|
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
|
751
763
|
COVER_ctx_destroy(&ctx);
|
|
752
|
-
return ERROR(
|
|
764
|
+
return ERROR(memory_allocation);
|
|
753
765
|
}
|
|
754
766
|
|
|
755
767
|
DISPLAYLEVEL(2, "Building dictionary\n");
|
|
@@ -810,7 +822,7 @@ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
|
|
|
810
822
|
cctx, dst, dstCapacity, samples + offsets[i],
|
|
811
823
|
samplesSizes[i], cdict);
|
|
812
824
|
if (ZSTD_isError(size)) {
|
|
813
|
-
totalCompressedSize =
|
|
825
|
+
totalCompressedSize = size;
|
|
814
826
|
goto _compressCleanup;
|
|
815
827
|
}
|
|
816
828
|
totalCompressedSize += size;
|
|
@@ -886,9 +898,11 @@ void COVER_best_start(COVER_best_t *best) {
|
|
|
886
898
|
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
|
887
899
|
* If this dictionary is the best so far save it and its parameters.
|
|
888
900
|
*/
|
|
889
|
-
void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
|
890
|
-
|
|
891
|
-
|
|
901
|
+
void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
|
|
902
|
+
COVER_dictSelection_t selection) {
|
|
903
|
+
void* dict = selection.dictContent;
|
|
904
|
+
size_t compressedSize = selection.totalCompressedSize;
|
|
905
|
+
size_t dictSize = selection.dictSize;
|
|
892
906
|
if (!best) {
|
|
893
907
|
return;
|
|
894
908
|
}
|
|
@@ -914,10 +928,12 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
|
|
914
928
|
}
|
|
915
929
|
}
|
|
916
930
|
/* Save the dictionary, parameters, and size */
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
931
|
+
if (dict) {
|
|
932
|
+
memcpy(best->dict, dict, dictSize);
|
|
933
|
+
best->dictSize = dictSize;
|
|
934
|
+
best->parameters = parameters;
|
|
935
|
+
best->compressedSize = compressedSize;
|
|
936
|
+
}
|
|
921
937
|
}
|
|
922
938
|
if (liveJobs == 0) {
|
|
923
939
|
ZSTD_pthread_cond_broadcast(&best->cond);
|
|
@@ -926,6 +942,111 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
|
|
926
942
|
}
|
|
927
943
|
}
|
|
928
944
|
|
|
945
|
+
COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
|
|
946
|
+
COVER_dictSelection_t selection = { NULL, 0, error };
|
|
947
|
+
return selection;
|
|
948
|
+
}
|
|
949
|
+
|
|
950
|
+
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
|
|
951
|
+
return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
|
|
952
|
+
}
|
|
953
|
+
|
|
954
|
+
void COVER_dictSelectionFree(COVER_dictSelection_t selection){
|
|
955
|
+
free(selection.dictContent);
|
|
956
|
+
}
|
|
957
|
+
|
|
958
|
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
|
|
959
|
+
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
|
960
|
+
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
|
|
961
|
+
|
|
962
|
+
size_t largestDict = 0;
|
|
963
|
+
size_t largestCompressed = 0;
|
|
964
|
+
BYTE* customDictContentEnd = customDictContent + dictContentSize;
|
|
965
|
+
|
|
966
|
+
BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
|
|
967
|
+
BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
|
|
968
|
+
double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
|
|
969
|
+
|
|
970
|
+
if (!largestDictbuffer || !candidateDictBuffer) {
|
|
971
|
+
free(largestDictbuffer);
|
|
972
|
+
free(candidateDictBuffer);
|
|
973
|
+
return COVER_dictSelectionError(dictContentSize);
|
|
974
|
+
}
|
|
975
|
+
|
|
976
|
+
/* Initial dictionary size and compressed size */
|
|
977
|
+
memcpy(largestDictbuffer, customDictContent, dictContentSize);
|
|
978
|
+
dictContentSize = ZDICT_finalizeDictionary(
|
|
979
|
+
largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
|
|
980
|
+
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
|
981
|
+
|
|
982
|
+
if (ZDICT_isError(dictContentSize)) {
|
|
983
|
+
free(largestDictbuffer);
|
|
984
|
+
free(candidateDictBuffer);
|
|
985
|
+
return COVER_dictSelectionError(dictContentSize);
|
|
986
|
+
}
|
|
987
|
+
|
|
988
|
+
totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
|
|
989
|
+
samplesBuffer, offsets,
|
|
990
|
+
nbCheckSamples, nbSamples,
|
|
991
|
+
largestDictbuffer, dictContentSize);
|
|
992
|
+
|
|
993
|
+
if (ZSTD_isError(totalCompressedSize)) {
|
|
994
|
+
free(largestDictbuffer);
|
|
995
|
+
free(candidateDictBuffer);
|
|
996
|
+
return COVER_dictSelectionError(totalCompressedSize);
|
|
997
|
+
}
|
|
998
|
+
|
|
999
|
+
if (params.shrinkDict == 0) {
|
|
1000
|
+
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
|
|
1001
|
+
free(candidateDictBuffer);
|
|
1002
|
+
return selection;
|
|
1003
|
+
}
|
|
1004
|
+
|
|
1005
|
+
largestDict = dictContentSize;
|
|
1006
|
+
largestCompressed = totalCompressedSize;
|
|
1007
|
+
dictContentSize = ZDICT_DICTSIZE_MIN;
|
|
1008
|
+
|
|
1009
|
+
/* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
|
|
1010
|
+
while (dictContentSize < largestDict) {
|
|
1011
|
+
memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
|
|
1012
|
+
dictContentSize = ZDICT_finalizeDictionary(
|
|
1013
|
+
candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
|
|
1014
|
+
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
|
1015
|
+
|
|
1016
|
+
if (ZDICT_isError(dictContentSize)) {
|
|
1017
|
+
free(largestDictbuffer);
|
|
1018
|
+
free(candidateDictBuffer);
|
|
1019
|
+
return COVER_dictSelectionError(dictContentSize);
|
|
1020
|
+
|
|
1021
|
+
}
|
|
1022
|
+
|
|
1023
|
+
totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
|
|
1024
|
+
samplesBuffer, offsets,
|
|
1025
|
+
nbCheckSamples, nbSamples,
|
|
1026
|
+
candidateDictBuffer, dictContentSize);
|
|
1027
|
+
|
|
1028
|
+
if (ZSTD_isError(totalCompressedSize)) {
|
|
1029
|
+
free(largestDictbuffer);
|
|
1030
|
+
free(candidateDictBuffer);
|
|
1031
|
+
return COVER_dictSelectionError(totalCompressedSize);
|
|
1032
|
+
}
|
|
1033
|
+
|
|
1034
|
+
if (totalCompressedSize <= largestCompressed * regressionTolerance) {
|
|
1035
|
+
COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
|
|
1036
|
+
free(largestDictbuffer);
|
|
1037
|
+
return selection;
|
|
1038
|
+
}
|
|
1039
|
+
dictContentSize *= 2;
|
|
1040
|
+
}
|
|
1041
|
+
dictContentSize = largestDict;
|
|
1042
|
+
totalCompressedSize = largestCompressed;
|
|
1043
|
+
{
|
|
1044
|
+
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
|
|
1045
|
+
free(candidateDictBuffer);
|
|
1046
|
+
return selection;
|
|
1047
|
+
}
|
|
1048
|
+
}
|
|
1049
|
+
|
|
929
1050
|
/**
|
|
930
1051
|
* Parameters for COVER_tryParameters().
|
|
931
1052
|
*/
|
|
@@ -941,17 +1062,19 @@ typedef struct COVER_tryParameters_data_s {
|
|
|
941
1062
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
|
942
1063
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
|
943
1064
|
*/
|
|
944
|
-
static void COVER_tryParameters(void *opaque)
|
|
1065
|
+
static void COVER_tryParameters(void *opaque)
|
|
1066
|
+
{
|
|
945
1067
|
/* Save parameters as local variables */
|
|
946
|
-
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t
|
|
1068
|
+
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
|
|
947
1069
|
const COVER_ctx_t *const ctx = data->ctx;
|
|
948
1070
|
const ZDICT_cover_params_t parameters = data->parameters;
|
|
949
1071
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
|
950
1072
|
size_t totalCompressedSize = ERROR(GENERIC);
|
|
951
1073
|
/* Allocate space for hash table, dict, and freqs */
|
|
952
1074
|
COVER_map_t activeDmers;
|
|
953
|
-
BYTE
|
|
954
|
-
|
|
1075
|
+
BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
|
|
1076
|
+
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
|
1077
|
+
U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
|
|
955
1078
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
|
956
1079
|
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
|
957
1080
|
goto _cleanup;
|
|
@@ -966,42 +1089,33 @@ static void COVER_tryParameters(void *opaque) {
|
|
|
966
1089
|
{
|
|
967
1090
|
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
|
|
968
1091
|
dictBufferCapacity, parameters);
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
if (
|
|
974
|
-
DISPLAYLEVEL(1, "Failed to
|
|
1092
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
|
|
1093
|
+
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
|
1094
|
+
totalCompressedSize);
|
|
1095
|
+
|
|
1096
|
+
if (COVER_dictSelectionIsError(selection)) {
|
|
1097
|
+
DISPLAYLEVEL(1, "Failed to select dictionary\n");
|
|
975
1098
|
goto _cleanup;
|
|
976
1099
|
}
|
|
977
1100
|
}
|
|
978
|
-
/* Check total compressed size */
|
|
979
|
-
totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
|
|
980
|
-
ctx->samples, ctx->offsets,
|
|
981
|
-
ctx->nbTrainSamples, ctx->nbSamples,
|
|
982
|
-
dict, dictBufferCapacity);
|
|
983
|
-
|
|
984
1101
|
_cleanup:
|
|
985
|
-
|
|
986
|
-
|
|
1102
|
+
free(dict);
|
|
1103
|
+
COVER_best_finish(data->best, parameters, selection);
|
|
987
1104
|
free(data);
|
|
988
1105
|
COVER_map_destroy(&activeDmers);
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
}
|
|
992
|
-
if (freqs) {
|
|
993
|
-
free(freqs);
|
|
994
|
-
}
|
|
1106
|
+
COVER_dictSelectionFree(selection);
|
|
1107
|
+
free(freqs);
|
|
995
1108
|
}
|
|
996
1109
|
|
|
997
1110
|
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
998
|
-
void
|
|
999
|
-
const size_t
|
|
1000
|
-
ZDICT_cover_params_t
|
|
1111
|
+
void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
|
|
1112
|
+
const size_t* samplesSizes, unsigned nbSamples,
|
|
1113
|
+
ZDICT_cover_params_t* parameters)
|
|
1114
|
+
{
|
|
1001
1115
|
/* constants */
|
|
1002
1116
|
const unsigned nbThreads = parameters->nbThreads;
|
|
1003
1117
|
const double splitPoint =
|
|
1004
|
-
parameters->splitPoint <= 0.0 ?
|
|
1118
|
+
parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
|
|
1005
1119
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
|
1006
1120
|
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
|
1007
1121
|
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
|
@@ -1010,6 +1124,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
|
1010
1124
|
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
|
|
1011
1125
|
const unsigned kIterations =
|
|
1012
1126
|
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
|
1127
|
+
const unsigned shrinkDict = 0;
|
|
1013
1128
|
/* Local variables */
|
|
1014
1129
|
const int displayLevel = parameters->zParams.notificationLevel;
|
|
1015
1130
|
unsigned iteration = 1;
|
|
@@ -1022,15 +1137,15 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
|
1022
1137
|
/* Checks */
|
|
1023
1138
|
if (splitPoint <= 0 || splitPoint > 1) {
|
|
1024
1139
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
|
|
1025
|
-
return ERROR(
|
|
1140
|
+
return ERROR(parameter_outOfBound);
|
|
1026
1141
|
}
|
|
1027
1142
|
if (kMinK < kMaxD || kMaxK < kMinK) {
|
|
1028
1143
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
|
|
1029
|
-
return ERROR(
|
|
1144
|
+
return ERROR(parameter_outOfBound);
|
|
1030
1145
|
}
|
|
1031
1146
|
if (nbSamples == 0) {
|
|
1032
1147
|
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
|
|
1033
|
-
return ERROR(
|
|
1148
|
+
return ERROR(srcSize_wrong);
|
|
1034
1149
|
}
|
|
1035
1150
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
|
1036
1151
|
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
|
|
@@ -1054,11 +1169,14 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
|
1054
1169
|
/* Initialize the context for this value of d */
|
|
1055
1170
|
COVER_ctx_t ctx;
|
|
1056
1171
|
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1172
|
+
{
|
|
1173
|
+
const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
|
|
1174
|
+
if (ZSTD_isError(initVal)) {
|
|
1175
|
+
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
|
|
1176
|
+
COVER_best_destroy(&best);
|
|
1177
|
+
POOL_free(pool);
|
|
1178
|
+
return initVal;
|
|
1179
|
+
}
|
|
1062
1180
|
}
|
|
1063
1181
|
if (!warned) {
|
|
1064
1182
|
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
|
|
@@ -1075,7 +1193,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
|
1075
1193
|
COVER_best_destroy(&best);
|
|
1076
1194
|
COVER_ctx_destroy(&ctx);
|
|
1077
1195
|
POOL_free(pool);
|
|
1078
|
-
return ERROR(
|
|
1196
|
+
return ERROR(memory_allocation);
|
|
1079
1197
|
}
|
|
1080
1198
|
data->ctx = &ctx;
|
|
1081
1199
|
data->best = &best;
|
|
@@ -1085,6 +1203,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
|
1085
1203
|
data->parameters.d = d;
|
|
1086
1204
|
data->parameters.splitPoint = splitPoint;
|
|
1087
1205
|
data->parameters.steps = kSteps;
|
|
1206
|
+
data->parameters.shrinkDict = shrinkDict;
|
|
1088
1207
|
data->parameters.zParams.notificationLevel = g_displayLevel;
|
|
1089
1208
|
/* Check the parameters */
|
|
1090
1209
|
if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
|