zstd-ruby 1.4.0.0 → 1.4.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/libzstd/Makefile +274 -107
- data/ext/zstdruby/libzstd/README.md +75 -16
- data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
- data/ext/zstdruby/libzstd/common/compiler.h +154 -5
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +7 -3
- data/ext/zstdruby/libzstd/common/fse.h +50 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
- data/ext/zstdruby/libzstd/common/huf.h +41 -38
- data/ext/zstdruby/libzstd/common/mem.h +68 -22
- data/ext/zstdruby/libzstd/common/pool.c +30 -20
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/threading.c +51 -4
- data/ext/zstdruby/libzstd/common/threading.h +36 -4
- data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
- data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
- data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
- data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
- data/ext/zstdruby/libzstd/zstd.h +655 -118
- data/lib/zstd-ruby/version.rb +1 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +20 -10
- data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -15,9 +15,9 @@
|
|
15
15
|
/*-*******************************************************
|
16
16
|
* Dependencies
|
17
17
|
*********************************************************/
|
18
|
-
#include
|
19
|
-
#include "zstd.h" /* DCtx, and some public functions */
|
20
|
-
#include "zstd_internal.h" /* blockProperties_t, and some public functions */
|
18
|
+
#include "../common/zstd_deps.h" /* size_t */
|
19
|
+
#include "../zstd.h" /* DCtx, and some public functions */
|
20
|
+
#include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */
|
21
21
|
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
|
22
22
|
|
23
23
|
|
@@ -48,12 +48,15 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
48
48
|
* this function must be called with valid parameters only
|
49
49
|
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
|
50
50
|
* in which case it cannot fail.
|
51
|
+
* The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
|
52
|
+
* defined in zstd_decompress_internal.h.
|
51
53
|
* Internal use only.
|
52
54
|
*/
|
53
55
|
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
54
56
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
55
57
|
const U32* baseValue, const U32* nbAdditionalBits,
|
56
|
-
unsigned tableLog
|
58
|
+
unsigned tableLog, void* wksp, size_t wkspSize,
|
59
|
+
int bmi2);
|
57
60
|
|
58
61
|
|
59
62
|
#endif /* ZSTD_DEC_BLOCK_H */
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -19,34 +19,35 @@
|
|
19
19
|
/*-*******************************************************
|
20
20
|
* Dependencies
|
21
21
|
*********************************************************/
|
22
|
-
#include "mem.h" /* BYTE, U16, U32 */
|
23
|
-
#include "zstd_internal.h" /* ZSTD_seqSymbol */
|
22
|
+
#include "../common/mem.h" /* BYTE, U16, U32 */
|
23
|
+
#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
|
24
|
+
#include "../common/zstd_trace.h" /* ZSTD_TraceCtx */
|
24
25
|
|
25
26
|
|
26
27
|
|
27
28
|
/*-*******************************************************
|
28
29
|
* Constants
|
29
30
|
*********************************************************/
|
30
|
-
static const U32 LL_base[MaxLL+1] = {
|
31
|
+
static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
|
31
32
|
0, 1, 2, 3, 4, 5, 6, 7,
|
32
33
|
8, 9, 10, 11, 12, 13, 14, 15,
|
33
34
|
16, 18, 20, 22, 24, 28, 32, 40,
|
34
35
|
48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
|
35
36
|
0x2000, 0x4000, 0x8000, 0x10000 };
|
36
37
|
|
37
|
-
static const U32 OF_base[MaxOff+1] = {
|
38
|
+
static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
|
38
39
|
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
|
39
40
|
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
|
40
41
|
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
41
42
|
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
42
43
|
|
43
|
-
static const U32 OF_bits[MaxOff+1] = {
|
44
|
+
static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
|
44
45
|
0, 1, 2, 3, 4, 5, 6, 7,
|
45
46
|
8, 9, 10, 11, 12, 13, 14, 15,
|
46
47
|
16, 17, 18, 19, 20, 21, 22, 23,
|
47
48
|
24, 25, 26, 27, 28, 29, 30, 31 };
|
48
49
|
|
49
|
-
static const U32 ML_base[MaxML+1] = {
|
50
|
+
static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
|
50
51
|
3, 4, 5, 6, 7, 8, 9, 10,
|
51
52
|
11, 12, 13, 14, 15, 16, 17, 18,
|
52
53
|
19, 20, 21, 22, 23, 24, 25, 26,
|
@@ -73,12 +74,16 @@ static const U32 ML_base[MaxML+1] = {
|
|
73
74
|
|
74
75
|
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
|
75
76
|
|
77
|
+
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
|
78
|
+
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
|
79
|
+
|
76
80
|
typedef struct {
|
77
81
|
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
|
78
82
|
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
|
79
83
|
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
80
84
|
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
|
81
85
|
U32 rep[ZSTD_REP_NUM];
|
86
|
+
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
|
82
87
|
} ZSTD_entropyDTables_t;
|
83
88
|
|
84
89
|
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
|
@@ -95,6 +100,13 @@ typedef enum {
|
|
95
100
|
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
96
101
|
} ZSTD_dictUses_e;
|
97
102
|
|
103
|
+
/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
|
104
|
+
typedef struct {
|
105
|
+
const ZSTD_DDict** ddictPtrTable;
|
106
|
+
size_t ddictPtrTableSize;
|
107
|
+
size_t ddictPtrCount;
|
108
|
+
} ZSTD_DDictHashSet;
|
109
|
+
|
98
110
|
struct ZSTD_DCtx_s
|
99
111
|
{
|
100
112
|
const ZSTD_seqSymbol* LLTptr;
|
@@ -109,6 +121,7 @@ struct ZSTD_DCtx_s
|
|
109
121
|
const void* dictEnd; /* end of previous segment */
|
110
122
|
size_t expected;
|
111
123
|
ZSTD_frameHeader fParams;
|
124
|
+
U64 processedCSize;
|
112
125
|
U64 decodedSize;
|
113
126
|
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
|
114
127
|
ZSTD_dStage stage;
|
@@ -117,6 +130,8 @@ struct ZSTD_DCtx_s
|
|
117
130
|
XXH64_state_t xxhState;
|
118
131
|
size_t headerSize;
|
119
132
|
ZSTD_format_e format;
|
133
|
+
ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
|
134
|
+
U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
|
120
135
|
const BYTE* litPtr;
|
121
136
|
ZSTD_customMem customMem;
|
122
137
|
size_t litSize;
|
@@ -130,6 +145,8 @@ struct ZSTD_DCtx_s
|
|
130
145
|
U32 dictID;
|
131
146
|
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
|
132
147
|
ZSTD_dictUses_e dictUses;
|
148
|
+
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
|
149
|
+
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
|
133
150
|
|
134
151
|
/* streaming */
|
135
152
|
ZSTD_dStreamStage streamStage;
|
@@ -147,10 +164,24 @@ struct ZSTD_DCtx_s
|
|
147
164
|
U32 legacyVersion;
|
148
165
|
U32 hostageByte;
|
149
166
|
int noForwardProgress;
|
167
|
+
ZSTD_bufferMode_e outBufferMode;
|
168
|
+
ZSTD_outBuffer expectedOutBuffer;
|
150
169
|
|
151
170
|
/* workspace */
|
152
171
|
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
|
153
172
|
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
|
173
|
+
|
174
|
+
size_t oversizedDuration;
|
175
|
+
|
176
|
+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
177
|
+
void const* dictContentBeginForFuzzing;
|
178
|
+
void const* dictContentEndForFuzzing;
|
179
|
+
#endif
|
180
|
+
|
181
|
+
/* Tracing */
|
182
|
+
#if ZSTD_TRACE
|
183
|
+
ZSTD_TraceCtx traceCtx;
|
184
|
+
#endif
|
154
185
|
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
|
155
186
|
|
156
187
|
|
@@ -160,7 +191,7 @@ struct ZSTD_DCtx_s
|
|
160
191
|
|
161
192
|
/*! ZSTD_loadDEntropy() :
|
162
193
|
* dict : must point at beginning of a valid zstd dictionary.
|
163
|
-
* @return : size of entropy tables
|
194
|
+
* @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
|
164
195
|
size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
165
196
|
const void* const dict, size_t const dictSize);
|
166
197
|
|
@@ -169,7 +200,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
|
169
200
|
* If yes, do nothing (continue on current segment).
|
170
201
|
* If not, classify previous segment as "external dictionary", and start a new segment.
|
171
202
|
* This function cannot fail. */
|
172
|
-
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
|
203
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
|
173
204
|
|
174
205
|
|
175
206
|
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -28,7 +28,7 @@ extern "C" {
|
|
28
28
|
* Dependencies
|
29
29
|
***************************************/
|
30
30
|
#include <stddef.h> /* size_t */
|
31
|
-
#include "zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
|
31
|
+
#include "../zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
|
32
32
|
|
33
33
|
|
34
34
|
/* ***************************************************************
|
@@ -36,16 +36,17 @@ extern "C" {
|
|
36
36
|
*****************************************************************/
|
37
37
|
/* Deprecation warnings */
|
38
38
|
/* Should these warnings be a problem,
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
39
|
+
* it is generally possible to disable them,
|
40
|
+
* typically with -Wno-deprecated-declarations for gcc
|
41
|
+
* or _CRT_SECURE_NO_WARNINGS in Visual.
|
42
|
+
* Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS
|
43
|
+
*/
|
43
44
|
#ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS
|
44
45
|
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
|
45
46
|
#else
|
46
47
|
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
47
48
|
# define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
|
48
|
-
# elif (defined(
|
49
|
+
# elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__)
|
49
50
|
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
|
50
51
|
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
51
52
|
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
|
@@ -185,7 +186,7 @@ ZBUFF_DEPRECATED("use ZSTD_DStreamOutSize") size_t ZBUFF_recommendedDOutSize(voi
|
|
185
186
|
|
186
187
|
/*--- Dependency ---*/
|
187
188
|
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_customMem */
|
188
|
-
#include "zstd.h"
|
189
|
+
#include "../zstd.h"
|
189
190
|
|
190
191
|
|
191
192
|
/*--- Custom memory allocator ---*/
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -11,7 +11,7 @@
|
|
11
11
|
/*-*************************************
|
12
12
|
* Dependencies
|
13
13
|
***************************************/
|
14
|
-
#include "error_private.h"
|
14
|
+
#include "../common/error_private.h"
|
15
15
|
#include "zbuff.h"
|
16
16
|
|
17
17
|
/*-****************************************
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -26,11 +26,11 @@
|
|
26
26
|
#include <string.h> /* memset */
|
27
27
|
#include <time.h> /* clock */
|
28
28
|
|
29
|
-
#include "mem.h" /* read */
|
30
|
-
#include "pool.h"
|
31
|
-
#include "threading.h"
|
29
|
+
#include "../common/mem.h" /* read */
|
30
|
+
#include "../common/pool.h"
|
31
|
+
#include "../common/threading.h"
|
32
32
|
#include "cover.h"
|
33
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
33
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
34
34
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
35
35
|
#define ZDICT_STATIC_LINKING_ONLY
|
36
36
|
#endif
|
@@ -40,33 +40,42 @@
|
|
40
40
|
* Constants
|
41
41
|
***************************************/
|
42
42
|
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
43
|
-
#define
|
43
|
+
#define COVER_DEFAULT_SPLITPOINT 1.0
|
44
44
|
|
45
45
|
/*-*************************************
|
46
46
|
* Console display
|
47
47
|
***************************************/
|
48
|
+
#ifndef LOCALDISPLAYLEVEL
|
48
49
|
static int g_displayLevel = 2;
|
50
|
+
#endif
|
51
|
+
#undef DISPLAY
|
49
52
|
#define DISPLAY(...) \
|
50
53
|
{ \
|
51
54
|
fprintf(stderr, __VA_ARGS__); \
|
52
55
|
fflush(stderr); \
|
53
56
|
}
|
57
|
+
#undef LOCALDISPLAYLEVEL
|
54
58
|
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
|
55
59
|
if (displayLevel >= l) { \
|
56
60
|
DISPLAY(__VA_ARGS__); \
|
57
61
|
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
62
|
+
#undef DISPLAYLEVEL
|
58
63
|
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
|
59
64
|
|
65
|
+
#ifndef LOCALDISPLAYUPDATE
|
66
|
+
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
67
|
+
static clock_t g_time = 0;
|
68
|
+
#endif
|
69
|
+
#undef LOCALDISPLAYUPDATE
|
60
70
|
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
61
71
|
if (displayLevel >= l) { \
|
62
|
-
if ((clock() - g_time >
|
72
|
+
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
|
63
73
|
g_time = clock(); \
|
64
74
|
DISPLAY(__VA_ARGS__); \
|
65
75
|
} \
|
66
76
|
}
|
77
|
+
#undef DISPLAYUPDATE
|
67
78
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
68
|
-
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
69
|
-
static clock_t g_time = 0;
|
70
79
|
|
71
80
|
/*-*************************************
|
72
81
|
* Hash table
|
@@ -120,9 +129,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) {
|
|
120
129
|
/**
|
121
130
|
* Internal hash function
|
122
131
|
*/
|
123
|
-
static const U32
|
132
|
+
static const U32 COVER_prime4bytes = 2654435761U;
|
124
133
|
static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
|
125
|
-
return (key *
|
134
|
+
return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
|
126
135
|
}
|
127
136
|
|
128
137
|
/**
|
@@ -215,7 +224,7 @@ typedef struct {
|
|
215
224
|
} COVER_ctx_t;
|
216
225
|
|
217
226
|
/* We need a global context for qsort... */
|
218
|
-
static COVER_ctx_t *
|
227
|
+
static COVER_ctx_t *g_coverCtx = NULL;
|
219
228
|
|
220
229
|
/*-*************************************
|
221
230
|
* Helper functions
|
@@ -258,11 +267,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
|
|
258
267
|
|
259
268
|
/**
|
260
269
|
* Same as COVER_cmp() except ties are broken by pointer value
|
261
|
-
* NOTE:
|
270
|
+
* NOTE: g_coverCtx must be set to call this function. A global is required because
|
262
271
|
* qsort doesn't take an opaque pointer.
|
263
272
|
*/
|
264
|
-
static int COVER_strict_cmp(const void *lp, const void *rp) {
|
265
|
-
int result = COVER_cmp(
|
273
|
+
static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
|
274
|
+
int result = COVER_cmp(g_coverCtx, lp, rp);
|
266
275
|
if (result == 0) {
|
267
276
|
result = lp < rp ? -1 : 1;
|
268
277
|
}
|
@@ -271,8 +280,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
|
|
271
280
|
/**
|
272
281
|
* Faster version for d <= 8.
|
273
282
|
*/
|
274
|
-
static int COVER_strict_cmp8(const void *lp, const void *rp) {
|
275
|
-
int result = COVER_cmp8(
|
283
|
+
static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
|
284
|
+
int result = COVER_cmp8(g_coverCtx, lp, rp);
|
276
285
|
if (result == 0) {
|
277
286
|
result = lp < rp ? -1 : 1;
|
278
287
|
}
|
@@ -526,10 +535,10 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
|
|
526
535
|
* Prepare a context for dictionary building.
|
527
536
|
* The context is only dependent on the parameter `d` and can be used multiple
|
528
537
|
* times.
|
529
|
-
* Returns
|
538
|
+
* Returns 0 on success or error code on error.
|
530
539
|
* The context must be destroyed with `COVER_ctx_destroy()`.
|
531
540
|
*/
|
532
|
-
static
|
541
|
+
static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
533
542
|
const size_t *samplesSizes, unsigned nbSamples,
|
534
543
|
unsigned d, double splitPoint) {
|
535
544
|
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
@@ -544,17 +553,17 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
544
553
|
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
|
545
554
|
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
|
546
555
|
(unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
|
547
|
-
return
|
556
|
+
return ERROR(srcSize_wrong);
|
548
557
|
}
|
549
558
|
/* Check if there are at least 5 training samples */
|
550
559
|
if (nbTrainSamples < 5) {
|
551
560
|
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
|
552
|
-
return
|
561
|
+
return ERROR(srcSize_wrong);
|
553
562
|
}
|
554
563
|
/* Check if there is at least one testing sample */
|
555
564
|
if (nbTestSamples < 1) {
|
556
565
|
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
|
557
|
-
return
|
566
|
+
return ERROR(srcSize_wrong);
|
558
567
|
}
|
559
568
|
/* Zero the context */
|
560
569
|
memset(ctx, 0, sizeof(*ctx));
|
@@ -577,7 +586,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
577
586
|
if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
|
578
587
|
DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
|
579
588
|
COVER_ctx_destroy(ctx);
|
580
|
-
return
|
589
|
+
return ERROR(memory_allocation);
|
581
590
|
}
|
582
591
|
ctx->freqs = NULL;
|
583
592
|
ctx->d = d;
|
@@ -603,7 +612,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
603
612
|
/* qsort doesn't take an opaque pointer, so pass as a global.
|
604
613
|
* On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
|
605
614
|
*/
|
606
|
-
|
615
|
+
g_coverCtx = ctx;
|
607
616
|
#if defined(__OpenBSD__)
|
608
617
|
mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
|
609
618
|
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
|
@@ -624,7 +633,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
624
633
|
(ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
|
625
634
|
ctx->freqs = ctx->suffix;
|
626
635
|
ctx->suffix = NULL;
|
627
|
-
return
|
636
|
+
return 0;
|
628
637
|
}
|
629
638
|
|
630
639
|
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
|
@@ -638,8 +647,8 @@ void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLeve
|
|
638
647
|
"compared to the source size %u! "
|
639
648
|
"size(source)/size(dictionary) = %f, but it should be >= "
|
640
649
|
"10! This may lead to a subpar dictionary! We recommend "
|
641
|
-
"training on sources at least 10x, and
|
642
|
-
"size of the dictionary
|
650
|
+
"training on sources at least 10x, and preferably 100x "
|
651
|
+
"the size of the dictionary! \n", (U32)maxDictSize,
|
643
652
|
(U32)nbDmers, ratio);
|
644
653
|
}
|
645
654
|
|
@@ -729,11 +738,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
729
738
|
/* Checks */
|
730
739
|
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
|
731
740
|
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
732
|
-
return ERROR(
|
741
|
+
return ERROR(parameter_outOfBound);
|
733
742
|
}
|
734
743
|
if (nbSamples == 0) {
|
735
744
|
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
|
736
|
-
return ERROR(
|
745
|
+
return ERROR(srcSize_wrong);
|
737
746
|
}
|
738
747
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
739
748
|
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
|
@@ -741,15 +750,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
741
750
|
return ERROR(dstSize_tooSmall);
|
742
751
|
}
|
743
752
|
/* Initialize context and activeDmers */
|
744
|
-
|
745
|
-
|
746
|
-
|
753
|
+
{
|
754
|
+
size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
755
|
+
parameters.d, parameters.splitPoint);
|
756
|
+
if (ZSTD_isError(initVal)) {
|
757
|
+
return initVal;
|
758
|
+
}
|
747
759
|
}
|
748
760
|
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
|
749
761
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
750
762
|
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
751
763
|
COVER_ctx_destroy(&ctx);
|
752
|
-
return ERROR(
|
764
|
+
return ERROR(memory_allocation);
|
753
765
|
}
|
754
766
|
|
755
767
|
DISPLAYLEVEL(2, "Building dictionary\n");
|
@@ -810,7 +822,7 @@ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
|
|
810
822
|
cctx, dst, dstCapacity, samples + offsets[i],
|
811
823
|
samplesSizes[i], cdict);
|
812
824
|
if (ZSTD_isError(size)) {
|
813
|
-
totalCompressedSize =
|
825
|
+
totalCompressedSize = size;
|
814
826
|
goto _compressCleanup;
|
815
827
|
}
|
816
828
|
totalCompressedSize += size;
|
@@ -886,9 +898,11 @@ void COVER_best_start(COVER_best_t *best) {
|
|
886
898
|
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
887
899
|
* If this dictionary is the best so far save it and its parameters.
|
888
900
|
*/
|
889
|
-
void COVER_best_finish(COVER_best_t *best,
|
890
|
-
|
891
|
-
|
901
|
+
void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
|
902
|
+
COVER_dictSelection_t selection) {
|
903
|
+
void* dict = selection.dictContent;
|
904
|
+
size_t compressedSize = selection.totalCompressedSize;
|
905
|
+
size_t dictSize = selection.dictSize;
|
892
906
|
if (!best) {
|
893
907
|
return;
|
894
908
|
}
|
@@ -914,10 +928,12 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
|
914
928
|
}
|
915
929
|
}
|
916
930
|
/* Save the dictionary, parameters, and size */
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
931
|
+
if (dict) {
|
932
|
+
memcpy(best->dict, dict, dictSize);
|
933
|
+
best->dictSize = dictSize;
|
934
|
+
best->parameters = parameters;
|
935
|
+
best->compressedSize = compressedSize;
|
936
|
+
}
|
921
937
|
}
|
922
938
|
if (liveJobs == 0) {
|
923
939
|
ZSTD_pthread_cond_broadcast(&best->cond);
|
@@ -926,6 +942,111 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
|
926
942
|
}
|
927
943
|
}
|
928
944
|
|
945
|
+
COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
|
946
|
+
COVER_dictSelection_t selection = { NULL, 0, error };
|
947
|
+
return selection;
|
948
|
+
}
|
949
|
+
|
950
|
+
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
|
951
|
+
return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
|
952
|
+
}
|
953
|
+
|
954
|
+
void COVER_dictSelectionFree(COVER_dictSelection_t selection){
|
955
|
+
free(selection.dictContent);
|
956
|
+
}
|
957
|
+
|
958
|
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
|
959
|
+
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
960
|
+
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
|
961
|
+
|
962
|
+
size_t largestDict = 0;
|
963
|
+
size_t largestCompressed = 0;
|
964
|
+
BYTE* customDictContentEnd = customDictContent + dictContentSize;
|
965
|
+
|
966
|
+
BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
|
967
|
+
BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
|
968
|
+
double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
|
969
|
+
|
970
|
+
if (!largestDictbuffer || !candidateDictBuffer) {
|
971
|
+
free(largestDictbuffer);
|
972
|
+
free(candidateDictBuffer);
|
973
|
+
return COVER_dictSelectionError(dictContentSize);
|
974
|
+
}
|
975
|
+
|
976
|
+
/* Initial dictionary size and compressed size */
|
977
|
+
memcpy(largestDictbuffer, customDictContent, dictContentSize);
|
978
|
+
dictContentSize = ZDICT_finalizeDictionary(
|
979
|
+
largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
|
980
|
+
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
981
|
+
|
982
|
+
if (ZDICT_isError(dictContentSize)) {
|
983
|
+
free(largestDictbuffer);
|
984
|
+
free(candidateDictBuffer);
|
985
|
+
return COVER_dictSelectionError(dictContentSize);
|
986
|
+
}
|
987
|
+
|
988
|
+
totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
|
989
|
+
samplesBuffer, offsets,
|
990
|
+
nbCheckSamples, nbSamples,
|
991
|
+
largestDictbuffer, dictContentSize);
|
992
|
+
|
993
|
+
if (ZSTD_isError(totalCompressedSize)) {
|
994
|
+
free(largestDictbuffer);
|
995
|
+
free(candidateDictBuffer);
|
996
|
+
return COVER_dictSelectionError(totalCompressedSize);
|
997
|
+
}
|
998
|
+
|
999
|
+
if (params.shrinkDict == 0) {
|
1000
|
+
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
|
1001
|
+
free(candidateDictBuffer);
|
1002
|
+
return selection;
|
1003
|
+
}
|
1004
|
+
|
1005
|
+
largestDict = dictContentSize;
|
1006
|
+
largestCompressed = totalCompressedSize;
|
1007
|
+
dictContentSize = ZDICT_DICTSIZE_MIN;
|
1008
|
+
|
1009
|
+
/* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
|
1010
|
+
while (dictContentSize < largestDict) {
|
1011
|
+
memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
|
1012
|
+
dictContentSize = ZDICT_finalizeDictionary(
|
1013
|
+
candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
|
1014
|
+
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
1015
|
+
|
1016
|
+
if (ZDICT_isError(dictContentSize)) {
|
1017
|
+
free(largestDictbuffer);
|
1018
|
+
free(candidateDictBuffer);
|
1019
|
+
return COVER_dictSelectionError(dictContentSize);
|
1020
|
+
|
1021
|
+
}
|
1022
|
+
|
1023
|
+
totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
|
1024
|
+
samplesBuffer, offsets,
|
1025
|
+
nbCheckSamples, nbSamples,
|
1026
|
+
candidateDictBuffer, dictContentSize);
|
1027
|
+
|
1028
|
+
if (ZSTD_isError(totalCompressedSize)) {
|
1029
|
+
free(largestDictbuffer);
|
1030
|
+
free(candidateDictBuffer);
|
1031
|
+
return COVER_dictSelectionError(totalCompressedSize);
|
1032
|
+
}
|
1033
|
+
|
1034
|
+
if (totalCompressedSize <= largestCompressed * regressionTolerance) {
|
1035
|
+
COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
|
1036
|
+
free(largestDictbuffer);
|
1037
|
+
return selection;
|
1038
|
+
}
|
1039
|
+
dictContentSize *= 2;
|
1040
|
+
}
|
1041
|
+
dictContentSize = largestDict;
|
1042
|
+
totalCompressedSize = largestCompressed;
|
1043
|
+
{
|
1044
|
+
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
|
1045
|
+
free(candidateDictBuffer);
|
1046
|
+
return selection;
|
1047
|
+
}
|
1048
|
+
}
|
1049
|
+
|
929
1050
|
/**
|
930
1051
|
* Parameters for COVER_tryParameters().
|
931
1052
|
*/
|
@@ -941,17 +1062,19 @@ typedef struct COVER_tryParameters_data_s {
|
|
941
1062
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
942
1063
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
943
1064
|
*/
|
944
|
-
static void COVER_tryParameters(void *opaque)
|
1065
|
+
static void COVER_tryParameters(void *opaque)
|
1066
|
+
{
|
945
1067
|
/* Save parameters as local variables */
|
946
|
-
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t
|
1068
|
+
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
|
947
1069
|
const COVER_ctx_t *const ctx = data->ctx;
|
948
1070
|
const ZDICT_cover_params_t parameters = data->parameters;
|
949
1071
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
950
1072
|
size_t totalCompressedSize = ERROR(GENERIC);
|
951
1073
|
/* Allocate space for hash table, dict, and freqs */
|
952
1074
|
COVER_map_t activeDmers;
|
953
|
-
BYTE
|
954
|
-
|
1075
|
+
BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
|
1076
|
+
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
1077
|
+
U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
|
955
1078
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
956
1079
|
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
957
1080
|
goto _cleanup;
|
@@ -966,42 +1089,33 @@ static void COVER_tryParameters(void *opaque) {
|
|
966
1089
|
{
|
967
1090
|
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
|
968
1091
|
dictBufferCapacity, parameters);
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
if (
|
974
|
-
DISPLAYLEVEL(1, "Failed to
|
1092
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
|
1093
|
+
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
1094
|
+
totalCompressedSize);
|
1095
|
+
|
1096
|
+
if (COVER_dictSelectionIsError(selection)) {
|
1097
|
+
DISPLAYLEVEL(1, "Failed to select dictionary\n");
|
975
1098
|
goto _cleanup;
|
976
1099
|
}
|
977
1100
|
}
|
978
|
-
/* Check total compressed size */
|
979
|
-
totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
|
980
|
-
ctx->samples, ctx->offsets,
|
981
|
-
ctx->nbTrainSamples, ctx->nbSamples,
|
982
|
-
dict, dictBufferCapacity);
|
983
|
-
|
984
1101
|
_cleanup:
|
985
|
-
|
986
|
-
|
1102
|
+
free(dict);
|
1103
|
+
COVER_best_finish(data->best, parameters, selection);
|
987
1104
|
free(data);
|
988
1105
|
COVER_map_destroy(&activeDmers);
|
989
|
-
|
990
|
-
|
991
|
-
}
|
992
|
-
if (freqs) {
|
993
|
-
free(freqs);
|
994
|
-
}
|
1106
|
+
COVER_dictSelectionFree(selection);
|
1107
|
+
free(freqs);
|
995
1108
|
}
|
996
1109
|
|
997
1110
|
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
998
|
-
void
|
999
|
-
const size_t
|
1000
|
-
ZDICT_cover_params_t
|
1111
|
+
void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
|
1112
|
+
const size_t* samplesSizes, unsigned nbSamples,
|
1113
|
+
ZDICT_cover_params_t* parameters)
|
1114
|
+
{
|
1001
1115
|
/* constants */
|
1002
1116
|
const unsigned nbThreads = parameters->nbThreads;
|
1003
1117
|
const double splitPoint =
|
1004
|
-
parameters->splitPoint <= 0.0 ?
|
1118
|
+
parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
|
1005
1119
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
1006
1120
|
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
1007
1121
|
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
@@ -1010,6 +1124,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1010
1124
|
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
|
1011
1125
|
const unsigned kIterations =
|
1012
1126
|
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
1127
|
+
const unsigned shrinkDict = 0;
|
1013
1128
|
/* Local variables */
|
1014
1129
|
const int displayLevel = parameters->zParams.notificationLevel;
|
1015
1130
|
unsigned iteration = 1;
|
@@ -1022,15 +1137,15 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1022
1137
|
/* Checks */
|
1023
1138
|
if (splitPoint <= 0 || splitPoint > 1) {
|
1024
1139
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
|
1025
|
-
return ERROR(
|
1140
|
+
return ERROR(parameter_outOfBound);
|
1026
1141
|
}
|
1027
1142
|
if (kMinK < kMaxD || kMaxK < kMinK) {
|
1028
1143
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
|
1029
|
-
return ERROR(
|
1144
|
+
return ERROR(parameter_outOfBound);
|
1030
1145
|
}
|
1031
1146
|
if (nbSamples == 0) {
|
1032
1147
|
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
|
1033
|
-
return ERROR(
|
1148
|
+
return ERROR(srcSize_wrong);
|
1034
1149
|
}
|
1035
1150
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
1036
1151
|
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
|
@@ -1054,11 +1169,14 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1054
1169
|
/* Initialize the context for this value of d */
|
1055
1170
|
COVER_ctx_t ctx;
|
1056
1171
|
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
|
1057
|
-
|
1058
|
-
|
1059
|
-
|
1060
|
-
|
1061
|
-
|
1172
|
+
{
|
1173
|
+
const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
|
1174
|
+
if (ZSTD_isError(initVal)) {
|
1175
|
+
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
|
1176
|
+
COVER_best_destroy(&best);
|
1177
|
+
POOL_free(pool);
|
1178
|
+
return initVal;
|
1179
|
+
}
|
1062
1180
|
}
|
1063
1181
|
if (!warned) {
|
1064
1182
|
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
|
@@ -1075,7 +1193,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1075
1193
|
COVER_best_destroy(&best);
|
1076
1194
|
COVER_ctx_destroy(&ctx);
|
1077
1195
|
POOL_free(pool);
|
1078
|
-
return ERROR(
|
1196
|
+
return ERROR(memory_allocation);
|
1079
1197
|
}
|
1080
1198
|
data->ctx = &ctx;
|
1081
1199
|
data->best = &best;
|
@@ -1085,6 +1203,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1085
1203
|
data->parameters.d = d;
|
1086
1204
|
data->parameters.splitPoint = splitPoint;
|
1087
1205
|
data->parameters.steps = kSteps;
|
1206
|
+
data->parameters.shrinkDict = shrinkDict;
|
1088
1207
|
data->parameters.zParams.notificationLevel = g_displayLevel;
|
1089
1208
|
/* Check the parameters */
|
1090
1209
|
if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
|