zstd-ruby 1.4.5.0 → 1.4.9.0
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/libzstd/Makefile +237 -138
- data/ext/zstdruby/libzstd/README.md +28 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +25 -16
- data/ext/zstdruby/libzstd/common/compiler.h +118 -4
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +12 -19
- data/ext/zstdruby/libzstd/common/entropy_common.c +189 -43
- data/ext/zstdruby/libzstd/common/error_private.c +2 -1
- data/ext/zstdruby/libzstd/common/error_private.h +2 -2
- data/ext/zstdruby/libzstd/common/fse.h +40 -12
- data/ext/zstdruby/libzstd/common/fse_decompress.c +124 -17
- data/ext/zstdruby/libzstd/common/huf.h +27 -6
- data/ext/zstdruby/libzstd/common/mem.h +67 -94
- data/ext/zstdruby/libzstd/common/pool.c +23 -17
- data/ext/zstdruby/libzstd/common/pool.h +2 -2
- data/ext/zstdruby/libzstd/common/threading.c +6 -5
- data/ext/zstdruby/libzstd/common/xxhash.c +19 -57
- data/ext/zstdruby/libzstd/common/xxhash.h +2 -2
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +90 -59
- data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
- data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +31 -24
- data/ext/zstdruby/libzstd/compress/hist.c +27 -29
- data/ext/zstdruby/libzstd/compress/hist.h +2 -2
- data/ext/zstdruby/libzstd/compress/huf_compress.c +217 -101
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +1495 -478
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +143 -44
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +7 -7
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +18 -4
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +62 -26
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +23 -23
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +21 -21
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +352 -78
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +276 -209
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +8 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +191 -46
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +79 -410
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +27 -109
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +303 -201
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +9 -9
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +370 -87
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +153 -45
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +6 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +28 -11
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +40 -31
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +2 -2
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +26 -25
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +22 -24
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +5 -4
- data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +6 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +6 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +6 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +7 -3
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +10 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +10 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +10 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.pc.in +3 -3
- data/ext/zstdruby/libzstd/zstd.h +414 -54
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +7 -3
- data/.travis.yml +0 -14
data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h

```diff
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -21,32 +21,33 @@
 *********************************************************/
 #include "../common/mem.h"           /* BYTE, U16, U32 */
 #include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
+#include "../common/zstd_trace.h"    /* ZSTD_TraceCtx */



 /*-*******************************************************
 *  Constants
 *********************************************************/
-static const U32 LL_base[MaxLL+1] = {
+static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
     0, 1, 2, 3, 4, 5, 6, 7,
     8, 9, 10, 11, 12, 13, 14, 15,
     16, 18, 20, 22, 24, 28, 32, 40,
     48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
     0x2000, 0x4000, 0x8000, 0x10000 };

-static const U32 OF_base[MaxOff+1] = {
+static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
     0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
     0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
     0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
     0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };

-static const U32 OF_bits[MaxOff+1] = {
+static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
     0, 1, 2, 3, 4, 5, 6, 7,
     8, 9, 10, 11, 12, 13, 14, 15,
     16, 17, 18, 19, 20, 21, 22, 23,
     24, 25, 26, 27, 28, 29, 30, 31 };

-static const U32 ML_base[MaxML+1] = {
+static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
     3, 4, 5, 6, 7, 8, 9, 10,
     11, 12, 13, 14, 15, 16, 17, 18,
     19, 20, 21, 22, 23, 24, 25, 26,
@@ -73,12 +74,16 @@ static const U32 ML_base[MaxML+1] = {

 #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))

+#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
+#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
+
 typedef struct {
     ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];    /* Note : Space reserved for FSE Tables */
     ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];   /* is also used as temporary workspace while building hufTable during DDict creation */
     ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];    /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
     HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
     U32 rep[ZSTD_REP_NUM];
+    U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
 } ZSTD_entropyDTables_t;

 typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
@@ -95,10 +100,12 @@ typedef enum {
     ZSTD_use_once = 1   /* Use the dictionary once and set to ZSTD_dont_use */
 } ZSTD_dictUses_e;

-
-
-
-
+/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
+typedef struct {
+    const ZSTD_DDict** ddictPtrTable;
+    size_t ddictPtrTableSize;
+    size_t ddictPtrCount;
+} ZSTD_DDictHashSet;

 struct ZSTD_DCtx_s
 {
@@ -114,6 +121,7 @@ struct ZSTD_DCtx_s
     const void* dictEnd;          /* end of previous segment */
     size_t expected;
     ZSTD_frameHeader fParams;
+    U64 processedCSize;
     U64 decodedSize;
     blockType_e bType;            /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
     ZSTD_dStage stage;
@@ -122,6 +130,8 @@ struct ZSTD_DCtx_s
     XXH64_state_t xxhState;
     size_t headerSize;
     ZSTD_format_e format;
+    ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum;   /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
+    U32 validateChecksum;         /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
     const BYTE* litPtr;
     ZSTD_customMem customMem;
     size_t litSize;
@@ -135,6 +145,8 @@ struct ZSTD_DCtx_s
     U32 dictID;
     int ddictIsCold;              /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
     ZSTD_dictUses_e dictUses;
+    ZSTD_DDictHashSet* ddictSet;                    /* Hash set for multiple ddicts */
+    ZSTD_refMultipleDDicts_e refMultipleDDicts;     /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */

     /* streaming */
     ZSTD_dStreamStage streamStage;
@@ -152,7 +164,7 @@ struct ZSTD_DCtx_s
     U32 legacyVersion;
     U32 hostageByte;
     int noForwardProgress;
-
+    ZSTD_bufferMode_e outBufferMode;
     ZSTD_outBuffer expectedOutBuffer;

     /* workspace */
@@ -165,6 +177,11 @@ struct ZSTD_DCtx_s
     void const* dictContentBeginForFuzzing;
     void const* dictContentEndForFuzzing;
 #endif
+
+    /* Tracing */
+#if ZSTD_TRACE
+    ZSTD_TraceCtx traceCtx;
+#endif
 };  /* typedef'd to ZSTD_DCtx within "zstd.h" */


@@ -183,7 +200,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
 *  If yes, do nothing (continue on current segment).
 *  If not, classify previous segment as "external dictionary", and start a new segment.
 *  This function cannot fail. */
-void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);


 #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
```
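The new `ZSTD_DCtx_s` members above are the decompression-side state for features that arrived between libzstd 1.4.5 and 1.4.9: optional skipping of frame-checksum validation (`forceIgnoreChecksum` / `validateChecksum`), holding references to several `ZSTD_DDict` objects at once (`ZSTD_DDictHashSet`, `ddictSet`, `refMultipleDDicts`), and the tracing hooks behind `ZSTD_TRACE`. A minimal sketch of how an application would drive the first two through the advanced API follows. It assumes the experimental `ZSTD_d_forceIgnoreChecksum` and `ZSTD_d_refMultipleDDicts` parameters (and their enum values) that the bundled `zstd.h` exposes for these fields; that header's diff is not shown in this section, so treat those names as unverified.

```c
/* Sketch only. The dParameters ZSTD_d_forceIgnoreChecksum and ZSTD_d_refMultipleDDicts
 * are experimental and assumed from the bundled zstd.h, which this section does not show. */
#define ZSTD_STATIC_LINKING_ONLY   /* experimental parameters sit behind this guard */
#include <zstd.h>

static size_t decompress_with_dicts(void* dst, size_t dstCapacity,
                                    const void* src, size_t srcSize,
                                    const ZSTD_DDict* dictA,
                                    const ZSTD_DDict* dictB)
{
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    size_t ret;
    if (dctx == NULL) return (size_t)-1;   /* simplified error handling */

    /* Sets dctx->forceIgnoreChecksum, so validateChecksum stays 0 even when the
     * frame header carries checksumFlag == 1. */
    ZSTD_DCtx_setParameter(dctx, ZSTD_d_forceIgnoreChecksum, ZSTD_d_ignoreChecksum);

    /* Lets dctx->ddictSet hold more than one dictionary; the dictID in each frame
     * header then selects the matching DDict at decode time. */
    ZSTD_DCtx_setParameter(dctx, ZSTD_d_refMultipleDDicts, ZSTD_rmd_refMultipleDDicts);
    ZSTD_DCtx_refDDict(dctx, dictA);
    ZSTD_DCtx_refDDict(dctx, dictB);

    ret = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
    ZSTD_freeDCtx(dctx);
    return ret;   /* caller checks with ZSTD_isError() */
}
```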
data/ext/zstdruby/libzstd/dictBuilder/cover.c

```diff
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -40,33 +40,42 @@
 * Constants
 ***************************************/
 #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
-#define
+#define COVER_DEFAULT_SPLITPOINT 1.0

 /*-*************************************
 *  Console display
 ***************************************/
+#ifndef LOCALDISPLAYLEVEL
 static int g_displayLevel = 2;
+#endif
+#undef  DISPLAY
 #define DISPLAY(...) \
   { \
     fprintf(stderr, __VA_ARGS__); \
     fflush(stderr); \
   }
+#undef  LOCALDISPLAYLEVEL
 #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
   if (displayLevel >= l) { \
     DISPLAY(__VA_ARGS__); \
   } /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
+#undef  DISPLAYLEVEL
 #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)

+#ifndef LOCALDISPLAYUPDATE
+static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
+static clock_t g_time = 0;
+#endif
+#undef  LOCALDISPLAYUPDATE
 #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
   if (displayLevel >= l) { \
-    if ((clock() - g_time >
+    if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
       g_time = clock(); \
       DISPLAY(__VA_ARGS__); \
     } \
   }
+#undef  DISPLAYUPDATE
 #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
-static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
-static clock_t g_time = 0;

 /*-*************************************
 * Hash table
@@ -120,9 +129,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) {
 /**
  * Internal hash function
  */
-static const U32
+static const U32 COVER_prime4bytes = 2654435761U;
 static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
-  return (key *
+  return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
 }

 /**
@@ -215,7 +224,7 @@ typedef struct {
 } COVER_ctx_t;

 /* We need a global context for qsort... */
-static COVER_ctx_t *
+static COVER_ctx_t *g_coverCtx = NULL;

 /*-*************************************
 *  Helper functions
@@ -258,11 +267,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {

 /**
  * Same as COVER_cmp() except ties are broken by pointer value
- * NOTE:
+ * NOTE: g_coverCtx must be set to call this function. A global is required because
  * qsort doesn't take an opaque pointer.
  */
-static int COVER_strict_cmp(const void *lp, const void *rp) {
-  int result = COVER_cmp(
+static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
+  int result = COVER_cmp(g_coverCtx, lp, rp);
   if (result == 0) {
     result = lp < rp ? -1 : 1;
   }
@@ -271,8 +280,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
 /**
  * Faster version for d <= 8.
  */
-static int COVER_strict_cmp8(const void *lp, const void *rp) {
-  int result = COVER_cmp8(
+static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
+  int result = COVER_cmp8(g_coverCtx, lp, rp);
   if (result == 0) {
     result = lp < rp ? -1 : 1;
   }
@@ -603,7 +612,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
   /* qsort doesn't take an opaque pointer, so pass as a global.
    * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
    */
-
+  g_coverCtx = ctx;
 #if defined(__OpenBSD__)
   mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
             (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
@@ -946,7 +955,7 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
   free(selection.dictContent);
 }

-COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
+COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
         size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
         size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {

@@ -954,8 +963,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
   size_t largestCompressed = 0;
   BYTE* customDictContentEnd = customDictContent + dictContentSize;

-  BYTE * largestDictbuffer = (BYTE *)malloc(
-  BYTE * candidateDictBuffer = (BYTE *)malloc(
+  BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
+  BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
   double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;

   if (!largestDictbuffer || !candidateDictBuffer) {
@@ -967,7 +976,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
   /* Initial dictionary size and compressed size */
   memcpy(largestDictbuffer, customDictContent, dictContentSize);
   dictContentSize = ZDICT_finalizeDictionary(
-    largestDictbuffer,
+    largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
     samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);

   if (ZDICT_isError(dictContentSize)) {
@@ -1001,7 +1010,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
   while (dictContentSize < largestDict) {
     memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
     dictContentSize = ZDICT_finalizeDictionary(
-      candidateDictBuffer,
+      candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
       samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);

     if (ZDICT_isError(dictContentSize)) {
@@ -1053,18 +1062,19 @@ typedef struct COVER_tryParameters_data_s {
  * This function is thread safe if zstd is compiled with multithreaded support.
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
  */
-static void COVER_tryParameters(void *opaque)
+static void COVER_tryParameters(void *opaque)
+{
   /* Save parameters as local variables */
-  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t
+  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
   const COVER_ctx_t *const ctx = data->ctx;
   const ZDICT_cover_params_t parameters = data->parameters;
   size_t dictBufferCapacity = data->dictBufferCapacity;
   size_t totalCompressedSize = ERROR(GENERIC);
   /* Allocate space for hash table, dict, and freqs */
   COVER_map_t activeDmers;
-  BYTE
+  BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
   COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
-  U32
+  U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
   if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
     DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
     goto _cleanup;
@@ -1079,7 +1089,7 @@ static void COVER_tryParameters(void *opaque) {
   {
     const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
                                               dictBufferCapacity, parameters);
-    selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
+    selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
         ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
         totalCompressedSize);

@@ -1094,19 +1104,18 @@ _cleanup:
   free(data);
   COVER_map_destroy(&activeDmers);
   COVER_dictSelectionFree(selection);
-
-  free(freqs);
-  }
+  free(freqs);
 }

 ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
-    void
-    const size_t
-    ZDICT_cover_params_t
+    void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
+    const size_t* samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t* parameters)
+{
   /* constants */
   const unsigned nbThreads = parameters->nbThreads;
   const double splitPoint =
-      parameters->splitPoint <= 0.0 ?
+      parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
   const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
   const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
   const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
```
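`COVER_map_hash` above is plain multiplicative hashing: multiply the key by a fixed 32-bit constant and keep the top `sizeLog` bits, giving a bucket index into a table of 2^sizeLog entries. A self-contained illustration follows, with the constant and shift copied from the hunk; `toy_map_t` is a stand-in for the real `COVER_map_t`, which carries more fields.

```c
#include <stdint.h>
#include <stdio.h>

/* Stand-in for COVER_map_t: only the field the hash needs. */
typedef struct { unsigned sizeLog; } toy_map_t;

static const uint32_t COVER_prime4bytes = 2654435761U;   /* as in the hunk above */

static uint32_t toy_map_hash(const toy_map_t* map, uint32_t key)
{
    /* top `sizeLog` bits of (key * prime) -> bucket in [0, 2^sizeLog) */
    return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
}

int main(void)
{
    toy_map_t map = { 10 };   /* 1024 buckets */
    uint32_t key;
    for (key = 0; key < 4; key++)
        printf("key %u -> bucket %u\n", key, toy_map_hash(&map, key));
    return 0;
}
```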
data/ext/zstdruby/libzstd/dictBuilder/cover.h

```diff
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-
+ * Copyright (c) 2017-2021, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -152,6 +152,6 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
  * smallest dictionary within a specified regression of the compressed size
  * from the largest dictionary.
  */
-COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
+COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
         size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
         size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
```
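The new `dictBufferCapacity` argument exists because `ZDICT_finalizeDictionary()` writes a dictionary header and entropy tables in front of the raw content, so it needs the full size of the destination buffer rather than just the bytes currently filled; the call sites in cover.c now also size their scratch buffers with it. A standalone sketch of that capacity-versus-content distinction using the public `zdict.h` entry point (the sample data here is made up, and the static-linking guard is kept in case the prototype still sits behind it in this version):

```c
#define ZDICT_STATIC_LINKING_ONLY
#include <stdio.h>
#include <string.h>
#include <zdict.h>

int main(void)
{
    unsigned char samples[4096];
    size_t sampleSizes[8];
    unsigned char content[1024];   /* raw selected dictionary content */
    unsigned char dict[2048];      /* capacity > content: room for header + entropy tables */
    ZDICT_params_t params;
    size_t i, dictSize;

    for (i = 0; i < sizeof(samples); i++) samples[i] = (unsigned char)(i % 251);
    for (i = 0; i < 8; i++) sampleSizes[i] = sizeof(samples) / 8;
    memcpy(content, samples, sizeof(content));
    memset(&params, 0, sizeof(params));   /* defaults: compression level 0, no forced dictID */

    dictSize = ZDICT_finalizeDictionary(dict, sizeof(dict),        /* full capacity   */
                                        content, sizeof(content),  /* current content */
                                        samples, sampleSizes, 8, params);
    if (ZDICT_isError(dictSize)) {
        printf("error: %s\n", ZDICT_getErrorName(dictSize));
        return 1;
    }
    printf("finalized dictionary: %zu bytes\n", dictSize);
    return 0;
}
```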
data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c

```diff
@@ -1576,7 +1576,7 @@ note:
   /* Construct the inverse suffix array of type B* suffixes using trsort. */
   trsort(ISAb, SA, m, 1);

-  /* Set the sorted order of
+  /* Set the sorted order of type B* suffixes. */
   for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
     for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
     if(0 <= i) {
```
data/ext/zstdruby/libzstd/dictBuilder/fastcover.c

```diff
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-
+ * Copyright (c) 2018-2021, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -21,6 +21,7 @@
 #include "../common/threading.h"
 #include "cover.h"
 #include "../common/zstd_internal.h" /* includes zstd.h */
+#include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
 #ifndef ZDICT_STATIC_LINKING_ONLY
 #define ZDICT_STATIC_LINKING_ONLY
 #endif
@@ -33,7 +34,7 @@
 #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
 #define FASTCOVER_MAX_F 31
 #define FASTCOVER_MAX_ACCEL 10
-#define
+#define FASTCOVER_DEFAULT_SPLITPOINT 0.75
 #define DEFAULT_F 20
 #define DEFAULT_ACCEL 1

@@ -41,50 +42,50 @@
 /*-*************************************
 *  Console display
 ***************************************/
+#ifndef LOCALDISPLAYLEVEL
 static int g_displayLevel = 2;
+#endif
+#undef  DISPLAY
 #define DISPLAY(...) \
   { \
     fprintf(stderr, __VA_ARGS__); \
     fflush(stderr); \
   }
+#undef  LOCALDISPLAYLEVEL
 #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
   if (displayLevel >= l) { \
     DISPLAY(__VA_ARGS__); \
   } /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
+#undef  DISPLAYLEVEL
 #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)

+#ifndef LOCALDISPLAYUPDATE
+static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
+static clock_t g_time = 0;
+#endif
+#undef  LOCALDISPLAYUPDATE
 #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
   if (displayLevel >= l) { \
-    if ((clock() - g_time >
+    if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
       g_time = clock(); \
       DISPLAY(__VA_ARGS__); \
     } \
   }
+#undef  DISPLAYUPDATE
 #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
-static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
-static clock_t g_time = 0;


 /*-*************************************
 * Hash Functions
 ***************************************/
-static const U64 prime6bytes = 227718039650203ULL;
-static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
-static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
-
-static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
-static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
-static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
-
-
 /**
- * Hash the d-byte value pointed to by p and mod 2^f
+ * Hash the d-byte value pointed to by p and mod 2^f into the frequency vector
  */
-static size_t FASTCOVER_hashPtrToIndex(const void* p, U32
+static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) {
   if (d == 6) {
-    return ZSTD_hash6Ptr(p,
+    return ZSTD_hash6Ptr(p, f);
   }
-  return ZSTD_hash8Ptr(p,
+  return ZSTD_hash8Ptr(p, f);
 }


@@ -461,20 +462,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
  * This function is thread safe if zstd is compiled with multithreaded support.
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
  */
-static void FASTCOVER_tryParameters(void
+static void FASTCOVER_tryParameters(void* opaque)
 {
   /* Save parameters as local variables */
-  FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t
+  FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
   const FASTCOVER_ctx_t *const ctx = data->ctx;
   const ZDICT_cover_params_t parameters = data->parameters;
   size_t dictBufferCapacity = data->dictBufferCapacity;
   size_t totalCompressedSize = ERROR(GENERIC);
   /* Initialize array to keep track of frequency of dmer within activeSegment */
-  U16* segmentFreqs = (U16
+  U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
   /* Allocate space for hash table, dict, and freqs */
-  BYTE *const dict = (BYTE
+  BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
   COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
-  U32
+  U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
   if (!segmentFreqs || !dict || !freqs) {
     DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
     goto _cleanup;
@@ -486,7 +487,7 @@ static void FASTCOVER_tryParameters(void *opaque)
                                               parameters, segmentFreqs);

     const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
-    selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
+    selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
         ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
         totalCompressedSize);

@@ -617,7 +618,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
   /* constants */
   const unsigned nbThreads = parameters->nbThreads;
   const double splitPoint =
-      parameters->splitPoint <= 0.0 ?
+      parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
   const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
   const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
   const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
```
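The hash helpers deleted above are now pulled in from `zstd_compress_internal.h` via the new include; the arithmetic itself is unchanged. A standalone sketch of that arithmetic, with the primes and shifts copied from the removed lines and a `memcpy` read standing in for `MEM_readLE64` (so it assumes a little-endian host):

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Constants and formulas as in the removed lines above (now supplied by
 * zstd_compress_internal.h). */
static const uint64_t prime6bytes = 227718039650203ULL;
static const uint64_t prime8bytes = 0xCF1BBCDCB7A56463ULL;

static uint64_t readLE64(const void* p) { uint64_t v; memcpy(&v, p, sizeof(v)); return v; }

static size_t hash6(uint64_t u, unsigned h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)); }
static size_t hash8(uint64_t u, unsigned h) { return (size_t)((u * prime8bytes) >> (64-h)); }

/* Equivalent of FASTCOVER_hashPtrToIndex(p, f, d): hash the d-byte window at p
 * into [0, 2^f), i.e. an index into the dmer frequency vector. */
static size_t hashPtrToIndex(const void* p, unsigned f, unsigned d)
{
    if (d == 6) return hash6(readLE64(p), f);
    return hash8(readLE64(p), f);
}

int main(void)
{
    const char window[9] = "abcdefgh";   /* 8-byte window (plus terminator) */
    printf("d=6 -> %zu, d=8 -> %zu (f=20 keeps indices below %u)\n",
           hashPtrToIndex(window, 20, 6), hashPtrToIndex(window, 20, 8), 1u << 20);
    return 0;
}
```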