zstd-ruby 1.4.5.0 → 1.4.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/libzstd/Makefile +237 -138
- data/ext/zstdruby/libzstd/README.md +28 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +25 -16
- data/ext/zstdruby/libzstd/common/compiler.h +118 -4
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +12 -19
- data/ext/zstdruby/libzstd/common/entropy_common.c +189 -43
- data/ext/zstdruby/libzstd/common/error_private.c +2 -1
- data/ext/zstdruby/libzstd/common/error_private.h +2 -2
- data/ext/zstdruby/libzstd/common/fse.h +40 -12
- data/ext/zstdruby/libzstd/common/fse_decompress.c +124 -17
- data/ext/zstdruby/libzstd/common/huf.h +27 -6
- data/ext/zstdruby/libzstd/common/mem.h +67 -94
- data/ext/zstdruby/libzstd/common/pool.c +23 -17
- data/ext/zstdruby/libzstd/common/pool.h +2 -2
- data/ext/zstdruby/libzstd/common/threading.c +6 -5
- data/ext/zstdruby/libzstd/common/xxhash.c +19 -57
- data/ext/zstdruby/libzstd/common/xxhash.h +2 -2
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +90 -59
- data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
- data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +31 -24
- data/ext/zstdruby/libzstd/compress/hist.c +27 -29
- data/ext/zstdruby/libzstd/compress/hist.h +2 -2
- data/ext/zstdruby/libzstd/compress/huf_compress.c +217 -101
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +1495 -478
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +143 -44
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +7 -7
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +18 -4
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +62 -26
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +23 -23
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +21 -21
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +352 -78
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +276 -209
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +8 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +191 -46
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +79 -410
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +27 -109
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +303 -201
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +9 -9
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +370 -87
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +153 -45
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +6 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +28 -11
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +40 -31
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +2 -2
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +26 -25
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +22 -24
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +5 -4
- data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +6 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +6 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +6 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +7 -3
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +10 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +10 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +10 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.pc.in +3 -3
- data/ext/zstdruby/libzstd/zstd.h +414 -54
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +7 -3
- data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -21,32 +21,33 @@
|
|
21
21
|
*********************************************************/
|
22
22
|
#include "../common/mem.h" /* BYTE, U16, U32 */
|
23
23
|
#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
|
24
|
+
#include "../common/zstd_trace.h" /* ZSTD_TraceCtx */
|
24
25
|
|
25
26
|
|
26
27
|
|
27
28
|
/*-*******************************************************
|
28
29
|
* Constants
|
29
30
|
*********************************************************/
|
30
|
-
static const U32 LL_base[MaxLL+1] = {
|
31
|
+
static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
|
31
32
|
0, 1, 2, 3, 4, 5, 6, 7,
|
32
33
|
8, 9, 10, 11, 12, 13, 14, 15,
|
33
34
|
16, 18, 20, 22, 24, 28, 32, 40,
|
34
35
|
48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
|
35
36
|
0x2000, 0x4000, 0x8000, 0x10000 };
|
36
37
|
|
37
|
-
static const U32 OF_base[MaxOff+1] = {
|
38
|
+
static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
|
38
39
|
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
|
39
40
|
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
|
40
41
|
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
41
42
|
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
42
43
|
|
43
|
-
static const U32 OF_bits[MaxOff+1] = {
|
44
|
+
static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
|
44
45
|
0, 1, 2, 3, 4, 5, 6, 7,
|
45
46
|
8, 9, 10, 11, 12, 13, 14, 15,
|
46
47
|
16, 17, 18, 19, 20, 21, 22, 23,
|
47
48
|
24, 25, 26, 27, 28, 29, 30, 31 };
|
48
49
|
|
49
|
-
static const U32 ML_base[MaxML+1] = {
|
50
|
+
static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
|
50
51
|
3, 4, 5, 6, 7, 8, 9, 10,
|
51
52
|
11, 12, 13, 14, 15, 16, 17, 18,
|
52
53
|
19, 20, 21, 22, 23, 24, 25, 26,
|
@@ -73,12 +74,16 @@ static const U32 ML_base[MaxML+1] = {
|
|
73
74
|
|
74
75
|
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
|
75
76
|
|
77
|
+
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
|
78
|
+
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
|
79
|
+
|
76
80
|
typedef struct {
|
77
81
|
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
|
78
82
|
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
|
79
83
|
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
80
84
|
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
|
81
85
|
U32 rep[ZSTD_REP_NUM];
|
86
|
+
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
|
82
87
|
} ZSTD_entropyDTables_t;
|
83
88
|
|
84
89
|
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
|
@@ -95,10 +100,12 @@ typedef enum {
|
|
95
100
|
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
96
101
|
} ZSTD_dictUses_e;
|
97
102
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
103
|
+
/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
|
104
|
+
typedef struct {
|
105
|
+
const ZSTD_DDict** ddictPtrTable;
|
106
|
+
size_t ddictPtrTableSize;
|
107
|
+
size_t ddictPtrCount;
|
108
|
+
} ZSTD_DDictHashSet;
|
102
109
|
|
103
110
|
struct ZSTD_DCtx_s
|
104
111
|
{
|
@@ -114,6 +121,7 @@ struct ZSTD_DCtx_s
|
|
114
121
|
const void* dictEnd; /* end of previous segment */
|
115
122
|
size_t expected;
|
116
123
|
ZSTD_frameHeader fParams;
|
124
|
+
U64 processedCSize;
|
117
125
|
U64 decodedSize;
|
118
126
|
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
|
119
127
|
ZSTD_dStage stage;
|
@@ -122,6 +130,8 @@ struct ZSTD_DCtx_s
|
|
122
130
|
XXH64_state_t xxhState;
|
123
131
|
size_t headerSize;
|
124
132
|
ZSTD_format_e format;
|
133
|
+
ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
|
134
|
+
U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
|
125
135
|
const BYTE* litPtr;
|
126
136
|
ZSTD_customMem customMem;
|
127
137
|
size_t litSize;
|
@@ -135,6 +145,8 @@ struct ZSTD_DCtx_s
|
|
135
145
|
U32 dictID;
|
136
146
|
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
|
137
147
|
ZSTD_dictUses_e dictUses;
|
148
|
+
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
|
149
|
+
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
|
138
150
|
|
139
151
|
/* streaming */
|
140
152
|
ZSTD_dStreamStage streamStage;
|
@@ -152,7 +164,7 @@ struct ZSTD_DCtx_s
|
|
152
164
|
U32 legacyVersion;
|
153
165
|
U32 hostageByte;
|
154
166
|
int noForwardProgress;
|
155
|
-
|
167
|
+
ZSTD_bufferMode_e outBufferMode;
|
156
168
|
ZSTD_outBuffer expectedOutBuffer;
|
157
169
|
|
158
170
|
/* workspace */
|
@@ -165,6 +177,11 @@ struct ZSTD_DCtx_s
|
|
165
177
|
void const* dictContentBeginForFuzzing;
|
166
178
|
void const* dictContentEndForFuzzing;
|
167
179
|
#endif
|
180
|
+
|
181
|
+
/* Tracing */
|
182
|
+
#if ZSTD_TRACE
|
183
|
+
ZSTD_TraceCtx traceCtx;
|
184
|
+
#endif
|
168
185
|
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
|
169
186
|
|
170
187
|
|
@@ -183,7 +200,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
|
183
200
|
* If yes, do nothing (continue on current segment).
|
184
201
|
* If not, classify previous segment as "external dictionary", and start a new segment.
|
185
202
|
* This function cannot fail. */
|
186
|
-
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
|
203
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
|
187
204
|
|
188
205
|
|
189
206
|
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -40,33 +40,42 @@
|
|
40
40
|
* Constants
|
41
41
|
***************************************/
|
42
42
|
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
43
|
-
#define
|
43
|
+
#define COVER_DEFAULT_SPLITPOINT 1.0
|
44
44
|
|
45
45
|
/*-*************************************
|
46
46
|
* Console display
|
47
47
|
***************************************/
|
48
|
+
#ifndef LOCALDISPLAYLEVEL
|
48
49
|
static int g_displayLevel = 2;
|
50
|
+
#endif
|
51
|
+
#undef DISPLAY
|
49
52
|
#define DISPLAY(...) \
|
50
53
|
{ \
|
51
54
|
fprintf(stderr, __VA_ARGS__); \
|
52
55
|
fflush(stderr); \
|
53
56
|
}
|
57
|
+
#undef LOCALDISPLAYLEVEL
|
54
58
|
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
|
55
59
|
if (displayLevel >= l) { \
|
56
60
|
DISPLAY(__VA_ARGS__); \
|
57
61
|
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
62
|
+
#undef DISPLAYLEVEL
|
58
63
|
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
|
59
64
|
|
65
|
+
#ifndef LOCALDISPLAYUPDATE
|
66
|
+
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
67
|
+
static clock_t g_time = 0;
|
68
|
+
#endif
|
69
|
+
#undef LOCALDISPLAYUPDATE
|
60
70
|
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
61
71
|
if (displayLevel >= l) { \
|
62
|
-
if ((clock() - g_time >
|
72
|
+
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
|
63
73
|
g_time = clock(); \
|
64
74
|
DISPLAY(__VA_ARGS__); \
|
65
75
|
} \
|
66
76
|
}
|
77
|
+
#undef DISPLAYUPDATE
|
67
78
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
68
|
-
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
69
|
-
static clock_t g_time = 0;
|
70
79
|
|
71
80
|
/*-*************************************
|
72
81
|
* Hash table
|
@@ -120,9 +129,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) {
|
|
120
129
|
/**
|
121
130
|
* Internal hash function
|
122
131
|
*/
|
123
|
-
static const U32
|
132
|
+
static const U32 COVER_prime4bytes = 2654435761U;
|
124
133
|
static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
|
125
|
-
return (key *
|
134
|
+
return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
|
126
135
|
}
|
127
136
|
|
128
137
|
/**
|
@@ -215,7 +224,7 @@ typedef struct {
|
|
215
224
|
} COVER_ctx_t;
|
216
225
|
|
217
226
|
/* We need a global context for qsort... */
|
218
|
-
static COVER_ctx_t *
|
227
|
+
static COVER_ctx_t *g_coverCtx = NULL;
|
219
228
|
|
220
229
|
/*-*************************************
|
221
230
|
* Helper functions
|
@@ -258,11 +267,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
|
|
258
267
|
|
259
268
|
/**
|
260
269
|
* Same as COVER_cmp() except ties are broken by pointer value
|
261
|
-
* NOTE:
|
270
|
+
* NOTE: g_coverCtx must be set to call this function. A global is required because
|
262
271
|
* qsort doesn't take an opaque pointer.
|
263
272
|
*/
|
264
|
-
static int COVER_strict_cmp(const void *lp, const void *rp) {
|
265
|
-
int result = COVER_cmp(
|
273
|
+
static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
|
274
|
+
int result = COVER_cmp(g_coverCtx, lp, rp);
|
266
275
|
if (result == 0) {
|
267
276
|
result = lp < rp ? -1 : 1;
|
268
277
|
}
|
@@ -271,8 +280,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
|
|
271
280
|
/**
|
272
281
|
* Faster version for d <= 8.
|
273
282
|
*/
|
274
|
-
static int COVER_strict_cmp8(const void *lp, const void *rp) {
|
275
|
-
int result = COVER_cmp8(
|
283
|
+
static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
|
284
|
+
int result = COVER_cmp8(g_coverCtx, lp, rp);
|
276
285
|
if (result == 0) {
|
277
286
|
result = lp < rp ? -1 : 1;
|
278
287
|
}
|
@@ -603,7 +612,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
603
612
|
/* qsort doesn't take an opaque pointer, so pass as a global.
|
604
613
|
* On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
|
605
614
|
*/
|
606
|
-
|
615
|
+
g_coverCtx = ctx;
|
607
616
|
#if defined(__OpenBSD__)
|
608
617
|
mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
|
609
618
|
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
|
@@ -946,7 +955,7 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
|
|
946
955
|
free(selection.dictContent);
|
947
956
|
}
|
948
957
|
|
949
|
-
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
958
|
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
|
950
959
|
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
951
960
|
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
|
952
961
|
|
@@ -954,8 +963,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
954
963
|
size_t largestCompressed = 0;
|
955
964
|
BYTE* customDictContentEnd = customDictContent + dictContentSize;
|
956
965
|
|
957
|
-
BYTE * largestDictbuffer = (BYTE *)malloc(
|
958
|
-
BYTE * candidateDictBuffer = (BYTE *)malloc(
|
966
|
+
BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
|
967
|
+
BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
|
959
968
|
double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
|
960
969
|
|
961
970
|
if (!largestDictbuffer || !candidateDictBuffer) {
|
@@ -967,7 +976,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
967
976
|
/* Initial dictionary size and compressed size */
|
968
977
|
memcpy(largestDictbuffer, customDictContent, dictContentSize);
|
969
978
|
dictContentSize = ZDICT_finalizeDictionary(
|
970
|
-
largestDictbuffer,
|
979
|
+
largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
|
971
980
|
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
972
981
|
|
973
982
|
if (ZDICT_isError(dictContentSize)) {
|
@@ -1001,7 +1010,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
1001
1010
|
while (dictContentSize < largestDict) {
|
1002
1011
|
memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
|
1003
1012
|
dictContentSize = ZDICT_finalizeDictionary(
|
1004
|
-
candidateDictBuffer,
|
1013
|
+
candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
|
1005
1014
|
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
1006
1015
|
|
1007
1016
|
if (ZDICT_isError(dictContentSize)) {
|
@@ -1053,18 +1062,19 @@ typedef struct COVER_tryParameters_data_s {
|
|
1053
1062
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
1054
1063
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
1055
1064
|
*/
|
1056
|
-
static void COVER_tryParameters(void *opaque)
|
1065
|
+
static void COVER_tryParameters(void *opaque)
|
1066
|
+
{
|
1057
1067
|
/* Save parameters as local variables */
|
1058
|
-
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t
|
1068
|
+
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
|
1059
1069
|
const COVER_ctx_t *const ctx = data->ctx;
|
1060
1070
|
const ZDICT_cover_params_t parameters = data->parameters;
|
1061
1071
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
1062
1072
|
size_t totalCompressedSize = ERROR(GENERIC);
|
1063
1073
|
/* Allocate space for hash table, dict, and freqs */
|
1064
1074
|
COVER_map_t activeDmers;
|
1065
|
-
BYTE
|
1075
|
+
BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
|
1066
1076
|
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
1067
|
-
U32
|
1077
|
+
U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
|
1068
1078
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
1069
1079
|
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
1070
1080
|
goto _cleanup;
|
@@ -1079,7 +1089,7 @@ static void COVER_tryParameters(void *opaque) {
|
|
1079
1089
|
{
|
1080
1090
|
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
|
1081
1091
|
dictBufferCapacity, parameters);
|
1082
|
-
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
|
1092
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
|
1083
1093
|
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
1084
1094
|
totalCompressedSize);
|
1085
1095
|
|
@@ -1094,19 +1104,18 @@ _cleanup:
|
|
1094
1104
|
free(data);
|
1095
1105
|
COVER_map_destroy(&activeDmers);
|
1096
1106
|
COVER_dictSelectionFree(selection);
|
1097
|
-
|
1098
|
-
free(freqs);
|
1099
|
-
}
|
1107
|
+
free(freqs);
|
1100
1108
|
}
|
1101
1109
|
|
1102
1110
|
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
1103
|
-
void
|
1104
|
-
const size_t
|
1105
|
-
ZDICT_cover_params_t
|
1111
|
+
void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
|
1112
|
+
const size_t* samplesSizes, unsigned nbSamples,
|
1113
|
+
ZDICT_cover_params_t* parameters)
|
1114
|
+
{
|
1106
1115
|
/* constants */
|
1107
1116
|
const unsigned nbThreads = parameters->nbThreads;
|
1108
1117
|
const double splitPoint =
|
1109
|
-
parameters->splitPoint <= 0.0 ?
|
1118
|
+
parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
|
1110
1119
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
1111
1120
|
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
1112
1121
|
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2017-
|
2
|
+
* Copyright (c) 2017-2021, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -152,6 +152,6 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
|
|
152
152
|
* smallest dictionary within a specified regression of the compressed size
|
153
153
|
* from the largest dictionary.
|
154
154
|
*/
|
155
|
-
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
155
|
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
|
156
156
|
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
157
157
|
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
|
@@ -1576,7 +1576,7 @@ note:
|
|
1576
1576
|
/* Construct the inverse suffix array of type B* suffixes using trsort. */
|
1577
1577
|
trsort(ISAb, SA, m, 1);
|
1578
1578
|
|
1579
|
-
/* Set the sorted order of
|
1579
|
+
/* Set the sorted order of type B* suffixes. */
|
1580
1580
|
for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
|
1581
1581
|
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
|
1582
1582
|
if(0 <= i) {
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2018-
|
2
|
+
* Copyright (c) 2018-2021, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -21,6 +21,7 @@
|
|
21
21
|
#include "../common/threading.h"
|
22
22
|
#include "cover.h"
|
23
23
|
#include "../common/zstd_internal.h" /* includes zstd.h */
|
24
|
+
#include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
|
24
25
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
25
26
|
#define ZDICT_STATIC_LINKING_ONLY
|
26
27
|
#endif
|
@@ -33,7 +34,7 @@
|
|
33
34
|
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
34
35
|
#define FASTCOVER_MAX_F 31
|
35
36
|
#define FASTCOVER_MAX_ACCEL 10
|
36
|
-
#define
|
37
|
+
#define FASTCOVER_DEFAULT_SPLITPOINT 0.75
|
37
38
|
#define DEFAULT_F 20
|
38
39
|
#define DEFAULT_ACCEL 1
|
39
40
|
|
@@ -41,50 +42,50 @@
|
|
41
42
|
/*-*************************************
|
42
43
|
* Console display
|
43
44
|
***************************************/
|
45
|
+
#ifndef LOCALDISPLAYLEVEL
|
44
46
|
static int g_displayLevel = 2;
|
47
|
+
#endif
|
48
|
+
#undef DISPLAY
|
45
49
|
#define DISPLAY(...) \
|
46
50
|
{ \
|
47
51
|
fprintf(stderr, __VA_ARGS__); \
|
48
52
|
fflush(stderr); \
|
49
53
|
}
|
54
|
+
#undef LOCALDISPLAYLEVEL
|
50
55
|
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
|
51
56
|
if (displayLevel >= l) { \
|
52
57
|
DISPLAY(__VA_ARGS__); \
|
53
58
|
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
59
|
+
#undef DISPLAYLEVEL
|
54
60
|
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
|
55
61
|
|
62
|
+
#ifndef LOCALDISPLAYUPDATE
|
63
|
+
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
64
|
+
static clock_t g_time = 0;
|
65
|
+
#endif
|
66
|
+
#undef LOCALDISPLAYUPDATE
|
56
67
|
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
57
68
|
if (displayLevel >= l) { \
|
58
|
-
if ((clock() - g_time >
|
69
|
+
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
|
59
70
|
g_time = clock(); \
|
60
71
|
DISPLAY(__VA_ARGS__); \
|
61
72
|
} \
|
62
73
|
}
|
74
|
+
#undef DISPLAYUPDATE
|
63
75
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
64
|
-
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
65
|
-
static clock_t g_time = 0;
|
66
76
|
|
67
77
|
|
68
78
|
/*-*************************************
|
69
79
|
* Hash Functions
|
70
80
|
***************************************/
|
71
|
-
static const U64 prime6bytes = 227718039650203ULL;
|
72
|
-
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
|
73
|
-
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
|
74
|
-
|
75
|
-
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
|
76
|
-
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
|
77
|
-
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
|
78
|
-
|
79
|
-
|
80
81
|
/**
|
81
|
-
* Hash the d-byte value pointed to by p and mod 2^f
|
82
|
+
* Hash the d-byte value pointed to by p and mod 2^f into the frequency vector
|
82
83
|
*/
|
83
|
-
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32
|
84
|
+
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) {
|
84
85
|
if (d == 6) {
|
85
|
-
return ZSTD_hash6Ptr(p,
|
86
|
+
return ZSTD_hash6Ptr(p, f);
|
86
87
|
}
|
87
|
-
return ZSTD_hash8Ptr(p,
|
88
|
+
return ZSTD_hash8Ptr(p, f);
|
88
89
|
}
|
89
90
|
|
90
91
|
|
@@ -461,20 +462,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
|
|
461
462
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
462
463
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
463
464
|
*/
|
464
|
-
static void FASTCOVER_tryParameters(void
|
465
|
+
static void FASTCOVER_tryParameters(void* opaque)
|
465
466
|
{
|
466
467
|
/* Save parameters as local variables */
|
467
|
-
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t
|
468
|
+
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
|
468
469
|
const FASTCOVER_ctx_t *const ctx = data->ctx;
|
469
470
|
const ZDICT_cover_params_t parameters = data->parameters;
|
470
471
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
471
472
|
size_t totalCompressedSize = ERROR(GENERIC);
|
472
473
|
/* Initialize array to keep track of frequency of dmer within activeSegment */
|
473
|
-
U16* segmentFreqs = (U16
|
474
|
+
U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
|
474
475
|
/* Allocate space for hash table, dict, and freqs */
|
475
|
-
BYTE *const dict = (BYTE
|
476
|
+
BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
|
476
477
|
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
477
|
-
U32
|
478
|
+
U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
|
478
479
|
if (!segmentFreqs || !dict || !freqs) {
|
479
480
|
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
|
480
481
|
goto _cleanup;
|
@@ -486,7 +487,7 @@ static void FASTCOVER_tryParameters(void *opaque)
|
|
486
487
|
parameters, segmentFreqs);
|
487
488
|
|
488
489
|
const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
|
489
|
-
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
|
490
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
|
490
491
|
ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
491
492
|
totalCompressedSize);
|
492
493
|
|
@@ -617,7 +618,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
617
618
|
/* constants */
|
618
619
|
const unsigned nbThreads = parameters->nbThreads;
|
619
620
|
const double splitPoint =
|
620
|
-
parameters->splitPoint <= 0.0 ?
|
621
|
+
parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
|
621
622
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
622
623
|
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
623
624
|
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|