zstd-ruby 1.4.5.0 → 1.5.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +8 -0
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/extconf.rb +2 -1
- data/ext/zstdruby/libzstd/BUCK +5 -7
- data/ext/zstdruby/libzstd/Makefile +225 -222
- data/ext/zstdruby/libzstd/README.md +43 -5
- data/ext/zstdruby/libzstd/common/bitstream.h +46 -22
- data/ext/zstdruby/libzstd/common/compiler.h +182 -22
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +12 -19
- data/ext/zstdruby/libzstd/common/entropy_common.c +196 -44
- data/ext/zstdruby/libzstd/common/error_private.c +2 -1
- data/ext/zstdruby/libzstd/common/error_private.h +82 -3
- data/ext/zstdruby/libzstd/common/fse.h +41 -12
- data/ext/zstdruby/libzstd/common/fse_decompress.c +139 -22
- data/ext/zstdruby/libzstd/common/huf.h +47 -23
- data/ext/zstdruby/libzstd/common/mem.h +87 -98
- data/ext/zstdruby/libzstd/common/pool.c +23 -17
- data/ext/zstdruby/libzstd/common/pool.h +2 -2
- data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
- data/ext/zstdruby/libzstd/common/threading.c +6 -5
- data/ext/zstdruby/libzstd/common/xxhash.c +6 -846
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +189 -142
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +89 -46
- data/ext/zstdruby/libzstd/compress/hist.c +27 -29
- data/ext/zstdruby/libzstd/compress/hist.h +2 -2
- data/ext/zstdruby/libzstd/compress/huf_compress.c +770 -198
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +2894 -863
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +390 -90
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +12 -11
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +31 -8
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -297
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +206 -69
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +307 -132
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +322 -143
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1136 -174
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +316 -213
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +373 -150
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +152 -444
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +31 -113
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1044 -403
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +9 -9
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +450 -105
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +913 -273
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +14 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +59 -12
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -38
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -34
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +128 -58
- data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +8 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +9 -9
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +9 -9
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +10 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +13 -13
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +13 -13
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +13 -13
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.mk +185 -0
- data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +154 -7
- data/ext/zstdruby/libzstd/zstd.h +699 -214
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +2 -1
- data/ext/zstdruby/zstdruby.c +2 -2
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +15 -6
- data/.travis.yml +0 -14
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
/*-*******************************************************
|
|
16
16
|
* Dependencies
|
|
17
17
|
*********************************************************/
|
|
18
|
-
#include
|
|
18
|
+
#include "../common/zstd_deps.h" /* size_t */
|
|
19
19
|
#include "../zstd.h" /* DCtx, and some public functions */
|
|
20
20
|
#include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */
|
|
21
21
|
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
|
|
@@ -33,6 +33,12 @@
|
|
|
33
33
|
*/
|
|
34
34
|
|
|
35
35
|
|
|
36
|
+
/* Streaming state is used to inform allocation of the literal buffer */
|
|
37
|
+
typedef enum {
|
|
38
|
+
not_streaming = 0,
|
|
39
|
+
is_streaming = 1
|
|
40
|
+
} streaming_operation;
|
|
41
|
+
|
|
36
42
|
/* ZSTD_decompressBlock_internal() :
|
|
37
43
|
* decompress block, starting at `src`,
|
|
38
44
|
* into destination buffer `dst`.
|
|
@@ -41,19 +47,22 @@
|
|
|
41
47
|
*/
|
|
42
48
|
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
43
49
|
void* dst, size_t dstCapacity,
|
|
44
|
-
const void* src, size_t srcSize, const int frame);
|
|
50
|
+
const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
|
|
45
51
|
|
|
46
52
|
/* ZSTD_buildFSETable() :
|
|
47
53
|
* generate FSE decoding table for one symbol (ll, ml or off)
|
|
48
54
|
* this function must be called with valid parameters only
|
|
49
55
|
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
|
|
50
56
|
* in which case it cannot fail.
|
|
57
|
+
* The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
|
|
58
|
+
* defined in zstd_decompress_internal.h.
|
|
51
59
|
* Internal use only.
|
|
52
60
|
*/
|
|
53
61
|
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
54
62
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
55
|
-
const U32* baseValue, const
|
|
56
|
-
unsigned tableLog);
|
|
63
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
64
|
+
unsigned tableLog, void* wksp, size_t wkspSize,
|
|
65
|
+
int bmi2);
|
|
57
66
|
|
|
58
67
|
|
|
59
68
|
#endif /* ZSTD_DEC_BLOCK_H */
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -20,33 +20,33 @@
|
|
|
20
20
|
* Dependencies
|
|
21
21
|
*********************************************************/
|
|
22
22
|
#include "../common/mem.h" /* BYTE, U16, U32 */
|
|
23
|
-
#include "../common/zstd_internal.h" /*
|
|
23
|
+
#include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
/*-*******************************************************
|
|
28
28
|
* Constants
|
|
29
29
|
*********************************************************/
|
|
30
|
-
static const U32 LL_base[MaxLL+1] = {
|
|
30
|
+
static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
|
|
31
31
|
0, 1, 2, 3, 4, 5, 6, 7,
|
|
32
32
|
8, 9, 10, 11, 12, 13, 14, 15,
|
|
33
33
|
16, 18, 20, 22, 24, 28, 32, 40,
|
|
34
34
|
48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
|
|
35
35
|
0x2000, 0x4000, 0x8000, 0x10000 };
|
|
36
36
|
|
|
37
|
-
static const U32 OF_base[MaxOff+1] = {
|
|
37
|
+
static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
|
|
38
38
|
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
|
|
39
39
|
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
|
|
40
40
|
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
|
41
41
|
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
|
42
42
|
|
|
43
|
-
static const
|
|
43
|
+
static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
|
|
44
44
|
0, 1, 2, 3, 4, 5, 6, 7,
|
|
45
45
|
8, 9, 10, 11, 12, 13, 14, 15,
|
|
46
46
|
16, 17, 18, 19, 20, 21, 22, 23,
|
|
47
47
|
24, 25, 26, 27, 28, 29, 30, 31 };
|
|
48
48
|
|
|
49
|
-
static const U32 ML_base[MaxML+1] = {
|
|
49
|
+
static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
|
|
50
50
|
3, 4, 5, 6, 7, 8, 9, 10,
|
|
51
51
|
11, 12, 13, 14, 15, 16, 17, 18,
|
|
52
52
|
19, 20, 21, 22, 23, 24, 25, 26,
|
|
@@ -73,12 +73,16 @@ static const U32 ML_base[MaxML+1] = {
|
|
|
73
73
|
|
|
74
74
|
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
|
|
75
75
|
|
|
76
|
+
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
|
|
77
|
+
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
|
|
78
|
+
|
|
76
79
|
typedef struct {
|
|
77
80
|
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
|
|
78
81
|
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
|
|
79
82
|
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
|
80
83
|
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
|
|
81
84
|
U32 rep[ZSTD_REP_NUM];
|
|
85
|
+
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
|
|
82
86
|
} ZSTD_entropyDTables_t;
|
|
83
87
|
|
|
84
88
|
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
|
|
@@ -95,10 +99,28 @@ typedef enum {
|
|
|
95
99
|
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
|
96
100
|
} ZSTD_dictUses_e;
|
|
97
101
|
|
|
102
|
+
/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
|
|
103
|
+
typedef struct {
|
|
104
|
+
const ZSTD_DDict** ddictPtrTable;
|
|
105
|
+
size_t ddictPtrTableSize;
|
|
106
|
+
size_t ddictPtrCount;
|
|
107
|
+
} ZSTD_DDictHashSet;
|
|
108
|
+
|
|
109
|
+
#ifndef ZSTD_DECODER_INTERNAL_BUFFER
|
|
110
|
+
# define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
|
|
111
|
+
#endif
|
|
112
|
+
|
|
113
|
+
#define ZSTD_LBMIN 64
|
|
114
|
+
#define ZSTD_LBMAX (128 << 10)
|
|
115
|
+
|
|
116
|
+
/* extra buffer, compensates when dst is not large enough to store litBuffer */
|
|
117
|
+
#define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
|
|
118
|
+
|
|
98
119
|
typedef enum {
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
120
|
+
ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
|
|
121
|
+
ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
|
|
122
|
+
ZSTD_split = 2 /* Split between litExtraBuffer and dst */
|
|
123
|
+
} ZSTD_litLocation_e;
|
|
102
124
|
|
|
103
125
|
struct ZSTD_DCtx_s
|
|
104
126
|
{
|
|
@@ -114,6 +136,7 @@ struct ZSTD_DCtx_s
|
|
|
114
136
|
const void* dictEnd; /* end of previous segment */
|
|
115
137
|
size_t expected;
|
|
116
138
|
ZSTD_frameHeader fParams;
|
|
139
|
+
U64 processedCSize;
|
|
117
140
|
U64 decodedSize;
|
|
118
141
|
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
|
|
119
142
|
ZSTD_dStage stage;
|
|
@@ -122,12 +145,16 @@ struct ZSTD_DCtx_s
|
|
|
122
145
|
XXH64_state_t xxhState;
|
|
123
146
|
size_t headerSize;
|
|
124
147
|
ZSTD_format_e format;
|
|
148
|
+
ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
|
|
149
|
+
U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
|
|
125
150
|
const BYTE* litPtr;
|
|
126
151
|
ZSTD_customMem customMem;
|
|
127
152
|
size_t litSize;
|
|
128
153
|
size_t rleSize;
|
|
129
154
|
size_t staticSize;
|
|
155
|
+
#if DYNAMIC_BMI2 != 0
|
|
130
156
|
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
|
|
157
|
+
#endif
|
|
131
158
|
|
|
132
159
|
/* dictionary */
|
|
133
160
|
ZSTD_DDict* ddictLocal;
|
|
@@ -135,6 +162,8 @@ struct ZSTD_DCtx_s
|
|
|
135
162
|
U32 dictID;
|
|
136
163
|
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
|
|
137
164
|
ZSTD_dictUses_e dictUses;
|
|
165
|
+
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
|
|
166
|
+
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
|
|
138
167
|
|
|
139
168
|
/* streaming */
|
|
140
169
|
ZSTD_dStreamStage streamStage;
|
|
@@ -147,16 +176,21 @@ struct ZSTD_DCtx_s
|
|
|
147
176
|
size_t outStart;
|
|
148
177
|
size_t outEnd;
|
|
149
178
|
size_t lhSize;
|
|
179
|
+
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
|
150
180
|
void* legacyContext;
|
|
151
181
|
U32 previousLegacyVersion;
|
|
152
182
|
U32 legacyVersion;
|
|
183
|
+
#endif
|
|
153
184
|
U32 hostageByte;
|
|
154
185
|
int noForwardProgress;
|
|
155
|
-
|
|
186
|
+
ZSTD_bufferMode_e outBufferMode;
|
|
156
187
|
ZSTD_outBuffer expectedOutBuffer;
|
|
157
188
|
|
|
158
189
|
/* workspace */
|
|
159
|
-
BYTE litBuffer
|
|
190
|
+
BYTE* litBuffer;
|
|
191
|
+
const BYTE* litBufferEnd;
|
|
192
|
+
ZSTD_litLocation_e litBufferLocation;
|
|
193
|
+
BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
|
|
160
194
|
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
|
|
161
195
|
|
|
162
196
|
size_t oversizedDuration;
|
|
@@ -165,8 +199,21 @@ struct ZSTD_DCtx_s
|
|
|
165
199
|
void const* dictContentBeginForFuzzing;
|
|
166
200
|
void const* dictContentEndForFuzzing;
|
|
167
201
|
#endif
|
|
202
|
+
|
|
203
|
+
/* Tracing */
|
|
204
|
+
#if ZSTD_TRACE
|
|
205
|
+
ZSTD_TraceCtx traceCtx;
|
|
206
|
+
#endif
|
|
168
207
|
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
|
|
169
208
|
|
|
209
|
+
MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
|
|
210
|
+
#if DYNAMIC_BMI2 != 0
|
|
211
|
+
return dctx->bmi2;
|
|
212
|
+
#else
|
|
213
|
+
(void)dctx;
|
|
214
|
+
return 0;
|
|
215
|
+
#endif
|
|
216
|
+
}
|
|
170
217
|
|
|
171
218
|
/*-*******************************************************
|
|
172
219
|
* Shared internal functions
|
|
@@ -183,7 +230,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
|
|
183
230
|
* If yes, do nothing (continue on current segment).
|
|
184
231
|
* If not, classify previous segment as "external dictionary", and start a new segment.
|
|
185
232
|
* This function cannot fail. */
|
|
186
|
-
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
|
|
233
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
|
|
187
234
|
|
|
188
235
|
|
|
189
236
|
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
***************************************/
|
|
16
16
|
#define ZBUFF_STATIC_LINKING_ONLY
|
|
17
17
|
#include "zbuff.h"
|
|
18
|
+
#include "../common/error_private.h"
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
/*-***********************************************************
|
|
@@ -73,13 +74,32 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
|
|
|
73
74
|
ZSTD_parameters params, unsigned long long pledgedSrcSize)
|
|
74
75
|
{
|
|
75
76
|
if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* preserve "0 == unknown" behavior */
|
|
76
|
-
|
|
77
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only), "");
|
|
78
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setPledgedSrcSize(zbc, pledgedSrcSize), "");
|
|
79
|
+
|
|
80
|
+
FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
|
|
81
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_windowLog, params.cParams.windowLog), "");
|
|
82
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_hashLog, params.cParams.hashLog), "");
|
|
83
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_chainLog, params.cParams.chainLog), "");
|
|
84
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_searchLog, params.cParams.searchLog), "");
|
|
85
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_minMatch, params.cParams.minMatch), "");
|
|
86
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_targetLength, params.cParams.targetLength), "");
|
|
87
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_strategy, params.cParams.strategy), "");
|
|
88
|
+
|
|
89
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_contentSizeFlag, params.fParams.contentSizeFlag), "");
|
|
90
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_checksumFlag, params.fParams.checksumFlag), "");
|
|
91
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_dictIDFlag, params.fParams.noDictIDFlag), "");
|
|
92
|
+
|
|
93
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), "");
|
|
94
|
+
return 0;
|
|
77
95
|
}
|
|
78
96
|
|
|
79
|
-
|
|
80
97
|
size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
|
|
81
98
|
{
|
|
82
|
-
|
|
99
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only), "");
|
|
100
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_compressionLevel, compressionLevel), "");
|
|
101
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), "");
|
|
102
|
+
return 0;
|
|
83
103
|
}
|
|
84
104
|
|
|
85
105
|
size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -26,47 +26,64 @@
|
|
|
26
26
|
#include <string.h> /* memset */
|
|
27
27
|
#include <time.h> /* clock */
|
|
28
28
|
|
|
29
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
30
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
|
31
|
+
#endif
|
|
32
|
+
|
|
29
33
|
#include "../common/mem.h" /* read */
|
|
30
34
|
#include "../common/pool.h"
|
|
31
35
|
#include "../common/threading.h"
|
|
32
|
-
#include "cover.h"
|
|
33
36
|
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
34
|
-
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
35
|
-
#define ZDICT_STATIC_LINKING_ONLY
|
|
36
|
-
#endif
|
|
37
|
-
#include "zdict.h"
|
|
37
|
+
#include "../zdict.h"
|
|
38
|
+
#include "cover.h"
|
|
38
39
|
|
|
39
40
|
/*-*************************************
|
|
40
41
|
* Constants
|
|
41
42
|
***************************************/
|
|
43
|
+
/**
|
|
44
|
+
* There are 32bit indexes used to ref samples, so limit samples size to 4GB
|
|
45
|
+
* on 64bit builds.
|
|
46
|
+
* For 32bit builds we choose 1 GB.
|
|
47
|
+
* Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
|
|
48
|
+
* contiguous buffer, so 1GB is already a high limit.
|
|
49
|
+
*/
|
|
42
50
|
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
|
43
|
-
#define
|
|
51
|
+
#define COVER_DEFAULT_SPLITPOINT 1.0
|
|
44
52
|
|
|
45
53
|
/*-*************************************
|
|
46
54
|
* Console display
|
|
47
55
|
***************************************/
|
|
48
|
-
|
|
56
|
+
#ifndef LOCALDISPLAYLEVEL
|
|
57
|
+
static int g_displayLevel = 0;
|
|
58
|
+
#endif
|
|
59
|
+
#undef DISPLAY
|
|
49
60
|
#define DISPLAY(...) \
|
|
50
61
|
{ \
|
|
51
62
|
fprintf(stderr, __VA_ARGS__); \
|
|
52
63
|
fflush(stderr); \
|
|
53
64
|
}
|
|
65
|
+
#undef LOCALDISPLAYLEVEL
|
|
54
66
|
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
|
|
55
67
|
if (displayLevel >= l) { \
|
|
56
68
|
DISPLAY(__VA_ARGS__); \
|
|
57
69
|
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
|
70
|
+
#undef DISPLAYLEVEL
|
|
58
71
|
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
|
|
59
72
|
|
|
73
|
+
#ifndef LOCALDISPLAYUPDATE
|
|
74
|
+
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
|
75
|
+
static clock_t g_time = 0;
|
|
76
|
+
#endif
|
|
77
|
+
#undef LOCALDISPLAYUPDATE
|
|
60
78
|
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
|
61
79
|
if (displayLevel >= l) { \
|
|
62
|
-
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
|
|
80
|
+
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
|
|
63
81
|
g_time = clock(); \
|
|
64
82
|
DISPLAY(__VA_ARGS__); \
|
|
65
83
|
} \
|
|
66
84
|
}
|
|
85
|
+
#undef DISPLAYUPDATE
|
|
67
86
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
|
68
|
-
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
|
69
|
-
static clock_t g_time = 0;
|
|
70
87
|
|
|
71
88
|
/*-*************************************
|
|
72
89
|
* Hash table
|
|
@@ -120,9 +137,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) {
|
|
|
120
137
|
/**
|
|
121
138
|
* Internal hash function
|
|
122
139
|
*/
|
|
123
|
-
static const U32
|
|
140
|
+
static const U32 COVER_prime4bytes = 2654435761U;
|
|
124
141
|
static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
|
|
125
|
-
return (key *
|
|
142
|
+
return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
|
|
126
143
|
}
|
|
127
144
|
|
|
128
145
|
/**
|
|
@@ -215,7 +232,7 @@ typedef struct {
|
|
|
215
232
|
} COVER_ctx_t;
|
|
216
233
|
|
|
217
234
|
/* We need a global context for qsort... */
|
|
218
|
-
static COVER_ctx_t *
|
|
235
|
+
static COVER_ctx_t *g_coverCtx = NULL;
|
|
219
236
|
|
|
220
237
|
/*-*************************************
|
|
221
238
|
* Helper functions
|
|
@@ -258,11 +275,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
|
|
|
258
275
|
|
|
259
276
|
/**
|
|
260
277
|
* Same as COVER_cmp() except ties are broken by pointer value
|
|
261
|
-
* NOTE:
|
|
278
|
+
* NOTE: g_coverCtx must be set to call this function. A global is required because
|
|
262
279
|
* qsort doesn't take an opaque pointer.
|
|
263
280
|
*/
|
|
264
|
-
static int COVER_strict_cmp(const void *lp, const void *rp) {
|
|
265
|
-
int result = COVER_cmp(
|
|
281
|
+
static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
|
|
282
|
+
int result = COVER_cmp(g_coverCtx, lp, rp);
|
|
266
283
|
if (result == 0) {
|
|
267
284
|
result = lp < rp ? -1 : 1;
|
|
268
285
|
}
|
|
@@ -271,8 +288,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
|
|
|
271
288
|
/**
|
|
272
289
|
* Faster version for d <= 8.
|
|
273
290
|
*/
|
|
274
|
-
static int COVER_strict_cmp8(const void *lp, const void *rp) {
|
|
275
|
-
int result = COVER_cmp8(
|
|
291
|
+
static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
|
|
292
|
+
int result = COVER_cmp8(g_coverCtx, lp, rp);
|
|
276
293
|
if (result == 0) {
|
|
277
294
|
result = lp < rp ? -1 : 1;
|
|
278
295
|
}
|
|
@@ -603,7 +620,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
|
603
620
|
/* qsort doesn't take an opaque pointer, so pass as a global.
|
|
604
621
|
* On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
|
|
605
622
|
*/
|
|
606
|
-
|
|
623
|
+
g_coverCtx = ctx;
|
|
607
624
|
#if defined(__OpenBSD__)
|
|
608
625
|
mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
|
|
609
626
|
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
|
|
@@ -725,7 +742,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
|
725
742
|
COVER_map_t activeDmers;
|
|
726
743
|
parameters.splitPoint = 1.0;
|
|
727
744
|
/* Initialize global data */
|
|
728
|
-
g_displayLevel = parameters.zParams.notificationLevel;
|
|
745
|
+
g_displayLevel = (int)parameters.zParams.notificationLevel;
|
|
729
746
|
/* Checks */
|
|
730
747
|
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
|
|
731
748
|
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
|
@@ -946,7 +963,7 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
|
|
|
946
963
|
free(selection.dictContent);
|
|
947
964
|
}
|
|
948
965
|
|
|
949
|
-
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
966
|
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
|
|
950
967
|
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
|
951
968
|
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
|
|
952
969
|
|
|
@@ -954,8 +971,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
|
954
971
|
size_t largestCompressed = 0;
|
|
955
972
|
BYTE* customDictContentEnd = customDictContent + dictContentSize;
|
|
956
973
|
|
|
957
|
-
BYTE * largestDictbuffer = (BYTE *)malloc(
|
|
958
|
-
BYTE * candidateDictBuffer = (BYTE *)malloc(
|
|
974
|
+
BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
|
|
975
|
+
BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
|
|
959
976
|
double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
|
|
960
977
|
|
|
961
978
|
if (!largestDictbuffer || !candidateDictBuffer) {
|
|
@@ -967,7 +984,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
|
967
984
|
/* Initial dictionary size and compressed size */
|
|
968
985
|
memcpy(largestDictbuffer, customDictContent, dictContentSize);
|
|
969
986
|
dictContentSize = ZDICT_finalizeDictionary(
|
|
970
|
-
largestDictbuffer,
|
|
987
|
+
largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
|
|
971
988
|
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
|
972
989
|
|
|
973
990
|
if (ZDICT_isError(dictContentSize)) {
|
|
@@ -1001,7 +1018,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
|
1001
1018
|
while (dictContentSize < largestDict) {
|
|
1002
1019
|
memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
|
|
1003
1020
|
dictContentSize = ZDICT_finalizeDictionary(
|
|
1004
|
-
candidateDictBuffer,
|
|
1021
|
+
candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
|
|
1005
1022
|
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
|
1006
1023
|
|
|
1007
1024
|
if (ZDICT_isError(dictContentSize)) {
|
|
@@ -1053,18 +1070,19 @@ typedef struct COVER_tryParameters_data_s {
|
|
|
1053
1070
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
|
1054
1071
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
|
1055
1072
|
*/
|
|
1056
|
-
static void COVER_tryParameters(void *opaque) {
|
|
1073
|
+
static void COVER_tryParameters(void *opaque)
|
|
1074
|
+
{
|
|
1057
1075
|
/* Save parameters as local variables */
|
|
1058
|
-
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t
|
|
1076
|
+
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
|
|
1059
1077
|
const COVER_ctx_t *const ctx = data->ctx;
|
|
1060
1078
|
const ZDICT_cover_params_t parameters = data->parameters;
|
|
1061
1079
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
|
1062
1080
|
size_t totalCompressedSize = ERROR(GENERIC);
|
|
1063
1081
|
/* Allocate space for hash table, dict, and freqs */
|
|
1064
1082
|
COVER_map_t activeDmers;
|
|
1065
|
-
BYTE
|
|
1083
|
+
BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
|
|
1066
1084
|
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
|
1067
|
-
U32
|
|
1085
|
+
U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
|
|
1068
1086
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
|
1069
1087
|
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
|
1070
1088
|
goto _cleanup;
|
|
@@ -1079,7 +1097,7 @@ static void COVER_tryParameters(void *opaque) {
|
|
|
1079
1097
|
{
|
|
1080
1098
|
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
|
|
1081
1099
|
dictBufferCapacity, parameters);
|
|
1082
|
-
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
|
|
1100
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
|
|
1083
1101
|
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
|
1084
1102
|
totalCompressedSize);
|
|
1085
1103
|
|
|
@@ -1094,19 +1112,18 @@ _cleanup:
|
|
|
1094
1112
|
free(data);
|
|
1095
1113
|
COVER_map_destroy(&activeDmers);
|
|
1096
1114
|
COVER_dictSelectionFree(selection);
|
|
1097
|
-
|
|
1098
|
-
free(freqs);
|
|
1099
|
-
}
|
|
1115
|
+
free(freqs);
|
|
1100
1116
|
}
|
|
1101
1117
|
|
|
1102
1118
|
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1103
|
-
void
|
|
1104
|
-
const size_t
|
|
1105
|
-
ZDICT_cover_params_t
|
|
1119
|
+
void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
|
|
1120
|
+
const size_t* samplesSizes, unsigned nbSamples,
|
|
1121
|
+
ZDICT_cover_params_t* parameters)
|
|
1122
|
+
{
|
|
1106
1123
|
/* constants */
|
|
1107
1124
|
const unsigned nbThreads = parameters->nbThreads;
|
|
1108
1125
|
const double splitPoint =
|
|
1109
|
-
parameters->splitPoint <= 0.0 ?
|
|
1126
|
+
parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
|
|
1110
1127
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
|
1111
1128
|
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
|
1112
1129
|
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -8,6 +8,10 @@
|
|
|
8
8
|
* You may select, at your option, one of the above-listed licenses.
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
12
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
|
13
|
+
#endif
|
|
14
|
+
|
|
11
15
|
#include <stdio.h> /* fprintf */
|
|
12
16
|
#include <stdlib.h> /* malloc, free, qsort */
|
|
13
17
|
#include <string.h> /* memset */
|
|
@@ -16,10 +20,7 @@
|
|
|
16
20
|
#include "../common/pool.h"
|
|
17
21
|
#include "../common/threading.h"
|
|
18
22
|
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
19
|
-
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
20
|
-
#define ZDICT_STATIC_LINKING_ONLY
|
|
21
|
-
#endif
|
|
22
|
-
#include "zdict.h"
|
|
23
|
+
#include "../zdict.h"
|
|
23
24
|
|
|
24
25
|
/**
|
|
25
26
|
* COVER_best_t is used for two purposes:
|
|
@@ -152,6 +153,6 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
|
|
|
152
153
|
* smallest dictionary within a specified regression of the compressed size
|
|
153
154
|
* from the largest dictionary.
|
|
154
155
|
*/
|
|
155
|
-
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
156
|
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
|
|
156
157
|
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
|
157
158
|
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
|
|
@@ -1576,7 +1576,7 @@ note:
|
|
|
1576
1576
|
/* Construct the inverse suffix array of type B* suffixes using trsort. */
|
|
1577
1577
|
trsort(ISAb, SA, m, 1);
|
|
1578
1578
|
|
|
1579
|
-
/* Set the sorted order of
|
|
1579
|
+
/* Set the sorted order of type B* suffixes. */
|
|
1580
1580
|
for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
|
|
1581
1581
|
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
|
|
1582
1582
|
if(0 <= i) {
|