zstd-ruby 0.1.2 → 1.1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/BUCK +186 -0
- data/ext/zstdruby/libzstd/Makefile +58 -31
- data/ext/zstdruby/libzstd/common/mem.h +1 -1
- data/ext/zstdruby/libzstd/common/pool.c +194 -0
- data/ext/zstdruby/libzstd/common/pool.h +56 -0
- data/ext/zstdruby/libzstd/common/threading.c +79 -0
- data/ext/zstdruby/libzstd/common/threading.h +104 -0
- data/ext/zstdruby/libzstd/common/zstd_common.c +0 -4
- data/ext/zstdruby/libzstd/common/zstd_errors.h +16 -2
- data/ext/zstdruby/libzstd/common/zstd_internal.h +9 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +94 -51
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +6 -6
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +740 -0
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +78 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +39 -22
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +26 -0
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +1021 -0
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +60 -12
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +109 -19
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +2 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +13 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +4 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +3 -3
- data/ext/zstdruby/libzstd/zstd.h +53 -25
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +11 -2
@@ -36,12 +36,11 @@
|
|
36
36
|
#include <time.h> /* clock */
|
37
37
|
|
38
38
|
#include "mem.h" /* read */
|
39
|
-
#include "error_private.h"
|
40
39
|
#include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
41
40
|
#define HUF_STATIC_LINKING_ONLY
|
42
|
-
#include "huf.h"
|
41
|
+
#include "huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
43
42
|
#include "zstd_internal.h" /* includes zstd.h */
|
44
|
-
#include "xxhash.h"
|
43
|
+
#include "xxhash.h" /* XXH64 */
|
45
44
|
#include "divsufsort.h"
|
46
45
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
47
46
|
# define ZDICT_STATIC_LINKING_ONLY
|
@@ -61,7 +60,7 @@
|
|
61
60
|
#define NOISELENGTH 32
|
62
61
|
|
63
62
|
#define MINRATIO 4
|
64
|
-
static const int g_compressionLevel_default =
|
63
|
+
static const int g_compressionLevel_default = 6;
|
65
64
|
static const U32 g_selectivity_default = 9;
|
66
65
|
static const size_t g_provision_entropySize = 200;
|
67
66
|
static const size_t g_min_fast_dictContent = 192;
|
@@ -307,13 +306,13 @@ static dictItem ZDICT_analyzePos(
|
|
307
306
|
} while (length >=MINMATCHLENGTH);
|
308
307
|
|
309
308
|
/* look backward */
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
309
|
+
length = MINMATCHLENGTH;
|
310
|
+
while ((length >= MINMATCHLENGTH) & (start > 0)) {
|
311
|
+
length = ZDICT_count(b + pos, b + suffix[start - 1]);
|
312
|
+
if (length >= LLIMIT) length = LLIMIT - 1;
|
313
|
+
lengthList[length]++;
|
314
|
+
if (length >= MINMATCHLENGTH) start--;
|
315
|
+
}
|
317
316
|
|
318
317
|
/* largest useful length */
|
319
318
|
memset(cumulLength, 0, sizeof(cumulLength));
|
@@ -570,7 +569,7 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|
570
569
|
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
|
571
570
|
}
|
572
571
|
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
|
573
|
-
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(
|
572
|
+
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
|
574
573
|
|
575
574
|
if (cSize) { /* if == 0; block is not compressible */
|
576
575
|
const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
|
@@ -825,6 +824,55 @@ _cleanup:
|
|
825
824
|
}
|
826
825
|
|
827
826
|
|
827
|
+
|
828
|
+
size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
829
|
+
const void* customDictContent, size_t dictContentSize,
|
830
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
831
|
+
ZDICT_params_t params)
|
832
|
+
{
|
833
|
+
size_t hSize;
|
834
|
+
#define HBUFFSIZE 256
|
835
|
+
BYTE header[HBUFFSIZE];
|
836
|
+
int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
|
837
|
+
U32 const notificationLevel = params.notificationLevel;
|
838
|
+
|
839
|
+
/* check conditions */
|
840
|
+
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
|
841
|
+
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
|
842
|
+
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
843
|
+
|
844
|
+
/* dictionary header */
|
845
|
+
MEM_writeLE32(header, ZSTD_DICT_MAGIC);
|
846
|
+
{ U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
|
847
|
+
U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
|
848
|
+
U32 const dictID = params.dictID ? params.dictID : compliantID;
|
849
|
+
MEM_writeLE32(header+4, dictID);
|
850
|
+
}
|
851
|
+
hSize = 8;
|
852
|
+
|
853
|
+
/* entropy tables */
|
854
|
+
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
|
855
|
+
DISPLAYLEVEL(2, "statistics ... \n");
|
856
|
+
{ size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize,
|
857
|
+
compressionLevel,
|
858
|
+
samplesBuffer, samplesSizes, nbSamples,
|
859
|
+
customDictContent, dictContentSize,
|
860
|
+
notificationLevel);
|
861
|
+
if (ZDICT_isError(eSize)) return eSize;
|
862
|
+
hSize += eSize;
|
863
|
+
}
|
864
|
+
|
865
|
+
/* copy elements in final buffer ; note : src and dst buffer can overlap */
|
866
|
+
if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
|
867
|
+
{ size_t const dictSize = hSize + dictContentSize;
|
868
|
+
char* dictEnd = (char*)dictBuffer + dictSize;
|
869
|
+
memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
|
870
|
+
memcpy(dictBuffer, header, hSize);
|
871
|
+
return dictSize;
|
872
|
+
}
|
873
|
+
}
|
874
|
+
|
875
|
+
|
828
876
|
size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
829
877
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
830
878
|
ZDICT_params_t params)
|
@@ -19,15 +19,18 @@ extern "C" {
|
|
19
19
|
#include <stddef.h> /* size_t */
|
20
20
|
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
#
|
28
|
-
#
|
22
|
+
/* ===== ZDICTLIB_API : control library symbols visibility ===== */
|
23
|
+
#if defined(__GNUC__) && (__GNUC__ >= 4)
|
24
|
+
# define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
|
25
|
+
#else
|
26
|
+
# define ZDICTLIB_VISIBILITY
|
27
|
+
#endif
|
28
|
+
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
29
|
+
# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
|
30
|
+
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
|
31
|
+
# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
29
32
|
#else
|
30
|
-
# define ZDICTLIB_API
|
33
|
+
# define ZDICTLIB_API ZDICTLIB_VISIBILITY
|
31
34
|
#endif
|
32
35
|
|
33
36
|
|
@@ -79,29 +82,116 @@ typedef struct {
|
|
79
82
|
or an error code, which can be tested by ZDICT_isError().
|
80
83
|
note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
|
81
84
|
*/
|
82
|
-
size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
|
85
|
+
ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
|
83
86
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
84
87
|
ZDICT_params_t parameters);
|
85
88
|
|
89
|
+
/*! COVER_params_t :
|
90
|
+
For all values 0 means default.
|
91
|
+
k and d are the only required parameters.
|
92
|
+
*/
|
93
|
+
typedef struct {
|
94
|
+
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
95
|
+
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
96
|
+
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
|
97
|
+
|
98
|
+
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
99
|
+
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
100
|
+
unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
|
101
|
+
int compressionLevel; /* 0 means default; target a specific zstd compression level */
|
102
|
+
} COVER_params_t;
|
86
103
|
|
87
|
-
/*! ZDICT_addEntropyTablesFromBuffer() :
|
88
104
|
|
89
|
-
|
90
|
-
|
105
|
+
/*! COVER_trainFromBuffer() :
|
106
|
+
Train a dictionary from an array of samples using the COVER algorithm.
|
107
|
+
Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
108
|
+
supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
109
|
+
The resulting dictionary will be saved into `dictBuffer`.
|
110
|
+
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
111
|
+
or an error code, which can be tested with ZDICT_isError().
|
112
|
+
Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
|
113
|
+
Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
|
114
|
+
It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
115
|
+
In general, it's recommended to provide a few thousands samples, but this can vary a lot.
|
116
|
+
It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
117
|
+
*/
|
118
|
+
ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
119
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
120
|
+
COVER_params_t parameters);
|
121
|
+
|
122
|
+
/*! COVER_optimizeTrainFromBuffer() :
|
123
|
+
The same requirements as above hold for all the parameters except `parameters`.
|
124
|
+
This function tries many parameter combinations and picks the best parameters.
|
125
|
+
`*parameters` is filled with the best parameters found, and the dictionary
|
126
|
+
constructed with those parameters is stored in `dictBuffer`.
|
127
|
+
|
128
|
+
All of the parameters d, k, steps are optional.
|
129
|
+
If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
|
130
|
+
if steps is zero it defaults to its default value.
|
131
|
+
If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
|
132
|
+
|
133
|
+
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
134
|
+
or an error code, which can be tested with ZDICT_isError().
|
135
|
+
On success `*parameters` contains the parameters selected.
|
136
|
+
Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
137
|
+
*/
|
138
|
+
ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
139
|
+
const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
140
|
+
COVER_params_t *parameters);
|
141
|
+
|
142
|
+
/*! ZDICT_finalizeDictionary() :
|
143
|
+
|
144
|
+
Given a custom content as a basis for dictionary, and a set of samples,
|
145
|
+
finalize dictionary by adding headers and statistics.
|
146
|
+
|
91
147
|
Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
92
148
|
supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
|
93
149
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
150
|
+
dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
151
|
+
dictBufferCapacity must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
|
152
|
+
|
153
|
+
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
|
154
|
+
or an error code, which can be tested by ZDICT_isError().
|
155
|
+
note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
|
156
|
+
note 2 : dictBuffer and customDictContent can overlap
|
99
157
|
*/
|
100
|
-
|
101
|
-
|
158
|
+
#define ZDICT_CONTENTSIZE_MIN 256
|
159
|
+
#define ZDICT_DICTSIZE_MIN 512
|
160
|
+
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
161
|
+
const void* customDictContent, size_t dictContentSize,
|
162
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
163
|
+
ZDICT_params_t parameters);
|
102
164
|
|
103
165
|
|
104
166
|
|
167
|
+
/* Deprecation warnings */
|
168
|
+
/* It is generally possible to disable deprecation warnings from compiler,
|
169
|
+
for example with -Wno-deprecated-declarations for gcc
|
170
|
+
or _CRT_SECURE_NO_WARNINGS in Visual.
|
171
|
+
Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
|
172
|
+
#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
|
173
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */
|
174
|
+
#else
|
175
|
+
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
176
|
+
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
177
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API [[deprecated(message)]]
|
178
|
+
# elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
|
179
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
|
180
|
+
# elif (ZDICT_GCC_VERSION >= 301)
|
181
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
|
182
|
+
# elif defined(_MSC_VER)
|
183
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
|
184
|
+
# else
|
185
|
+
# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
|
186
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API
|
187
|
+
# endif
|
188
|
+
#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
|
189
|
+
|
190
|
+
ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
|
191
|
+
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
192
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
|
193
|
+
|
194
|
+
|
105
195
|
#endif /* ZDICT_STATIC_LINKING_ONLY */
|
106
196
|
|
107
197
|
#if defined (__cplusplus)
|
@@ -100,6 +100,7 @@
|
|
100
100
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
101
101
|
<AdditionalLibraryDirectories>$(SolutionDir)..\dll;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
102
102
|
<AdditionalDependencies>libzstd.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
103
|
+
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
|
103
104
|
</Link>
|
104
105
|
</ItemDefinitionGroup>
|
105
106
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
@@ -141,6 +142,7 @@
|
|
141
142
|
<OptimizeReferences>true</OptimizeReferences>
|
142
143
|
<AdditionalLibraryDirectories>$(SolutionDir)..\dll;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
143
144
|
<AdditionalDependencies>libzstd.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
145
|
+
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
|
144
146
|
</Link>
|
145
147
|
</ItemDefinitionGroup>
|
146
148
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
@@ -3012,21 +3012,19 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
|
|
3012
3012
|
/* Literal length */
|
3013
3013
|
litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
|
3014
3014
|
prevOffset = litLength ? seq->offset : seqState->prevOffset;
|
3015
|
-
if (litLength == MaxLL)
|
3016
|
-
{
|
3015
|
+
if (litLength == MaxLL) {
|
3017
3016
|
U32 add = *dumps++;
|
3018
3017
|
if (add < 255) litLength += add;
|
3019
|
-
else
|
3020
|
-
|
3021
|
-
litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
|
3018
|
+
else {
|
3019
|
+
litLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
|
3022
3020
|
dumps += 3;
|
3023
3021
|
}
|
3024
|
-
if (dumps
|
3022
|
+
if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
|
3023
|
+
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
|
3025
3024
|
}
|
3026
3025
|
|
3027
3026
|
/* Offset */
|
3028
|
-
{
|
3029
|
-
static const U32 offsetPrefix[MaxOff+1] = {
|
3027
|
+
{ static const U32 offsetPrefix[MaxOff+1] = {
|
3030
3028
|
1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
|
3031
3029
|
512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
|
3032
3030
|
524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
|
@@ -3043,16 +3041,15 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
|
|
3043
3041
|
|
3044
3042
|
/* MatchLength */
|
3045
3043
|
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
|
3046
|
-
if (matchLength == MaxML)
|
3047
|
-
{
|
3044
|
+
if (matchLength == MaxML) {
|
3048
3045
|
U32 add = *dumps++;
|
3049
3046
|
if (add < 255) matchLength += add;
|
3050
|
-
else
|
3051
|
-
|
3052
|
-
matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
|
3047
|
+
else {
|
3048
|
+
matchLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
|
3053
3049
|
dumps += 3;
|
3054
3050
|
}
|
3055
|
-
if (dumps
|
3051
|
+
if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
|
3052
|
+
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
|
3056
3053
|
}
|
3057
3054
|
matchLength += MINMATCH;
|
3058
3055
|
|
@@ -3116,8 +3113,7 @@ static size_t ZSTD_execSequence(BYTE* op,
|
|
3116
3113
|
/* Requirement: op <= oend_8 */
|
3117
3114
|
|
3118
3115
|
/* match within prefix */
|
3119
|
-
if (sequence.offset < 8)
|
3120
|
-
{
|
3116
|
+
if (sequence.offset < 8) {
|
3121
3117
|
/* close range match, overlap */
|
3122
3118
|
const int sub2 = dec64table[sequence.offset];
|
3123
3119
|
op[0] = match[0];
|
@@ -3127,9 +3123,7 @@ static size_t ZSTD_execSequence(BYTE* op,
|
|
3127
3123
|
match += dec32table[sequence.offset];
|
3128
3124
|
ZSTD_copy4(op+4, match);
|
3129
3125
|
match -= sub2;
|
3130
|
-
}
|
3131
|
-
else
|
3132
|
-
{
|
3126
|
+
} else {
|
3133
3127
|
ZSTD_copy8(op, match);
|
3134
3128
|
}
|
3135
3129
|
op += 8; match += 8;
|
@@ -3230,7 +3230,8 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
|
|
3230
3230
|
if (litLength&1) litLength>>=1, dumps += 3;
|
3231
3231
|
else litLength = (U16)(litLength)>>1, dumps += 2;
|
3232
3232
|
}
|
3233
|
-
if (dumps
|
3233
|
+
if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
|
3234
|
+
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
|
3234
3235
|
}
|
3235
3236
|
|
3236
3237
|
/* Offset */
|
@@ -3263,7 +3264,8 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
|
|
3263
3264
|
if (matchLength&1) matchLength>>=1, dumps += 3;
|
3264
3265
|
else matchLength = (U16)(matchLength)>>1, dumps += 2;
|
3265
3266
|
}
|
3266
|
-
if (dumps
|
3267
|
+
if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
|
3268
|
+
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
|
3267
3269
|
}
|
3268
3270
|
matchLength += MINMATCH;
|
3269
3271
|
|
@@ -4134,9 +4134,9 @@ static size_t ZSTDv07_loadEntropy(ZSTDv07_DCtx* dctx, const void* const dict, si
|
|
4134
4134
|
}
|
4135
4135
|
|
4136
4136
|
if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
|
4137
|
-
dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
|
4138
|
-
dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
|
4139
|
-
dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
|
4137
|
+
dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] == 0 || dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
|
4138
|
+
dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] == 0 || dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
|
4139
|
+
dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] == 0 || dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
|
4140
4140
|
dictPtr += 12;
|
4141
4141
|
|
4142
4142
|
dctx->litEntropy = dctx->fseEntropy = 1;
|
data/ext/zstdruby/libzstd/zstd.h
CHANGED
@@ -20,13 +20,16 @@ extern "C" {
|
|
20
20
|
|
21
21
|
/* ===== ZSTDLIB_API : control library symbols visibility ===== */
|
22
22
|
#if defined(__GNUC__) && (__GNUC__ >= 4)
|
23
|
-
# define
|
24
|
-
#
|
25
|
-
# define
|
23
|
+
# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default")))
|
24
|
+
#else
|
25
|
+
# define ZSTDLIB_VISIBILITY
|
26
|
+
#endif
|
27
|
+
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
28
|
+
# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY
|
26
29
|
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
|
27
|
-
# define ZSTDLIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
30
|
+
# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
28
31
|
#else
|
29
|
-
# define ZSTDLIB_API
|
32
|
+
# define ZSTDLIB_API ZSTDLIB_VISIBILITY
|
30
33
|
#endif
|
31
34
|
|
32
35
|
|
@@ -53,7 +56,7 @@ extern "C" {
|
|
53
56
|
/*------ Version ------*/
|
54
57
|
#define ZSTD_VERSION_MAJOR 1
|
55
58
|
#define ZSTD_VERSION_MINOR 1
|
56
|
-
#define ZSTD_VERSION_RELEASE
|
59
|
+
#define ZSTD_VERSION_RELEASE 3
|
57
60
|
|
58
61
|
#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
|
59
62
|
#define ZSTD_QUOTE(str) #str
|
@@ -170,8 +173,8 @@ typedef struct ZSTD_CDict_s ZSTD_CDict;
|
|
170
173
|
* When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
|
171
174
|
* ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
|
172
175
|
* ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only.
|
173
|
-
* `
|
174
|
-
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void*
|
176
|
+
* `dictBuffer` can be released after ZSTD_CDict creation, as its content is copied within CDict */
|
177
|
+
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel);
|
175
178
|
|
176
179
|
/*! ZSTD_freeCDict() :
|
177
180
|
* Function frees memory allocated by ZSTD_createCDict(). */
|
@@ -191,8 +194,8 @@ typedef struct ZSTD_DDict_s ZSTD_DDict;
|
|
191
194
|
|
192
195
|
/*! ZSTD_createDDict() :
|
193
196
|
* Create a digested dictionary, ready to start decompression operation without startup delay.
|
194
|
-
*
|
195
|
-
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void*
|
197
|
+
* dictBuffer can be released after DDict creation, as its content is copied inside DDict */
|
198
|
+
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
|
196
199
|
|
197
200
|
/*! ZSTD_freeDDict() :
|
198
201
|
* Function frees memory allocated with ZSTD_createDDict() */
|
@@ -325,7 +328,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output
|
|
325
328
|
* ***************************************************************************************/
|
326
329
|
|
327
330
|
/* --- Constants ---*/
|
328
|
-
#define ZSTD_MAGICNUMBER 0xFD2FB528 /* v0.8 */
|
331
|
+
#define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */
|
329
332
|
#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
|
330
333
|
|
331
334
|
#define ZSTD_WINDOWLOG_MAX_32 25
|
@@ -345,8 +348,9 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output
|
|
345
348
|
#define ZSTD_TARGETLENGTH_MAX 999
|
346
349
|
|
347
350
|
#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */
|
351
|
+
#define ZSTD_FRAMEHEADERSIZE_MIN 6
|
348
352
|
static const size_t ZSTD_frameHeaderSize_prefix = 5;
|
349
|
-
static const size_t ZSTD_frameHeaderSize_min =
|
353
|
+
static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN;
|
350
354
|
static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
|
351
355
|
static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */
|
352
356
|
|
@@ -365,9 +369,9 @@ typedef struct {
|
|
365
369
|
} ZSTD_compressionParameters;
|
366
370
|
|
367
371
|
typedef struct {
|
368
|
-
unsigned contentSizeFlag; /**< 1: content size will be in frame header (
|
369
|
-
unsigned checksumFlag; /**< 1:
|
370
|
-
unsigned noDictIDFlag; /**< 1: no
|
372
|
+
unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */
|
373
|
+
unsigned checksumFlag; /**< 1: generate a 32-bits checksum at end of frame, for error detection */
|
374
|
+
unsigned noDictIDFlag; /**< 1: no dictID will be saved into frame header (if dictionary compression) */
|
371
375
|
} ZSTD_frameParameters;
|
372
376
|
|
373
377
|
typedef struct {
|
@@ -397,9 +401,23 @@ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
|
|
397
401
|
* Gives the amount of memory used by a given ZSTD_CCtx */
|
398
402
|
ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
|
399
403
|
|
404
|
+
typedef enum {
|
405
|
+
ZSTD_p_forceWindow /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/
|
406
|
+
} ZSTD_CCtxParameter;
|
407
|
+
/*! ZSTD_setCCtxParameter() :
|
408
|
+
* Set advanced parameters, selected through enum ZSTD_CCtxParameter
|
409
|
+
* @result : 0, or an error code (which can be tested with ZSTD_isError()) */
|
410
|
+
ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value);
|
411
|
+
|
412
|
+
/*! ZSTD_createCDict_byReference() :
|
413
|
+
* Create a digested dictionary for compression
|
414
|
+
* Dictionary content is simply referenced, and therefore stays in dictBuffer.
|
415
|
+
* It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */
|
416
|
+
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
|
417
|
+
|
400
418
|
/*! ZSTD_createCDict_advanced() :
|
401
419
|
* Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
|
402
|
-
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
|
420
|
+
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference,
|
403
421
|
ZSTD_parameters params, ZSTD_customMem customMem);
|
404
422
|
|
405
423
|
/*! ZSTD_sizeof_CDict() :
|
@@ -455,6 +473,15 @@ ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
|
|
455
473
|
* Gives the amount of memory used by a given ZSTD_DCtx */
|
456
474
|
ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
|
457
475
|
|
476
|
+
/*! ZSTD_createDDict_byReference() :
|
477
|
+
* Create a digested dictionary, ready to start decompression operation without startup delay.
|
478
|
+
* Dictionary content is simply referenced, and therefore stays in dictBuffer.
|
479
|
+
* It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */
|
480
|
+
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
|
481
|
+
|
482
|
+
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
|
483
|
+
unsigned byReference, ZSTD_customMem customMem);
|
484
|
+
|
458
485
|
/*! ZSTD_sizeof_DDict() :
|
459
486
|
* Gives the amount of memory used by a given ZSTD_DDict */
|
460
487
|
ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
|
@@ -463,13 +490,13 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
|
|
463
490
|
* Provides the dictID stored within dictionary.
|
464
491
|
* if @return == 0, the dictionary is not conformant with Zstandard specification.
|
465
492
|
* It can still be loaded, but as a content-only dictionary. */
|
466
|
-
unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
|
493
|
+
ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
|
467
494
|
|
468
495
|
/*! ZSTD_getDictID_fromDDict() :
|
469
496
|
* Provides the dictID of the dictionary loaded into `ddict`.
|
470
497
|
* If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
|
471
498
|
* Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
|
472
|
-
unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
|
499
|
+
ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
|
473
500
|
|
474
501
|
/*! ZSTD_getDictID_fromFrame() :
|
475
502
|
* Provides the dictID required to decompress the frame stored within `src`.
|
@@ -481,7 +508,7 @@ unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
|
|
481
508
|
* - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
|
482
509
|
* - This is not a Zstandard frame.
|
483
510
|
* When identifying the exact failure cause, it's possible to use ZSTD_getFrameParams(), which will provide a more precise error code. */
|
484
|
-
unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
|
511
|
+
ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
|
485
512
|
|
486
513
|
|
487
514
|
/********************************************************************
|
@@ -491,7 +518,7 @@ unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
|
|
491
518
|
/*===== Advanced Streaming compression functions =====*/
|
492
519
|
ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
|
493
520
|
ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct */
|
494
|
-
ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
|
521
|
+
ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
|
495
522
|
ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
|
496
523
|
ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */
|
497
524
|
ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */
|
@@ -500,9 +527,9 @@ ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
|
|
500
527
|
|
501
528
|
|
502
529
|
/*===== Advanced Streaming decompression functions =====*/
|
503
|
-
typedef enum {
|
530
|
+
typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
|
504
531
|
ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
|
505
|
-
ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
|
532
|
+
ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
|
506
533
|
ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
|
507
534
|
ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */
|
508
535
|
ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */
|
@@ -542,10 +569,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
|
|
542
569
|
In which case, it will "discard" the relevant memory section from its history.
|
543
570
|
|
544
571
|
Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
|
545
|
-
It's possible to use
|
546
|
-
Without last block mark, frames will be considered unfinished (
|
572
|
+
It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
|
573
|
+
Without last block mark, frames will be considered unfinished (corrupted) by decoders.
|
547
574
|
|
548
|
-
|
575
|
+
`ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame.
|
549
576
|
*/
|
550
577
|
|
551
578
|
/*===== Buffer-less streaming compression functions =====*/
|
@@ -553,6 +580,7 @@ ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
|
|
553
580
|
ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
|
554
581
|
ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
|
555
582
|
ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
|
583
|
+
ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize);
|
556
584
|
ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
557
585
|
ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
558
586
|
|