zstd-ruby 0.1.2 → 1.1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/BUCK +186 -0
- data/ext/zstdruby/libzstd/Makefile +58 -31
- data/ext/zstdruby/libzstd/common/mem.h +1 -1
- data/ext/zstdruby/libzstd/common/pool.c +194 -0
- data/ext/zstdruby/libzstd/common/pool.h +56 -0
- data/ext/zstdruby/libzstd/common/threading.c +79 -0
- data/ext/zstdruby/libzstd/common/threading.h +104 -0
- data/ext/zstdruby/libzstd/common/zstd_common.c +0 -4
- data/ext/zstdruby/libzstd/common/zstd_errors.h +16 -2
- data/ext/zstdruby/libzstd/common/zstd_internal.h +9 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +94 -51
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +6 -6
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +740 -0
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +78 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +39 -22
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +26 -0
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +1021 -0
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +60 -12
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +109 -19
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +2 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +13 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +4 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +3 -3
- data/ext/zstdruby/libzstd/zstd.h +53 -25
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +11 -2
@@ -36,12 +36,11 @@
|
|
36
36
|
#include <time.h> /* clock */
|
37
37
|
|
38
38
|
#include "mem.h" /* read */
|
39
|
-
#include "error_private.h"
|
40
39
|
#include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
41
40
|
#define HUF_STATIC_LINKING_ONLY
|
42
|
-
#include "huf.h"
|
41
|
+
#include "huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
43
42
|
#include "zstd_internal.h" /* includes zstd.h */
|
44
|
-
#include "xxhash.h"
|
43
|
+
#include "xxhash.h" /* XXH64 */
|
45
44
|
#include "divsufsort.h"
|
46
45
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
47
46
|
# define ZDICT_STATIC_LINKING_ONLY
|
@@ -61,7 +60,7 @@
|
|
61
60
|
#define NOISELENGTH 32
|
62
61
|
|
63
62
|
#define MINRATIO 4
|
64
|
-
static const int g_compressionLevel_default =
|
63
|
+
static const int g_compressionLevel_default = 6;
|
65
64
|
static const U32 g_selectivity_default = 9;
|
66
65
|
static const size_t g_provision_entropySize = 200;
|
67
66
|
static const size_t g_min_fast_dictContent = 192;
|
@@ -307,13 +306,13 @@ static dictItem ZDICT_analyzePos(
|
|
307
306
|
} while (length >=MINMATCHLENGTH);
|
308
307
|
|
309
308
|
/* look backward */
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
309
|
+
length = MINMATCHLENGTH;
|
310
|
+
while ((length >= MINMATCHLENGTH) & (start > 0)) {
|
311
|
+
length = ZDICT_count(b + pos, b + suffix[start - 1]);
|
312
|
+
if (length >= LLIMIT) length = LLIMIT - 1;
|
313
|
+
lengthList[length]++;
|
314
|
+
if (length >= MINMATCHLENGTH) start--;
|
315
|
+
}
|
317
316
|
|
318
317
|
/* largest useful length */
|
319
318
|
memset(cumulLength, 0, sizeof(cumulLength));
|
@@ -570,7 +569,7 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|
570
569
|
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
|
571
570
|
}
|
572
571
|
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
|
573
|
-
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(
|
572
|
+
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
|
574
573
|
|
575
574
|
if (cSize) { /* if == 0; block is not compressible */
|
576
575
|
const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
|
@@ -825,6 +824,55 @@ _cleanup:
|
|
825
824
|
}
|
826
825
|
|
827
826
|
|
827
|
+
|
828
|
+
size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
829
|
+
const void* customDictContent, size_t dictContentSize,
|
830
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
831
|
+
ZDICT_params_t params)
|
832
|
+
{
|
833
|
+
size_t hSize;
|
834
|
+
#define HBUFFSIZE 256
|
835
|
+
BYTE header[HBUFFSIZE];
|
836
|
+
int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
|
837
|
+
U32 const notificationLevel = params.notificationLevel;
|
838
|
+
|
839
|
+
/* check conditions */
|
840
|
+
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
|
841
|
+
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
|
842
|
+
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
843
|
+
|
844
|
+
/* dictionary header */
|
845
|
+
MEM_writeLE32(header, ZSTD_DICT_MAGIC);
|
846
|
+
{ U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
|
847
|
+
U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
|
848
|
+
U32 const dictID = params.dictID ? params.dictID : compliantID;
|
849
|
+
MEM_writeLE32(header+4, dictID);
|
850
|
+
}
|
851
|
+
hSize = 8;
|
852
|
+
|
853
|
+
/* entropy tables */
|
854
|
+
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
|
855
|
+
DISPLAYLEVEL(2, "statistics ... \n");
|
856
|
+
{ size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize,
|
857
|
+
compressionLevel,
|
858
|
+
samplesBuffer, samplesSizes, nbSamples,
|
859
|
+
customDictContent, dictContentSize,
|
860
|
+
notificationLevel);
|
861
|
+
if (ZDICT_isError(eSize)) return eSize;
|
862
|
+
hSize += eSize;
|
863
|
+
}
|
864
|
+
|
865
|
+
/* copy elements in final buffer ; note : src and dst buffer can overlap */
|
866
|
+
if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
|
867
|
+
{ size_t const dictSize = hSize + dictContentSize;
|
868
|
+
char* dictEnd = (char*)dictBuffer + dictSize;
|
869
|
+
memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
|
870
|
+
memcpy(dictBuffer, header, hSize);
|
871
|
+
return dictSize;
|
872
|
+
}
|
873
|
+
}
|
874
|
+
|
875
|
+
|
828
876
|
size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
829
877
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
830
878
|
ZDICT_params_t params)
|
@@ -19,15 +19,18 @@ extern "C" {
|
|
19
19
|
#include <stddef.h> /* size_t */
|
20
20
|
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
#
|
28
|
-
#
|
22
|
+
/* ===== ZDICTLIB_API : control library symbols visibility ===== */
|
23
|
+
#if defined(__GNUC__) && (__GNUC__ >= 4)
|
24
|
+
# define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
|
25
|
+
#else
|
26
|
+
# define ZDICTLIB_VISIBILITY
|
27
|
+
#endif
|
28
|
+
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
29
|
+
# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
|
30
|
+
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
|
31
|
+
# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
29
32
|
#else
|
30
|
-
# define ZDICTLIB_API
|
33
|
+
# define ZDICTLIB_API ZDICTLIB_VISIBILITY
|
31
34
|
#endif
|
32
35
|
|
33
36
|
|
@@ -79,29 +82,116 @@ typedef struct {
|
|
79
82
|
or an error code, which can be tested by ZDICT_isError().
|
80
83
|
note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
|
81
84
|
*/
|
82
|
-
size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
|
85
|
+
ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
|
83
86
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
84
87
|
ZDICT_params_t parameters);
|
85
88
|
|
89
|
+
/*! COVER_params_t :
|
90
|
+
For all values 0 means default.
|
91
|
+
k and d are the only required parameters.
|
92
|
+
*/
|
93
|
+
typedef struct {
|
94
|
+
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
95
|
+
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
96
|
+
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
|
97
|
+
|
98
|
+
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
99
|
+
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
100
|
+
unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
|
101
|
+
int compressionLevel; /* 0 means default; target a specific zstd compression level */
|
102
|
+
} COVER_params_t;
|
86
103
|
|
87
|
-
/*! ZDICT_addEntropyTablesFromBuffer() :
|
88
104
|
|
89
|
-
|
90
|
-
|
105
|
+
/*! COVER_trainFromBuffer() :
|
106
|
+
Train a dictionary from an array of samples using the COVER algorithm.
|
107
|
+
Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
108
|
+
supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
109
|
+
The resulting dictionary will be saved into `dictBuffer`.
|
110
|
+
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
111
|
+
or an error code, which can be tested with ZDICT_isError().
|
112
|
+
Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
|
113
|
+
Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
|
114
|
+
It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
115
|
+
In general, it's recommended to provide a few thousand samples, but this can vary a lot.
|
116
|
+
It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
|
117
|
+
*/
|
118
|
+
ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
119
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
120
|
+
COVER_params_t parameters);
|
121
|
+
|
122
|
+
/*! COVER_optimizeTrainFromBuffer() :
|
123
|
+
The same requirements as above hold for all the parameters except `parameters`.
|
124
|
+
This function tries many parameter combinations and picks the best parameters.
|
125
|
+
`*parameters` is filled with the best parameters found, and the dictionary
|
126
|
+
constructed with those parameters is stored in `dictBuffer`.
|
127
|
+
|
128
|
+
All of the parameters d, k, steps are optional.
|
129
|
+
If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
|
130
|
+
If steps is zero, the default number of steps (32) is used.
|
131
|
+
If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
|
132
|
+
|
133
|
+
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
134
|
+
or an error code, which can be tested with ZDICT_isError().
|
135
|
+
On success `*parameters` contains the parameters selected.
|
136
|
+
Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
137
|
+
*/
|
138
|
+
ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
139
|
+
const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
140
|
+
COVER_params_t *parameters);
|
141
|
+
|
142
|
+
/*! ZDICT_finalizeDictionary() :
|
143
|
+
|
144
|
+
Given a custom content as a basis for dictionary, and a set of samples,
|
145
|
+
finalize dictionary by adding headers and statistics.
|
146
|
+
|
91
147
|
Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
92
148
|
supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
|
93
149
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
150
|
+
dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
151
|
+
dictBufferCapacity must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
|
152
|
+
|
153
|
+
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
|
154
|
+
or an error code, which can be tested by ZDICT_isError().
|
155
|
+
note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
|
156
|
+
note 2 : dictBuffer and customDictContent can overlap
|
99
157
|
*/
|
100
|
-
|
101
|
-
|
158
|
+
#define ZDICT_CONTENTSIZE_MIN 256
|
159
|
+
#define ZDICT_DICTSIZE_MIN 512
|
160
|
+
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
161
|
+
const void* customDictContent, size_t dictContentSize,
|
162
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
163
|
+
ZDICT_params_t parameters);
|
102
164
|
|
103
165
|
|
104
166
|
|
167
|
+
/* Deprecation warnings */
|
168
|
+
/* It is generally possible to disable deprecation warnings from compiler,
|
169
|
+
for example with -Wno-deprecated-declarations for gcc
|
170
|
+
or _CRT_SECURE_NO_WARNINGS in Visual.
|
171
|
+
Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
|
172
|
+
#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
|
173
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */
|
174
|
+
#else
|
175
|
+
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
176
|
+
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
177
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API [[deprecated(message)]]
|
178
|
+
# elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
|
179
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
|
180
|
+
# elif (ZDICT_GCC_VERSION >= 301)
|
181
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
|
182
|
+
# elif defined(_MSC_VER)
|
183
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
|
184
|
+
# else
|
185
|
+
# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
|
186
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API
|
187
|
+
# endif
|
188
|
+
#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
|
189
|
+
|
190
|
+
ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
|
191
|
+
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
192
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
|
193
|
+
|
194
|
+
|
105
195
|
#endif /* ZDICT_STATIC_LINKING_ONLY */
|
106
196
|
|
107
197
|
#if defined (__cplusplus)
|
@@ -100,6 +100,7 @@
|
|
100
100
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
101
101
|
<AdditionalLibraryDirectories>$(SolutionDir)..\dll;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
102
102
|
<AdditionalDependencies>libzstd.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
103
|
+
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
|
103
104
|
</Link>
|
104
105
|
</ItemDefinitionGroup>
|
105
106
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
@@ -141,6 +142,7 @@
|
|
141
142
|
<OptimizeReferences>true</OptimizeReferences>
|
142
143
|
<AdditionalLibraryDirectories>$(SolutionDir)..\dll;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
143
144
|
<AdditionalDependencies>libzstd.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
145
|
+
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
|
144
146
|
</Link>
|
145
147
|
</ItemDefinitionGroup>
|
146
148
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
@@ -3012,21 +3012,19 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
|
|
3012
3012
|
/* Literal length */
|
3013
3013
|
litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
|
3014
3014
|
prevOffset = litLength ? seq->offset : seqState->prevOffset;
|
3015
|
-
if (litLength == MaxLL)
|
3016
|
-
{
|
3015
|
+
if (litLength == MaxLL) {
|
3017
3016
|
U32 add = *dumps++;
|
3018
3017
|
if (add < 255) litLength += add;
|
3019
|
-
else
|
3020
|
-
|
3021
|
-
litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
|
3018
|
+
else {
|
3019
|
+
litLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
|
3022
3020
|
dumps += 3;
|
3023
3021
|
}
|
3024
|
-
if (dumps
|
3022
|
+
if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
|
3023
|
+
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
|
3025
3024
|
}
|
3026
3025
|
|
3027
3026
|
/* Offset */
|
3028
|
-
{
|
3029
|
-
static const U32 offsetPrefix[MaxOff+1] = {
|
3027
|
+
{ static const U32 offsetPrefix[MaxOff+1] = {
|
3030
3028
|
1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
|
3031
3029
|
512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
|
3032
3030
|
524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
|
@@ -3043,16 +3041,15 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
|
|
3043
3041
|
|
3044
3042
|
/* MatchLength */
|
3045
3043
|
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
|
3046
|
-
if (matchLength == MaxML)
|
3047
|
-
{
|
3044
|
+
if (matchLength == MaxML) {
|
3048
3045
|
U32 add = *dumps++;
|
3049
3046
|
if (add < 255) matchLength += add;
|
3050
|
-
else
|
3051
|
-
|
3052
|
-
matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
|
3047
|
+
else {
|
3048
|
+
matchLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
|
3053
3049
|
dumps += 3;
|
3054
3050
|
}
|
3055
|
-
if (dumps
|
3051
|
+
if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
|
3052
|
+
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
|
3056
3053
|
}
|
3057
3054
|
matchLength += MINMATCH;
|
3058
3055
|
|
@@ -3116,8 +3113,7 @@ static size_t ZSTD_execSequence(BYTE* op,
|
|
3116
3113
|
/* Requirement: op <= oend_8 */
|
3117
3114
|
|
3118
3115
|
/* match within prefix */
|
3119
|
-
if (sequence.offset < 8)
|
3120
|
-
{
|
3116
|
+
if (sequence.offset < 8) {
|
3121
3117
|
/* close range match, overlap */
|
3122
3118
|
const int sub2 = dec64table[sequence.offset];
|
3123
3119
|
op[0] = match[0];
|
@@ -3127,9 +3123,7 @@ static size_t ZSTD_execSequence(BYTE* op,
|
|
3127
3123
|
match += dec32table[sequence.offset];
|
3128
3124
|
ZSTD_copy4(op+4, match);
|
3129
3125
|
match -= sub2;
|
3130
|
-
}
|
3131
|
-
else
|
3132
|
-
{
|
3126
|
+
} else {
|
3133
3127
|
ZSTD_copy8(op, match);
|
3134
3128
|
}
|
3135
3129
|
op += 8; match += 8;
|
@@ -3230,7 +3230,8 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
|
|
3230
3230
|
if (litLength&1) litLength>>=1, dumps += 3;
|
3231
3231
|
else litLength = (U16)(litLength)>>1, dumps += 2;
|
3232
3232
|
}
|
3233
|
-
if (dumps
|
3233
|
+
if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
|
3234
|
+
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
|
3234
3235
|
}
|
3235
3236
|
|
3236
3237
|
/* Offset */
|
@@ -3263,7 +3264,8 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
|
|
3263
3264
|
if (matchLength&1) matchLength>>=1, dumps += 3;
|
3264
3265
|
else matchLength = (U16)(matchLength)>>1, dumps += 2;
|
3265
3266
|
}
|
3266
|
-
if (dumps
|
3267
|
+
if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
|
3268
|
+
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
|
3267
3269
|
}
|
3268
3270
|
matchLength += MINMATCH;
|
3269
3271
|
|
@@ -4134,9 +4134,9 @@ static size_t ZSTDv07_loadEntropy(ZSTDv07_DCtx* dctx, const void* const dict, si
|
|
4134
4134
|
}
|
4135
4135
|
|
4136
4136
|
if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
|
4137
|
-
dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
|
4138
|
-
dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
|
4139
|
-
dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
|
4137
|
+
dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] == 0 || dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
|
4138
|
+
dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] == 0 || dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
|
4139
|
+
dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] == 0 || dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
|
4140
4140
|
dictPtr += 12;
|
4141
4141
|
|
4142
4142
|
dctx->litEntropy = dctx->fseEntropy = 1;
|
data/ext/zstdruby/libzstd/zstd.h
CHANGED
@@ -20,13 +20,16 @@ extern "C" {
|
|
20
20
|
|
21
21
|
/* ===== ZSTDLIB_API : control library symbols visibility ===== */
|
22
22
|
#if defined(__GNUC__) && (__GNUC__ >= 4)
|
23
|
-
# define
|
24
|
-
#
|
25
|
-
# define
|
23
|
+
# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default")))
|
24
|
+
#else
|
25
|
+
# define ZSTDLIB_VISIBILITY
|
26
|
+
#endif
|
27
|
+
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
28
|
+
# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY
|
26
29
|
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
|
27
|
-
# define ZSTDLIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
30
|
+
# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
28
31
|
#else
|
29
|
-
# define ZSTDLIB_API
|
32
|
+
# define ZSTDLIB_API ZSTDLIB_VISIBILITY
|
30
33
|
#endif
|
31
34
|
|
32
35
|
|
@@ -53,7 +56,7 @@ extern "C" {
|
|
53
56
|
/*------ Version ------*/
|
54
57
|
#define ZSTD_VERSION_MAJOR 1
|
55
58
|
#define ZSTD_VERSION_MINOR 1
|
56
|
-
#define ZSTD_VERSION_RELEASE
|
59
|
+
#define ZSTD_VERSION_RELEASE 3
|
57
60
|
|
58
61
|
#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
|
59
62
|
#define ZSTD_QUOTE(str) #str
|
@@ -170,8 +173,8 @@ typedef struct ZSTD_CDict_s ZSTD_CDict;
|
|
170
173
|
* When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
|
171
174
|
* ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
|
172
175
|
* ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only.
|
173
|
-
* `
|
174
|
-
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void*
|
176
|
+
* `dictBuffer` can be released after ZSTD_CDict creation, as its content is copied within CDict */
|
177
|
+
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel);
|
175
178
|
|
176
179
|
/*! ZSTD_freeCDict() :
|
177
180
|
* Function frees memory allocated by ZSTD_createCDict(). */
|
@@ -191,8 +194,8 @@ typedef struct ZSTD_DDict_s ZSTD_DDict;
|
|
191
194
|
|
192
195
|
/*! ZSTD_createDDict() :
|
193
196
|
* Create a digested dictionary, ready to start decompression operation without startup delay.
|
194
|
-
*
|
195
|
-
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void*
|
197
|
+
* dictBuffer can be released after DDict creation, as its content is copied inside DDict */
|
198
|
+
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
|
196
199
|
|
197
200
|
/*! ZSTD_freeDDict() :
|
198
201
|
* Function frees memory allocated with ZSTD_createDDict() */
|
@@ -325,7 +328,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output
|
|
325
328
|
* ***************************************************************************************/
|
326
329
|
|
327
330
|
/* --- Constants ---*/
|
328
|
-
#define ZSTD_MAGICNUMBER 0xFD2FB528 /* v0.8 */
|
331
|
+
#define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */
|
329
332
|
#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
|
330
333
|
|
331
334
|
#define ZSTD_WINDOWLOG_MAX_32 25
|
@@ -345,8 +348,9 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output
|
|
345
348
|
#define ZSTD_TARGETLENGTH_MAX 999
|
346
349
|
|
347
350
|
#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */
|
351
|
+
#define ZSTD_FRAMEHEADERSIZE_MIN 6
|
348
352
|
static const size_t ZSTD_frameHeaderSize_prefix = 5;
|
349
|
-
static const size_t ZSTD_frameHeaderSize_min =
|
353
|
+
static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN;
|
350
354
|
static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
|
351
355
|
static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */
|
352
356
|
|
@@ -365,9 +369,9 @@ typedef struct {
|
|
365
369
|
} ZSTD_compressionParameters;
|
366
370
|
|
367
371
|
typedef struct {
|
368
|
-
unsigned contentSizeFlag; /**< 1: content size will be in frame header (
|
369
|
-
unsigned checksumFlag; /**< 1:
|
370
|
-
unsigned noDictIDFlag; /**< 1: no
|
372
|
+
unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */
|
373
|
+
unsigned checksumFlag; /**< 1: generate a 32-bits checksum at end of frame, for error detection */
|
374
|
+
unsigned noDictIDFlag; /**< 1: no dictID will be saved into frame header (if dictionary compression) */
|
371
375
|
} ZSTD_frameParameters;
|
372
376
|
|
373
377
|
typedef struct {
|
@@ -397,9 +401,23 @@ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
|
|
397
401
|
* Gives the amount of memory used by a given ZSTD_CCtx */
|
398
402
|
ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
|
399
403
|
|
404
|
+
typedef enum {
|
405
|
+
ZSTD_p_forceWindow /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/
|
406
|
+
} ZSTD_CCtxParameter;
|
407
|
+
/*! ZSTD_setCCtxParameter() :
|
408
|
+
* Set advanced parameters, selected through enum ZSTD_CCtxParameter
|
409
|
+
* @result : 0, or an error code (which can be tested with ZSTD_isError()) */
|
410
|
+
ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value);
|
411
|
+
|
412
|
+
/*! ZSTD_createCDict_byReference() :
|
413
|
+
* Create a digested dictionary for compression
|
414
|
+
* Dictionary content is simply referenced, and therefore stays in dictBuffer.
|
415
|
+
* It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */
|
416
|
+
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
|
417
|
+
|
400
418
|
/*! ZSTD_createCDict_advanced() :
|
401
419
|
* Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
|
402
|
-
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
|
420
|
+
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference,
|
403
421
|
ZSTD_parameters params, ZSTD_customMem customMem);
|
404
422
|
|
405
423
|
/*! ZSTD_sizeof_CDict() :
|
@@ -455,6 +473,15 @@ ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
|
|
455
473
|
* Gives the amount of memory used by a given ZSTD_DCtx */
|
456
474
|
ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
|
457
475
|
|
476
|
+
/*! ZSTD_createDDict_byReference() :
|
477
|
+
* Create a digested dictionary, ready to start decompression operation without startup delay.
|
478
|
+
* Dictionary content is simply referenced, and therefore stays in dictBuffer.
|
479
|
+
* It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */
|
480
|
+
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
|
481
|
+
|
482
|
+
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
|
483
|
+
unsigned byReference, ZSTD_customMem customMem);
|
484
|
+
|
458
485
|
/*! ZSTD_sizeof_DDict() :
|
459
486
|
* Gives the amount of memory used by a given ZSTD_DDict */
|
460
487
|
ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
|
@@ -463,13 +490,13 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
|
|
463
490
|
* Provides the dictID stored within dictionary.
|
464
491
|
* if @return == 0, the dictionary is not conformant with Zstandard specification.
|
465
492
|
* It can still be loaded, but as a content-only dictionary. */
|
466
|
-
unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
|
493
|
+
ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
|
467
494
|
|
468
495
|
/*! ZSTD_getDictID_fromDDict() :
|
469
496
|
* Provides the dictID of the dictionary loaded into `ddict`.
|
470
497
|
* If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
|
471
498
|
* Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
|
472
|
-
unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
|
499
|
+
ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
|
473
500
|
|
474
501
|
/*! ZSTD_getDictID_fromFrame() :
|
475
502
|
* Provides the dictID required to decompress the frame stored within `src`.
|
@@ -481,7 +508,7 @@ unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
|
|
481
508
|
* - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
|
482
509
|
* - This is not a Zstandard frame.
|
483
510
|
* When identifying the exact failure cause, it's possible to use ZSTD_getFrameParams(), which will provide a more precise error code. */
|
484
|
-
unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
|
511
|
+
ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
|
485
512
|
|
486
513
|
|
487
514
|
/********************************************************************
|
@@ -491,7 +518,7 @@ unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
|
|
491
518
|
/*===== Advanced Streaming compression functions =====*/
|
492
519
|
ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
|
493
520
|
ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct */
|
494
|
-
ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
|
521
|
+
ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
|
495
522
|
ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
|
496
523
|
ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */
|
497
524
|
ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */
|
@@ -500,9 +527,9 @@ ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
|
|
500
527
|
|
501
528
|
|
502
529
|
/*===== Advanced Streaming decompression functions =====*/
|
503
|
-
typedef enum {
|
530
|
+
typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
|
504
531
|
ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
|
505
|
-
ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
|
532
|
+
ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
|
506
533
|
ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
|
507
534
|
ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */
|
508
535
|
ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */
|
@@ -542,10 +569,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
|
|
542
569
|
In which case, it will "discard" the relevant memory section from its history.
|
543
570
|
|
544
571
|
Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
|
545
|
-
It's possible to use
|
546
|
-
Without last block mark, frames will be considered unfinished (
|
572
|
+
It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
|
573
|
+
Without last block mark, frames will be considered unfinished (corrupted) by decoders.
|
547
574
|
|
548
|
-
|
575
|
+
`ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame.
|
549
576
|
*/
|
550
577
|
|
551
578
|
/*===== Buffer-less streaming compression functions =====*/
|
@@ -553,6 +580,7 @@ ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
|
|
553
580
|
ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
|
554
581
|
ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
|
555
582
|
ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
|
583
|
+
ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize);
|
556
584
|
ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
557
585
|
ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
558
586
|
|