zstd-ruby 0.1.2 → 1.1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,12 +36,11 @@
36
36
  #include <time.h> /* clock */
37
37
 
38
38
  #include "mem.h" /* read */
39
- #include "error_private.h"
40
39
  #include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
41
40
  #define HUF_STATIC_LINKING_ONLY
42
- #include "huf.h"
41
+ #include "huf.h" /* HUF_buildCTable, HUF_writeCTable */
43
42
  #include "zstd_internal.h" /* includes zstd.h */
44
- #include "xxhash.h"
43
+ #include "xxhash.h" /* XXH64 */
45
44
  #include "divsufsort.h"
46
45
  #ifndef ZDICT_STATIC_LINKING_ONLY
47
46
  # define ZDICT_STATIC_LINKING_ONLY
@@ -61,7 +60,7 @@
61
60
  #define NOISELENGTH 32
62
61
 
63
62
  #define MINRATIO 4
64
- static const int g_compressionLevel_default = 5;
63
+ static const int g_compressionLevel_default = 6;
65
64
  static const U32 g_selectivity_default = 9;
66
65
  static const size_t g_provision_entropySize = 200;
67
66
  static const size_t g_min_fast_dictContent = 192;
@@ -307,13 +306,13 @@ static dictItem ZDICT_analyzePos(
307
306
  } while (length >=MINMATCHLENGTH);
308
307
 
309
308
  /* look backward */
310
- length = MINMATCHLENGTH;
311
- while ((length >= MINMATCHLENGTH) & (start > 0)) {
312
- length = ZDICT_count(b + pos, b + suffix[start - 1]);
313
- if (length >= LLIMIT) length = LLIMIT - 1;
314
- lengthList[length]++;
315
- if (length >= MINMATCHLENGTH) start--;
316
- }
309
+ length = MINMATCHLENGTH;
310
+ while ((length >= MINMATCHLENGTH) & (start > 0)) {
311
+ length = ZDICT_count(b + pos, b + suffix[start - 1]);
312
+ if (length >= LLIMIT) length = LLIMIT - 1;
313
+ lengthList[length]++;
314
+ if (length >= MINMATCHLENGTH) start--;
315
+ }
317
316
 
318
317
  /* largest useful length */
319
318
  memset(cumulLength, 0, sizeof(cumulLength));
@@ -570,7 +569,7 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
570
569
  if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
571
570
  }
572
571
  cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
573
- if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
572
+ if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
574
573
 
575
574
  if (cSize) { /* if == 0; block is not compressible */
576
575
  const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
@@ -825,6 +824,55 @@ _cleanup:
825
824
  }
826
825
 
827
826
 
827
+
828
+ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
829
+ const void* customDictContent, size_t dictContentSize,
830
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
831
+ ZDICT_params_t params)
832
+ {
833
+ size_t hSize;
834
+ #define HBUFFSIZE 256
835
+ BYTE header[HBUFFSIZE];
836
+ int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
837
+ U32 const notificationLevel = params.notificationLevel;
838
+
839
+ /* check conditions */
840
+ if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
841
+ if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
842
+ if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
843
+
844
+ /* dictionary header */
845
+ MEM_writeLE32(header, ZSTD_DICT_MAGIC);
846
+ { U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
847
+ U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
848
+ U32 const dictID = params.dictID ? params.dictID : compliantID;
849
+ MEM_writeLE32(header+4, dictID);
850
+ }
851
+ hSize = 8;
852
+
853
+ /* entropy tables */
854
+ DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
855
+ DISPLAYLEVEL(2, "statistics ... \n");
856
+ { size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize,
857
+ compressionLevel,
858
+ samplesBuffer, samplesSizes, nbSamples,
859
+ customDictContent, dictContentSize,
860
+ notificationLevel);
861
+ if (ZDICT_isError(eSize)) return eSize;
862
+ hSize += eSize;
863
+ }
864
+
865
+ /* copy elements in final buffer ; note : src and dst buffer can overlap */
866
+ if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
867
+ { size_t const dictSize = hSize + dictContentSize;
868
+ char* dictEnd = (char*)dictBuffer + dictSize;
869
+ memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
870
+ memcpy(dictBuffer, header, hSize);
871
+ return dictSize;
872
+ }
873
+ }
874
+
875
+
828
876
  size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
829
877
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
830
878
  ZDICT_params_t params)
@@ -19,15 +19,18 @@ extern "C" {
19
19
  #include <stddef.h> /* size_t */
20
20
 
21
21
 
22
- /*====== Export for Windows ======*/
23
- /*!
24
- * ZSTD_DLL_EXPORT :
25
- * Enable exporting of functions when building a Windows DLL
26
- */
27
- #if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
28
- # define ZDICTLIB_API __declspec(dllexport)
22
+ /* ===== ZDICTLIB_API : control library symbols visibility ===== */
23
+ #if defined(__GNUC__) && (__GNUC__ >= 4)
24
+ # define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
25
+ #else
26
+ # define ZDICTLIB_VISIBILITY
27
+ #endif
28
+ #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
29
+ # define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
30
+ #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
31
+ # define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
29
32
  #else
30
- # define ZDICTLIB_API
33
+ # define ZDICTLIB_API ZDICTLIB_VISIBILITY
31
34
  #endif
32
35
 
33
36
 
@@ -79,29 +82,116 @@ typedef struct {
79
82
  or an error code, which can be tested by ZDICT_isError().
80
83
  note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
81
84
  */
82
- size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
85
+ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
83
86
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
84
87
  ZDICT_params_t parameters);
85
88
 
89
+ /*! COVER_params_t :
90
+ For all values 0 means default.
91
+ k and d are the only required parameters.
92
+ */
93
+ typedef struct {
94
+ unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
95
+ unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
96
+ unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
97
+
98
+ unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
99
+ unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
100
+ unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
101
+ int compressionLevel; /* 0 means default; target a specific zstd compression level */
102
+ } COVER_params_t;
86
103
 
87
- /*! ZDICT_addEntropyTablesFromBuffer() :
88
104
 
89
- Given a content-only dictionary (built using any 3rd party algorithm),
90
- add entropy tables computed from an array of samples.
105
+ /*! COVER_trainFromBuffer() :
106
+ Train a dictionary from an array of samples using the COVER algorithm.
107
+ Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
108
+ supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
109
+ The resulting dictionary will be saved into `dictBuffer`.
110
+ @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
111
+ or an error code, which can be tested with ZDICT_isError().
112
+ Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
113
+ Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
114
+ It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
115
+ In general, it's recommended to provide a few thousand samples, but this can vary a lot.
116
+ It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
117
+ */
118
+ ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
119
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
120
+ COVER_params_t parameters);
121
+
122
+ /*! COVER_optimizeTrainFromBuffer() :
123
+ The same requirements as above hold for all the parameters except `parameters`.
124
+ This function tries many parameter combinations and picks the best parameters.
125
+ `*parameters` is filled with the best parameters found, and the dictionary
126
+ constructed with those parameters is stored in `dictBuffer`.
127
+
128
+ All of the parameters d, k, steps are optional.
129
+ If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
130
+ if steps is zero it defaults to its default value.
131
+ If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
132
+
133
+ @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
134
+ or an error code, which can be tested with ZDICT_isError().
135
+ On success `*parameters` contains the parameters selected.
136
+ Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte, plus another 5 bytes of memory per input byte for each thread.
137
+ */
138
+ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
139
+ const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
140
+ COVER_params_t *parameters);
141
+
142
+ /*! ZDICT_finalizeDictionary() :
143
+
144
+ Given a custom content as a basis for dictionary, and a set of samples,
145
+ finalize dictionary by adding headers and statistics.
146
+
91
147
  Samples must be stored concatenated in a flat buffer `samplesBuffer`,
92
148
  supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
93
149
 
94
- The input dictionary content must be stored *at the end* of `dictBuffer`.
95
- Its size is `dictContentSize`.
96
- The resulting dictionary with added entropy tables will be *written back to `dictBuffer`*,
97
- starting from its beginning.
98
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`).
150
+ dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
151
+ dictBufferCapacity must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
152
+
153
+ @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
154
+ or an error code, which can be tested by ZDICT_isError().
155
+ note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
156
+ note 2 : dictBuffer and customDictContent can overlap
99
157
  */
100
- size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
101
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
158
+ #define ZDICT_CONTENTSIZE_MIN 256
159
+ #define ZDICT_DICTSIZE_MIN 512
160
+ ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
161
+ const void* customDictContent, size_t dictContentSize,
162
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
163
+ ZDICT_params_t parameters);
102
164
 
103
165
 
104
166
 
167
+ /* Deprecation warnings */
168
+ /* It is generally possible to disable deprecation warnings from compiler,
169
+ for example with -Wno-deprecated-declarations for gcc
170
+ or _CRT_SECURE_NO_WARNINGS in Visual.
171
+ Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
172
+ #ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
173
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */
174
+ #else
175
+ # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
176
+ # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
177
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API [[deprecated(message)]]
178
+ # elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
179
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
180
+ # elif (ZDICT_GCC_VERSION >= 301)
181
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
182
+ # elif defined(_MSC_VER)
183
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
184
+ # else
185
+ # pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
186
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API
187
+ # endif
188
+ #endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
189
+
190
+ ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
191
+ size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
192
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
193
+
194
+
105
195
  #endif /* ZDICT_STATIC_LINKING_ONLY */
106
196
 
107
197
  #if defined (__cplusplus)
@@ -100,6 +100,7 @@
100
100
  <GenerateDebugInformation>true</GenerateDebugInformation>
101
101
  <AdditionalLibraryDirectories>$(SolutionDir)..\dll;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
102
102
  <AdditionalDependencies>libzstd.lib;%(AdditionalDependencies)</AdditionalDependencies>
103
+ <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
103
104
  </Link>
104
105
  </ItemDefinitionGroup>
105
106
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
@@ -141,6 +142,7 @@
141
142
  <OptimizeReferences>true</OptimizeReferences>
142
143
  <AdditionalLibraryDirectories>$(SolutionDir)..\dll;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
143
144
  <AdditionalDependencies>libzstd.lib;%(AdditionalDependencies)</AdditionalDependencies>
145
+ <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
144
146
  </Link>
145
147
  </ItemDefinitionGroup>
146
148
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
@@ -3012,21 +3012,19 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
3012
3012
  /* Literal length */
3013
3013
  litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
3014
3014
  prevOffset = litLength ? seq->offset : seqState->prevOffset;
3015
- if (litLength == MaxLL)
3016
- {
3015
+ if (litLength == MaxLL) {
3017
3016
  U32 add = *dumps++;
3018
3017
  if (add < 255) litLength += add;
3019
- else
3020
- {
3021
- litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
3018
+ else {
3019
+ litLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
3022
3020
  dumps += 3;
3023
3021
  }
3024
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
3022
+ if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
3023
+ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
3025
3024
  }
3026
3025
 
3027
3026
  /* Offset */
3028
- {
3029
- static const U32 offsetPrefix[MaxOff+1] = {
3027
+ { static const U32 offsetPrefix[MaxOff+1] = {
3030
3028
  1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
3031
3029
  512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
3032
3030
  524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
@@ -3043,16 +3041,15 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
3043
3041
 
3044
3042
  /* MatchLength */
3045
3043
  matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
3046
- if (matchLength == MaxML)
3047
- {
3044
+ if (matchLength == MaxML) {
3048
3045
  U32 add = *dumps++;
3049
3046
  if (add < 255) matchLength += add;
3050
- else
3051
- {
3052
- matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
3047
+ else {
3048
+ matchLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
3053
3049
  dumps += 3;
3054
3050
  }
3055
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
3051
+ if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
3052
+ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
3056
3053
  }
3057
3054
  matchLength += MINMATCH;
3058
3055
 
@@ -3116,8 +3113,7 @@ static size_t ZSTD_execSequence(BYTE* op,
3116
3113
  /* Requirement: op <= oend_8 */
3117
3114
 
3118
3115
  /* match within prefix */
3119
- if (sequence.offset < 8)
3120
- {
3116
+ if (sequence.offset < 8) {
3121
3117
  /* close range match, overlap */
3122
3118
  const int sub2 = dec64table[sequence.offset];
3123
3119
  op[0] = match[0];
@@ -3127,9 +3123,7 @@ static size_t ZSTD_execSequence(BYTE* op,
3127
3123
  match += dec32table[sequence.offset];
3128
3124
  ZSTD_copy4(op+4, match);
3129
3125
  match -= sub2;
3130
- }
3131
- else
3132
- {
3126
+ } else {
3133
3127
  ZSTD_copy8(op, match);
3134
3128
  }
3135
3129
  op += 8; match += 8;
@@ -3230,7 +3230,8 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
3230
3230
  if (litLength&1) litLength>>=1, dumps += 3;
3231
3231
  else litLength = (U16)(litLength)>>1, dumps += 2;
3232
3232
  }
3233
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
3233
+ if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
3234
+ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
3234
3235
  }
3235
3236
 
3236
3237
  /* Offset */
@@ -3263,7 +3264,8 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
3263
3264
  if (matchLength&1) matchLength>>=1, dumps += 3;
3264
3265
  else matchLength = (U16)(matchLength)>>1, dumps += 2;
3265
3266
  }
3266
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
3267
+ if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
3268
+ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
3267
3269
  }
3268
3270
  matchLength += MINMATCH;
3269
3271
 
@@ -4134,9 +4134,9 @@ static size_t ZSTDv07_loadEntropy(ZSTDv07_DCtx* dctx, const void* const dict, si
4134
4134
  }
4135
4135
 
4136
4136
  if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
4137
- dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
4138
- dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
4139
- dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
4137
+ dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] == 0 || dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
4138
+ dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] == 0 || dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
4139
+ dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] == 0 || dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
4140
4140
  dictPtr += 12;
4141
4141
 
4142
4142
  dctx->litEntropy = dctx->fseEntropy = 1;
@@ -20,13 +20,16 @@ extern "C" {
20
20
 
21
21
  /* ===== ZSTDLIB_API : control library symbols visibility ===== */
22
22
  #if defined(__GNUC__) && (__GNUC__ >= 4)
23
- # define ZSTDLIB_API __attribute__ ((visibility ("default")))
24
- #elif defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
25
- # define ZSTDLIB_API __declspec(dllexport)
23
+ # define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default")))
24
+ #else
25
+ # define ZSTDLIB_VISIBILITY
26
+ #endif
27
+ #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
28
+ # define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY
26
29
  #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
27
- # define ZSTDLIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
30
+ # define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
28
31
  #else
29
- # define ZSTDLIB_API
32
+ # define ZSTDLIB_API ZSTDLIB_VISIBILITY
30
33
  #endif
31
34
 
32
35
 
@@ -53,7 +56,7 @@ extern "C" {
53
56
  /*------ Version ------*/
54
57
  #define ZSTD_VERSION_MAJOR 1
55
58
  #define ZSTD_VERSION_MINOR 1
56
- #define ZSTD_VERSION_RELEASE 2
59
+ #define ZSTD_VERSION_RELEASE 3
57
60
 
58
61
  #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
59
62
  #define ZSTD_QUOTE(str) #str
@@ -170,8 +173,8 @@ typedef struct ZSTD_CDict_s ZSTD_CDict;
170
173
  * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
171
174
  * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
172
175
  * ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only.
173
- * `dict` can be released after ZSTD_CDict creation. */
174
- ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
176
+ * `dictBuffer` can be released after ZSTD_CDict creation, as its content is copied within CDict */
177
+ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel);
175
178
 
176
179
  /*! ZSTD_freeCDict() :
177
180
  * Function frees memory allocated by ZSTD_createCDict(). */
@@ -191,8 +194,8 @@ typedef struct ZSTD_DDict_s ZSTD_DDict;
191
194
 
192
195
  /*! ZSTD_createDDict() :
193
196
  * Create a digested dictionary, ready to start decompression operation without startup delay.
194
- * `dict` can be released after creation. */
195
- ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
197
+ * dictBuffer can be released after DDict creation, as its content is copied inside DDict */
198
+ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
196
199
 
197
200
  /*! ZSTD_freeDDict() :
198
201
  * Function frees memory allocated with ZSTD_createDDict() */
@@ -325,7 +328,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output
325
328
  * ***************************************************************************************/
326
329
 
327
330
  /* --- Constants ---*/
328
- #define ZSTD_MAGICNUMBER 0xFD2FB528 /* v0.8 */
331
+ #define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */
329
332
  #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
330
333
 
331
334
  #define ZSTD_WINDOWLOG_MAX_32 25
@@ -345,8 +348,9 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output
345
348
  #define ZSTD_TARGETLENGTH_MAX 999
346
349
 
347
350
  #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */
351
+ #define ZSTD_FRAMEHEADERSIZE_MIN 6
348
352
  static const size_t ZSTD_frameHeaderSize_prefix = 5;
349
- static const size_t ZSTD_frameHeaderSize_min = 6;
353
+ static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN;
350
354
  static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
351
355
  static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */
352
356
 
@@ -365,9 +369,9 @@ typedef struct {
365
369
  } ZSTD_compressionParameters;
366
370
 
367
371
  typedef struct {
368
- unsigned contentSizeFlag; /**< 1: content size will be in frame header (if known). */
369
- unsigned checksumFlag; /**< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
370
- unsigned noDictIDFlag; /**< 1: no dict ID will be saved into frame header (if dictionary compression) */
372
+ unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */
373
+ unsigned checksumFlag; /**< 1: generate a 32-bits checksum at end of frame, for error detection */
374
+ unsigned noDictIDFlag; /**< 1: no dictID will be saved into frame header (if dictionary compression) */
371
375
  } ZSTD_frameParameters;
372
376
 
373
377
  typedef struct {
@@ -397,9 +401,23 @@ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
397
401
  * Gives the amount of memory used by a given ZSTD_CCtx */
398
402
  ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
399
403
 
404
+ typedef enum {
405
+ ZSTD_p_forceWindow /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/
406
+ } ZSTD_CCtxParameter;
407
+ /*! ZSTD_setCCtxParameter() :
408
+ * Set advanced parameters, selected through enum ZSTD_CCtxParameter
409
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()) */
410
+ ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value);
411
+
412
+ /*! ZSTD_createCDict_byReference() :
413
+ * Create a digested dictionary for compression
414
+ * Dictionary content is simply referenced, and therefore stays in dictBuffer.
415
+ * It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */
416
+ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
417
+
400
418
  /*! ZSTD_createCDict_advanced() :
401
419
  * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
402
- ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
420
+ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference,
403
421
  ZSTD_parameters params, ZSTD_customMem customMem);
404
422
 
405
423
  /*! ZSTD_sizeof_CDict() :
@@ -455,6 +473,15 @@ ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
455
473
  * Gives the amount of memory used by a given ZSTD_DCtx */
456
474
  ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
457
475
 
476
+ /*! ZSTD_createDDict_byReference() :
477
+ * Create a digested dictionary, ready to start decompression operation without startup delay.
478
+ * Dictionary content is simply referenced, and therefore stays in dictBuffer.
479
+ * It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */
480
+ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
481
+
482
+ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
483
+ unsigned byReference, ZSTD_customMem customMem);
484
+
458
485
  /*! ZSTD_sizeof_DDict() :
459
486
  * Gives the amount of memory used by a given ZSTD_DDict */
460
487
  ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
@@ -463,13 +490,13 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
463
490
  * Provides the dictID stored within dictionary.
464
491
  * if @return == 0, the dictionary is not conformant with Zstandard specification.
465
492
  * It can still be loaded, but as a content-only dictionary. */
466
- unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
493
+ ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
467
494
 
468
495
  /*! ZSTD_getDictID_fromDDict() :
469
496
  * Provides the dictID of the dictionary loaded into `ddict`.
470
497
  * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
471
498
  * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
472
- unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
499
+ ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
473
500
 
474
501
  /*! ZSTD_getDictID_fromFrame() :
475
502
  * Provides the dictID required to decompress the frame stored within `src`.
@@ -481,7 +508,7 @@ unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
481
508
  * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
482
509
  * - This is not a Zstandard frame.
483
510
  * When identifying the exact failure cause, it's possible to use ZSTD_getFrameParams(), which will provide a more precise error code.
484
- unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
511
+ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
485
512
 
486
513
 
487
514
  /********************************************************************
@@ -491,7 +518,7 @@ unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
491
518
  /*===== Advanced Streaming compression functions =====*/
492
519
  ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
493
520
  ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct */
494
- ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
521
+ ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
495
522
  ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
496
523
  ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */
497
524
  ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */
@@ -500,9 +527,9 @@ ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
500
527
 
501
528
 
502
529
  /*===== Advanced Streaming decompression functions =====*/
503
- typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
530
+ typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
504
531
  ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
505
- ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
532
+ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
506
533
  ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
507
534
  ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */
508
535
  ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */
@@ -542,10 +569,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
542
569
  In which case, it will "discard" the relevant memory section from its history.
543
570
 
544
571
  Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
545
- It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame,
546
- Without last block mark, frames will be considered unfinished (broken) by decoders.
572
+ It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
573
+ Without last block mark, frames will be considered unfinished (corrupted) by decoders.
547
574
 
548
- You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
575
+ `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame.
549
576
  */
550
577
 
551
578
  /*===== Buffer-less streaming compression functions =====*/
@@ -553,6 +580,7 @@ ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
553
580
  ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
554
581
  ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
555
582
  ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
583
+ ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize);
556
584
  ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
557
585
  ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
558
586