zstd-ruby 0.1.2 → 1.1.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -36,12 +36,11 @@
36
36
  #include <time.h> /* clock */
37
37
 
38
38
  #include "mem.h" /* read */
39
- #include "error_private.h"
40
39
  #include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
41
40
  #define HUF_STATIC_LINKING_ONLY
42
- #include "huf.h"
41
+ #include "huf.h" /* HUF_buildCTable, HUF_writeCTable */
43
42
  #include "zstd_internal.h" /* includes zstd.h */
44
- #include "xxhash.h"
43
+ #include "xxhash.h" /* XXH64 */
45
44
  #include "divsufsort.h"
46
45
  #ifndef ZDICT_STATIC_LINKING_ONLY
47
46
  # define ZDICT_STATIC_LINKING_ONLY
@@ -61,7 +60,7 @@
61
60
  #define NOISELENGTH 32
62
61
 
63
62
  #define MINRATIO 4
64
- static const int g_compressionLevel_default = 5;
63
+ static const int g_compressionLevel_default = 6;
65
64
  static const U32 g_selectivity_default = 9;
66
65
  static const size_t g_provision_entropySize = 200;
67
66
  static const size_t g_min_fast_dictContent = 192;
@@ -307,13 +306,13 @@ static dictItem ZDICT_analyzePos(
307
306
  } while (length >=MINMATCHLENGTH);
308
307
 
309
308
  /* look backward */
310
- length = MINMATCHLENGTH;
311
- while ((length >= MINMATCHLENGTH) & (start > 0)) {
312
- length = ZDICT_count(b + pos, b + suffix[start - 1]);
313
- if (length >= LLIMIT) length = LLIMIT - 1;
314
- lengthList[length]++;
315
- if (length >= MINMATCHLENGTH) start--;
316
- }
309
+ length = MINMATCHLENGTH;
310
+ while ((length >= MINMATCHLENGTH) & (start > 0)) {
311
+ length = ZDICT_count(b + pos, b + suffix[start - 1]);
312
+ if (length >= LLIMIT) length = LLIMIT - 1;
313
+ lengthList[length]++;
314
+ if (length >= MINMATCHLENGTH) start--;
315
+ }
317
316
 
318
317
  /* largest useful length */
319
318
  memset(cumulLength, 0, sizeof(cumulLength));
@@ -570,7 +569,7 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
570
569
  if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
571
570
  }
572
571
  cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
573
- if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
572
+ if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
574
573
 
575
574
  if (cSize) { /* if == 0; block is not compressible */
576
575
  const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
@@ -825,6 +824,55 @@ _cleanup:
825
824
  }
826
825
 
827
826
 
827
+
828
+ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
829
+ const void* customDictContent, size_t dictContentSize,
830
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
831
+ ZDICT_params_t params)
832
+ {
833
+ size_t hSize;
834
+ #define HBUFFSIZE 256
835
+ BYTE header[HBUFFSIZE];
836
+ int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
837
+ U32 const notificationLevel = params.notificationLevel;
838
+
839
+ /* check conditions */
840
+ if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
841
+ if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
842
+ if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
843
+
844
+ /* dictionary header */
845
+ MEM_writeLE32(header, ZSTD_DICT_MAGIC);
846
+ { U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
847
+ U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
848
+ U32 const dictID = params.dictID ? params.dictID : compliantID;
849
+ MEM_writeLE32(header+4, dictID);
850
+ }
851
+ hSize = 8;
852
+
853
+ /* entropy tables */
854
+ DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
855
+ DISPLAYLEVEL(2, "statistics ... \n");
856
+ { size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize,
857
+ compressionLevel,
858
+ samplesBuffer, samplesSizes, nbSamples,
859
+ customDictContent, dictContentSize,
860
+ notificationLevel);
861
+ if (ZDICT_isError(eSize)) return eSize;
862
+ hSize += eSize;
863
+ }
864
+
865
+ /* copy elements in final buffer ; note : src and dst buffer can overlap */
866
+ if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
867
+ { size_t const dictSize = hSize + dictContentSize;
868
+ char* dictEnd = (char*)dictBuffer + dictSize;
869
+ memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
870
+ memcpy(dictBuffer, header, hSize);
871
+ return dictSize;
872
+ }
873
+ }
874
+
875
+
828
876
  size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
829
877
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
830
878
  ZDICT_params_t params)
@@ -19,15 +19,18 @@ extern "C" {
19
19
  #include <stddef.h> /* size_t */
20
20
 
21
21
 
22
- /*====== Export for Windows ======*/
23
- /*!
24
- * ZSTD_DLL_EXPORT :
25
- * Enable exporting of functions when building a Windows DLL
26
- */
27
- #if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
28
- # define ZDICTLIB_API __declspec(dllexport)
22
+ /* ===== ZDICTLIB_API : control library symbols visibility ===== */
23
+ #if defined(__GNUC__) && (__GNUC__ >= 4)
24
+ # define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
25
+ #else
26
+ # define ZDICTLIB_VISIBILITY
27
+ #endif
28
+ #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
29
+ # define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
30
+ #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
31
+ # define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
29
32
  #else
30
- # define ZDICTLIB_API
33
+ # define ZDICTLIB_API ZDICTLIB_VISIBILITY
31
34
  #endif
32
35
 
33
36
 
@@ -79,29 +82,116 @@ typedef struct {
79
82
  or an error code, which can be tested by ZDICT_isError().
80
83
  note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
81
84
  */
82
- size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
85
+ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
83
86
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
84
87
  ZDICT_params_t parameters);
85
88
 
89
+ /*! COVER_params_t :
90
+ For all values 0 means default.
91
+ k and d are the only required parameters.
92
+ */
93
+ typedef struct {
94
+ unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
95
+ unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
96
+ unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
97
+
98
+ unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
99
+ unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
100
+ unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
101
+ int compressionLevel; /* 0 means default; target a specific zstd compression level */
102
+ } COVER_params_t;
86
103
 
87
- /*! ZDICT_addEntropyTablesFromBuffer() :
88
104
 
89
- Given a content-only dictionary (built using any 3rd party algorithm),
90
- add entropy tables computed from an array of samples.
105
+ /*! COVER_trainFromBuffer() :
106
+ Train a dictionary from an array of samples using the COVER algorithm.
107
+ Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
108
+ supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
109
+ The resulting dictionary will be saved into `dictBuffer`.
110
+ @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
111
+ or an error code, which can be tested with ZDICT_isError().
112
+ Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
113
+ Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
114
+ It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
115
+ In general, it's recommended to provide a few thousand samples, but this can vary a lot.
116
+ It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
117
+ */
118
+ ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
119
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
120
+ COVER_params_t parameters);
121
+
122
+ /*! COVER_optimizeTrainFromBuffer() :
123
+ The same requirements as above hold for all the parameters except `parameters`.
124
+ This function tries many parameter combinations and picks the best parameters.
125
+ `*parameters` is filled with the best parameters found, and the dictionary
126
+ constructed with those parameters is stored in `dictBuffer`.
127
+
128
+ All of the parameters d, k, steps are optional.
129
+ If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
130
+ if steps is zero it defaults to its default value.
131
+ If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
132
+
133
+ @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
134
+ or an error code, which can be tested with ZDICT_isError().
135
+ On success `*parameters` contains the parameters selected.
136
+ Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
137
+ */
138
+ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
139
+ const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
140
+ COVER_params_t *parameters);
141
+
142
+ /*! ZDICT_finalizeDictionary() :
143
+
144
+ Given a custom content as a basis for dictionary, and a set of samples,
145
+ finalize dictionary by adding headers and statistics.
146
+
91
147
  Samples must be stored concatenated in a flat buffer `samplesBuffer`,
92
148
  supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
93
149
 
94
- The input dictionary content must be stored *at the end* of `dictBuffer`.
95
- Its size is `dictContentSize`.
96
- The resulting dictionary with added entropy tables will be *written back to `dictBuffer`*,
97
- starting from its beginning.
98
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`).
150
+ dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
151
+ dictBufferCapacity must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
152
+
153
+ @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
154
+ or an error code, which can be tested by ZDICT_isError().
155
+ note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
156
+ note 2 : dictBuffer and customDictContent can overlap
99
157
  */
100
- size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
101
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
158
+ #define ZDICT_CONTENTSIZE_MIN 256
159
+ #define ZDICT_DICTSIZE_MIN 512
160
+ ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
161
+ const void* customDictContent, size_t dictContentSize,
162
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
163
+ ZDICT_params_t parameters);
102
164
 
103
165
 
104
166
 
167
+ /* Deprecation warnings */
168
+ /* It is generally possible to disable deprecation warnings from compiler,
169
+ for example with -Wno-deprecated-declarations for gcc
170
+ or _CRT_SECURE_NO_WARNINGS in Visual.
171
+ Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
172
+ #ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
173
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */
174
+ #else
175
+ # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
176
+ # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
177
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API [[deprecated(message)]]
178
+ # elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
179
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
180
+ # elif (ZDICT_GCC_VERSION >= 301)
181
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
182
+ # elif defined(_MSC_VER)
183
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
184
+ # else
185
+ # pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
186
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API
187
+ # endif
188
+ #endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
189
+
190
+ ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
191
+ size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
192
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
193
+
194
+
105
195
  #endif /* ZDICT_STATIC_LINKING_ONLY */
106
196
 
107
197
  #if defined (__cplusplus)
@@ -100,6 +100,7 @@
100
100
  <GenerateDebugInformation>true</GenerateDebugInformation>
101
101
  <AdditionalLibraryDirectories>$(SolutionDir)..\dll;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
102
102
  <AdditionalDependencies>libzstd.lib;%(AdditionalDependencies)</AdditionalDependencies>
103
+ <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
103
104
  </Link>
104
105
  </ItemDefinitionGroup>
105
106
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
@@ -141,6 +142,7 @@
141
142
  <OptimizeReferences>true</OptimizeReferences>
142
143
  <AdditionalLibraryDirectories>$(SolutionDir)..\dll;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
143
144
  <AdditionalDependencies>libzstd.lib;%(AdditionalDependencies)</AdditionalDependencies>
145
+ <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
144
146
  </Link>
145
147
  </ItemDefinitionGroup>
146
148
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
@@ -3012,21 +3012,19 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
3012
3012
  /* Literal length */
3013
3013
  litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
3014
3014
  prevOffset = litLength ? seq->offset : seqState->prevOffset;
3015
- if (litLength == MaxLL)
3016
- {
3015
+ if (litLength == MaxLL) {
3017
3016
  U32 add = *dumps++;
3018
3017
  if (add < 255) litLength += add;
3019
- else
3020
- {
3021
- litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
3018
+ else {
3019
+ litLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
3022
3020
  dumps += 3;
3023
3021
  }
3024
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
3022
+ if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
3023
+ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
3025
3024
  }
3026
3025
 
3027
3026
  /* Offset */
3028
- {
3029
- static const U32 offsetPrefix[MaxOff+1] = {
3027
+ { static const U32 offsetPrefix[MaxOff+1] = {
3030
3028
  1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
3031
3029
  512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
3032
3030
  524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
@@ -3043,16 +3041,15 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
3043
3041
 
3044
3042
  /* MatchLength */
3045
3043
  matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
3046
- if (matchLength == MaxML)
3047
- {
3044
+ if (matchLength == MaxML) {
3048
3045
  U32 add = *dumps++;
3049
3046
  if (add < 255) matchLength += add;
3050
- else
3051
- {
3052
- matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
3047
+ else {
3048
+ matchLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
3053
3049
  dumps += 3;
3054
3050
  }
3055
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
3051
+ if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
3052
+ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
3056
3053
  }
3057
3054
  matchLength += MINMATCH;
3058
3055
 
@@ -3116,8 +3113,7 @@ static size_t ZSTD_execSequence(BYTE* op,
3116
3113
  /* Requirement: op <= oend_8 */
3117
3114
 
3118
3115
  /* match within prefix */
3119
- if (sequence.offset < 8)
3120
- {
3116
+ if (sequence.offset < 8) {
3121
3117
  /* close range match, overlap */
3122
3118
  const int sub2 = dec64table[sequence.offset];
3123
3119
  op[0] = match[0];
@@ -3127,9 +3123,7 @@ static size_t ZSTD_execSequence(BYTE* op,
3127
3123
  match += dec32table[sequence.offset];
3128
3124
  ZSTD_copy4(op+4, match);
3129
3125
  match -= sub2;
3130
- }
3131
- else
3132
- {
3126
+ } else {
3133
3127
  ZSTD_copy8(op, match);
3134
3128
  }
3135
3129
  op += 8; match += 8;
@@ -3230,7 +3230,8 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
3230
3230
  if (litLength&1) litLength>>=1, dumps += 3;
3231
3231
  else litLength = (U16)(litLength)>>1, dumps += 2;
3232
3232
  }
3233
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
3233
+ if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
3234
+ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
3234
3235
  }
3235
3236
 
3236
3237
  /* Offset */
@@ -3263,7 +3264,8 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
3263
3264
  if (matchLength&1) matchLength>>=1, dumps += 3;
3264
3265
  else matchLength = (U16)(matchLength)>>1, dumps += 2;
3265
3266
  }
3266
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
3267
+ if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
3268
+ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
3267
3269
  }
3268
3270
  matchLength += MINMATCH;
3269
3271
 
@@ -4134,9 +4134,9 @@ static size_t ZSTDv07_loadEntropy(ZSTDv07_DCtx* dctx, const void* const dict, si
4134
4134
  }
4135
4135
 
4136
4136
  if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
4137
- dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
4138
- dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
4139
- dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
4137
+ dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] == 0 || dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
4138
+ dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] == 0 || dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
4139
+ dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] == 0 || dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
4140
4140
  dictPtr += 12;
4141
4141
 
4142
4142
  dctx->litEntropy = dctx->fseEntropy = 1;
@@ -20,13 +20,16 @@ extern "C" {
20
20
 
21
21
  /* ===== ZSTDLIB_API : control library symbols visibility ===== */
22
22
  #if defined(__GNUC__) && (__GNUC__ >= 4)
23
- # define ZSTDLIB_API __attribute__ ((visibility ("default")))
24
- #elif defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
25
- # define ZSTDLIB_API __declspec(dllexport)
23
+ # define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default")))
24
+ #else
25
+ # define ZSTDLIB_VISIBILITY
26
+ #endif
27
+ #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
28
+ # define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY
26
29
  #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
27
- # define ZSTDLIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
30
+ # define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
28
31
  #else
29
- # define ZSTDLIB_API
32
+ # define ZSTDLIB_API ZSTDLIB_VISIBILITY
30
33
  #endif
31
34
 
32
35
 
@@ -53,7 +56,7 @@ extern "C" {
53
56
  /*------ Version ------*/
54
57
  #define ZSTD_VERSION_MAJOR 1
55
58
  #define ZSTD_VERSION_MINOR 1
56
- #define ZSTD_VERSION_RELEASE 2
59
+ #define ZSTD_VERSION_RELEASE 3
57
60
 
58
61
  #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
59
62
  #define ZSTD_QUOTE(str) #str
@@ -170,8 +173,8 @@ typedef struct ZSTD_CDict_s ZSTD_CDict;
170
173
  * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
171
174
  * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
172
175
  * ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only.
173
- * `dict` can be released after ZSTD_CDict creation. */
174
- ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
176
+ * `dictBuffer` can be released after ZSTD_CDict creation, as its content is copied within CDict */
177
+ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel);
175
178
 
176
179
  /*! ZSTD_freeCDict() :
177
180
  * Function frees memory allocated by ZSTD_createCDict(). */
@@ -191,8 +194,8 @@ typedef struct ZSTD_DDict_s ZSTD_DDict;
191
194
 
192
195
  /*! ZSTD_createDDict() :
193
196
  * Create a digested dictionary, ready to start decompression operation without startup delay.
194
- * `dict` can be released after creation. */
195
- ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
197
+ * dictBuffer can be released after DDict creation, as its content is copied inside DDict */
198
+ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
196
199
 
197
200
  /*! ZSTD_freeDDict() :
198
201
  * Function frees memory allocated with ZSTD_createDDict() */
@@ -325,7 +328,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output
325
328
  * ***************************************************************************************/
326
329
 
327
330
  /* --- Constants ---*/
328
- #define ZSTD_MAGICNUMBER 0xFD2FB528 /* v0.8 */
331
+ #define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */
329
332
  #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
330
333
 
331
334
  #define ZSTD_WINDOWLOG_MAX_32 25
@@ -345,8 +348,9 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output
345
348
  #define ZSTD_TARGETLENGTH_MAX 999
346
349
 
347
350
  #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */
351
+ #define ZSTD_FRAMEHEADERSIZE_MIN 6
348
352
  static const size_t ZSTD_frameHeaderSize_prefix = 5;
349
- static const size_t ZSTD_frameHeaderSize_min = 6;
353
+ static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN;
350
354
  static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
351
355
  static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */
352
356
 
@@ -365,9 +369,9 @@ typedef struct {
365
369
  } ZSTD_compressionParameters;
366
370
 
367
371
  typedef struct {
368
- unsigned contentSizeFlag; /**< 1: content size will be in frame header (if known). */
369
- unsigned checksumFlag; /**< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
370
- unsigned noDictIDFlag; /**< 1: no dict ID will be saved into frame header (if dictionary compression) */
372
+ unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */
373
+ unsigned checksumFlag; /**< 1: generate a 32-bits checksum at end of frame, for error detection */
374
+ unsigned noDictIDFlag; /**< 1: no dictID will be saved into frame header (if dictionary compression) */
371
375
  } ZSTD_frameParameters;
372
376
 
373
377
  typedef struct {
@@ -397,9 +401,23 @@ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
397
401
  * Gives the amount of memory used by a given ZSTD_CCtx */
398
402
  ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
399
403
 
404
+ typedef enum {
405
+ ZSTD_p_forceWindow /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/
406
+ } ZSTD_CCtxParameter;
407
+ /*! ZSTD_setCCtxParameter() :
408
+ * Set advanced parameters, selected through enum ZSTD_CCtxParameter
409
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()) */
410
+ ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value);
411
+
412
+ /*! ZSTD_createCDict_byReference() :
413
+ * Create a digested dictionary for compression
414
+ * Dictionary content is simply referenced, and therefore stays in dictBuffer.
415
+ * It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */
416
+ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
417
+
400
418
  /*! ZSTD_createCDict_advanced() :
401
419
  * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
402
- ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
420
+ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference,
403
421
  ZSTD_parameters params, ZSTD_customMem customMem);
404
422
 
405
423
  /*! ZSTD_sizeof_CDict() :
@@ -455,6 +473,15 @@ ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
455
473
  * Gives the amount of memory used by a given ZSTD_DCtx */
456
474
  ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
457
475
 
476
+ /*! ZSTD_createDDict_byReference() :
477
+ * Create a digested dictionary, ready to start decompression operation without startup delay.
478
+ * Dictionary content is simply referenced, and therefore stays in dictBuffer.
479
+ * It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */
480
+ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
481
+
482
+ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
483
+ unsigned byReference, ZSTD_customMem customMem);
484
+
458
485
  /*! ZSTD_sizeof_DDict() :
459
486
  * Gives the amount of memory used by a given ZSTD_DDict */
460
487
  ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
@@ -463,13 +490,13 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
463
490
  * Provides the dictID stored within dictionary.
464
491
  * if @return == 0, the dictionary is not conformant with Zstandard specification.
465
492
  * It can still be loaded, but as a content-only dictionary. */
466
- unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
493
+ ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
467
494
 
468
495
  /*! ZSTD_getDictID_fromDDict() :
469
496
  * Provides the dictID of the dictionary loaded into `ddict`.
470
497
  * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
471
498
  * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
472
- unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
499
+ ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
473
500
 
474
501
  /*! ZSTD_getDictID_fromFrame() :
475
502
  * Provides the dictID required to decompress the frame stored within `src`.
@@ -481,7 +508,7 @@ unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
481
508
  * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
482
509
  * - This is not a Zstandard frame.
483
510
  * When identifying the exact failure cause, it's possible to use ZSTD_getFrameParams(), which will provide a more precise error code. */
484
- unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
511
+ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
485
512
 
486
513
 
487
514
  /********************************************************************
@@ -491,7 +518,7 @@ unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
491
518
  /*===== Advanced Streaming compression functions =====*/
492
519
  ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
493
520
  ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct */
494
- ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
521
+ ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
495
522
  ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
496
523
  ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */
497
524
  ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */
@@ -500,9 +527,9 @@ ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
500
527
 
501
528
 
502
529
  /*===== Advanced Streaming decompression functions =====*/
503
- typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
530
+ typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
504
531
  ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
505
- ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
532
+ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
506
533
  ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
507
534
  ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */
508
535
  ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */
@@ -542,10 +569,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
542
569
  In which case, it will "discard" the relevant memory section from its history.
543
570
 
544
571
  Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
545
- It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame,
546
- Without last block mark, frames will be considered unfinished (broken) by decoders.
572
+ It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
573
+ Without last block mark, frames will be considered unfinished (corrupted) by decoders.
547
574
 
548
- You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
575
+ `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame.
549
576
  */
550
577
 
551
578
  /*===== Buffer-less streaming compression functions =====*/
@@ -553,6 +580,7 @@ ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
553
580
  ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
554
581
  ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
555
582
  ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
583
+ ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize);
556
584
  ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
557
585
  ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
558
586