extzstd 0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja +5 -0
  3. data/README.md +5 -5
  4. data/contrib/zstd/CONTRIBUTING.md +42 -0
  5. data/contrib/zstd/LICENSE-examples +11 -0
  6. data/contrib/zstd/Makefile +315 -0
  7. data/contrib/zstd/NEWS +261 -0
  8. data/contrib/zstd/PATENTS +33 -0
  9. data/contrib/zstd/README.md +121 -41
  10. data/contrib/zstd/TESTING.md +44 -0
  11. data/contrib/zstd/appveyor.yml +178 -0
  12. data/contrib/zstd/circle.yml +75 -0
  13. data/contrib/zstd/lib/BUCK +186 -0
  14. data/contrib/zstd/lib/Makefile +163 -0
  15. data/contrib/zstd/lib/README.md +77 -0
  16. data/contrib/zstd/{common → lib/common}/bitstream.h +7 -4
  17. data/contrib/zstd/{common → lib/common}/entropy_common.c +19 -23
  18. data/contrib/zstd/{common → lib/common}/error_private.c +0 -0
  19. data/contrib/zstd/{common → lib/common}/error_private.h +0 -0
  20. data/contrib/zstd/{common → lib/common}/fse.h +94 -34
  21. data/contrib/zstd/{common → lib/common}/fse_decompress.c +18 -19
  22. data/contrib/zstd/{common → lib/common}/huf.h +52 -20
  23. data/contrib/zstd/{common → lib/common}/mem.h +17 -13
  24. data/contrib/zstd/lib/common/pool.c +194 -0
  25. data/contrib/zstd/lib/common/pool.h +56 -0
  26. data/contrib/zstd/lib/common/threading.c +80 -0
  27. data/contrib/zstd/lib/common/threading.h +104 -0
  28. data/contrib/zstd/{common → lib/common}/xxhash.c +3 -1
  29. data/contrib/zstd/{common → lib/common}/xxhash.h +11 -15
  30. data/contrib/zstd/{common → lib/common}/zstd_common.c +1 -11
  31. data/contrib/zstd/{common → lib/common}/zstd_errors.h +16 -2
  32. data/contrib/zstd/{common → lib/common}/zstd_internal.h +17 -1
  33. data/contrib/zstd/{compress → lib/compress}/fse_compress.c +138 -91
  34. data/contrib/zstd/{compress → lib/compress}/huf_compress.c +218 -67
  35. data/contrib/zstd/{compress → lib/compress}/zstd_compress.c +231 -108
  36. data/contrib/zstd/{compress → lib/compress}/zstd_opt.h +44 -25
  37. data/contrib/zstd/lib/compress/zstdmt_compress.c +739 -0
  38. data/contrib/zstd/lib/compress/zstdmt_compress.h +78 -0
  39. data/contrib/zstd/{decompress → lib/decompress}/huf_decompress.c +28 -23
  40. data/contrib/zstd/{decompress → lib/decompress}/zstd_decompress.c +814 -176
  41. data/contrib/zstd/{common → lib/deprecated}/zbuff.h +60 -39
  42. data/contrib/zstd/lib/deprecated/zbuff_common.c +26 -0
  43. data/contrib/zstd/lib/deprecated/zbuff_compress.c +145 -0
  44. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +74 -0
  45. data/contrib/zstd/lib/dictBuilder/cover.c +1029 -0
  46. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.c +0 -0
  47. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.h +0 -0
  48. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/zdict.c +68 -18
  49. data/contrib/zstd/lib/dictBuilder/zdict.h +201 -0
  50. data/contrib/zstd/{legacy → lib/legacy}/zstd_legacy.h +122 -7
  51. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.c +34 -3
  52. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.h +8 -0
  53. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.c +45 -12
  54. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.h +8 -0
  55. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.c +45 -12
  56. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.h +8 -0
  57. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.c +56 -33
  58. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.h +8 -0
  59. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.c +45 -18
  60. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.h +7 -0
  61. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.c +43 -16
  62. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.h +7 -0
  63. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.c +57 -23
  64. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.h +8 -0
  65. data/contrib/zstd/lib/libzstd.pc.in +14 -0
  66. data/contrib/zstd/{zstd.h → lib/zstd.h} +206 -71
  67. data/ext/depend +2 -0
  68. data/ext/extconf.rb +4 -4
  69. data/ext/extzstd.c +1 -1
  70. data/ext/zstd_common.c +5 -5
  71. data/ext/zstd_compress.c +3 -3
  72. data/ext/zstd_decompress.c +2 -2
  73. data/ext/zstd_dictbuilder.c +2 -2
  74. data/ext/zstd_legacy_v01.c +1 -1
  75. data/ext/zstd_legacy_v02.c +1 -1
  76. data/ext/zstd_legacy_v03.c +1 -1
  77. data/ext/zstd_legacy_v04.c +1 -1
  78. data/ext/zstd_legacy_v05.c +1 -1
  79. data/ext/zstd_legacy_v06.c +1 -1
  80. data/ext/zstd_legacy_v07.c +1 -1
  81. data/gemstub.rb +9 -5
  82. data/lib/extzstd/version.rb +1 -1
  83. metadata +73 -51
  84. data/contrib/zstd/compress/zbuff_compress.c +0 -319
  85. data/contrib/zstd/decompress/zbuff_decompress.c +0 -252
  86. data/contrib/zstd/dictBuilder/zdict.h +0 -111
@@ -36,12 +36,11 @@
36
36
  #include <time.h> /* clock */
37
37
 
38
38
  #include "mem.h" /* read */
39
- #include "error_private.h"
40
39
  #include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
41
40
  #define HUF_STATIC_LINKING_ONLY
42
- #include "huf.h"
41
+ #include "huf.h" /* HUF_buildCTable, HUF_writeCTable */
43
42
  #include "zstd_internal.h" /* includes zstd.h */
44
- #include "xxhash.h"
43
+ #include "xxhash.h" /* XXH64 */
45
44
  #include "divsufsort.h"
46
45
  #ifndef ZDICT_STATIC_LINKING_ONLY
47
46
  # define ZDICT_STATIC_LINKING_ONLY
@@ -61,7 +60,7 @@
61
60
  #define NOISELENGTH 32
62
61
 
63
62
  #define MINRATIO 4
64
- static const int g_compressionLevel_default = 5;
63
+ static const int g_compressionLevel_default = 6;
65
64
  static const U32 g_selectivity_default = 9;
66
65
  static const size_t g_provision_entropySize = 200;
67
66
  static const size_t g_min_fast_dictContent = 192;
@@ -307,13 +306,13 @@ static dictItem ZDICT_analyzePos(
307
306
  } while (length >=MINMATCHLENGTH);
308
307
 
309
308
  /* look backward */
310
- length = MINMATCHLENGTH;
311
- while ((length >= MINMATCHLENGTH) & (start > 0)) {
312
- length = ZDICT_count(b + pos, b + suffix[start - 1]);
313
- if (length >= LLIMIT) length = LLIMIT - 1;
314
- lengthList[length]++;
315
- if (length >= MINMATCHLENGTH) start--;
316
- }
309
+ length = MINMATCHLENGTH;
310
+ while ((length >= MINMATCHLENGTH) & (start > 0)) {
311
+ length = ZDICT_count(b + pos, b + suffix[start - 1]);
312
+ if (length >= LLIMIT) length = LLIMIT - 1;
313
+ lengthList[length]++;
314
+ if (length >= MINMATCHLENGTH) start--;
315
+ }
317
316
 
318
317
  /* largest useful length */
319
318
  memset(cumulLength, 0, sizeof(cumulLength));
@@ -570,7 +569,7 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
570
569
  if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
571
570
  }
572
571
  cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
573
- if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
572
+ if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
574
573
 
575
574
  if (cSize) { /* if == 0; block is not compressible */
576
575
  const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
@@ -825,6 +824,55 @@ _cleanup:
825
824
  }
826
825
 
827
826
 
827
+
828
+ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
829
+ const void* customDictContent, size_t dictContentSize,
830
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
831
+ ZDICT_params_t params)
832
+ { /* Assembles a header (magic, dictID, output of ZDICT_analyzeEntropy), then stores header + customDictContent into dictBuffer; returns the total size or an error code. */
833
+ size_t hSize; /* number of header bytes produced so far */
834
+ #define HBUFFSIZE 256
835
+ BYTE header[HBUFFSIZE]; /* header is assembled here, then copied to the front of dictBuffer */
836
+ int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel; /* values <= 0 select the default level */
837
+ U32 const notificationLevel = params.notificationLevel; /* forwarded to ZDICT_analyzeEntropy below */
838
+
839
+ /* check conditions : reject undersized content or output buffer before anything is written */
840
+ if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
841
+ if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
842
+ if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
843
+
844
+ /* dictionary header : magic number at header[0], dictID at header[4] */
845
+ MEM_writeLE32(header, ZSTD_DICT_MAGIC);
846
+ { U64 const randomID = XXH64(customDictContent, dictContentSize, 0); /* hash of the content, seed 0 */
847
+ U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768; /* maps the hash into [32768, 2^31 - 1] */
848
+ U32 const dictID = params.dictID ? params.dictID : compliantID; /* a non-zero params.dictID takes precedence */
849
+ MEM_writeLE32(header+4, dictID);
850
+ }
851
+ hSize = 8; /* two 32-bit fields : magic + dictID */
852
+
853
+ /* entropy tables : written into the header buffer right after the 8 bytes above */
854
+ DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
855
+ DISPLAYLEVEL(2, "statistics ... \n");
856
+ { size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize,
857
+ compressionLevel,
858
+ samplesBuffer, samplesSizes, nbSamples,
859
+ customDictContent, dictContentSize,
860
+ notificationLevel);
861
+ if (ZDICT_isError(eSize)) return eSize; /* propagate the error code unchanged */
862
+ hSize += eSize; /* eSize is presumably the byte count written by ZDICT_analyzeEntropy - verify against its definition */
863
+ }
864
+
865
+ /* copy elements in final buffer ; note : src and dst buffer can overlap, hence memmove for the content, done before the header is copied in */
866
+ if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize; /* shrink content so header + content fits; only the first bytes of customDictContent are kept */
867
+ { size_t const dictSize = hSize + dictContentSize;
868
+ char* dictEnd = (char*)dictBuffer + dictSize;
869
+ memmove(dictEnd - dictContentSize, customDictContent, dictContentSize); /* destination is dictBuffer + hSize */
870
+ memcpy(dictBuffer, header, hSize);
871
+ return dictSize;
872
+ }
873
+ }
874
+
875
+
828
876
  size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
829
877
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
830
878
  ZDICT_params_t params)
@@ -898,12 +946,14 @@ size_t ZDICT_trainFromBuffer_unsafe(
898
946
  U32 const nb = MIN(25, dictList[0].pos);
899
947
  U32 const dictContentSize = ZDICT_dictSize(dictList);
900
948
  U32 u;
901
- DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
902
- DISPLAYLEVEL(3, "list %u best segments \n", nb);
903
- for (u=1; u<=nb; u++) {
904
- U32 pos = dictList[u].pos;
905
- U32 length = dictList[u].length;
906
- U32 printedLength = MIN(40, length);
949
+ DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos-1, dictContentSize);
950
+ DISPLAYLEVEL(3, "list %u best segments \n", nb-1);
951
+ for (u=1; u<nb; u++) {
952
+ U32 const pos = dictList[u].pos;
953
+ U32 const length = dictList[u].length;
954
+ U32 const printedLength = MIN(40, length);
955
+ if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize))
956
+ return ERROR(GENERIC); /* should never happen */
907
957
  DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
908
958
  u, length, pos, dictList[u].savings);
909
959
  ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
@@ -0,0 +1,201 @@
1
+ /**
2
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the BSD-style license found in the
6
+ * LICENSE file in the root directory of this source tree. An additional grant
7
+ * of patent rights can be found in the PATENTS file in the same directory.
8
+ */
9
+
10
+ #ifndef DICTBUILDER_H_001
11
+ #define DICTBUILDER_H_001
12
+
13
+ #if defined (__cplusplus)
14
+ extern "C" {
15
+ #endif
16
+
17
+
18
+ /*====== Dependencies ======*/
19
+ #include <stddef.h> /* size_t */
20
+
21
+
22
+ /* ===== ZDICTLIB_API : control library symbols visibility ===== */
23
+ #if defined(__GNUC__) && (__GNUC__ >= 4)
24
+ # define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
25
+ #else
26
+ # define ZDICTLIB_VISIBILITY
27
+ #endif
28
+ #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
29
+ # define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
30
+ #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
31
+ # define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
32
+ #else
33
+ # define ZDICTLIB_API ZDICTLIB_VISIBILITY
34
+ #endif
35
+
36
+
37
+ /*! ZDICT_trainFromBuffer() :
38
+ Train a dictionary from an array of samples.
39
+ Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
40
+ supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
41
+ The resulting dictionary will be saved into `dictBuffer`.
42
+ @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
43
+ or an error code, which can be tested with ZDICT_isError().
44
+ Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
45
+ It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
46
+ In general, it's recommended to provide a few thousand samples, but this can vary a lot.
47
+ It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
48
+ */
49
+ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
50
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
51
+
52
+
53
+ /*====== Helper functions ======*/
54
+ ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
55
+ ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
56
+ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
57
+
58
+
59
+
60
+ #ifdef ZDICT_STATIC_LINKING_ONLY
61
+
62
+ /* ====================================================================================
63
+ * The definitions in this section are considered experimental.
64
+ * They should never be used with a dynamic library, as they may change in the future.
65
+ * They are provided for advanced usages.
66
+ * Use them only in association with static linking.
67
+ * ==================================================================================== */
68
+
69
+ typedef struct {
70
+ unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
71
+ int compressionLevel; /* 0 means default; target a specific zstd compression level */
72
+ unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
73
+ unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
74
+ unsigned reserved[2]; /* reserved space for future parameters */
75
+ } ZDICT_params_t;
76
+
77
+
78
+ /*! ZDICT_trainFromBuffer_advanced() :
79
+ Same as ZDICT_trainFromBuffer() with control over more parameters.
80
+ `parameters` is optional and can be provided with values set to 0 to mean "default".
81
+ @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
82
+ or an error code, which can be tested by ZDICT_isError().
83
+ note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
84
+ */
85
+ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
86
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
87
+ ZDICT_params_t parameters);
88
+
89
+ /*! COVER_params_t :
90
+ For all values 0 means default.
91
+ k and d are the only required parameters.
92
+ */
93
+ typedef struct {
94
+ unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
95
+ unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
96
+ unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
97
+
98
+ unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
99
+ unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
100
+ unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
101
+ int compressionLevel; /* 0 means default; target a specific zstd compression level */
102
+ } COVER_params_t;
103
+
104
+
105
+ /*! COVER_trainFromBuffer() :
106
+ Train a dictionary from an array of samples using the COVER algorithm.
107
+ Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
108
+ supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
109
+ The resulting dictionary will be saved into `dictBuffer`.
110
+ @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
111
+ or an error code, which can be tested with ZDICT_isError().
112
+ Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
113
+ Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
114
+ It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
115
+ In general, it's recommended to provide a few thousand samples, but this can vary a lot.
116
+ It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
117
+ */
118
+ ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
119
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
120
+ COVER_params_t parameters);
121
+
122
+ /*! COVER_optimizeTrainFromBuffer() :
123
+ The same requirements as above hold for all the parameters except `parameters`.
124
+ This function tries many parameter combinations and picks the best parameters.
125
+ `*parameters` is filled with the best parameters found, and the dictionary
126
+ constructed with those parameters is stored in `dictBuffer`.
127
+
128
+ All of the parameters d, k, steps are optional.
129
+ If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
130
+ If steps is zero, its default value is used.
131
+ If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [16, 2048].
132
+
133
+ @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
134
+ or an error code, which can be tested with ZDICT_isError().
135
+ On success `*parameters` contains the parameters selected.
136
+ Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
137
+ */
138
+ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
139
+ const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
140
+ COVER_params_t *parameters);
141
+
142
+ /*! ZDICT_finalizeDictionary() :
143
+
144
+ Given a custom content as a basis for dictionary, and a set of samples,
145
+ finalize dictionary by adding headers and statistics.
146
+
147
+ Samples must be stored concatenated in a flat buffer `samplesBuffer`,
148
+ supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
149
+
150
+ dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
151
+ dictBufferCapacity must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
152
+
153
+ @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
154
+ or an error code, which can be tested by ZDICT_isError().
155
+ note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
156
+ note 2 : dictBuffer and customDictContent can overlap
157
+ */
158
+ #define ZDICT_CONTENTSIZE_MIN 256
159
+ #define ZDICT_DICTSIZE_MIN 512
160
+ ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
161
+ const void* customDictContent, size_t dictContentSize,
162
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
163
+ ZDICT_params_t parameters);
164
+
165
+
166
+
167
+ /* Deprecation warnings */
168
+ /* It is generally possible to disable deprecation warnings from compiler,
169
+ for example with -Wno-deprecated-declarations for gcc
170
+ or _CRT_SECURE_NO_WARNINGS in Visual.
171
+ Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
172
+ #ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
173
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */
174
+ #else
175
+ # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
176
+ # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
177
+ # define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
178
+ # elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
179
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
180
+ # elif (ZDICT_GCC_VERSION >= 301)
181
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
182
+ # elif defined(_MSC_VER)
183
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
184
+ # else
185
+ # pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
186
+ # define ZDICT_DEPRECATED(message) ZDICTLIB_API
187
+ # endif
188
+ #endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
189
+
190
+ ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
191
+ size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
192
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
193
+
194
+
195
+ #endif /* ZDICT_STATIC_LINKING_ONLY */
196
+
197
+ #if defined (__cplusplus)
198
+ }
199
+ #endif
200
+
201
+ #endif /* DICTBUILDER_H_001 */
@@ -20,14 +20,33 @@ extern "C" {
20
20
  #include "mem.h" /* MEM_STATIC */
21
21
  #include "error_private.h" /* ERROR */
22
22
  #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer */
23
- #include "zstd_v01.h"
24
- #include "zstd_v02.h"
25
- #include "zstd_v03.h"
26
- #include "zstd_v04.h"
27
- #include "zstd_v05.h"
28
- #include "zstd_v06.h"
29
- #include "zstd_v07.h"
30
23
 
24
+ #if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0)
25
+ # undef ZSTD_LEGACY_SUPPORT
26
+ # define ZSTD_LEGACY_SUPPORT 8
27
+ #endif
28
+
29
+ #if (ZSTD_LEGACY_SUPPORT <= 1)
30
+ # include "zstd_v01.h"
31
+ #endif
32
+ #if (ZSTD_LEGACY_SUPPORT <= 2)
33
+ # include "zstd_v02.h"
34
+ #endif
35
+ #if (ZSTD_LEGACY_SUPPORT <= 3)
36
+ # include "zstd_v03.h"
37
+ #endif
38
+ #if (ZSTD_LEGACY_SUPPORT <= 4)
39
+ # include "zstd_v04.h"
40
+ #endif
41
+ #if (ZSTD_LEGACY_SUPPORT <= 5)
42
+ # include "zstd_v05.h"
43
+ #endif
44
+ #if (ZSTD_LEGACY_SUPPORT <= 6)
45
+ # include "zstd_v06.h"
46
+ #endif
47
+ #if (ZSTD_LEGACY_SUPPORT <= 7)
48
+ # include "zstd_v07.h"
49
+ #endif
31
50
 
32
51
  /** ZSTD_isLegacy() :
33
52
  @return : > 0 if supported by legacy decoder. 0 otherwise.
@@ -40,13 +59,27 @@ MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize)
40
59
  magicNumberLE = MEM_readLE32(src);
41
60
  switch(magicNumberLE)
42
61
  {
62
+ #if (ZSTD_LEGACY_SUPPORT <= 1)
43
63
  case ZSTDv01_magicNumberLE:return 1;
64
+ #endif
65
+ #if (ZSTD_LEGACY_SUPPORT <= 2)
44
66
  case ZSTDv02_magicNumber : return 2;
67
+ #endif
68
+ #if (ZSTD_LEGACY_SUPPORT <= 3)
45
69
  case ZSTDv03_magicNumber : return 3;
70
+ #endif
71
+ #if (ZSTD_LEGACY_SUPPORT <= 4)
46
72
  case ZSTDv04_magicNumber : return 4;
73
+ #endif
74
+ #if (ZSTD_LEGACY_SUPPORT <= 5)
47
75
  case ZSTDv05_MAGICNUMBER : return 5;
76
+ #endif
77
+ #if (ZSTD_LEGACY_SUPPORT <= 6)
48
78
  case ZSTDv06_MAGICNUMBER : return 6;
79
+ #endif
80
+ #if (ZSTD_LEGACY_SUPPORT <= 7)
49
81
  case ZSTDv07_MAGICNUMBER : return 7;
82
+ #endif
50
83
  default : return 0;
51
84
  }
52
85
  }
@@ -56,24 +89,30 @@ MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, s
56
89
  {
57
90
  U32 const version = ZSTD_isLegacy(src, srcSize);
58
91
  if (version < 5) return 0; /* no decompressed size in frame header, or not a legacy format */
92
+ #if (ZSTD_LEGACY_SUPPORT <= 5)
59
93
  if (version==5) {
60
94
  ZSTDv05_parameters fParams;
61
95
  size_t const frResult = ZSTDv05_getFrameParams(&fParams, src, srcSize);
62
96
  if (frResult != 0) return 0;
63
97
  return fParams.srcSize;
64
98
  }
99
+ #endif
100
+ #if (ZSTD_LEGACY_SUPPORT <= 6)
65
101
  if (version==6) {
66
102
  ZSTDv06_frameParams fParams;
67
103
  size_t const frResult = ZSTDv06_getFrameParams(&fParams, src, srcSize);
68
104
  if (frResult != 0) return 0;
69
105
  return fParams.frameContentSize;
70
106
  }
107
+ #endif
108
+ #if (ZSTD_LEGACY_SUPPORT <= 7)
71
109
  if (version==7) {
72
110
  ZSTDv07_frameParams fParams;
73
111
  size_t const frResult = ZSTDv07_getFrameParams(&fParams, src, srcSize);
74
112
  if (frResult != 0) return 0;
75
113
  return fParams.frameContentSize;
76
114
  }
115
+ #endif
77
116
  return 0; /* should not be possible */
78
117
  }
79
118
 
@@ -86,14 +125,23 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
86
125
  U32 const version = ZSTD_isLegacy(src, compressedSize);
87
126
  switch(version)
88
127
  {
128
+ #if (ZSTD_LEGACY_SUPPORT <= 1)
89
129
  case 1 :
90
130
  return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize);
131
+ #endif
132
+ #if (ZSTD_LEGACY_SUPPORT <= 2)
91
133
  case 2 :
92
134
  return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize);
135
+ #endif
136
+ #if (ZSTD_LEGACY_SUPPORT <= 3)
93
137
  case 3 :
94
138
  return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize);
139
+ #endif
140
+ #if (ZSTD_LEGACY_SUPPORT <= 4)
95
141
  case 4 :
96
142
  return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize);
143
+ #endif
144
+ #if (ZSTD_LEGACY_SUPPORT <= 5)
97
145
  case 5 :
98
146
  { size_t result;
99
147
  ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx();
@@ -102,6 +150,8 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
102
150
  ZSTDv05_freeDCtx(zd);
103
151
  return result;
104
152
  }
153
+ #endif
154
+ #if (ZSTD_LEGACY_SUPPORT <= 6)
105
155
  case 6 :
106
156
  { size_t result;
107
157
  ZSTDv06_DCtx* const zd = ZSTDv06_createDCtx();
@@ -110,6 +160,8 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
110
160
  ZSTDv06_freeDCtx(zd);
111
161
  return result;
112
162
  }
163
+ #endif
164
+ #if (ZSTD_LEGACY_SUPPORT <= 7)
113
165
  case 7 :
114
166
  { size_t result;
115
167
  ZSTDv07_DCtx* const zd = ZSTDv07_createDCtx();
@@ -118,11 +170,50 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
118
170
  ZSTDv07_freeDCtx(zd);
119
171
  return result;
120
172
  }
173
+ #endif
121
174
  default :
122
175
  return ERROR(prefix_unknown);
123
176
  }
124
177
  }
125
178
 
179
+ MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src,
180
+ size_t compressedSize)
181
+ { /* Dispatches to the ZSTDvXX_findFrameCompressedSize() matching the legacy version detected in src; returns that function's result. */
182
+ U32 const version = ZSTD_isLegacy(src, compressedSize); /* legacy version number selected from the magic number; 0 when not recognized */
183
+ switch(version)
184
+ { /* each case below is compiled only when ZSTD_LEGACY_SUPPORT is <= that version number */
185
+ #if (ZSTD_LEGACY_SUPPORT <= 1)
186
+ case 1 :
187
+ return ZSTDv01_findFrameCompressedSize(src, compressedSize);
188
+ #endif
189
+ #if (ZSTD_LEGACY_SUPPORT <= 2)
190
+ case 2 :
191
+ return ZSTDv02_findFrameCompressedSize(src, compressedSize);
192
+ #endif
193
+ #if (ZSTD_LEGACY_SUPPORT <= 3)
194
+ case 3 :
195
+ return ZSTDv03_findFrameCompressedSize(src, compressedSize);
196
+ #endif
197
+ #if (ZSTD_LEGACY_SUPPORT <= 4)
198
+ case 4 :
199
+ return ZSTDv04_findFrameCompressedSize(src, compressedSize);
200
+ #endif
201
+ #if (ZSTD_LEGACY_SUPPORT <= 5)
202
+ case 5 :
203
+ return ZSTDv05_findFrameCompressedSize(src, compressedSize);
204
+ #endif
205
+ #if (ZSTD_LEGACY_SUPPORT <= 6)
206
+ case 6 :
207
+ return ZSTDv06_findFrameCompressedSize(src, compressedSize);
208
+ #endif
209
+ #if (ZSTD_LEGACY_SUPPORT <= 7)
210
+ case 7 :
211
+ return ZSTDv07_findFrameCompressedSize(src, compressedSize);
212
+ #endif
213
+ default : /* version 0, or a version whose case was compiled out */
214
+ return ERROR(prefix_unknown);
215
+ }
216
+ }
126
217
 
127
218
  MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
128
219
  {
@@ -133,10 +224,18 @@ MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
133
224
  case 2 :
134
225
  case 3 :
135
226
  return ERROR(version_unsupported);
227
+ #if (ZSTD_LEGACY_SUPPORT <= 4)
136
228
  case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext);
229
+ #endif
230
+ #if (ZSTD_LEGACY_SUPPORT <= 5)
137
231
  case 5 : return ZBUFFv05_freeDCtx((ZBUFFv05_DCtx*)legacyContext);
232
+ #endif
233
+ #if (ZSTD_LEGACY_SUPPORT <= 6)
138
234
  case 6 : return ZBUFFv06_freeDCtx((ZBUFFv06_DCtx*)legacyContext);
235
+ #endif
236
+ #if (ZSTD_LEGACY_SUPPORT <= 7)
139
237
  case 7 : return ZBUFFv07_freeDCtx((ZBUFFv07_DCtx*)legacyContext);
238
+ #endif
140
239
  }
141
240
  }
142
241
 
@@ -152,6 +251,7 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
152
251
  case 2 :
153
252
  case 3 :
154
253
  return 0;
254
+ #if (ZSTD_LEGACY_SUPPORT <= 4)
155
255
  case 4 :
156
256
  {
157
257
  ZBUFFv04_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv04_createDCtx() : (ZBUFFv04_DCtx*)*legacyContext;
@@ -161,6 +261,8 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
161
261
  *legacyContext = dctx;
162
262
  return 0;
163
263
  }
264
+ #endif
265
+ #if (ZSTD_LEGACY_SUPPORT <= 5)
164
266
  case 5 :
165
267
  {
166
268
  ZBUFFv05_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv05_createDCtx() : (ZBUFFv05_DCtx*)*legacyContext;
@@ -169,6 +271,8 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
169
271
  *legacyContext = dctx;
170
272
  return 0;
171
273
  }
274
+ #endif
275
+ #if (ZSTD_LEGACY_SUPPORT <= 6)
172
276
  case 6 :
173
277
  {
174
278
  ZBUFFv06_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv06_createDCtx() : (ZBUFFv06_DCtx*)*legacyContext;
@@ -177,6 +281,8 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
177
281
  *legacyContext = dctx;
178
282
  return 0;
179
283
  }
284
+ #endif
285
+ #if (ZSTD_LEGACY_SUPPORT <= 7)
180
286
  case 7 :
181
287
  {
182
288
  ZBUFFv07_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv07_createDCtx() : (ZBUFFv07_DCtx*)*legacyContext;
@@ -185,6 +291,7 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
185
291
  *legacyContext = dctx;
186
292
  return 0;
187
293
  }
294
+ #endif
188
295
  }
189
296
  }
190
297
 
@@ -200,6 +307,7 @@ MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
200
307
  case 2 :
201
308
  case 3 :
202
309
  return ERROR(version_unsupported);
310
+ #if (ZSTD_LEGACY_SUPPORT <= 4)
203
311
  case 4 :
204
312
  {
205
313
  ZBUFFv04_DCtx* dctx = (ZBUFFv04_DCtx*) legacyContext;
@@ -212,6 +320,8 @@ MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
212
320
  input->pos += readSize;
213
321
  return hintSize;
214
322
  }
323
+ #endif
324
+ #if (ZSTD_LEGACY_SUPPORT <= 5)
215
325
  case 5 :
216
326
  {
217
327
  ZBUFFv05_DCtx* dctx = (ZBUFFv05_DCtx*) legacyContext;
@@ -224,6 +334,8 @@ MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
224
334
  input->pos += readSize;
225
335
  return hintSize;
226
336
  }
337
+ #endif
338
+ #if (ZSTD_LEGACY_SUPPORT <= 6)
227
339
  case 6 :
228
340
  {
229
341
  ZBUFFv06_DCtx* dctx = (ZBUFFv06_DCtx*) legacyContext;
@@ -236,6 +348,8 @@ MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
236
348
  input->pos += readSize;
237
349
  return hintSize;
238
350
  }
351
+ #endif
352
+ #if (ZSTD_LEGACY_SUPPORT <= 7)
239
353
  case 7 :
240
354
  {
241
355
  ZBUFFv07_DCtx* dctx = (ZBUFFv07_DCtx*) legacyContext;
@@ -248,6 +362,7 @@ MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
248
362
  input->pos += readSize;
249
363
  return hintSize;
250
364
  }
365
+ #endif
251
366
  }
252
367
  }
253
368
 
@@ -1354,7 +1354,7 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
1354
1354
 
1355
1355
  #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
1356
1356
 
1357
- static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
1357
+ static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
1358
1358
  {
1359
1359
  const BYTE* ip = (const BYTE*)src;
1360
1360
  BYTE* op = (BYTE*)dst;
@@ -1803,7 +1803,7 @@ static size_t ZSTD_execSequence(BYTE* op,
1803
1803
  } else { ZSTD_copy8(op, match); }
1804
1804
  op += 8; match += 8;
1805
1805
 
1806
- if (endMatch > oend-12)
1806
+ if (endMatch > oend-(16-MINMATCH))
1807
1807
  {
1808
1808
  if (op < oend-8)
1809
1809
  {
@@ -1814,7 +1814,7 @@ static size_t ZSTD_execSequence(BYTE* op,
1814
1814
  while (op<endMatch) *op++ = *match++;
1815
1815
  }
1816
1816
  else
1817
- ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
1817
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
1818
1818
 
1819
1819
  /* restore, in case of overlap */
1820
1820
  if (overlapRisk) memcpy(endMatch, saved, qutt);
@@ -1992,6 +1992,37 @@ size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t
1992
1992
  return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
1993
1993
  }
1994
1994
 
1995
+ size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize)
1996
+ { /* Walks the block headers of one v0.1 frame without decoding it; returns the number of bytes from src to the end of the frame, or an error code. */
1997
+ const BYTE* ip = (const BYTE*)src; /* read cursor, advanced past each header and block */
1998
+ size_t remainingSize = srcSize; /* bytes left between ip and the end of the input */
1999
+ U32 magicNumber;
2000
+ blockProperties_t blockProperties; /* filled by ZSTDv01_getcBlockSize; not read in this function */
2001
+
2002
+ /* Frame Header : input must hold at least the frame header plus one block header */
2003
+ if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
2004
+ magicNumber = ZSTD_readBE32(src);
2005
+ if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
2006
+ ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
2007
+
2008
+ /* Loop on each block : skip header + payload until a zero-sized block is met */
2009
+ while (1)
2010
+ {
2011
+ size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
2012
+ if (ZSTDv01_isError(blockSize)) return blockSize; /* propagate the error code unchanged */
2013
+
2014
+ ip += ZSTD_blockHeaderSize;
2015
+ remainingSize -= ZSTD_blockHeaderSize; /* presumably safe because ZSTDv01_getcBlockSize rejects inputs shorter than a block header - verify */
2016
+ if (blockSize > remainingSize) return ERROR(srcSize_wrong); /* block payload would run past the input */
2017
+
2018
+ if (blockSize == 0) break; /* bt_end */
2019
+
2020
+ ip += blockSize;
2021
+ remainingSize -= blockSize;
2022
+ }
2023
+
2024
+ return ip - (const BYTE*)src; /* total bytes consumed, last block header included */
2025
+ }
1995
2026
 
1996
2027
  /*******************************
1997
2028
  * Streaming Decompression API