extzstd 0.3.2 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. checksums.yaml +4 -4
  2. data/README.md +4 -3
  3. data/contrib/zstd/CHANGELOG +225 -1
  4. data/contrib/zstd/CONTRIBUTING.md +158 -75
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +106 -69
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +64 -36
  9. data/contrib/zstd/SECURITY.md +15 -0
  10. data/contrib/zstd/TESTING.md +2 -3
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +117 -199
  13. data/contrib/zstd/lib/README.md +37 -7
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +80 -86
  17. data/contrib/zstd/lib/common/compiler.h +225 -63
  18. data/contrib/zstd/lib/common/cpu.h +37 -1
  19. data/contrib/zstd/lib/common/debug.c +7 -1
  20. data/contrib/zstd/lib/common/debug.h +21 -12
  21. data/contrib/zstd/lib/common/entropy_common.c +15 -37
  22. data/contrib/zstd/lib/common/error_private.c +9 -2
  23. data/contrib/zstd/lib/common/error_private.h +93 -5
  24. data/contrib/zstd/lib/common/fse.h +12 -87
  25. data/contrib/zstd/lib/common/fse_decompress.c +37 -117
  26. data/contrib/zstd/lib/common/huf.h +97 -172
  27. data/contrib/zstd/lib/common/mem.h +58 -58
  28. data/contrib/zstd/lib/common/pool.c +38 -17
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +158 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -14
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +6 -814
  34. data/contrib/zstd/lib/common/xxhash.h +6930 -195
  35. data/contrib/zstd/lib/common/zstd_common.c +1 -36
  36. data/contrib/zstd/lib/common/zstd_deps.h +1 -1
  37. data/contrib/zstd/lib/common/zstd_internal.h +68 -154
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +75 -155
  41. data/contrib/zstd/lib/compress/hist.c +1 -1
  42. data/contrib/zstd/lib/compress/hist.h +1 -1
  43. data/contrib/zstd/lib/compress/huf_compress.c +810 -259
  44. data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
  63. data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
  79. data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +237 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +4 -3
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
  102. data/contrib/zstd/lib/zstd.h +1030 -332
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
  104. data/ext/extconf.rb +26 -7
  105. data/ext/extzstd.c +51 -24
  106. data/ext/extzstd.h +33 -6
  107. data/ext/extzstd_stream.c +74 -31
  108. data/ext/libzstd_conf.h +0 -1
  109. data/ext/zstd_decompress_asm.S +1 -0
  110. metadata +17 -7
  111. data/contrib/zstd/appveyor.yml +0 -292
  112. data/ext/depend +0 -2
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -33,6 +33,12 @@
33
33
  */
34
34
 
35
35
 
36
+ /* Streaming state is used to inform allocation of the literal buffer */
37
+ typedef enum {
38
+ not_streaming = 0,
39
+ is_streaming = 1
40
+ } streaming_operation;
41
+
36
42
  /* ZSTD_decompressBlock_internal() :
37
43
  * decompress block, starting at `src`,
38
44
  * into destination buffer `dst`.
@@ -41,7 +47,7 @@
41
47
  */
42
48
  size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
43
49
  void* dst, size_t dstCapacity,
44
- const void* src, size_t srcSize, const int frame);
50
+ const void* src, size_t srcSize, const streaming_operation streaming);
45
51
 
46
52
  /* ZSTD_buildFSETable() :
47
53
  * generate FSE decoding table for one symbol (ll, ml or off)
@@ -54,9 +60,14 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
54
60
  */
55
61
  void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
56
62
  const short* normalizedCounter, unsigned maxSymbolValue,
57
- const U32* baseValue, const U32* nbAdditionalBits,
63
+ const U32* baseValue, const U8* nbAdditionalBits,
58
64
  unsigned tableLog, void* wksp, size_t wkspSize,
59
65
  int bmi2);
60
66
 
67
+ /* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
68
+ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
69
+ void* dst, size_t dstCapacity,
70
+ const void* src, size_t srcSize);
71
+
61
72
 
62
73
  #endif /* ZSTD_DEC_BLOCK_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -20,7 +20,7 @@
20
20
  * Dependencies
21
21
  *********************************************************/
22
22
  #include "../common/mem.h" /* BYTE, U16, U32 */
23
- #include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
23
+ #include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
24
24
 
25
25
 
26
26
 
@@ -40,7 +40,7 @@ static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
40
40
  0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
41
41
  0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
42
42
 
43
- static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
43
+ static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
44
44
  0, 1, 2, 3, 4, 5, 6, 7,
45
45
  8, 9, 10, 11, 12, 13, 14, 15,
46
46
  16, 17, 18, 19, 20, 21, 22, 23,
@@ -75,12 +75,13 @@ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
75
75
 
76
76
  #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
77
77
  #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
78
+ #define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
78
79
 
79
80
  typedef struct {
80
81
  ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
81
82
  ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
82
83
  ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
83
- HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
84
+ HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
84
85
  U32 rep[ZSTD_REP_NUM];
85
86
  U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
86
87
  } ZSTD_entropyDTables_t;
@@ -99,6 +100,29 @@ typedef enum {
99
100
  ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
100
101
  } ZSTD_dictUses_e;
101
102
 
103
+ /* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
104
+ typedef struct {
105
+ const ZSTD_DDict** ddictPtrTable;
106
+ size_t ddictPtrTableSize;
107
+ size_t ddictPtrCount;
108
+ } ZSTD_DDictHashSet;
109
+
110
+ #ifndef ZSTD_DECODER_INTERNAL_BUFFER
111
+ # define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
112
+ #endif
113
+
114
+ #define ZSTD_LBMIN 64
115
+ #define ZSTD_LBMAX (128 << 10)
116
+
117
+ /* extra buffer, compensates when dst is not large enough to store litBuffer */
118
+ #define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
119
+
120
+ typedef enum {
121
+ ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
122
+ ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
123
+ ZSTD_split = 2 /* Split between litExtraBuffer and dst */
124
+ } ZSTD_litLocation_e;
125
+
102
126
  struct ZSTD_DCtx_s
103
127
  {
104
128
  const ZSTD_seqSymbol* LLTptr;
@@ -113,6 +137,7 @@ struct ZSTD_DCtx_s
113
137
  const void* dictEnd; /* end of previous segment */
114
138
  size_t expected;
115
139
  ZSTD_frameHeader fParams;
140
+ U64 processedCSize;
116
141
  U64 decodedSize;
117
142
  blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
118
143
  ZSTD_dStage stage;
@@ -128,7 +153,10 @@ struct ZSTD_DCtx_s
128
153
  size_t litSize;
129
154
  size_t rleSize;
130
155
  size_t staticSize;
156
+ int isFrameDecompression;
157
+ #if DYNAMIC_BMI2 != 0
131
158
  int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
159
+ #endif
132
160
 
133
161
  /* dictionary */
134
162
  ZSTD_DDict* ddictLocal;
@@ -136,6 +164,10 @@ struct ZSTD_DCtx_s
136
164
  U32 dictID;
137
165
  int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
138
166
  ZSTD_dictUses_e dictUses;
167
+ ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
168
+ ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
169
+ int disableHufAsm;
170
+ int maxBlockSizeParam;
139
171
 
140
172
  /* streaming */
141
173
  ZSTD_dStreamStage streamStage;
@@ -148,16 +180,21 @@ struct ZSTD_DCtx_s
148
180
  size_t outStart;
149
181
  size_t outEnd;
150
182
  size_t lhSize;
183
+ #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
151
184
  void* legacyContext;
152
185
  U32 previousLegacyVersion;
153
186
  U32 legacyVersion;
187
+ #endif
154
188
  U32 hostageByte;
155
189
  int noForwardProgress;
156
190
  ZSTD_bufferMode_e outBufferMode;
157
191
  ZSTD_outBuffer expectedOutBuffer;
158
192
 
159
193
  /* workspace */
160
- BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
194
+ BYTE* litBuffer;
195
+ const BYTE* litBufferEnd;
196
+ ZSTD_litLocation_e litBufferLocation;
197
+ BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
161
198
  BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
162
199
 
163
200
  size_t oversizedDuration;
@@ -166,8 +203,21 @@ struct ZSTD_DCtx_s
166
203
  void const* dictContentBeginForFuzzing;
167
204
  void const* dictContentEndForFuzzing;
168
205
  #endif
206
+
207
+ /* Tracing */
208
+ #if ZSTD_TRACE
209
+ ZSTD_TraceCtx traceCtx;
210
+ #endif
169
211
  }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
170
212
 
213
+ MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
214
+ #if DYNAMIC_BMI2 != 0
215
+ return dctx->bmi2;
216
+ #else
217
+ (void)dctx;
218
+ return 0;
219
+ #endif
220
+ }
171
221
 
172
222
  /*-*******************************************************
173
223
  * Shared internal functions
@@ -184,7 +234,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
184
234
  * If yes, do nothing (continue on current segment).
185
235
  * If not, classify previous segment as "external dictionary", and start a new segment.
186
236
  * This function cannot fail. */
187
- void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
237
+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
188
238
 
189
239
 
190
240
  #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,6 +15,7 @@
15
15
  ***************************************/
16
16
  #define ZBUFF_STATIC_LINKING_ONLY
17
17
  #include "zbuff.h"
18
+ #include "../common/error_private.h"
18
19
 
19
20
 
20
21
  /*-***********************************************************
@@ -73,13 +74,32 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
73
74
  ZSTD_parameters params, unsigned long long pledgedSrcSize)
74
75
  {
75
76
  if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* preserve "0 == unknown" behavior */
76
- return ZSTD_initCStream_advanced(zbc, dict, dictSize, params, pledgedSrcSize);
77
+ FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only), "");
78
+ FORWARD_IF_ERROR(ZSTD_CCtx_setPledgedSrcSize(zbc, pledgedSrcSize), "");
79
+
80
+ FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
81
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_windowLog, params.cParams.windowLog), "");
82
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_hashLog, params.cParams.hashLog), "");
83
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_chainLog, params.cParams.chainLog), "");
84
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_searchLog, params.cParams.searchLog), "");
85
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_minMatch, params.cParams.minMatch), "");
86
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_targetLength, params.cParams.targetLength), "");
87
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_strategy, params.cParams.strategy), "");
88
+
89
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_contentSizeFlag, params.fParams.contentSizeFlag), "");
90
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_checksumFlag, params.fParams.checksumFlag), "");
91
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_dictIDFlag, params.fParams.noDictIDFlag), "");
92
+
93
+ FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), "");
94
+ return 0;
77
95
  }
78
96
 
79
-
80
97
  size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
81
98
  {
82
- return ZSTD_initCStream_usingDict(zbc, dict, dictSize, compressionLevel);
99
+ FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only), "");
100
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_compressionLevel, compressionLevel), "");
101
+ FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), "");
102
+ return 0;
83
103
  }
84
104
 
85
105
  size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -13,6 +13,8 @@
13
13
  /* *************************************
14
14
  * Dependencies
15
15
  ***************************************/
16
+ #define ZSTD_DISABLE_DEPRECATE_WARNINGS /* suppress warning on ZSTD_initDStream_usingDict */
17
+ #include "../zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
16
18
  #define ZBUFF_STATIC_LINKING_ONLY
17
19
  #include "zbuff.h"
18
20
 
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -26,19 +26,28 @@
26
26
  #include <string.h> /* memset */
27
27
  #include <time.h> /* clock */
28
28
 
29
- #include "../common/mem.h" /* read */
30
- #include "../common/pool.h"
31
- #include "../common/threading.h"
32
- #include "cover.h"
33
- #include "../common/zstd_internal.h" /* includes zstd.h */
34
29
  #ifndef ZDICT_STATIC_LINKING_ONLY
35
- #define ZDICT_STATIC_LINKING_ONLY
30
+ # define ZDICT_STATIC_LINKING_ONLY
36
31
  #endif
37
- #include "zdict.h"
32
+
33
+ #include "../common/mem.h" /* read */
34
+ #include "../common/pool.h" /* POOL_ctx */
35
+ #include "../common/threading.h" /* ZSTD_pthread_mutex_t */
36
+ #include "../common/zstd_internal.h" /* includes zstd.h */
37
+ #include "../common/bits.h" /* ZSTD_highbit32 */
38
+ #include "../zdict.h"
39
+ #include "cover.h"
38
40
 
39
41
  /*-*************************************
40
42
  * Constants
41
43
  ***************************************/
44
+ /**
45
+ * There are 32bit indexes used to ref samples, so limit samples size to 4GB
46
+ * on 64bit builds.
47
+ * For 32bit builds we choose 1 GB.
48
+ * Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
49
+ * contiguous buffer, so 1GB is already a high limit.
50
+ */
42
51
  #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
43
52
  #define COVER_DEFAULT_SPLITPOINT 1.0
44
53
 
@@ -46,7 +55,7 @@
46
55
  * Console display
47
56
  ***************************************/
48
57
  #ifndef LOCALDISPLAYLEVEL
49
- static int g_displayLevel = 2;
58
+ static int g_displayLevel = 0;
50
59
  #endif
51
60
  #undef DISPLAY
52
61
  #define DISPLAY(...) \
@@ -69,7 +78,7 @@ static clock_t g_time = 0;
69
78
  #undef LOCALDISPLAYUPDATE
70
79
  #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
71
80
  if (displayLevel >= l) { \
72
- if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
81
+ if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
73
82
  g_time = clock(); \
74
83
  DISPLAY(__VA_ARGS__); \
75
84
  } \
@@ -292,9 +301,10 @@ static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
292
301
  * Returns the first pointer in [first, last) whose element does not compare
293
302
  * less than value. If no such element exists it returns last.
294
303
  */
295
- static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
304
+ static const size_t *COVER_lower_bound(const size_t* first, const size_t* last,
296
305
  size_t value) {
297
- size_t count = last - first;
306
+ size_t count = (size_t)(last - first);
307
+ assert(last >= first);
298
308
  while (count != 0) {
299
309
  size_t step = count / 2;
300
310
  const size_t *ptr = first;
@@ -533,14 +543,15 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
533
543
 
534
544
  /**
535
545
  * Prepare a context for dictionary building.
536
- * The context is only dependent on the parameter `d` and can used multiple
546
+ * The context is only dependent on the parameter `d` and can be used multiple
537
547
  * times.
538
548
  * Returns 0 on success or error code on error.
539
549
  * The context must be destroyed with `COVER_ctx_destroy()`.
540
550
  */
541
551
  static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
542
552
  const size_t *samplesSizes, unsigned nbSamples,
543
- unsigned d, double splitPoint) {
553
+ unsigned d, double splitPoint)
554
+ {
544
555
  const BYTE *const samples = (const BYTE *)samplesBuffer;
545
556
  const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
546
557
  /* Split samples into testing and training sets */
@@ -638,7 +649,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
638
649
 
639
650
  void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
640
651
  {
641
- const double ratio = (double)nbDmers / maxDictSize;
652
+ const double ratio = (double)nbDmers / (double)maxDictSize;
642
653
  if (ratio >= 10) {
643
654
  return;
644
655
  }
@@ -724,7 +735,7 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
724
735
  return tail;
725
736
  }
726
737
 
727
- ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
738
+ ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover(
728
739
  void *dictBuffer, size_t dictBufferCapacity,
729
740
  const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
730
741
  ZDICT_cover_params_t parameters)
@@ -734,7 +745,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
734
745
  COVER_map_t activeDmers;
735
746
  parameters.splitPoint = 1.0;
736
747
  /* Initialize global data */
737
- g_displayLevel = parameters.zParams.notificationLevel;
748
+ g_displayLevel = (int)parameters.zParams.notificationLevel;
738
749
  /* Checks */
739
750
  if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
740
751
  DISPLAYLEVEL(1, "Cover parameters incorrect\n");
@@ -898,8 +909,10 @@ void COVER_best_start(COVER_best_t *best) {
898
909
  * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
899
910
  * If this dictionary is the best so far save it and its parameters.
900
911
  */
901
- void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
902
- COVER_dictSelection_t selection) {
912
+ void COVER_best_finish(COVER_best_t* best,
913
+ ZDICT_cover_params_t parameters,
914
+ COVER_dictSelection_t selection)
915
+ {
903
916
  void* dict = selection.dictContent;
904
917
  size_t compressedSize = selection.totalCompressedSize;
905
918
  size_t dictSize = selection.dictSize;
@@ -942,9 +955,17 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
942
955
  }
943
956
  }
944
957
 
958
+ static COVER_dictSelection_t setDictSelection(BYTE* buf, size_t s, size_t csz)
959
+ {
960
+ COVER_dictSelection_t ds;
961
+ ds.dictContent = buf;
962
+ ds.dictSize = s;
963
+ ds.totalCompressedSize = csz;
964
+ return ds;
965
+ }
966
+
945
967
  COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
946
- COVER_dictSelection_t selection = { NULL, 0, error };
947
- return selection;
968
+ return setDictSelection(NULL, 0, error);
948
969
  }
949
970
 
950
971
  unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
@@ -963,8 +984,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
963
984
  size_t largestCompressed = 0;
964
985
  BYTE* customDictContentEnd = customDictContent + dictContentSize;
965
986
 
966
- BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
967
- BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
987
+ BYTE* largestDictbuffer = (BYTE*)malloc(dictBufferCapacity);
988
+ BYTE* candidateDictBuffer = (BYTE*)malloc(dictBufferCapacity);
968
989
  double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
969
990
 
970
991
  if (!largestDictbuffer || !candidateDictBuffer) {
@@ -997,9 +1018,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
997
1018
  }
998
1019
 
999
1020
  if (params.shrinkDict == 0) {
1000
- COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
1001
1021
  free(candidateDictBuffer);
1002
- return selection;
1022
+ return setDictSelection(largestDictbuffer, dictContentSize, totalCompressedSize);
1003
1023
  }
1004
1024
 
1005
1025
  largestDict = dictContentSize;
@@ -1031,20 +1051,16 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
1031
1051
  return COVER_dictSelectionError(totalCompressedSize);
1032
1052
  }
1033
1053
 
1034
- if (totalCompressedSize <= largestCompressed * regressionTolerance) {
1035
- COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
1054
+ if ((double)totalCompressedSize <= (double)largestCompressed * regressionTolerance) {
1036
1055
  free(largestDictbuffer);
1037
- return selection;
1056
+ return setDictSelection( candidateDictBuffer, dictContentSize, totalCompressedSize );
1038
1057
  }
1039
1058
  dictContentSize *= 2;
1040
1059
  }
1041
1060
  dictContentSize = largestDict;
1042
1061
  totalCompressedSize = largestCompressed;
1043
- {
1044
- COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
1045
- free(candidateDictBuffer);
1046
- return selection;
1047
- }
1062
+ free(candidateDictBuffer);
1063
+ return setDictSelection( largestDictbuffer, dictContentSize, totalCompressedSize );
1048
1064
  }
1049
1065
 
1050
1066
  /**
@@ -1062,18 +1078,19 @@ typedef struct COVER_tryParameters_data_s {
1062
1078
  * This function is thread safe if zstd is compiled with multithreaded support.
1063
1079
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
1064
1080
  */
1065
- static void COVER_tryParameters(void *opaque) {
1081
+ static void COVER_tryParameters(void *opaque)
1082
+ {
1066
1083
  /* Save parameters as local variables */
1067
- COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
1084
+ COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
1068
1085
  const COVER_ctx_t *const ctx = data->ctx;
1069
1086
  const ZDICT_cover_params_t parameters = data->parameters;
1070
1087
  size_t dictBufferCapacity = data->dictBufferCapacity;
1071
1088
  size_t totalCompressedSize = ERROR(GENERIC);
1072
1089
  /* Allocate space for hash table, dict, and freqs */
1073
1090
  COVER_map_t activeDmers;
1074
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
1091
+ BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
1075
1092
  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
1076
- U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
1093
+ U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
1077
1094
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
1078
1095
  DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
1079
1096
  goto _cleanup;
@@ -1103,15 +1120,14 @@ _cleanup:
1103
1120
  free(data);
1104
1121
  COVER_map_destroy(&activeDmers);
1105
1122
  COVER_dictSelectionFree(selection);
1106
- if (freqs) {
1107
- free(freqs);
1108
- }
1123
+ free(freqs);
1109
1124
  }
1110
1125
 
1111
- ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1112
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
1113
- const size_t *samplesSizes, unsigned nbSamples,
1114
- ZDICT_cover_params_t *parameters) {
1126
+ ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1127
+ void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
1128
+ const size_t* samplesSizes, unsigned nbSamples,
1129
+ ZDICT_cover_params_t* parameters)
1130
+ {
1115
1131
  /* constants */
1116
1132
  const unsigned nbThreads = parameters->nbThreads;
1117
1133
  const double splitPoint =
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2017-2020, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -8,18 +8,13 @@
8
8
  * You may select, at your option, one of the above-listed licenses.
9
9
  */
10
10
 
11
- #include <stdio.h> /* fprintf */
12
- #include <stdlib.h> /* malloc, free, qsort */
13
- #include <string.h> /* memset */
14
- #include <time.h> /* clock */
15
- #include "../common/mem.h" /* read */
16
- #include "../common/pool.h"
17
- #include "../common/threading.h"
18
- #include "../common/zstd_internal.h" /* includes zstd.h */
19
11
  #ifndef ZDICT_STATIC_LINKING_ONLY
20
- #define ZDICT_STATIC_LINKING_ONLY
12
+ # define ZDICT_STATIC_LINKING_ONLY
21
13
  #endif
22
- #include "zdict.h"
14
+
15
+ #include "../common/threading.h" /* ZSTD_pthread_mutex_t */
16
+ #include "../common/mem.h" /* U32, BYTE */
17
+ #include "../zdict.h"
23
18
 
24
19
  /**
25
20
  * COVER_best_t is used for two purposes:
@@ -1576,7 +1576,7 @@ note:
1576
1576
  /* Construct the inverse suffix array of type B* suffixes using trsort. */
1577
1577
  trsort(ISAb, SA, m, 1);
1578
1578
 
1579
- /* Set the sorted order of tyoe B* suffixes. */
1579
+ /* Set the sorted order of type B* suffixes. */
1580
1580
  for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1581
1581
  for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1582
1582
  if(0 <= i) {