extzstd 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/contrib/zstd/CHANGELOG +188 -1
  4. data/contrib/zstd/CONTRIBUTING.md +157 -74
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +81 -58
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +59 -35
  9. data/contrib/zstd/TESTING.md +2 -3
  10. data/contrib/zstd/appveyor.yml +49 -136
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +87 -181
  13. data/contrib/zstd/lib/README.md +23 -6
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +33 -59
  17. data/contrib/zstd/lib/common/compiler.h +115 -45
  18. data/contrib/zstd/lib/common/cpu.h +1 -1
  19. data/contrib/zstd/lib/common/debug.c +1 -1
  20. data/contrib/zstd/lib/common/debug.h +1 -1
  21. data/contrib/zstd/lib/common/entropy_common.c +15 -37
  22. data/contrib/zstd/lib/common/error_private.c +9 -2
  23. data/contrib/zstd/lib/common/error_private.h +82 -3
  24. data/contrib/zstd/lib/common/fse.h +9 -85
  25. data/contrib/zstd/lib/common/fse_decompress.c +29 -111
  26. data/contrib/zstd/lib/common/huf.h +84 -172
  27. data/contrib/zstd/lib/common/mem.h +58 -49
  28. data/contrib/zstd/lib/common/pool.c +37 -16
  29. data/contrib/zstd/lib/common/pool.h +9 -3
  30. data/contrib/zstd/lib/common/portability_macros.h +156 -0
  31. data/contrib/zstd/lib/common/threading.c +68 -14
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +7 -809
  34. data/contrib/zstd/lib/common/xxhash.h +5568 -167
  35. data/contrib/zstd/lib/common/zstd_common.c +1 -36
  36. data/contrib/zstd/lib/common/zstd_deps.h +1 -1
  37. data/contrib/zstd/lib/common/zstd_internal.h +64 -150
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +69 -150
  41. data/contrib/zstd/lib/compress/hist.c +1 -1
  42. data/contrib/zstd/lib/compress/hist.h +1 -1
  43. data/contrib/zstd/lib/compress/huf_compress.c +773 -251
  44. data/contrib/zstd/lib/compress/zstd_compress.c +2650 -826
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +509 -180
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +33 -305
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +266 -85
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +369 -132
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +3 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +722 -258
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1105 -360
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +41 -1
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +272 -208
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +324 -197
  63. data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +109 -53
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1071 -539
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +576 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +507 -82
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +962 -310
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +54 -6
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +44 -32
  79. data/contrib/zstd/lib/dictBuilder/cover.h +6 -5
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +24 -16
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +88 -95
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +16 -53
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +24 -69
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +25 -72
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +23 -69
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +35 -85
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +42 -87
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +35 -82
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +214 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +4 -3
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
  102. data/contrib/zstd/lib/zstd.h +922 -293
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
  104. data/ext/extconf.rb +7 -6
  105. data/ext/extzstd.c +13 -10
  106. data/ext/libzstd_conf.h +0 -1
  107. data/ext/zstd_decompress_asm.S +1 -0
  108. metadata +16 -5
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -33,6 +33,12 @@
33
33
  */
34
34
 
35
35
 
36
+ /* Streaming state is used to inform allocation of the literal buffer */
37
+ typedef enum {
38
+ not_streaming = 0,
39
+ is_streaming = 1
40
+ } streaming_operation;
41
+
36
42
  /* ZSTD_decompressBlock_internal() :
37
43
  * decompress block, starting at `src`,
38
44
  * into destination buffer `dst`.
@@ -41,7 +47,7 @@
41
47
  */
42
48
  size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
43
49
  void* dst, size_t dstCapacity,
44
- const void* src, size_t srcSize, const int frame);
50
+ const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
45
51
 
46
52
  /* ZSTD_buildFSETable() :
47
53
  * generate FSE decoding table for one symbol (ll, ml or off)
@@ -54,9 +60,14 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
54
60
  */
55
61
  void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
56
62
  const short* normalizedCounter, unsigned maxSymbolValue,
57
- const U32* baseValue, const U32* nbAdditionalBits,
63
+ const U32* baseValue, const U8* nbAdditionalBits,
58
64
  unsigned tableLog, void* wksp, size_t wkspSize,
59
65
  int bmi2);
60
66
 
67
+ /* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
68
+ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
69
+ void* dst, size_t dstCapacity,
70
+ const void* src, size_t srcSize);
71
+
61
72
 
62
73
  #endif /* ZSTD_DEC_BLOCK_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -20,7 +20,7 @@
20
20
  * Dependencies
21
21
  *********************************************************/
22
22
  #include "../common/mem.h" /* BYTE, U16, U32 */
23
- #include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
23
+ #include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
24
24
 
25
25
 
26
26
 
@@ -40,7 +40,7 @@ static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
40
40
  0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
41
41
  0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
42
42
 
43
- static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
43
+ static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
44
44
  0, 1, 2, 3, 4, 5, 6, 7,
45
45
  8, 9, 10, 11, 12, 13, 14, 15,
46
46
  16, 17, 18, 19, 20, 21, 22, 23,
@@ -75,12 +75,13 @@ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
75
75
 
76
76
  #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
77
77
  #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
78
+ #define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
78
79
 
79
80
  typedef struct {
80
81
  ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
81
82
  ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
82
83
  ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
83
- HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
84
+ HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
84
85
  U32 rep[ZSTD_REP_NUM];
85
86
  U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
86
87
  } ZSTD_entropyDTables_t;
@@ -99,6 +100,29 @@ typedef enum {
99
100
  ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
100
101
  } ZSTD_dictUses_e;
101
102
 
103
+ /* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
104
+ typedef struct {
105
+ const ZSTD_DDict** ddictPtrTable;
106
+ size_t ddictPtrTableSize;
107
+ size_t ddictPtrCount;
108
+ } ZSTD_DDictHashSet;
109
+
110
+ #ifndef ZSTD_DECODER_INTERNAL_BUFFER
111
+ # define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
112
+ #endif
113
+
114
+ #define ZSTD_LBMIN 64
115
+ #define ZSTD_LBMAX (128 << 10)
116
+
117
+ /* extra buffer, compensates when dst is not large enough to store litBuffer */
118
+ #define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
119
+
120
+ typedef enum {
121
+ ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
122
+ ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
123
+ ZSTD_split = 2 /* Split between litExtraBuffer and dst */
124
+ } ZSTD_litLocation_e;
125
+
102
126
  struct ZSTD_DCtx_s
103
127
  {
104
128
  const ZSTD_seqSymbol* LLTptr;
@@ -113,6 +137,7 @@ struct ZSTD_DCtx_s
113
137
  const void* dictEnd; /* end of previous segment */
114
138
  size_t expected;
115
139
  ZSTD_frameHeader fParams;
140
+ U64 processedCSize;
116
141
  U64 decodedSize;
117
142
  blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
118
143
  ZSTD_dStage stage;
@@ -128,7 +153,9 @@ struct ZSTD_DCtx_s
128
153
  size_t litSize;
129
154
  size_t rleSize;
130
155
  size_t staticSize;
156
+ #if DYNAMIC_BMI2 != 0
131
157
  int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
158
+ #endif
132
159
 
133
160
  /* dictionary */
134
161
  ZSTD_DDict* ddictLocal;
@@ -136,6 +163,9 @@ struct ZSTD_DCtx_s
136
163
  U32 dictID;
137
164
  int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
138
165
  ZSTD_dictUses_e dictUses;
166
+ ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
167
+ ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
168
+ int disableHufAsm;
139
169
 
140
170
  /* streaming */
141
171
  ZSTD_dStreamStage streamStage;
@@ -148,16 +178,21 @@ struct ZSTD_DCtx_s
148
178
  size_t outStart;
149
179
  size_t outEnd;
150
180
  size_t lhSize;
181
+ #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
151
182
  void* legacyContext;
152
183
  U32 previousLegacyVersion;
153
184
  U32 legacyVersion;
185
+ #endif
154
186
  U32 hostageByte;
155
187
  int noForwardProgress;
156
188
  ZSTD_bufferMode_e outBufferMode;
157
189
  ZSTD_outBuffer expectedOutBuffer;
158
190
 
159
191
  /* workspace */
160
- BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
192
+ BYTE* litBuffer;
193
+ const BYTE* litBufferEnd;
194
+ ZSTD_litLocation_e litBufferLocation;
195
+ BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
161
196
  BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
162
197
 
163
198
  size_t oversizedDuration;
@@ -166,8 +201,21 @@ struct ZSTD_DCtx_s
166
201
  void const* dictContentBeginForFuzzing;
167
202
  void const* dictContentEndForFuzzing;
168
203
  #endif
204
+
205
+ /* Tracing */
206
+ #if ZSTD_TRACE
207
+ ZSTD_TraceCtx traceCtx;
208
+ #endif
169
209
  }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
170
210
 
211
+ MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
212
+ #if DYNAMIC_BMI2 != 0
213
+ return dctx->bmi2;
214
+ #else
215
+ (void)dctx;
216
+ return 0;
217
+ #endif
218
+ }
171
219
 
172
220
  /*-*******************************************************
173
221
  * Shared internal functions
@@ -184,7 +232,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
184
232
  * If yes, do nothing (continue on current segment).
185
233
  * If not, classify previous segment as "external dictionary", and start a new segment.
186
234
  * This function cannot fail. */
187
- void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
235
+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
188
236
 
189
237
 
190
238
  #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,6 +15,7 @@
15
15
  ***************************************/
16
16
  #define ZBUFF_STATIC_LINKING_ONLY
17
17
  #include "zbuff.h"
18
+ #include "../common/error_private.h"
18
19
 
19
20
 
20
21
  /*-***********************************************************
@@ -73,13 +74,32 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
73
74
  ZSTD_parameters params, unsigned long long pledgedSrcSize)
74
75
  {
75
76
  if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* preserve "0 == unknown" behavior */
76
- return ZSTD_initCStream_advanced(zbc, dict, dictSize, params, pledgedSrcSize);
77
+ FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only), "");
78
+ FORWARD_IF_ERROR(ZSTD_CCtx_setPledgedSrcSize(zbc, pledgedSrcSize), "");
79
+
80
+ FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
81
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_windowLog, params.cParams.windowLog), "");
82
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_hashLog, params.cParams.hashLog), "");
83
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_chainLog, params.cParams.chainLog), "");
84
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_searchLog, params.cParams.searchLog), "");
85
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_minMatch, params.cParams.minMatch), "");
86
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_targetLength, params.cParams.targetLength), "");
87
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_strategy, params.cParams.strategy), "");
88
+
89
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_contentSizeFlag, params.fParams.contentSizeFlag), "");
90
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_checksumFlag, params.fParams.checksumFlag), "");
91
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_dictIDFlag, params.fParams.noDictIDFlag), "");
92
+
93
+ FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), "");
94
+ return 0;
77
95
  }
78
96
 
79
-
80
97
  size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
81
98
  {
82
- return ZSTD_initCStream_usingDict(zbc, dict, dictSize, compressionLevel);
99
+ FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only), "");
100
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_compressionLevel, compressionLevel), "");
101
+ FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), "");
102
+ return 0;
83
103
  }
84
104
 
85
105
  size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -13,6 +13,8 @@
13
13
  /* *************************************
14
14
  * Dependencies
15
15
  ***************************************/
16
+ #define ZSTD_DISABLE_DEPRECATE_WARNINGS /* suppress warning on ZSTD_initDStream_usingDict */
17
+ #include "../zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
16
18
  #define ZBUFF_STATIC_LINKING_ONLY
17
19
  #include "zbuff.h"
18
20
 
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -26,19 +26,28 @@
26
26
  #include <string.h> /* memset */
27
27
  #include <time.h> /* clock */
28
28
 
29
+ #ifndef ZDICT_STATIC_LINKING_ONLY
30
+ # define ZDICT_STATIC_LINKING_ONLY
31
+ #endif
32
+
29
33
  #include "../common/mem.h" /* read */
30
34
  #include "../common/pool.h"
31
35
  #include "../common/threading.h"
32
- #include "cover.h"
33
36
  #include "../common/zstd_internal.h" /* includes zstd.h */
34
- #ifndef ZDICT_STATIC_LINKING_ONLY
35
- #define ZDICT_STATIC_LINKING_ONLY
36
- #endif
37
- #include "zdict.h"
37
+ #include "../common/bits.h" /* ZSTD_highbit32 */
38
+ #include "../zdict.h"
39
+ #include "cover.h"
38
40
 
39
41
  /*-*************************************
40
42
  * Constants
41
43
  ***************************************/
44
+ /**
45
+ * There are 32bit indexes used to ref samples, so limit samples size to 4GB
46
+ * on 64bit builds.
47
+ * For 32bit builds we choose 1 GB.
48
+ * Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
49
+ * contiguous buffer, so 1GB is already a high limit.
50
+ */
42
51
  #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
43
52
  #define COVER_DEFAULT_SPLITPOINT 1.0
44
53
 
@@ -46,7 +55,7 @@
46
55
  * Console display
47
56
  ***************************************/
48
57
  #ifndef LOCALDISPLAYLEVEL
49
- static int g_displayLevel = 2;
58
+ static int g_displayLevel = 0;
50
59
  #endif
51
60
  #undef DISPLAY
52
61
  #define DISPLAY(...) \
@@ -533,7 +542,7 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
533
542
 
534
543
  /**
535
544
  * Prepare a context for dictionary building.
536
- * The context is only dependent on the parameter `d` and can used multiple
545
+ * The context is only dependent on the parameter `d` and can be used multiple
537
546
  * times.
538
547
  * Returns 0 on success or error code on error.
539
548
  * The context must be destroyed with `COVER_ctx_destroy()`.
@@ -638,7 +647,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
638
647
 
639
648
  void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
640
649
  {
641
- const double ratio = (double)nbDmers / maxDictSize;
650
+ const double ratio = (double)nbDmers / (double)maxDictSize;
642
651
  if (ratio >= 10) {
643
652
  return;
644
653
  }
@@ -734,7 +743,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
734
743
  COVER_map_t activeDmers;
735
744
  parameters.splitPoint = 1.0;
736
745
  /* Initialize global data */
737
- g_displayLevel = parameters.zParams.notificationLevel;
746
+ g_displayLevel = (int)parameters.zParams.notificationLevel;
738
747
  /* Checks */
739
748
  if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
740
749
  DISPLAYLEVEL(1, "Cover parameters incorrect\n");
@@ -942,9 +951,17 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
942
951
  }
943
952
  }
944
953
 
954
+ static COVER_dictSelection_t setDictSelection(BYTE* buf, size_t s, size_t csz)
955
+ {
956
+ COVER_dictSelection_t ds;
957
+ ds.dictContent = buf;
958
+ ds.dictSize = s;
959
+ ds.totalCompressedSize = csz;
960
+ return ds;
961
+ }
962
+
945
963
  COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
946
- COVER_dictSelection_t selection = { NULL, 0, error };
947
- return selection;
964
+ return setDictSelection(NULL, 0, error);
948
965
  }
949
966
 
950
967
  unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
@@ -997,9 +1014,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
997
1014
  }
998
1015
 
999
1016
  if (params.shrinkDict == 0) {
1000
- COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
1001
1017
  free(candidateDictBuffer);
1002
- return selection;
1018
+ return setDictSelection(largestDictbuffer, dictContentSize, totalCompressedSize);
1003
1019
  }
1004
1020
 
1005
1021
  largestDict = dictContentSize;
@@ -1031,20 +1047,16 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
1031
1047
  return COVER_dictSelectionError(totalCompressedSize);
1032
1048
  }
1033
1049
 
1034
- if (totalCompressedSize <= largestCompressed * regressionTolerance) {
1035
- COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
1050
+ if ((double)totalCompressedSize <= (double)largestCompressed * regressionTolerance) {
1036
1051
  free(largestDictbuffer);
1037
- return selection;
1052
+ return setDictSelection( candidateDictBuffer, dictContentSize, totalCompressedSize );
1038
1053
  }
1039
1054
  dictContentSize *= 2;
1040
1055
  }
1041
1056
  dictContentSize = largestDict;
1042
1057
  totalCompressedSize = largestCompressed;
1043
- {
1044
- COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
1045
- free(candidateDictBuffer);
1046
- return selection;
1047
- }
1058
+ free(candidateDictBuffer);
1059
+ return setDictSelection( largestDictbuffer, dictContentSize, totalCompressedSize );
1048
1060
  }
1049
1061
 
1050
1062
  /**
@@ -1062,18 +1074,19 @@ typedef struct COVER_tryParameters_data_s {
1062
1074
  * This function is thread safe if zstd is compiled with multithreaded support.
1063
1075
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
1064
1076
  */
1065
- static void COVER_tryParameters(void *opaque) {
1077
+ static void COVER_tryParameters(void *opaque)
1078
+ {
1066
1079
  /* Save parameters as local variables */
1067
- COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
1080
+ COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
1068
1081
  const COVER_ctx_t *const ctx = data->ctx;
1069
1082
  const ZDICT_cover_params_t parameters = data->parameters;
1070
1083
  size_t dictBufferCapacity = data->dictBufferCapacity;
1071
1084
  size_t totalCompressedSize = ERROR(GENERIC);
1072
1085
  /* Allocate space for hash table, dict, and freqs */
1073
1086
  COVER_map_t activeDmers;
1074
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
1087
+ BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
1075
1088
  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
1076
- U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
1089
+ U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
1077
1090
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
1078
1091
  DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
1079
1092
  goto _cleanup;
@@ -1103,15 +1116,14 @@ _cleanup:
1103
1116
  free(data);
1104
1117
  COVER_map_destroy(&activeDmers);
1105
1118
  COVER_dictSelectionFree(selection);
1106
- if (freqs) {
1107
- free(freqs);
1108
- }
1119
+ free(freqs);
1109
1120
  }
1110
1121
 
1111
1122
  ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1112
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
1113
- const size_t *samplesSizes, unsigned nbSamples,
1114
- ZDICT_cover_params_t *parameters) {
1123
+ void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
1124
+ const size_t* samplesSizes, unsigned nbSamples,
1125
+ ZDICT_cover_params_t* parameters)
1126
+ {
1115
1127
  /* constants */
1116
1128
  const unsigned nbThreads = parameters->nbThreads;
1117
1129
  const double splitPoint =
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2017-2020, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -8,6 +8,10 @@
8
8
  * You may select, at your option, one of the above-listed licenses.
9
9
  */
10
10
 
11
+ #ifndef ZDICT_STATIC_LINKING_ONLY
12
+ # define ZDICT_STATIC_LINKING_ONLY
13
+ #endif
14
+
11
15
  #include <stdio.h> /* fprintf */
12
16
  #include <stdlib.h> /* malloc, free, qsort */
13
17
  #include <string.h> /* memset */
@@ -16,10 +20,7 @@
16
20
  #include "../common/pool.h"
17
21
  #include "../common/threading.h"
18
22
  #include "../common/zstd_internal.h" /* includes zstd.h */
19
- #ifndef ZDICT_STATIC_LINKING_ONLY
20
- #define ZDICT_STATIC_LINKING_ONLY
21
- #endif
22
- #include "zdict.h"
23
+ #include "../zdict.h"
23
24
 
24
25
  /**
25
26
  * COVER_best_t is used for two purposes:
@@ -1576,7 +1576,7 @@ note:
1576
1576
  /* Construct the inverse suffix array of type B* suffixes using trsort. */
1577
1577
  trsort(ISAb, SA, m, 1);
1578
1578
 
1579
- /* Set the sorted order of tyoe B* suffixes. */
1579
+ /* Set the sorted order of type B* suffixes. */
1580
1580
  for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1581
1581
  for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1582
1582
  if(0 <= i) {
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2018-2020, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -16,21 +16,29 @@
16
16
  #include <string.h> /* memset */
17
17
  #include <time.h> /* clock */
18
18
 
19
+ #ifndef ZDICT_STATIC_LINKING_ONLY
20
+ # define ZDICT_STATIC_LINKING_ONLY
21
+ #endif
22
+
19
23
  #include "../common/mem.h" /* read */
20
24
  #include "../common/pool.h"
21
25
  #include "../common/threading.h"
22
- #include "cover.h"
23
26
  #include "../common/zstd_internal.h" /* includes zstd.h */
24
27
  #include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
25
- #ifndef ZDICT_STATIC_LINKING_ONLY
26
- #define ZDICT_STATIC_LINKING_ONLY
27
- #endif
28
- #include "zdict.h"
28
+ #include "../zdict.h"
29
+ #include "cover.h"
29
30
 
30
31
 
31
32
  /*-*************************************
32
33
  * Constants
33
34
  ***************************************/
35
+ /**
36
+ * There are 32bit indexes used to ref samples, so limit samples size to 4GB
37
+ * on 64bit builds.
38
+ * For 32bit builds we choose 1 GB.
39
+ * Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
40
+ * contiguous buffer, so 1GB is already a high limit.
41
+ */
34
42
  #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
35
43
  #define FASTCOVER_MAX_F 31
36
44
  #define FASTCOVER_MAX_ACCEL 10
@@ -43,7 +51,7 @@
43
51
  * Console display
44
52
  ***************************************/
45
53
  #ifndef LOCALDISPLAYLEVEL
46
- static int g_displayLevel = 2;
54
+ static int g_displayLevel = 0;
47
55
  #endif
48
56
  #undef DISPLAY
49
57
  #define DISPLAY(...) \
@@ -296,7 +304,7 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
296
304
 
297
305
  /**
298
306
  * Prepare a context for dictionary building.
299
- * The context is only dependent on the parameter `d` and can used multiple
307
+ * The context is only dependent on the parameter `d` and can be used multiple
300
308
  * times.
301
309
  * Returns 0 on success or error code on error.
302
310
  * The context must be destroyed with `FASTCOVER_ctx_destroy()`.
@@ -462,20 +470,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
462
470
  * This function is thread safe if zstd is compiled with multithreaded support.
463
471
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
464
472
  */
465
- static void FASTCOVER_tryParameters(void *opaque)
473
+ static void FASTCOVER_tryParameters(void* opaque)
466
474
  {
467
475
  /* Save parameters as local variables */
468
- FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
476
+ FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
469
477
  const FASTCOVER_ctx_t *const ctx = data->ctx;
470
478
  const ZDICT_cover_params_t parameters = data->parameters;
471
479
  size_t dictBufferCapacity = data->dictBufferCapacity;
472
480
  size_t totalCompressedSize = ERROR(GENERIC);
473
481
  /* Initialize array to keep track of frequency of dmer within activeSegment */
474
- U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
482
+ U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
475
483
  /* Allocate space for hash table, dict, and freqs */
476
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
484
+ BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
477
485
  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
478
- U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
486
+ U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
479
487
  if (!segmentFreqs || !dict || !freqs) {
480
488
  DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
481
489
  goto _cleanup;
@@ -548,7 +556,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
548
556
  ZDICT_cover_params_t coverParams;
549
557
  FASTCOVER_accel_t accelParams;
550
558
  /* Initialize global data */
551
- g_displayLevel = parameters.zParams.notificationLevel;
559
+ g_displayLevel = (int)parameters.zParams.notificationLevel;
552
560
  /* Assign splitPoint and f if not provided */
553
561
  parameters.splitPoint = 1.0;
554
562
  parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
@@ -631,7 +639,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
631
639
  const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
632
640
  const unsigned shrinkDict = 0;
633
641
  /* Local variables */
634
- const int displayLevel = parameters->zParams.notificationLevel;
642
+ const int displayLevel = (int)parameters->zParams.notificationLevel;
635
643
  unsigned iteration = 1;
636
644
  unsigned d;
637
645
  unsigned k;
@@ -715,7 +723,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
715
723
  data->parameters.splitPoint = splitPoint;
716
724
  data->parameters.steps = kSteps;
717
725
  data->parameters.shrinkDict = shrinkDict;
718
- data->parameters.zParams.notificationLevel = g_displayLevel;
726
+ data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel;
719
727
  /* Check the parameters */
720
728
  if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
721
729
  data->ctx->f, accel)) {