zstd-ruby 1.4.4.0 → 1.5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +78 -5
  4. data/Rakefile +8 -2
  5. data/ext/zstdruby/common.h +15 -0
  6. data/ext/zstdruby/extconf.rb +3 -2
  7. data/ext/zstdruby/libzstd/common/allocations.h +55 -0
  8. data/ext/zstdruby/libzstd/common/bits.h +200 -0
  9. data/ext/zstdruby/libzstd/common/bitstream.h +74 -97
  10. data/ext/zstdruby/libzstd/common/compiler.h +219 -20
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  13. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +184 -80
  15. data/ext/zstdruby/libzstd/common/error_private.c +11 -2
  16. data/ext/zstdruby/libzstd/common/error_private.h +87 -4
  17. data/ext/zstdruby/libzstd/common/fse.h +47 -116
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +127 -127
  19. data/ext/zstdruby/libzstd/common/huf.h +112 -197
  20. data/ext/zstdruby/libzstd/common/mem.h +124 -142
  21. data/ext/zstdruby/libzstd/common/pool.c +54 -27
  22. data/ext/zstdruby/libzstd/common/pool.h +11 -5
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +78 -22
  25. data/ext/zstdruby/libzstd/common/threading.h +9 -13
  26. data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +186 -144
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +99 -196
  34. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  35. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +968 -331
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +4120 -1191
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +688 -159
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +121 -40
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +62 -35
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +577 -0
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +322 -115
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +394 -154
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -3
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -253
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -3
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1289 -247
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +339 -212
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +508 -282
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +217 -466
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +35 -114
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1220 -572
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +23 -19
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +859 -273
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1244 -375
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +21 -7
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +74 -11
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +75 -54
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +55 -36
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +126 -110
  72. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +248 -56
  73. data/ext/zstdruby/libzstd/zstd.h +1277 -306
  74. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +29 -8
  75. data/ext/zstdruby/main.c +20 -0
  76. data/ext/zstdruby/skippable_frame.c +63 -0
  77. data/ext/zstdruby/streaming_compress.c +177 -0
  78. data/ext/zstdruby/streaming_compress.h +5 -0
  79. data/ext/zstdruby/streaming_decompress.c +123 -0
  80. data/ext/zstdruby/zstdruby.c +114 -32
  81. data/lib/zstd-ruby/version.rb +1 -1
  82. data/lib/zstd-ruby.rb +0 -1
  83. data/zstd-ruby.gemspec +1 -1
  84. metadata +24 -39
  85. data/.travis.yml +0 -14
  86. data/ext/zstdruby/libzstd/.gitignore +0 -3
  87. data/ext/zstdruby/libzstd/BUCK +0 -234
  88. data/ext/zstdruby/libzstd/Makefile +0 -289
  89. data/ext/zstdruby/libzstd/README.md +0 -159
  90. data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
  91. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
  92. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
  93. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
  94. data/ext/zstdruby/libzstd/dll/example/Makefile +0 -47
  95. data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
  96. data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
  97. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
  98. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
  99. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
  100. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2152
  101. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
  102. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3514
  103. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
  104. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3156
  105. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
  106. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3641
  107. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
  108. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4046
  109. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
  110. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4150
  111. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
  112. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4533
  113. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
  114. data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
  115. data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,9 +15,9 @@
15
15
  /*-*******************************************************
16
16
  * Dependencies
17
17
  *********************************************************/
18
- #include <stddef.h> /* size_t */
19
- #include "zstd.h" /* DCtx, and some public functions */
20
- #include "zstd_internal.h" /* blockProperties_t, and some public functions */
18
+ #include "../common/zstd_deps.h" /* size_t */
19
+ #include "../zstd.h" /* DCtx, and some public functions */
20
+ #include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */
21
21
  #include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
22
22
 
23
23
 
@@ -33,6 +33,12 @@
33
33
  */
34
34
 
35
35
 
36
+ /* Streaming state is used to inform allocation of the literal buffer */
37
+ typedef enum {
38
+ not_streaming = 0,
39
+ is_streaming = 1
40
+ } streaming_operation;
41
+
36
42
  /* ZSTD_decompressBlock_internal() :
37
43
  * decompress block, starting at `src`,
38
44
  * into destination buffer `dst`.
@@ -41,19 +47,27 @@
41
47
  */
42
48
  size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
43
49
  void* dst, size_t dstCapacity,
44
- const void* src, size_t srcSize, const int frame);
50
+ const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
45
51
 
46
52
  /* ZSTD_buildFSETable() :
47
53
  * generate FSE decoding table for one symbol (ll, ml or off)
48
54
  * this function must be called with valid parameters only
49
55
  * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
50
56
  * in which case it cannot fail.
57
+ * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
58
+ * defined in zstd_decompress_internal.h.
51
59
  * Internal use only.
52
60
  */
53
61
  void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
54
62
  const short* normalizedCounter, unsigned maxSymbolValue,
55
- const U32* baseValue, const U32* nbAdditionalBits,
56
- unsigned tableLog);
63
+ const U32* baseValue, const U8* nbAdditionalBits,
64
+ unsigned tableLog, void* wksp, size_t wkspSize,
65
+ int bmi2);
66
+
67
+ /* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
68
+ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
69
+ void* dst, size_t dstCapacity,
70
+ const void* src, size_t srcSize);
57
71
 
58
72
 
59
73
  #endif /* ZSTD_DEC_BLOCK_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -19,34 +19,34 @@
19
19
  /*-*******************************************************
20
20
  * Dependencies
21
21
  *********************************************************/
22
- #include "mem.h" /* BYTE, U16, U32 */
23
- #include "zstd_internal.h" /* ZSTD_seqSymbol */
22
+ #include "../common/mem.h" /* BYTE, U16, U32 */
23
+ #include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
24
24
 
25
25
 
26
26
 
27
27
  /*-*******************************************************
28
28
  * Constants
29
29
  *********************************************************/
30
- static const U32 LL_base[MaxLL+1] = {
30
+ static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
31
31
  0, 1, 2, 3, 4, 5, 6, 7,
32
32
  8, 9, 10, 11, 12, 13, 14, 15,
33
33
  16, 18, 20, 22, 24, 28, 32, 40,
34
34
  48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
35
35
  0x2000, 0x4000, 0x8000, 0x10000 };
36
36
 
37
- static const U32 OF_base[MaxOff+1] = {
37
+ static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
38
38
  0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
39
39
  0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
40
40
  0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
41
41
  0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
42
42
 
43
- static const U32 OF_bits[MaxOff+1] = {
43
+ static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
44
44
  0, 1, 2, 3, 4, 5, 6, 7,
45
45
  8, 9, 10, 11, 12, 13, 14, 15,
46
46
  16, 17, 18, 19, 20, 21, 22, 23,
47
47
  24, 25, 26, 27, 28, 29, 30, 31 };
48
48
 
49
- static const U32 ML_base[MaxML+1] = {
49
+ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
50
50
  3, 4, 5, 6, 7, 8, 9, 10,
51
51
  11, 12, 13, 14, 15, 16, 17, 18,
52
52
  19, 20, 21, 22, 23, 24, 25, 26,
@@ -73,12 +73,17 @@ static const U32 ML_base[MaxML+1] = {
73
73
 
74
74
  #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
75
75
 
76
+ #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
77
+ #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
78
+ #define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
79
+
76
80
  typedef struct {
77
81
  ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
78
82
  ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
79
83
  ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
80
- HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
84
+ HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
81
85
  U32 rep[ZSTD_REP_NUM];
86
+ U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
82
87
  } ZSTD_entropyDTables_t;
83
88
 
84
89
  typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
@@ -95,6 +100,29 @@ typedef enum {
95
100
  ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
96
101
  } ZSTD_dictUses_e;
97
102
 
103
+ /* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
104
+ typedef struct {
105
+ const ZSTD_DDict** ddictPtrTable;
106
+ size_t ddictPtrTableSize;
107
+ size_t ddictPtrCount;
108
+ } ZSTD_DDictHashSet;
109
+
110
+ #ifndef ZSTD_DECODER_INTERNAL_BUFFER
111
+ # define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
112
+ #endif
113
+
114
+ #define ZSTD_LBMIN 64
115
+ #define ZSTD_LBMAX (128 << 10)
116
+
117
+ /* extra buffer, compensates when dst is not large enough to store litBuffer */
118
+ #define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
119
+
120
+ typedef enum {
121
+ ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
122
+ ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
123
+ ZSTD_split = 2 /* Split between litExtraBuffer and dst */
124
+ } ZSTD_litLocation_e;
125
+
98
126
  struct ZSTD_DCtx_s
99
127
  {
100
128
  const ZSTD_seqSymbol* LLTptr;
@@ -109,6 +137,7 @@ struct ZSTD_DCtx_s
109
137
  const void* dictEnd; /* end of previous segment */
110
138
  size_t expected;
111
139
  ZSTD_frameHeader fParams;
140
+ U64 processedCSize;
112
141
  U64 decodedSize;
113
142
  blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
114
143
  ZSTD_dStage stage;
@@ -117,12 +146,16 @@ struct ZSTD_DCtx_s
117
146
  XXH64_state_t xxhState;
118
147
  size_t headerSize;
119
148
  ZSTD_format_e format;
149
+ ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
150
+ U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
120
151
  const BYTE* litPtr;
121
152
  ZSTD_customMem customMem;
122
153
  size_t litSize;
123
154
  size_t rleSize;
124
155
  size_t staticSize;
156
+ #if DYNAMIC_BMI2 != 0
125
157
  int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
158
+ #endif
126
159
 
127
160
  /* dictionary */
128
161
  ZSTD_DDict* ddictLocal;
@@ -130,6 +163,9 @@ struct ZSTD_DCtx_s
130
163
  U32 dictID;
131
164
  int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
132
165
  ZSTD_dictUses_e dictUses;
166
+ ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
167
+ ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
168
+ int disableHufAsm;
133
169
 
134
170
  /* streaming */
135
171
  ZSTD_dStreamStage streamStage;
@@ -142,17 +178,44 @@ struct ZSTD_DCtx_s
142
178
  size_t outStart;
143
179
  size_t outEnd;
144
180
  size_t lhSize;
181
+ #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
145
182
  void* legacyContext;
146
183
  U32 previousLegacyVersion;
147
184
  U32 legacyVersion;
185
+ #endif
148
186
  U32 hostageByte;
149
187
  int noForwardProgress;
188
+ ZSTD_bufferMode_e outBufferMode;
189
+ ZSTD_outBuffer expectedOutBuffer;
150
190
 
151
191
  /* workspace */
152
- BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
192
+ BYTE* litBuffer;
193
+ const BYTE* litBufferEnd;
194
+ ZSTD_litLocation_e litBufferLocation;
195
+ BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
153
196
  BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
197
+
198
+ size_t oversizedDuration;
199
+
200
+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
201
+ void const* dictContentBeginForFuzzing;
202
+ void const* dictContentEndForFuzzing;
203
+ #endif
204
+
205
+ /* Tracing */
206
+ #if ZSTD_TRACE
207
+ ZSTD_TraceCtx traceCtx;
208
+ #endif
154
209
  }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
155
210
 
211
+ MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
212
+ #if DYNAMIC_BMI2 != 0
213
+ return dctx->bmi2;
214
+ #else
215
+ (void)dctx;
216
+ return 0;
217
+ #endif
218
+ }
156
219
 
157
220
  /*-*******************************************************
158
221
  * Shared internal functions
@@ -160,7 +223,7 @@ struct ZSTD_DCtx_s
160
223
 
161
224
  /*! ZSTD_loadDEntropy() :
162
225
  * dict : must point at beginning of a valid zstd dictionary.
163
- * @return : size of entropy tables read */
226
+ * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
164
227
  size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
165
228
  const void* const dict, size_t const dictSize);
166
229
 
@@ -169,7 +232,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
169
232
  * If yes, do nothing (continue on current segment).
170
233
  * If not, classify previous segment as "external dictionary", and start a new segment.
171
234
  * This function cannot fail. */
172
- void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
235
+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
173
236
 
174
237
 
175
238
  #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -26,47 +26,65 @@
26
26
  #include <string.h> /* memset */
27
27
  #include <time.h> /* clock */
28
28
 
29
- #include "mem.h" /* read */
30
- #include "pool.h"
31
- #include "threading.h"
32
- #include "cover.h"
33
- #include "zstd_internal.h" /* includes zstd.h */
34
29
  #ifndef ZDICT_STATIC_LINKING_ONLY
35
- #define ZDICT_STATIC_LINKING_ONLY
30
+ # define ZDICT_STATIC_LINKING_ONLY
36
31
  #endif
37
- #include "zdict.h"
32
+
33
+ #include "../common/mem.h" /* read */
34
+ #include "../common/pool.h"
35
+ #include "../common/threading.h"
36
+ #include "../common/zstd_internal.h" /* includes zstd.h */
37
+ #include "../common/bits.h" /* ZSTD_highbit32 */
38
+ #include "../zdict.h"
39
+ #include "cover.h"
38
40
 
39
41
  /*-*************************************
40
42
  * Constants
41
43
  ***************************************/
44
+ /**
45
+ * There are 32bit indexes used to ref samples, so limit samples size to 4GB
46
+ * on 64bit builds.
47
+ * For 32bit builds we choose 1 GB.
48
+ * Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
49
+ * contiguous buffer, so 1GB is already a high limit.
50
+ */
42
51
  #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
43
- #define DEFAULT_SPLITPOINT 1.0
52
+ #define COVER_DEFAULT_SPLITPOINT 1.0
44
53
 
45
54
  /*-*************************************
46
55
  * Console display
47
56
  ***************************************/
48
- static int g_displayLevel = 2;
57
+ #ifndef LOCALDISPLAYLEVEL
58
+ static int g_displayLevel = 0;
59
+ #endif
60
+ #undef DISPLAY
49
61
  #define DISPLAY(...) \
50
62
  { \
51
63
  fprintf(stderr, __VA_ARGS__); \
52
64
  fflush(stderr); \
53
65
  }
66
+ #undef LOCALDISPLAYLEVEL
54
67
  #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
55
68
  if (displayLevel >= l) { \
56
69
  DISPLAY(__VA_ARGS__); \
57
70
  } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
71
+ #undef DISPLAYLEVEL
58
72
  #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
59
73
 
74
+ #ifndef LOCALDISPLAYUPDATE
75
+ static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
76
+ static clock_t g_time = 0;
77
+ #endif
78
+ #undef LOCALDISPLAYUPDATE
60
79
  #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
61
80
  if (displayLevel >= l) { \
62
- if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
81
+ if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
63
82
  g_time = clock(); \
64
83
  DISPLAY(__VA_ARGS__); \
65
84
  } \
66
85
  }
86
+ #undef DISPLAYUPDATE
67
87
  #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
68
- static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
69
- static clock_t g_time = 0;
70
88
 
71
89
  /*-*************************************
72
90
  * Hash table
@@ -120,9 +138,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) {
120
138
  /**
121
139
  * Internal hash function
122
140
  */
123
- static const U32 prime4bytes = 2654435761U;
141
+ static const U32 COVER_prime4bytes = 2654435761U;
124
142
  static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
125
- return (key * prime4bytes) >> (32 - map->sizeLog);
143
+ return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
126
144
  }
127
145
 
128
146
  /**
@@ -215,7 +233,7 @@ typedef struct {
215
233
  } COVER_ctx_t;
216
234
 
217
235
  /* We need a global context for qsort... */
218
- static COVER_ctx_t *g_ctx = NULL;
236
+ static COVER_ctx_t *g_coverCtx = NULL;
219
237
 
220
238
  /*-*************************************
221
239
  * Helper functions
@@ -258,11 +276,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
258
276
 
259
277
  /**
260
278
  * Same as COVER_cmp() except ties are broken by pointer value
261
- * NOTE: g_ctx must be set to call this function. A global is required because
279
+ * NOTE: g_coverCtx must be set to call this function. A global is required because
262
280
  * qsort doesn't take an opaque pointer.
263
281
  */
264
- static int COVER_strict_cmp(const void *lp, const void *rp) {
265
- int result = COVER_cmp(g_ctx, lp, rp);
282
+ static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
283
+ int result = COVER_cmp(g_coverCtx, lp, rp);
266
284
  if (result == 0) {
267
285
  result = lp < rp ? -1 : 1;
268
286
  }
@@ -271,8 +289,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
271
289
  /**
272
290
  * Faster version for d <= 8.
273
291
  */
274
- static int COVER_strict_cmp8(const void *lp, const void *rp) {
275
- int result = COVER_cmp8(g_ctx, lp, rp);
292
+ static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
293
+ int result = COVER_cmp8(g_coverCtx, lp, rp);
276
294
  if (result == 0) {
277
295
  result = lp < rp ? -1 : 1;
278
296
  }
@@ -524,7 +542,7 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
524
542
 
525
543
  /**
526
544
  * Prepare a context for dictionary building.
527
- * The context is only dependent on the parameter `d` and can used multiple
545
+ * The context is only dependent on the parameter `d` and can be used multiple
528
546
  * times.
529
547
  * Returns 0 on success or error code on error.
530
548
  * The context must be destroyed with `COVER_ctx_destroy()`.
@@ -603,7 +621,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
603
621
  /* qsort doesn't take an opaque pointer, so pass as a global.
604
622
  * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
605
623
  */
606
- g_ctx = ctx;
624
+ g_coverCtx = ctx;
607
625
  #if defined(__OpenBSD__)
608
626
  mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
609
627
  (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
@@ -629,7 +647,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
629
647
 
630
648
  void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
631
649
  {
632
- const double ratio = (double)nbDmers / maxDictSize;
650
+ const double ratio = (double)nbDmers / (double)maxDictSize;
633
651
  if (ratio >= 10) {
634
652
  return;
635
653
  }
@@ -725,7 +743,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
725
743
  COVER_map_t activeDmers;
726
744
  parameters.splitPoint = 1.0;
727
745
  /* Initialize global data */
728
- g_displayLevel = parameters.zParams.notificationLevel;
746
+ g_displayLevel = (int)parameters.zParams.notificationLevel;
729
747
  /* Checks */
730
748
  if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
731
749
  DISPLAYLEVEL(1, "Cover parameters incorrect\n");
@@ -933,9 +951,17 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
933
951
  }
934
952
  }
935
953
 
954
+ static COVER_dictSelection_t setDictSelection(BYTE* buf, size_t s, size_t csz)
955
+ {
956
+ COVER_dictSelection_t ds;
957
+ ds.dictContent = buf;
958
+ ds.dictSize = s;
959
+ ds.totalCompressedSize = csz;
960
+ return ds;
961
+ }
962
+
936
963
  COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
937
- COVER_dictSelection_t selection = { NULL, 0, error };
938
- return selection;
964
+ return setDictSelection(NULL, 0, error);
939
965
  }
940
966
 
941
967
  unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
@@ -946,7 +972,7 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
946
972
  free(selection.dictContent);
947
973
  }
948
974
 
949
- COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
975
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
950
976
  size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
951
977
  size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
952
978
 
@@ -954,8 +980,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
954
980
  size_t largestCompressed = 0;
955
981
  BYTE* customDictContentEnd = customDictContent + dictContentSize;
956
982
 
957
- BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
958
- BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
983
+ BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
984
+ BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
959
985
  double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
960
986
 
961
987
  if (!largestDictbuffer || !candidateDictBuffer) {
@@ -967,7 +993,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
967
993
  /* Initial dictionary size and compressed size */
968
994
  memcpy(largestDictbuffer, customDictContent, dictContentSize);
969
995
  dictContentSize = ZDICT_finalizeDictionary(
970
- largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
996
+ largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
971
997
  samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
972
998
 
973
999
  if (ZDICT_isError(dictContentSize)) {
@@ -988,9 +1014,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
988
1014
  }
989
1015
 
990
1016
  if (params.shrinkDict == 0) {
991
- COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
992
1017
  free(candidateDictBuffer);
993
- return selection;
1018
+ return setDictSelection(largestDictbuffer, dictContentSize, totalCompressedSize);
994
1019
  }
995
1020
 
996
1021
  largestDict = dictContentSize;
@@ -1001,7 +1026,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
1001
1026
  while (dictContentSize < largestDict) {
1002
1027
  memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
1003
1028
  dictContentSize = ZDICT_finalizeDictionary(
1004
- candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
1029
+ candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
1005
1030
  samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
1006
1031
 
1007
1032
  if (ZDICT_isError(dictContentSize)) {
@@ -1022,20 +1047,16 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
1022
1047
  return COVER_dictSelectionError(totalCompressedSize);
1023
1048
  }
1024
1049
 
1025
- if (totalCompressedSize <= largestCompressed * regressionTolerance) {
1026
- COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
1050
+ if ((double)totalCompressedSize <= (double)largestCompressed * regressionTolerance) {
1027
1051
  free(largestDictbuffer);
1028
- return selection;
1052
+ return setDictSelection( candidateDictBuffer, dictContentSize, totalCompressedSize );
1029
1053
  }
1030
1054
  dictContentSize *= 2;
1031
1055
  }
1032
1056
  dictContentSize = largestDict;
1033
1057
  totalCompressedSize = largestCompressed;
1034
- {
1035
- COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
1036
- free(candidateDictBuffer);
1037
- return selection;
1038
- }
1058
+ free(candidateDictBuffer);
1059
+ return setDictSelection( largestDictbuffer, dictContentSize, totalCompressedSize );
1039
1060
  }
1040
1061
 
1041
1062
  /**
@@ -1053,18 +1074,19 @@ typedef struct COVER_tryParameters_data_s {
1053
1074
  * This function is thread safe if zstd is compiled with multithreaded support.
1054
1075
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
1055
1076
  */
1056
- static void COVER_tryParameters(void *opaque) {
1077
+ static void COVER_tryParameters(void *opaque)
1078
+ {
1057
1079
  /* Save parameters as local variables */
1058
- COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
1080
+ COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
1059
1081
  const COVER_ctx_t *const ctx = data->ctx;
1060
1082
  const ZDICT_cover_params_t parameters = data->parameters;
1061
1083
  size_t dictBufferCapacity = data->dictBufferCapacity;
1062
1084
  size_t totalCompressedSize = ERROR(GENERIC);
1063
1085
  /* Allocate space for hash table, dict, and freqs */
1064
1086
  COVER_map_t activeDmers;
1065
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
1087
+ BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
1066
1088
  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
1067
- U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
1089
+ U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
1068
1090
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
1069
1091
  DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
1070
1092
  goto _cleanup;
@@ -1079,7 +1101,7 @@ static void COVER_tryParameters(void *opaque) {
1079
1101
  {
1080
1102
  const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
1081
1103
  dictBufferCapacity, parameters);
1082
- selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
1104
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
1083
1105
  ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
1084
1106
  totalCompressedSize);
1085
1107
 
@@ -1094,19 +1116,18 @@ _cleanup:
1094
1116
  free(data);
1095
1117
  COVER_map_destroy(&activeDmers);
1096
1118
  COVER_dictSelectionFree(selection);
1097
- if (freqs) {
1098
- free(freqs);
1099
- }
1119
+ free(freqs);
1100
1120
  }
1101
1121
 
1102
1122
  ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1103
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
1104
- const size_t *samplesSizes, unsigned nbSamples,
1105
- ZDICT_cover_params_t *parameters) {
1123
+ void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
1124
+ const size_t* samplesSizes, unsigned nbSamples,
1125
+ ZDICT_cover_params_t* parameters)
1126
+ {
1106
1127
  /* constants */
1107
1128
  const unsigned nbThreads = parameters->nbThreads;
1108
1129
  const double splitPoint =
1109
- parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
1130
+ parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
1110
1131
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
1111
1132
  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
1112
1133
  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
@@ -1,15 +1,26 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #ifndef ZDICT_STATIC_LINKING_ONLY
12
+ # define ZDICT_STATIC_LINKING_ONLY
13
+ #endif
14
+
1
15
  #include <stdio.h> /* fprintf */
2
16
  #include <stdlib.h> /* malloc, free, qsort */
3
17
  #include <string.h> /* memset */
4
18
  #include <time.h> /* clock */
5
- #include "mem.h" /* read */
6
- #include "pool.h"
7
- #include "threading.h"
8
- #include "zstd_internal.h" /* includes zstd.h */
9
- #ifndef ZDICT_STATIC_LINKING_ONLY
10
- #define ZDICT_STATIC_LINKING_ONLY
11
- #endif
12
- #include "zdict.h"
19
+ #include "../common/mem.h" /* read */
20
+ #include "../common/pool.h"
21
+ #include "../common/threading.h"
22
+ #include "../common/zstd_internal.h" /* includes zstd.h */
23
+ #include "../zdict.h"
13
24
 
14
25
  /**
15
26
  * COVER_best_t is used for two purposes:
@@ -142,6 +153,6 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
142
153
  * smallest dictionary within a specified regression of the compressed size
143
154
  * from the largest dictionary.
144
155
  */
145
- COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
156
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
146
157
  size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
147
158
  size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
@@ -1576,7 +1576,7 @@ note:
1576
1576
  /* Construct the inverse suffix array of type B* suffixes using trsort. */
1577
1577
  trsort(ISAb, SA, m, 1);
1578
1578
 
1579
- /* Set the sorted order of tyoe B* suffixes. */
1579
+ /* Set the sorted order of type B* suffixes. */
1580
1580
  for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1581
1581
  for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1582
1582
  if(0 <= i) {