zstd-ruby 1.4.5.0 → 1.5.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +78 -5
  4. data/Rakefile +8 -2
  5. data/ext/zstdruby/common.h +15 -0
  6. data/ext/zstdruby/extconf.rb +3 -2
  7. data/ext/zstdruby/libzstd/common/allocations.h +55 -0
  8. data/ext/zstdruby/libzstd/common/bits.h +200 -0
  9. data/ext/zstdruby/libzstd/common/bitstream.h +45 -62
  10. data/ext/zstdruby/libzstd/common/compiler.h +205 -22
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  13. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +172 -48
  15. data/ext/zstdruby/libzstd/common/error_private.c +10 -2
  16. data/ext/zstdruby/libzstd/common/error_private.h +82 -3
  17. data/ext/zstdruby/libzstd/common/fse.h +37 -86
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +117 -92
  19. data/ext/zstdruby/libzstd/common/huf.h +99 -166
  20. data/ext/zstdruby/libzstd/common/mem.h +124 -142
  21. data/ext/zstdruby/libzstd/common/pool.c +54 -27
  22. data/ext/zstdruby/libzstd/common/pool.h +10 -4
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +74 -19
  25. data/ext/zstdruby/libzstd/common/threading.h +5 -10
  26. data/ext/zstdruby/libzstd/common/xxhash.c +7 -847
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +132 -187
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +83 -157
  34. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  35. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +916 -279
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3773 -1019
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +610 -203
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +119 -42
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +42 -19
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +49 -317
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +320 -103
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +388 -151
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -265
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1270 -251
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +324 -219
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +481 -209
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +181 -457
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +34 -113
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1199 -565
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -12
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +627 -157
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1086 -326
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +19 -5
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +62 -13
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +73 -52
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +44 -35
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +103 -111
  72. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +203 -34
  73. data/ext/zstdruby/libzstd/zstd.h +1217 -287
  74. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +28 -8
  75. data/ext/zstdruby/main.c +20 -0
  76. data/ext/zstdruby/skippable_frame.c +63 -0
  77. data/ext/zstdruby/streaming_compress.c +177 -0
  78. data/ext/zstdruby/streaming_compress.h +5 -0
  79. data/ext/zstdruby/streaming_decompress.c +123 -0
  80. data/ext/zstdruby/zstdruby.c +114 -32
  81. data/lib/zstd-ruby/version.rb +1 -1
  82. data/lib/zstd-ruby.rb +0 -1
  83. data/zstd-ruby.gemspec +1 -1
  84. metadata +19 -36
  85. data/.travis.yml +0 -14
  86. data/ext/zstdruby/libzstd/.gitignore +0 -3
  87. data/ext/zstdruby/libzstd/BUCK +0 -234
  88. data/ext/zstdruby/libzstd/Makefile +0 -354
  89. data/ext/zstdruby/libzstd/README.md +0 -179
  90. data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
  91. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
  92. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
  93. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
  94. data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
  95. data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
  96. data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
  97. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
  98. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
  99. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
  100. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
  101. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
  102. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
  103. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
  104. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
  105. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
  106. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
  107. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
  108. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
  109. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
  110. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
  111. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
  112. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
  113. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
  114. data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
  115. data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
15
15
  /*-*******************************************************
16
16
  * Dependencies
17
17
  *********************************************************/
18
- #include <stddef.h> /* size_t */
18
+ #include "../common/zstd_deps.h" /* size_t */
19
19
  #include "../zstd.h" /* DCtx, and some public functions */
20
20
  #include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */
21
21
  #include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
@@ -33,6 +33,12 @@
33
33
  */
34
34
 
35
35
 
36
+ /* Streaming state is used to inform allocation of the literal buffer */
37
+ typedef enum {
38
+ not_streaming = 0,
39
+ is_streaming = 1
40
+ } streaming_operation;
41
+
36
42
  /* ZSTD_decompressBlock_internal() :
37
43
  * decompress block, starting at `src`,
38
44
  * into destination buffer `dst`.
@@ -41,19 +47,27 @@
41
47
  */
42
48
  size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
43
49
  void* dst, size_t dstCapacity,
44
- const void* src, size_t srcSize, const int frame);
50
+ const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
45
51
 
46
52
  /* ZSTD_buildFSETable() :
47
53
  * generate FSE decoding table for one symbol (ll, ml or off)
48
54
  * this function must be called with valid parameters only
49
55
  * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
50
56
  * in which case it cannot fail.
57
+ * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
58
+ * defined in zstd_decompress_internal.h.
51
59
  * Internal use only.
52
60
  */
53
61
  void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
54
62
  const short* normalizedCounter, unsigned maxSymbolValue,
55
- const U32* baseValue, const U32* nbAdditionalBits,
56
- unsigned tableLog);
63
+ const U32* baseValue, const U8* nbAdditionalBits,
64
+ unsigned tableLog, void* wksp, size_t wkspSize,
65
+ int bmi2);
66
+
67
+ /* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
68
+ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
69
+ void* dst, size_t dstCapacity,
70
+ const void* src, size_t srcSize);
57
71
 
58
72
 
59
73
  #endif /* ZSTD_DEC_BLOCK_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -20,33 +20,33 @@
20
20
  * Dependencies
21
21
  *********************************************************/
22
22
  #include "../common/mem.h" /* BYTE, U16, U32 */
23
- #include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
23
+ #include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
24
24
 
25
25
 
26
26
 
27
27
  /*-*******************************************************
28
28
  * Constants
29
29
  *********************************************************/
30
- static const U32 LL_base[MaxLL+1] = {
30
+ static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
31
31
  0, 1, 2, 3, 4, 5, 6, 7,
32
32
  8, 9, 10, 11, 12, 13, 14, 15,
33
33
  16, 18, 20, 22, 24, 28, 32, 40,
34
34
  48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
35
35
  0x2000, 0x4000, 0x8000, 0x10000 };
36
36
 
37
- static const U32 OF_base[MaxOff+1] = {
37
+ static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
38
38
  0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
39
39
  0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
40
40
  0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
41
41
  0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
42
42
 
43
- static const U32 OF_bits[MaxOff+1] = {
43
+ static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
44
44
  0, 1, 2, 3, 4, 5, 6, 7,
45
45
  8, 9, 10, 11, 12, 13, 14, 15,
46
46
  16, 17, 18, 19, 20, 21, 22, 23,
47
47
  24, 25, 26, 27, 28, 29, 30, 31 };
48
48
 
49
- static const U32 ML_base[MaxML+1] = {
49
+ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
50
50
  3, 4, 5, 6, 7, 8, 9, 10,
51
51
  11, 12, 13, 14, 15, 16, 17, 18,
52
52
  19, 20, 21, 22, 23, 24, 25, 26,
@@ -73,12 +73,17 @@ static const U32 ML_base[MaxML+1] = {
73
73
 
74
74
  #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
75
75
 
76
+ #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
77
+ #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
78
+ #define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
79
+
76
80
  typedef struct {
77
81
  ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
78
82
  ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
79
83
  ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
80
- HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
84
+ HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
81
85
  U32 rep[ZSTD_REP_NUM];
86
+ U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
82
87
  } ZSTD_entropyDTables_t;
83
88
 
84
89
  typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
@@ -95,10 +100,28 @@ typedef enum {
95
100
  ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
96
101
  } ZSTD_dictUses_e;
97
102
 
103
+ /* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
104
+ typedef struct {
105
+ const ZSTD_DDict** ddictPtrTable;
106
+ size_t ddictPtrTableSize;
107
+ size_t ddictPtrCount;
108
+ } ZSTD_DDictHashSet;
109
+
110
+ #ifndef ZSTD_DECODER_INTERNAL_BUFFER
111
+ # define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
112
+ #endif
113
+
114
+ #define ZSTD_LBMIN 64
115
+ #define ZSTD_LBMAX (128 << 10)
116
+
117
+ /* extra buffer, compensates when dst is not large enough to store litBuffer */
118
+ #define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
119
+
98
120
  typedef enum {
99
- ZSTD_obm_buffered = 0, /* Buffer the output */
100
- ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */
101
- } ZSTD_outBufferMode_e;
121
+ ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
122
+ ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
123
+ ZSTD_split = 2 /* Split between litExtraBuffer and dst */
124
+ } ZSTD_litLocation_e;
102
125
 
103
126
  struct ZSTD_DCtx_s
104
127
  {
@@ -114,6 +137,7 @@ struct ZSTD_DCtx_s
114
137
  const void* dictEnd; /* end of previous segment */
115
138
  size_t expected;
116
139
  ZSTD_frameHeader fParams;
140
+ U64 processedCSize;
117
141
  U64 decodedSize;
118
142
  blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
119
143
  ZSTD_dStage stage;
@@ -122,12 +146,16 @@ struct ZSTD_DCtx_s
122
146
  XXH64_state_t xxhState;
123
147
  size_t headerSize;
124
148
  ZSTD_format_e format;
149
+ ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
150
+ U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
125
151
  const BYTE* litPtr;
126
152
  ZSTD_customMem customMem;
127
153
  size_t litSize;
128
154
  size_t rleSize;
129
155
  size_t staticSize;
156
+ #if DYNAMIC_BMI2 != 0
130
157
  int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
158
+ #endif
131
159
 
132
160
  /* dictionary */
133
161
  ZSTD_DDict* ddictLocal;
@@ -135,6 +163,9 @@ struct ZSTD_DCtx_s
135
163
  U32 dictID;
136
164
  int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
137
165
  ZSTD_dictUses_e dictUses;
166
+ ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
167
+ ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
168
+ int disableHufAsm;
138
169
 
139
170
  /* streaming */
140
171
  ZSTD_dStreamStage streamStage;
@@ -147,16 +178,21 @@ struct ZSTD_DCtx_s
147
178
  size_t outStart;
148
179
  size_t outEnd;
149
180
  size_t lhSize;
181
+ #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
150
182
  void* legacyContext;
151
183
  U32 previousLegacyVersion;
152
184
  U32 legacyVersion;
185
+ #endif
153
186
  U32 hostageByte;
154
187
  int noForwardProgress;
155
- ZSTD_outBufferMode_e outBufferMode;
188
+ ZSTD_bufferMode_e outBufferMode;
156
189
  ZSTD_outBuffer expectedOutBuffer;
157
190
 
158
191
  /* workspace */
159
- BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
192
+ BYTE* litBuffer;
193
+ const BYTE* litBufferEnd;
194
+ ZSTD_litLocation_e litBufferLocation;
195
+ BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
160
196
  BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
161
197
 
162
198
  size_t oversizedDuration;
@@ -165,8 +201,21 @@ struct ZSTD_DCtx_s
165
201
  void const* dictContentBeginForFuzzing;
166
202
  void const* dictContentEndForFuzzing;
167
203
  #endif
204
+
205
+ /* Tracing */
206
+ #if ZSTD_TRACE
207
+ ZSTD_TraceCtx traceCtx;
208
+ #endif
168
209
  }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
169
210
 
211
+ MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
212
+ #if DYNAMIC_BMI2 != 0
213
+ return dctx->bmi2;
214
+ #else
215
+ (void)dctx;
216
+ return 0;
217
+ #endif
218
+ }
170
219
 
171
220
  /*-*******************************************************
172
221
  * Shared internal functions
@@ -183,7 +232,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
183
232
  * If yes, do nothing (continue on current segment).
184
233
  * If not, classify previous segment as "external dictionary", and start a new segment.
185
234
  * This function cannot fail. */
186
- void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
235
+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
187
236
 
188
237
 
189
238
  #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -26,47 +26,65 @@
26
26
  #include <string.h> /* memset */
27
27
  #include <time.h> /* clock */
28
28
 
29
+ #ifndef ZDICT_STATIC_LINKING_ONLY
30
+ # define ZDICT_STATIC_LINKING_ONLY
31
+ #endif
32
+
29
33
  #include "../common/mem.h" /* read */
30
34
  #include "../common/pool.h"
31
35
  #include "../common/threading.h"
32
- #include "cover.h"
33
36
  #include "../common/zstd_internal.h" /* includes zstd.h */
34
- #ifndef ZDICT_STATIC_LINKING_ONLY
35
- #define ZDICT_STATIC_LINKING_ONLY
36
- #endif
37
- #include "zdict.h"
37
+ #include "../common/bits.h" /* ZSTD_highbit32 */
38
+ #include "../zdict.h"
39
+ #include "cover.h"
38
40
 
39
41
  /*-*************************************
40
42
  * Constants
41
43
  ***************************************/
44
+ /**
45
+ * There are 32bit indexes used to ref samples, so limit samples size to 4GB
46
+ * on 64bit builds.
47
+ * For 32bit builds we choose 1 GB.
48
+ * Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
49
+ * contiguous buffer, so 1GB is already a high limit.
50
+ */
42
51
  #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
43
- #define DEFAULT_SPLITPOINT 1.0
52
+ #define COVER_DEFAULT_SPLITPOINT 1.0
44
53
 
45
54
  /*-*************************************
46
55
  * Console display
47
56
  ***************************************/
48
- static int g_displayLevel = 2;
57
+ #ifndef LOCALDISPLAYLEVEL
58
+ static int g_displayLevel = 0;
59
+ #endif
60
+ #undef DISPLAY
49
61
  #define DISPLAY(...) \
50
62
  { \
51
63
  fprintf(stderr, __VA_ARGS__); \
52
64
  fflush(stderr); \
53
65
  }
66
+ #undef LOCALDISPLAYLEVEL
54
67
  #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
55
68
  if (displayLevel >= l) { \
56
69
  DISPLAY(__VA_ARGS__); \
57
70
  } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
71
+ #undef DISPLAYLEVEL
58
72
  #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
59
73
 
74
+ #ifndef LOCALDISPLAYUPDATE
75
+ static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
76
+ static clock_t g_time = 0;
77
+ #endif
78
+ #undef LOCALDISPLAYUPDATE
60
79
  #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
61
80
  if (displayLevel >= l) { \
62
- if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
81
+ if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
63
82
  g_time = clock(); \
64
83
  DISPLAY(__VA_ARGS__); \
65
84
  } \
66
85
  }
86
+ #undef DISPLAYUPDATE
67
87
  #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
68
- static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
69
- static clock_t g_time = 0;
70
88
 
71
89
  /*-*************************************
72
90
  * Hash table
@@ -120,9 +138,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) {
120
138
  /**
121
139
  * Internal hash function
122
140
  */
123
- static const U32 prime4bytes = 2654435761U;
141
+ static const U32 COVER_prime4bytes = 2654435761U;
124
142
  static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
125
- return (key * prime4bytes) >> (32 - map->sizeLog);
143
+ return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
126
144
  }
127
145
 
128
146
  /**
@@ -215,7 +233,7 @@ typedef struct {
215
233
  } COVER_ctx_t;
216
234
 
217
235
  /* We need a global context for qsort... */
218
- static COVER_ctx_t *g_ctx = NULL;
236
+ static COVER_ctx_t *g_coverCtx = NULL;
219
237
 
220
238
  /*-*************************************
221
239
  * Helper functions
@@ -258,11 +276,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
258
276
 
259
277
  /**
260
278
  * Same as COVER_cmp() except ties are broken by pointer value
261
- * NOTE: g_ctx must be set to call this function. A global is required because
279
+ * NOTE: g_coverCtx must be set to call this function. A global is required because
262
280
  * qsort doesn't take an opaque pointer.
263
281
  */
264
- static int COVER_strict_cmp(const void *lp, const void *rp) {
265
- int result = COVER_cmp(g_ctx, lp, rp);
282
+ static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
283
+ int result = COVER_cmp(g_coverCtx, lp, rp);
266
284
  if (result == 0) {
267
285
  result = lp < rp ? -1 : 1;
268
286
  }
@@ -271,8 +289,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
271
289
  /**
272
290
  * Faster version for d <= 8.
273
291
  */
274
- static int COVER_strict_cmp8(const void *lp, const void *rp) {
275
- int result = COVER_cmp8(g_ctx, lp, rp);
292
+ static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
293
+ int result = COVER_cmp8(g_coverCtx, lp, rp);
276
294
  if (result == 0) {
277
295
  result = lp < rp ? -1 : 1;
278
296
  }
@@ -524,7 +542,7 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
524
542
 
525
543
  /**
526
544
  * Prepare a context for dictionary building.
527
- * The context is only dependent on the parameter `d` and can used multiple
545
+ * The context is only dependent on the parameter `d` and can be used multiple
528
546
  * times.
529
547
  * Returns 0 on success or error code on error.
530
548
  * The context must be destroyed with `COVER_ctx_destroy()`.
@@ -603,7 +621,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
603
621
  /* qsort doesn't take an opaque pointer, so pass as a global.
604
622
  * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
605
623
  */
606
- g_ctx = ctx;
624
+ g_coverCtx = ctx;
607
625
  #if defined(__OpenBSD__)
608
626
  mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
609
627
  (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
@@ -629,7 +647,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
629
647
 
630
648
  void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
631
649
  {
632
- const double ratio = (double)nbDmers / maxDictSize;
650
+ const double ratio = (double)nbDmers / (double)maxDictSize;
633
651
  if (ratio >= 10) {
634
652
  return;
635
653
  }
@@ -725,7 +743,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
725
743
  COVER_map_t activeDmers;
726
744
  parameters.splitPoint = 1.0;
727
745
  /* Initialize global data */
728
- g_displayLevel = parameters.zParams.notificationLevel;
746
+ g_displayLevel = (int)parameters.zParams.notificationLevel;
729
747
  /* Checks */
730
748
  if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
731
749
  DISPLAYLEVEL(1, "Cover parameters incorrect\n");
@@ -933,9 +951,17 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
933
951
  }
934
952
  }
935
953
 
954
+ static COVER_dictSelection_t setDictSelection(BYTE* buf, size_t s, size_t csz)
955
+ {
956
+ COVER_dictSelection_t ds;
957
+ ds.dictContent = buf;
958
+ ds.dictSize = s;
959
+ ds.totalCompressedSize = csz;
960
+ return ds;
961
+ }
962
+
936
963
  COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
937
- COVER_dictSelection_t selection = { NULL, 0, error };
938
- return selection;
964
+ return setDictSelection(NULL, 0, error);
939
965
  }
940
966
 
941
967
  unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
@@ -946,7 +972,7 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
946
972
  free(selection.dictContent);
947
973
  }
948
974
 
949
- COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
975
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
950
976
  size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
951
977
  size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
952
978
 
@@ -954,8 +980,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
954
980
  size_t largestCompressed = 0;
955
981
  BYTE* customDictContentEnd = customDictContent + dictContentSize;
956
982
 
957
- BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
958
- BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
983
+ BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
984
+ BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
959
985
  double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
960
986
 
961
987
  if (!largestDictbuffer || !candidateDictBuffer) {
@@ -967,7 +993,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
967
993
  /* Initial dictionary size and compressed size */
968
994
  memcpy(largestDictbuffer, customDictContent, dictContentSize);
969
995
  dictContentSize = ZDICT_finalizeDictionary(
970
- largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
996
+ largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
971
997
  samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
972
998
 
973
999
  if (ZDICT_isError(dictContentSize)) {
@@ -988,9 +1014,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
988
1014
  }
989
1015
 
990
1016
  if (params.shrinkDict == 0) {
991
- COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
992
1017
  free(candidateDictBuffer);
993
- return selection;
1018
+ return setDictSelection(largestDictbuffer, dictContentSize, totalCompressedSize);
994
1019
  }
995
1020
 
996
1021
  largestDict = dictContentSize;
@@ -1001,7 +1026,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
1001
1026
  while (dictContentSize < largestDict) {
1002
1027
  memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
1003
1028
  dictContentSize = ZDICT_finalizeDictionary(
1004
- candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
1029
+ candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
1005
1030
  samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
1006
1031
 
1007
1032
  if (ZDICT_isError(dictContentSize)) {
@@ -1022,20 +1047,16 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
1022
1047
  return COVER_dictSelectionError(totalCompressedSize);
1023
1048
  }
1024
1049
 
1025
- if (totalCompressedSize <= largestCompressed * regressionTolerance) {
1026
- COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
1050
+ if ((double)totalCompressedSize <= (double)largestCompressed * regressionTolerance) {
1027
1051
  free(largestDictbuffer);
1028
- return selection;
1052
+ return setDictSelection( candidateDictBuffer, dictContentSize, totalCompressedSize );
1029
1053
  }
1030
1054
  dictContentSize *= 2;
1031
1055
  }
1032
1056
  dictContentSize = largestDict;
1033
1057
  totalCompressedSize = largestCompressed;
1034
- {
1035
- COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
1036
- free(candidateDictBuffer);
1037
- return selection;
1038
- }
1058
+ free(candidateDictBuffer);
1059
+ return setDictSelection( largestDictbuffer, dictContentSize, totalCompressedSize );
1039
1060
  }
1040
1061
 
1041
1062
  /**
@@ -1053,18 +1074,19 @@ typedef struct COVER_tryParameters_data_s {
1053
1074
  * This function is thread safe if zstd is compiled with multithreaded support.
1054
1075
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
1055
1076
  */
1056
- static void COVER_tryParameters(void *opaque) {
1077
+ static void COVER_tryParameters(void *opaque)
1078
+ {
1057
1079
  /* Save parameters as local variables */
1058
- COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
1080
+ COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
1059
1081
  const COVER_ctx_t *const ctx = data->ctx;
1060
1082
  const ZDICT_cover_params_t parameters = data->parameters;
1061
1083
  size_t dictBufferCapacity = data->dictBufferCapacity;
1062
1084
  size_t totalCompressedSize = ERROR(GENERIC);
1063
1085
  /* Allocate space for hash table, dict, and freqs */
1064
1086
  COVER_map_t activeDmers;
1065
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
1087
+ BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
1066
1088
  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
1067
- U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
1089
+ U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
1068
1090
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
1069
1091
  DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
1070
1092
  goto _cleanup;
@@ -1079,7 +1101,7 @@ static void COVER_tryParameters(void *opaque) {
1079
1101
  {
1080
1102
  const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
1081
1103
  dictBufferCapacity, parameters);
1082
- selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
1104
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
1083
1105
  ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
1084
1106
  totalCompressedSize);
1085
1107
 
@@ -1094,19 +1116,18 @@ _cleanup:
1094
1116
  free(data);
1095
1117
  COVER_map_destroy(&activeDmers);
1096
1118
  COVER_dictSelectionFree(selection);
1097
- if (freqs) {
1098
- free(freqs);
1099
- }
1119
+ free(freqs);
1100
1120
  }
1101
1121
 
1102
1122
  ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1103
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
1104
- const size_t *samplesSizes, unsigned nbSamples,
1105
- ZDICT_cover_params_t *parameters) {
1123
+ void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
1124
+ const size_t* samplesSizes, unsigned nbSamples,
1125
+ ZDICT_cover_params_t* parameters)
1126
+ {
1106
1127
  /* constants */
1107
1128
  const unsigned nbThreads = parameters->nbThreads;
1108
1129
  const double splitPoint =
1109
- parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
1130
+ parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
1110
1131
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
1111
1132
  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
1112
1133
  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2017-2020, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -8,6 +8,10 @@
8
8
  * You may select, at your option, one of the above-listed licenses.
9
9
  */
10
10
 
11
+ #ifndef ZDICT_STATIC_LINKING_ONLY
12
+ # define ZDICT_STATIC_LINKING_ONLY
13
+ #endif
14
+
11
15
  #include <stdio.h> /* fprintf */
12
16
  #include <stdlib.h> /* malloc, free, qsort */
13
17
  #include <string.h> /* memset */
@@ -16,10 +20,7 @@
16
20
  #include "../common/pool.h"
17
21
  #include "../common/threading.h"
18
22
  #include "../common/zstd_internal.h" /* includes zstd.h */
19
- #ifndef ZDICT_STATIC_LINKING_ONLY
20
- #define ZDICT_STATIC_LINKING_ONLY
21
- #endif
22
- #include "zdict.h"
23
+ #include "../zdict.h"
23
24
 
24
25
  /**
25
26
  * COVER_best_t is used for two purposes:
@@ -152,6 +153,6 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
152
153
  * smallest dictionary within a specified regression of the compressed size
153
154
  * from the largest dictionary.
154
155
  */
155
- COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
156
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
156
157
  size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
157
158
  size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
@@ -1576,7 +1576,7 @@ note:
1576
1576
  /* Construct the inverse suffix array of type B* suffixes using trsort. */
1577
1577
  trsort(ISAb, SA, m, 1);
1578
1578
 
1579
- /* Set the sorted order of tyoe B* suffixes. */
1579
+ /* Set the sorted order of type B* suffixes. */
1580
1580
  for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1581
1581
  for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1582
1582
  if(0 <= i) {