zstd-ruby 1.4.1.0 → 1.5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ruby.yml +35 -0
  4. data/README.md +2 -2
  5. data/ext/zstdruby/libzstd/BUCK +5 -7
  6. data/ext/zstdruby/libzstd/Makefile +304 -113
  7. data/ext/zstdruby/libzstd/README.md +83 -20
  8. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  9. data/ext/zstdruby/libzstd/common/compiler.h +150 -8
  10. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  11. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  12. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  13. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  14. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  15. data/ext/zstdruby/libzstd/common/error_private.h +8 -4
  16. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  17. data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -55
  18. data/ext/zstdruby/libzstd/common/huf.h +43 -39
  19. data/ext/zstdruby/libzstd/common/mem.h +69 -25
  20. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  21. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  22. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  23. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  24. data/ext/zstdruby/libzstd/common/xxhash.c +40 -92
  25. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  26. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  27. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  28. data/ext/zstdruby/libzstd/common/zstd_internal.h +230 -111
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +154 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +332 -193
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3614 -1696
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +546 -86
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +441 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +572 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +662 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +43 -41
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +85 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1184 -111
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +333 -208
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +228 -129
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +151 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +395 -276
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +630 -231
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +606 -380
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +39 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -46
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -31
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +53 -30
  72. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  73. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  74. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
  75. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +24 -14
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +17 -8
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  79. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +17 -8
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +25 -11
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +43 -32
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  85. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +27 -19
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +32 -20
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  89. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  90. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
  91. data/ext/zstdruby/libzstd/zstd.h +740 -153
  92. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
  93. data/lib/zstd-ruby/version.rb +1 -1
  94. data/zstd-ruby.gemspec +1 -1
  95. metadata +21 -10
  96. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,9 +15,9 @@
15
15
  /*-*******************************************************
16
16
  * Dependencies
17
17
  *********************************************************/
18
- #include <stddef.h> /* size_t */
19
- #include "zstd.h" /* DCtx, and some public functions */
20
- #include "zstd_internal.h" /* blockProperties_t, and some public functions */
18
+ #include "../common/zstd_deps.h" /* size_t */
19
+ #include "../zstd.h" /* DCtx, and some public functions */
20
+ #include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */
21
21
  #include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
22
22
 
23
23
 
@@ -48,12 +48,15 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
48
48
  * this function must be called with valid parameters only
49
49
  * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
50
50
  * in which case it cannot fail.
51
+ * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
52
+ * defined in zstd_decompress_internal.h.
51
53
  * Internal use only.
52
54
  */
53
55
  void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
54
56
  const short* normalizedCounter, unsigned maxSymbolValue,
55
57
  const U32* baseValue, const U32* nbAdditionalBits,
56
- unsigned tableLog);
58
+ unsigned tableLog, void* wksp, size_t wkspSize,
59
+ int bmi2);
57
60
 
58
61
 
59
62
  #endif /* ZSTD_DEC_BLOCK_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -19,34 +19,34 @@
19
19
  /*-*******************************************************
20
20
  * Dependencies
21
21
  *********************************************************/
22
- #include "mem.h" /* BYTE, U16, U32 */
23
- #include "zstd_internal.h" /* ZSTD_seqSymbol */
22
+ #include "../common/mem.h" /* BYTE, U16, U32 */
23
+ #include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
24
24
 
25
25
 
26
26
 
27
27
  /*-*******************************************************
28
28
  * Constants
29
29
  *********************************************************/
30
- static const U32 LL_base[MaxLL+1] = {
30
+ static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
31
31
  0, 1, 2, 3, 4, 5, 6, 7,
32
32
  8, 9, 10, 11, 12, 13, 14, 15,
33
33
  16, 18, 20, 22, 24, 28, 32, 40,
34
34
  48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
35
35
  0x2000, 0x4000, 0x8000, 0x10000 };
36
36
 
37
- static const U32 OF_base[MaxOff+1] = {
37
+ static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
38
38
  0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
39
39
  0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
40
40
  0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
41
41
  0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
42
42
 
43
- static const U32 OF_bits[MaxOff+1] = {
43
+ static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
44
44
  0, 1, 2, 3, 4, 5, 6, 7,
45
45
  8, 9, 10, 11, 12, 13, 14, 15,
46
46
  16, 17, 18, 19, 20, 21, 22, 23,
47
47
  24, 25, 26, 27, 28, 29, 30, 31 };
48
48
 
49
- static const U32 ML_base[MaxML+1] = {
49
+ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
50
50
  3, 4, 5, 6, 7, 8, 9, 10,
51
51
  11, 12, 13, 14, 15, 16, 17, 18,
52
52
  19, 20, 21, 22, 23, 24, 25, 26,
@@ -73,12 +73,16 @@ static const U32 ML_base[MaxML+1] = {
73
73
 
74
74
  #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
75
75
 
76
+ #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
77
+ #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
78
+
76
79
  typedef struct {
77
80
  ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
78
81
  ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
79
82
  ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
80
83
  HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
81
84
  U32 rep[ZSTD_REP_NUM];
85
+ U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
82
86
  } ZSTD_entropyDTables_t;
83
87
 
84
88
  typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
@@ -95,6 +99,13 @@ typedef enum {
95
99
  ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
96
100
  } ZSTD_dictUses_e;
97
101
 
102
+ /* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
103
+ typedef struct {
104
+ const ZSTD_DDict** ddictPtrTable;
105
+ size_t ddictPtrTableSize;
106
+ size_t ddictPtrCount;
107
+ } ZSTD_DDictHashSet;
108
+
98
109
  struct ZSTD_DCtx_s
99
110
  {
100
111
  const ZSTD_seqSymbol* LLTptr;
@@ -109,6 +120,7 @@ struct ZSTD_DCtx_s
109
120
  const void* dictEnd; /* end of previous segment */
110
121
  size_t expected;
111
122
  ZSTD_frameHeader fParams;
123
+ U64 processedCSize;
112
124
  U64 decodedSize;
113
125
  blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
114
126
  ZSTD_dStage stage;
@@ -117,6 +129,8 @@ struct ZSTD_DCtx_s
117
129
  XXH64_state_t xxhState;
118
130
  size_t headerSize;
119
131
  ZSTD_format_e format;
132
+ ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
133
+ U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
120
134
  const BYTE* litPtr;
121
135
  ZSTD_customMem customMem;
122
136
  size_t litSize;
@@ -130,6 +144,8 @@ struct ZSTD_DCtx_s
130
144
  U32 dictID;
131
145
  int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
132
146
  ZSTD_dictUses_e dictUses;
147
+ ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
148
+ ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
133
149
 
134
150
  /* streaming */
135
151
  ZSTD_dStreamStage streamStage;
@@ -147,10 +163,24 @@ struct ZSTD_DCtx_s
147
163
  U32 legacyVersion;
148
164
  U32 hostageByte;
149
165
  int noForwardProgress;
166
+ ZSTD_bufferMode_e outBufferMode;
167
+ ZSTD_outBuffer expectedOutBuffer;
150
168
 
151
169
  /* workspace */
152
170
  BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
153
171
  BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
172
+
173
+ size_t oversizedDuration;
174
+
175
+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
176
+ void const* dictContentBeginForFuzzing;
177
+ void const* dictContentEndForFuzzing;
178
+ #endif
179
+
180
+ /* Tracing */
181
+ #if ZSTD_TRACE
182
+ ZSTD_TraceCtx traceCtx;
183
+ #endif
154
184
  }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
155
185
 
156
186
 
@@ -160,7 +190,7 @@ struct ZSTD_DCtx_s
160
190
 
161
191
  /*! ZSTD_loadDEntropy() :
162
192
  * dict : must point at beginning of a valid zstd dictionary.
163
- * @return : size of entropy tables read */
193
+ * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
164
194
  size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
165
195
  const void* const dict, size_t const dictSize);
166
196
 
@@ -169,7 +199,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
169
199
  * If yes, do nothing (continue on current segment).
170
200
  * If not, classify previous segment as "external dictionary", and start a new segment.
171
201
  * This function cannot fail. */
172
- void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
202
+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
173
203
 
174
204
 
175
205
  #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -28,7 +28,7 @@ extern "C" {
28
28
  * Dependencies
29
29
  ***************************************/
30
30
  #include <stddef.h> /* size_t */
31
- #include "zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
31
+ #include "../zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
32
32
 
33
33
 
34
34
  /* ***************************************************************
@@ -36,16 +36,17 @@ extern "C" {
36
36
  *****************************************************************/
37
37
  /* Deprecation warnings */
38
38
  /* Should these warnings be a problem,
39
- it is generally possible to disable them,
40
- typically with -Wno-deprecated-declarations for gcc
41
- or _CRT_SECURE_NO_WARNINGS in Visual.
42
- Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS */
39
+ * it is generally possible to disable them,
40
+ * typically with -Wno-deprecated-declarations for gcc
41
+ * or _CRT_SECURE_NO_WARNINGS in Visual.
42
+ * Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS
43
+ */
43
44
  #ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS
44
45
  # define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
45
46
  #else
46
47
  # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
47
48
  # define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
48
- # elif (defined(__GNUC__) && (__GNUC__ >= 5)) || defined(__clang__)
49
+ # elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__)
49
50
  # define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
50
51
  # elif defined(__GNUC__) && (__GNUC__ >= 3)
51
52
  # define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
@@ -185,7 +186,7 @@ ZBUFF_DEPRECATED("use ZSTD_DStreamOutSize") size_t ZBUFF_recommendedDOutSize(voi
185
186
 
186
187
  /*--- Dependency ---*/
187
188
  #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_customMem */
188
- #include "zstd.h"
189
+ #include "../zstd.h"
189
190
 
190
191
 
191
192
  /*--- Custom memory allocator ---*/
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -11,7 +11,7 @@
11
11
  /*-*************************************
12
12
  * Dependencies
13
13
  ***************************************/
14
- #include "error_private.h"
14
+ #include "../common/error_private.h"
15
15
  #include "zbuff.h"
16
16
 
17
17
  /*-****************************************
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -26,47 +26,57 @@
26
26
  #include <string.h> /* memset */
27
27
  #include <time.h> /* clock */
28
28
 
29
- #include "mem.h" /* read */
30
- #include "pool.h"
31
- #include "threading.h"
32
- #include "cover.h"
33
- #include "zstd_internal.h" /* includes zstd.h */
34
29
  #ifndef ZDICT_STATIC_LINKING_ONLY
35
- #define ZDICT_STATIC_LINKING_ONLY
30
+ # define ZDICT_STATIC_LINKING_ONLY
36
31
  #endif
37
- #include "zdict.h"
32
+
33
+ #include "../common/mem.h" /* read */
34
+ #include "../common/pool.h"
35
+ #include "../common/threading.h"
36
+ #include "../common/zstd_internal.h" /* includes zstd.h */
37
+ #include "../zdict.h"
38
+ #include "cover.h"
38
39
 
39
40
  /*-*************************************
40
41
  * Constants
41
42
  ***************************************/
42
43
  #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
43
- #define DEFAULT_SPLITPOINT 1.0
44
+ #define COVER_DEFAULT_SPLITPOINT 1.0
44
45
 
45
46
  /*-*************************************
46
47
  * Console display
47
48
  ***************************************/
49
+ #ifndef LOCALDISPLAYLEVEL
48
50
  static int g_displayLevel = 2;
51
+ #endif
52
+ #undef DISPLAY
49
53
  #define DISPLAY(...) \
50
54
  { \
51
55
  fprintf(stderr, __VA_ARGS__); \
52
56
  fflush(stderr); \
53
57
  }
58
+ #undef LOCALDISPLAYLEVEL
54
59
  #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
55
60
  if (displayLevel >= l) { \
56
61
  DISPLAY(__VA_ARGS__); \
57
62
  } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
63
+ #undef DISPLAYLEVEL
58
64
  #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
59
65
 
66
+ #ifndef LOCALDISPLAYUPDATE
67
+ static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
68
+ static clock_t g_time = 0;
69
+ #endif
70
+ #undef LOCALDISPLAYUPDATE
60
71
  #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
61
72
  if (displayLevel >= l) { \
62
- if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
73
+ if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
63
74
  g_time = clock(); \
64
75
  DISPLAY(__VA_ARGS__); \
65
76
  } \
66
77
  }
78
+ #undef DISPLAYUPDATE
67
79
  #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
68
- static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
69
- static clock_t g_time = 0;
70
80
 
71
81
  /*-*************************************
72
82
  * Hash table
@@ -120,9 +130,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) {
120
130
  /**
121
131
  * Internal hash function
122
132
  */
123
- static const U32 prime4bytes = 2654435761U;
133
+ static const U32 COVER_prime4bytes = 2654435761U;
124
134
  static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
125
- return (key * prime4bytes) >> (32 - map->sizeLog);
135
+ return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
126
136
  }
127
137
 
128
138
  /**
@@ -215,7 +225,7 @@ typedef struct {
215
225
  } COVER_ctx_t;
216
226
 
217
227
  /* We need a global context for qsort... */
218
- static COVER_ctx_t *g_ctx = NULL;
228
+ static COVER_ctx_t *g_coverCtx = NULL;
219
229
 
220
230
  /*-*************************************
221
231
  * Helper functions
@@ -258,11 +268,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
258
268
 
259
269
  /**
260
270
  * Same as COVER_cmp() except ties are broken by pointer value
261
- * NOTE: g_ctx must be set to call this function. A global is required because
271
+ * NOTE: g_coverCtx must be set to call this function. A global is required because
262
272
  * qsort doesn't take an opaque pointer.
263
273
  */
264
- static int COVER_strict_cmp(const void *lp, const void *rp) {
265
- int result = COVER_cmp(g_ctx, lp, rp);
274
+ static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
275
+ int result = COVER_cmp(g_coverCtx, lp, rp);
266
276
  if (result == 0) {
267
277
  result = lp < rp ? -1 : 1;
268
278
  }
@@ -271,8 +281,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
271
281
  /**
272
282
  * Faster version for d <= 8.
273
283
  */
274
- static int COVER_strict_cmp8(const void *lp, const void *rp) {
275
- int result = COVER_cmp8(g_ctx, lp, rp);
284
+ static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
285
+ int result = COVER_cmp8(g_coverCtx, lp, rp);
276
286
  if (result == 0) {
277
287
  result = lp < rp ? -1 : 1;
278
288
  }
@@ -603,7 +613,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
603
613
  /* qsort doesn't take an opaque pointer, so pass as a global.
604
614
  * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
605
615
  */
606
- g_ctx = ctx;
616
+ g_coverCtx = ctx;
607
617
  #if defined(__OpenBSD__)
608
618
  mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
609
619
  (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
@@ -638,8 +648,8 @@ void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLeve
638
648
  "compared to the source size %u! "
639
649
  "size(source)/size(dictionary) = %f, but it should be >= "
640
650
  "10! This may lead to a subpar dictionary! We recommend "
641
- "training on sources at least 10x, and up to 100x the "
642
- "size of the dictionary!\n", (U32)maxDictSize,
651
+ "training on sources at least 10x, and preferably 100x "
652
+ "the size of the dictionary! \n", (U32)maxDictSize,
643
653
  (U32)nbDmers, ratio);
644
654
  }
645
655
 
@@ -919,13 +929,12 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
919
929
  }
920
930
  }
921
931
  /* Save the dictionary, parameters, and size */
922
- if (!dict) {
923
- return;
932
+ if (dict) {
933
+ memcpy(best->dict, dict, dictSize);
934
+ best->dictSize = dictSize;
935
+ best->parameters = parameters;
936
+ best->compressedSize = compressedSize;
924
937
  }
925
- memcpy(best->dict, dict, dictSize);
926
- best->dictSize = dictSize;
927
- best->parameters = parameters;
928
- best->compressedSize = compressedSize;
929
938
  }
930
939
  if (liveJobs == 0) {
931
940
  ZSTD_pthread_cond_broadcast(&best->cond);
@@ -947,7 +956,7 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
947
956
  free(selection.dictContent);
948
957
  }
949
958
 
950
- COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
959
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
951
960
  size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
952
961
  size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
953
962
 
@@ -955,8 +964,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
955
964
  size_t largestCompressed = 0;
956
965
  BYTE* customDictContentEnd = customDictContent + dictContentSize;
957
966
 
958
- BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
959
- BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
967
+ BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
968
+ BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
960
969
  double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
961
970
 
962
971
  if (!largestDictbuffer || !candidateDictBuffer) {
@@ -968,7 +977,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
968
977
  /* Initial dictionary size and compressed size */
969
978
  memcpy(largestDictbuffer, customDictContent, dictContentSize);
970
979
  dictContentSize = ZDICT_finalizeDictionary(
971
- largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
980
+ largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
972
981
  samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
973
982
 
974
983
  if (ZDICT_isError(dictContentSize)) {
@@ -1002,7 +1011,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
1002
1011
  while (dictContentSize < largestDict) {
1003
1012
  memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
1004
1013
  dictContentSize = ZDICT_finalizeDictionary(
1005
- candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
1014
+ candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
1006
1015
  samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
1007
1016
 
1008
1017
  if (ZDICT_isError(dictContentSize)) {
@@ -1054,18 +1063,19 @@ typedef struct COVER_tryParameters_data_s {
1054
1063
  * This function is thread safe if zstd is compiled with multithreaded support.
1055
1064
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
1056
1065
  */
1057
- static void COVER_tryParameters(void *opaque) {
1066
+ static void COVER_tryParameters(void *opaque)
1067
+ {
1058
1068
  /* Save parameters as local variables */
1059
- COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
1069
+ COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
1060
1070
  const COVER_ctx_t *const ctx = data->ctx;
1061
1071
  const ZDICT_cover_params_t parameters = data->parameters;
1062
1072
  size_t dictBufferCapacity = data->dictBufferCapacity;
1063
1073
  size_t totalCompressedSize = ERROR(GENERIC);
1064
1074
  /* Allocate space for hash table, dict, and freqs */
1065
1075
  COVER_map_t activeDmers;
1066
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
1076
+ BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
1067
1077
  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
1068
- U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
1078
+ U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
1069
1079
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
1070
1080
  DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
1071
1081
  goto _cleanup;
@@ -1080,7 +1090,7 @@ static void COVER_tryParameters(void *opaque) {
1080
1090
  {
1081
1091
  const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
1082
1092
  dictBufferCapacity, parameters);
1083
- selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
1093
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
1084
1094
  ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
1085
1095
  totalCompressedSize);
1086
1096
 
@@ -1095,19 +1105,18 @@ _cleanup:
1095
1105
  free(data);
1096
1106
  COVER_map_destroy(&activeDmers);
1097
1107
  COVER_dictSelectionFree(selection);
1098
- if (freqs) {
1099
- free(freqs);
1100
- }
1108
+ free(freqs);
1101
1109
  }
1102
1110
 
1103
1111
  ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1104
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
1105
- const size_t *samplesSizes, unsigned nbSamples,
1106
- ZDICT_cover_params_t *parameters) {
1112
+ void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
1113
+ const size_t* samplesSizes, unsigned nbSamples,
1114
+ ZDICT_cover_params_t* parameters)
1115
+ {
1107
1116
  /* constants */
1108
1117
  const unsigned nbThreads = parameters->nbThreads;
1109
1118
  const double splitPoint =
1110
- parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
1119
+ parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
1111
1120
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
1112
1121
  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
1113
1122
  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;