zstd-ruby 1.4.5.0 → 1.4.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +237 -138
  5. data/ext/zstdruby/libzstd/README.md +28 -0
  6. data/ext/zstdruby/libzstd/common/bitstream.h +25 -16
  7. data/ext/zstdruby/libzstd/common/compiler.h +118 -4
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  10. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +189 -43
  12. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +2 -2
  14. data/ext/zstdruby/libzstd/common/fse.h +40 -12
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +124 -17
  16. data/ext/zstdruby/libzstd/common/huf.h +27 -6
  17. data/ext/zstdruby/libzstd/common/mem.h +67 -94
  18. data/ext/zstdruby/libzstd/common/pool.c +23 -17
  19. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  20. data/ext/zstdruby/libzstd/common/threading.c +6 -5
  21. data/ext/zstdruby/libzstd/common/xxhash.c +19 -57
  22. data/ext/zstdruby/libzstd/common/xxhash.h +2 -2
  23. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  24. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  25. data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
  26. data/ext/zstdruby/libzstd/common/zstd_internal.h +90 -59
  27. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  28. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  29. data/ext/zstdruby/libzstd/compress/fse_compress.c +31 -24
  30. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  31. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  32. data/ext/zstdruby/libzstd/compress/huf_compress.c +217 -101
  33. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1495 -478
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +143 -44
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +7 -7
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +18 -4
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  41. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +62 -26
  42. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +23 -23
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
  44. data/ext/zstdruby/libzstd/compress/zstd_fast.c +21 -21
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
  46. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +352 -78
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  48. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +276 -209
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +8 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  51. data/ext/zstdruby/libzstd/compress/zstd_opt.c +191 -46
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  53. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +79 -410
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +27 -109
  55. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +303 -201
  56. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +9 -9
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  58. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +370 -87
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +153 -45
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +6 -3
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +28 -11
  62. data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
  63. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  66. data/ext/zstdruby/libzstd/dictBuilder/cover.c +40 -31
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.h +2 -2
  68. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  69. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +26 -25
  70. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +22 -24
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +5 -4
  72. data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
  73. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  74. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
  75. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +6 -2
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +6 -2
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  79. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +6 -2
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +7 -3
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +10 -6
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
  85. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +10 -6
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +10 -6
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  89. data/ext/zstdruby/libzstd/libzstd.pc.in +3 -3
  90. data/ext/zstdruby/libzstd/zstd.h +414 -54
  91. data/lib/zstd-ruby/version.rb +1 -1
  92. metadata +7 -3
  93. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -21,32 +21,33 @@
21
21
  *********************************************************/
22
22
  #include "../common/mem.h" /* BYTE, U16, U32 */
23
23
  #include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
24
+ #include "../common/zstd_trace.h" /* ZSTD_TraceCtx */
24
25
 
25
26
 
26
27
 
27
28
  /*-*******************************************************
28
29
  * Constants
29
30
  *********************************************************/
30
- static const U32 LL_base[MaxLL+1] = {
31
+ static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
31
32
  0, 1, 2, 3, 4, 5, 6, 7,
32
33
  8, 9, 10, 11, 12, 13, 14, 15,
33
34
  16, 18, 20, 22, 24, 28, 32, 40,
34
35
  48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
35
36
  0x2000, 0x4000, 0x8000, 0x10000 };
36
37
 
37
- static const U32 OF_base[MaxOff+1] = {
38
+ static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
38
39
  0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
39
40
  0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
40
41
  0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
41
42
  0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
42
43
 
43
- static const U32 OF_bits[MaxOff+1] = {
44
+ static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
44
45
  0, 1, 2, 3, 4, 5, 6, 7,
45
46
  8, 9, 10, 11, 12, 13, 14, 15,
46
47
  16, 17, 18, 19, 20, 21, 22, 23,
47
48
  24, 25, 26, 27, 28, 29, 30, 31 };
48
49
 
49
- static const U32 ML_base[MaxML+1] = {
50
+ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
50
51
  3, 4, 5, 6, 7, 8, 9, 10,
51
52
  11, 12, 13, 14, 15, 16, 17, 18,
52
53
  19, 20, 21, 22, 23, 24, 25, 26,
@@ -73,12 +74,16 @@ static const U32 ML_base[MaxML+1] = {
73
74
 
74
75
  #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
75
76
 
77
+ #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
78
+ #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
79
+
76
80
  typedef struct {
77
81
  ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
78
82
  ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
79
83
  ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
80
84
  HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
81
85
  U32 rep[ZSTD_REP_NUM];
86
+ U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
82
87
  } ZSTD_entropyDTables_t;
83
88
 
84
89
  typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
@@ -95,10 +100,12 @@ typedef enum {
95
100
  ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
96
101
  } ZSTD_dictUses_e;
97
102
 
98
- typedef enum {
99
- ZSTD_obm_buffered = 0, /* Buffer the output */
100
- ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */
101
- } ZSTD_outBufferMode_e;
103
+ /* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
104
+ typedef struct {
105
+ const ZSTD_DDict** ddictPtrTable;
106
+ size_t ddictPtrTableSize;
107
+ size_t ddictPtrCount;
108
+ } ZSTD_DDictHashSet;
102
109
 
103
110
  struct ZSTD_DCtx_s
104
111
  {
@@ -114,6 +121,7 @@ struct ZSTD_DCtx_s
114
121
  const void* dictEnd; /* end of previous segment */
115
122
  size_t expected;
116
123
  ZSTD_frameHeader fParams;
124
+ U64 processedCSize;
117
125
  U64 decodedSize;
118
126
  blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
119
127
  ZSTD_dStage stage;
@@ -122,6 +130,8 @@ struct ZSTD_DCtx_s
122
130
  XXH64_state_t xxhState;
123
131
  size_t headerSize;
124
132
  ZSTD_format_e format;
133
+ ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
134
+ U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
125
135
  const BYTE* litPtr;
126
136
  ZSTD_customMem customMem;
127
137
  size_t litSize;
@@ -135,6 +145,8 @@ struct ZSTD_DCtx_s
135
145
  U32 dictID;
136
146
  int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
137
147
  ZSTD_dictUses_e dictUses;
148
+ ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
149
+ ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
138
150
 
139
151
  /* streaming */
140
152
  ZSTD_dStreamStage streamStage;
@@ -152,7 +164,7 @@ struct ZSTD_DCtx_s
152
164
  U32 legacyVersion;
153
165
  U32 hostageByte;
154
166
  int noForwardProgress;
155
- ZSTD_outBufferMode_e outBufferMode;
167
+ ZSTD_bufferMode_e outBufferMode;
156
168
  ZSTD_outBuffer expectedOutBuffer;
157
169
 
158
170
  /* workspace */
@@ -165,6 +177,11 @@ struct ZSTD_DCtx_s
165
177
  void const* dictContentBeginForFuzzing;
166
178
  void const* dictContentEndForFuzzing;
167
179
  #endif
180
+
181
+ /* Tracing */
182
+ #if ZSTD_TRACE
183
+ ZSTD_TraceCtx traceCtx;
184
+ #endif
168
185
  }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
169
186
 
170
187
 
@@ -183,7 +200,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
183
200
  * If yes, do nothing (continue on current segment).
184
201
  * If not, classify previous segment as "external dictionary", and start a new segment.
185
202
  * This function cannot fail. */
186
- void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
203
+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
187
204
 
188
205
 
189
206
  #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -40,33 +40,42 @@
40
40
  * Constants
41
41
  ***************************************/
42
42
  #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
43
- #define DEFAULT_SPLITPOINT 1.0
43
+ #define COVER_DEFAULT_SPLITPOINT 1.0
44
44
 
45
45
  /*-*************************************
46
46
  * Console display
47
47
  ***************************************/
48
+ #ifndef LOCALDISPLAYLEVEL
48
49
  static int g_displayLevel = 2;
50
+ #endif
51
+ #undef DISPLAY
49
52
  #define DISPLAY(...) \
50
53
  { \
51
54
  fprintf(stderr, __VA_ARGS__); \
52
55
  fflush(stderr); \
53
56
  }
57
+ #undef LOCALDISPLAYLEVEL
54
58
  #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
55
59
  if (displayLevel >= l) { \
56
60
  DISPLAY(__VA_ARGS__); \
57
61
  } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
62
+ #undef DISPLAYLEVEL
58
63
  #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
59
64
 
65
+ #ifndef LOCALDISPLAYUPDATE
66
+ static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
67
+ static clock_t g_time = 0;
68
+ #endif
69
+ #undef LOCALDISPLAYUPDATE
60
70
  #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
61
71
  if (displayLevel >= l) { \
62
- if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
72
+ if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
63
73
  g_time = clock(); \
64
74
  DISPLAY(__VA_ARGS__); \
65
75
  } \
66
76
  }
77
+ #undef DISPLAYUPDATE
67
78
  #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
68
- static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
69
- static clock_t g_time = 0;
70
79
 
71
80
  /*-*************************************
72
81
  * Hash table
@@ -120,9 +129,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) {
120
129
  /**
121
130
  * Internal hash function
122
131
  */
123
- static const U32 prime4bytes = 2654435761U;
132
+ static const U32 COVER_prime4bytes = 2654435761U;
124
133
  static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
125
- return (key * prime4bytes) >> (32 - map->sizeLog);
134
+ return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
126
135
  }
127
136
 
128
137
  /**
@@ -215,7 +224,7 @@ typedef struct {
215
224
  } COVER_ctx_t;
216
225
 
217
226
  /* We need a global context for qsort... */
218
- static COVER_ctx_t *g_ctx = NULL;
227
+ static COVER_ctx_t *g_coverCtx = NULL;
219
228
 
220
229
  /*-*************************************
221
230
  * Helper functions
@@ -258,11 +267,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
258
267
 
259
268
  /**
260
269
  * Same as COVER_cmp() except ties are broken by pointer value
261
- * NOTE: g_ctx must be set to call this function. A global is required because
270
+ * NOTE: g_coverCtx must be set to call this function. A global is required because
262
271
  * qsort doesn't take an opaque pointer.
263
272
  */
264
- static int COVER_strict_cmp(const void *lp, const void *rp) {
265
- int result = COVER_cmp(g_ctx, lp, rp);
273
+ static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
274
+ int result = COVER_cmp(g_coverCtx, lp, rp);
266
275
  if (result == 0) {
267
276
  result = lp < rp ? -1 : 1;
268
277
  }
@@ -271,8 +280,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
271
280
  /**
272
281
  * Faster version for d <= 8.
273
282
  */
274
- static int COVER_strict_cmp8(const void *lp, const void *rp) {
275
- int result = COVER_cmp8(g_ctx, lp, rp);
283
+ static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
284
+ int result = COVER_cmp8(g_coverCtx, lp, rp);
276
285
  if (result == 0) {
277
286
  result = lp < rp ? -1 : 1;
278
287
  }
@@ -603,7 +612,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
603
612
  /* qsort doesn't take an opaque pointer, so pass as a global.
604
613
  * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
605
614
  */
606
- g_ctx = ctx;
615
+ g_coverCtx = ctx;
607
616
  #if defined(__OpenBSD__)
608
617
  mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
609
618
  (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
@@ -946,7 +955,7 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
946
955
  free(selection.dictContent);
947
956
  }
948
957
 
949
- COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
958
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
950
959
  size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
951
960
  size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
952
961
 
@@ -954,8 +963,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
954
963
  size_t largestCompressed = 0;
955
964
  BYTE* customDictContentEnd = customDictContent + dictContentSize;
956
965
 
957
- BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
958
- BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
966
+ BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
967
+ BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
959
968
  double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
960
969
 
961
970
  if (!largestDictbuffer || !candidateDictBuffer) {
@@ -967,7 +976,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
967
976
  /* Initial dictionary size and compressed size */
968
977
  memcpy(largestDictbuffer, customDictContent, dictContentSize);
969
978
  dictContentSize = ZDICT_finalizeDictionary(
970
- largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
979
+ largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
971
980
  samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
972
981
 
973
982
  if (ZDICT_isError(dictContentSize)) {
@@ -1001,7 +1010,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
1001
1010
  while (dictContentSize < largestDict) {
1002
1011
  memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
1003
1012
  dictContentSize = ZDICT_finalizeDictionary(
1004
- candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
1013
+ candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
1005
1014
  samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
1006
1015
 
1007
1016
  if (ZDICT_isError(dictContentSize)) {
@@ -1053,18 +1062,19 @@ typedef struct COVER_tryParameters_data_s {
1053
1062
  * This function is thread safe if zstd is compiled with multithreaded support.
1054
1063
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
1055
1064
  */
1056
- static void COVER_tryParameters(void *opaque) {
1065
+ static void COVER_tryParameters(void *opaque)
1066
+ {
1057
1067
  /* Save parameters as local variables */
1058
- COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
1068
+ COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
1059
1069
  const COVER_ctx_t *const ctx = data->ctx;
1060
1070
  const ZDICT_cover_params_t parameters = data->parameters;
1061
1071
  size_t dictBufferCapacity = data->dictBufferCapacity;
1062
1072
  size_t totalCompressedSize = ERROR(GENERIC);
1063
1073
  /* Allocate space for hash table, dict, and freqs */
1064
1074
  COVER_map_t activeDmers;
1065
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
1075
+ BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
1066
1076
  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
1067
- U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
1077
+ U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
1068
1078
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
1069
1079
  DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
1070
1080
  goto _cleanup;
@@ -1079,7 +1089,7 @@ static void COVER_tryParameters(void *opaque) {
1079
1089
  {
1080
1090
  const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
1081
1091
  dictBufferCapacity, parameters);
1082
- selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
1092
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
1083
1093
  ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
1084
1094
  totalCompressedSize);
1085
1095
 
@@ -1094,19 +1104,18 @@ _cleanup:
1094
1104
  free(data);
1095
1105
  COVER_map_destroy(&activeDmers);
1096
1106
  COVER_dictSelectionFree(selection);
1097
- if (freqs) {
1098
- free(freqs);
1099
- }
1107
+ free(freqs);
1100
1108
  }
1101
1109
 
1102
1110
  ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1103
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
1104
- const size_t *samplesSizes, unsigned nbSamples,
1105
- ZDICT_cover_params_t *parameters) {
1111
+ void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
1112
+ const size_t* samplesSizes, unsigned nbSamples,
1113
+ ZDICT_cover_params_t* parameters)
1114
+ {
1106
1115
  /* constants */
1107
1116
  const unsigned nbThreads = parameters->nbThreads;
1108
1117
  const double splitPoint =
1109
- parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
1118
+ parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
1110
1119
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
1111
1120
  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
1112
1121
  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2017-2020, Facebook, Inc.
2
+ * Copyright (c) 2017-2021, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -152,6 +152,6 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
152
152
  * smallest dictionary within a specified regression of the compressed size
153
153
  * from the largest dictionary.
154
154
  */
155
- COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
155
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
156
156
  size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
157
157
  size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
@@ -1576,7 +1576,7 @@ note:
1576
1576
  /* Construct the inverse suffix array of type B* suffixes using trsort. */
1577
1577
  trsort(ISAb, SA, m, 1);
1578
1578
 
1579
- /* Set the sorted order of tyoe B* suffixes. */
1579
+ /* Set the sorted order of type B* suffixes. */
1580
1580
  for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1581
1581
  for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1582
1582
  if(0 <= i) {
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2018-2020, Facebook, Inc.
2
+ * Copyright (c) 2018-2021, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -21,6 +21,7 @@
21
21
  #include "../common/threading.h"
22
22
  #include "cover.h"
23
23
  #include "../common/zstd_internal.h" /* includes zstd.h */
24
+ #include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
24
25
  #ifndef ZDICT_STATIC_LINKING_ONLY
25
26
  #define ZDICT_STATIC_LINKING_ONLY
26
27
  #endif
@@ -33,7 +34,7 @@
33
34
  #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
34
35
  #define FASTCOVER_MAX_F 31
35
36
  #define FASTCOVER_MAX_ACCEL 10
36
- #define DEFAULT_SPLITPOINT 0.75
37
+ #define FASTCOVER_DEFAULT_SPLITPOINT 0.75
37
38
  #define DEFAULT_F 20
38
39
  #define DEFAULT_ACCEL 1
39
40
 
@@ -41,50 +42,50 @@
41
42
  /*-*************************************
42
43
  * Console display
43
44
  ***************************************/
45
+ #ifndef LOCALDISPLAYLEVEL
44
46
  static int g_displayLevel = 2;
47
+ #endif
48
+ #undef DISPLAY
45
49
  #define DISPLAY(...) \
46
50
  { \
47
51
  fprintf(stderr, __VA_ARGS__); \
48
52
  fflush(stderr); \
49
53
  }
54
+ #undef LOCALDISPLAYLEVEL
50
55
  #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
51
56
  if (displayLevel >= l) { \
52
57
  DISPLAY(__VA_ARGS__); \
53
58
  } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
59
+ #undef DISPLAYLEVEL
54
60
  #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
55
61
 
62
+ #ifndef LOCALDISPLAYUPDATE
63
+ static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
64
+ static clock_t g_time = 0;
65
+ #endif
66
+ #undef LOCALDISPLAYUPDATE
56
67
  #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
57
68
  if (displayLevel >= l) { \
58
- if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
69
+ if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
59
70
  g_time = clock(); \
60
71
  DISPLAY(__VA_ARGS__); \
61
72
  } \
62
73
  }
74
+ #undef DISPLAYUPDATE
63
75
  #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
64
- static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
65
- static clock_t g_time = 0;
66
76
 
67
77
 
68
78
  /*-*************************************
69
79
  * Hash Functions
70
80
  ***************************************/
71
- static const U64 prime6bytes = 227718039650203ULL;
72
- static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
73
- static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
74
-
75
- static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
76
- static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
77
- static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
78
-
79
-
80
81
  /**
81
- * Hash the d-byte value pointed to by p and mod 2^f
82
+ * Hash the d-byte value pointed to by p and mod 2^f into the frequency vector
82
83
  */
83
- static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) {
84
+ static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) {
84
85
  if (d == 6) {
85
- return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1);
86
+ return ZSTD_hash6Ptr(p, f);
86
87
  }
87
- return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1);
88
+ return ZSTD_hash8Ptr(p, f);
88
89
  }
89
90
 
90
91
 
@@ -461,20 +462,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
461
462
  * This function is thread safe if zstd is compiled with multithreaded support.
462
463
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
463
464
  */
464
- static void FASTCOVER_tryParameters(void *opaque)
465
+ static void FASTCOVER_tryParameters(void* opaque)
465
466
  {
466
467
  /* Save parameters as local variables */
467
- FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
468
+ FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
468
469
  const FASTCOVER_ctx_t *const ctx = data->ctx;
469
470
  const ZDICT_cover_params_t parameters = data->parameters;
470
471
  size_t dictBufferCapacity = data->dictBufferCapacity;
471
472
  size_t totalCompressedSize = ERROR(GENERIC);
472
473
  /* Initialize array to keep track of frequency of dmer within activeSegment */
473
- U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
474
+ U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
474
475
  /* Allocate space for hash table, dict, and freqs */
475
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
476
+ BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
476
477
  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
477
- U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
478
+ U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
478
479
  if (!segmentFreqs || !dict || !freqs) {
479
480
  DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
480
481
  goto _cleanup;
@@ -486,7 +487,7 @@ static void FASTCOVER_tryParameters(void *opaque)
486
487
  parameters, segmentFreqs);
487
488
 
488
489
  const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
489
- selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
490
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
490
491
  ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
491
492
  totalCompressedSize);
492
493
 
@@ -617,7 +618,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
617
618
  /* constants */
618
619
  const unsigned nbThreads = parameters->nbThreads;
619
620
  const double splitPoint =
620
- parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
621
+ parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
621
622
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
622
623
  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
623
624
  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;