zstd-ruby 1.4.5.0 → 1.4.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (93) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +237 -138
  5. data/ext/zstdruby/libzstd/README.md +28 -0
  6. data/ext/zstdruby/libzstd/common/bitstream.h +25 -16
  7. data/ext/zstdruby/libzstd/common/compiler.h +118 -4
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  10. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +189 -43
  12. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +2 -2
  14. data/ext/zstdruby/libzstd/common/fse.h +40 -12
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +124 -17
  16. data/ext/zstdruby/libzstd/common/huf.h +27 -6
  17. data/ext/zstdruby/libzstd/common/mem.h +67 -94
  18. data/ext/zstdruby/libzstd/common/pool.c +23 -17
  19. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  20. data/ext/zstdruby/libzstd/common/threading.c +6 -5
  21. data/ext/zstdruby/libzstd/common/xxhash.c +19 -57
  22. data/ext/zstdruby/libzstd/common/xxhash.h +2 -2
  23. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  24. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  25. data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
  26. data/ext/zstdruby/libzstd/common/zstd_internal.h +90 -59
  27. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  28. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  29. data/ext/zstdruby/libzstd/compress/fse_compress.c +31 -24
  30. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  31. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  32. data/ext/zstdruby/libzstd/compress/huf_compress.c +217 -101
  33. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1495 -478
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +143 -44
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +7 -7
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +18 -4
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  41. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +62 -26
  42. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +23 -23
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
  44. data/ext/zstdruby/libzstd/compress/zstd_fast.c +21 -21
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
  46. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +352 -78
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  48. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +276 -209
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +8 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  51. data/ext/zstdruby/libzstd/compress/zstd_opt.c +191 -46
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  53. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +79 -410
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +27 -109
  55. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +303 -201
  56. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +9 -9
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  58. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +370 -87
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +153 -45
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +6 -3
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +28 -11
  62. data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
  63. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  66. data/ext/zstdruby/libzstd/dictBuilder/cover.c +40 -31
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.h +2 -2
  68. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  69. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +26 -25
  70. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +22 -24
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +5 -4
  72. data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
  73. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  74. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
  75. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +6 -2
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +6 -2
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  79. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +6 -2
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +7 -3
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +10 -6
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
  85. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +10 -6
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +10 -6
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  89. data/ext/zstdruby/libzstd/libzstd.pc.in +3 -3
  90. data/ext/zstdruby/libzstd/zstd.h +414 -54
  91. data/lib/zstd-ruby/version.rb +1 -1
  92. metadata +7 -3
  93. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -21,32 +21,33 @@
21
21
  *********************************************************/
22
22
  #include "../common/mem.h" /* BYTE, U16, U32 */
23
23
  #include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
24
+ #include "../common/zstd_trace.h" /* ZSTD_TraceCtx */
24
25
 
25
26
 
26
27
 
27
28
  /*-*******************************************************
28
29
  * Constants
29
30
  *********************************************************/
30
- static const U32 LL_base[MaxLL+1] = {
31
+ static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
31
32
  0, 1, 2, 3, 4, 5, 6, 7,
32
33
  8, 9, 10, 11, 12, 13, 14, 15,
33
34
  16, 18, 20, 22, 24, 28, 32, 40,
34
35
  48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
35
36
  0x2000, 0x4000, 0x8000, 0x10000 };
36
37
 
37
- static const U32 OF_base[MaxOff+1] = {
38
+ static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
38
39
  0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
39
40
  0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
40
41
  0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
41
42
  0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
42
43
 
43
- static const U32 OF_bits[MaxOff+1] = {
44
+ static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
44
45
  0, 1, 2, 3, 4, 5, 6, 7,
45
46
  8, 9, 10, 11, 12, 13, 14, 15,
46
47
  16, 17, 18, 19, 20, 21, 22, 23,
47
48
  24, 25, 26, 27, 28, 29, 30, 31 };
48
49
 
49
- static const U32 ML_base[MaxML+1] = {
50
+ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
50
51
  3, 4, 5, 6, 7, 8, 9, 10,
51
52
  11, 12, 13, 14, 15, 16, 17, 18,
52
53
  19, 20, 21, 22, 23, 24, 25, 26,
@@ -73,12 +74,16 @@ static const U32 ML_base[MaxML+1] = {
73
74
 
74
75
  #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
75
76
 
77
+ #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
78
+ #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
79
+
76
80
  typedef struct {
77
81
  ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
78
82
  ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
79
83
  ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
80
84
  HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
81
85
  U32 rep[ZSTD_REP_NUM];
86
+ U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
82
87
  } ZSTD_entropyDTables_t;
83
88
 
84
89
  typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
@@ -95,10 +100,12 @@ typedef enum {
95
100
  ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
96
101
  } ZSTD_dictUses_e;
97
102
 
98
- typedef enum {
99
- ZSTD_obm_buffered = 0, /* Buffer the output */
100
- ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */
101
- } ZSTD_outBufferMode_e;
103
+ /* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
104
+ typedef struct {
105
+ const ZSTD_DDict** ddictPtrTable;
106
+ size_t ddictPtrTableSize;
107
+ size_t ddictPtrCount;
108
+ } ZSTD_DDictHashSet;
102
109
 
103
110
  struct ZSTD_DCtx_s
104
111
  {
@@ -114,6 +121,7 @@ struct ZSTD_DCtx_s
114
121
  const void* dictEnd; /* end of previous segment */
115
122
  size_t expected;
116
123
  ZSTD_frameHeader fParams;
124
+ U64 processedCSize;
117
125
  U64 decodedSize;
118
126
  blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
119
127
  ZSTD_dStage stage;
@@ -122,6 +130,8 @@ struct ZSTD_DCtx_s
122
130
  XXH64_state_t xxhState;
123
131
  size_t headerSize;
124
132
  ZSTD_format_e format;
133
+ ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
134
+ U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
125
135
  const BYTE* litPtr;
126
136
  ZSTD_customMem customMem;
127
137
  size_t litSize;
@@ -135,6 +145,8 @@ struct ZSTD_DCtx_s
135
145
  U32 dictID;
136
146
  int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
137
147
  ZSTD_dictUses_e dictUses;
148
+ ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
149
+ ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
138
150
 
139
151
  /* streaming */
140
152
  ZSTD_dStreamStage streamStage;
@@ -152,7 +164,7 @@ struct ZSTD_DCtx_s
152
164
  U32 legacyVersion;
153
165
  U32 hostageByte;
154
166
  int noForwardProgress;
155
- ZSTD_outBufferMode_e outBufferMode;
167
+ ZSTD_bufferMode_e outBufferMode;
156
168
  ZSTD_outBuffer expectedOutBuffer;
157
169
 
158
170
  /* workspace */
@@ -165,6 +177,11 @@ struct ZSTD_DCtx_s
165
177
  void const* dictContentBeginForFuzzing;
166
178
  void const* dictContentEndForFuzzing;
167
179
  #endif
180
+
181
+ /* Tracing */
182
+ #if ZSTD_TRACE
183
+ ZSTD_TraceCtx traceCtx;
184
+ #endif
168
185
  }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
169
186
 
170
187
 
@@ -183,7 +200,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
183
200
  * If yes, do nothing (continue on current segment).
184
201
  * If not, classify previous segment as "external dictionary", and start a new segment.
185
202
  * This function cannot fail. */
186
- void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
203
+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
187
204
 
188
205
 
189
206
  #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -40,33 +40,42 @@
40
40
  * Constants
41
41
  ***************************************/
42
42
  #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
43
- #define DEFAULT_SPLITPOINT 1.0
43
+ #define COVER_DEFAULT_SPLITPOINT 1.0
44
44
 
45
45
  /*-*************************************
46
46
  * Console display
47
47
  ***************************************/
48
+ #ifndef LOCALDISPLAYLEVEL
48
49
  static int g_displayLevel = 2;
50
+ #endif
51
+ #undef DISPLAY
49
52
  #define DISPLAY(...) \
50
53
  { \
51
54
  fprintf(stderr, __VA_ARGS__); \
52
55
  fflush(stderr); \
53
56
  }
57
+ #undef LOCALDISPLAYLEVEL
54
58
  #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
55
59
  if (displayLevel >= l) { \
56
60
  DISPLAY(__VA_ARGS__); \
57
61
  } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
62
+ #undef DISPLAYLEVEL
58
63
  #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
59
64
 
65
+ #ifndef LOCALDISPLAYUPDATE
66
+ static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
67
+ static clock_t g_time = 0;
68
+ #endif
69
+ #undef LOCALDISPLAYUPDATE
60
70
  #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
61
71
  if (displayLevel >= l) { \
62
- if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
72
+ if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
63
73
  g_time = clock(); \
64
74
  DISPLAY(__VA_ARGS__); \
65
75
  } \
66
76
  }
77
+ #undef DISPLAYUPDATE
67
78
  #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
68
- static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
69
- static clock_t g_time = 0;
70
79
 
71
80
  /*-*************************************
72
81
  * Hash table
@@ -120,9 +129,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) {
120
129
  /**
121
130
  * Internal hash function
122
131
  */
123
- static const U32 prime4bytes = 2654435761U;
132
+ static const U32 COVER_prime4bytes = 2654435761U;
124
133
  static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
125
- return (key * prime4bytes) >> (32 - map->sizeLog);
134
+ return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
126
135
  }
127
136
 
128
137
  /**
@@ -215,7 +224,7 @@ typedef struct {
215
224
  } COVER_ctx_t;
216
225
 
217
226
  /* We need a global context for qsort... */
218
- static COVER_ctx_t *g_ctx = NULL;
227
+ static COVER_ctx_t *g_coverCtx = NULL;
219
228
 
220
229
  /*-*************************************
221
230
  * Helper functions
@@ -258,11 +267,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
258
267
 
259
268
  /**
260
269
  * Same as COVER_cmp() except ties are broken by pointer value
261
- * NOTE: g_ctx must be set to call this function. A global is required because
270
+ * NOTE: g_coverCtx must be set to call this function. A global is required because
262
271
  * qsort doesn't take an opaque pointer.
263
272
  */
264
- static int COVER_strict_cmp(const void *lp, const void *rp) {
265
- int result = COVER_cmp(g_ctx, lp, rp);
273
+ static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
274
+ int result = COVER_cmp(g_coverCtx, lp, rp);
266
275
  if (result == 0) {
267
276
  result = lp < rp ? -1 : 1;
268
277
  }
@@ -271,8 +280,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
271
280
  /**
272
281
  * Faster version for d <= 8.
273
282
  */
274
- static int COVER_strict_cmp8(const void *lp, const void *rp) {
275
- int result = COVER_cmp8(g_ctx, lp, rp);
283
+ static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
284
+ int result = COVER_cmp8(g_coverCtx, lp, rp);
276
285
  if (result == 0) {
277
286
  result = lp < rp ? -1 : 1;
278
287
  }
@@ -603,7 +612,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
603
612
  /* qsort doesn't take an opaque pointer, so pass as a global.
604
613
  * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
605
614
  */
606
- g_ctx = ctx;
615
+ g_coverCtx = ctx;
607
616
  #if defined(__OpenBSD__)
608
617
  mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
609
618
  (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
@@ -946,7 +955,7 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
946
955
  free(selection.dictContent);
947
956
  }
948
957
 
949
- COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
958
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
950
959
  size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
951
960
  size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
952
961
 
@@ -954,8 +963,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
954
963
  size_t largestCompressed = 0;
955
964
  BYTE* customDictContentEnd = customDictContent + dictContentSize;
956
965
 
957
- BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
958
- BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
966
+ BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
967
+ BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
959
968
  double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
960
969
 
961
970
  if (!largestDictbuffer || !candidateDictBuffer) {
@@ -967,7 +976,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
967
976
  /* Initial dictionary size and compressed size */
968
977
  memcpy(largestDictbuffer, customDictContent, dictContentSize);
969
978
  dictContentSize = ZDICT_finalizeDictionary(
970
- largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
979
+ largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
971
980
  samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
972
981
 
973
982
  if (ZDICT_isError(dictContentSize)) {
@@ -1001,7 +1010,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
1001
1010
  while (dictContentSize < largestDict) {
1002
1011
  memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
1003
1012
  dictContentSize = ZDICT_finalizeDictionary(
1004
- candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
1013
+ candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
1005
1014
  samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
1006
1015
 
1007
1016
  if (ZDICT_isError(dictContentSize)) {
@@ -1053,18 +1062,19 @@ typedef struct COVER_tryParameters_data_s {
1053
1062
  * This function is thread safe if zstd is compiled with multithreaded support.
1054
1063
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
1055
1064
  */
1056
- static void COVER_tryParameters(void *opaque) {
1065
+ static void COVER_tryParameters(void *opaque)
1066
+ {
1057
1067
  /* Save parameters as local variables */
1058
- COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
1068
+ COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
1059
1069
  const COVER_ctx_t *const ctx = data->ctx;
1060
1070
  const ZDICT_cover_params_t parameters = data->parameters;
1061
1071
  size_t dictBufferCapacity = data->dictBufferCapacity;
1062
1072
  size_t totalCompressedSize = ERROR(GENERIC);
1063
1073
  /* Allocate space for hash table, dict, and freqs */
1064
1074
  COVER_map_t activeDmers;
1065
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
1075
+ BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
1066
1076
  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
1067
- U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
1077
+ U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
1068
1078
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
1069
1079
  DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
1070
1080
  goto _cleanup;
@@ -1079,7 +1089,7 @@ static void COVER_tryParameters(void *opaque) {
1079
1089
  {
1080
1090
  const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
1081
1091
  dictBufferCapacity, parameters);
1082
- selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
1092
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
1083
1093
  ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
1084
1094
  totalCompressedSize);
1085
1095
 
@@ -1094,19 +1104,18 @@ _cleanup:
1094
1104
  free(data);
1095
1105
  COVER_map_destroy(&activeDmers);
1096
1106
  COVER_dictSelectionFree(selection);
1097
- if (freqs) {
1098
- free(freqs);
1099
- }
1107
+ free(freqs);
1100
1108
  }
1101
1109
 
1102
1110
  ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1103
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
1104
- const size_t *samplesSizes, unsigned nbSamples,
1105
- ZDICT_cover_params_t *parameters) {
1111
+ void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
1112
+ const size_t* samplesSizes, unsigned nbSamples,
1113
+ ZDICT_cover_params_t* parameters)
1114
+ {
1106
1115
  /* constants */
1107
1116
  const unsigned nbThreads = parameters->nbThreads;
1108
1117
  const double splitPoint =
1109
- parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
1118
+ parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
1110
1119
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
1111
1120
  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
1112
1121
  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2017-2020, Facebook, Inc.
2
+ * Copyright (c) 2017-2021, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -152,6 +152,6 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
152
152
  * smallest dictionary within a specified regression of the compressed size
153
153
  * from the largest dictionary.
154
154
  */
155
- COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
155
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
156
156
  size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
157
157
  size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
@@ -1576,7 +1576,7 @@ note:
1576
1576
  /* Construct the inverse suffix array of type B* suffixes using trsort. */
1577
1577
  trsort(ISAb, SA, m, 1);
1578
1578
 
1579
- /* Set the sorted order of tyoe B* suffixes. */
1579
+ /* Set the sorted order of type B* suffixes. */
1580
1580
  for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1581
1581
  for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1582
1582
  if(0 <= i) {
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2018-2020, Facebook, Inc.
2
+ * Copyright (c) 2018-2021, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -21,6 +21,7 @@
21
21
  #include "../common/threading.h"
22
22
  #include "cover.h"
23
23
  #include "../common/zstd_internal.h" /* includes zstd.h */
24
+ #include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
24
25
  #ifndef ZDICT_STATIC_LINKING_ONLY
25
26
  #define ZDICT_STATIC_LINKING_ONLY
26
27
  #endif
@@ -33,7 +34,7 @@
33
34
  #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
34
35
  #define FASTCOVER_MAX_F 31
35
36
  #define FASTCOVER_MAX_ACCEL 10
36
- #define DEFAULT_SPLITPOINT 0.75
37
+ #define FASTCOVER_DEFAULT_SPLITPOINT 0.75
37
38
  #define DEFAULT_F 20
38
39
  #define DEFAULT_ACCEL 1
39
40
 
@@ -41,50 +42,50 @@
41
42
  /*-*************************************
42
43
  * Console display
43
44
  ***************************************/
45
+ #ifndef LOCALDISPLAYLEVEL
44
46
  static int g_displayLevel = 2;
47
+ #endif
48
+ #undef DISPLAY
45
49
  #define DISPLAY(...) \
46
50
  { \
47
51
  fprintf(stderr, __VA_ARGS__); \
48
52
  fflush(stderr); \
49
53
  }
54
+ #undef LOCALDISPLAYLEVEL
50
55
  #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
51
56
  if (displayLevel >= l) { \
52
57
  DISPLAY(__VA_ARGS__); \
53
58
  } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
59
+ #undef DISPLAYLEVEL
54
60
  #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
55
61
 
62
+ #ifndef LOCALDISPLAYUPDATE
63
+ static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
64
+ static clock_t g_time = 0;
65
+ #endif
66
+ #undef LOCALDISPLAYUPDATE
56
67
  #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
57
68
  if (displayLevel >= l) { \
58
- if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
69
+ if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
59
70
  g_time = clock(); \
60
71
  DISPLAY(__VA_ARGS__); \
61
72
  } \
62
73
  }
74
+ #undef DISPLAYUPDATE
63
75
  #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
64
- static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
65
- static clock_t g_time = 0;
66
76
 
67
77
 
68
78
  /*-*************************************
69
79
  * Hash Functions
70
80
  ***************************************/
71
- static const U64 prime6bytes = 227718039650203ULL;
72
- static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
73
- static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
74
-
75
- static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
76
- static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
77
- static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
78
-
79
-
80
81
  /**
81
- * Hash the d-byte value pointed to by p and mod 2^f
82
+ * Hash the d-byte value pointed to by p and mod 2^f into the frequency vector
82
83
  */
83
- static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) {
84
+ static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) {
84
85
  if (d == 6) {
85
- return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1);
86
+ return ZSTD_hash6Ptr(p, f);
86
87
  }
87
- return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1);
88
+ return ZSTD_hash8Ptr(p, f);
88
89
  }
89
90
 
90
91
 
@@ -461,20 +462,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
461
462
  * This function is thread safe if zstd is compiled with multithreaded support.
462
463
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
463
464
  */
464
- static void FASTCOVER_tryParameters(void *opaque)
465
+ static void FASTCOVER_tryParameters(void* opaque)
465
466
  {
466
467
  /* Save parameters as local variables */
467
- FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
468
+ FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
468
469
  const FASTCOVER_ctx_t *const ctx = data->ctx;
469
470
  const ZDICT_cover_params_t parameters = data->parameters;
470
471
  size_t dictBufferCapacity = data->dictBufferCapacity;
471
472
  size_t totalCompressedSize = ERROR(GENERIC);
472
473
  /* Initialize array to keep track of frequency of dmer within activeSegment */
473
- U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
474
+ U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
474
475
  /* Allocate space for hash table, dict, and freqs */
475
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
476
+ BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
476
477
  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
477
- U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
478
+ U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
478
479
  if (!segmentFreqs || !dict || !freqs) {
479
480
  DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
480
481
  goto _cleanup;
@@ -486,7 +487,7 @@ static void FASTCOVER_tryParameters(void *opaque)
486
487
  parameters, segmentFreqs);
487
488
 
488
489
  const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
489
- selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
490
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
490
491
  ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
491
492
  totalCompressedSize);
492
493
 
@@ -617,7 +618,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
617
618
  /* constants */
618
619
  const unsigned nbThreads = parameters->nbThreads;
619
620
  const double splitPoint =
620
- parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
621
+ parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
621
622
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
622
623
  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
623
624
  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;