extzstd 0.3.2 → 0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (112)
  1. checksums.yaml +4 -4
  2. data/README.md +4 -3
  3. data/contrib/zstd/CHANGELOG +225 -1
  4. data/contrib/zstd/CONTRIBUTING.md +158 -75
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +106 -69
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +64 -36
  9. data/contrib/zstd/SECURITY.md +15 -0
  10. data/contrib/zstd/TESTING.md +2 -3
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +117 -199
  13. data/contrib/zstd/lib/README.md +37 -7
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +80 -86
  17. data/contrib/zstd/lib/common/compiler.h +225 -63
  18. data/contrib/zstd/lib/common/cpu.h +37 -1
  19. data/contrib/zstd/lib/common/debug.c +7 -1
  20. data/contrib/zstd/lib/common/debug.h +21 -12
  21. data/contrib/zstd/lib/common/entropy_common.c +15 -37
  22. data/contrib/zstd/lib/common/error_private.c +9 -2
  23. data/contrib/zstd/lib/common/error_private.h +93 -5
  24. data/contrib/zstd/lib/common/fse.h +12 -87
  25. data/contrib/zstd/lib/common/fse_decompress.c +37 -117
  26. data/contrib/zstd/lib/common/huf.h +97 -172
  27. data/contrib/zstd/lib/common/mem.h +58 -58
  28. data/contrib/zstd/lib/common/pool.c +38 -17
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +158 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -14
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +6 -814
  34. data/contrib/zstd/lib/common/xxhash.h +6930 -195
  35. data/contrib/zstd/lib/common/zstd_common.c +1 -36
  36. data/contrib/zstd/lib/common/zstd_deps.h +1 -1
  37. data/contrib/zstd/lib/common/zstd_internal.h +68 -154
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +75 -155
  41. data/contrib/zstd/lib/compress/hist.c +1 -1
  42. data/contrib/zstd/lib/compress/hist.h +1 -1
  43. data/contrib/zstd/lib/compress/huf_compress.c +810 -259
  44. data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
  63. data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
  79. data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +237 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +4 -3
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
  102. data/contrib/zstd/lib/zstd.h +1030 -332
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
  104. data/ext/extconf.rb +26 -7
  105. data/ext/extzstd.c +51 -24
  106. data/ext/extzstd.h +33 -6
  107. data/ext/extzstd_stream.c +74 -31
  108. data/ext/libzstd_conf.h +0 -1
  109. data/ext/zstd_decompress_asm.S +1 -0
  110. metadata +17 -7
  111. data/contrib/zstd/appveyor.yml +0 -292
  112. data/ext/depend +0 -2
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2018-2020, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -16,21 +16,29 @@
16
16
  #include <string.h> /* memset */
17
17
  #include <time.h> /* clock */
18
18
 
19
+ #ifndef ZDICT_STATIC_LINKING_ONLY
20
+ # define ZDICT_STATIC_LINKING_ONLY
21
+ #endif
22
+
19
23
  #include "../common/mem.h" /* read */
20
24
  #include "../common/pool.h"
21
25
  #include "../common/threading.h"
22
- #include "cover.h"
23
26
  #include "../common/zstd_internal.h" /* includes zstd.h */
24
27
  #include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
25
- #ifndef ZDICT_STATIC_LINKING_ONLY
26
- #define ZDICT_STATIC_LINKING_ONLY
27
- #endif
28
- #include "zdict.h"
28
+ #include "../zdict.h"
29
+ #include "cover.h"
29
30
 
30
31
 
31
32
  /*-*************************************
32
33
  * Constants
33
34
  ***************************************/
35
+ /**
36
+ * There are 32bit indexes used to ref samples, so limit samples size to 4GB
37
+ * on 64bit builds.
38
+ * For 32bit builds we choose 1 GB.
39
+ * Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
40
+ * contiguous buffer, so 1GB is already a high limit.
41
+ */
34
42
  #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
35
43
  #define FASTCOVER_MAX_F 31
36
44
  #define FASTCOVER_MAX_ACCEL 10
@@ -43,7 +51,7 @@
43
51
  * Console display
44
52
  ***************************************/
45
53
  #ifndef LOCALDISPLAYLEVEL
46
- static int g_displayLevel = 2;
54
+ static int g_displayLevel = 0;
47
55
  #endif
48
56
  #undef DISPLAY
49
57
  #define DISPLAY(...) \
@@ -296,7 +304,7 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
296
304
 
297
305
  /**
298
306
  * Prepare a context for dictionary building.
299
- * The context is only dependent on the parameter `d` and can used multiple
307
+ * The context is only dependent on the parameter `d` and can be used multiple
300
308
  * times.
301
309
  * Returns 0 on success or error code on error.
302
310
  * The context must be destroyed with `FASTCOVER_ctx_destroy()`.
@@ -462,20 +470,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
462
470
  * This function is thread safe if zstd is compiled with multithreaded support.
463
471
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
464
472
  */
465
- static void FASTCOVER_tryParameters(void *opaque)
473
+ static void FASTCOVER_tryParameters(void* opaque)
466
474
  {
467
475
  /* Save parameters as local variables */
468
- FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
476
+ FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
469
477
  const FASTCOVER_ctx_t *const ctx = data->ctx;
470
478
  const ZDICT_cover_params_t parameters = data->parameters;
471
479
  size_t dictBufferCapacity = data->dictBufferCapacity;
472
480
  size_t totalCompressedSize = ERROR(GENERIC);
473
481
  /* Initialize array to keep track of frequency of dmer within activeSegment */
474
- U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
482
+ U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
475
483
  /* Allocate space for hash table, dict, and freqs */
476
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
484
+ BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
477
485
  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
478
- U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
486
+ U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
479
487
  if (!segmentFreqs || !dict || !freqs) {
480
488
  DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
481
489
  goto _cleanup;
@@ -537,7 +545,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
537
545
  }
538
546
 
539
547
 
540
- ZDICTLIB_API size_t
548
+ ZDICTLIB_STATIC_API size_t
541
549
  ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
542
550
  const void* samplesBuffer,
543
551
  const size_t* samplesSizes, unsigned nbSamples,
@@ -548,7 +556,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
548
556
  ZDICT_cover_params_t coverParams;
549
557
  FASTCOVER_accel_t accelParams;
550
558
  /* Initialize global data */
551
- g_displayLevel = parameters.zParams.notificationLevel;
559
+ g_displayLevel = (int)parameters.zParams.notificationLevel;
552
560
  /* Assign splitPoint and f if not provided */
553
561
  parameters.splitPoint = 1.0;
554
562
  parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
@@ -606,7 +614,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
606
614
  }
607
615
 
608
616
 
609
- ZDICTLIB_API size_t
617
+ ZDICTLIB_STATIC_API size_t
610
618
  ZDICT_optimizeTrainFromBuffer_fastCover(
611
619
  void* dictBuffer, size_t dictBufferCapacity,
612
620
  const void* samplesBuffer,
@@ -631,7 +639,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
631
639
  const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
632
640
  const unsigned shrinkDict = 0;
633
641
  /* Local variables */
634
- const int displayLevel = parameters->zParams.notificationLevel;
642
+ const int displayLevel = (int)parameters->zParams.notificationLevel;
635
643
  unsigned iteration = 1;
636
644
  unsigned d;
637
645
  unsigned k;
@@ -715,7 +723,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
715
723
  data->parameters.splitPoint = splitPoint;
716
724
  data->parameters.steps = kSteps;
717
725
  data->parameters.shrinkDict = shrinkDict;
718
- data->parameters.zParams.notificationLevel = g_displayLevel;
726
+ data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel;
719
727
  /* Check the parameters */
720
728
  if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
721
729
  data->ctx->f, accel)) {
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -23,9 +23,13 @@
23
23
  /* Unix Large Files support (>4GB) */
24
24
  #define _FILE_OFFSET_BITS 64
25
25
  #if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */
26
+ # ifndef _LARGEFILE_SOURCE
26
27
  # define _LARGEFILE_SOURCE
28
+ # endif
27
29
  #elif ! defined(__LP64__) /* No point defining Large file for 64 bit */
30
+ # ifndef _LARGEFILE64_SOURCE
28
31
  # define _LARGEFILE64_SOURCE
32
+ # endif
29
33
  #endif
30
34
 
31
35
 
@@ -37,18 +41,19 @@
37
41
  #include <stdio.h> /* fprintf, fopen, ftello64 */
38
42
  #include <time.h> /* clock */
39
43
 
44
+ #ifndef ZDICT_STATIC_LINKING_ONLY
45
+ # define ZDICT_STATIC_LINKING_ONLY
46
+ #endif
47
+
40
48
  #include "../common/mem.h" /* read */
41
49
  #include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
42
- #define HUF_STATIC_LINKING_ONLY
43
50
  #include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
44
51
  #include "../common/zstd_internal.h" /* includes zstd.h */
45
52
  #include "../common/xxhash.h" /* XXH64 */
46
- #include "divsufsort.h"
47
- #ifndef ZDICT_STATIC_LINKING_ONLY
48
- # define ZDICT_STATIC_LINKING_ONLY
49
- #endif
50
- #include "zdict.h"
51
53
  #include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
54
+ #include "../zdict.h"
55
+ #include "divsufsort.h"
56
+ #include "../common/bits.h" /* ZSTD_NbCommonBytes */
52
57
 
53
58
 
54
59
  /*-*************************************
@@ -69,9 +74,9 @@ static const U32 g_selectivity_default = 9;
69
74
  * Console display
70
75
  ***************************************/
71
76
  #undef DISPLAY
72
- #define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
77
+ #define DISPLAY(...) do { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } while (0)
73
78
  #undef DISPLAYLEVEL
74
- #define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
79
+ #define DISPLAYLEVEL(l, ...) do { if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } } while (0) /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
75
80
 
76
81
  static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
77
82
 
@@ -125,65 +130,6 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
125
130
  /*-********************************************************
126
131
  * Dictionary training functions
127
132
  **********************************************************/
128
- static unsigned ZDICT_NbCommonBytes (size_t val)
129
- {
130
- if (MEM_isLittleEndian()) {
131
- if (MEM_64bits()) {
132
- # if defined(_MSC_VER) && defined(_WIN64)
133
- unsigned long r = 0;
134
- _BitScanForward64( &r, (U64)val );
135
- return (unsigned)(r>>3);
136
- # elif defined(__GNUC__) && (__GNUC__ >= 3)
137
- return (__builtin_ctzll((U64)val) >> 3);
138
- # else
139
- static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
140
- return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
141
- # endif
142
- } else { /* 32 bits */
143
- # if defined(_MSC_VER)
144
- unsigned long r=0;
145
- _BitScanForward( &r, (U32)val );
146
- return (unsigned)(r>>3);
147
- # elif defined(__GNUC__) && (__GNUC__ >= 3)
148
- return (__builtin_ctz((U32)val) >> 3);
149
- # else
150
- static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
151
- return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
152
- # endif
153
- }
154
- } else { /* Big Endian CPU */
155
- if (MEM_64bits()) {
156
- # if defined(_MSC_VER) && defined(_WIN64)
157
- unsigned long r = 0;
158
- _BitScanReverse64( &r, val );
159
- return (unsigned)(r>>3);
160
- # elif defined(__GNUC__) && (__GNUC__ >= 3)
161
- return (__builtin_clzll(val) >> 3);
162
- # else
163
- unsigned r;
164
- const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
165
- if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
166
- if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
167
- r += (!val);
168
- return r;
169
- # endif
170
- } else { /* 32 bits */
171
- # if defined(_MSC_VER)
172
- unsigned long r = 0;
173
- _BitScanReverse( &r, (unsigned long)val );
174
- return (unsigned)(r>>3);
175
- # elif defined(__GNUC__) && (__GNUC__ >= 3)
176
- return (__builtin_clz((U32)val) >> 3);
177
- # else
178
- unsigned r;
179
- if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
180
- r += (!val);
181
- return r;
182
- # endif
183
- } }
184
- }
185
-
186
-
187
133
  /*! ZDICT_count() :
188
134
  Count the nb of common bytes between 2 pointers.
189
135
  Note : this function presumes end of buffer followed by noisy guard band.
@@ -198,7 +144,7 @@ static size_t ZDICT_count(const void* pIn, const void* pMatch)
198
144
  pMatch = (const char*)pMatch+sizeof(size_t);
199
145
  continue;
200
146
  }
201
- pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff);
147
+ pIn = (const char*)pIn+ZSTD_NbCommonBytes(diff);
202
148
  return (size_t)((const char*)pIn - pStart);
203
149
  }
204
150
  }
@@ -230,7 +176,7 @@ static dictItem ZDICT_analyzePos(
230
176
  U32 savings[LLIMIT] = {0};
231
177
  const BYTE* b = (const BYTE*)buffer;
232
178
  size_t maxLength = LLIMIT;
233
- size_t pos = suffix[start];
179
+ size_t pos = (size_t)suffix[start];
234
180
  U32 end = start;
235
181
  dictItem solution;
236
182
 
@@ -364,7 +310,7 @@ static dictItem ZDICT_analyzePos(
364
310
  savings[i] = savings[i-1] + (lengthList[i] * (i-3));
365
311
 
366
312
  DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
367
- (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
313
+ (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);
368
314
 
369
315
  solution.pos = (U32)pos;
370
316
  solution.length = (U32)maxLength;
@@ -374,7 +320,7 @@ static dictItem ZDICT_analyzePos(
374
320
  { U32 id;
375
321
  for (id=start; id<end; id++) {
376
322
  U32 p, pEnd, length;
377
- U32 const testedPos = suffix[id];
323
+ U32 const testedPos = (U32)suffix[id];
378
324
  if (testedPos == pos)
379
325
  length = solution.length;
380
326
  else {
@@ -426,7 +372,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
426
372
  elt = table[u];
427
373
  /* sort : improve rank */
428
374
  while ((u>1) && (table[u-1].savings < elt.savings))
429
- table[u] = table[u-1], u--;
375
+ table[u] = table[u-1], u--;
430
376
  table[u] = elt;
431
377
  return u;
432
378
  } }
@@ -437,7 +383,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
437
383
 
438
384
  if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
439
385
  /* append */
440
- int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
386
+ int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length);
441
387
  table[u].savings += elt.length / 8; /* rough approx bonus */
442
388
  if (addedLength > 0) { /* otherwise, elt fully included into existing */
443
389
  table[u].length += addedLength;
@@ -531,10 +477,16 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
531
477
  clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
532
478
 
533
479
  # undef DISPLAYUPDATE
534
- # define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
535
- if (ZDICT_clockSpan(displayClock) > refreshRate) \
536
- { displayClock = clock(); DISPLAY(__VA_ARGS__); \
537
- if (notificationLevel>=4) fflush(stderr); } }
480
+ # define DISPLAYUPDATE(l, ...) \
481
+ do { \
482
+ if (notificationLevel>=l) { \
483
+ if (ZDICT_clockSpan(displayClock) > refreshRate) { \
484
+ displayClock = clock(); \
485
+ DISPLAY(__VA_ARGS__); \
486
+ } \
487
+ if (notificationLevel>=4) fflush(stderr); \
488
+ } \
489
+ } while (0)
538
490
 
539
491
  /* init */
540
492
  DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
@@ -577,7 +529,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
577
529
  if (solution.length==0) { cursor++; continue; }
578
530
  ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
579
531
  cursor += solution.length;
580
- DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
532
+ DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / (double)bufferSize * 100.0);
581
533
  } }
582
534
 
583
535
  _cleanup:
@@ -620,11 +572,11 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
620
572
  size_t cSize;
621
573
 
622
574
  if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
623
- { size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict);
575
+ { size_t const errorCode = ZSTD_compressBegin_usingCDict_deprecated(esr.zc, esr.dict);
624
576
  if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
625
577
 
626
578
  }
627
- cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
579
+ cSize = ZSTD_compressBlock_deprecated(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
628
580
  if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
629
581
 
630
582
  if (cSize) { /* if == 0; block is not compressible */
@@ -657,8 +609,8 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
657
609
 
658
610
  if (nbSeq >= 2) { /* rep offsets */
659
611
  const seqDef* const seq = seqStorePtr->sequencesStart;
660
- U32 offset1 = seq[0].offset - 3;
661
- U32 offset2 = seq[1].offset - 3;
612
+ U32 offset1 = seq[0].offBase - ZSTD_REP_NUM;
613
+ U32 offset2 = seq[1].offBase - ZSTD_REP_NUM;
662
614
  if (offset1 >= MAXREPOFFSET) offset1 = 0;
663
615
  if (offset2 >= MAXREPOFFSET) offset2 = 0;
664
616
  repOffsets[offset1] += 3;
@@ -729,6 +681,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
729
681
  size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
730
682
  size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
731
683
  BYTE* dstPtr = (BYTE*)dstBuffer;
684
+ U32 wksp[HUF_CTABLE_WORKSPACE_SIZE_U32];
732
685
 
733
686
  /* init */
734
687
  DEBUGLOG(4, "ZDICT_analyzeEntropy");
@@ -761,8 +714,15 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
761
714
  pos += fileSizes[u];
762
715
  }
763
716
 
717
+ if (notificationLevel >= 4) {
718
+ /* writeStats */
719
+ DISPLAYLEVEL(4, "Offset Code Frequencies : \n");
720
+ for (u=0; u<=offcodeMax; u++) {
721
+ DISPLAYLEVEL(4, "%2u :%7u \n", u, offcodeCount[u]);
722
+ } }
723
+
764
724
  /* analyze, build stats, starting with literals */
765
- { size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
725
+ { size_t maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
766
726
  if (HUF_isError(maxNbBits)) {
767
727
  eSize = maxNbBits;
768
728
  DISPLAYLEVEL(1, " HUF_buildCTable error \n");
@@ -771,7 +731,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
771
731
  if (maxNbBits==8) { /* not compressible : will fail on HUF_writeCTable() */
772
732
  DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
773
733
  ZDICT_flatLit(countLit); /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */
774
- maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
734
+ maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
775
735
  assert(maxNbBits==9);
776
736
  }
777
737
  huffLog = (U32)maxNbBits;
@@ -812,7 +772,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
812
772
  llLog = (U32)errorCode;
813
773
 
814
774
  /* write result to buffer */
815
- { size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
775
+ { size_t const hhSize = HUF_writeCTable_wksp(dstPtr, maxDstSize, hufTable, 255, huffLog, wksp, sizeof(wksp));
816
776
  if (HUF_isError(hhSize)) {
817
777
  eSize = hhSize;
818
778
  DISPLAYLEVEL(1, "HUF_writeCTable error \n");
@@ -867,7 +827,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
867
827
  MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
868
828
  #else
869
829
  /* at this stage, we don't use the result of "most common first offset",
870
- as the impact of statistics is not properly evaluated */
830
+ * as the impact of statistics is not properly evaluated */
871
831
  MEM_writeLE32(dstPtr+0, repStartValue[0]);
872
832
  MEM_writeLE32(dstPtr+4, repStartValue[1]);
873
833
  MEM_writeLE32(dstPtr+8, repStartValue[2]);
@@ -883,6 +843,17 @@ _cleanup:
883
843
  }
884
844
 
885
845
 
846
+ /**
847
+ * @returns the maximum repcode value
848
+ */
849
+ static U32 ZDICT_maxRep(U32 const reps[ZSTD_REP_NUM])
850
+ {
851
+ U32 maxRep = reps[0];
852
+ int r;
853
+ for (r = 1; r < ZSTD_REP_NUM; ++r)
854
+ maxRep = MAX(maxRep, reps[r]);
855
+ return maxRep;
856
+ }
886
857
 
887
858
  size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
888
859
  const void* customDictContent, size_t dictContentSize,
@@ -894,11 +865,13 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
894
865
  BYTE header[HBUFFSIZE];
895
866
  int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
896
867
  U32 const notificationLevel = params.notificationLevel;
868
+ /* The final dictionary content must be at least as large as the largest repcode */
869
+ size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue);
870
+ size_t paddingSize;
897
871
 
898
872
  /* check conditions */
899
873
  DEBUGLOG(4, "ZDICT_finalizeDictionary");
900
874
  if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
901
- if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
902
875
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
903
876
 
904
877
  /* dictionary header */
@@ -922,12 +895,43 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
922
895
  hSize += eSize;
923
896
  }
924
897
 
925
- /* copy elements in final buffer ; note : src and dst buffer can overlap */
926
- if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
927
- { size_t const dictSize = hSize + dictContentSize;
928
- char* dictEnd = (char*)dictBuffer + dictSize;
929
- memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
930
- memcpy(dictBuffer, header, hSize);
898
+ /* Shrink the content size if it doesn't fit in the buffer */
899
+ if (hSize + dictContentSize > dictBufferCapacity) {
900
+ dictContentSize = dictBufferCapacity - hSize;
901
+ }
902
+
903
+ /* Pad the dictionary content with zeros if it is too small */
904
+ if (dictContentSize < minContentSize) {
905
+ RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
906
+ "dictBufferCapacity too small to fit max repcode");
907
+ paddingSize = minContentSize - dictContentSize;
908
+ } else {
909
+ paddingSize = 0;
910
+ }
911
+
912
+ {
913
+ size_t const dictSize = hSize + paddingSize + dictContentSize;
914
+
915
+ /* The dictionary consists of the header, optional padding, and the content.
916
+ * The padding comes before the content because the "best" position in the
917
+ * dictionary is the last byte.
918
+ */
919
+ BYTE* const outDictHeader = (BYTE*)dictBuffer;
920
+ BYTE* const outDictPadding = outDictHeader + hSize;
921
+ BYTE* const outDictContent = outDictPadding + paddingSize;
922
+
923
+ assert(dictSize <= dictBufferCapacity);
924
+ assert(outDictContent + dictContentSize == (BYTE*)dictBuffer + dictSize);
925
+
926
+ /* First copy the customDictContent into its final location.
927
+ * `customDictContent` and `dictBuffer` may overlap, so we must
928
+ * do this before any other writes into the output buffer.
929
+ * Then copy the header & padding into the output buffer.
930
+ */
931
+ memmove(outDictContent, customDictContent, dictContentSize);
932
+ memcpy(outDictHeader, header, hSize);
933
+ memset(outDictPadding, 0, paddingSize);
934
+
931
935
  return dictSize;
932
936
  }
933
937
  }
@@ -967,16 +971,11 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
967
971
  return MIN(dictBufferCapacity, hSize+dictContentSize);
968
972
  }
969
973
 
970
- /* Hidden declaration for dbio.c */
971
- size_t ZDICT_trainFromBuffer_unsafe_legacy(
972
- void* dictBuffer, size_t maxDictSize,
973
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
974
- ZDICT_legacy_params_t params);
975
974
  /*! ZDICT_trainFromBuffer_unsafe_legacy() :
976
- * Warning : `samplesBuffer` must be followed by noisy guard band.
975
+ * Warning : `samplesBuffer` must be followed by noisy guard band !!!
977
976
  * @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
978
977
  */
979
- size_t ZDICT_trainFromBuffer_unsafe_legacy(
978
+ static size_t ZDICT_trainFromBuffer_unsafe_legacy(
980
979
  void* dictBuffer, size_t maxDictSize,
981
980
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
982
981
  ZDICT_legacy_params_t params)
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -124,6 +124,20 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
124
124
  const void* dict,size_t dictSize)
125
125
  {
126
126
  U32 const version = ZSTD_isLegacy(src, compressedSize);
127
+ char x;
128
+ /* Avoid passing NULL to legacy decoding. */
129
+ if (dst == NULL) {
130
+ assert(dstCapacity == 0);
131
+ dst = &x;
132
+ }
133
+ if (src == NULL) {
134
+ assert(compressedSize == 0);
135
+ src = &x;
136
+ }
137
+ if (dict == NULL) {
138
+ assert(dictSize == 0);
139
+ dict = &x;
140
+ }
127
141
  (void)dst; (void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */
128
142
  switch(version)
129
143
  {
@@ -242,6 +256,13 @@ MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size
242
256
  frameSizeInfo.compressedSize = ERROR(srcSize_wrong);
243
257
  frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
244
258
  }
259
+ /* In all cases, decompressedBound == nbBlocks * ZSTD_BLOCKSIZE_MAX.
260
+ * So we can compute nbBlocks without having to change every function.
261
+ */
262
+ if (frameSizeInfo.decompressedBound != ZSTD_CONTENTSIZE_ERROR) {
263
+ assert((frameSizeInfo.decompressedBound & (ZSTD_BLOCKSIZE_MAX - 1)) == 0);
264
+ frameSizeInfo.nbBlocks = (size_t)(frameSizeInfo.decompressedBound / ZSTD_BLOCKSIZE_MAX);
265
+ }
245
266
  return frameSizeInfo;
246
267
  }
247
268
 
@@ -280,6 +301,12 @@ MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
280
301
  MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion,
281
302
  const void* dict, size_t dictSize)
282
303
  {
304
+ char x;
305
+ /* Avoid passing NULL to legacy decoding. */
306
+ if (dict == NULL) {
307
+ assert(dictSize == 0);
308
+ dict = &x;
309
+ }
283
310
  DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion);
284
311
  if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion);
285
312
  switch(newVersion)
@@ -339,6 +366,16 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
339
366
  MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
340
367
  ZSTD_outBuffer* output, ZSTD_inBuffer* input)
341
368
  {
369
+ static char x;
370
+ /* Avoid passing NULL to legacy decoding. */
371
+ if (output->dst == NULL) {
372
+ assert(output->size == 0);
373
+ output->dst = &x;
374
+ }
375
+ if (input->src == NULL) {
376
+ assert(input->size == 0);
377
+ input->src = &x;
378
+ }
342
379
  DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version);
343
380
  switch(version)
344
381
  {