extzstd 0.3.2 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. checksums.yaml +4 -4
  2. data/README.md +4 -3
  3. data/contrib/zstd/CHANGELOG +225 -1
  4. data/contrib/zstd/CONTRIBUTING.md +158 -75
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +106 -69
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +64 -36
  9. data/contrib/zstd/SECURITY.md +15 -0
  10. data/contrib/zstd/TESTING.md +2 -3
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +117 -199
  13. data/contrib/zstd/lib/README.md +37 -7
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +80 -86
  17. data/contrib/zstd/lib/common/compiler.h +225 -63
  18. data/contrib/zstd/lib/common/cpu.h +37 -1
  19. data/contrib/zstd/lib/common/debug.c +7 -1
  20. data/contrib/zstd/lib/common/debug.h +21 -12
  21. data/contrib/zstd/lib/common/entropy_common.c +15 -37
  22. data/contrib/zstd/lib/common/error_private.c +9 -2
  23. data/contrib/zstd/lib/common/error_private.h +93 -5
  24. data/contrib/zstd/lib/common/fse.h +12 -87
  25. data/contrib/zstd/lib/common/fse_decompress.c +37 -117
  26. data/contrib/zstd/lib/common/huf.h +97 -172
  27. data/contrib/zstd/lib/common/mem.h +58 -58
  28. data/contrib/zstd/lib/common/pool.c +38 -17
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +158 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -14
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +6 -814
  34. data/contrib/zstd/lib/common/xxhash.h +6930 -195
  35. data/contrib/zstd/lib/common/zstd_common.c +1 -36
  36. data/contrib/zstd/lib/common/zstd_deps.h +1 -1
  37. data/contrib/zstd/lib/common/zstd_internal.h +68 -154
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +75 -155
  41. data/contrib/zstd/lib/compress/hist.c +1 -1
  42. data/contrib/zstd/lib/compress/hist.h +1 -1
  43. data/contrib/zstd/lib/compress/huf_compress.c +810 -259
  44. data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
  63. data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
  79. data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +237 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +4 -3
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
  102. data/contrib/zstd/lib/zstd.h +1030 -332
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
  104. data/ext/extconf.rb +26 -7
  105. data/ext/extzstd.c +51 -24
  106. data/ext/extzstd.h +33 -6
  107. data/ext/extzstd_stream.c +74 -31
  108. data/ext/libzstd_conf.h +0 -1
  109. data/ext/zstd_decompress_asm.S +1 -0
  110. metadata +17 -7
  111. data/contrib/zstd/appveyor.yml +0 -292
  112. data/ext/depend +0 -2
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2018-2020, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -16,21 +16,29 @@
16
16
  #include <string.h> /* memset */
17
17
  #include <time.h> /* clock */
18
18
 
19
+ #ifndef ZDICT_STATIC_LINKING_ONLY
20
+ # define ZDICT_STATIC_LINKING_ONLY
21
+ #endif
22
+
19
23
  #include "../common/mem.h" /* read */
20
24
  #include "../common/pool.h"
21
25
  #include "../common/threading.h"
22
- #include "cover.h"
23
26
  #include "../common/zstd_internal.h" /* includes zstd.h */
24
27
  #include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
25
- #ifndef ZDICT_STATIC_LINKING_ONLY
26
- #define ZDICT_STATIC_LINKING_ONLY
27
- #endif
28
- #include "zdict.h"
28
+ #include "../zdict.h"
29
+ #include "cover.h"
29
30
 
30
31
 
31
32
  /*-*************************************
32
33
  * Constants
33
34
  ***************************************/
35
+ /**
36
+ * There are 32bit indexes used to ref samples, so limit samples size to 4GB
37
+ * on 64bit builds.
38
+ * For 32bit builds we choose 1 GB.
39
+ * Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
40
+ * contiguous buffer, so 1GB is already a high limit.
41
+ */
34
42
  #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
35
43
  #define FASTCOVER_MAX_F 31
36
44
  #define FASTCOVER_MAX_ACCEL 10
@@ -43,7 +51,7 @@
43
51
  * Console display
44
52
  ***************************************/
45
53
  #ifndef LOCALDISPLAYLEVEL
46
- static int g_displayLevel = 2;
54
+ static int g_displayLevel = 0;
47
55
  #endif
48
56
  #undef DISPLAY
49
57
  #define DISPLAY(...) \
@@ -296,7 +304,7 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
296
304
 
297
305
  /**
298
306
  * Prepare a context for dictionary building.
299
- * The context is only dependent on the parameter `d` and can used multiple
307
+ * The context is only dependent on the parameter `d` and can be used multiple
300
308
  * times.
301
309
  * Returns 0 on success or error code on error.
302
310
  * The context must be destroyed with `FASTCOVER_ctx_destroy()`.
@@ -462,20 +470,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
462
470
  * This function is thread safe if zstd is compiled with multithreaded support.
463
471
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
464
472
  */
465
- static void FASTCOVER_tryParameters(void *opaque)
473
+ static void FASTCOVER_tryParameters(void* opaque)
466
474
  {
467
475
  /* Save parameters as local variables */
468
- FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
476
+ FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
469
477
  const FASTCOVER_ctx_t *const ctx = data->ctx;
470
478
  const ZDICT_cover_params_t parameters = data->parameters;
471
479
  size_t dictBufferCapacity = data->dictBufferCapacity;
472
480
  size_t totalCompressedSize = ERROR(GENERIC);
473
481
  /* Initialize array to keep track of frequency of dmer within activeSegment */
474
- U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
482
+ U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
475
483
  /* Allocate space for hash table, dict, and freqs */
476
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
484
+ BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
477
485
  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
478
- U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
486
+ U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
479
487
  if (!segmentFreqs || !dict || !freqs) {
480
488
  DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
481
489
  goto _cleanup;
@@ -537,7 +545,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
537
545
  }
538
546
 
539
547
 
540
- ZDICTLIB_API size_t
548
+ ZDICTLIB_STATIC_API size_t
541
549
  ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
542
550
  const void* samplesBuffer,
543
551
  const size_t* samplesSizes, unsigned nbSamples,
@@ -548,7 +556,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
548
556
  ZDICT_cover_params_t coverParams;
549
557
  FASTCOVER_accel_t accelParams;
550
558
  /* Initialize global data */
551
- g_displayLevel = parameters.zParams.notificationLevel;
559
+ g_displayLevel = (int)parameters.zParams.notificationLevel;
552
560
  /* Assign splitPoint and f if not provided */
553
561
  parameters.splitPoint = 1.0;
554
562
  parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
@@ -606,7 +614,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
606
614
  }
607
615
 
608
616
 
609
- ZDICTLIB_API size_t
617
+ ZDICTLIB_STATIC_API size_t
610
618
  ZDICT_optimizeTrainFromBuffer_fastCover(
611
619
  void* dictBuffer, size_t dictBufferCapacity,
612
620
  const void* samplesBuffer,
@@ -631,7 +639,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
631
639
  const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
632
640
  const unsigned shrinkDict = 0;
633
641
  /* Local variables */
634
- const int displayLevel = parameters->zParams.notificationLevel;
642
+ const int displayLevel = (int)parameters->zParams.notificationLevel;
635
643
  unsigned iteration = 1;
636
644
  unsigned d;
637
645
  unsigned k;
@@ -715,7 +723,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
715
723
  data->parameters.splitPoint = splitPoint;
716
724
  data->parameters.steps = kSteps;
717
725
  data->parameters.shrinkDict = shrinkDict;
718
- data->parameters.zParams.notificationLevel = g_displayLevel;
726
+ data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel;
719
727
  /* Check the parameters */
720
728
  if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
721
729
  data->ctx->f, accel)) {
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -23,9 +23,13 @@
23
23
  /* Unix Large Files support (>4GB) */
24
24
  #define _FILE_OFFSET_BITS 64
25
25
  #if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */
26
+ # ifndef _LARGEFILE_SOURCE
26
27
  # define _LARGEFILE_SOURCE
28
+ # endif
27
29
  #elif ! defined(__LP64__) /* No point defining Large file for 64 bit */
30
+ # ifndef _LARGEFILE64_SOURCE
28
31
  # define _LARGEFILE64_SOURCE
32
+ # endif
29
33
  #endif
30
34
 
31
35
 
@@ -37,18 +41,19 @@
37
41
  #include <stdio.h> /* fprintf, fopen, ftello64 */
38
42
  #include <time.h> /* clock */
39
43
 
44
+ #ifndef ZDICT_STATIC_LINKING_ONLY
45
+ # define ZDICT_STATIC_LINKING_ONLY
46
+ #endif
47
+
40
48
  #include "../common/mem.h" /* read */
41
49
  #include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
42
- #define HUF_STATIC_LINKING_ONLY
43
50
  #include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
44
51
  #include "../common/zstd_internal.h" /* includes zstd.h */
45
52
  #include "../common/xxhash.h" /* XXH64 */
46
- #include "divsufsort.h"
47
- #ifndef ZDICT_STATIC_LINKING_ONLY
48
- # define ZDICT_STATIC_LINKING_ONLY
49
- #endif
50
- #include "zdict.h"
51
53
  #include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
54
+ #include "../zdict.h"
55
+ #include "divsufsort.h"
56
+ #include "../common/bits.h" /* ZSTD_NbCommonBytes */
52
57
 
53
58
 
54
59
  /*-*************************************
@@ -69,9 +74,9 @@ static const U32 g_selectivity_default = 9;
69
74
  * Console display
70
75
  ***************************************/
71
76
  #undef DISPLAY
72
- #define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
77
+ #define DISPLAY(...) do { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } while (0)
73
78
  #undef DISPLAYLEVEL
74
- #define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
79
+ #define DISPLAYLEVEL(l, ...) do { if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } } while (0) /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
75
80
 
76
81
  static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
77
82
 
@@ -125,65 +130,6 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
125
130
  /*-********************************************************
126
131
  * Dictionary training functions
127
132
  **********************************************************/
128
- static unsigned ZDICT_NbCommonBytes (size_t val)
129
- {
130
- if (MEM_isLittleEndian()) {
131
- if (MEM_64bits()) {
132
- # if defined(_MSC_VER) && defined(_WIN64)
133
- unsigned long r = 0;
134
- _BitScanForward64( &r, (U64)val );
135
- return (unsigned)(r>>3);
136
- # elif defined(__GNUC__) && (__GNUC__ >= 3)
137
- return (__builtin_ctzll((U64)val) >> 3);
138
- # else
139
- static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
140
- return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
141
- # endif
142
- } else { /* 32 bits */
143
- # if defined(_MSC_VER)
144
- unsigned long r=0;
145
- _BitScanForward( &r, (U32)val );
146
- return (unsigned)(r>>3);
147
- # elif defined(__GNUC__) && (__GNUC__ >= 3)
148
- return (__builtin_ctz((U32)val) >> 3);
149
- # else
150
- static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
151
- return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
152
- # endif
153
- }
154
- } else { /* Big Endian CPU */
155
- if (MEM_64bits()) {
156
- # if defined(_MSC_VER) && defined(_WIN64)
157
- unsigned long r = 0;
158
- _BitScanReverse64( &r, val );
159
- return (unsigned)(r>>3);
160
- # elif defined(__GNUC__) && (__GNUC__ >= 3)
161
- return (__builtin_clzll(val) >> 3);
162
- # else
163
- unsigned r;
164
- const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
165
- if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
166
- if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
167
- r += (!val);
168
- return r;
169
- # endif
170
- } else { /* 32 bits */
171
- # if defined(_MSC_VER)
172
- unsigned long r = 0;
173
- _BitScanReverse( &r, (unsigned long)val );
174
- return (unsigned)(r>>3);
175
- # elif defined(__GNUC__) && (__GNUC__ >= 3)
176
- return (__builtin_clz((U32)val) >> 3);
177
- # else
178
- unsigned r;
179
- if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
180
- r += (!val);
181
- return r;
182
- # endif
183
- } }
184
- }
185
-
186
-
187
133
  /*! ZDICT_count() :
188
134
  Count the nb of common bytes between 2 pointers.
189
135
  Note : this function presumes end of buffer followed by noisy guard band.
@@ -198,7 +144,7 @@ static size_t ZDICT_count(const void* pIn, const void* pMatch)
198
144
  pMatch = (const char*)pMatch+sizeof(size_t);
199
145
  continue;
200
146
  }
201
- pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff);
147
+ pIn = (const char*)pIn+ZSTD_NbCommonBytes(diff);
202
148
  return (size_t)((const char*)pIn - pStart);
203
149
  }
204
150
  }
@@ -230,7 +176,7 @@ static dictItem ZDICT_analyzePos(
230
176
  U32 savings[LLIMIT] = {0};
231
177
  const BYTE* b = (const BYTE*)buffer;
232
178
  size_t maxLength = LLIMIT;
233
- size_t pos = suffix[start];
179
+ size_t pos = (size_t)suffix[start];
234
180
  U32 end = start;
235
181
  dictItem solution;
236
182
 
@@ -364,7 +310,7 @@ static dictItem ZDICT_analyzePos(
364
310
  savings[i] = savings[i-1] + (lengthList[i] * (i-3));
365
311
 
366
312
  DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
367
- (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
313
+ (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);
368
314
 
369
315
  solution.pos = (U32)pos;
370
316
  solution.length = (U32)maxLength;
@@ -374,7 +320,7 @@ static dictItem ZDICT_analyzePos(
374
320
  { U32 id;
375
321
  for (id=start; id<end; id++) {
376
322
  U32 p, pEnd, length;
377
- U32 const testedPos = suffix[id];
323
+ U32 const testedPos = (U32)suffix[id];
378
324
  if (testedPos == pos)
379
325
  length = solution.length;
380
326
  else {
@@ -426,7 +372,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
426
372
  elt = table[u];
427
373
  /* sort : improve rank */
428
374
  while ((u>1) && (table[u-1].savings < elt.savings))
429
- table[u] = table[u-1], u--;
375
+ table[u] = table[u-1], u--;
430
376
  table[u] = elt;
431
377
  return u;
432
378
  } }
@@ -437,7 +383,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
437
383
 
438
384
  if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
439
385
  /* append */
440
- int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
386
+ int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length);
441
387
  table[u].savings += elt.length / 8; /* rough approx bonus */
442
388
  if (addedLength > 0) { /* otherwise, elt fully included into existing */
443
389
  table[u].length += addedLength;
@@ -531,10 +477,16 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
531
477
  clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
532
478
 
533
479
  # undef DISPLAYUPDATE
534
- # define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
535
- if (ZDICT_clockSpan(displayClock) > refreshRate) \
536
- { displayClock = clock(); DISPLAY(__VA_ARGS__); \
537
- if (notificationLevel>=4) fflush(stderr); } }
480
+ # define DISPLAYUPDATE(l, ...) \
481
+ do { \
482
+ if (notificationLevel>=l) { \
483
+ if (ZDICT_clockSpan(displayClock) > refreshRate) { \
484
+ displayClock = clock(); \
485
+ DISPLAY(__VA_ARGS__); \
486
+ } \
487
+ if (notificationLevel>=4) fflush(stderr); \
488
+ } \
489
+ } while (0)
538
490
 
539
491
  /* init */
540
492
  DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
@@ -577,7 +529,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
577
529
  if (solution.length==0) { cursor++; continue; }
578
530
  ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
579
531
  cursor += solution.length;
580
- DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
532
+ DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / (double)bufferSize * 100.0);
581
533
  } }
582
534
 
583
535
  _cleanup:
@@ -620,11 +572,11 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
620
572
  size_t cSize;
621
573
 
622
574
  if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
623
- { size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict);
575
+ { size_t const errorCode = ZSTD_compressBegin_usingCDict_deprecated(esr.zc, esr.dict);
624
576
  if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
625
577
 
626
578
  }
627
- cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
579
+ cSize = ZSTD_compressBlock_deprecated(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
628
580
  if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
629
581
 
630
582
  if (cSize) { /* if == 0; block is not compressible */
@@ -657,8 +609,8 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
657
609
 
658
610
  if (nbSeq >= 2) { /* rep offsets */
659
611
  const seqDef* const seq = seqStorePtr->sequencesStart;
660
- U32 offset1 = seq[0].offset - 3;
661
- U32 offset2 = seq[1].offset - 3;
612
+ U32 offset1 = seq[0].offBase - ZSTD_REP_NUM;
613
+ U32 offset2 = seq[1].offBase - ZSTD_REP_NUM;
662
614
  if (offset1 >= MAXREPOFFSET) offset1 = 0;
663
615
  if (offset2 >= MAXREPOFFSET) offset2 = 0;
664
616
  repOffsets[offset1] += 3;
@@ -729,6 +681,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
729
681
  size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
730
682
  size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
731
683
  BYTE* dstPtr = (BYTE*)dstBuffer;
684
+ U32 wksp[HUF_CTABLE_WORKSPACE_SIZE_U32];
732
685
 
733
686
  /* init */
734
687
  DEBUGLOG(4, "ZDICT_analyzeEntropy");
@@ -761,8 +714,15 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
761
714
  pos += fileSizes[u];
762
715
  }
763
716
 
717
+ if (notificationLevel >= 4) {
718
+ /* writeStats */
719
+ DISPLAYLEVEL(4, "Offset Code Frequencies : \n");
720
+ for (u=0; u<=offcodeMax; u++) {
721
+ DISPLAYLEVEL(4, "%2u :%7u \n", u, offcodeCount[u]);
722
+ } }
723
+
764
724
  /* analyze, build stats, starting with literals */
765
- { size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
725
+ { size_t maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
766
726
  if (HUF_isError(maxNbBits)) {
767
727
  eSize = maxNbBits;
768
728
  DISPLAYLEVEL(1, " HUF_buildCTable error \n");
@@ -771,7 +731,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
771
731
  if (maxNbBits==8) { /* not compressible : will fail on HUF_writeCTable() */
772
732
  DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
773
733
  ZDICT_flatLit(countLit); /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */
774
- maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
734
+ maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
775
735
  assert(maxNbBits==9);
776
736
  }
777
737
  huffLog = (U32)maxNbBits;
@@ -812,7 +772,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
812
772
  llLog = (U32)errorCode;
813
773
 
814
774
  /* write result to buffer */
815
- { size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
775
+ { size_t const hhSize = HUF_writeCTable_wksp(dstPtr, maxDstSize, hufTable, 255, huffLog, wksp, sizeof(wksp));
816
776
  if (HUF_isError(hhSize)) {
817
777
  eSize = hhSize;
818
778
  DISPLAYLEVEL(1, "HUF_writeCTable error \n");
@@ -867,7 +827,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
867
827
  MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
868
828
  #else
869
829
  /* at this stage, we don't use the result of "most common first offset",
870
- as the impact of statistics is not properly evaluated */
830
+ * as the impact of statistics is not properly evaluated */
871
831
  MEM_writeLE32(dstPtr+0, repStartValue[0]);
872
832
  MEM_writeLE32(dstPtr+4, repStartValue[1]);
873
833
  MEM_writeLE32(dstPtr+8, repStartValue[2]);
@@ -883,6 +843,17 @@ _cleanup:
883
843
  }
884
844
 
885
845
 
846
+ /**
847
+ * @returns the maximum repcode value
848
+ */
849
+ static U32 ZDICT_maxRep(U32 const reps[ZSTD_REP_NUM])
850
+ {
851
+ U32 maxRep = reps[0];
852
+ int r;
853
+ for (r = 1; r < ZSTD_REP_NUM; ++r)
854
+ maxRep = MAX(maxRep, reps[r]);
855
+ return maxRep;
856
+ }
886
857
 
887
858
  size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
888
859
  const void* customDictContent, size_t dictContentSize,
@@ -894,11 +865,13 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
894
865
  BYTE header[HBUFFSIZE];
895
866
  int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
896
867
  U32 const notificationLevel = params.notificationLevel;
868
+ /* The final dictionary content must be at least as large as the largest repcode */
869
+ size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue);
870
+ size_t paddingSize;
897
871
 
898
872
  /* check conditions */
899
873
  DEBUGLOG(4, "ZDICT_finalizeDictionary");
900
874
  if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
901
- if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
902
875
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
903
876
 
904
877
  /* dictionary header */
@@ -922,12 +895,43 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
922
895
  hSize += eSize;
923
896
  }
924
897
 
925
- /* copy elements in final buffer ; note : src and dst buffer can overlap */
926
- if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
927
- { size_t const dictSize = hSize + dictContentSize;
928
- char* dictEnd = (char*)dictBuffer + dictSize;
929
- memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
930
- memcpy(dictBuffer, header, hSize);
898
+ /* Shrink the content size if it doesn't fit in the buffer */
899
+ if (hSize + dictContentSize > dictBufferCapacity) {
900
+ dictContentSize = dictBufferCapacity - hSize;
901
+ }
902
+
903
+ /* Pad the dictionary content with zeros if it is too small */
904
+ if (dictContentSize < minContentSize) {
905
+ RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
906
+ "dictBufferCapacity too small to fit max repcode");
907
+ paddingSize = minContentSize - dictContentSize;
908
+ } else {
909
+ paddingSize = 0;
910
+ }
911
+
912
+ {
913
+ size_t const dictSize = hSize + paddingSize + dictContentSize;
914
+
915
+ /* The dictionary consists of the header, optional padding, and the content.
916
+ * The padding comes before the content because the "best" position in the
917
+ * dictionary is the last byte.
918
+ */
919
+ BYTE* const outDictHeader = (BYTE*)dictBuffer;
920
+ BYTE* const outDictPadding = outDictHeader + hSize;
921
+ BYTE* const outDictContent = outDictPadding + paddingSize;
922
+
923
+ assert(dictSize <= dictBufferCapacity);
924
+ assert(outDictContent + dictContentSize == (BYTE*)dictBuffer + dictSize);
925
+
926
+ /* First copy the customDictContent into its final location.
927
+ * `customDictContent` and `dictBuffer` may overlap, so we must
928
+ * do this before any other writes into the output buffer.
929
+ * Then copy the header & padding into the output buffer.
930
+ */
931
+ memmove(outDictContent, customDictContent, dictContentSize);
932
+ memcpy(outDictHeader, header, hSize);
933
+ memset(outDictPadding, 0, paddingSize);
934
+
931
935
  return dictSize;
932
936
  }
933
937
  }
@@ -967,16 +971,11 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
967
971
  return MIN(dictBufferCapacity, hSize+dictContentSize);
968
972
  }
969
973
 
970
- /* Hidden declaration for dbio.c */
971
- size_t ZDICT_trainFromBuffer_unsafe_legacy(
972
- void* dictBuffer, size_t maxDictSize,
973
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
974
- ZDICT_legacy_params_t params);
975
974
  /*! ZDICT_trainFromBuffer_unsafe_legacy() :
976
- * Warning : `samplesBuffer` must be followed by noisy guard band.
975
+ * Warning : `samplesBuffer` must be followed by noisy guard band !!!
977
976
  * @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
978
977
  */
979
- size_t ZDICT_trainFromBuffer_unsafe_legacy(
978
+ static size_t ZDICT_trainFromBuffer_unsafe_legacy(
980
979
  void* dictBuffer, size_t maxDictSize,
981
980
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
982
981
  ZDICT_legacy_params_t params)
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -124,6 +124,20 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
124
124
  const void* dict,size_t dictSize)
125
125
  {
126
126
  U32 const version = ZSTD_isLegacy(src, compressedSize);
127
+ char x;
128
+ /* Avoid passing NULL to legacy decoding. */
129
+ if (dst == NULL) {
130
+ assert(dstCapacity == 0);
131
+ dst = &x;
132
+ }
133
+ if (src == NULL) {
134
+ assert(compressedSize == 0);
135
+ src = &x;
136
+ }
137
+ if (dict == NULL) {
138
+ assert(dictSize == 0);
139
+ dict = &x;
140
+ }
127
141
  (void)dst; (void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */
128
142
  switch(version)
129
143
  {
@@ -242,6 +256,13 @@ MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size
242
256
  frameSizeInfo.compressedSize = ERROR(srcSize_wrong);
243
257
  frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
244
258
  }
259
+ /* In all cases, decompressedBound == nbBlocks * ZSTD_BLOCKSIZE_MAX.
260
+ * So we can compute nbBlocks without having to change every function.
261
+ */
262
+ if (frameSizeInfo.decompressedBound != ZSTD_CONTENTSIZE_ERROR) {
263
+ assert((frameSizeInfo.decompressedBound & (ZSTD_BLOCKSIZE_MAX - 1)) == 0);
264
+ frameSizeInfo.nbBlocks = (size_t)(frameSizeInfo.decompressedBound / ZSTD_BLOCKSIZE_MAX);
265
+ }
245
266
  return frameSizeInfo;
246
267
  }
247
268
 
@@ -280,6 +301,12 @@ MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
280
301
  MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion,
281
302
  const void* dict, size_t dictSize)
282
303
  {
304
+ char x;
305
+ /* Avoid passing NULL to legacy decoding. */
306
+ if (dict == NULL) {
307
+ assert(dictSize == 0);
308
+ dict = &x;
309
+ }
283
310
  DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion);
284
311
  if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion);
285
312
  switch(newVersion)
@@ -339,6 +366,16 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
339
366
  MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
340
367
  ZSTD_outBuffer* output, ZSTD_inBuffer* input)
341
368
  {
369
+ static char x;
370
+ /* Avoid passing NULL to legacy decoding. */
371
+ if (output->dst == NULL) {
372
+ assert(output->size == 0);
373
+ output->dst = &x;
374
+ }
375
+ if (input->src == NULL) {
376
+ assert(input->size == 0);
377
+ input->src = &x;
378
+ }
342
379
  DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version);
343
380
  switch(version)
344
381
  {