zstd-ruby 1.5.0.0 → 1.5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +2 -2
  3. data/README.md +1 -1
  4. data/ext/zstdruby/extconf.rb +2 -1
  5. data/ext/zstdruby/libzstd/Makefile +50 -175
  6. data/ext/zstdruby/libzstd/README.md +7 -1
  7. data/ext/zstdruby/libzstd/common/bitstream.h +24 -9
  8. data/ext/zstdruby/libzstd/common/compiler.h +89 -43
  9. data/ext/zstdruby/libzstd/common/entropy_common.c +11 -5
  10. data/ext/zstdruby/libzstd/common/error_private.h +79 -0
  11. data/ext/zstdruby/libzstd/common/fse.h +2 -1
  12. data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
  13. data/ext/zstdruby/libzstd/common/huf.h +24 -22
  14. data/ext/zstdruby/libzstd/common/mem.h +18 -0
  15. data/ext/zstdruby/libzstd/common/pool.c +11 -6
  16. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  17. data/ext/zstdruby/libzstd/common/portability_macros.h +137 -0
  18. data/ext/zstdruby/libzstd/common/xxhash.c +5 -805
  19. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  20. data/ext/zstdruby/libzstd/common/zstd_internal.h +95 -92
  21. data/ext/zstdruby/libzstd/common/zstd_trace.h +12 -3
  22. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  23. data/ext/zstdruby/libzstd/compress/fse_compress.c +63 -27
  24. data/ext/zstdruby/libzstd/compress/huf_compress.c +537 -104
  25. data/ext/zstdruby/libzstd/compress/zstd_compress.c +307 -373
  26. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +174 -83
  27. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +4 -3
  28. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +3 -1
  29. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +15 -14
  30. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +4 -3
  31. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +41 -27
  32. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +295 -120
  33. data/ext/zstdruby/libzstd/compress/zstd_fast.c +309 -130
  34. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +482 -562
  35. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +9 -7
  36. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
  37. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +4 -1
  38. data/ext/zstdruby/libzstd/compress/zstd_opt.c +249 -148
  39. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +76 -38
  40. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +4 -1
  41. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +727 -189
  42. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +585 -0
  43. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +85 -22
  44. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +744 -220
  45. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -2
  46. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +34 -3
  47. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +23 -3
  48. data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
  49. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +11 -4
  50. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +101 -30
  51. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +2 -6
  52. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +3 -7
  53. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +3 -7
  54. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +3 -7
  55. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +3 -7
  56. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +3 -7
  57. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +3 -7
  58. data/ext/zstdruby/libzstd/libzstd.mk +203 -0
  59. data/ext/zstdruby/libzstd/libzstd.pc.in +1 -0
  60. data/ext/zstdruby/libzstd/module.modulemap +25 -0
  61. data/ext/zstdruby/libzstd/zdict.h +4 -4
  62. data/ext/zstdruby/libzstd/zstd.h +179 -136
  63. data/ext/zstdruby/zstdruby.c +2 -2
  64. data/lib/zstd-ruby/version.rb +1 -1
  65. metadata +11 -6
@@ -33,6 +33,12 @@
33
33
  */
34
34
 
35
35
 
36
+ /* Streaming state is used to inform allocation of the literal buffer */
37
+ typedef enum {
38
+ not_streaming = 0,
39
+ is_streaming = 1
40
+ } streaming_operation;
41
+
36
42
  /* ZSTD_decompressBlock_internal() :
37
43
  * decompress block, starting at `src`,
38
44
  * into destination buffer `dst`.
@@ -41,7 +47,7 @@
41
47
  */
42
48
  size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
43
49
  void* dst, size_t dstCapacity,
44
- const void* src, size_t srcSize, const int frame);
50
+ const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
45
51
 
46
52
  /* ZSTD_buildFSETable() :
47
53
  * generate FSE decoding table for one symbol (ll, ml or off)
@@ -54,7 +60,7 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
54
60
  */
55
61
  void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
56
62
  const short* normalizedCounter, unsigned maxSymbolValue,
57
- const U32* baseValue, const U32* nbAdditionalBits,
63
+ const U32* baseValue, const U8* nbAdditionalBits,
58
64
  unsigned tableLog, void* wksp, size_t wkspSize,
59
65
  int bmi2);
60
66
 
@@ -20,7 +20,7 @@
20
20
  * Dependencies
21
21
  *********************************************************/
22
22
  #include "../common/mem.h" /* BYTE, U16, U32 */
23
- #include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
23
+ #include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
24
24
 
25
25
 
26
26
 
@@ -40,7 +40,7 @@ static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
40
40
  0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
41
41
  0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
42
42
 
43
- static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
43
+ static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
44
44
  0, 1, 2, 3, 4, 5, 6, 7,
45
45
  8, 9, 10, 11, 12, 13, 14, 15,
46
46
  16, 17, 18, 19, 20, 21, 22, 23,
@@ -106,6 +106,22 @@ typedef struct {
106
106
  size_t ddictPtrCount;
107
107
  } ZSTD_DDictHashSet;
108
108
 
109
+ #ifndef ZSTD_DECODER_INTERNAL_BUFFER
110
+ # define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
111
+ #endif
112
+
113
+ #define ZSTD_LBMIN 64
114
+ #define ZSTD_LBMAX (128 << 10)
115
+
116
+ /* extra buffer, compensates when dst is not large enough to store litBuffer */
117
+ #define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
118
+
119
+ typedef enum {
120
+ ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
121
+ ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
122
+ ZSTD_split = 2 /* Split between litExtraBuffer and dst */
123
+ } ZSTD_litLocation_e;
124
+
109
125
  struct ZSTD_DCtx_s
110
126
  {
111
127
  const ZSTD_seqSymbol* LLTptr;
@@ -136,7 +152,9 @@ struct ZSTD_DCtx_s
136
152
  size_t litSize;
137
153
  size_t rleSize;
138
154
  size_t staticSize;
155
+ #if DYNAMIC_BMI2 != 0
139
156
  int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
157
+ #endif
140
158
 
141
159
  /* dictionary */
142
160
  ZSTD_DDict* ddictLocal;
@@ -158,16 +176,21 @@ struct ZSTD_DCtx_s
158
176
  size_t outStart;
159
177
  size_t outEnd;
160
178
  size_t lhSize;
179
+ #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
161
180
  void* legacyContext;
162
181
  U32 previousLegacyVersion;
163
182
  U32 legacyVersion;
183
+ #endif
164
184
  U32 hostageByte;
165
185
  int noForwardProgress;
166
186
  ZSTD_bufferMode_e outBufferMode;
167
187
  ZSTD_outBuffer expectedOutBuffer;
168
188
 
169
189
  /* workspace */
170
- BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
190
+ BYTE* litBuffer;
191
+ const BYTE* litBufferEnd;
192
+ ZSTD_litLocation_e litBufferLocation;
193
+ BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
171
194
  BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
172
195
 
173
196
  size_t oversizedDuration;
@@ -183,6 +206,14 @@ struct ZSTD_DCtx_s
183
206
  #endif
184
207
  }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
185
208
 
209
+ MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
210
+ #if DYNAMIC_BMI2 != 0
211
+ return dctx->bmi2;
212
+ #else
213
+ (void)dctx;
214
+ return 0;
215
+ #endif
216
+ }
186
217
 
187
218
  /*-*******************************************************
188
219
  * Shared internal functions
@@ -15,6 +15,7 @@
15
15
  ***************************************/
16
16
  #define ZBUFF_STATIC_LINKING_ONLY
17
17
  #include "zbuff.h"
18
+ #include "../common/error_private.h"
18
19
 
19
20
 
20
21
  /*-***********************************************************
@@ -73,13 +74,32 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
73
74
  ZSTD_parameters params, unsigned long long pledgedSrcSize)
74
75
  {
75
76
  if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* preserve "0 == unknown" behavior */
76
- return ZSTD_initCStream_advanced(zbc, dict, dictSize, params, pledgedSrcSize);
77
+ FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only), "");
78
+ FORWARD_IF_ERROR(ZSTD_CCtx_setPledgedSrcSize(zbc, pledgedSrcSize), "");
79
+
80
+ FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
81
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_windowLog, params.cParams.windowLog), "");
82
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_hashLog, params.cParams.hashLog), "");
83
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_chainLog, params.cParams.chainLog), "");
84
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_searchLog, params.cParams.searchLog), "");
85
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_minMatch, params.cParams.minMatch), "");
86
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_targetLength, params.cParams.targetLength), "");
87
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_strategy, params.cParams.strategy), "");
88
+
89
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_contentSizeFlag, params.fParams.contentSizeFlag), "");
90
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_checksumFlag, params.fParams.checksumFlag), "");
91
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_dictIDFlag, params.fParams.noDictIDFlag), "");
92
+
93
+ FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), "");
94
+ return 0;
77
95
  }
78
96
 
79
-
80
97
  size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
81
98
  {
82
- return ZSTD_initCStream_usingDict(zbc, dict, dictSize, compressionLevel);
99
+ FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only), "");
100
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_compressionLevel, compressionLevel), "");
101
+ FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), "");
102
+ return 0;
83
103
  }
84
104
 
85
105
  size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)
@@ -40,6 +40,13 @@
40
40
  /*-*************************************
41
41
  * Constants
42
42
  ***************************************/
43
+ /**
44
+ * There are 32bit indexes used to ref samples, so limit samples size to 4GB
45
+ * on 64bit builds.
46
+ * For 32bit builds we choose 1 GB.
47
+ * Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
48
+ * contiguous buffer, so 1GB is already a high limit.
49
+ */
43
50
  #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
44
51
  #define COVER_DEFAULT_SPLITPOINT 1.0
45
52
 
@@ -47,7 +54,7 @@
47
54
  * Console display
48
55
  ***************************************/
49
56
  #ifndef LOCALDISPLAYLEVEL
50
- static int g_displayLevel = 2;
57
+ static int g_displayLevel = 0;
51
58
  #endif
52
59
  #undef DISPLAY
53
60
  #define DISPLAY(...) \
@@ -735,7 +742,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
735
742
  COVER_map_t activeDmers;
736
743
  parameters.splitPoint = 1.0;
737
744
  /* Initialize global data */
738
- g_displayLevel = parameters.zParams.notificationLevel;
745
+ g_displayLevel = (int)parameters.zParams.notificationLevel;
739
746
  /* Checks */
740
747
  if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
741
748
  DISPLAYLEVEL(1, "Cover parameters incorrect\n");
@@ -32,6 +32,13 @@
32
32
  /*-*************************************
33
33
  * Constants
34
34
  ***************************************/
35
+ /**
36
+ * There are 32bit indexes used to ref samples, so limit samples size to 4GB
37
+ * on 64bit builds.
38
+ * For 32bit builds we choose 1 GB.
39
+ * Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
40
+ * contiguous buffer, so 1GB is already a high limit.
41
+ */
35
42
  #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
36
43
  #define FASTCOVER_MAX_F 31
37
44
  #define FASTCOVER_MAX_ACCEL 10
@@ -44,7 +51,7 @@
44
51
  * Console display
45
52
  ***************************************/
46
53
  #ifndef LOCALDISPLAYLEVEL
47
- static int g_displayLevel = 2;
54
+ static int g_displayLevel = 0;
48
55
  #endif
49
56
  #undef DISPLAY
50
57
  #define DISPLAY(...) \
@@ -549,7 +556,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
549
556
  ZDICT_cover_params_t coverParams;
550
557
  FASTCOVER_accel_t accelParams;
551
558
  /* Initialize global data */
552
- g_displayLevel = parameters.zParams.notificationLevel;
559
+ g_displayLevel = (int)parameters.zParams.notificationLevel;
553
560
  /* Assign splitPoint and f if not provided */
554
561
  parameters.splitPoint = 1.0;
555
562
  parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
@@ -632,7 +639,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
632
639
  const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
633
640
  const unsigned shrinkDict = 0;
634
641
  /* Local variables */
635
- const int displayLevel = parameters->zParams.notificationLevel;
642
+ const int displayLevel = (int)parameters->zParams.notificationLevel;
636
643
  unsigned iteration = 1;
637
644
  unsigned d;
638
645
  unsigned k;
@@ -716,7 +723,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
716
723
  data->parameters.splitPoint = splitPoint;
717
724
  data->parameters.steps = kSteps;
718
725
  data->parameters.shrinkDict = shrinkDict;
719
- data->parameters.zParams.notificationLevel = g_displayLevel;
726
+ data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel;
720
727
  /* Check the parameters */
721
728
  if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
722
729
  data->ctx->f, accel)) {
@@ -135,22 +135,32 @@ static unsigned ZDICT_NbCommonBytes (size_t val)
135
135
  if (MEM_isLittleEndian()) {
136
136
  if (MEM_64bits()) {
137
137
  # if defined(_MSC_VER) && defined(_WIN64)
138
- unsigned long r = 0;
139
- _BitScanForward64( &r, (U64)val );
140
- return (unsigned)(r>>3);
138
+ if (val != 0) {
139
+ unsigned long r;
140
+ _BitScanForward64(&r, (U64)val);
141
+ return (unsigned)(r >> 3);
142
+ } else {
143
+ /* Should not reach this code path */
144
+ __assume(0);
145
+ }
141
146
  # elif defined(__GNUC__) && (__GNUC__ >= 3)
142
- return (__builtin_ctzll((U64)val) >> 3);
147
+ return (unsigned)(__builtin_ctzll((U64)val) >> 3);
143
148
  # else
144
149
  static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
145
150
  return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
146
151
  # endif
147
152
  } else { /* 32 bits */
148
153
  # if defined(_MSC_VER)
149
- unsigned long r=0;
150
- _BitScanForward( &r, (U32)val );
151
- return (unsigned)(r>>3);
154
+ if (val != 0) {
155
+ unsigned long r;
156
+ _BitScanForward(&r, (U32)val);
157
+ return (unsigned)(r >> 3);
158
+ } else {
159
+ /* Should not reach this code path */
160
+ __assume(0);
161
+ }
152
162
  # elif defined(__GNUC__) && (__GNUC__ >= 3)
153
- return (__builtin_ctz((U32)val) >> 3);
163
+ return (unsigned)(__builtin_ctz((U32)val) >> 3);
154
164
  # else
155
165
  static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
156
166
  return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
@@ -159,11 +169,16 @@ static unsigned ZDICT_NbCommonBytes (size_t val)
159
169
  } else { /* Big Endian CPU */
160
170
  if (MEM_64bits()) {
161
171
  # if defined(_MSC_VER) && defined(_WIN64)
162
- unsigned long r = 0;
163
- _BitScanReverse64( &r, val );
164
- return (unsigned)(r>>3);
172
+ if (val != 0) {
173
+ unsigned long r;
174
+ _BitScanReverse64(&r, val);
175
+ return (unsigned)(r >> 3);
176
+ } else {
177
+ /* Should not reach this code path */
178
+ __assume(0);
179
+ }
165
180
  # elif defined(__GNUC__) && (__GNUC__ >= 3)
166
- return (__builtin_clzll(val) >> 3);
181
+ return (unsigned)(__builtin_clzll(val) >> 3);
167
182
  # else
168
183
  unsigned r;
169
184
  const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
@@ -174,11 +189,16 @@ static unsigned ZDICT_NbCommonBytes (size_t val)
174
189
  # endif
175
190
  } else { /* 32 bits */
176
191
  # if defined(_MSC_VER)
177
- unsigned long r = 0;
178
- _BitScanReverse( &r, (unsigned long)val );
179
- return (unsigned)(r>>3);
192
+ if (val != 0) {
193
+ unsigned long r;
194
+ _BitScanReverse(&r, (unsigned long)val);
195
+ return (unsigned)(r >> 3);
196
+ } else {
197
+ /* Should not reach this code path */
198
+ __assume(0);
199
+ }
180
200
  # elif defined(__GNUC__) && (__GNUC__ >= 3)
181
- return (__builtin_clz((U32)val) >> 3);
201
+ return (unsigned)(__builtin_clz((U32)val) >> 3);
182
202
  # else
183
203
  unsigned r;
184
204
  if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
@@ -235,7 +255,7 @@ static dictItem ZDICT_analyzePos(
235
255
  U32 savings[LLIMIT] = {0};
236
256
  const BYTE* b = (const BYTE*)buffer;
237
257
  size_t maxLength = LLIMIT;
238
- size_t pos = suffix[start];
258
+ size_t pos = (size_t)suffix[start];
239
259
  U32 end = start;
240
260
  dictItem solution;
241
261
 
@@ -369,7 +389,7 @@ static dictItem ZDICT_analyzePos(
369
389
  savings[i] = savings[i-1] + (lengthList[i] * (i-3));
370
390
 
371
391
  DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
372
- (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
392
+ (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);
373
393
 
374
394
  solution.pos = (U32)pos;
375
395
  solution.length = (U32)maxLength;
@@ -379,7 +399,7 @@ static dictItem ZDICT_analyzePos(
379
399
  { U32 id;
380
400
  for (id=start; id<end; id++) {
381
401
  U32 p, pEnd, length;
382
- U32 const testedPos = suffix[id];
402
+ U32 const testedPos = (U32)suffix[id];
383
403
  if (testedPos == pos)
384
404
  length = solution.length;
385
405
  else {
@@ -442,7 +462,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
442
462
 
443
463
  if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
444
464
  /* append */
445
- int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
465
+ int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length);
446
466
  table[u].savings += elt.length / 8; /* rough approx bonus */
447
467
  if (addedLength > 0) { /* otherwise, elt fully included into existing */
448
468
  table[u].length += addedLength;
@@ -662,8 +682,8 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
662
682
 
663
683
  if (nbSeq >= 2) { /* rep offsets */
664
684
  const seqDef* const seq = seqStorePtr->sequencesStart;
665
- U32 offset1 = seq[0].offset - 3;
666
- U32 offset2 = seq[1].offset - 3;
685
+ U32 offset1 = seq[0].offBase - ZSTD_REP_NUM;
686
+ U32 offset2 = seq[1].offBase - ZSTD_REP_NUM;
667
687
  if (offset1 >= MAXREPOFFSET) offset1 = 0;
668
688
  if (offset2 >= MAXREPOFFSET) offset2 = 0;
669
689
  repOffsets[offset1] += 3;
@@ -766,6 +786,13 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
766
786
  pos += fileSizes[u];
767
787
  }
768
788
 
789
+ if (notificationLevel >= 4) {
790
+ /* writeStats */
791
+ DISPLAYLEVEL(4, "Offset Code Frequencies : \n");
792
+ for (u=0; u<=offcodeMax; u++) {
793
+ DISPLAYLEVEL(4, "%2u :%7u \n", u, offcodeCount[u]);
794
+ } }
795
+
769
796
  /* analyze, build stats, starting with literals */
770
797
  { size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
771
798
  if (HUF_isError(maxNbBits)) {
@@ -872,7 +899,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
872
899
  MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
873
900
  #else
874
901
  /* at this stage, we don't use the result of "most common first offset",
875
- as the impact of statistics is not properly evaluated */
902
+ * as the impact of statistics is not properly evaluated */
876
903
  MEM_writeLE32(dstPtr+0, repStartValue[0]);
877
904
  MEM_writeLE32(dstPtr+4, repStartValue[1]);
878
905
  MEM_writeLE32(dstPtr+8, repStartValue[2]);
@@ -888,6 +915,17 @@ _cleanup:
888
915
  }
889
916
 
890
917
 
918
+ /**
919
+ * @returns the maximum repcode value
920
+ */
921
+ static U32 ZDICT_maxRep(U32 const reps[ZSTD_REP_NUM])
922
+ {
923
+ U32 maxRep = reps[0];
924
+ int r;
925
+ for (r = 1; r < ZSTD_REP_NUM; ++r)
926
+ maxRep = MAX(maxRep, reps[r]);
927
+ return maxRep;
928
+ }
891
929
 
892
930
  size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
893
931
  const void* customDictContent, size_t dictContentSize,
@@ -899,11 +937,13 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
899
937
  BYTE header[HBUFFSIZE];
900
938
  int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
901
939
  U32 const notificationLevel = params.notificationLevel;
940
+ /* The final dictionary content must be at least as large as the largest repcode */
941
+ size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue);
942
+ size_t paddingSize;
902
943
 
903
944
  /* check conditions */
904
945
  DEBUGLOG(4, "ZDICT_finalizeDictionary");
905
946
  if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
906
- if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
907
947
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
908
948
 
909
949
  /* dictionary header */
@@ -927,12 +967,43 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
927
967
  hSize += eSize;
928
968
  }
929
969
 
930
- /* copy elements in final buffer ; note : src and dst buffer can overlap */
931
- if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
932
- { size_t const dictSize = hSize + dictContentSize;
933
- char* dictEnd = (char*)dictBuffer + dictSize;
934
- memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
935
- memcpy(dictBuffer, header, hSize);
970
+ /* Shrink the content size if it doesn't fit in the buffer */
971
+ if (hSize + dictContentSize > dictBufferCapacity) {
972
+ dictContentSize = dictBufferCapacity - hSize;
973
+ }
974
+
975
+ /* Pad the dictionary content with zeros if it is too small */
976
+ if (dictContentSize < minContentSize) {
977
+ RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
978
+ "dictBufferCapacity too small to fit max repcode");
979
+ paddingSize = minContentSize - dictContentSize;
980
+ } else {
981
+ paddingSize = 0;
982
+ }
983
+
984
+ {
985
+ size_t const dictSize = hSize + paddingSize + dictContentSize;
986
+
987
+ /* The dictionary consists of the header, optional padding, and the content.
988
+ * The padding comes before the content because the "best" position in the
989
+ * dictionary is the last byte.
990
+ */
991
+ BYTE* const outDictHeader = (BYTE*)dictBuffer;
992
+ BYTE* const outDictPadding = outDictHeader + hSize;
993
+ BYTE* const outDictContent = outDictPadding + paddingSize;
994
+
995
+ assert(dictSize <= dictBufferCapacity);
996
+ assert(outDictContent + dictContentSize == (BYTE*)dictBuffer + dictSize);
997
+
998
+ /* First copy the customDictContent into its final location.
999
+ * `customDictContent` and `dictBuffer` may overlap, so we must
1000
+ * do this before any other writes into the output buffer.
1001
+ * Then copy the header & padding into the output buffer.
1002
+ */
1003
+ memmove(outDictContent, customDictContent, dictContentSize);
1004
+ memcpy(outDictHeader, header, hSize);
1005
+ memset(outDictPadding, 0, paddingSize);
1006
+
936
1007
  return dictSize;
937
1008
  }
938
1009
  }
@@ -204,10 +204,7 @@ typedef signed long long S64;
204
204
  * Prefer these methods in priority order (0 > 1 > 2)
205
205
  */
206
206
  #ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
207
- # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
208
- # define FSE_FORCE_MEMORY_ACCESS 2
209
- # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
210
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
207
+ # if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
211
208
  # define FSE_FORCE_MEMORY_ACCESS 1
212
209
  # endif
213
210
  #endif
@@ -343,8 +340,7 @@ FORCE_INLINE unsigned FSE_highbit32 (U32 val)
343
340
  {
344
341
  # if defined(_MSC_VER) /* Visual */
345
342
  unsigned long r;
346
- _BitScanReverse ( &r, val );
347
- return (unsigned) r;
343
+ return _BitScanReverse(&r, val) ? (unsigned)r : 0;
348
344
  # elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
349
345
  return __builtin_clz (val) ^ 31;
350
346
  # else /* Software version */
@@ -129,10 +129,7 @@ extern "C" {
129
129
  * Prefer these methods in priority order (0 > 1 > 2)
130
130
  */
131
131
  #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
132
- # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
133
- # define MEM_FORCE_MEMORY_ACCESS 2
134
- # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
135
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
132
+ # if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
136
133
  # define MEM_FORCE_MEMORY_ACCESS 1
137
134
  # endif
138
135
  #endif
@@ -353,9 +350,8 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
353
350
  MEM_STATIC unsigned BIT_highbit32 (U32 val)
354
351
  {
355
352
  # if defined(_MSC_VER) /* Visual */
356
- unsigned long r=0;
357
- _BitScanReverse ( &r, val );
358
- return (unsigned) r;
353
+ unsigned long r;
354
+ return _BitScanReverse(&r, val) ? (unsigned)r : 0;
359
355
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
360
356
  return __builtin_clz (val) ^ 31;
361
357
  # else /* Software version */
@@ -130,10 +130,7 @@ extern "C" {
130
130
  * Prefer these methods in priority order (0 > 1 > 2)
131
131
  */
132
132
  #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
133
- # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
134
- # define MEM_FORCE_MEMORY_ACCESS 2
135
- # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
136
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
133
+ # if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
137
134
  # define MEM_FORCE_MEMORY_ACCESS 1
138
135
  # endif
139
136
  #endif
@@ -356,9 +353,8 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
356
353
  MEM_STATIC unsigned BIT_highbit32 (U32 val)
357
354
  {
358
355
  # if defined(_MSC_VER) /* Visual */
359
- unsigned long r=0;
360
- _BitScanReverse ( &r, val );
361
- return (unsigned) r;
356
+ unsigned long r;
357
+ return _BitScanReverse(&r, val) ? (unsigned)r : 0;
362
358
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
363
359
  return __builtin_clz (val) ^ 31;
364
360
  # else /* Software version */
@@ -101,10 +101,7 @@ extern "C" {
101
101
  * Prefer these methods in priority order (0 > 1 > 2)
102
102
  */
103
103
  #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
104
- # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
105
- # define MEM_FORCE_MEMORY_ACCESS 2
106
- # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
107
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
104
+ # if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
108
105
  # define MEM_FORCE_MEMORY_ACCESS 1
109
106
  # endif
110
107
  #endif
@@ -627,9 +624,8 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
627
624
  MEM_STATIC unsigned BIT_highbit32 (U32 val)
628
625
  {
629
626
  # if defined(_MSC_VER) /* Visual */
630
- unsigned long r=0;
631
- _BitScanReverse ( &r, val );
632
- return (unsigned) r;
627
+ unsigned long r;
628
+ return _BitScanReverse(&r, val) ? (unsigned)r : 0;
633
629
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
634
630
  return __builtin_clz (val) ^ 31;
635
631
  # else /* Software version */
@@ -120,10 +120,7 @@ extern "C" {
120
120
  * Prefer these methods in priority order (0 > 1 > 2)
121
121
  */
122
122
  #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
123
- # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
124
- # define MEM_FORCE_MEMORY_ACCESS 2
125
- # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
126
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
123
+ # if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
127
124
  # define MEM_FORCE_MEMORY_ACCESS 1
128
125
  # endif
129
126
  #endif
@@ -756,9 +753,8 @@ MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, unsigned nbBits);
756
753
  MEM_STATIC unsigned BITv05_highbit32 (U32 val)
757
754
  {
758
755
  # if defined(_MSC_VER) /* Visual */
759
- unsigned long r=0;
760
- _BitScanReverse ( &r, val );
761
- return (unsigned) r;
756
+ unsigned long r;
757
+ return _BitScanReverse(&r, val) ? (unsigned)r : 0;
762
758
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
763
759
  return __builtin_clz (val) ^ 31;
764
760
  # else /* Software version */
@@ -122,10 +122,7 @@ extern "C" {
122
122
  * Prefer these methods in priority order (0 > 1 > 2)
123
123
  */
124
124
  #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
125
- # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
126
- # define MEM_FORCE_MEMORY_ACCESS 2
127
- # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
128
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
125
+ # if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
129
126
  # define MEM_FORCE_MEMORY_ACCESS 1
130
127
  # endif
131
128
  #endif
@@ -860,9 +857,8 @@ MEM_STATIC size_t BITv06_readBitsFast(BITv06_DStream_t* bitD, unsigned nbBits);
860
857
  MEM_STATIC unsigned BITv06_highbit32 ( U32 val)
861
858
  {
862
859
  # if defined(_MSC_VER) /* Visual */
863
- unsigned long r=0;
864
- _BitScanReverse ( &r, val );
865
- return (unsigned) r;
860
+ unsigned long r;
861
+ return _BitScanReverse(&r, val) ? (unsigned)r : 0;
866
862
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
867
863
  return __builtin_clz (val) ^ 31;
868
864
  # else /* Software version */
@@ -282,10 +282,7 @@ extern "C" {
282
282
  * Prefer these methods in priority order (0 > 1 > 2)
283
283
  */
284
284
  #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
285
- # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
286
- # define MEM_FORCE_MEMORY_ACCESS 2
287
- # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
288
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
285
+ # if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
289
286
  # define MEM_FORCE_MEMORY_ACCESS 1
290
287
  # endif
291
288
  #endif
@@ -530,9 +527,8 @@ MEM_STATIC size_t BITv07_readBitsFast(BITv07_DStream_t* bitD, unsigned nbBits);
530
527
  MEM_STATIC unsigned BITv07_highbit32 (U32 val)
531
528
  {
532
529
  # if defined(_MSC_VER) /* Visual */
533
- unsigned long r=0;
534
- _BitScanReverse ( &r, val );
535
- return (unsigned) r;
530
+ unsigned long r;
531
+ return _BitScanReverse(&r, val) ? (unsigned)r : 0;
536
532
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
537
533
  return __builtin_clz (val) ^ 31;
538
534
  # else /* Software version */