zstd-ruby 1.5.0.0 → 1.5.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +2 -2
  3. data/README.md +1 -1
  4. data/ext/zstdruby/extconf.rb +2 -1
  5. data/ext/zstdruby/libzstd/Makefile +50 -175
  6. data/ext/zstdruby/libzstd/README.md +7 -1
  7. data/ext/zstdruby/libzstd/common/bitstream.h +24 -9
  8. data/ext/zstdruby/libzstd/common/compiler.h +89 -43
  9. data/ext/zstdruby/libzstd/common/entropy_common.c +11 -5
  10. data/ext/zstdruby/libzstd/common/error_private.h +79 -0
  11. data/ext/zstdruby/libzstd/common/fse.h +2 -1
  12. data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
  13. data/ext/zstdruby/libzstd/common/huf.h +24 -22
  14. data/ext/zstdruby/libzstd/common/mem.h +18 -0
  15. data/ext/zstdruby/libzstd/common/pool.c +11 -6
  16. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  17. data/ext/zstdruby/libzstd/common/portability_macros.h +137 -0
  18. data/ext/zstdruby/libzstd/common/xxhash.c +5 -805
  19. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  20. data/ext/zstdruby/libzstd/common/zstd_internal.h +95 -92
  21. data/ext/zstdruby/libzstd/common/zstd_trace.h +12 -3
  22. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  23. data/ext/zstdruby/libzstd/compress/fse_compress.c +63 -27
  24. data/ext/zstdruby/libzstd/compress/huf_compress.c +537 -104
  25. data/ext/zstdruby/libzstd/compress/zstd_compress.c +307 -373
  26. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +174 -83
  27. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +4 -3
  28. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +3 -1
  29. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +15 -14
  30. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +4 -3
  31. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +41 -27
  32. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +295 -120
  33. data/ext/zstdruby/libzstd/compress/zstd_fast.c +309 -130
  34. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +482 -562
  35. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +9 -7
  36. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
  37. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +4 -1
  38. data/ext/zstdruby/libzstd/compress/zstd_opt.c +249 -148
  39. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +76 -38
  40. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +4 -1
  41. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +727 -189
  42. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +585 -0
  43. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +85 -22
  44. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +744 -220
  45. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -2
  46. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +34 -3
  47. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +23 -3
  48. data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
  49. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +11 -4
  50. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +101 -30
  51. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +2 -6
  52. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +3 -7
  53. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +3 -7
  54. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +3 -7
  55. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +3 -7
  56. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +3 -7
  57. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +3 -7
  58. data/ext/zstdruby/libzstd/libzstd.mk +203 -0
  59. data/ext/zstdruby/libzstd/libzstd.pc.in +1 -0
  60. data/ext/zstdruby/libzstd/module.modulemap +25 -0
  61. data/ext/zstdruby/libzstd/zdict.h +4 -4
  62. data/ext/zstdruby/libzstd/zstd.h +179 -136
  63. data/ext/zstdruby/zstdruby.c +2 -2
  64. data/lib/zstd-ruby/version.rb +1 -1
  65. metadata +11 -6
@@ -19,10 +19,8 @@
19
19
  /*-*************************************
20
20
  * Dependencies
21
21
  ***************************************/
22
- #if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
23
- #include <arm_neon.h>
24
- #endif
25
22
  #include "compiler.h"
23
+ #include "cpu.h"
26
24
  #include "mem.h"
27
25
  #include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
28
26
  #include "error_private.h"
@@ -60,81 +58,7 @@ extern "C" {
60
58
  #undef MAX
61
59
  #define MIN(a,b) ((a)<(b) ? (a) : (b))
62
60
  #define MAX(a,b) ((a)>(b) ? (a) : (b))
63
-
64
- /**
65
- * Ignore: this is an internal helper.
66
- *
67
- * This is a helper function to help force C99-correctness during compilation.
68
- * Under strict compilation modes, variadic macro arguments can't be empty.
69
- * However, variadic function arguments can be. Using a function therefore lets
70
- * us statically check that at least one (string) argument was passed,
71
- * independent of the compilation flags.
72
- */
73
- static INLINE_KEYWORD UNUSED_ATTR
74
- void _force_has_format_string(const char *format, ...) {
75
- (void)format;
76
- }
77
-
78
- /**
79
- * Ignore: this is an internal helper.
80
- *
81
- * We want to force this function invocation to be syntactically correct, but
82
- * we don't want to force runtime evaluation of its arguments.
83
- */
84
- #define _FORCE_HAS_FORMAT_STRING(...) \
85
- if (0) { \
86
- _force_has_format_string(__VA_ARGS__); \
87
- }
88
-
89
- /**
90
- * Return the specified error if the condition evaluates to true.
91
- *
92
- * In debug modes, prints additional information.
93
- * In order to do that (particularly, printing the conditional that failed),
94
- * this can't just wrap RETURN_ERROR().
95
- */
96
- #define RETURN_ERROR_IF(cond, err, ...) \
97
- if (cond) { \
98
- RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
99
- __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
100
- _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
101
- RAWLOG(3, ": " __VA_ARGS__); \
102
- RAWLOG(3, "\n"); \
103
- return ERROR(err); \
104
- }
105
-
106
- /**
107
- * Unconditionally return the specified error.
108
- *
109
- * In debug modes, prints additional information.
110
- */
111
- #define RETURN_ERROR(err, ...) \
112
- do { \
113
- RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
114
- __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
115
- _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
116
- RAWLOG(3, ": " __VA_ARGS__); \
117
- RAWLOG(3, "\n"); \
118
- return ERROR(err); \
119
- } while(0);
120
-
121
- /**
122
- * If the provided expression evaluates to an error code, returns that error code.
123
- *
124
- * In debug modes, prints additional information.
125
- */
126
- #define FORWARD_IF_ERROR(err, ...) \
127
- do { \
128
- size_t const err_code = (err); \
129
- if (ERR_isError(err_code)) { \
130
- RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
131
- __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
132
- _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
133
- RAWLOG(3, ": " __VA_ARGS__); \
134
- RAWLOG(3, "\n"); \
135
- return err_code; \
136
- } \
137
- } while(0);
61
+ #define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))
138
62
 
139
63
 
140
64
  /*-*************************************
@@ -143,7 +67,6 @@ void _force_has_format_string(const char *format, ...) {
143
67
  #define ZSTD_OPT_NUM (1<<12)
144
68
 
145
69
  #define ZSTD_REP_NUM 3 /* number of repcodes */
146
- #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
147
70
  static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
148
71
 
149
72
  #define KB *(1 <<10)
@@ -195,7 +118,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
195
118
  /* Each table cannot take more than #symbols * FSELog bits */
196
119
  #define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
197
120
 
198
- static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = {
121
+ static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {
199
122
  0, 0, 0, 0, 0, 0, 0, 0,
200
123
  0, 0, 0, 0, 0, 0, 0, 0,
201
124
  1, 1, 1, 1, 2, 2, 3, 3,
@@ -212,7 +135,7 @@ static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
212
135
  #define LL_DEFAULTNORMLOG 6 /* for static allocation */
213
136
  static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
214
137
 
215
- static UNUSED_ATTR const U32 ML_bits[MaxML+1] = {
138
+ static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {
216
139
  0, 0, 0, 0, 0, 0, 0, 0,
217
140
  0, 0, 0, 0, 0, 0, 0, 0,
218
141
  0, 0, 0, 0, 0, 0, 0, 0,
@@ -247,19 +170,30 @@ static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
247
170
  * Shared functions to include for inlining
248
171
  *********************************************/
249
172
  static void ZSTD_copy8(void* dst, const void* src) {
250
- #if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
173
+ #if defined(ZSTD_ARCH_ARM_NEON)
251
174
  vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
252
175
  #else
253
176
  ZSTD_memcpy(dst, src, 8);
254
177
  #endif
255
178
  }
256
-
257
179
  #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
180
+
181
+ /* Need to use memmove here since the literal buffer can now be located within
182
+ the dst buffer. In circumstances where the op "catches up" to where the
183
+ literal buffer is, there can be partial overlaps in this call on the final
184
+ copy if the literal is being shifted by less than 16 bytes. */
258
185
  static void ZSTD_copy16(void* dst, const void* src) {
259
- #if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
186
+ #if defined(ZSTD_ARCH_ARM_NEON)
260
187
  vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
188
+ #elif defined(ZSTD_ARCH_X86_SSE2)
189
+ _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
190
+ #elif defined(__clang__)
191
+ ZSTD_memmove(dst, src, 16);
261
192
  #else
262
- ZSTD_memcpy(dst, src, 16);
193
+ /* ZSTD_memmove is not inlined properly by gcc */
194
+ BYTE copy16_buf[16];
195
+ ZSTD_memcpy(copy16_buf, src, 16);
196
+ ZSTD_memcpy(dst, copy16_buf, 16);
263
197
  #endif
264
198
  }
265
199
  #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
@@ -288,8 +222,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
288
222
  BYTE* op = (BYTE*)dst;
289
223
  BYTE* const oend = op + length;
290
224
 
291
- assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
292
-
293
225
  if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
294
226
  /* Handle short offset copies. */
295
227
  do {
@@ -352,9 +284,9 @@ typedef enum {
352
284
  * Private declarations
353
285
  *********************************************/
354
286
  typedef struct seqDef_s {
355
- U32 offset; /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */
287
+ U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
356
288
  U16 litLength;
357
- U16 matchLength;
289
+ U16 mlBase; /* mlBase == matchLength - MINMATCH */
358
290
  } seqDef;
359
291
 
360
292
  /* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
@@ -396,7 +328,7 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
396
328
  {
397
329
  ZSTD_sequenceLength seqLen;
398
330
  seqLen.litLength = seq->litLength;
399
- seqLen.matchLength = seq->matchLength + MINMATCH;
331
+ seqLen.matchLength = seq->mlBase + MINMATCH;
400
332
  if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
401
333
  if (seqStore->longLengthType == ZSTD_llt_literalLength) {
402
334
  seqLen.litLength += 0xFFFF;
@@ -436,8 +368,14 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
436
368
  # if STATIC_BMI2 == 1
437
369
  return _lzcnt_u32(val)^31;
438
370
  # else
439
- unsigned long r=0;
440
- return _BitScanReverse(&r, val) ? (unsigned)r : 0;
371
+ if (val != 0) {
372
+ unsigned long r;
373
+ _BitScanReverse(&r, val);
374
+ return (unsigned)r;
375
+ } else {
376
+ /* Should not reach this code path */
377
+ __assume(0);
378
+ }
441
379
  # endif
442
380
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
443
381
  return __builtin_clz (val) ^ 31;
@@ -456,6 +394,63 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
456
394
  }
457
395
  }
458
396
 
397
+ /**
398
+ * Counts the number of trailing zeros of a `size_t`.
399
+ * Most compilers should support CTZ as a builtin. A backup
400
+ * implementation is provided if the builtin isn't supported, but
401
+ * it may not be terribly efficient.
402
+ */
403
+ MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
404
+ {
405
+ if (MEM_64bits()) {
406
+ # if defined(_MSC_VER) && defined(_WIN64)
407
+ # if STATIC_BMI2
408
+ return _tzcnt_u64(val);
409
+ # else
410
+ if (val != 0) {
411
+ unsigned long r;
412
+ _BitScanForward64(&r, (U64)val);
413
+ return (unsigned)r;
414
+ } else {
415
+ /* Should not reach this code path */
416
+ __assume(0);
417
+ }
418
+ # endif
419
+ # elif defined(__GNUC__) && (__GNUC__ >= 4)
420
+ return __builtin_ctzll((U64)val);
421
+ # else
422
+ static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19,
423
+ 4, 25, 14, 28, 9, 34, 20, 56,
424
+ 5, 17, 26, 54, 15, 41, 29, 43,
425
+ 10, 31, 38, 35, 21, 45, 49, 57,
426
+ 63, 6, 12, 18, 24, 27, 33, 55,
427
+ 16, 53, 40, 42, 30, 37, 44, 48,
428
+ 62, 11, 23, 32, 52, 39, 36, 47,
429
+ 61, 22, 51, 46, 60, 50, 59, 58 };
430
+ return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
431
+ # endif
432
+ } else { /* 32 bits */
433
+ # if defined(_MSC_VER)
434
+ if (val != 0) {
435
+ unsigned long r;
436
+ _BitScanForward(&r, (U32)val);
437
+ return (unsigned)r;
438
+ } else {
439
+ /* Should not reach this code path */
440
+ __assume(0);
441
+ }
442
+ # elif defined(__GNUC__) && (__GNUC__ >= 3)
443
+ return __builtin_ctz((U32)val);
444
+ # else
445
+ static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3,
446
+ 30, 22, 20, 15, 25, 17, 4, 8,
447
+ 31, 27, 13, 23, 21, 19, 16, 7,
448
+ 26, 12, 18, 6, 11, 5, 10, 9 };
449
+ return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
450
+ # endif
451
+ }
452
+ }
453
+
459
454
 
460
455
  /* ZSTD_invalidateRepCodes() :
461
456
  * ensures next compression will not use repcodes from previous block.
@@ -482,6 +477,14 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
482
477
  size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
483
478
  const void* src, size_t srcSize);
484
479
 
480
+ /**
481
+ * @returns true iff the CPU supports dynamic BMI2 dispatch.
482
+ */
483
+ MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
484
+ {
485
+ ZSTD_cpuid_t cpuid = ZSTD_cpuid();
486
+ return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
487
+ }
485
488
 
486
489
  #if defined (__cplusplus)
487
490
  }
@@ -17,10 +17,19 @@ extern "C" {
17
17
 
18
18
  #include <stddef.h>
19
19
 
20
- /* weak symbol support */
21
- #if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \
20
+ /* weak symbol support
21
+ * For now, enable conservatively:
22
+ * - Only GNUC
23
+ * - Only ELF
24
+ * - Only x86-64 and i386
25
+ * Also, explicitly disable on platforms known not to work so they aren't
26
+ * forgotten in the future.
27
+ */
28
+ #if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && \
29
+ defined(__GNUC__) && defined(__ELF__) && \
30
+ (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) && \
22
31
  !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
23
- !defined(__CYGWIN__)
32
+ !defined(__CYGWIN__) && !defined(_AIX)
24
33
  # define ZSTD_HAVE_WEAK_SYMBOLS 1
25
34
  #else
26
35
  # define ZSTD_HAVE_WEAK_SYMBOLS 0
@@ -0,0 +1,134 @@
1
+ /*
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #ifndef ZSTD_CLEVELS_H
12
+ #define ZSTD_CLEVELS_H
13
+
14
+ #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */
15
+ #include "../zstd.h"
16
+
17
+ /*-===== Pre-defined compression levels =====-*/
18
+
19
+ #define ZSTD_MAX_CLEVEL 22
20
+
21
+ #ifdef __GNUC__
22
+ __attribute__((__unused__))
23
+ #endif
24
+
25
+ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
26
+ { /* "default" - for any srcSize > 256 KB */
27
+ /* W, C, H, S, L, TL, strat */
28
+ { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
29
+ { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
30
+ { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
31
+ { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */
32
+ { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */
33
+ { 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */
34
+ { 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */
35
+ { 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */
36
+ { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */
37
+ { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
38
+ { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 10 */
39
+ { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 11 */
40
+ { 22, 22, 23, 6, 5, 32, ZSTD_lazy2 }, /* level 12 */
41
+ { 22, 22, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
42
+ { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
43
+ { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
44
+ { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */
45
+ { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */
46
+ { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */
47
+ { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */
48
+ { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */
49
+ { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */
50
+ { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */
51
+ },
52
+ { /* for srcSize <= 256 KB */
53
+ /* W, C, H, S, L, T, strat */
54
+ { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
55
+ { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
56
+ { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */
57
+ { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */
58
+ { 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/
59
+ { 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/
60
+ { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
61
+ { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */
62
+ { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
63
+ { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
64
+ { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
65
+ { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/
66
+ { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/
67
+ { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */
68
+ { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
69
+ { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/
70
+ { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/
71
+ { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/
72
+ { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/
73
+ { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
74
+ { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/
75
+ { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/
76
+ { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/
77
+ },
78
+ { /* for srcSize <= 128 KB */
79
+ /* W, C, H, S, L, T, strat */
80
+ { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
81
+ { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
82
+ { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
83
+ { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */
84
+ { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */
85
+ { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
86
+ { 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
87
+ { 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
88
+ { 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
89
+ { 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
90
+ { 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
91
+ { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */
92
+ { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */
93
+ { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/
94
+ { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
95
+ { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/
96
+ { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/
97
+ { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/
98
+ { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/
99
+ { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/
100
+ { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/
101
+ { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
102
+ { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/
103
+ },
104
+ { /* for srcSize <= 16 KB */
105
+ /* W, C, H, S, L, T, strat */
106
+ { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
107
+ { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
108
+ { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
109
+ { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */
110
+ { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
111
+ { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
112
+ { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
113
+ { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
114
+ { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/
115
+ { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
116
+ { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
117
+ { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
118
+ { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/
119
+ { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/
120
+ { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/
121
+ { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/
122
+ { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/
123
+ { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/
124
+ { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/
125
+ { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
126
+ { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/
127
+ { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
128
+ { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/
129
+ },
130
+ };
131
+
132
+
133
+
134
+ #endif /* ZSTD_CLEVELS_H */
@@ -75,13 +75,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
75
75
  void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
76
76
  FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
77
77
  U32 const step = FSE_TABLESTEP(tableSize);
78
+ U32 const maxSV1 = maxSymbolValue+1;
78
79
 
79
- U32* cumul = (U32*)workSpace;
80
- FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
80
+ U16* cumul = (U16*)workSpace; /* size = maxSV1 */
81
+ FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */
81
82
 
82
83
  U32 highThreshold = tableSize-1;
83
84
 
84
- if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
85
+ assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */
85
86
  if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
86
87
  /* CTable header */
87
88
  tableU16[-2] = (U16) tableLog;
@@ -98,20 +99,61 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
98
99
  /* symbol start positions */
99
100
  { U32 u;
100
101
  cumul[0] = 0;
101
- for (u=1; u <= maxSymbolValue+1; u++) {
102
+ for (u=1; u <= maxSV1; u++) {
102
103
  if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
103
104
  cumul[u] = cumul[u-1] + 1;
104
105
  tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
105
106
  } else {
106
- cumul[u] = cumul[u-1] + normalizedCounter[u-1];
107
+ assert(normalizedCounter[u-1] >= 0);
108
+ cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
109
+ assert(cumul[u] >= cumul[u-1]); /* no overflow */
107
110
  } }
108
- cumul[maxSymbolValue+1] = tableSize+1;
111
+ cumul[maxSV1] = (U16)(tableSize+1);
109
112
  }
110
113
 
111
114
  /* Spread symbols */
112
- { U32 position = 0;
115
+ if (highThreshold == tableSize - 1) {
116
+ /* Case for no low prob count symbols. Lay down 8 bytes at a time
117
+ * to reduce branch misses since we are operating on a small block
118
+ */
119
+ BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
120
+ { U64 const add = 0x0101010101010101ull;
121
+ size_t pos = 0;
122
+ U64 sv = 0;
123
+ U32 s;
124
+ for (s=0; s<maxSV1; ++s, sv += add) {
125
+ int i;
126
+ int const n = normalizedCounter[s];
127
+ MEM_write64(spread + pos, sv);
128
+ for (i = 8; i < n; i += 8) {
129
+ MEM_write64(spread + pos + i, sv);
130
+ }
131
+ assert(n>=0);
132
+ pos += (size_t)n;
133
+ }
134
+ }
135
+ /* Spread symbols across the table. Lack of lowprob symbols means that
136
+ * we don't need variable sized inner loop, so we can unroll the loop and
137
+ * reduce branch misses.
138
+ */
139
+ { size_t position = 0;
140
+ size_t s;
141
+ size_t const unroll = 2; /* Experimentally determined optimal unroll */
142
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
143
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
144
+ size_t u;
145
+ for (u = 0; u < unroll; ++u) {
146
+ size_t const uPosition = (position + (u * step)) & tableMask;
147
+ tableSymbol[uPosition] = spread[s + u];
148
+ }
149
+ position = (position + (unroll * step)) & tableMask;
150
+ }
151
+ assert(position == 0); /* Must have initialized all positions */
152
+ }
153
+ } else {
154
+ U32 position = 0;
113
155
  U32 symbol;
114
- for (symbol=0; symbol<=maxSymbolValue; symbol++) {
156
+ for (symbol=0; symbol<maxSV1; symbol++) {
115
157
  int nbOccurrences;
116
158
  int const freq = normalizedCounter[symbol];
117
159
  for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
@@ -120,7 +162,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
120
162
  while (position > highThreshold)
121
163
  position = (position + step) & tableMask; /* Low proba area */
122
164
  } }
123
-
124
165
  assert(position==0); /* Must have initialized all positions */
125
166
  }
126
167
 
@@ -144,16 +185,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
144
185
  case -1:
145
186
  case 1:
146
187
  symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
147
- symbolTT[s].deltaFindState = total - 1;
188
+ assert(total <= INT_MAX);
189
+ symbolTT[s].deltaFindState = (int)(total - 1);
148
190
  total ++;
149
191
  break;
150
192
  default :
151
- {
152
- U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
153
- U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
193
+ assert(normalizedCounter[s] > 1);
194
+ { U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
195
+ U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
154
196
  symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
155
- symbolTT[s].deltaFindState = total - normalizedCounter[s];
156
- total += normalizedCounter[s];
197
+ symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
198
+ total += (unsigned)normalizedCounter[s];
157
199
  } } } }
158
200
 
159
201
  #if 0 /* debug : symbol costs */
@@ -164,32 +206,26 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
164
206
  symbol, normalizedCounter[symbol],
165
207
  FSE_getMaxNbBits(symbolTT, symbol),
166
208
  (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
167
- }
168
- }
209
+ } }
169
210
  #endif
170
211
 
171
212
  return 0;
172
213
  }
173
214
 
174
- #ifndef ZSTD_NO_UNUSED_FUNCTIONS
175
- size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
176
- {
177
- FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
178
- return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
179
- }
180
- #endif
181
-
182
215
 
183
216
 
184
217
  #ifndef FSE_COMMONDEFS_ONLY
185
218
 
186
-
187
219
  /*-**************************************************************
188
220
  * FSE NCount encoding
189
221
  ****************************************************************/
190
222
  size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
191
223
  {
192
- size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
224
+ size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
225
+ + 4 /* bitCount initialized at 4 */
226
+ + 2 /* first two symbols may use one additional bit each */) / 8)
227
+ + 1 /* round up to whole nb bytes */
228
+ + 2 /* additional two bytes for bitstream flush */;
193
229
  return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
194
230
  }
195
231