zstd-ruby 1.5.0.0 → 1.5.1.0
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -2
- data/README.md +1 -1
- data/ext/zstdruby/extconf.rb +1 -0
- data/ext/zstdruby/libzstd/Makefile +50 -175
- data/ext/zstdruby/libzstd/README.md +7 -1
- data/ext/zstdruby/libzstd/common/bitstream.h +24 -9
- data/ext/zstdruby/libzstd/common/compiler.h +89 -43
- data/ext/zstdruby/libzstd/common/entropy_common.c +11 -5
- data/ext/zstdruby/libzstd/common/error_private.h +79 -0
- data/ext/zstdruby/libzstd/common/fse.h +2 -1
- data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
- data/ext/zstdruby/libzstd/common/huf.h +24 -22
- data/ext/zstdruby/libzstd/common/mem.h +18 -0
- data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
- data/ext/zstdruby/libzstd/common/xxhash.c +5 -805
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_internal.h +92 -88
- data/ext/zstdruby/libzstd/common/zstd_trace.h +12 -3
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +63 -27
- data/ext/zstdruby/libzstd/compress/huf_compress.c +537 -104
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +194 -278
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +102 -44
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +3 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +5 -4
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +3 -3
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +289 -114
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +302 -123
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +418 -502
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +4 -4
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +4 -1
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +186 -108
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +59 -29
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +727 -189
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +85 -22
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +744 -220
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +34 -3
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +23 -3
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +11 -4
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +99 -28
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +2 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +3 -7
- data/ext/zstdruby/libzstd/libzstd.mk +185 -0
- data/ext/zstdruby/libzstd/libzstd.pc.in +1 -0
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
- data/ext/zstdruby/libzstd/zdict.h +4 -4
- data/ext/zstdruby/libzstd/zstd.h +179 -136
- data/ext/zstdruby/zstdruby.c +2 -2
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +8 -3
@@ -19,10 +19,8 @@
 /*-*************************************
 *  Dependencies
 ***************************************/
-#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
-#include <arm_neon.h>
-#endif
 #include "compiler.h"
+#include "cpu.h"
 #include "mem.h"
 #include "debug.h"  /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
 #include "error_private.h"
@@ -60,81 +58,7 @@ extern "C" {
 #undef MAX
 #define MIN(a,b) ((a)<(b) ? (a) : (b))
 #define MAX(a,b) ((a)>(b) ? (a) : (b))
-
-/**
- * Ignore: this is an internal helper.
- *
- * This is a helper function to help force C99-correctness during compilation.
- * Under strict compilation modes, variadic macro arguments can't be empty.
- * However, variadic function arguments can be. Using a function therefore lets
- * us statically check that at least one (string) argument was passed,
- * independent of the compilation flags.
- */
-static INLINE_KEYWORD UNUSED_ATTR
-void _force_has_format_string(const char *format, ...) {
-  (void)format;
-}
-
-/**
- * Ignore: this is an internal helper.
- *
- * We want to force this function invocation to be syntactically correct, but
- * we don't want to force runtime evaluation of its arguments.
- */
-#define _FORCE_HAS_FORMAT_STRING(...) \
-  if (0) { \
-    _force_has_format_string(__VA_ARGS__); \
-  }
-
-/**
- * Return the specified error if the condition evaluates to true.
- *
- * In debug modes, prints additional information.
- * In order to do that (particularly, printing the conditional that failed),
- * this can't just wrap RETURN_ERROR().
- */
-#define RETURN_ERROR_IF(cond, err, ...) \
-  if (cond) { \
-    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
-           __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
-    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
-    RAWLOG(3, ": " __VA_ARGS__); \
-    RAWLOG(3, "\n"); \
-    return ERROR(err); \
-  }
-
-/**
- * Unconditionally return the specified error.
- *
- * In debug modes, prints additional information.
- */
-#define RETURN_ERROR(err, ...) \
-  do { \
-    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
-           __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
-    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
-    RAWLOG(3, ": " __VA_ARGS__); \
-    RAWLOG(3, "\n"); \
-    return ERROR(err); \
-  } while(0);
-
-/**
- * If the provided expression evaluates to an error code, returns that error code.
- *
- * In debug modes, prints additional information.
- */
-#define FORWARD_IF_ERROR(err, ...) \
-  do { \
-    size_t const err_code = (err); \
-    if (ERR_isError(err_code)) { \
-      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
-             __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
-      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
-      RAWLOG(3, ": " __VA_ARGS__); \
-      RAWLOG(3, "\n"); \
-      return err_code; \
-    } \
-  } while(0);
+#define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))


 /*-*************************************
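The large block removed here is the set of debug error-reporting helpers (`RETURN_ERROR_IF`, `RETURN_ERROR`, `FORWARD_IF_ERROR`), which appear to move into `error_private.h` (that file gains 79 lines in this release); the only functional addition in this hunk is the `BOUNDED` clamp. A minimal stand-alone sketch of what `BOUNDED` evaluates to, with the three macros copied out of the hunk:

```c
#include <stdio.h>

/* Stand-alone copies of the three macros from the hunk above. */
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))
#define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))

int main(void) {
    printf("%d\n", BOUNDED(1, 42, 22));   /* 42 clamped into [1,22] -> 22 */
    printf("%d\n", BOUNDED(1, -5, 22));   /* -5 clamped into [1,22] ->  1 */
    return 0;
}
```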
@@ -195,7 +119,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
 /* Each table cannot take more than #symbols * FSELog bits */
 #define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)

-static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = {
+static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0,
      1, 1, 1, 1, 2, 2, 3, 3,
@@ -212,7 +136,7 @@ static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
 #define LL_DEFAULTNORMLOG 6  /* for static allocation */
 static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;

-static UNUSED_ATTR const U32 ML_bits[MaxML+1] = {
+static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0,
@@ -247,19 +171,30 @@ static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
 *  Shared functions to include for inlining
 *********************************************/
 static void ZSTD_copy8(void* dst, const void* src) {
-#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
+#if defined(ZSTD_ARCH_ARM_NEON)
     vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
 #else
     ZSTD_memcpy(dst, src, 8);
 #endif
 }
-
 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/* Need to use memmove here since the literal buffer can now be located within
+   the dst buffer. In circumstances where the op "catches up" to where the
+   literal buffer is, there can be partial overlaps in this call on the final
+   copy if the literal is being shifted by less than 16 bytes. */
 static void ZSTD_copy16(void* dst, const void* src) {
-#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
+#if defined(ZSTD_ARCH_ARM_NEON)
     vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
+#elif defined(ZSTD_ARCH_X86_SSE2)
+    _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
+#elif defined(__clang__)
+    ZSTD_memmove(dst, src, 16);
 #else
-    ZSTD_memcpy(dst, src, 16);
+    /* ZSTD_memmove is not inlined properly by gcc */
+    BYTE copy16_buf[16];
+    ZSTD_memcpy(copy16_buf, src, 16);
+    ZSTD_memcpy(dst, copy16_buf, 16);
 #endif
 }
 #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
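The new comment explains why a plain 16-byte copy is no longer enough: the literal buffer may now sit inside the destination buffer, so the final copy can partially overlap, and the gcc fallback stages the bytes through a small stack buffer instead of calling `ZSTD_memmove`. A stand-alone sketch of that staging idea, using plain `memcpy` in place of the `ZSTD_mem*` wrappers (the buffer contents below are just an example):

```c
#include <stdio.h>
#include <string.h>

/* Copy 16 bytes through a small stack buffer so the result matches memmove
 * semantics even when dst overlaps the tail of src by a few bytes. */
static void copy16_via_buffer(void* dst, const void* src) {
    unsigned char tmp[16];
    memcpy(tmp, src, 16);
    memcpy(dst, tmp, 16);
}

int main(void) {
    char buf[32] = "abcdefghijklmnopqrstuvwxyz";
    copy16_via_buffer(buf + 4, buf);   /* destination overlaps the source by 12 bytes */
    printf("%.20s\n", buf);            /* prints "abcdabcdefghijklmnop" */
    return 0;
}
```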
@@ -288,8 +223,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
     BYTE* op = (BYTE*)dst;
     BYTE* const oend = op + length;

-    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
-
     if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
         /* Handle short offset copies. */
         do {
@@ -436,8 +369,14 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus
 #  if STATIC_BMI2 == 1
     return _lzcnt_u32(val)^31;
 #  else
-
-
+    if (val != 0) {
+        unsigned long r;
+        _BitScanReverse(&r, val);
+        return (unsigned)r;
+    } else {
+        /* Should not reach this code path */
+        __assume(0);
+    }
 #  endif
 # elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
     return __builtin_clz (val) ^ 31;
@@ -456,6 +395,63 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus
     }
 }

+/**
+ * Counts the number of trailing zeros of a `size_t`.
+ * Most compilers should support CTZ as a builtin. A backup
+ * implementation is provided if the builtin isn't supported, but
+ * it may not be terribly efficient.
+ */
+MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
+{
+    if (MEM_64bits()) {
+#       if defined(_MSC_VER) && defined(_WIN64)
+#           if STATIC_BMI2
+                return _tzcnt_u64(val);
+#           else
+                if (val != 0) {
+                    unsigned long r;
+                    _BitScanForward64(&r, (U64)val);
+                    return (unsigned)r;
+                } else {
+                    /* Should not reach this code path */
+                    __assume(0);
+                }
+#           endif
+#       elif defined(__GNUC__) && (__GNUC__ >= 4)
+            return __builtin_ctzll((U64)val);
+#       else
+            static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19,
+                                                     4, 25, 14, 28, 9, 34, 20, 56,
+                                                     5, 17, 26, 54, 15, 41, 29, 43,
+                                                     10, 31, 38, 35, 21, 45, 49, 57,
+                                                     63, 6, 12, 18, 24, 27, 33, 55,
+                                                     16, 53, 40, 42, 30, 37, 44, 48,
+                                                     62, 11, 23, 32, 52, 39, 36, 47,
+                                                     61, 22, 51, 46, 60, 50, 59, 58 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+#       endif
+    } else { /* 32 bits */
+#       if defined(_MSC_VER)
+            if (val != 0) {
+                unsigned long r;
+                _BitScanForward(&r, (U32)val);
+                return (unsigned)r;
+            } else {
+                /* Should not reach this code path */
+                __assume(0);
+            }
+#       elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return __builtin_ctz((U32)val);
+#       else
+            static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3,
+                                                     30, 22, 20, 15, 25, 17, 4, 8,
+                                                     31, 27, 13, 23, 21, 19, 16, 7,
+                                                     26, 12, 18, 6, 11, 5, 10, 9 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+#       endif
+    }
+}
+

 /* ZSTD_invalidateRepCodes() :
  * ensures next compression will not use repcodes from previous block.
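The new `ZSTD_countTrailingZeros` falls back to a De Bruijn multiplication when no compiler builtin is available: isolate the lowest set bit, multiply by a De Bruijn constant so that every possible bit position produces a distinct top-bits pattern, and use those top bits as a table index. A stand-alone 32-bit sketch of the same trick (same table and constant as the hunk), cross-checked against the GCC/Clang builtin, so it assumes one of those compilers:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* 32-bit De Bruijn fallback: same table and constant as in the hunk above.
 * (val & (0u - val)) isolates the lowest set bit without signed overflow. */
static unsigned ctz32_debruijn(uint32_t val) {
    static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3,
                                             30, 22, 20, 15, 25, 17, 4, 8,
                                             31, 27, 13, 23, 21, 19, 16, 7,
                                             26, 12, 18, 6, 11, 5, 10, 9 };
    return (unsigned)DeBruijnBytePos[((val & (0u - val)) * 0x077CB531U) >> 27];
}

int main(void) {
    uint32_t v;
    for (v = 1; v < (1u << 20); ++v)
        assert(ctz32_debruijn(v) == (unsigned)__builtin_ctz(v));  /* GCC/Clang only */
    printf("ctz(0x80) = %u\n", ctz32_debruijn(0x80u));            /* prints 7 */
    return 0;
}
```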
@@ -482,6 +478,14 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                              const void* src, size_t srcSize);

+/**
+ * @returns true iff the CPU supports dynamic BMI2 dispatch.
+ */
+MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
+{
+    ZSTD_cpuid_t cpuid = ZSTD_cpuid();
+    return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
+}

 #if defined (__cplusplus)
 }
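`ZSTD_cpuSupportsBmi2()` centralizes the runtime check used to gate the BMI2-specialized code paths (note it also requires BMI1, since those paths use BMI1 instructions as well). A sketch of the same select-once-at-runtime pattern, using GCC/Clang's x86-only `__builtin_cpu_supports` rather than zstd's internal `ZSTD_cpuid*` helpers; the `sum_*` functions are invented for illustration:

```c
#include <stdio.h>

static int sum_portable(const int* a, int n) {
    int s = 0, i;
    for (i = 0; i < n; ++i) s += a[i];
    return s;
}

/* Stand-in for a BMI2-tuned kernel; a real one would use _pext_u64 etc. */
static int sum_bmi2(const int* a, int n) { return sum_portable(a, n); }

typedef int (*sum_fn)(const int*, int);

/* Decide once which implementation to use, based on what the CPU reports. */
static sum_fn select_sum(void) {
    if (__builtin_cpu_supports("bmi") && __builtin_cpu_supports("bmi2"))
        return sum_bmi2;
    return sum_portable;
}

int main(void) {
    int data[4] = { 1, 2, 3, 4 };
    printf("%d\n", select_sum()(data, 4));  /* 10 with either implementation */
    return 0;
}
```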
@@ -17,10 +17,19 @@ extern "C" {

 #include <stddef.h>

-/* weak symbol support */
-#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \
+/* weak symbol support
+ * For now, enable conservatively:
+ * - Only GNUC
+ * - Only ELF
+ * - Only x86-64 and i386
+ * Also, explicitly disable on platforms known not to work so they aren't
+ * forgotten in the future.
+ */
+#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && \
+    defined(__GNUC__) && defined(__ELF__) && \
+    (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) && \
     !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
-    !defined(__CYGWIN__)
+    !defined(__CYGWIN__) && !defined(_AIX)
 #  define ZSTD_HAVE_WEAK_SYMBOLS 1
 #else
 #  define ZSTD_HAVE_WEAK_SYMBOLS 0
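The guard above decides whether the tracing hooks may be declared as weak symbols; 1.5.1 narrows it to GNU C on ELF for x86/x86-64 and adds AIX to the explicit exclusions. A minimal illustration of the weak-symbol pattern itself, with a hypothetical `my_trace_hook` (not a zstd symbol); it assumes GCC or Clang on an ELF platform:

```c
#include <stdio.h>

/* Weak declaration: if no other translation unit defines my_trace_hook,
 * its address is simply NULL at run time (GNU C, ELF). The name is
 * hypothetical and not part of zstd. */
__attribute__((weak)) void my_trace_hook(const char* event);

static void emit(const char* event) {
    if (my_trace_hook)                 /* call the hook only if one was linked in */
        my_trace_hook(event);
    else
        printf("no trace hook installed for: %s\n", event);
}

int main(void) {
    emit("block_compressed");
    return 0;
}
```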
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CLEVELS_H
+#define ZSTD_CLEVELS_H
+
+#define ZSTD_STATIC_LINKING_ONLY  /* ZSTD_compressionParameters  */
+#include "../zstd.h"
+
+/*-=====  Pre-defined compression levels  =====-*/
+
+#define ZSTD_MAX_CLEVEL     22
+
+#ifdef __GNUC__
+__attribute__((__unused__))
+#endif
+
+static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
+{   /* "default" - for any srcSize > 256 KB */
+    /* W,  C,  H,  S,  L, TL, strat */
+    { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
+    { 19, 13, 14,  1,  7,  0, ZSTD_fast    },  /* level  1 */
+    { 20, 15, 16,  1,  6,  0, ZSTD_fast    },  /* level  2 */
+    { 21, 16, 17,  1,  5,  0, ZSTD_dfast   },  /* level  3 */
+    { 21, 18, 18,  1,  5,  0, ZSTD_dfast   },  /* level  4 */
+    { 21, 18, 19,  3,  5,  2, ZSTD_greedy  },  /* level  5 */
+    { 21, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6 */
+    { 21, 19, 20,  4,  5,  8, ZSTD_lazy    },  /* level  7 */
+    { 21, 19, 20,  4,  5, 16, ZSTD_lazy2   },  /* level  8 */
+    { 22, 20, 21,  4,  5, 16, ZSTD_lazy2   },  /* level  9 */
+    { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 10 */
+    { 22, 21, 22,  6,  5, 16, ZSTD_lazy2   },  /* level 11 */
+    { 22, 22, 23,  6,  5, 32, ZSTD_lazy2   },  /* level 12 */
+    { 22, 22, 22,  4,  5, 32, ZSTD_btlazy2 },  /* level 13 */
+    { 22, 22, 23,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
+    { 22, 23, 23,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
+    { 22, 22, 22,  5,  5, 48, ZSTD_btopt   },  /* level 16 */
+    { 23, 23, 22,  5,  4, 64, ZSTD_btopt   },  /* level 17 */
+    { 23, 23, 22,  6,  3, 64, ZSTD_btultra },  /* level 18 */
+    { 23, 24, 22,  7,  3,256, ZSTD_btultra2},  /* level 19 */
+    { 25, 25, 23,  7,  3,256, ZSTD_btultra2},  /* level 20 */
+    { 26, 26, 24,  7,  3,512, ZSTD_btultra2},  /* level 21 */
+    { 27, 27, 25,  9,  3,999, ZSTD_btultra2},  /* level 22 */
+},
+{   /* for srcSize <= 256 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 18, 13, 14,  1,  6,  0, ZSTD_fast    },  /* level  1 */
+    { 18, 14, 14,  1,  5,  0, ZSTD_dfast   },  /* level  2 */
+    { 18, 16, 16,  1,  4,  0, ZSTD_dfast   },  /* level  3 */
+    { 18, 16, 17,  3,  5,  2, ZSTD_greedy  },  /* level  4.*/
+    { 18, 17, 18,  5,  5,  2, ZSTD_greedy  },  /* level  5.*/
+    { 18, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6.*/
+    { 18, 18, 19,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
+    { 18, 18, 19,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
+    { 18, 18, 19,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
+    { 18, 18, 19,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
+    { 18, 18, 19,  5,  4, 12, ZSTD_btlazy2 },  /* level 11.*/
+    { 18, 19, 19,  7,  4, 12, ZSTD_btlazy2 },  /* level 12.*/
+    { 18, 18, 19,  4,  4, 16, ZSTD_btopt   },  /* level 13 */
+    { 18, 18, 19,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
+    { 18, 18, 19,  6,  3,128, ZSTD_btopt   },  /* level 15.*/
+    { 18, 19, 19,  6,  3,128, ZSTD_btultra },  /* level 16.*/
+    { 18, 19, 19,  8,  3,256, ZSTD_btultra },  /* level 17.*/
+    { 18, 19, 19,  6,  3,128, ZSTD_btultra2},  /* level 18.*/
+    { 18, 19, 19,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 18, 19, 19, 10,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 18, 19, 19, 12,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 18, 19, 19, 13,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+{   /* for srcSize <= 128 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 17, 12, 13,  1,  6,  0, ZSTD_fast    },  /* level  1 */
+    { 17, 13, 15,  1,  5,  0, ZSTD_fast    },  /* level  2 */
+    { 17, 15, 16,  2,  5,  0, ZSTD_dfast   },  /* level  3 */
+    { 17, 17, 17,  2,  4,  0, ZSTD_dfast   },  /* level  4 */
+    { 17, 16, 17,  3,  4,  2, ZSTD_greedy  },  /* level  5 */
+    { 17, 16, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
+    { 17, 16, 17,  3,  4,  8, ZSTD_lazy2   },  /* level  7 */
+    { 17, 16, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
+    { 17, 16, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
+    { 17, 16, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
+    { 17, 17, 17,  5,  4,  8, ZSTD_btlazy2 },  /* level 11 */
+    { 17, 18, 17,  7,  4, 12, ZSTD_btlazy2 },  /* level 12 */
+    { 17, 18, 17,  3,  4, 12, ZSTD_btopt   },  /* level 13.*/
+    { 17, 18, 17,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
+    { 17, 18, 17,  6,  3,256, ZSTD_btopt   },  /* level 15.*/
+    { 17, 18, 17,  6,  3,128, ZSTD_btultra },  /* level 16.*/
+    { 17, 18, 17,  8,  3,256, ZSTD_btultra },  /* level 17.*/
+    { 17, 18, 17, 10,  3,512, ZSTD_btultra },  /* level 18.*/
+    { 17, 18, 17,  5,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 17, 18, 17,  7,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 17, 18, 17,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 17, 18, 17, 11,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+{   /* for srcSize <= 16 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 14, 14, 15,  1,  5,  0, ZSTD_fast    },  /* level  1 */
+    { 14, 14, 15,  1,  4,  0, ZSTD_fast    },  /* level  2 */
+    { 14, 14, 15,  2,  4,  0, ZSTD_dfast   },  /* level  3 */
+    { 14, 14, 14,  4,  4,  2, ZSTD_greedy  },  /* level  4 */
+    { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  5.*/
+    { 14, 14, 14,  4,  4,  8, ZSTD_lazy2   },  /* level  6 */
+    { 14, 14, 14,  6,  4,  8, ZSTD_lazy2   },  /* level  7 */
+    { 14, 14, 14,  8,  4,  8, ZSTD_lazy2   },  /* level  8.*/
+    { 14, 15, 14,  5,  4,  8, ZSTD_btlazy2 },  /* level  9.*/
+    { 14, 15, 14,  9,  4,  8, ZSTD_btlazy2 },  /* level 10.*/
+    { 14, 15, 14,  3,  4, 12, ZSTD_btopt   },  /* level 11.*/
+    { 14, 15, 14,  4,  3, 24, ZSTD_btopt   },  /* level 12.*/
+    { 14, 15, 14,  5,  3, 32, ZSTD_btultra },  /* level 13.*/
+    { 14, 15, 15,  6,  3, 64, ZSTD_btultra },  /* level 14.*/
+    { 14, 15, 15,  7,  3,256, ZSTD_btultra },  /* level 15.*/
+    { 14, 15, 15,  5,  3, 48, ZSTD_btultra2},  /* level 16.*/
+    { 14, 15, 15,  6,  3,128, ZSTD_btultra2},  /* level 17.*/
+    { 14, 15, 15,  7,  3,256, ZSTD_btultra2},  /* level 18.*/
+    { 14, 15, 15,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 14, 15, 15,  8,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 14, 15, 15,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 14, 15, 15, 10,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+};
+
+
+
+#endif  /* ZSTD_CLEVELS_H */
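clevels.h moves the predefined parameter tables into their own header: four rows keyed by a source-size bucket (the `srcSize` comments above) and 23 columns for levels 0 through `ZSTD_MAX_CLEVEL`. A rough sketch of the row/column selection such a table implies; `pick_row` and `pick_col` are illustrative helpers, not the library's actual `ZSTD_getCParams` logic:

```c
#include <stdio.h>

#define MAX_CLEVEL 22

/* Source-size bucket, mirroring the row comments in clevels.h:
 * row 0: > 256 KB, row 1: <= 256 KB, row 2: <= 128 KB, row 3: <= 16 KB. */
static int pick_row(unsigned long long srcSize) {
    if (srcSize > 256 * 1024) return 0;
    if (srcSize > 128 * 1024) return 1;
    if (srcSize >  16 * 1024) return 2;
    return 3;
}

/* Clamp a requested level into the table's column range (illustrative only). */
static int pick_col(int level) {
    if (level < 0) return 0;              /* "base for negative levels" entry */
    if (level > MAX_CLEVEL) return MAX_CLEVEL;
    return level;
}

int main(void) {
    printf("1 MiB  @ level 3  -> row %d, col %d\n", pick_row(1u << 20), pick_col(3));
    printf("10 KiB @ level 25 -> row %d, col %d\n", pick_row(10u * 1024), pick_col(25));
    return 0;
}
```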
@@ -75,13 +75,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
     U32 const step = FSE_TABLESTEP(tableSize);
+    U32 const maxSV1 = maxSymbolValue+1;

-    U32* cumul = (U32*)workSpace;
-    FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
+    U16* cumul = (U16*)workSpace;   /* size = maxSV1 */
+    FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1));  /* size = tableSize */

     U32 highThreshold = tableSize-1;

-
+    assert(((size_t)workSpace & 1) == 0);  /* Must be 2 bytes-aligned */
     if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
     /* CTable header */
     tableU16[-2] = (U16) tableLog;
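The workspace layout changes here: `cumul` shrinks from 32-bit to 16-bit entries, `tableSymbol` is carved out right after it, and the alignment requirement drops to 2 bytes. A stand-alone sketch of carving two arrays out of one caller-provided workspace in that style (the sizes below are arbitrary example values):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    unsigned const maxSV1 = 36;                 /* example: 35 symbols + 1 */
    unsigned const tableSize = 1u << 6;         /* example: tableLog = 6 */
    size_t const wkspSize = (maxSV1 + 1) * sizeof(uint16_t) + tableSize + 8;
    void* const wksp = malloc(wkspSize);
    if (!wksp) return 1;
    assert(((uintptr_t)wksp & 1) == 0);         /* must be 2-byte aligned */

    /* Carve the two regions out of the single workspace, as the hunk does:
     * cumul[] first (16-bit entries), then the byte-sized tableSymbol[]. */
    {   uint16_t* const cumul = (uint16_t*)wksp;
        uint8_t*  const tableSymbol = (uint8_t*)(cumul + (maxSV1 + 1));
        printf("cumul bytes = %zu, tableSymbol offset = %td\n",
               (size_t)(maxSV1 + 1) * sizeof(uint16_t),
               tableSymbol - (uint8_t*)wksp);
    }
    free(wksp);
    return 0;
}
```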
@@ -98,20 +99,61 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
     /* symbol start positions */
     {   U32 u;
         cumul[0] = 0;
-        for (u=1; u <= maxSymbolValue+1; u++) {
+        for (u=1; u <= maxSV1; u++) {
             if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
                 cumul[u] = cumul[u-1] + 1;
                 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
             } else {
-                cumul[u] = cumul[u-1] + normalizedCounter[u-1];
+                assert(normalizedCounter[u-1] >= 0);
+                cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
+                assert(cumul[u] >= cumul[u-1]);  /* no overflow */
         }   }
-        cumul[maxSymbolValue+1] = tableSize+1;
+        cumul[maxSV1] = (U16)(tableSize+1);
     }

     /* Spread symbols */
-    {   U32 position = 0;
+    if (highThreshold == tableSize - 1) {
+        /* Case for no low prob count symbols. Lay down 8 bytes at a time
+         * to reduce branch misses since we are operating on a small block
+         */
+        BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
+        {   U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                assert(n>=0);
+                pos += (size_t)n;
+            }
+        }
+        /* Spread symbols across the table. Lack of lowprob symbols means that
+         * we don't need variable sized inner loop, so we can unroll the loop and
+         * reduce branch misses.
+         */
+        {   size_t position = 0;
+            size_t s;
+            size_t const unroll = 2; /* Experimentally determined optimal unroll */
+            assert(tableSize % unroll == 0);  /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableSymbol[uPosition] = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);   /* Must have initialized all positions */
+        }
+    } else {
+        U32 position = 0;
         U32 symbol;
-        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+        for (symbol=0; symbol<maxSV1; symbol++) {
            int nbOccurrences;
            int const freq = normalizedCounter[symbol];
            for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
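The fast path added above fills the symbol-spread buffer eight bytes at a time: the current symbol value is replicated into every byte of a 64-bit word (by adding 0x0101010101010101 once per symbol) and written with unaligned 64-bit stores, intentionally overshooting each run by up to 8 bytes so the inner loop never works byte by byte. A stand-alone sketch of that filling step, with plain `memcpy` standing in for `MEM_write64`:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Fill dst with counts[s] copies of byte value s, writing 8 bytes at a time.
 * dst needs 8 bytes of slack at the end, exactly like the `spread` buffer
 * in the hunk above (size = tableSize + 8). */
static size_t spread_bytes(uint8_t* dst, const int* counts, int nbSymbols) {
    uint64_t sv = 0;                         /* symbol value replicated in all 8 bytes */
    uint64_t const add = 0x0101010101010101ull;
    size_t pos = 0;
    int s;
    for (s = 0; s < nbSymbols; ++s, sv += add) {
        int i;
        int const n = counts[s];
        memcpy(dst + pos, &sv, 8);           /* may write past the run: that is the point */
        for (i = 8; i < n; i += 8)
            memcpy(dst + pos + i, &sv, 8);
        pos += (size_t)n;
    }
    return pos;
}

int main(void) {
    int counts[3] = { 3, 10, 1 };            /* symbol 0 x3, symbol 1 x10, symbol 2 x1 */
    uint8_t buf[14 + 8];                     /* total count + 8 bytes of slack */
    size_t n = spread_bytes(buf, counts, 3), i;
    for (i = 0; i < n; ++i) printf("%u ", buf[i]);
    printf("\n");                            /* 0 0 0 1 1 1 1 1 1 1 1 1 1 2 */
    return 0;
}
```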
@@ -120,7 +162,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
                 while (position > highThreshold)
                     position = (position + step) & tableMask;   /* Low proba area */
         }   }
-
         assert(position==0);  /* Must have initialized all positions */
     }

@@ -144,16 +185,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
             case -1:
             case  1:
                 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
-                symbolTT[s].deltaFindState = total - 1;
+                assert(total <= INT_MAX);
+                symbolTT[s].deltaFindState = (int)(total - 1);
                 total ++;
                 break;
             default :
-                {
-                U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
-                U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
+                assert(normalizedCounter[s] > 1);
+                {   U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
+                    U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
                     symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
-                symbolTT[s].deltaFindState = total - normalizedCounter[s];
-                total +=  normalizedCounter[s];
+                    symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
+                    total += (unsigned)normalizedCounter[s];
             }   }   }   }

 #if 0 /* debug : symbol costs */
@@ -164,32 +206,26 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
                 symbol, normalizedCounter[symbol],
                 FSE_getMaxNbBits(symbolTT, symbol),
                 (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
-        }
-    }
+    }   }
 #endif

     return 0;
 }

-#ifndef ZSTD_NO_UNUSED_FUNCTIONS
-size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
-{
-    FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];   /* memset() is not necessary, even if static analyzer complain about it */
-    return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
-}
-#endif
-


 #ifndef FSE_COMMONDEFS_ONLY

-
 /*-**************************************************************
 *  FSE NCount encoding
 ****************************************************************/
 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
 {
-    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
+    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+                                   + 4 /* bitCount initialized at 4 */
+                                   + 2 /* first two symbols may use one additional bit each */) / 8)
+                                   + 1 /* round up to whole nb bytes */
+                                   + 2 /* additional two bytes for bitstream flush */;
     return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
 }

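The rewritten `FSE_NCountWriteBound` spells out every source of slack: the bit counter starts at 4, the first two symbols may each cost one extra bit, the byte count is rounded up, and the bitstream flush can add two more bytes. A worked instance of the new formula (the inputs, 256 symbols with an 11-bit table, are just an example):

```c
#include <stdio.h>

/* The new bound from FSE_NCountWriteBound, spelled out step by step. */
static size_t ncount_write_bound(unsigned maxSymbolValue, unsigned tableLog) {
    size_t const bits = (size_t)(maxSymbolValue + 1) * tableLog
                      + 4   /* bitCount initialized at 4 */
                      + 2;  /* first two symbols may use one additional bit each */
    return bits / 8
         + 1    /* round up to whole nb bytes */
         + 2;   /* additional two bytes for bitstream flush */
}

int main(void) {
    /* Example only: (256*11 + 6)/8 + 3 = 352 + 3 = 355 bytes. */
    printf("%zu\n", ncount_write_bound(255, 11));
    return 0;
}
```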