multi_compress 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -3
- data/GET_STARTED.md +3 -3
- data/README.md +75 -66
- data/THIRD_PARTY_NOTICES.md +24 -0
- data/ext/multi_compress/brotli_dec_static_init.c +3 -0
- data/ext/multi_compress/brotli_enc_static_init.c +3 -0
- data/ext/multi_compress/extconf.rb +79 -3
- data/ext/multi_compress/multi_compress.c +199 -120
- data/ext/multi_compress/vendor/.vendored +2 -2
- data/ext/multi_compress/vendor/brotli/LICENSE +19 -0
- data/ext/multi_compress/vendor/brotli/c/common/constants.c +7 -7
- data/ext/multi_compress/vendor/brotli/c/common/constants.h +2 -5
- data/ext/multi_compress/vendor/brotli/c/common/context.c +2 -2
- data/ext/multi_compress/vendor/brotli/c/common/context.h +1 -2
- data/ext/multi_compress/vendor/brotli/c/common/dictionary.c +4 -5856
- data/ext/multi_compress/vendor/brotli/c/common/dictionary.h +1 -2
- data/ext/multi_compress/vendor/brotli/c/common/dictionary_inc.h +5847 -0
- data/ext/multi_compress/vendor/brotli/c/common/platform.c +0 -4
- data/ext/multi_compress/vendor/brotli/c/common/platform.h +182 -43
- data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary.c +3 -7
- data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary_internal.h +1 -1
- data/ext/multi_compress/vendor/brotli/c/common/static_init.h +56 -0
- data/ext/multi_compress/vendor/brotli/c/common/transform.c +6 -4
- data/ext/multi_compress/vendor/brotli/c/common/transform.h +1 -2
- data/ext/multi_compress/vendor/brotli/c/common/version.h +3 -3
- data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.c +2 -3
- data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.h +0 -4
- data/ext/multi_compress/vendor/brotli/c/dec/decode.c +128 -39
- data/ext/multi_compress/vendor/brotli/c/dec/huffman.c +2 -5
- data/ext/multi_compress/vendor/brotli/c/dec/huffman.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/dec/prefix.c +67 -0
- data/ext/multi_compress/vendor/brotli/c/dec/prefix.h +18 -708
- data/ext/multi_compress/vendor/brotli/c/dec/prefix_inc.h +707 -0
- data/ext/multi_compress/vendor/brotli/c/dec/state.c +18 -15
- data/ext/multi_compress/vendor/brotli/c/dec/state.h +2 -6
- data/ext/multi_compress/vendor/brotli/c/dec/static_init.c +53 -0
- data/ext/multi_compress/vendor/brotli/c/dec/static_init.h +30 -0
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references.c +32 -8
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references.h +1 -5
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.c +15 -15
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.h +1 -5
- data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.c +28 -4
- data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.h +8 -40
- data/ext/multi_compress/vendor/brotli/c/enc/bit_cost_inc.h +1 -1
- data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.c +9 -12
- data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.h +0 -3
- data/ext/multi_compress/vendor/brotli/c/enc/block_splitter_inc.h +14 -8
- data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.c +10 -9
- data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.h +0 -6
- data/ext/multi_compress/vendor/brotli/c/enc/cluster.c +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/cluster.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/command.c +1 -1
- data/ext/multi_compress/vendor/brotli/c/enc/command.h +8 -10
- data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.c +3 -5
- data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.h +1 -4
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.c +3 -13
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.c +5 -15
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.c +127 -1830
- data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.h +23 -3
- data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash_inc.h +1829 -0
- data/ext/multi_compress/vendor/brotli/c/enc/encode.c +77 -52
- data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.c +9 -7
- data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.h +2 -4
- data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.c +3 -6
- data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.h +2 -4
- data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode_static.h +18 -12
- data/ext/multi_compress/vendor/brotli/c/enc/fast_log.c +1 -1
- data/ext/multi_compress/vendor/brotli/c/enc/fast_log.h +2 -3
- data/ext/multi_compress/vendor/brotli/c/enc/find_match_length.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/hash.h +38 -31
- data/ext/multi_compress/vendor/brotli/c/enc/hash_base.h +38 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +11 -1
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_inc.h +24 -7
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_simd_inc.h +304 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_inc.h +30 -11
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_simd_inc.h +278 -0
- data/ext/multi_compress/vendor/brotli/c/enc/histogram.c +1 -0
- data/ext/multi_compress/vendor/brotli/c/enc/histogram.h +0 -4
- data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.c +4 -6
- data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/matching_tag_mask.h +69 -0
- data/ext/multi_compress/vendor/brotli/c/enc/memory.c +0 -5
- data/ext/multi_compress/vendor/brotli/c/enc/memory.h +0 -4
- data/ext/multi_compress/vendor/brotli/c/enc/metablock.c +7 -9
- data/ext/multi_compress/vendor/brotli/c/enc/metablock.h +3 -3
- data/ext/multi_compress/vendor/brotli/c/enc/metablock_inc.h +4 -4
- data/ext/multi_compress/vendor/brotli/c/enc/params.h +0 -1
- data/ext/multi_compress/vendor/brotli/c/enc/prefix.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/quality.h +17 -10
- data/ext/multi_compress/vendor/brotli/c/enc/ringbuffer.h +1 -4
- data/ext/multi_compress/vendor/brotli/c/enc/state.h +2 -2
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict.c +5 -11
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict.h +1 -3
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.c +224 -0
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.h +20 -5837
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut_inc.h +5830 -0
- data/ext/multi_compress/vendor/brotli/c/enc/static_init.c +59 -0
- data/ext/multi_compress/vendor/brotli/c/enc/static_init.h +30 -0
- data/ext/multi_compress/vendor/brotli/c/enc/static_init_lazy.cc +26 -0
- data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.c +1 -1
- data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/write_bits.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/include/brotli/decode.h +1 -1
- data/ext/multi_compress/vendor/brotli/c/include/brotli/encode.h +5 -1
- data/ext/multi_compress/vendor/brotli/c/include/brotli/port.h +4 -7
- data/ext/multi_compress/vendor/brotli/c/include/brotli/types.h +2 -2
- data/ext/multi_compress/vendor/lz4/LICENSE +12 -0
- data/ext/multi_compress/vendor/zstd/COPYING +339 -0
- data/ext/multi_compress/vendor/zstd/LICENSE +30 -0
- data/ext/multi_compress/vendor/zstd/lib/Makefile +67 -35
- data/ext/multi_compress/vendor/zstd/lib/README.md +33 -2
- data/ext/multi_compress/vendor/zstd/lib/common/allocations.h +55 -0
- data/ext/multi_compress/vendor/zstd/lib/common/bits.h +205 -0
- data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +84 -108
- data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +170 -41
- data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +37 -1
- data/ext/multi_compress/vendor/zstd/lib/common/debug.c +7 -1
- data/ext/multi_compress/vendor/zstd/lib/common/debug.h +21 -21
- data/ext/multi_compress/vendor/zstd/lib/common/entropy_common.c +12 -40
- data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +10 -2
- data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +46 -47
- data/ext/multi_compress/vendor/zstd/lib/common/fse.h +8 -100
- data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +28 -116
- data/ext/multi_compress/vendor/zstd/lib/common/huf.h +79 -166
- data/ext/multi_compress/vendor/zstd/lib/common/mem.h +46 -66
- data/ext/multi_compress/vendor/zstd/lib/common/pool.c +27 -11
- data/ext/multi_compress/vendor/zstd/lib/common/pool.h +8 -11
- data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +45 -11
- data/ext/multi_compress/vendor/zstd/lib/common/threading.c +74 -14
- data/ext/multi_compress/vendor/zstd/lib/common/threading.h +5 -18
- data/ext/multi_compress/vendor/zstd/lib/common/xxhash.c +5 -11
- data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +2411 -1003
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_common.c +1 -36
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +13 -1
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +13 -182
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +6 -13
- data/ext/multi_compress/vendor/zstd/lib/compress/clevels.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/fse_compress.c +15 -131
- data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +11 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +8 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/huf_compress.c +283 -189
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +2419 -903
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +423 -245
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +116 -40
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.h +16 -8
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +10 -10
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +8 -7
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +254 -139
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +184 -95
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +163 -81
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +18 -14
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +507 -197
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +7 -14
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +579 -484
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +133 -65
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +61 -40
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +7 -15
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm_geartab.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +352 -218
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +37 -21
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.c +238 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.h +33 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +239 -175
- data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +5 -16
- data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress.c +543 -488
- data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +78 -61
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +295 -115
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +430 -293
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.h +7 -2
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +11 -7
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_compress.c +1 -1
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +95 -46
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.h +3 -9
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +0 -10
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/fastcover.c +4 -4
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +25 -97
- data/ext/multi_compress/vendor/zstd/lib/dll/example/Makefile +1 -1
- data/ext/multi_compress/vendor/zstd/lib/dll/example/README.md +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +19 -50
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +27 -80
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +28 -83
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.c +25 -74
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +31 -76
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +44 -88
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +33 -84
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +65 -33
- data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +5 -5
- data/ext/multi_compress/vendor/zstd/lib/module.modulemap +13 -3
- data/ext/multi_compress/vendor/zstd/lib/zdict.h +65 -36
- data/ext/multi_compress/vendor/zstd/lib/zstd.h +890 -267
- data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +28 -16
- data/lib/multi_compress/version.rb +1 -1
- data/lib/multi_compress.rb +80 -41
- metadata +29 -2
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -13,11 +13,36 @@
|
|
|
13
13
|
***************************************/
|
|
14
14
|
#include "zstd_compress_literals.h"
|
|
15
15
|
|
|
16
|
+
|
|
17
|
+
/* **************************************************************
|
|
18
|
+
* Debug Traces
|
|
19
|
+
****************************************************************/
|
|
20
|
+
#if DEBUGLEVEL >= 2
|
|
21
|
+
|
|
22
|
+
static size_t showHexa(const void* src, size_t srcSize)
|
|
23
|
+
{
|
|
24
|
+
const BYTE* const ip = (const BYTE*)src;
|
|
25
|
+
size_t u;
|
|
26
|
+
for (u=0; u<srcSize; u++) {
|
|
27
|
+
RAWLOG(5, " %02X", ip[u]); (void)ip;
|
|
28
|
+
}
|
|
29
|
+
RAWLOG(5, " \n");
|
|
30
|
+
return srcSize;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
#endif
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
/* **************************************************************
|
|
37
|
+
* Literals compression - special cases
|
|
38
|
+
****************************************************************/
|
|
16
39
|
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
|
17
40
|
{
|
|
18
41
|
BYTE* const ostart = (BYTE*)dst;
|
|
19
42
|
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
|
|
20
43
|
|
|
44
|
+
DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
|
|
45
|
+
|
|
21
46
|
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
|
|
22
47
|
|
|
23
48
|
switch(flSize)
|
|
@@ -36,16 +61,30 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
|
|
|
36
61
|
}
|
|
37
62
|
|
|
38
63
|
ZSTD_memcpy(ostart + flSize, src, srcSize);
|
|
39
|
-
DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
|
|
64
|
+
DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
|
|
40
65
|
return srcSize + flSize;
|
|
41
66
|
}
|
|
42
67
|
|
|
68
|
+
static int allBytesIdentical(const void* src, size_t srcSize)
|
|
69
|
+
{
|
|
70
|
+
assert(srcSize >= 1);
|
|
71
|
+
assert(src != NULL);
|
|
72
|
+
{ const BYTE b = ((const BYTE*)src)[0];
|
|
73
|
+
size_t p;
|
|
74
|
+
for (p=1; p<srcSize; p++) {
|
|
75
|
+
if (((const BYTE*)src)[p] != b) return 0;
|
|
76
|
+
}
|
|
77
|
+
return 1;
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
43
81
|
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
|
44
82
|
{
|
|
45
83
|
BYTE* const ostart = (BYTE*)dst;
|
|
46
84
|
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
|
|
47
85
|
|
|
48
|
-
(
|
|
86
|
+
assert(dstCapacity >= 4); (void)dstCapacity;
|
|
87
|
+
assert(allBytesIdentical(src, srcSize));
|
|
49
88
|
|
|
50
89
|
switch(flSize)
|
|
51
90
|
{
|
|
@@ -63,28 +102,51 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void*
|
|
|
63
102
|
}
|
|
64
103
|
|
|
65
104
|
ostart[flSize] = *(const BYTE*)src;
|
|
66
|
-
DEBUGLOG(5, "RLE
|
|
105
|
+
DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
|
|
67
106
|
return flSize+1;
|
|
68
107
|
}
|
|
69
108
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
109
|
+
/* ZSTD_minLiteralsToCompress() :
|
|
110
|
+
* returns minimal amount of literals
|
|
111
|
+
* for literal compression to even be attempted.
|
|
112
|
+
* Minimum is made tighter as compression strategy increases.
|
|
113
|
+
*/
|
|
114
|
+
static size_t
|
|
115
|
+
ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
|
|
116
|
+
{
|
|
117
|
+
assert((int)strategy >= 0);
|
|
118
|
+
assert((int)strategy <= 9);
|
|
119
|
+
/* btultra2 : min 8 bytes;
|
|
120
|
+
* then 2x larger for each successive compression strategy
|
|
121
|
+
* max threshold 64 bytes */
|
|
122
|
+
{ int const shift = MIN(9-(int)strategy, 3);
|
|
123
|
+
size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
|
|
124
|
+
DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
|
|
125
|
+
return mintc;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
size_t ZSTD_compressLiterals (
|
|
130
|
+
void* dst, size_t dstCapacity,
|
|
131
|
+
const void* src, size_t srcSize,
|
|
132
|
+
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
|
133
|
+
const ZSTD_hufCTables_t* prevHuf,
|
|
134
|
+
ZSTD_hufCTables_t* nextHuf,
|
|
135
|
+
ZSTD_strategy strategy,
|
|
136
|
+
int disableLiteralCompression,
|
|
137
|
+
int suspectUncompressible,
|
|
138
|
+
int bmi2)
|
|
78
139
|
{
|
|
79
|
-
size_t const minGain = ZSTD_minGain(srcSize, strategy);
|
|
80
140
|
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
|
|
81
141
|
BYTE* const ostart = (BYTE*)dst;
|
|
82
142
|
U32 singleStream = srcSize < 256;
|
|
83
|
-
|
|
143
|
+
SymbolEncodingType_e hType = set_compressed;
|
|
84
144
|
size_t cLitSize;
|
|
85
145
|
|
|
86
|
-
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
|
|
87
|
-
disableLiteralCompression, (U32)srcSize);
|
|
146
|
+
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
|
|
147
|
+
disableLiteralCompression, (U32)srcSize, dstCapacity);
|
|
148
|
+
|
|
149
|
+
DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
|
|
88
150
|
|
|
89
151
|
/* Prepare nextEntropy assuming reusing the existing table */
|
|
90
152
|
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
@@ -92,40 +154,51 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
|
|
92
154
|
if (disableLiteralCompression)
|
|
93
155
|
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
|
94
156
|
|
|
95
|
-
/* small
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
|
99
|
-
}
|
|
157
|
+
/* if too small, don't even attempt compression (speed opt) */
|
|
158
|
+
if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
|
|
159
|
+
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
|
100
160
|
|
|
101
161
|
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
|
|
102
162
|
{ HUF_repeat repeat = prevHuf->repeatMode;
|
|
103
|
-
int const
|
|
163
|
+
int const flags = 0
|
|
164
|
+
| (bmi2 ? HUF_flags_bmi2 : 0)
|
|
165
|
+
| (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
|
|
166
|
+
| (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
|
|
167
|
+
| (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
|
|
168
|
+
|
|
169
|
+
typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
|
|
170
|
+
huf_compress_f huf_compress;
|
|
104
171
|
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
|
|
172
|
+
huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
|
|
173
|
+
cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
|
|
174
|
+
src, srcSize,
|
|
175
|
+
HUF_SYMBOLVALUE_MAX, LitHufLog,
|
|
176
|
+
entropyWorkspace, entropyWorkspaceSize,
|
|
177
|
+
(HUF_CElt*)nextHuf->CTable,
|
|
178
|
+
&repeat, flags);
|
|
179
|
+
DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
|
|
114
180
|
if (repeat != HUF_repeat_none) {
|
|
115
181
|
/* reused the existing table */
|
|
116
|
-
DEBUGLOG(5, "
|
|
182
|
+
DEBUGLOG(5, "reusing statistics from previous huffman block");
|
|
117
183
|
hType = set_repeat;
|
|
118
184
|
}
|
|
119
185
|
}
|
|
120
186
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
187
|
+
{ size_t const minGain = ZSTD_minGain(srcSize, strategy);
|
|
188
|
+
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
|
|
189
|
+
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
190
|
+
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
|
191
|
+
} }
|
|
125
192
|
if (cLitSize==1) {
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
193
|
+
/* A return value of 1 signals that the alphabet consists of a single symbol.
|
|
194
|
+
* However, in some rare circumstances, it could be the compressed size (a single byte).
|
|
195
|
+
* For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
|
|
196
|
+
* (it's also necessary to not generate statistics).
|
|
197
|
+
* Therefore, in such a case, actively check that all bytes are identical. */
|
|
198
|
+
if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
|
|
199
|
+
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
200
|
+
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
|
|
201
|
+
} }
|
|
129
202
|
|
|
130
203
|
if (hType == set_compressed) {
|
|
131
204
|
/* using a newly constructed table */
|
|
@@ -136,16 +209,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
|
|
136
209
|
switch(lhSize)
|
|
137
210
|
{
|
|
138
211
|
case 3: /* 2 - 2 - 10 - 10 */
|
|
139
|
-
|
|
212
|
+
if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
|
|
213
|
+
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
|
|
140
214
|
MEM_writeLE24(ostart, lhc);
|
|
141
215
|
break;
|
|
142
216
|
}
|
|
143
217
|
case 4: /* 2 - 2 - 14 - 14 */
|
|
218
|
+
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
|
|
144
219
|
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
|
|
145
220
|
MEM_writeLE32(ostart, lhc);
|
|
146
221
|
break;
|
|
147
222
|
}
|
|
148
223
|
case 5: /* 2 - 2 - 18 - 18 */
|
|
224
|
+
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
|
|
149
225
|
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
|
|
150
226
|
MEM_writeLE32(ostart, lhc);
|
|
151
227
|
ostart[4] = (BYTE)(cLitSize >> 10);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -16,16 +16,24 @@
|
|
|
16
16
|
|
|
17
17
|
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
|
18
18
|
|
|
19
|
+
/* ZSTD_compressRleLiteralsBlock() :
|
|
20
|
+
* Conditions :
|
|
21
|
+
* - All bytes in @src are identical
|
|
22
|
+
* - dstCapacity >= 4 */
|
|
19
23
|
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
|
20
24
|
|
|
21
|
-
/*
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
25
|
+
/* ZSTD_compressLiterals():
|
|
26
|
+
* @entropyWorkspace: must be aligned on 4-bytes boundaries
|
|
27
|
+
* @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE
|
|
28
|
+
* @suspectUncompressible: sampling checks, to potentially skip huffman coding
|
|
29
|
+
*/
|
|
30
|
+
size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity,
|
|
26
31
|
const void* src, size_t srcSize,
|
|
27
32
|
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
|
28
|
-
const
|
|
29
|
-
|
|
33
|
+
const ZSTD_hufCTables_t* prevHuf,
|
|
34
|
+
ZSTD_hufCTables_t* nextHuf,
|
|
35
|
+
ZSTD_strategy strategy, int disableLiteralCompression,
|
|
36
|
+
int suspectUncompressible,
|
|
37
|
+
int bmi2);
|
|
30
38
|
|
|
31
39
|
#endif /* ZSTD_COMPRESS_LITERALS_H */
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -58,7 +58,7 @@ static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
|
|
|
58
58
|
{
|
|
59
59
|
/* Heuristic: This should cover most blocks <= 16K and
|
|
60
60
|
* start to fade out after 16K to about 32K depending on
|
|
61
|
-
*
|
|
61
|
+
* compressibility.
|
|
62
62
|
*/
|
|
63
63
|
return nbSeq >= 2048;
|
|
64
64
|
}
|
|
@@ -153,20 +153,20 @@ size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
|
|
|
153
153
|
return cost >> 8;
|
|
154
154
|
}
|
|
155
155
|
|
|
156
|
-
|
|
156
|
+
SymbolEncodingType_e
|
|
157
157
|
ZSTD_selectEncodingType(
|
|
158
158
|
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
|
|
159
159
|
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
|
|
160
160
|
FSE_CTable const* prevCTable,
|
|
161
161
|
short const* defaultNorm, U32 defaultNormLog,
|
|
162
|
-
|
|
162
|
+
ZSTD_DefaultPolicy_e const isDefaultAllowed,
|
|
163
163
|
ZSTD_strategy const strategy)
|
|
164
164
|
{
|
|
165
165
|
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
|
|
166
166
|
if (mostFrequent == nbSeq) {
|
|
167
167
|
*repeatMode = FSE_repeat_none;
|
|
168
168
|
if (isDefaultAllowed && nbSeq <= 2) {
|
|
169
|
-
/* Prefer set_basic over set_rle when there are 2 or
|
|
169
|
+
/* Prefer set_basic over set_rle when there are 2 or fewer symbols,
|
|
170
170
|
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
|
|
171
171
|
* If basic encoding isn't possible, always choose RLE.
|
|
172
172
|
*/
|
|
@@ -241,7 +241,7 @@ typedef struct {
|
|
|
241
241
|
|
|
242
242
|
size_t
|
|
243
243
|
ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
|
244
|
-
FSE_CTable* nextCTable, U32 FSELog,
|
|
244
|
+
FSE_CTable* nextCTable, U32 FSELog, SymbolEncodingType_e type,
|
|
245
245
|
unsigned* count, U32 max,
|
|
246
246
|
const BYTE* codeTable, size_t nbSeq,
|
|
247
247
|
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
|
@@ -293,7 +293,7 @@ ZSTD_encodeSequences_body(
|
|
|
293
293
|
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
|
294
294
|
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
|
295
295
|
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
|
296
|
-
|
|
296
|
+
SeqDef const* sequences, size_t nbSeq, int longOffsets)
|
|
297
297
|
{
|
|
298
298
|
BIT_CStream_t blockStream;
|
|
299
299
|
FSE_CState_t stateMatchLength;
|
|
@@ -387,7 +387,7 @@ ZSTD_encodeSequences_default(
|
|
|
387
387
|
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
|
388
388
|
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
|
389
389
|
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
|
390
|
-
|
|
390
|
+
SeqDef const* sequences, size_t nbSeq, int longOffsets)
|
|
391
391
|
{
|
|
392
392
|
return ZSTD_encodeSequences_body(dst, dstCapacity,
|
|
393
393
|
CTable_MatchLength, mlCodeTable,
|
|
@@ -405,7 +405,7 @@ ZSTD_encodeSequences_bmi2(
|
|
|
405
405
|
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
|
406
406
|
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
|
407
407
|
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
|
408
|
-
|
|
408
|
+
SeqDef const* sequences, size_t nbSeq, int longOffsets)
|
|
409
409
|
{
|
|
410
410
|
return ZSTD_encodeSequences_body(dst, dstCapacity,
|
|
411
411
|
CTable_MatchLength, mlCodeTable,
|
|
@@ -421,7 +421,7 @@ size_t ZSTD_encodeSequences(
|
|
|
421
421
|
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
|
422
422
|
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
|
423
423
|
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
|
424
|
-
|
|
424
|
+
SeqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
|
|
425
425
|
{
|
|
426
426
|
DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
|
|
427
427
|
#if DYNAMIC_BMI2
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -11,26 +11,27 @@
|
|
|
11
11
|
#ifndef ZSTD_COMPRESS_SEQUENCES_H
|
|
12
12
|
#define ZSTD_COMPRESS_SEQUENCES_H
|
|
13
13
|
|
|
14
|
+
#include "zstd_compress_internal.h" /* SeqDef */
|
|
14
15
|
#include "../common/fse.h" /* FSE_repeat, FSE_CTable */
|
|
15
|
-
#include "../common/zstd_internal.h" /*
|
|
16
|
+
#include "../common/zstd_internal.h" /* SymbolEncodingType_e, ZSTD_strategy */
|
|
16
17
|
|
|
17
18
|
typedef enum {
|
|
18
19
|
ZSTD_defaultDisallowed = 0,
|
|
19
20
|
ZSTD_defaultAllowed = 1
|
|
20
|
-
}
|
|
21
|
+
} ZSTD_DefaultPolicy_e;
|
|
21
22
|
|
|
22
|
-
|
|
23
|
+
SymbolEncodingType_e
|
|
23
24
|
ZSTD_selectEncodingType(
|
|
24
25
|
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
|
|
25
26
|
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
|
|
26
27
|
FSE_CTable const* prevCTable,
|
|
27
28
|
short const* defaultNorm, U32 defaultNormLog,
|
|
28
|
-
|
|
29
|
+
ZSTD_DefaultPolicy_e const isDefaultAllowed,
|
|
29
30
|
ZSTD_strategy const strategy);
|
|
30
31
|
|
|
31
32
|
size_t
|
|
32
33
|
ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
|
33
|
-
FSE_CTable* nextCTable, U32 FSELog,
|
|
34
|
+
FSE_CTable* nextCTable, U32 FSELog, SymbolEncodingType_e type,
|
|
34
35
|
unsigned* count, U32 max,
|
|
35
36
|
const BYTE* codeTable, size_t nbSeq,
|
|
36
37
|
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
|
@@ -42,7 +43,7 @@ size_t ZSTD_encodeSequences(
|
|
|
42
43
|
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
|
43
44
|
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
|
44
45
|
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
|
45
|
-
|
|
46
|
+
SeqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
|
|
46
47
|
|
|
47
48
|
size_t ZSTD_fseBitCost(
|
|
48
49
|
FSE_CTable const* ctable,
|