deflate-ruby 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +95 -92
- data/LICENSE.txt +6 -6
- data/README.md +87 -65
- data/Rakefile +23 -0
- data/ext/deflate_ruby/{libdeflate/lib/x86/adler32_impl.h → adler32_impl.h} +8 -7
- data/ext/deflate_ruby/common_defs.h +748 -0
- data/ext/deflate_ruby/{libdeflate/lib/x86/cpu_features.c → cpu_features.c} +46 -16
- data/ext/deflate_ruby/{libdeflate/lib/x86/cpu_features.h → cpu_features.h} +2 -1
- data/ext/deflate_ruby/{libdeflate/lib/x86/crc32_impl.h → crc32_impl.h} +22 -23
- data/ext/deflate_ruby/{libdeflate/lib/crc32_multipliers.h → crc32_multipliers.h} +2 -4
- data/ext/deflate_ruby/{libdeflate/lib/x86/crc32_pclmul_template.h → crc32_pclmul_template.h} +23 -94
- data/ext/deflate_ruby/{libdeflate/lib/crc32_tables.h → crc32_tables.h} +1 -1
- data/ext/deflate_ruby/{libdeflate/lib/deflate_compress.c → deflate_compress.c} +59 -60
- data/ext/deflate_ruby/deflate_ruby.c +392 -218
- data/ext/deflate_ruby/deflate_ruby.h +6 -0
- data/ext/deflate_ruby/extconf.rb +35 -25
- data/ext/deflate_ruby/libdeflate/adler32.c +162 -0
- data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/adler32_impl.h +14 -7
- data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/crc32_impl.h +25 -31
- data/ext/deflate_ruby/libdeflate/arm/crc32_pmull_helpers.h +156 -0
- data/ext/deflate_ruby/libdeflate/arm/crc32_pmull_wide.h +226 -0
- data/ext/deflate_ruby/libdeflate/bt_matchfinder.h +342 -0
- data/ext/deflate_ruby/libdeflate/common_defs.h +2 -1
- data/ext/deflate_ruby/libdeflate/cpu_features_common.h +93 -0
- data/ext/deflate_ruby/libdeflate/crc32.c +262 -0
- data/ext/deflate_ruby/libdeflate/crc32_multipliers.h +375 -0
- data/ext/deflate_ruby/libdeflate/crc32_tables.h +587 -0
- data/ext/deflate_ruby/libdeflate/decompress_template.h +777 -0
- data/ext/deflate_ruby/libdeflate/deflate_compress.c +4128 -0
- data/ext/deflate_ruby/libdeflate/deflate_compress.h +15 -0
- data/ext/deflate_ruby/libdeflate/deflate_constants.h +56 -0
- data/ext/deflate_ruby/libdeflate/deflate_decompress.c +1208 -0
- data/ext/deflate_ruby/libdeflate/gzip_compress.c +90 -0
- data/ext/deflate_ruby/libdeflate/gzip_constants.h +45 -0
- data/ext/deflate_ruby/libdeflate/gzip_decompress.c +144 -0
- data/ext/deflate_ruby/libdeflate/hc_matchfinder.h +401 -0
- data/ext/deflate_ruby/libdeflate/ht_matchfinder.h +234 -0
- data/ext/deflate_ruby/libdeflate/lib_common.h +106 -0
- data/ext/deflate_ruby/libdeflate/libdeflate.h +2 -2
- data/ext/deflate_ruby/libdeflate/{lib/matchfinder_common.h → matchfinder_common.h} +3 -3
- data/ext/deflate_ruby/libdeflate/x86/adler32_impl.h +135 -0
- data/ext/deflate_ruby/libdeflate/x86/adler32_template.h +518 -0
- data/ext/deflate_ruby/libdeflate/x86/cpu_features.c +213 -0
- data/ext/deflate_ruby/libdeflate/x86/cpu_features.h +170 -0
- data/ext/deflate_ruby/libdeflate/x86/crc32_impl.h +159 -0
- data/ext/deflate_ruby/libdeflate/x86/crc32_pclmul_template.h +424 -0
- data/ext/deflate_ruby/libdeflate/x86/decompress_impl.h +57 -0
- data/ext/deflate_ruby/libdeflate.h +411 -0
- data/ext/deflate_ruby/matchfinder_common.h +224 -0
- data/ext/deflate_ruby/matchfinder_impl.h +122 -0
- data/ext/deflate_ruby/utils.c +141 -0
- data/ext/deflate_ruby/zlib_compress.c +82 -0
- data/ext/deflate_ruby/zlib_constants.h +21 -0
- data/ext/deflate_ruby/zlib_decompress.c +104 -0
- data/lib/deflate_ruby/version.rb +1 -1
- data/lib/deflate_ruby.rb +1 -63
- data/sig/deflate_ruby.rbs +4 -0
- data/test/test_deflate_ruby.rb +220 -0
- data/test/test_helper.rb +6 -0
- metadata +89 -144
- data/ext/deflate_ruby/libdeflate/CMakeLists.txt +0 -270
- data/ext/deflate_ruby/libdeflate/NEWS.md +0 -494
- data/ext/deflate_ruby/libdeflate/README.md +0 -228
- data/ext/deflate_ruby/libdeflate/libdeflate-config.cmake.in +0 -3
- data/ext/deflate_ruby/libdeflate/libdeflate.pc.in +0 -18
- data/ext/deflate_ruby/libdeflate/programs/CMakeLists.txt +0 -105
- data/ext/deflate_ruby/libdeflate/programs/benchmark.c +0 -696
- data/ext/deflate_ruby/libdeflate/programs/checksum.c +0 -218
- data/ext/deflate_ruby/libdeflate/programs/config.h.in +0 -19
- data/ext/deflate_ruby/libdeflate/programs/gzip.c +0 -688
- data/ext/deflate_ruby/libdeflate/programs/prog_util.c +0 -521
- data/ext/deflate_ruby/libdeflate/programs/prog_util.h +0 -225
- data/ext/deflate_ruby/libdeflate/programs/test_checksums.c +0 -200
- data/ext/deflate_ruby/libdeflate/programs/test_custom_malloc.c +0 -155
- data/ext/deflate_ruby/libdeflate/programs/test_incomplete_codes.c +0 -385
- data/ext/deflate_ruby/libdeflate/programs/test_invalid_streams.c +0 -130
- data/ext/deflate_ruby/libdeflate/programs/test_litrunlen_overflow.c +0 -72
- data/ext/deflate_ruby/libdeflate/programs/test_overread.c +0 -95
- data/ext/deflate_ruby/libdeflate/programs/test_slow_decompression.c +0 -472
- data/ext/deflate_ruby/libdeflate/programs/test_trailing_bytes.c +0 -151
- data/ext/deflate_ruby/libdeflate/programs/test_util.c +0 -237
- data/ext/deflate_ruby/libdeflate/programs/test_util.h +0 -61
- data/ext/deflate_ruby/libdeflate/programs/tgetopt.c +0 -118
- data/ext/deflate_ruby/libdeflate/scripts/android_build.sh +0 -118
- data/ext/deflate_ruby/libdeflate/scripts/android_tests.sh +0 -69
- data/ext/deflate_ruby/libdeflate/scripts/benchmark.sh +0 -10
- data/ext/deflate_ruby/libdeflate/scripts/checksum.sh +0 -10
- data/ext/deflate_ruby/libdeflate/scripts/checksum_benchmarks.sh +0 -253
- data/ext/deflate_ruby/libdeflate/scripts/cmake-helper.sh +0 -17
- data/ext/deflate_ruby/libdeflate/scripts/deflate_benchmarks.sh +0 -119
- data/ext/deflate_ruby/libdeflate/scripts/exec_tests.sh +0 -38
- data/ext/deflate_ruby/libdeflate/scripts/gen-release-archives.sh +0 -37
- data/ext/deflate_ruby/libdeflate/scripts/gen_bitreverse_tab.py +0 -19
- data/ext/deflate_ruby/libdeflate/scripts/gen_crc32_multipliers.c +0 -199
- data/ext/deflate_ruby/libdeflate/scripts/gen_crc32_tables.c +0 -105
- data/ext/deflate_ruby/libdeflate/scripts/gen_default_litlen_costs.py +0 -44
- data/ext/deflate_ruby/libdeflate/scripts/gen_offset_slot_map.py +0 -29
- data/ext/deflate_ruby/libdeflate/scripts/gzip_tests.sh +0 -523
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_compress/corpus/0 +0 -0
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_compress/fuzz.c +0 -95
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_decompress/corpus/0 +0 -3
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_decompress/fuzz.c +0 -62
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/fuzz.sh +0 -108
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/gzip_decompress/corpus/0 +0 -0
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/gzip_decompress/fuzz.c +0 -19
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/zlib_decompress/corpus/0 +0 -3
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/zlib_decompress/fuzz.c +0 -19
- data/ext/deflate_ruby/libdeflate/scripts/run_tests.sh +0 -416
- data/ext/deflate_ruby/libdeflate/scripts/toolchain-i686-w64-mingw32.cmake +0 -8
- data/ext/deflate_ruby/libdeflate/scripts/toolchain-x86_64-w64-mingw32.cmake +0 -8
- /data/ext/deflate_ruby/{libdeflate/lib/adler32.c → adler32.c} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/x86/adler32_template.h → adler32_template.h} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/bt_matchfinder.h → bt_matchfinder.h} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/cpu_features_common.h → cpu_features_common.h} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/crc32.c → crc32.c} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/arm/crc32_pmull_helpers.h → crc32_pmull_helpers.h} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/arm/crc32_pmull_wide.h → crc32_pmull_wide.h} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/x86/decompress_impl.h → decompress_impl.h} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/decompress_template.h → decompress_template.h} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/deflate_compress.h → deflate_compress.h} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/deflate_constants.h → deflate_constants.h} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/deflate_decompress.c → deflate_decompress.c} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/gzip_compress.c → gzip_compress.c} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/gzip_constants.h → gzip_constants.h} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/gzip_decompress.c → gzip_decompress.c} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/hc_matchfinder.h → hc_matchfinder.h} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/ht_matchfinder.h → ht_matchfinder.h} +0 -0
- /data/ext/deflate_ruby/{libdeflate/lib/lib_common.h → lib_common.h} +0 -0
- /data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/cpu_features.c +0 -0
- /data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/cpu_features.h +0 -0
- /data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/matchfinder_impl.h +0 -0
- /data/ext/deflate_ruby/libdeflate/{lib/riscv → riscv}/matchfinder_impl.h +0 -0
- /data/ext/deflate_ruby/libdeflate/{lib/utils.c → utils.c} +0 -0
- /data/ext/deflate_ruby/libdeflate/{lib/x86 → x86}/matchfinder_impl.h +0 -0
- /data/ext/deflate_ruby/libdeflate/{lib/zlib_compress.c → zlib_compress.c} +0 -0
- /data/ext/deflate_ruby/libdeflate/{lib/zlib_constants.h → zlib_constants.h} +0 -0
- /data/ext/deflate_ruby/libdeflate/{lib/zlib_decompress.c → zlib_decompress.c} +0 -0
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* ht_matchfinder.h - Lempel-Ziv matchfinding with a hash table
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2022 Eric Biggers
|
|
5
|
+
*
|
|
6
|
+
* Permission is hereby granted, free of charge, to any person
|
|
7
|
+
* obtaining a copy of this software and associated documentation
|
|
8
|
+
* files (the "Software"), to deal in the Software without
|
|
9
|
+
* restriction, including without limitation the rights to use,
|
|
10
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
* copies of the Software, and to permit persons to whom the
|
|
12
|
+
* Software is furnished to do so, subject to the following
|
|
13
|
+
* conditions:
|
|
14
|
+
*
|
|
15
|
+
* The above copyright notice and this permission notice shall be
|
|
16
|
+
* included in all copies or substantial portions of the Software.
|
|
17
|
+
*
|
|
18
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
19
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
20
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
21
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
22
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
23
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
24
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
25
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
|
26
|
+
*
|
|
27
|
+
* ---------------------------------------------------------------------------
|
|
28
|
+
*
|
|
29
|
+
* This is a Hash Table (ht) matchfinder.
|
|
30
|
+
*
|
|
31
|
+
* This is a variant of the Hash Chains (hc) matchfinder that is optimized for
|
|
32
|
+
* very fast compression. The ht_matchfinder stores the hash chains inline in
|
|
33
|
+
* the hash table, whereas the hc_matchfinder stores them in a separate array.
|
|
34
|
+
* Storing the hash chains inline is the faster method when max_search_depth
|
|
35
|
+
* (the maximum chain length) is very small. It is not appropriate when
|
|
36
|
+
* max_search_depth is larger, as then it uses too much memory.
|
|
37
|
+
*
|
|
38
|
+
* Due to its focus on speed, the ht_matchfinder doesn't support length 3
|
|
39
|
+
* matches. It also doesn't allow max_search_depth to vary at runtime; it is
|
|
40
|
+
* fixed at build time as HT_MATCHFINDER_BUCKET_SIZE.
|
|
41
|
+
*
|
|
42
|
+
* See hc_matchfinder.h for more information.
|
|
43
|
+
*/
|
|
44
|
+
|
|
45
|
+
#ifndef LIB_HT_MATCHFINDER_H
|
|
46
|
+
#define LIB_HT_MATCHFINDER_H
|
|
47
|
+
|
|
48
|
+
#include "matchfinder_common.h"
|
|
49
|
+
|
|
50
|
+
#define HT_MATCHFINDER_HASH_ORDER 15
|
|
51
|
+
#define HT_MATCHFINDER_BUCKET_SIZE 2
|
|
52
|
+
|
|
53
|
+
#define HT_MATCHFINDER_MIN_MATCH_LEN 4
|
|
54
|
+
/* Minimum value of max_len for ht_matchfinder_longest_match() */
|
|
55
|
+
#define HT_MATCHFINDER_REQUIRED_NBYTES 5
|
|
56
|
+
|
|
57
|
+
/*
 * State of the hash table matchfinder: one bucket per hash code, each bucket
 * holding HT_MATCHFINDER_BUCKET_SIZE positions.  The functions in this file
 * store the most recently inserted position in slot 0 of a bucket.
 */
struct MATCHFINDER_ALIGNED ht_matchfinder {
	mf_pos_t hash_tab[1UL << HT_MATCHFINDER_HASH_ORDER][HT_MATCHFINDER_BUCKET_SIZE];
};
|
|
61
|
+
|
|
62
|
+
static forceinline void
|
|
63
|
+
ht_matchfinder_init(struct ht_matchfinder *mf)
|
|
64
|
+
{
|
|
65
|
+
STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
|
|
66
|
+
|
|
67
|
+
matchfinder_init((mf_pos_t *)mf, sizeof(*mf));
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
static forceinline void
|
|
71
|
+
ht_matchfinder_slide_window(struct ht_matchfinder *mf)
|
|
72
|
+
{
|
|
73
|
+
matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/* Note: max_len must be >= HT_MATCHFINDER_REQUIRED_NBYTES */
|
|
77
|
+
static forceinline u32
|
|
78
|
+
ht_matchfinder_longest_match(struct ht_matchfinder * const mf,
|
|
79
|
+
const u8 ** const in_base_p,
|
|
80
|
+
const u8 * const in_next,
|
|
81
|
+
const u32 max_len,
|
|
82
|
+
const u32 nice_len,
|
|
83
|
+
u32 * const next_hash,
|
|
84
|
+
u32 * const offset_ret)
|
|
85
|
+
{
|
|
86
|
+
u32 best_len = 0;
|
|
87
|
+
const u8 *best_matchptr = in_next;
|
|
88
|
+
u32 cur_pos = in_next - *in_base_p;
|
|
89
|
+
const u8 *in_base;
|
|
90
|
+
mf_pos_t cutoff;
|
|
91
|
+
u32 hash;
|
|
92
|
+
u32 seq;
|
|
93
|
+
mf_pos_t cur_node;
|
|
94
|
+
const u8 *matchptr;
|
|
95
|
+
#if HT_MATCHFINDER_BUCKET_SIZE > 1
|
|
96
|
+
mf_pos_t to_insert;
|
|
97
|
+
u32 len;
|
|
98
|
+
#endif
|
|
99
|
+
#if HT_MATCHFINDER_BUCKET_SIZE > 2
|
|
100
|
+
int i;
|
|
101
|
+
#endif
|
|
102
|
+
|
|
103
|
+
/* This is assumed throughout this function. */
|
|
104
|
+
STATIC_ASSERT(HT_MATCHFINDER_MIN_MATCH_LEN == 4);
|
|
105
|
+
|
|
106
|
+
if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
|
|
107
|
+
ht_matchfinder_slide_window(mf);
|
|
108
|
+
*in_base_p += MATCHFINDER_WINDOW_SIZE;
|
|
109
|
+
cur_pos = 0;
|
|
110
|
+
}
|
|
111
|
+
in_base = *in_base_p;
|
|
112
|
+
cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
|
|
113
|
+
|
|
114
|
+
hash = *next_hash;
|
|
115
|
+
STATIC_ASSERT(HT_MATCHFINDER_REQUIRED_NBYTES == 5);
|
|
116
|
+
*next_hash = lz_hash(get_unaligned_le32(in_next + 1),
|
|
117
|
+
HT_MATCHFINDER_HASH_ORDER);
|
|
118
|
+
seq = load_u32_unaligned(in_next);
|
|
119
|
+
prefetchw(&mf->hash_tab[*next_hash]);
|
|
120
|
+
#if HT_MATCHFINDER_BUCKET_SIZE == 1
|
|
121
|
+
/* Hand-unrolled version for BUCKET_SIZE == 1 */
|
|
122
|
+
cur_node = mf->hash_tab[hash][0];
|
|
123
|
+
mf->hash_tab[hash][0] = cur_pos;
|
|
124
|
+
if (cur_node <= cutoff)
|
|
125
|
+
goto out;
|
|
126
|
+
matchptr = &in_base[cur_node];
|
|
127
|
+
if (load_u32_unaligned(matchptr) == seq) {
|
|
128
|
+
best_len = lz_extend(in_next, matchptr, 4, max_len);
|
|
129
|
+
best_matchptr = matchptr;
|
|
130
|
+
}
|
|
131
|
+
#elif HT_MATCHFINDER_BUCKET_SIZE == 2
|
|
132
|
+
/*
|
|
133
|
+
* Hand-unrolled version for BUCKET_SIZE == 2. The logic here also
|
|
134
|
+
* differs slightly in that it copies the first entry to the second even
|
|
135
|
+
* if nice_len is reached on the first, as this can be slightly faster.
|
|
136
|
+
*/
|
|
137
|
+
cur_node = mf->hash_tab[hash][0];
|
|
138
|
+
mf->hash_tab[hash][0] = cur_pos;
|
|
139
|
+
if (cur_node <= cutoff)
|
|
140
|
+
goto out;
|
|
141
|
+
matchptr = &in_base[cur_node];
|
|
142
|
+
|
|
143
|
+
to_insert = cur_node;
|
|
144
|
+
cur_node = mf->hash_tab[hash][1];
|
|
145
|
+
mf->hash_tab[hash][1] = to_insert;
|
|
146
|
+
|
|
147
|
+
if (load_u32_unaligned(matchptr) == seq) {
|
|
148
|
+
best_len = lz_extend(in_next, matchptr, 4, max_len);
|
|
149
|
+
best_matchptr = matchptr;
|
|
150
|
+
if (cur_node <= cutoff || best_len >= nice_len)
|
|
151
|
+
goto out;
|
|
152
|
+
matchptr = &in_base[cur_node];
|
|
153
|
+
if (load_u32_unaligned(matchptr) == seq &&
|
|
154
|
+
load_u32_unaligned(matchptr + best_len - 3) ==
|
|
155
|
+
load_u32_unaligned(in_next + best_len - 3)) {
|
|
156
|
+
len = lz_extend(in_next, matchptr, 4, max_len);
|
|
157
|
+
if (len > best_len) {
|
|
158
|
+
best_len = len;
|
|
159
|
+
best_matchptr = matchptr;
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
} else {
|
|
163
|
+
if (cur_node <= cutoff)
|
|
164
|
+
goto out;
|
|
165
|
+
matchptr = &in_base[cur_node];
|
|
166
|
+
if (load_u32_unaligned(matchptr) == seq) {
|
|
167
|
+
best_len = lz_extend(in_next, matchptr, 4, max_len);
|
|
168
|
+
best_matchptr = matchptr;
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
#else
|
|
172
|
+
/* Generic version for HT_MATCHFINDER_BUCKET_SIZE > 2 */
|
|
173
|
+
to_insert = cur_pos;
|
|
174
|
+
for (i = 0; i < HT_MATCHFINDER_BUCKET_SIZE; i++) {
|
|
175
|
+
cur_node = mf->hash_tab[hash][i];
|
|
176
|
+
mf->hash_tab[hash][i] = to_insert;
|
|
177
|
+
if (cur_node <= cutoff)
|
|
178
|
+
goto out;
|
|
179
|
+
matchptr = &in_base[cur_node];
|
|
180
|
+
if (load_u32_unaligned(matchptr) == seq) {
|
|
181
|
+
len = lz_extend(in_next, matchptr, 4, max_len);
|
|
182
|
+
if (len > best_len) {
|
|
183
|
+
best_len = len;
|
|
184
|
+
best_matchptr = matchptr;
|
|
185
|
+
if (best_len >= nice_len)
|
|
186
|
+
goto out;
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
to_insert = cur_node;
|
|
190
|
+
}
|
|
191
|
+
#endif
|
|
192
|
+
out:
|
|
193
|
+
*offset_ret = in_next - best_matchptr;
|
|
194
|
+
return best_len;
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
static forceinline void
|
|
198
|
+
ht_matchfinder_skip_bytes(struct ht_matchfinder * const mf,
|
|
199
|
+
const u8 ** const in_base_p,
|
|
200
|
+
const u8 *in_next,
|
|
201
|
+
const u8 * const in_end,
|
|
202
|
+
const u32 count,
|
|
203
|
+
u32 * const next_hash)
|
|
204
|
+
{
|
|
205
|
+
s32 cur_pos = in_next - *in_base_p;
|
|
206
|
+
u32 hash;
|
|
207
|
+
u32 remaining = count;
|
|
208
|
+
int i;
|
|
209
|
+
|
|
210
|
+
if (unlikely(count + HT_MATCHFINDER_REQUIRED_NBYTES > in_end - in_next))
|
|
211
|
+
return;
|
|
212
|
+
|
|
213
|
+
if (cur_pos + count - 1 >= MATCHFINDER_WINDOW_SIZE) {
|
|
214
|
+
ht_matchfinder_slide_window(mf);
|
|
215
|
+
*in_base_p += MATCHFINDER_WINDOW_SIZE;
|
|
216
|
+
cur_pos -= MATCHFINDER_WINDOW_SIZE;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
hash = *next_hash;
|
|
220
|
+
do {
|
|
221
|
+
for (i = HT_MATCHFINDER_BUCKET_SIZE - 1; i > 0; i--)
|
|
222
|
+
mf->hash_tab[hash][i] = mf->hash_tab[hash][i - 1];
|
|
223
|
+
mf->hash_tab[hash][0] = cur_pos;
|
|
224
|
+
|
|
225
|
+
hash = lz_hash(get_unaligned_le32(++in_next),
|
|
226
|
+
HT_MATCHFINDER_HASH_ORDER);
|
|
227
|
+
cur_pos++;
|
|
228
|
+
} while (--remaining);
|
|
229
|
+
|
|
230
|
+
prefetchw(&mf->hash_tab[hash]);
|
|
231
|
+
*next_hash = hash;
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
#endif /* LIB_HT_MATCHFINDER_H */
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* lib_common.h - internal header included by all library code
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
#ifndef LIB_LIB_COMMON_H
|
|
6
|
+
#define LIB_LIB_COMMON_H
|
|
7
|
+
|
|
8
|
+
#ifdef LIBDEFLATE_H
|
|
9
|
+
/*
|
|
10
|
+
* When building the library, LIBDEFLATEAPI needs to be defined properly before
|
|
11
|
+
* including libdeflate.h.
|
|
12
|
+
*/
|
|
13
|
+
# error "lib_common.h must always be included before libdeflate.h"
|
|
14
|
+
#endif
|
|
15
|
+
|
|
16
|
+
#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
|
|
17
|
+
# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
|
|
18
|
+
#elif defined(__GNUC__)
|
|
19
|
+
# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
|
|
20
|
+
#else
|
|
21
|
+
# define LIBDEFLATE_EXPORT_SYM
|
|
22
|
+
#endif
|
|
23
|
+
|
|
24
|
+
/*
|
|
25
|
+
* On i386, gcc assumes that the stack is 16-byte aligned at function entry.
|
|
26
|
+
* However, some compilers (e.g. MSVC) and programming languages (e.g. Delphi)
|
|
27
|
+
* only guarantee 4-byte alignment when calling functions. This is mainly an
|
|
28
|
+
* issue on Windows, but it has been seen on Linux too. Work around this ABI
|
|
29
|
+
* incompatibility by realigning the stack pointer when entering libdeflate.
|
|
30
|
+
* This prevents crashes in SSE/AVX code.
|
|
31
|
+
*/
|
|
32
|
+
#if defined(__GNUC__) && defined(__i386__)
|
|
33
|
+
# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
|
|
34
|
+
#else
|
|
35
|
+
# define LIBDEFLATE_ALIGN_STACK
|
|
36
|
+
#endif
|
|
37
|
+
|
|
38
|
+
#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
|
|
39
|
+
|
|
40
|
+
#include "../common_defs.h"
|
|
41
|
+
|
|
42
|
+
/* Pointer to an allocation function: takes a size and returns a pointer. */
typedef void *(*malloc_func_t)(size_t);
/* Pointer to a deallocation function: takes the pointer to release. */
typedef void (*free_func_t)(void *);

/* Default allocation hooks; only declared here, defined elsewhere. */
extern malloc_func_t libdeflate_default_malloc_func;
extern free_func_t libdeflate_default_free_func;

/*
 * Aligned allocation helpers that work through caller-supplied hooks.
 * NOTE(review): presumably a pointer obtained from libdeflate_aligned_malloc()
 * must be released with libdeflate_aligned_free() rather than handed to
 * free_func directly -- confirm against the definitions, which are not visible
 * in this header.
 */
void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
				size_t alignment, size_t size);
void libdeflate_aligned_free(free_func_t free_func, void *ptr);
|
|
51
|
+
|
|
52
|
+
#ifdef FREESTANDING
|
|
53
|
+
/*
|
|
54
|
+
* With -ffreestanding, <string.h> may be missing, and we must provide
|
|
55
|
+
* implementations of memset(), memcpy(), memmove(), and memcmp().
|
|
56
|
+
* See https://gcc.gnu.org/onlinedocs/gcc/Standards.html
|
|
57
|
+
*
|
|
58
|
+
* Also, -ffreestanding disables interpreting calls to these functions as
|
|
59
|
+
* built-ins. E.g., calling memcpy(&v, p, WORDBYTES) will make a function call,
|
|
60
|
+
* not be optimized to a single load instruction. For performance reasons we
|
|
61
|
+
* don't want that. So, declare these functions as macros that expand to the
|
|
62
|
+
* corresponding built-ins. This approach is recommended in the gcc man page.
|
|
63
|
+
* We still need the actual function definitions in case gcc calls them.
|
|
64
|
+
*/
|
|
65
|
+
void *memset(void *s, int c, size_t n);
|
|
66
|
+
#define memset(s, c, n) __builtin_memset((s), (c), (n))
|
|
67
|
+
|
|
68
|
+
void *memcpy(void *dest, const void *src, size_t n);
|
|
69
|
+
#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
|
|
70
|
+
|
|
71
|
+
void *memmove(void *dest, const void *src, size_t n);
|
|
72
|
+
#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
|
|
73
|
+
|
|
74
|
+
int memcmp(const void *s1, const void *s2, size_t n);
|
|
75
|
+
#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
|
|
76
|
+
|
|
77
|
+
#undef LIBDEFLATE_ENABLE_ASSERTIONS
|
|
78
|
+
#else
|
|
79
|
+
# include <string.h>
|
|
80
|
+
/*
|
|
81
|
+
* To prevent false positive static analyzer warnings, ensure that assertions
|
|
82
|
+
* are visible to the static analyzer.
|
|
83
|
+
*/
|
|
84
|
+
# ifdef __clang_analyzer__
|
|
85
|
+
# define LIBDEFLATE_ENABLE_ASSERTIONS
|
|
86
|
+
# endif
|
|
87
|
+
#endif
|
|
88
|
+
|
|
89
|
+
/*
|
|
90
|
+
* Runtime assertion support. Don't enable this in production builds; it may
|
|
91
|
+
* hurt performance significantly.
|
|
92
|
+
*/
|
|
93
|
+
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
/*
 * Called when an assertion fails, with the stringified expression and the
 * source location.  Declared NORETURN.
 */
NORETURN void
libdeflate_assertion_failed(const char *expr, const char *file, int line);
/*
 * Wrapped in do { } while (0) so that "ASSERT(x);" is exactly one statement.
 * A bare { } block followed by ';' would break "if (c) ASSERT(x); else ...".
 */
#define ASSERT(expr) do { if (unlikely(!(expr))) \
	libdeflate_assertion_failed(#expr, __FILE__, __LINE__); } while (0)
#else
/* Assertions disabled: the expression is still evaluated; its value is discarded. */
#define ASSERT(expr) (void)(expr)
#endif
|
|
101
|
+
|
|
102
|
+
/*
 * Token-pasting helpers.  CONCAT() goes through CONCAT_IMPL() so that its
 * arguments are macro-expanded before they are pasted together.
 * ADD_SUFFIX(name) appends the current expansion of SUFFIX to 'name'; SUFFIX
 * is not defined in this header and must be defined at the point of use.
 */
#define CONCAT_IMPL(a, b)	a##b
#define CONCAT(a, b)		CONCAT_IMPL(a, b)
#define ADD_SUFFIX(name)	CONCAT(name, SUFFIX)
|
|
105
|
+
|
|
106
|
+
#endif /* LIB_LIB_COMMON_H */
|
|
@@ -13,8 +13,8 @@ extern "C" {
|
|
|
13
13
|
#endif
|
|
14
14
|
|
|
15
15
|
#define LIBDEFLATE_VERSION_MAJOR 1
|
|
16
|
-
#define LIBDEFLATE_VERSION_MINOR
|
|
17
|
-
#define LIBDEFLATE_VERSION_STRING "1.
|
|
16
|
+
#define LIBDEFLATE_VERSION_MINOR 25
|
|
17
|
+
#define LIBDEFLATE_VERSION_STRING "1.25"
|
|
18
18
|
|
|
19
19
|
/*
|
|
20
20
|
* Users of libdeflate.dll on Windows can define LIBDEFLATE_DLL to cause
|
|
@@ -175,11 +175,11 @@ lz_hash(u32 seq, unsigned num_bits)
|
|
|
175
175
|
* Return the number of bytes at @matchptr that match the bytes at @strptr, up
|
|
176
176
|
* to a maximum of @max_len. Initially, @start_len bytes are matched.
|
|
177
177
|
*/
|
|
178
|
-
static forceinline
|
|
178
|
+
static forceinline u32
|
|
179
179
|
lz_extend(const u8 * const strptr, const u8 * const matchptr,
|
|
180
|
-
const
|
|
180
|
+
const u32 start_len, const u32 max_len)
|
|
181
181
|
{
|
|
182
|
-
|
|
182
|
+
u32 len = start_len;
|
|
183
183
|
machine_word_t v_word;
|
|
184
184
|
|
|
185
185
|
if (UNALIGNED_ACCESS_IS_FAST) {
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* x86/adler32_impl.h - x86 implementations of Adler-32 checksum algorithm
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2016 Eric Biggers
|
|
5
|
+
*
|
|
6
|
+
* Permission is hereby granted, free of charge, to any person
|
|
7
|
+
* obtaining a copy of this software and associated documentation
|
|
8
|
+
* files (the "Software"), to deal in the Software without
|
|
9
|
+
* restriction, including without limitation the rights to use,
|
|
10
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
* copies of the Software, and to permit persons to whom the
|
|
12
|
+
* Software is furnished to do so, subject to the following
|
|
13
|
+
* conditions:
|
|
14
|
+
*
|
|
15
|
+
* The above copyright notice and this permission notice shall be
|
|
16
|
+
* included in all copies or substantial portions of the Software.
|
|
17
|
+
*
|
|
18
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
19
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
20
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
21
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
22
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
23
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
24
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
25
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
#ifndef LIB_X86_ADLER32_IMPL_H
|
|
29
|
+
#define LIB_X86_ADLER32_IMPL_H
|
|
30
|
+
|
|
31
|
+
#include "cpu_features.h"
|
|
32
|
+
|
|
33
|
+
/*
 * SSE2 and AVX2 implementations.  Used on older CPUs.
 *
 * Each implementation is produced by defining the parameter macros (SUFFIX,
 * ATTRIBUTES, VL, USE_VNNI, USE_AVX512) and then including adler32_template.h.
 * The function name is also defined as a macro that expands to itself, so that
 * later code can test with #ifdef whether the implementation was compiled.
 *
 * NOTE(review): the parameter macros are redefined before each include without
 * an #undef here, so adler32_template.h presumably #undefs them -- confirm in
 * that file.
 */
#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
# define adler32_x86_sse2	adler32_x86_sse2
# define SUFFIX			_sse2
# define ATTRIBUTES		_target_attribute("sse2")
# define VL			16
# define USE_VNNI		0
# define USE_AVX512		0
# include "adler32_template.h"

# define adler32_x86_avx2	adler32_x86_avx2
# define SUFFIX			_avx2
# define ATTRIBUTES		_target_attribute("avx2")
# define VL			32
# define USE_VNNI		0
# define USE_AVX512		0
# include "adler32_template.h"
#endif
|
|
51
|
+
|
|
52
|
+
/*
 * AVX-VNNI implementation.  This is used on CPUs that have AVX2 and AVX-VNNI
 * but don't have AVX-512, for example Intel Alder Lake.
 *
 * Unusually for a new CPU feature, gcc added support for the AVX-VNNI
 * intrinsics (in gcc 11.1) slightly before binutils added support for
 * assembling AVX-VNNI instructions (in binutils 2.36).  Distros can reasonably
 * have gcc 11 with binutils 2.35.  Because of this issue, we check for gcc 12
 * instead of gcc 11.  (libdeflate supports direct compilation without a
 * configure step, so checking the binutils version is not always an option.)
 *
 * Defining LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI disables this
 * implementation regardless of the compiler version.
 */
#if (GCC_PREREQ(12, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)) && \
	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI)
# define adler32_x86_avx2_vnni	adler32_x86_avx2_vnni
# define SUFFIX			_avx2_vnni
# define ATTRIBUTES		_target_attribute("avx2,avxvnni")
# define VL			32
# define USE_VNNI		1
# define USE_AVX512		0
# include "adler32_template.h"
#endif
|
|
73
|
+
|
|
74
|
+
/*
 * The two AVX512VNNI variants below share one compiler/assembler gate.  They
 * differ in vector length (VL 32 vs. 64) and in whether "avx512vl" is part of
 * the target attributes.  Defining
 * LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI disables both.
 */
#if (GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)) && \
	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI)
/*
 * AVX512VNNI implementation using 256-bit vectors.  This is very similar to the
 * AVX-VNNI implementation but takes advantage of masking and more registers.
 * This is used on certain older Intel CPUs, specifically Ice Lake and Tiger
 * Lake, which support AVX512VNNI but downclock a bit too eagerly when ZMM
 * registers are used.
 */
# define adler32_x86_avx512_vl256_vnni	adler32_x86_avx512_vl256_vnni
# define SUFFIX				_avx512_vl256_vnni
# define ATTRIBUTES			_target_attribute("avx512bw,avx512vl,avx512vnni")
# define VL				32
# define USE_VNNI			1
# define USE_AVX512			1
# include "adler32_template.h"

/*
 * AVX512VNNI implementation using 512-bit vectors.  This is used on CPUs that
 * have a good AVX-512 implementation including AVX512VNNI.
 */
# define adler32_x86_avx512_vl512_vnni	adler32_x86_avx512_vl512_vnni
# define SUFFIX				_avx512_vl512_vnni
# define ATTRIBUTES			_target_attribute("avx512bw,avx512vnni")
# define VL				64
# define USE_VNNI			1
# define USE_AVX512			1
# include "adler32_template.h"
#endif
|
|
103
|
+
|
|
104
|
+
static inline adler32_func_t
|
|
105
|
+
arch_select_adler32_func(void)
|
|
106
|
+
{
|
|
107
|
+
const u32 features MAYBE_UNUSED = get_x86_cpu_features();
|
|
108
|
+
|
|
109
|
+
#ifdef adler32_x86_avx512_vl512_vnni
|
|
110
|
+
if ((features & X86_CPU_FEATURE_ZMM) &&
|
|
111
|
+
HAVE_AVX512BW(features) && HAVE_AVX512VNNI(features))
|
|
112
|
+
return adler32_x86_avx512_vl512_vnni;
|
|
113
|
+
#endif
|
|
114
|
+
#ifdef adler32_x86_avx512_vl256_vnni
|
|
115
|
+
if (HAVE_AVX512BW(features) && HAVE_AVX512VL(features) &&
|
|
116
|
+
HAVE_AVX512VNNI(features))
|
|
117
|
+
return adler32_x86_avx512_vl256_vnni;
|
|
118
|
+
#endif
|
|
119
|
+
#ifdef adler32_x86_avx2_vnni
|
|
120
|
+
if (HAVE_AVX2(features) && HAVE_AVXVNNI(features))
|
|
121
|
+
return adler32_x86_avx2_vnni;
|
|
122
|
+
#endif
|
|
123
|
+
#ifdef adler32_x86_avx2
|
|
124
|
+
if (HAVE_AVX2(features))
|
|
125
|
+
return adler32_x86_avx2;
|
|
126
|
+
#endif
|
|
127
|
+
#ifdef adler32_x86_sse2
|
|
128
|
+
if (HAVE_SSE2(features))
|
|
129
|
+
return adler32_x86_sse2;
|
|
130
|
+
#endif
|
|
131
|
+
return NULL;
|
|
132
|
+
}
|
|
133
|
+
#define arch_select_adler32_func arch_select_adler32_func
|
|
134
|
+
|
|
135
|
+
#endif /* LIB_X86_ADLER32_IMPL_H */
|