deflate-ruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CLAUDE.md +138 -0
- data/LICENSE.txt +21 -0
- data/README.md +117 -0
- data/ext/deflate_ruby/deflate_ruby.c +301 -0
- data/ext/deflate_ruby/extconf.rb +34 -0
- data/ext/deflate_ruby/libdeflate/CMakeLists.txt +270 -0
- data/ext/deflate_ruby/libdeflate/COPYING +22 -0
- data/ext/deflate_ruby/libdeflate/NEWS.md +494 -0
- data/ext/deflate_ruby/libdeflate/README.md +228 -0
- data/ext/deflate_ruby/libdeflate/common_defs.h +747 -0
- data/ext/deflate_ruby/libdeflate/lib/adler32.c +162 -0
- data/ext/deflate_ruby/libdeflate/lib/arm/adler32_impl.h +358 -0
- data/ext/deflate_ruby/libdeflate/lib/arm/cpu_features.c +230 -0
- data/ext/deflate_ruby/libdeflate/lib/arm/cpu_features.h +214 -0
- data/ext/deflate_ruby/libdeflate/lib/arm/crc32_impl.h +600 -0
- data/ext/deflate_ruby/libdeflate/lib/arm/crc32_pmull_helpers.h +156 -0
- data/ext/deflate_ruby/libdeflate/lib/arm/crc32_pmull_wide.h +226 -0
- data/ext/deflate_ruby/libdeflate/lib/arm/matchfinder_impl.h +78 -0
- data/ext/deflate_ruby/libdeflate/lib/bt_matchfinder.h +342 -0
- data/ext/deflate_ruby/libdeflate/lib/cpu_features_common.h +93 -0
- data/ext/deflate_ruby/libdeflate/lib/crc32.c +262 -0
- data/ext/deflate_ruby/libdeflate/lib/crc32_multipliers.h +377 -0
- data/ext/deflate_ruby/libdeflate/lib/crc32_tables.h +587 -0
- data/ext/deflate_ruby/libdeflate/lib/decompress_template.h +777 -0
- data/ext/deflate_ruby/libdeflate/lib/deflate_compress.c +4129 -0
- data/ext/deflate_ruby/libdeflate/lib/deflate_compress.h +15 -0
- data/ext/deflate_ruby/libdeflate/lib/deflate_constants.h +56 -0
- data/ext/deflate_ruby/libdeflate/lib/deflate_decompress.c +1208 -0
- data/ext/deflate_ruby/libdeflate/lib/gzip_compress.c +90 -0
- data/ext/deflate_ruby/libdeflate/lib/gzip_constants.h +45 -0
- data/ext/deflate_ruby/libdeflate/lib/gzip_decompress.c +144 -0
- data/ext/deflate_ruby/libdeflate/lib/hc_matchfinder.h +401 -0
- data/ext/deflate_ruby/libdeflate/lib/ht_matchfinder.h +234 -0
- data/ext/deflate_ruby/libdeflate/lib/lib_common.h +106 -0
- data/ext/deflate_ruby/libdeflate/lib/matchfinder_common.h +224 -0
- data/ext/deflate_ruby/libdeflate/lib/riscv/matchfinder_impl.h +97 -0
- data/ext/deflate_ruby/libdeflate/lib/utils.c +141 -0
- data/ext/deflate_ruby/libdeflate/lib/x86/adler32_impl.h +134 -0
- data/ext/deflate_ruby/libdeflate/lib/x86/adler32_template.h +518 -0
- data/ext/deflate_ruby/libdeflate/lib/x86/cpu_features.c +183 -0
- data/ext/deflate_ruby/libdeflate/lib/x86/cpu_features.h +169 -0
- data/ext/deflate_ruby/libdeflate/lib/x86/crc32_impl.h +160 -0
- data/ext/deflate_ruby/libdeflate/lib/x86/crc32_pclmul_template.h +495 -0
- data/ext/deflate_ruby/libdeflate/lib/x86/decompress_impl.h +57 -0
- data/ext/deflate_ruby/libdeflate/lib/x86/matchfinder_impl.h +122 -0
- data/ext/deflate_ruby/libdeflate/lib/zlib_compress.c +82 -0
- data/ext/deflate_ruby/libdeflate/lib/zlib_constants.h +21 -0
- data/ext/deflate_ruby/libdeflate/lib/zlib_decompress.c +104 -0
- data/ext/deflate_ruby/libdeflate/libdeflate-config.cmake.in +3 -0
- data/ext/deflate_ruby/libdeflate/libdeflate.h +411 -0
- data/ext/deflate_ruby/libdeflate/libdeflate.pc.in +18 -0
- data/ext/deflate_ruby/libdeflate/programs/CMakeLists.txt +105 -0
- data/ext/deflate_ruby/libdeflate/programs/benchmark.c +696 -0
- data/ext/deflate_ruby/libdeflate/programs/checksum.c +218 -0
- data/ext/deflate_ruby/libdeflate/programs/config.h.in +19 -0
- data/ext/deflate_ruby/libdeflate/programs/gzip.c +688 -0
- data/ext/deflate_ruby/libdeflate/programs/prog_util.c +521 -0
- data/ext/deflate_ruby/libdeflate/programs/prog_util.h +225 -0
- data/ext/deflate_ruby/libdeflate/programs/test_checksums.c +200 -0
- data/ext/deflate_ruby/libdeflate/programs/test_custom_malloc.c +155 -0
- data/ext/deflate_ruby/libdeflate/programs/test_incomplete_codes.c +385 -0
- data/ext/deflate_ruby/libdeflate/programs/test_invalid_streams.c +130 -0
- data/ext/deflate_ruby/libdeflate/programs/test_litrunlen_overflow.c +72 -0
- data/ext/deflate_ruby/libdeflate/programs/test_overread.c +95 -0
- data/ext/deflate_ruby/libdeflate/programs/test_slow_decompression.c +472 -0
- data/ext/deflate_ruby/libdeflate/programs/test_trailing_bytes.c +151 -0
- data/ext/deflate_ruby/libdeflate/programs/test_util.c +237 -0
- data/ext/deflate_ruby/libdeflate/programs/test_util.h +61 -0
- data/ext/deflate_ruby/libdeflate/programs/tgetopt.c +118 -0
- data/ext/deflate_ruby/libdeflate/scripts/android_build.sh +118 -0
- data/ext/deflate_ruby/libdeflate/scripts/android_tests.sh +69 -0
- data/ext/deflate_ruby/libdeflate/scripts/benchmark.sh +10 -0
- data/ext/deflate_ruby/libdeflate/scripts/checksum.sh +10 -0
- data/ext/deflate_ruby/libdeflate/scripts/checksum_benchmarks.sh +253 -0
- data/ext/deflate_ruby/libdeflate/scripts/cmake-helper.sh +17 -0
- data/ext/deflate_ruby/libdeflate/scripts/deflate_benchmarks.sh +119 -0
- data/ext/deflate_ruby/libdeflate/scripts/exec_tests.sh +38 -0
- data/ext/deflate_ruby/libdeflate/scripts/gen-release-archives.sh +37 -0
- data/ext/deflate_ruby/libdeflate/scripts/gen_bitreverse_tab.py +19 -0
- data/ext/deflate_ruby/libdeflate/scripts/gen_crc32_multipliers.c +199 -0
- data/ext/deflate_ruby/libdeflate/scripts/gen_crc32_tables.c +105 -0
- data/ext/deflate_ruby/libdeflate/scripts/gen_default_litlen_costs.py +44 -0
- data/ext/deflate_ruby/libdeflate/scripts/gen_offset_slot_map.py +29 -0
- data/ext/deflate_ruby/libdeflate/scripts/gzip_tests.sh +523 -0
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_compress/corpus/0 +0 -0
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_compress/fuzz.c +95 -0
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_decompress/corpus/0 +3 -0
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_decompress/fuzz.c +62 -0
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/fuzz.sh +108 -0
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/gzip_decompress/corpus/0 +0 -0
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/gzip_decompress/fuzz.c +19 -0
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/zlib_decompress/corpus/0 +3 -0
- data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/zlib_decompress/fuzz.c +19 -0
- data/ext/deflate_ruby/libdeflate/scripts/run_tests.sh +416 -0
- data/ext/deflate_ruby/libdeflate/scripts/toolchain-i686-w64-mingw32.cmake +8 -0
- data/ext/deflate_ruby/libdeflate/scripts/toolchain-x86_64-w64-mingw32.cmake +8 -0
- data/lib/deflate_ruby/version.rb +5 -0
- data/lib/deflate_ruby.rb +71 -0
- metadata +191 -0
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* ht_matchfinder.h - Lempel-Ziv matchfinding with a hash table
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2022 Eric Biggers
|
|
5
|
+
*
|
|
6
|
+
* Permission is hereby granted, free of charge, to any person
|
|
7
|
+
* obtaining a copy of this software and associated documentation
|
|
8
|
+
* files (the "Software"), to deal in the Software without
|
|
9
|
+
* restriction, including without limitation the rights to use,
|
|
10
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
* copies of the Software, and to permit persons to whom the
|
|
12
|
+
* Software is furnished to do so, subject to the following
|
|
13
|
+
* conditions:
|
|
14
|
+
*
|
|
15
|
+
* The above copyright notice and this permission notice shall be
|
|
16
|
+
* included in all copies or substantial portions of the Software.
|
|
17
|
+
*
|
|
18
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
19
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
20
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
21
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
22
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
23
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
24
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
25
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
|
26
|
+
*
|
|
27
|
+
* ---------------------------------------------------------------------------
|
|
28
|
+
*
|
|
29
|
+
* This is a Hash Table (ht) matchfinder.
|
|
30
|
+
*
|
|
31
|
+
* This is a variant of the Hash Chains (hc) matchfinder that is optimized for
|
|
32
|
+
* very fast compression. The ht_matchfinder stores the hash chains inline in
|
|
33
|
+
* the hash table, whereas the hc_matchfinder stores them in a separate array.
|
|
34
|
+
* Storing the hash chains inline is the faster method when max_search_depth
|
|
35
|
+
* (the maximum chain length) is very small. It is not appropriate when
|
|
36
|
+
* max_search_depth is larger, as then it uses too much memory.
|
|
37
|
+
*
|
|
38
|
+
* Due to its focus on speed, the ht_matchfinder doesn't support length 3
|
|
39
|
+
* matches. It also doesn't allow max_search_depth to vary at runtime; it is
|
|
40
|
+
* fixed at build time as HT_MATCHFINDER_BUCKET_SIZE.
|
|
41
|
+
*
|
|
42
|
+
* See hc_matchfinder.h for more information.
|
|
43
|
+
*/
|
|
44
|
+
|
|
45
|
+
#ifndef LIB_HT_MATCHFINDER_H
#define LIB_HT_MATCHFINDER_H

#include "matchfinder_common.h"

/* log2 of the number of buckets in the hash table */
#define HT_MATCHFINDER_HASH_ORDER 15
/* Entries kept per bucket; this doubles as the fixed max_search_depth */
#define HT_MATCHFINDER_BUCKET_SIZE 2

/* Shortest match this matchfinder reports (length-3 matches unsupported) */
#define HT_MATCHFINDER_MIN_MATCH_LEN 4
/* Minimum value of max_len for ht_matchfinder_longest_match() */
#define HT_MATCHFINDER_REQUIRED_NBYTES 5

/*
 * The entire matchfinder state: a hash table whose chains are stored
 * inline, HT_MATCHFINDER_BUCKET_SIZE positions per bucket.
 */
struct MATCHFINDER_ALIGNED ht_matchfinder {
	mf_pos_t hash_tab[1UL << HT_MATCHFINDER_HASH_ORDER]
			 [HT_MATCHFINDER_BUCKET_SIZE];
};
|
|
61
|
+
|
|
62
|
+
/*
 * Initialize (or reset) the matchfinder: fill every hash table entry with
 * MATCHFINDER_INITVAL so all stored positions appear out of range.
 */
static forceinline void
ht_matchfinder_init(struct ht_matchfinder *mf)
{
	/* matchfinder_init() requires the size to be alignment-divisible. */
	STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);

	matchfinder_init((mf_pos_t *)mf, sizeof(*mf));
}
|
|
69
|
+
|
|
70
|
+
/*
 * Slide the window by MATCHFINDER_WINDOW_SIZE bytes: rebase every stored
 * position so it is relative to the new window start.
 */
static forceinline void
ht_matchfinder_slide_window(struct ht_matchfinder *mf)
{
	matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
}
|
|
75
|
+
|
|
76
|
+
/*
 * Find the longest match for the sequence beginning at @in_next and insert
 * the current position into the matchfinder.
 *
 * Returns the match length, or 0 if no match of length >=
 * HT_MATCHFINDER_MIN_MATCH_LEN was found; *offset_ret receives
 * (in_next - match start).  *in_base_p is advanced when the window is
 * slid, and *next_hash is overwritten with the hash of the *next* position
 * so that its bucket can be prefetched before the next call needs it.
 */
/* Note: max_len must be >= HT_MATCHFINDER_REQUIRED_NBYTES */
static forceinline u32
ht_matchfinder_longest_match(struct ht_matchfinder * const mf,
			     const u8 ** const in_base_p,
			     const u8 * const in_next,
			     const u32 max_len,
			     const u32 nice_len,
			     u32 * const next_hash,
			     u32 * const offset_ret)
{
	u32 best_len = 0;
	const u8 *best_matchptr = in_next;
	u32 cur_pos = in_next - *in_base_p;
	const u8 *in_base;
	mf_pos_t cutoff;
	u32 hash;
	u32 seq;
	mf_pos_t cur_node;
	const u8 *matchptr;
#if HT_MATCHFINDER_BUCKET_SIZE > 1
	mf_pos_t to_insert;
	u32 len;
#endif
#if HT_MATCHFINDER_BUCKET_SIZE > 2
	int i;
#endif

	/* This is assumed throughout this function. */
	STATIC_ASSERT(HT_MATCHFINDER_MIN_MATCH_LEN == 4);

	/* Slide the window once cur_pos reaches the end of it. */
	if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
		ht_matchfinder_slide_window(mf);
		*in_base_p += MATCHFINDER_WINDOW_SIZE;
		cur_pos = 0;
	}
	in_base = *in_base_p;
	/* Positions <= cutoff are outside the window and are ignored. */
	cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;

	/*
	 * Use the hash precomputed by the previous call, then precompute and
	 * prefetch the hash/bucket for the next position (hence the
	 * REQUIRED_NBYTES == 5 guarantee: 4 bytes loaded at in_next + 1).
	 */
	hash = *next_hash;
	STATIC_ASSERT(HT_MATCHFINDER_REQUIRED_NBYTES == 5);
	*next_hash = lz_hash(get_unaligned_le32(in_next + 1),
			     HT_MATCHFINDER_HASH_ORDER);
	seq = load_u32_unaligned(in_next);
	prefetchw(&mf->hash_tab[*next_hash]);
#if HT_MATCHFINDER_BUCKET_SIZE == 1
	/* Hand-unrolled version for BUCKET_SIZE == 1 */
	cur_node = mf->hash_tab[hash][0];
	mf->hash_tab[hash][0] = cur_pos;
	if (cur_node <= cutoff)
		goto out;
	matchptr = &in_base[cur_node];
	if (load_u32_unaligned(matchptr) == seq) {
		best_len = lz_extend(in_next, matchptr, 4, max_len);
		best_matchptr = matchptr;
	}
#elif HT_MATCHFINDER_BUCKET_SIZE == 2
	/*
	 * Hand-unrolled version for BUCKET_SIZE == 2.  The logic here also
	 * differs slightly in that it copies the first entry to the second even
	 * if nice_len is reached on the first, as this can be slightly faster.
	 */
	cur_node = mf->hash_tab[hash][0];
	mf->hash_tab[hash][0] = cur_pos;
	if (cur_node <= cutoff)
		goto out;
	matchptr = &in_base[cur_node];

	/* Age the first entry into the second slot. */
	to_insert = cur_node;
	cur_node = mf->hash_tab[hash][1];
	mf->hash_tab[hash][1] = to_insert;

	if (load_u32_unaligned(matchptr) == seq) {
		best_len = lz_extend(in_next, matchptr, 4, max_len);
		best_matchptr = matchptr;
		if (cur_node <= cutoff || best_len >= nice_len)
			goto out;
		matchptr = &in_base[cur_node];
		/*
		 * Only bother extending the second candidate if it can beat
		 * best_len: check 4 bytes straddling position best_len - 3.
		 */
		if (load_u32_unaligned(matchptr) == seq &&
		    load_u32_unaligned(matchptr + best_len - 3) ==
		    load_u32_unaligned(in_next + best_len - 3)) {
			len = lz_extend(in_next, matchptr, 4, max_len);
			if (len > best_len) {
				best_len = len;
				best_matchptr = matchptr;
			}
		}
	} else {
		if (cur_node <= cutoff)
			goto out;
		matchptr = &in_base[cur_node];
		if (load_u32_unaligned(matchptr) == seq) {
			best_len = lz_extend(in_next, matchptr, 4, max_len);
			best_matchptr = matchptr;
		}
	}
#else
	/* Generic version for HT_MATCHFINDER_BUCKET_SIZE > 2 */
	to_insert = cur_pos;
	for (i = 0; i < HT_MATCHFINDER_BUCKET_SIZE; i++) {
		cur_node = mf->hash_tab[hash][i];
		mf->hash_tab[hash][i] = to_insert;
		if (cur_node <= cutoff)
			goto out;
		matchptr = &in_base[cur_node];
		if (load_u32_unaligned(matchptr) == seq) {
			len = lz_extend(in_next, matchptr, 4, max_len);
			if (len > best_len) {
				best_len = len;
				best_matchptr = matchptr;
				if (best_len >= nice_len)
					goto out;
			}
		}
		to_insert = cur_node;
	}
#endif
out:
	*offset_ret = in_next - best_matchptr;
	return best_len;
}
|
|
196
|
+
|
|
197
|
+
/*
 * Advance the matchfinder through @count positions without searching for
 * matches, inserting each skipped position into its hash bucket.  Returns
 * early (doing nothing) if fewer than count + HT_MATCHFINDER_REQUIRED_NBYTES
 * bytes remain before @in_end, so the unaligned 32-bit loads stay in bounds.
 * *next_hash is left holding the hash of the position after the skip.
 */
static forceinline void
ht_matchfinder_skip_bytes(struct ht_matchfinder * const mf,
			  const u8 ** const in_base_p,
			  const u8 *in_next,
			  const u8 * const in_end,
			  const u32 count,
			  u32 * const next_hash)
{
	s32 cur_pos = in_next - *in_base_p;
	u32 hash;
	u32 remaining = count;
	int i;

	if (unlikely(count + HT_MATCHFINDER_REQUIRED_NBYTES > in_end - in_next))
		return;

	/* Slide the window if the skip would run past its end. */
	if (cur_pos + count - 1 >= MATCHFINDER_WINDOW_SIZE) {
		ht_matchfinder_slide_window(mf);
		*in_base_p += MATCHFINDER_WINDOW_SIZE;
		cur_pos -= MATCHFINDER_WINDOW_SIZE;
	}

	hash = *next_hash;
	do {
		/* Shift the bucket down one slot, dropping the oldest entry. */
		for (i = HT_MATCHFINDER_BUCKET_SIZE - 1; i > 0; i--)
			mf->hash_tab[hash][i] = mf->hash_tab[hash][i - 1];
		mf->hash_tab[hash][0] = cur_pos;

		hash = lz_hash(get_unaligned_le32(++in_next),
			       HT_MATCHFINDER_HASH_ORDER);
		cur_pos++;
	} while (--remaining);

	/* Prefetch the next position's bucket, as longest_match() expects. */
	prefetchw(&mf->hash_tab[hash]);
	*next_hash = hash;
}
|
|
233
|
+
|
|
234
|
+
#endif /* LIB_HT_MATCHFINDER_H */
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/*
 * lib_common.h - internal header included by all library code
 */

#ifndef LIB_LIB_COMMON_H
#define LIB_LIB_COMMON_H

#ifdef LIBDEFLATE_H
/*
 * When building the library, LIBDEFLATEAPI needs to be defined properly before
 * including libdeflate.h.
 */
# error "lib_common.h must always be included before libdeflate.h"
#endif

/* Symbol export: dllexport on Windows DLLs, default visibility on GCC/Clang. */
#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
#elif defined(__GNUC__)
# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
#else
# define LIBDEFLATE_EXPORT_SYM
#endif

/*
 * On i386, gcc assumes that the stack is 16-byte aligned at function entry.
 * However, some compilers (e.g. MSVC) and programming languages (e.g. Delphi)
 * only guarantee 4-byte alignment when calling functions.  This is mainly an
 * issue on Windows, but it has been seen on Linux too.  Work around this ABI
 * incompatibility by realigning the stack pointer when entering libdeflate.
 * This prevents crashes in SSE/AVX code.
 */
#if defined(__GNUC__) && defined(__i386__)
# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
#else
# define LIBDEFLATE_ALIGN_STACK
#endif

#define LIBDEFLATEAPI	LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK

#include "../common_defs.h"

/* Function-pointer types for user-replaceable memory allocators. */
typedef void *(*malloc_func_t)(size_t);
typedef void (*free_func_t)(void *);

extern malloc_func_t libdeflate_default_malloc_func;
extern free_func_t libdeflate_default_free_func;

void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
				size_t alignment, size_t size);
void libdeflate_aligned_free(free_func_t free_func, void *ptr);

#ifdef FREESTANDING
/*
 * With -ffreestanding, <string.h> may be missing, and we must provide
 * implementations of memset(), memcpy(), memmove(), and memcmp().
 * See https://gcc.gnu.org/onlinedocs/gcc/Standards.html
 *
 * Also, -ffreestanding disables interpreting calls to these functions as
 * built-ins.  E.g., calling memcpy(&v, p, WORDBYTES) will make a function call,
 * not be optimized to a single load instruction.  For performance reasons we
 * don't want that.  So, declare these functions as macros that expand to the
 * corresponding built-ins.  This approach is recommended in the gcc man page.
 * We still need the actual function definitions in case gcc calls them.
 */
void *memset(void *s, int c, size_t n);
#define memset(s, c, n)		__builtin_memset((s), (c), (n))

void *memcpy(void *dest, const void *src, size_t n);
#define memcpy(dest, src, n)	__builtin_memcpy((dest), (src), (n))

void *memmove(void *dest, const void *src, size_t n);
#define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))

int memcmp(const void *s1, const void *s2, size_t n);
#define memcmp(s1, s2, n)	__builtin_memcmp((s1), (s2), (n))

#undef LIBDEFLATE_ENABLE_ASSERTIONS
#else
# include <string.h>
/*
 * To prevent false positive static analyzer warnings, ensure that assertions
 * are visible to the static analyzer.
 */
# ifdef __clang_analyzer__
#  define LIBDEFLATE_ENABLE_ASSERTIONS
# endif
#endif

/*
 * Runtime assertion support.  Don't enable this in production builds; it may
 * hurt performance significantly.
 *
 * Wrapped in do { ... } while (0) so that "ASSERT(x);" parses as exactly one
 * statement everywhere, including as the body of an if with an else clause
 * (a bare brace block followed by ';' would break such usage).
 */
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
NORETURN void
libdeflate_assertion_failed(const char *expr, const char *file, int line);
#define ASSERT(expr) do { if (unlikely(!(expr)))			\
	libdeflate_assertion_failed(#expr, __FILE__, __LINE__); } while (0)
#else
#define ASSERT(expr) (void)(expr)
#endif

#define CONCAT_IMPL(a, b)	a##b
#define CONCAT(a, b)		CONCAT_IMPL(a, b)
#define ADD_SUFFIX(name)	CONCAT(name, SUFFIX)

#endif /* LIB_LIB_COMMON_H */
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* matchfinder_common.h - common code for Lempel-Ziv matchfinding
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
#ifndef LIB_MATCHFINDER_COMMON_H
#define LIB_MATCHFINDER_COMMON_H

#include "lib_common.h"

/* The including translation unit must configure the window size first. */
#ifndef MATCHFINDER_WINDOW_ORDER
# error "MATCHFINDER_WINDOW_ORDER must be defined!"
#endif
|
|
13
|
+
|
|
14
|
+
/*
|
|
15
|
+
* Given a 32-bit value that was loaded with the platform's native endianness,
|
|
16
|
+
* return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
|
|
17
|
+
* bits contain the first 3 bytes, arranged in octets in a platform-dependent
|
|
18
|
+
* order, at the memory location from which the input 32-bit value was loaded.
|
|
19
|
+
*/
|
|
20
|
+
static forceinline u32
|
|
21
|
+
loaded_u32_to_u24(u32 v)
|
|
22
|
+
{
|
|
23
|
+
if (CPU_IS_LITTLE_ENDIAN())
|
|
24
|
+
return v & 0xFFFFFF;
|
|
25
|
+
else
|
|
26
|
+
return v >> 8;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/*
|
|
30
|
+
* Load the next 3 bytes from @p into the 24 low-order bits of a 32-bit value.
|
|
31
|
+
* The order in which the 3 bytes will be arranged as octets in the 24 bits is
|
|
32
|
+
* platform-dependent. At least 4 bytes (not 3) must be available at @p.
|
|
33
|
+
*/
|
|
34
|
+
/*
 * Load the next 3 bytes from @p into the low 24 bits of a u32, in a
 * platform-dependent octet order.  At least 4 bytes (not 3) must be
 * readable at @p, since the fast path does a 4-byte load.
 */
static forceinline u32
load_u24_unaligned(const u8 *p)
{
#if UNALIGNED_ACCESS_IS_FAST
	/* One unaligned 32-bit load, then discard the 4th byte. */
	return loaded_u32_to_u24(load_u32_unaligned(p));
#else
	/* Byte-at-a-time fallback matching the fast path's octet order. */
	if (CPU_IS_LITTLE_ENDIAN())
		return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
	else
		return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16);
#endif
}
|
|
46
|
+
|
|
47
|
+
#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)

/* Position type stored in matchfinder tables (window-relative, signed). */
typedef s16 mf_pos_t;

/* Value meaning "no position": guaranteed to compare <= any valid cutoff. */
#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)

/*
 * This is the memory address alignment, in bytes, required for the matchfinder
 * buffers by the architecture-specific implementations of matchfinder_init()
 * and matchfinder_rebase().  "Matchfinder buffer" means an entire struct
 * hc_matchfinder, bt_matchfinder, or ht_matchfinder; the next_tab field of
 * struct hc_matchfinder; or the child_tab field of struct bt_matchfinder.
 *
 * This affects how the entire 'struct deflate_compressor' is allocated, since
 * the matchfinder structures are embedded inside it.
 *
 * Currently the maximum memory address alignment required is 32 bytes, needed
 * by the AVX-2 matchfinder functions.
 */
#define MATCHFINDER_MEM_ALIGNMENT 32

/*
 * This declares a size, in bytes, that is guaranteed to divide the sizes of the
 * matchfinder buffers (where "matchfinder buffers" is as defined for
 * MATCHFINDER_MEM_ALIGNMENT).  The architecture-specific implementations of
 * matchfinder_init() and matchfinder_rebase() take advantage of this value.
 *
 * Currently the maximum size alignment required is 128 bytes, needed by
 * the AVX-2 matchfinder functions.  However, the RISC-V Vector Extension
 * matchfinder functions can, in principle, take advantage of a larger size
 * alignment.  Therefore, we set this to 1024, which still easily divides the
 * actual sizes that result from the current matchfinder struct definitions.
 * This value can safely be changed to any power of two that is >= 128.
 */
#define MATCHFINDER_SIZE_ALIGNMENT 1024

/* Let an arch-specific header override the generic implementations below. */
#undef matchfinder_init
#undef matchfinder_rebase
#ifdef _aligned_attribute
# define MATCHFINDER_ALIGNED _aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
# if defined(ARCH_ARM32) || defined(ARCH_ARM64)
#  include "arm/matchfinder_impl.h"
# elif defined(ARCH_RISCV)
#  include "riscv/matchfinder_impl.h"
# elif defined(ARCH_X86_32) || defined(ARCH_X86_64)
#  include "x86/matchfinder_impl.h"
# endif
#else
# define MATCHFINDER_ALIGNED
#endif
|
|
97
|
+
|
|
98
|
+
/*
|
|
99
|
+
* Initialize the hash table portion of the matchfinder.
|
|
100
|
+
*
|
|
101
|
+
* Essentially, this is an optimized memset().
|
|
102
|
+
*
|
|
103
|
+
* 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and
|
|
104
|
+
* 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
|
|
105
|
+
*/
|
|
106
|
+
#ifndef matchfinder_init
|
|
107
|
+
static forceinline void
|
|
108
|
+
matchfinder_init(mf_pos_t *data, size_t size)
|
|
109
|
+
{
|
|
110
|
+
size_t num_entries = size / sizeof(*data);
|
|
111
|
+
size_t i;
|
|
112
|
+
|
|
113
|
+
for (i = 0; i < num_entries; i++)
|
|
114
|
+
data[i] = MATCHFINDER_INITVAL;
|
|
115
|
+
}
|
|
116
|
+
#endif
|
|
117
|
+
|
|
118
|
+
/*
|
|
119
|
+
* Slide the matchfinder by MATCHFINDER_WINDOW_SIZE bytes.
|
|
120
|
+
*
|
|
121
|
+
* This must be called just after each MATCHFINDER_WINDOW_SIZE bytes have been
|
|
122
|
+
* run through the matchfinder.
|
|
123
|
+
*
|
|
124
|
+
* This subtracts MATCHFINDER_WINDOW_SIZE bytes from each entry in the given
|
|
125
|
+
* array, making the entries be relative to the current position rather than the
|
|
126
|
+
* position MATCHFINDER_WINDOW_SIZE bytes prior. To avoid integer underflows,
|
|
127
|
+
* entries that would become less than -MATCHFINDER_WINDOW_SIZE stay at
|
|
128
|
+
* -MATCHFINDER_WINDOW_SIZE, keeping them permanently out of bounds.
|
|
129
|
+
*
|
|
130
|
+
* The given array must contain all matchfinder data that is position-relative:
|
|
131
|
+
* the hash table(s) as well as any hash chain or binary tree links. Its
|
|
132
|
+
* address must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and its size
|
|
133
|
+
* must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
|
|
134
|
+
*/
|
|
135
|
+
#ifndef matchfinder_rebase
/*
 * Generic fallback for matchfinder_rebase(): subtract
 * MATCHFINDER_WINDOW_SIZE from every entry with signed saturation, so
 * out-of-window entries stay pinned at -MATCHFINDER_WINDOW_SIZE.
 */
static forceinline void
matchfinder_rebase(mf_pos_t *data, size_t size)
{
	size_t num_entries = size / sizeof(*data);
	size_t i;

	if (MATCHFINDER_WINDOW_SIZE == 32768) {
		/*
		 * Branchless version for 32768-byte windows.  Clear all bits if
		 * the value was already negative, then set the sign bit.  This
		 * is equivalent to subtracting 32768 with signed saturation.
		 */
		for (i = 0; i < num_entries; i++)
			data[i] = 0x8000 | (data[i] & ~(data[i] >> 15));
	} else {
		for (i = 0; i < num_entries; i++) {
			if (data[i] >= 0)
				data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
			else
				data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
		}
	}
}
#endif
|
|
160
|
+
|
|
161
|
+
/*
|
|
162
|
+
* The hash function: given a sequence prefix held in the low-order bits of a
|
|
163
|
+
* 32-bit value, multiply by a carefully-chosen large constant. Discard any
|
|
164
|
+
* bits of the product that don't fit in a 32-bit value, but take the
|
|
165
|
+
* next-highest @num_bits bits of the product as the hash value, as those have
|
|
166
|
+
* the most randomness.
|
|
167
|
+
*/
|
|
168
|
+
static forceinline u32
|
|
169
|
+
lz_hash(u32 seq, unsigned num_bits)
|
|
170
|
+
{
|
|
171
|
+
return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
/*
|
|
175
|
+
* Return the number of bytes at @matchptr that match the bytes at @strptr, up
|
|
176
|
+
* to a maximum of @max_len. Initially, @start_len bytes are matched.
|
|
177
|
+
*/
|
|
178
|
+
/*
 * Return the number of bytes at @matchptr that match the bytes at @strptr,
 * up to a maximum of @max_len.  The first @start_len bytes are assumed to
 * already match and are not re-checked.
 */
static forceinline unsigned
lz_extend(const u8 * const strptr, const u8 * const matchptr,
	  const unsigned start_len, const unsigned max_len)
{
	unsigned len = start_len;
	machine_word_t v_word;

	if (UNALIGNED_ACCESS_IS_FAST) {

		/* Unrolled fast path: compare 4 machine words at a time. */
		if (likely(max_len - len >= 4 * WORDBYTES)) {

/* XOR the two words; any nonzero bit marks the first difference. */
#define COMPARE_WORD_STEP					\
	v_word = load_word_unaligned(&matchptr[len]) ^		\
		 load_word_unaligned(&strptr[len]);		\
	if (v_word != 0)					\
		goto word_differs;				\
	len += WORDBYTES;					\

			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
#undef COMPARE_WORD_STEP
		}

		/* Continue word-at-a-time while a full word remains. */
		while (len + WORDBYTES <= max_len) {
			v_word = load_word_unaligned(&matchptr[len]) ^
				 load_word_unaligned(&strptr[len]);
			if (v_word != 0)
				goto word_differs;
			len += WORDBYTES;
		}
	}

	/* Byte-at-a-time tail (and entire loop when unaligned loads are slow). */
	while (len < max_len && matchptr[len] == strptr[len])
		len++;
	return len;

word_differs:
	/* Locate the first differing byte from the first set bit of the XOR. */
	if (CPU_IS_LITTLE_ENDIAN())
		len += (bsfw(v_word) >> 3);
	else
		len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
	return len;
}
|
|
223
|
+
|
|
224
|
+
#endif /* LIB_MATCHFINDER_COMMON_H */
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* riscv/matchfinder_impl.h - RISC-V implementations of matchfinder functions
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2024 Eric Biggers
|
|
5
|
+
*
|
|
6
|
+
* Permission is hereby granted, free of charge, to any person
|
|
7
|
+
* obtaining a copy of this software and associated documentation
|
|
8
|
+
* files (the "Software"), to deal in the Software without
|
|
9
|
+
* restriction, including without limitation the rights to use,
|
|
10
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
* copies of the Software, and to permit persons to whom the
|
|
12
|
+
* Software is furnished to do so, subject to the following
|
|
13
|
+
* conditions:
|
|
14
|
+
*
|
|
15
|
+
* The above copyright notice and this permission notice shall be
|
|
16
|
+
* included in all copies or substantial portions of the Software.
|
|
17
|
+
*
|
|
18
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
19
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
20
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
21
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
22
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
23
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
24
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
25
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
#ifndef LIB_RISCV_MATCHFINDER_IMPL_H
#define LIB_RISCV_MATCHFINDER_IMPL_H

#if defined(ARCH_RISCV) && defined(__riscv_vector)
#include <riscv_vector.h>

/*
 * Return the maximum number of 16-bit (mf_pos_t) elements that fit in 8 RISC-V
 * vector registers and also evenly divide the sizes of the matchfinder buffers.
 */
static forceinline size_t
riscv_matchfinder_vl(void)
{
	/* Max vector length for e16 elements at LMUL=8 */
	const size_t vl = __riscv_vsetvlmax_e16m8();

	STATIC_ASSERT(sizeof(mf_pos_t) == sizeof(s16));
	/*
	 * MATCHFINDER_SIZE_ALIGNMENT is a power of 2, as is 'vl' because the
	 * RISC-V Vector Extension requires that the vector register length
	 * (VLEN) be a power of 2.  Thus, a simple MIN() gives the correct
	 * answer here; rounding to a power of 2 is not required.
	 */
	STATIC_ASSERT((MATCHFINDER_SIZE_ALIGNMENT &
		       (MATCHFINDER_SIZE_ALIGNMENT - 1)) == 0);
	ASSERT((vl & (vl - 1)) == 0);
	return MIN(vl, MATCHFINDER_SIZE_ALIGNMENT / sizeof(mf_pos_t));
}
|
|
55
|
+
|
|
56
|
+
/* matchfinder_init() optimized using the RISC-V Vector Extension */
static forceinline void
matchfinder_init_rvv(mf_pos_t *p, size_t size)
{
	const size_t vl = riscv_matchfinder_vl();
	/* Vector register splatted with the "no position" value */
	const vint16m8_t v = __riscv_vmv_v_x_i16m8(MATCHFINDER_INITVAL, vl);

	/* riscv_matchfinder_vl() guarantees vl evenly divides the buffer. */
	ASSERT(size > 0 && size % (vl * sizeof(p[0])) == 0);
	do {
		__riscv_vse16_v_i16m8(p, v, vl);
		p += vl;
		size -= vl * sizeof(p[0]);
	} while (size != 0);
}
#define matchfinder_init matchfinder_init_rvv
|
|
71
|
+
|
|
72
|
+
/* matchfinder_rebase() optimized using the RISC-V Vector Extension */
static forceinline void
matchfinder_rebase_rvv(mf_pos_t *p, size_t size)
{
	const size_t vl = riscv_matchfinder_vl();

	/* riscv_matchfinder_vl() guarantees vl evenly divides the buffer. */
	ASSERT(size > 0 && size % (vl * sizeof(p[0])) == 0);
	do {
		vint16m8_t v = __riscv_vle16_v_i16m8(p, vl);

		/*
		 * This should generate the vsadd.vx instruction
		 * (Vector Saturating Add, integer vector-scalar)
		 */
		v = __riscv_vsadd_vx_i16m8(v, (s16)-MATCHFINDER_WINDOW_SIZE,
					   vl);
		__riscv_vse16_v_i16m8(p, v, vl);
		p += vl;
		size -= vl * sizeof(p[0]);
	} while (size != 0);
}
#define matchfinder_rebase matchfinder_rebase_rvv
|
|
94
|
+
|
|
95
|
+
#endif /* ARCH_RISCV && __riscv_vector */
|
|
96
|
+
|
|
97
|
+
#endif /* LIB_RISCV_MATCHFINDER_IMPL_H */
|