deflate-ruby 0.1.0

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. checksums.yaml +7 -0
  2. data/CLAUDE.md +138 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +117 -0
  5. data/ext/deflate_ruby/deflate_ruby.c +301 -0
  6. data/ext/deflate_ruby/extconf.rb +34 -0
  7. data/ext/deflate_ruby/libdeflate/CMakeLists.txt +270 -0
  8. data/ext/deflate_ruby/libdeflate/COPYING +22 -0
  9. data/ext/deflate_ruby/libdeflate/NEWS.md +494 -0
  10. data/ext/deflate_ruby/libdeflate/README.md +228 -0
  11. data/ext/deflate_ruby/libdeflate/common_defs.h +747 -0
  12. data/ext/deflate_ruby/libdeflate/lib/adler32.c +162 -0
  13. data/ext/deflate_ruby/libdeflate/lib/arm/adler32_impl.h +358 -0
  14. data/ext/deflate_ruby/libdeflate/lib/arm/cpu_features.c +230 -0
  15. data/ext/deflate_ruby/libdeflate/lib/arm/cpu_features.h +214 -0
  16. data/ext/deflate_ruby/libdeflate/lib/arm/crc32_impl.h +600 -0
  17. data/ext/deflate_ruby/libdeflate/lib/arm/crc32_pmull_helpers.h +156 -0
  18. data/ext/deflate_ruby/libdeflate/lib/arm/crc32_pmull_wide.h +226 -0
  19. data/ext/deflate_ruby/libdeflate/lib/arm/matchfinder_impl.h +78 -0
  20. data/ext/deflate_ruby/libdeflate/lib/bt_matchfinder.h +342 -0
  21. data/ext/deflate_ruby/libdeflate/lib/cpu_features_common.h +93 -0
  22. data/ext/deflate_ruby/libdeflate/lib/crc32.c +262 -0
  23. data/ext/deflate_ruby/libdeflate/lib/crc32_multipliers.h +377 -0
  24. data/ext/deflate_ruby/libdeflate/lib/crc32_tables.h +587 -0
  25. data/ext/deflate_ruby/libdeflate/lib/decompress_template.h +777 -0
  26. data/ext/deflate_ruby/libdeflate/lib/deflate_compress.c +4129 -0
  27. data/ext/deflate_ruby/libdeflate/lib/deflate_compress.h +15 -0
  28. data/ext/deflate_ruby/libdeflate/lib/deflate_constants.h +56 -0
  29. data/ext/deflate_ruby/libdeflate/lib/deflate_decompress.c +1208 -0
  30. data/ext/deflate_ruby/libdeflate/lib/gzip_compress.c +90 -0
  31. data/ext/deflate_ruby/libdeflate/lib/gzip_constants.h +45 -0
  32. data/ext/deflate_ruby/libdeflate/lib/gzip_decompress.c +144 -0
  33. data/ext/deflate_ruby/libdeflate/lib/hc_matchfinder.h +401 -0
  34. data/ext/deflate_ruby/libdeflate/lib/ht_matchfinder.h +234 -0
  35. data/ext/deflate_ruby/libdeflate/lib/lib_common.h +106 -0
  36. data/ext/deflate_ruby/libdeflate/lib/matchfinder_common.h +224 -0
  37. data/ext/deflate_ruby/libdeflate/lib/riscv/matchfinder_impl.h +97 -0
  38. data/ext/deflate_ruby/libdeflate/lib/utils.c +141 -0
  39. data/ext/deflate_ruby/libdeflate/lib/x86/adler32_impl.h +134 -0
  40. data/ext/deflate_ruby/libdeflate/lib/x86/adler32_template.h +518 -0
  41. data/ext/deflate_ruby/libdeflate/lib/x86/cpu_features.c +183 -0
  42. data/ext/deflate_ruby/libdeflate/lib/x86/cpu_features.h +169 -0
  43. data/ext/deflate_ruby/libdeflate/lib/x86/crc32_impl.h +160 -0
  44. data/ext/deflate_ruby/libdeflate/lib/x86/crc32_pclmul_template.h +495 -0
  45. data/ext/deflate_ruby/libdeflate/lib/x86/decompress_impl.h +57 -0
  46. data/ext/deflate_ruby/libdeflate/lib/x86/matchfinder_impl.h +122 -0
  47. data/ext/deflate_ruby/libdeflate/lib/zlib_compress.c +82 -0
  48. data/ext/deflate_ruby/libdeflate/lib/zlib_constants.h +21 -0
  49. data/ext/deflate_ruby/libdeflate/lib/zlib_decompress.c +104 -0
  50. data/ext/deflate_ruby/libdeflate/libdeflate-config.cmake.in +3 -0
  51. data/ext/deflate_ruby/libdeflate/libdeflate.h +411 -0
  52. data/ext/deflate_ruby/libdeflate/libdeflate.pc.in +18 -0
  53. data/ext/deflate_ruby/libdeflate/programs/CMakeLists.txt +105 -0
  54. data/ext/deflate_ruby/libdeflate/programs/benchmark.c +696 -0
  55. data/ext/deflate_ruby/libdeflate/programs/checksum.c +218 -0
  56. data/ext/deflate_ruby/libdeflate/programs/config.h.in +19 -0
  57. data/ext/deflate_ruby/libdeflate/programs/gzip.c +688 -0
  58. data/ext/deflate_ruby/libdeflate/programs/prog_util.c +521 -0
  59. data/ext/deflate_ruby/libdeflate/programs/prog_util.h +225 -0
  60. data/ext/deflate_ruby/libdeflate/programs/test_checksums.c +200 -0
  61. data/ext/deflate_ruby/libdeflate/programs/test_custom_malloc.c +155 -0
  62. data/ext/deflate_ruby/libdeflate/programs/test_incomplete_codes.c +385 -0
  63. data/ext/deflate_ruby/libdeflate/programs/test_invalid_streams.c +130 -0
  64. data/ext/deflate_ruby/libdeflate/programs/test_litrunlen_overflow.c +72 -0
  65. data/ext/deflate_ruby/libdeflate/programs/test_overread.c +95 -0
  66. data/ext/deflate_ruby/libdeflate/programs/test_slow_decompression.c +472 -0
  67. data/ext/deflate_ruby/libdeflate/programs/test_trailing_bytes.c +151 -0
  68. data/ext/deflate_ruby/libdeflate/programs/test_util.c +237 -0
  69. data/ext/deflate_ruby/libdeflate/programs/test_util.h +61 -0
  70. data/ext/deflate_ruby/libdeflate/programs/tgetopt.c +118 -0
  71. data/ext/deflate_ruby/libdeflate/scripts/android_build.sh +118 -0
  72. data/ext/deflate_ruby/libdeflate/scripts/android_tests.sh +69 -0
  73. data/ext/deflate_ruby/libdeflate/scripts/benchmark.sh +10 -0
  74. data/ext/deflate_ruby/libdeflate/scripts/checksum.sh +10 -0
  75. data/ext/deflate_ruby/libdeflate/scripts/checksum_benchmarks.sh +253 -0
  76. data/ext/deflate_ruby/libdeflate/scripts/cmake-helper.sh +17 -0
  77. data/ext/deflate_ruby/libdeflate/scripts/deflate_benchmarks.sh +119 -0
  78. data/ext/deflate_ruby/libdeflate/scripts/exec_tests.sh +38 -0
  79. data/ext/deflate_ruby/libdeflate/scripts/gen-release-archives.sh +37 -0
  80. data/ext/deflate_ruby/libdeflate/scripts/gen_bitreverse_tab.py +19 -0
  81. data/ext/deflate_ruby/libdeflate/scripts/gen_crc32_multipliers.c +199 -0
  82. data/ext/deflate_ruby/libdeflate/scripts/gen_crc32_tables.c +105 -0
  83. data/ext/deflate_ruby/libdeflate/scripts/gen_default_litlen_costs.py +44 -0
  84. data/ext/deflate_ruby/libdeflate/scripts/gen_offset_slot_map.py +29 -0
  85. data/ext/deflate_ruby/libdeflate/scripts/gzip_tests.sh +523 -0
  86. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_compress/corpus/0 +0 -0
  87. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_compress/fuzz.c +95 -0
  88. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_decompress/corpus/0 +3 -0
  89. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_decompress/fuzz.c +62 -0
  90. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/fuzz.sh +108 -0
  91. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/gzip_decompress/corpus/0 +0 -0
  92. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/gzip_decompress/fuzz.c +19 -0
  93. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/zlib_decompress/corpus/0 +3 -0
  94. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/zlib_decompress/fuzz.c +19 -0
  95. data/ext/deflate_ruby/libdeflate/scripts/run_tests.sh +416 -0
  96. data/ext/deflate_ruby/libdeflate/scripts/toolchain-i686-w64-mingw32.cmake +8 -0
  97. data/ext/deflate_ruby/libdeflate/scripts/toolchain-x86_64-w64-mingw32.cmake +8 -0
  98. data/lib/deflate_ruby/version.rb +5 -0
  99. data/lib/deflate_ruby.rb +71 -0
  100. metadata +191 -0
data/ext/deflate_ruby/libdeflate/lib/ht_matchfinder.h
@@ -0,0 +1,234 @@
+/*
+ * ht_matchfinder.h - Lempel-Ziv matchfinding with a hash table
+ *
+ * Copyright 2022 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ---------------------------------------------------------------------------
+ *
+ * This is a Hash Table (ht) matchfinder.
+ *
+ * This is a variant of the Hash Chains (hc) matchfinder that is optimized for
+ * very fast compression.  The ht_matchfinder stores the hash chains inline in
+ * the hash table, whereas the hc_matchfinder stores them in a separate array.
+ * Storing the hash chains inline is the faster method when max_search_depth
+ * (the maximum chain length) is very small.  It is not appropriate when
+ * max_search_depth is larger, as then it uses too much memory.
+ *
+ * Due to its focus on speed, the ht_matchfinder doesn't support length 3
+ * matches.  It also doesn't allow max_search_depth to vary at runtime; it is
+ * fixed at build time as HT_MATCHFINDER_BUCKET_SIZE.
+ *
+ * See hc_matchfinder.h for more information.
+ */
+
+#ifndef LIB_HT_MATCHFINDER_H
+#define LIB_HT_MATCHFINDER_H
+
+#include "matchfinder_common.h"
+
+#define HT_MATCHFINDER_HASH_ORDER	15
+#define HT_MATCHFINDER_BUCKET_SIZE	2
+
+#define HT_MATCHFINDER_MIN_MATCH_LEN	4
+/* Minimum value of max_len for ht_matchfinder_longest_match() */
+#define HT_MATCHFINDER_REQUIRED_NBYTES	5
+
+struct MATCHFINDER_ALIGNED ht_matchfinder {
+	mf_pos_t hash_tab[1UL << HT_MATCHFINDER_HASH_ORDER]
+			 [HT_MATCHFINDER_BUCKET_SIZE];
+};
+
+static forceinline void
+ht_matchfinder_init(struct ht_matchfinder *mf)
+{
+	STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
+
+	matchfinder_init((mf_pos_t *)mf, sizeof(*mf));
+}
+
+static forceinline void
+ht_matchfinder_slide_window(struct ht_matchfinder *mf)
+{
+	matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
+}
+
+/* Note: max_len must be >= HT_MATCHFINDER_REQUIRED_NBYTES */
+static forceinline u32
+ht_matchfinder_longest_match(struct ht_matchfinder * const mf,
+			     const u8 ** const in_base_p,
+			     const u8 * const in_next,
+			     const u32 max_len,
+			     const u32 nice_len,
+			     u32 * const next_hash,
+			     u32 * const offset_ret)
+{
+	u32 best_len = 0;
+	const u8 *best_matchptr = in_next;
+	u32 cur_pos = in_next - *in_base_p;
+	const u8 *in_base;
+	mf_pos_t cutoff;
+	u32 hash;
+	u32 seq;
+	mf_pos_t cur_node;
+	const u8 *matchptr;
+#if HT_MATCHFINDER_BUCKET_SIZE > 1
+	mf_pos_t to_insert;
+	u32 len;
+#endif
+#if HT_MATCHFINDER_BUCKET_SIZE > 2
+	int i;
+#endif
+
+	/* This is assumed throughout this function. */
+	STATIC_ASSERT(HT_MATCHFINDER_MIN_MATCH_LEN == 4);
+
+	if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
+		ht_matchfinder_slide_window(mf);
+		*in_base_p += MATCHFINDER_WINDOW_SIZE;
+		cur_pos = 0;
+	}
+	in_base = *in_base_p;
+	cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
+
+	hash = *next_hash;
+	STATIC_ASSERT(HT_MATCHFINDER_REQUIRED_NBYTES == 5);
+	*next_hash = lz_hash(get_unaligned_le32(in_next + 1),
+			     HT_MATCHFINDER_HASH_ORDER);
+	seq = load_u32_unaligned(in_next);
+	prefetchw(&mf->hash_tab[*next_hash]);
+#if HT_MATCHFINDER_BUCKET_SIZE == 1
+	/* Hand-unrolled version for BUCKET_SIZE == 1 */
+	cur_node = mf->hash_tab[hash][0];
+	mf->hash_tab[hash][0] = cur_pos;
+	if (cur_node <= cutoff)
+		goto out;
+	matchptr = &in_base[cur_node];
+	if (load_u32_unaligned(matchptr) == seq) {
+		best_len = lz_extend(in_next, matchptr, 4, max_len);
+		best_matchptr = matchptr;
+	}
+#elif HT_MATCHFINDER_BUCKET_SIZE == 2
+	/*
+	 * Hand-unrolled version for BUCKET_SIZE == 2.  The logic here also
+	 * differs slightly in that it copies the first entry to the second even
+	 * if nice_len is reached on the first, as this can be slightly faster.
+	 */
+	cur_node = mf->hash_tab[hash][0];
+	mf->hash_tab[hash][0] = cur_pos;
+	if (cur_node <= cutoff)
+		goto out;
+	matchptr = &in_base[cur_node];
+
+	to_insert = cur_node;
+	cur_node = mf->hash_tab[hash][1];
+	mf->hash_tab[hash][1] = to_insert;
+
+	if (load_u32_unaligned(matchptr) == seq) {
+		best_len = lz_extend(in_next, matchptr, 4, max_len);
+		best_matchptr = matchptr;
+		if (cur_node <= cutoff || best_len >= nice_len)
+			goto out;
+		matchptr = &in_base[cur_node];
+		if (load_u32_unaligned(matchptr) == seq &&
+		    load_u32_unaligned(matchptr + best_len - 3) ==
+		    load_u32_unaligned(in_next + best_len - 3)) {
+			len = lz_extend(in_next, matchptr, 4, max_len);
+			if (len > best_len) {
+				best_len = len;
+				best_matchptr = matchptr;
+			}
+		}
+	} else {
+		if (cur_node <= cutoff)
+			goto out;
+		matchptr = &in_base[cur_node];
+		if (load_u32_unaligned(matchptr) == seq) {
+			best_len = lz_extend(in_next, matchptr, 4, max_len);
+			best_matchptr = matchptr;
+		}
+	}
+#else
+	/* Generic version for HT_MATCHFINDER_BUCKET_SIZE > 2 */
+	to_insert = cur_pos;
+	for (i = 0; i < HT_MATCHFINDER_BUCKET_SIZE; i++) {
+		cur_node = mf->hash_tab[hash][i];
+		mf->hash_tab[hash][i] = to_insert;
+		if (cur_node <= cutoff)
+			goto out;
+		matchptr = &in_base[cur_node];
+		if (load_u32_unaligned(matchptr) == seq) {
+			len = lz_extend(in_next, matchptr, 4, max_len);
+			if (len > best_len) {
+				best_len = len;
+				best_matchptr = matchptr;
+				if (best_len >= nice_len)
+					goto out;
+			}
+		}
+		to_insert = cur_node;
+	}
+#endif
+out:
+	*offset_ret = in_next - best_matchptr;
+	return best_len;
+}
+
+static forceinline void
+ht_matchfinder_skip_bytes(struct ht_matchfinder * const mf,
+			  const u8 ** const in_base_p,
+			  const u8 *in_next,
+			  const u8 * const in_end,
+			  const u32 count,
+			  u32 * const next_hash)
+{
+	s32 cur_pos = in_next - *in_base_p;
+	u32 hash;
+	u32 remaining = count;
+	int i;
+
+	if (unlikely(count + HT_MATCHFINDER_REQUIRED_NBYTES > in_end - in_next))
+		return;
+
+	if (cur_pos + count - 1 >= MATCHFINDER_WINDOW_SIZE) {
+		ht_matchfinder_slide_window(mf);
+		*in_base_p += MATCHFINDER_WINDOW_SIZE;
+		cur_pos -= MATCHFINDER_WINDOW_SIZE;
+	}
+
+	hash = *next_hash;
+	do {
+		for (i = HT_MATCHFINDER_BUCKET_SIZE - 1; i > 0; i--)
+			mf->hash_tab[hash][i] = mf->hash_tab[hash][i - 1];
+		mf->hash_tab[hash][0] = cur_pos;
+
+		hash = lz_hash(get_unaligned_le32(++in_next),
+			       HT_MATCHFINDER_HASH_ORDER);
+		cur_pos++;
+	} while (--remaining);
+
+	prefetchw(&mf->hash_tab[hash]);
+	*next_hash = hash;
+}
+
+#endif /* LIB_HT_MATCHFINDER_H */
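
The bucket-of-two scheme above is easier to see in isolation. Here is a minimal standalone sketch (hypothetical toy_* names, not libdeflate code) of the core idea: each hash bucket keeps the two most recent positions, an insertion shifts the old head into the second slot, and the returned prior position is the candidate a match search would probe.

#include <stdint.h>
#include <stdio.h>

#define HASH_ORDER  15
#define BUCKET_SIZE 2

/* Toy hash table; -1 marks an empty slot.  (The real code instead
 * detects stale entries by comparing against a window cutoff.) */
static int32_t hash_tab[1UL << HASH_ORDER][BUCKET_SIZE];

static uint32_t toy_hash(uint32_t seq)
{
	/* Same multiplicative hash as lz_hash() in matchfinder_common.h */
	return (uint32_t)(seq * 0x1E35A7BD) >> (32 - HASH_ORDER);
}

/* Record that position 'pos' starts the 4-byte sequence 'seq'; return
 * the most recent earlier position with the same hash (the candidate
 * that a match search would extend). */
static int32_t toy_insert(uint32_t seq, int32_t pos)
{
	uint32_t h = toy_hash(seq);
	int32_t prev = hash_tab[h][0];

	hash_tab[h][1] = prev;	/* old head slides into slot 1 */
	hash_tab[h][0] = pos;	/* new position becomes the head */
	return prev;
}

int main(void)
{
	for (size_t i = 0; i < (1UL << HASH_ORDER); i++)
		hash_tab[i][0] = hash_tab[i][1] = -1;

	toy_insert(0x64636261, 0);                  /* "abcd" at position 0 */
	int32_t cand = toy_insert(0x64636261, 100); /* "abcd" again at 100 */
	printf("candidate match position: %d\n", (int)cand); /* prints 0 */
	return 0;
}

Fixing BUCKET_SIZE at compile time is exactly the trade-off the header comment describes: no chain walking and minimal memory traffic, at the cost of forgetting all but the two most recent positions per hash.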
data/ext/deflate_ruby/libdeflate/lib/lib_common.h
@@ -0,0 +1,106 @@
+/*
+ * lib_common.h - internal header included by all library code
+ */
+
+#ifndef LIB_LIB_COMMON_H
+#define LIB_LIB_COMMON_H
+
+#ifdef LIBDEFLATE_H
+/*
+ * When building the library, LIBDEFLATEAPI needs to be defined properly before
+ * including libdeflate.h.
+ */
+#  error "lib_common.h must always be included before libdeflate.h"
+#endif
+
+#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
+#  define LIBDEFLATE_EXPORT_SYM  __declspec(dllexport)
+#elif defined(__GNUC__)
+#  define LIBDEFLATE_EXPORT_SYM  __attribute__((visibility("default")))
+#else
+#  define LIBDEFLATE_EXPORT_SYM
+#endif
+
+/*
+ * On i386, gcc assumes that the stack is 16-byte aligned at function entry.
+ * However, some compilers (e.g. MSVC) and programming languages (e.g. Delphi)
+ * only guarantee 4-byte alignment when calling functions.  This is mainly an
+ * issue on Windows, but it has been seen on Linux too.  Work around this ABI
+ * incompatibility by realigning the stack pointer when entering libdeflate.
+ * This prevents crashes in SSE/AVX code.
+ */
+#if defined(__GNUC__) && defined(__i386__)
+#  define LIBDEFLATE_ALIGN_STACK  __attribute__((force_align_arg_pointer))
+#else
+#  define LIBDEFLATE_ALIGN_STACK
+#endif
+
+#define LIBDEFLATEAPI	LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
+
+#include "../common_defs.h"
+
+typedef void *(*malloc_func_t)(size_t);
+typedef void (*free_func_t)(void *);
+
+extern malloc_func_t libdeflate_default_malloc_func;
+extern free_func_t libdeflate_default_free_func;
+
+void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
+				size_t alignment, size_t size);
+void libdeflate_aligned_free(free_func_t free_func, void *ptr);
+
+#ifdef FREESTANDING
+/*
+ * With -ffreestanding, <string.h> may be missing, and we must provide
+ * implementations of memset(), memcpy(), memmove(), and memcmp().
+ * See https://gcc.gnu.org/onlinedocs/gcc/Standards.html
+ *
+ * Also, -ffreestanding disables interpreting calls to these functions as
+ * built-ins.  E.g., calling memcpy(&v, p, WORDBYTES) will make a function call,
+ * not be optimized to a single load instruction.  For performance reasons we
+ * don't want that.  So, declare these functions as macros that expand to the
+ * corresponding built-ins.  This approach is recommended in the gcc man page.
+ * We still need the actual function definitions in case gcc calls them.
+ */
+void *memset(void *s, int c, size_t n);
+#define memset(s, c, n)		__builtin_memset((s), (c), (n))
+
+void *memcpy(void *dest, const void *src, size_t n);
+#define memcpy(dest, src, n)	__builtin_memcpy((dest), (src), (n))
+
+void *memmove(void *dest, const void *src, size_t n);
+#define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))
+
+int memcmp(const void *s1, const void *s2, size_t n);
+#define memcmp(s1, s2, n)	__builtin_memcmp((s1), (s2), (n))
+
+#undef LIBDEFLATE_ENABLE_ASSERTIONS
+#else
+#  include <string.h>
+/*
+ * To prevent false positive static analyzer warnings, ensure that assertions
+ * are visible to the static analyzer.
+ */
+#  ifdef __clang_analyzer__
+#    define LIBDEFLATE_ENABLE_ASSERTIONS
+#  endif
+#endif
+
+/*
+ * Runtime assertion support.  Don't enable this in production builds; it may
+ * hurt performance significantly.
+ */
+#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
+NORETURN void
+libdeflate_assertion_failed(const char *expr, const char *file, int line);
+#define ASSERT(expr) { if (unlikely(!(expr))) \
+	libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
+#else
+#define ASSERT(expr) (void)(expr)
+#endif
+
+#define CONCAT_IMPL(a, b)	a##b
+#define CONCAT(a, b)		CONCAT_IMPL(a, b)
+#define ADD_SUFFIX(name)	CONCAT(name, SUFFIX)
+
+#endif /* LIB_LIB_COMMON_H */
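
libdeflate_aligned_malloc(), declared above, is defined in lib/utils.c (file 38 in the listing) rather than in this excerpt. As a rough sketch of what such a wrapper conventionally does over a caller-supplied allocator — a common textbook approach, not the package's actual implementation:

#include <stdint.h>
#include <stdlib.h>

typedef void *(*malloc_func_t)(size_t);
typedef void (*free_func_t)(void *);

/* Over-allocate, round the pointer up to 'alignment' (assumed to be a
 * power of two), and stash the raw pointer just below the aligned
 * block so the matching free can recover it. */
void *toy_aligned_malloc(malloc_func_t malloc_func,
			 size_t alignment, size_t size)
{
	void *raw = malloc_func(size + alignment - 1 + sizeof(void *));
	uintptr_t aligned;

	if (raw == NULL)
		return NULL;
	aligned = ((uintptr_t)raw + sizeof(void *) + alignment - 1) &
		  ~(uintptr_t)(alignment - 1);
	((void **)aligned)[-1] = raw;	/* always >= raw, so in bounds */
	return (void *)aligned;
}

void toy_aligned_free(free_func_t free_func, void *ptr)
{
	if (ptr != NULL)
		free_func(((void **)ptr)[-1]);
}

MATCHFINDER_MEM_ALIGNMENT (32 bytes, for the AVX2 paths) is the kind of requirement this interface exists to satisfy, since plain malloc() only guarantees alignment suitable for standard types.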
data/ext/deflate_ruby/libdeflate/lib/matchfinder_common.h
@@ -0,0 +1,224 @@
+/*
+ * matchfinder_common.h - common code for Lempel-Ziv matchfinding
+ */
+
+#ifndef LIB_MATCHFINDER_COMMON_H
+#define LIB_MATCHFINDER_COMMON_H
+
+#include "lib_common.h"
+
+#ifndef MATCHFINDER_WINDOW_ORDER
+#  error "MATCHFINDER_WINDOW_ORDER must be defined!"
+#endif
+
+/*
+ * Given a 32-bit value that was loaded with the platform's native endianness,
+ * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
+ * bits contain the first 3 bytes, arranged in octets in a platform-dependent
+ * order, at the memory location from which the input 32-bit value was loaded.
+ */
+static forceinline u32
+loaded_u32_to_u24(u32 v)
+{
+	if (CPU_IS_LITTLE_ENDIAN())
+		return v & 0xFFFFFF;
+	else
+		return v >> 8;
+}
+
+/*
+ * Load the next 3 bytes from @p into the 24 low-order bits of a 32-bit value.
+ * The order in which the 3 bytes will be arranged as octets in the 24 bits is
+ * platform-dependent.  At least 4 bytes (not 3) must be available at @p.
+ */
+static forceinline u32
+load_u24_unaligned(const u8 *p)
+{
+#if UNALIGNED_ACCESS_IS_FAST
+	return loaded_u32_to_u24(load_u32_unaligned(p));
+#else
+	if (CPU_IS_LITTLE_ENDIAN())
+		return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
+	else
+		return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16);
+#endif
+}
+
+#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)
+
+typedef s16 mf_pos_t;
+
+#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
+
+/*
+ * This is the memory address alignment, in bytes, required for the matchfinder
+ * buffers by the architecture-specific implementations of matchfinder_init()
+ * and matchfinder_rebase().  "Matchfinder buffer" means an entire struct
+ * hc_matchfinder, bt_matchfinder, or ht_matchfinder; the next_tab field of
+ * struct hc_matchfinder; or the child_tab field of struct bt_matchfinder.
+ *
+ * This affects how the entire 'struct deflate_compressor' is allocated, since
+ * the matchfinder structures are embedded inside it.
+ *
+ * Currently the maximum memory address alignment required is 32 bytes, needed
+ * by the AVX-2 matchfinder functions.
+ */
+#define MATCHFINDER_MEM_ALIGNMENT 32
+
+/*
+ * This declares a size, in bytes, that is guaranteed to divide the sizes of the
+ * matchfinder buffers (where "matchfinder buffers" is as defined for
+ * MATCHFINDER_MEM_ALIGNMENT).  The architecture-specific implementations of
+ * matchfinder_init() and matchfinder_rebase() take advantage of this value.
+ *
+ * Currently the maximum size alignment required is 128 bytes, needed by
+ * the AVX-2 matchfinder functions.  However, the RISC-V Vector Extension
+ * matchfinder functions can, in principle, take advantage of a larger size
+ * alignment.  Therefore, we set this to 1024, which still easily divides the
+ * actual sizes that result from the current matchfinder struct definitions.
+ * This value can safely be changed to any power of two that is >= 128.
+ */
+#define MATCHFINDER_SIZE_ALIGNMENT 1024
+
+#undef matchfinder_init
+#undef matchfinder_rebase
+#ifdef _aligned_attribute
+#  define MATCHFINDER_ALIGNED _aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
+#  if defined(ARCH_ARM32) || defined(ARCH_ARM64)
+#    include "arm/matchfinder_impl.h"
+#  elif defined(ARCH_RISCV)
+#    include "riscv/matchfinder_impl.h"
+#  elif defined(ARCH_X86_32) || defined(ARCH_X86_64)
+#    include "x86/matchfinder_impl.h"
+#  endif
+#else
+#  define MATCHFINDER_ALIGNED
+#endif
+
+/*
+ * Initialize the hash table portion of the matchfinder.
+ *
+ * Essentially, this is an optimized memset().
+ *
+ * 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and
+ * 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
+ */
+#ifndef matchfinder_init
+static forceinline void
+matchfinder_init(mf_pos_t *data, size_t size)
+{
+	size_t num_entries = size / sizeof(*data);
+	size_t i;
+
+	for (i = 0; i < num_entries; i++)
+		data[i] = MATCHFINDER_INITVAL;
+}
+#endif
+
+/*
+ * Slide the matchfinder by MATCHFINDER_WINDOW_SIZE bytes.
+ *
+ * This must be called just after each MATCHFINDER_WINDOW_SIZE bytes have been
+ * run through the matchfinder.
+ *
+ * This subtracts MATCHFINDER_WINDOW_SIZE bytes from each entry in the given
+ * array, making the entries be relative to the current position rather than the
+ * position MATCHFINDER_WINDOW_SIZE bytes prior.  To avoid integer underflows,
+ * entries that would become less than -MATCHFINDER_WINDOW_SIZE stay at
+ * -MATCHFINDER_WINDOW_SIZE, keeping them permanently out of bounds.
+ *
+ * The given array must contain all matchfinder data that is position-relative:
+ * the hash table(s) as well as any hash chain or binary tree links.  Its
+ * address must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and its size
+ * must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
+ */
+#ifndef matchfinder_rebase
+static forceinline void
+matchfinder_rebase(mf_pos_t *data, size_t size)
+{
+	size_t num_entries = size / sizeof(*data);
+	size_t i;
+
+	if (MATCHFINDER_WINDOW_SIZE == 32768) {
+		/*
+		 * Branchless version for 32768-byte windows.  Clear all bits if
+		 * the value was already negative, then set the sign bit.  This
+		 * is equivalent to subtracting 32768 with signed saturation.
+		 */
+		for (i = 0; i < num_entries; i++)
+			data[i] = 0x8000 | (data[i] & ~(data[i] >> 15));
+	} else {
+		for (i = 0; i < num_entries; i++) {
+			if (data[i] >= 0)
+				data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
+			else
+				data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
+		}
+	}
+}
+#endif
+
+/*
+ * The hash function: given a sequence prefix held in the low-order bits of a
+ * 32-bit value, multiply by a carefully-chosen large constant.  Discard any
+ * bits of the product that don't fit in a 32-bit value, but take the
+ * next-highest @num_bits bits of the product as the hash value, as those have
+ * the most randomness.
+ */
+static forceinline u32
+lz_hash(u32 seq, unsigned num_bits)
+{
+	return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
+}
+
+/*
+ * Return the number of bytes at @matchptr that match the bytes at @strptr, up
+ * to a maximum of @max_len.  Initially, @start_len bytes are matched.
+ */
+static forceinline unsigned
+lz_extend(const u8 * const strptr, const u8 * const matchptr,
+	  const unsigned start_len, const unsigned max_len)
+{
+	unsigned len = start_len;
+	machine_word_t v_word;
+
+	if (UNALIGNED_ACCESS_IS_FAST) {
+
+		if (likely(max_len - len >= 4 * WORDBYTES)) {
+
+#define COMPARE_WORD_STEP					\
+	v_word = load_word_unaligned(&matchptr[len]) ^		\
+		 load_word_unaligned(&strptr[len]);		\
+	if (v_word != 0)					\
+		goto word_differs;				\
+	len += WORDBYTES;					\
+
+			COMPARE_WORD_STEP
+			COMPARE_WORD_STEP
+			COMPARE_WORD_STEP
+			COMPARE_WORD_STEP
+#undef COMPARE_WORD_STEP
+		}
+
+		while (len + WORDBYTES <= max_len) {
+			v_word = load_word_unaligned(&matchptr[len]) ^
+				 load_word_unaligned(&strptr[len]);
+			if (v_word != 0)
+				goto word_differs;
+			len += WORDBYTES;
+		}
+	}
+
+	while (len < max_len && matchptr[len] == strptr[len])
+		len++;
+	return len;
+
+word_differs:
+	if (CPU_IS_LITTLE_ENDIAN())
+		len += (bsfw(v_word) >> 3);
+	else
+		len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
+	return len;
+}
+
+#endif /* LIB_MATCHFINDER_COMMON_H */
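
The branchless expression in matchfinder_rebase() can be checked exhaustively against a plain saturating subtraction. A small self-contained test, assuming two's-complement int16_t and an arithmetic right shift of negative signed values (both hold on the compilers libdeflate targets):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Saturating subtraction of 32768 from an int16_t, the obvious way. */
static int16_t rebase_reference(int16_t x)
{
	int32_t v = (int32_t)x - 32768;

	return (v < INT16_MIN) ? INT16_MIN : (int16_t)v;
}

/* The branchless form used above: if x was negative, x >> 15 is all
 * ones, so the AND clears every bit; the OR then sets the sign bit. */
static int16_t rebase_branchless(int16_t x)
{
	return (int16_t)(0x8000 | (x & ~(x >> 15)));
}

int main(void)
{
	for (int32_t i = INT16_MIN; i <= INT16_MAX; i++)
		assert(rebase_reference((int16_t)i) ==
		       rebase_branchless((int16_t)i));
	puts("branchless rebase == saturating subtract for all int16 values");
	return 0;
}

For a 32768-byte window this replaces a compare-and-branch per entry with a few bitwise ops, and rebase touches every position-relative entry in the matchfinder (65536 of them in ht_matchfinder's hash table), so the branchless form is worthwhile.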
data/ext/deflate_ruby/libdeflate/lib/riscv/matchfinder_impl.h
@@ -0,0 +1,97 @@
+/*
+ * riscv/matchfinder_impl.h - RISC-V implementations of matchfinder functions
+ *
+ * Copyright 2024 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LIB_RISCV_MATCHFINDER_IMPL_H
+#define LIB_RISCV_MATCHFINDER_IMPL_H
+
+#if defined(ARCH_RISCV) && defined(__riscv_vector)
+#include <riscv_vector.h>
+
+/*
+ * Return the maximum number of 16-bit (mf_pos_t) elements that fit in 8 RISC-V
+ * vector registers and also evenly divide the sizes of the matchfinder buffers.
+ */
+static forceinline size_t
+riscv_matchfinder_vl(void)
+{
+	const size_t vl = __riscv_vsetvlmax_e16m8();
+
+	STATIC_ASSERT(sizeof(mf_pos_t) == sizeof(s16));
+	/*
+	 * MATCHFINDER_SIZE_ALIGNMENT is a power of 2, as is 'vl' because the
+	 * RISC-V Vector Extension requires that the vector register length
+	 * (VLEN) be a power of 2.  Thus, a simple MIN() gives the correct
+	 * answer here; rounding to a power of 2 is not required.
+	 */
+	STATIC_ASSERT((MATCHFINDER_SIZE_ALIGNMENT &
+		       (MATCHFINDER_SIZE_ALIGNMENT - 1)) == 0);
+	ASSERT((vl & (vl - 1)) == 0);
+	return MIN(vl, MATCHFINDER_SIZE_ALIGNMENT / sizeof(mf_pos_t));
+}
+
+/* matchfinder_init() optimized using the RISC-V Vector Extension */
+static forceinline void
+matchfinder_init_rvv(mf_pos_t *p, size_t size)
+{
+	const size_t vl = riscv_matchfinder_vl();
+	const vint16m8_t v = __riscv_vmv_v_x_i16m8(MATCHFINDER_INITVAL, vl);
+
+	ASSERT(size > 0 && size % (vl * sizeof(p[0])) == 0);
+	do {
+		__riscv_vse16_v_i16m8(p, v, vl);
+		p += vl;
+		size -= vl * sizeof(p[0]);
+	} while (size != 0);
+}
+#define matchfinder_init matchfinder_init_rvv
+
+/* matchfinder_rebase() optimized using the RISC-V Vector Extension */
+static forceinline void
+matchfinder_rebase_rvv(mf_pos_t *p, size_t size)
+{
+	const size_t vl = riscv_matchfinder_vl();
+
+	ASSERT(size > 0 && size % (vl * sizeof(p[0])) == 0);
+	do {
+		vint16m8_t v = __riscv_vle16_v_i16m8(p, vl);
+
+		/*
+		 * This should generate the vsadd.vx instruction
+		 * (Vector Saturating Add, integer vector-scalar)
+		 */
+		v = __riscv_vsadd_vx_i16m8(v, (s16)-MATCHFINDER_WINDOW_SIZE,
+					   vl);
+		__riscv_vse16_v_i16m8(p, v, vl);
+		p += vl;
+		size -= vl * sizeof(p[0]);
+	} while (size != 0);
+}
+#define matchfinder_rebase matchfinder_rebase_rvv
+
+#endif /* ARCH_RISCV && __riscv_vector */
+
+#endif /* LIB_RISCV_MATCHFINDER_IMPL_H */
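
The RVV loops above run with no scalar tail, which is safe only because riscv_matchfinder_vl() clamps the vector length to evenly divide every matchfinder buffer size (always a multiple of MATCHFINDER_SIZE_ALIGNMENT). The same strip-mining shape in portable C, with a hypothetical constant CHUNK standing in for vl:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define CHUNK 64	/* stand-in for vl: a power of two dividing the buffer */

void chunked_fill(int16_t *p, size_t size, int16_t val)
{
	/* 'size' is in bytes and, like the matchfinder buffers, must be
	 * a nonzero multiple of CHUNK * sizeof(*p), so the do/while
	 * needs no remainder loop -- mirroring matchfinder_init_rvv(). */
	assert(size > 0 && size % (CHUNK * sizeof(*p)) == 0);
	do {
		for (size_t i = 0; i < CHUNK; i++)	/* one vse16-sized store */
			p[i] = val;
		p += CHUNK;
		size -= CHUNK * sizeof(*p);
	} while (size != 0);
}

Because VLEN is a power of two and MATCHFINDER_SIZE_ALIGNMENT is 1024 bytes, the MIN() in riscv_matchfinder_vl() alone yields a chunk size that divides the buffer; no rounding step is needed, exactly as its STATIC_ASSERTs document.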