deflate-ruby 1.0.1 → 1.0.3

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.
Files changed (139)
  1. checksums.yaml +4 -4
  2. data/CLAUDE.md +95 -92
  3. data/GEM_VERIFICATION_REPORT.md +140 -0
  4. data/LICENSE.txt +6 -6
  5. data/README.md +87 -65
  6. data/Rakefile +23 -0
  7. data/ext/deflate_ruby/{libdeflate/lib/x86/adler32_impl.h → adler32_impl.h} +8 -7
  8. data/ext/deflate_ruby/common_defs.h +748 -0
  9. data/ext/deflate_ruby/{libdeflate/lib/x86/cpu_features.c → cpu_features.c} +46 -16
  10. data/ext/deflate_ruby/{libdeflate/lib/x86/cpu_features.h → cpu_features.h} +2 -1
  11. data/ext/deflate_ruby/{libdeflate/lib/x86/crc32_impl.h → crc32_impl.h} +22 -23
  12. data/ext/deflate_ruby/{libdeflate/lib/crc32_multipliers.h → crc32_multipliers.h} +2 -4
  13. data/ext/deflate_ruby/{libdeflate/lib/x86/crc32_pclmul_template.h → crc32_pclmul_template.h} +23 -94
  14. data/ext/deflate_ruby/{libdeflate/lib/crc32_tables.h → crc32_tables.h} +1 -1
  15. data/ext/deflate_ruby/{libdeflate/lib/deflate_compress.c → deflate_compress.c} +59 -60
  16. data/ext/deflate_ruby/deflate_ruby.c +392 -218
  17. data/ext/deflate_ruby/deflate_ruby.h +6 -0
  18. data/ext/deflate_ruby/extconf.rb +35 -25
  19. data/ext/deflate_ruby/libdeflate/adler32.c +162 -0
  20. data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/adler32_impl.h +14 -7
  21. data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/crc32_impl.h +25 -31
  22. data/ext/deflate_ruby/libdeflate/arm/crc32_pmull_helpers.h +156 -0
  23. data/ext/deflate_ruby/libdeflate/arm/crc32_pmull_wide.h +226 -0
  24. data/ext/deflate_ruby/libdeflate/bt_matchfinder.h +342 -0
  25. data/ext/deflate_ruby/libdeflate/common_defs.h +2 -1
  26. data/ext/deflate_ruby/libdeflate/cpu_features_common.h +93 -0
  27. data/ext/deflate_ruby/libdeflate/crc32.c +262 -0
  28. data/ext/deflate_ruby/libdeflate/crc32_multipliers.h +375 -0
  29. data/ext/deflate_ruby/libdeflate/crc32_tables.h +587 -0
  30. data/ext/deflate_ruby/libdeflate/decompress_template.h +777 -0
  31. data/ext/deflate_ruby/libdeflate/deflate_compress.c +4128 -0
  32. data/ext/deflate_ruby/libdeflate/deflate_compress.h +15 -0
  33. data/ext/deflate_ruby/libdeflate/deflate_constants.h +56 -0
  34. data/ext/deflate_ruby/libdeflate/deflate_decompress.c +1208 -0
  35. data/ext/deflate_ruby/libdeflate/gzip_compress.c +90 -0
  36. data/ext/deflate_ruby/libdeflate/gzip_constants.h +45 -0
  37. data/ext/deflate_ruby/libdeflate/gzip_decompress.c +144 -0
  38. data/ext/deflate_ruby/libdeflate/hc_matchfinder.h +401 -0
  39. data/ext/deflate_ruby/libdeflate/ht_matchfinder.h +234 -0
  40. data/ext/deflate_ruby/libdeflate/lib_common.h +106 -0
  41. data/ext/deflate_ruby/libdeflate/libdeflate.h +2 -2
  42. data/ext/deflate_ruby/libdeflate/{lib/matchfinder_common.h → matchfinder_common.h} +3 -3
  43. data/ext/deflate_ruby/libdeflate/x86/adler32_impl.h +135 -0
  44. data/ext/deflate_ruby/libdeflate/x86/adler32_template.h +518 -0
  45. data/ext/deflate_ruby/libdeflate/x86/cpu_features.c +213 -0
  46. data/ext/deflate_ruby/libdeflate/x86/cpu_features.h +170 -0
  47. data/ext/deflate_ruby/libdeflate/x86/crc32_impl.h +159 -0
  48. data/ext/deflate_ruby/libdeflate/x86/crc32_pclmul_template.h +424 -0
  49. data/ext/deflate_ruby/libdeflate/x86/decompress_impl.h +57 -0
  50. data/ext/deflate_ruby/libdeflate.h +411 -0
  51. data/ext/deflate_ruby/matchfinder_common.h +224 -0
  52. data/ext/deflate_ruby/matchfinder_impl.h +122 -0
  53. data/ext/deflate_ruby/utils.c +141 -0
  54. data/ext/deflate_ruby/zlib_compress.c +82 -0
  55. data/ext/deflate_ruby/zlib_constants.h +21 -0
  56. data/ext/deflate_ruby/zlib_decompress.c +104 -0
  57. data/lib/deflate_ruby/version.rb +1 -1
  58. data/lib/deflate_ruby.rb +1 -63
  59. data/sig/deflate_ruby.rbs +4 -0
  60. data/test/test_deflate_ruby.rb +220 -0
  61. data/test/test_helper.rb +6 -0
  62. metadata +90 -144
  63. data/ext/deflate_ruby/libdeflate/CMakeLists.txt +0 -270
  64. data/ext/deflate_ruby/libdeflate/NEWS.md +0 -494
  65. data/ext/deflate_ruby/libdeflate/README.md +0 -228
  66. data/ext/deflate_ruby/libdeflate/libdeflate-config.cmake.in +0 -3
  67. data/ext/deflate_ruby/libdeflate/libdeflate.pc.in +0 -18
  68. data/ext/deflate_ruby/libdeflate/programs/CMakeLists.txt +0 -105
  69. data/ext/deflate_ruby/libdeflate/programs/benchmark.c +0 -696
  70. data/ext/deflate_ruby/libdeflate/programs/checksum.c +0 -218
  71. data/ext/deflate_ruby/libdeflate/programs/config.h.in +0 -19
  72. data/ext/deflate_ruby/libdeflate/programs/gzip.c +0 -688
  73. data/ext/deflate_ruby/libdeflate/programs/prog_util.c +0 -521
  74. data/ext/deflate_ruby/libdeflate/programs/prog_util.h +0 -225
  75. data/ext/deflate_ruby/libdeflate/programs/test_checksums.c +0 -200
  76. data/ext/deflate_ruby/libdeflate/programs/test_custom_malloc.c +0 -155
  77. data/ext/deflate_ruby/libdeflate/programs/test_incomplete_codes.c +0 -385
  78. data/ext/deflate_ruby/libdeflate/programs/test_invalid_streams.c +0 -130
  79. data/ext/deflate_ruby/libdeflate/programs/test_litrunlen_overflow.c +0 -72
  80. data/ext/deflate_ruby/libdeflate/programs/test_overread.c +0 -95
  81. data/ext/deflate_ruby/libdeflate/programs/test_slow_decompression.c +0 -472
  82. data/ext/deflate_ruby/libdeflate/programs/test_trailing_bytes.c +0 -151
  83. data/ext/deflate_ruby/libdeflate/programs/test_util.c +0 -237
  84. data/ext/deflate_ruby/libdeflate/programs/test_util.h +0 -61
  85. data/ext/deflate_ruby/libdeflate/programs/tgetopt.c +0 -118
  86. data/ext/deflate_ruby/libdeflate/scripts/android_build.sh +0 -118
  87. data/ext/deflate_ruby/libdeflate/scripts/android_tests.sh +0 -69
  88. data/ext/deflate_ruby/libdeflate/scripts/benchmark.sh +0 -10
  89. data/ext/deflate_ruby/libdeflate/scripts/checksum.sh +0 -10
  90. data/ext/deflate_ruby/libdeflate/scripts/checksum_benchmarks.sh +0 -253
  91. data/ext/deflate_ruby/libdeflate/scripts/cmake-helper.sh +0 -17
  92. data/ext/deflate_ruby/libdeflate/scripts/deflate_benchmarks.sh +0 -119
  93. data/ext/deflate_ruby/libdeflate/scripts/exec_tests.sh +0 -38
  94. data/ext/deflate_ruby/libdeflate/scripts/gen-release-archives.sh +0 -37
  95. data/ext/deflate_ruby/libdeflate/scripts/gen_bitreverse_tab.py +0 -19
  96. data/ext/deflate_ruby/libdeflate/scripts/gen_crc32_multipliers.c +0 -199
  97. data/ext/deflate_ruby/libdeflate/scripts/gen_crc32_tables.c +0 -105
  98. data/ext/deflate_ruby/libdeflate/scripts/gen_default_litlen_costs.py +0 -44
  99. data/ext/deflate_ruby/libdeflate/scripts/gen_offset_slot_map.py +0 -29
  100. data/ext/deflate_ruby/libdeflate/scripts/gzip_tests.sh +0 -523
  101. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_compress/corpus/0 +0 -0
  102. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_compress/fuzz.c +0 -95
  103. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_decompress/corpus/0 +0 -3
  104. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_decompress/fuzz.c +0 -62
  105. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/fuzz.sh +0 -108
  106. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/gzip_decompress/corpus/0 +0 -0
  107. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/gzip_decompress/fuzz.c +0 -19
  108. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/zlib_decompress/corpus/0 +0 -3
  109. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/zlib_decompress/fuzz.c +0 -19
  110. data/ext/deflate_ruby/libdeflate/scripts/run_tests.sh +0 -416
  111. data/ext/deflate_ruby/libdeflate/scripts/toolchain-i686-w64-mingw32.cmake +0 -8
  112. data/ext/deflate_ruby/libdeflate/scripts/toolchain-x86_64-w64-mingw32.cmake +0 -8
  113. /data/ext/deflate_ruby/{libdeflate/lib/adler32.c → adler32.c} +0 -0
  114. /data/ext/deflate_ruby/{libdeflate/lib/x86/adler32_template.h → adler32_template.h} +0 -0
  115. /data/ext/deflate_ruby/{libdeflate/lib/bt_matchfinder.h → bt_matchfinder.h} +0 -0
  116. /data/ext/deflate_ruby/{libdeflate/lib/cpu_features_common.h → cpu_features_common.h} +0 -0
  117. /data/ext/deflate_ruby/{libdeflate/lib/crc32.c → crc32.c} +0 -0
  118. /data/ext/deflate_ruby/{libdeflate/lib/arm/crc32_pmull_helpers.h → crc32_pmull_helpers.h} +0 -0
  119. /data/ext/deflate_ruby/{libdeflate/lib/arm/crc32_pmull_wide.h → crc32_pmull_wide.h} +0 -0
  120. /data/ext/deflate_ruby/{libdeflate/lib/x86/decompress_impl.h → decompress_impl.h} +0 -0
  121. /data/ext/deflate_ruby/{libdeflate/lib/decompress_template.h → decompress_template.h} +0 -0
  122. /data/ext/deflate_ruby/{libdeflate/lib/deflate_compress.h → deflate_compress.h} +0 -0
  123. /data/ext/deflate_ruby/{libdeflate/lib/deflate_constants.h → deflate_constants.h} +0 -0
  124. /data/ext/deflate_ruby/{libdeflate/lib/deflate_decompress.c → deflate_decompress.c} +0 -0
  125. /data/ext/deflate_ruby/{libdeflate/lib/gzip_compress.c → gzip_compress.c} +0 -0
  126. /data/ext/deflate_ruby/{libdeflate/lib/gzip_constants.h → gzip_constants.h} +0 -0
  127. /data/ext/deflate_ruby/{libdeflate/lib/gzip_decompress.c → gzip_decompress.c} +0 -0
  128. /data/ext/deflate_ruby/{libdeflate/lib/hc_matchfinder.h → hc_matchfinder.h} +0 -0
  129. /data/ext/deflate_ruby/{libdeflate/lib/ht_matchfinder.h → ht_matchfinder.h} +0 -0
  130. /data/ext/deflate_ruby/{libdeflate/lib/lib_common.h → lib_common.h} +0 -0
  131. /data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/cpu_features.c +0 -0
  132. /data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/cpu_features.h +0 -0
  133. /data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/matchfinder_impl.h +0 -0
  134. /data/ext/deflate_ruby/libdeflate/{lib/riscv → riscv}/matchfinder_impl.h +0 -0
  135. /data/ext/deflate_ruby/libdeflate/{lib/utils.c → utils.c} +0 -0
  136. /data/ext/deflate_ruby/libdeflate/{lib/x86 → x86}/matchfinder_impl.h +0 -0
  137. /data/ext/deflate_ruby/libdeflate/{lib/zlib_compress.c → zlib_compress.c} +0 -0
  138. /data/ext/deflate_ruby/libdeflate/{lib/zlib_constants.h → zlib_constants.h} +0 -0
  139. /data/ext/deflate_ruby/libdeflate/{lib/zlib_decompress.c → zlib_decompress.c} +0 -0
@@ -0,0 +1,234 @@
1
+ /*
2
+ * ht_matchfinder.h - Lempel-Ziv matchfinding with a hash table
3
+ *
4
+ * Copyright 2022 Eric Biggers
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person
7
+ * obtaining a copy of this software and associated documentation
8
+ * files (the "Software"), to deal in the Software without
9
+ * restriction, including without limitation the rights to use,
10
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ * copies of the Software, and to permit persons to whom the
12
+ * Software is furnished to do so, subject to the following
13
+ * conditions:
14
+ *
15
+ * The above copyright notice and this permission notice shall be
16
+ * included in all copies or substantial portions of the Software.
17
+ *
18
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
+ * OTHER DEALINGS IN THE SOFTWARE.
26
+ *
27
+ * ---------------------------------------------------------------------------
28
+ *
29
+ * This is a Hash Table (ht) matchfinder.
30
+ *
31
+ * This is a variant of the Hash Chains (hc) matchfinder that is optimized for
32
+ * very fast compression. The ht_matchfinder stores the hash chains inline in
33
+ * the hash table, whereas the hc_matchfinder stores them in a separate array.
34
+ * Storing the hash chains inline is the faster method when max_search_depth
35
+ * (the maximum chain length) is very small. It is not appropriate when
36
+ * max_search_depth is larger, as then it uses too much memory.
37
+ *
38
+ * Due to its focus on speed, the ht_matchfinder doesn't support length 3
39
+ * matches. It also doesn't allow max_search_depth to vary at runtime; it is
40
+ * fixed at build time as HT_MATCHFINDER_BUCKET_SIZE.
41
+ *
42
+ * See hc_matchfinder.h for more information.
43
+ */
44
+
45
+ #ifndef LIB_HT_MATCHFINDER_H
46
+ #define LIB_HT_MATCHFINDER_H
47
+
48
+ #include "matchfinder_common.h"
49
+
50
+ #define HT_MATCHFINDER_HASH_ORDER 15
51
+ #define HT_MATCHFINDER_BUCKET_SIZE 2
52
+
53
+ #define HT_MATCHFINDER_MIN_MATCH_LEN 4
54
+ /* Minimum value of max_len for ht_matchfinder_longest_match() */
55
+ #define HT_MATCHFINDER_REQUIRED_NBYTES 5
56
+
57
+ struct MATCHFINDER_ALIGNED ht_matchfinder {
58
+ mf_pos_t hash_tab[1UL << HT_MATCHFINDER_HASH_ORDER]
59
+ [HT_MATCHFINDER_BUCKET_SIZE];
60
+ };
61
+
62
+ static forceinline void
63
+ ht_matchfinder_init(struct ht_matchfinder *mf)
64
+ {
65
+ STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
66
+
67
+ matchfinder_init((mf_pos_t *)mf, sizeof(*mf));
68
+ }
69
+
70
+ static forceinline void
71
+ ht_matchfinder_slide_window(struct ht_matchfinder *mf)
72
+ {
73
+ matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
74
+ }
75
+
76
+ /* Note: max_len must be >= HT_MATCHFINDER_REQUIRED_NBYTES */
77
+ static forceinline u32
78
+ ht_matchfinder_longest_match(struct ht_matchfinder * const mf,
79
+ const u8 ** const in_base_p,
80
+ const u8 * const in_next,
81
+ const u32 max_len,
82
+ const u32 nice_len,
83
+ u32 * const next_hash,
84
+ u32 * const offset_ret)
85
+ {
86
+ u32 best_len = 0;
87
+ const u8 *best_matchptr = in_next;
88
+ u32 cur_pos = in_next - *in_base_p;
89
+ const u8 *in_base;
90
+ mf_pos_t cutoff;
91
+ u32 hash;
92
+ u32 seq;
93
+ mf_pos_t cur_node;
94
+ const u8 *matchptr;
95
+ #if HT_MATCHFINDER_BUCKET_SIZE > 1
96
+ mf_pos_t to_insert;
97
+ u32 len;
98
+ #endif
99
+ #if HT_MATCHFINDER_BUCKET_SIZE > 2
100
+ int i;
101
+ #endif
102
+
103
+ /* This is assumed throughout this function. */
104
+ STATIC_ASSERT(HT_MATCHFINDER_MIN_MATCH_LEN == 4);
105
+
106
+ if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
107
+ ht_matchfinder_slide_window(mf);
108
+ *in_base_p += MATCHFINDER_WINDOW_SIZE;
109
+ cur_pos = 0;
110
+ }
111
+ in_base = *in_base_p;
112
+ cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
113
+
114
+ hash = *next_hash;
115
+ STATIC_ASSERT(HT_MATCHFINDER_REQUIRED_NBYTES == 5);
116
+ *next_hash = lz_hash(get_unaligned_le32(in_next + 1),
117
+ HT_MATCHFINDER_HASH_ORDER);
118
+ seq = load_u32_unaligned(in_next);
119
+ prefetchw(&mf->hash_tab[*next_hash]);
120
+ #if HT_MATCHFINDER_BUCKET_SIZE == 1
121
+ /* Hand-unrolled version for BUCKET_SIZE == 1 */
122
+ cur_node = mf->hash_tab[hash][0];
123
+ mf->hash_tab[hash][0] = cur_pos;
124
+ if (cur_node <= cutoff)
125
+ goto out;
126
+ matchptr = &in_base[cur_node];
127
+ if (load_u32_unaligned(matchptr) == seq) {
128
+ best_len = lz_extend(in_next, matchptr, 4, max_len);
129
+ best_matchptr = matchptr;
130
+ }
131
+ #elif HT_MATCHFINDER_BUCKET_SIZE == 2
132
+ /*
133
+ * Hand-unrolled version for BUCKET_SIZE == 2. The logic here also
134
+ * differs slightly in that it copies the first entry to the second even
135
+ * if nice_len is reached on the first, as this can be slightly faster.
136
+ */
137
+ cur_node = mf->hash_tab[hash][0];
138
+ mf->hash_tab[hash][0] = cur_pos;
139
+ if (cur_node <= cutoff)
140
+ goto out;
141
+ matchptr = &in_base[cur_node];
142
+
143
+ to_insert = cur_node;
144
+ cur_node = mf->hash_tab[hash][1];
145
+ mf->hash_tab[hash][1] = to_insert;
146
+
147
+ if (load_u32_unaligned(matchptr) == seq) {
148
+ best_len = lz_extend(in_next, matchptr, 4, max_len);
149
+ best_matchptr = matchptr;
150
+ if (cur_node <= cutoff || best_len >= nice_len)
151
+ goto out;
152
+ matchptr = &in_base[cur_node];
153
+ if (load_u32_unaligned(matchptr) == seq &&
154
+ load_u32_unaligned(matchptr + best_len - 3) ==
155
+ load_u32_unaligned(in_next + best_len - 3)) {
156
+ len = lz_extend(in_next, matchptr, 4, max_len);
157
+ if (len > best_len) {
158
+ best_len = len;
159
+ best_matchptr = matchptr;
160
+ }
161
+ }
162
+ } else {
163
+ if (cur_node <= cutoff)
164
+ goto out;
165
+ matchptr = &in_base[cur_node];
166
+ if (load_u32_unaligned(matchptr) == seq) {
167
+ best_len = lz_extend(in_next, matchptr, 4, max_len);
168
+ best_matchptr = matchptr;
169
+ }
170
+ }
171
+ #else
172
+ /* Generic version for HT_MATCHFINDER_BUCKET_SIZE > 2 */
173
+ to_insert = cur_pos;
174
+ for (i = 0; i < HT_MATCHFINDER_BUCKET_SIZE; i++) {
175
+ cur_node = mf->hash_tab[hash][i];
176
+ mf->hash_tab[hash][i] = to_insert;
177
+ if (cur_node <= cutoff)
178
+ goto out;
179
+ matchptr = &in_base[cur_node];
180
+ if (load_u32_unaligned(matchptr) == seq) {
181
+ len = lz_extend(in_next, matchptr, 4, max_len);
182
+ if (len > best_len) {
183
+ best_len = len;
184
+ best_matchptr = matchptr;
185
+ if (best_len >= nice_len)
186
+ goto out;
187
+ }
188
+ }
189
+ to_insert = cur_node;
190
+ }
191
+ #endif
192
+ out:
193
+ *offset_ret = in_next - best_matchptr;
194
+ return best_len;
195
+ }
196
+
197
+ static forceinline void
198
+ ht_matchfinder_skip_bytes(struct ht_matchfinder * const mf,
199
+ const u8 ** const in_base_p,
200
+ const u8 *in_next,
201
+ const u8 * const in_end,
202
+ const u32 count,
203
+ u32 * const next_hash)
204
+ {
205
+ s32 cur_pos = in_next - *in_base_p;
206
+ u32 hash;
207
+ u32 remaining = count;
208
+ int i;
209
+
210
+ if (unlikely(count + HT_MATCHFINDER_REQUIRED_NBYTES > in_end - in_next))
211
+ return;
212
+
213
+ if (cur_pos + count - 1 >= MATCHFINDER_WINDOW_SIZE) {
214
+ ht_matchfinder_slide_window(mf);
215
+ *in_base_p += MATCHFINDER_WINDOW_SIZE;
216
+ cur_pos -= MATCHFINDER_WINDOW_SIZE;
217
+ }
218
+
219
+ hash = *next_hash;
220
+ do {
221
+ for (i = HT_MATCHFINDER_BUCKET_SIZE - 1; i > 0; i--)
222
+ mf->hash_tab[hash][i] = mf->hash_tab[hash][i - 1];
223
+ mf->hash_tab[hash][0] = cur_pos;
224
+
225
+ hash = lz_hash(get_unaligned_le32(++in_next),
226
+ HT_MATCHFINDER_HASH_ORDER);
227
+ cur_pos++;
228
+ } while (--remaining);
229
+
230
+ prefetchw(&mf->hash_tab[hash]);
231
+ *next_hash = hash;
232
+ }
233
+
234
+ #endif /* LIB_HT_MATCHFINDER_H */
@@ -0,0 +1,106 @@
1
+ /*
2
+ * lib_common.h - internal header included by all library code
3
+ */
4
+
5
+ #ifndef LIB_LIB_COMMON_H
6
+ #define LIB_LIB_COMMON_H
7
+
8
+ #ifdef LIBDEFLATE_H
9
+ /*
10
+ * When building the library, LIBDEFLATEAPI needs to be defined properly before
11
+ * including libdeflate.h.
12
+ */
13
+ # error "lib_common.h must always be included before libdeflate.h"
14
+ #endif
15
+
16
+ #if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
17
+ # define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
18
+ #elif defined(__GNUC__)
19
+ # define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
20
+ #else
21
+ # define LIBDEFLATE_EXPORT_SYM
22
+ #endif
23
+
24
+ /*
25
+ * On i386, gcc assumes that the stack is 16-byte aligned at function entry.
26
+ * However, some compilers (e.g. MSVC) and programming languages (e.g. Delphi)
27
+ * only guarantee 4-byte alignment when calling functions. This is mainly an
28
+ * issue on Windows, but it has been seen on Linux too. Work around this ABI
29
+ * incompatibility by realigning the stack pointer when entering libdeflate.
30
+ * This prevents crashes in SSE/AVX code.
31
+ */
32
+ #if defined(__GNUC__) && defined(__i386__)
33
+ # define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
34
+ #else
35
+ # define LIBDEFLATE_ALIGN_STACK
36
+ #endif
37
+
38
+ #define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
39
+
40
+ #include "../common_defs.h"
41
+
42
+ typedef void *(*malloc_func_t)(size_t);
43
+ typedef void (*free_func_t)(void *);
44
+
45
+ extern malloc_func_t libdeflate_default_malloc_func;
46
+ extern free_func_t libdeflate_default_free_func;
47
+
48
+ void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
49
+ size_t alignment, size_t size);
50
+ void libdeflate_aligned_free(free_func_t free_func, void *ptr);
51
+
52
+ #ifdef FREESTANDING
53
+ /*
54
+ * With -ffreestanding, <string.h> may be missing, and we must provide
55
+ * implementations of memset(), memcpy(), memmove(), and memcmp().
56
+ * See https://gcc.gnu.org/onlinedocs/gcc/Standards.html
57
+ *
58
+ * Also, -ffreestanding disables interpreting calls to these functions as
59
+ * built-ins. E.g., calling memcpy(&v, p, WORDBYTES) will make a function call,
60
+ * not be optimized to a single load instruction. For performance reasons we
61
+ * don't want that. So, declare these functions as macros that expand to the
62
+ * corresponding built-ins. This approach is recommended in the gcc man page.
63
+ * We still need the actual function definitions in case gcc calls them.
64
+ */
65
+ void *memset(void *s, int c, size_t n);
66
+ #define memset(s, c, n) __builtin_memset((s), (c), (n))
67
+
68
+ void *memcpy(void *dest, const void *src, size_t n);
69
+ #define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
70
+
71
+ void *memmove(void *dest, const void *src, size_t n);
72
+ #define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
73
+
74
+ int memcmp(const void *s1, const void *s2, size_t n);
75
+ #define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
76
+
77
+ #undef LIBDEFLATE_ENABLE_ASSERTIONS
78
+ #else
79
+ # include <string.h>
80
+ /*
81
+ * To prevent false positive static analyzer warnings, ensure that assertions
82
+ * are visible to the static analyzer.
83
+ */
84
+ # ifdef __clang_analyzer__
85
+ # define LIBDEFLATE_ENABLE_ASSERTIONS
86
+ # endif
87
+ #endif
88
+
89
+ /*
90
+ * Runtime assertion support. Don't enable this in production builds; it may
91
+ * hurt performance significantly.
92
+ */
93
+ #ifdef LIBDEFLATE_ENABLE_ASSERTIONS
94
+ NORETURN void
95
+ libdeflate_assertion_failed(const char *expr, const char *file, int line);
96
+ #define ASSERT(expr) { if (unlikely(!(expr))) \
97
+ libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
98
+ #else
99
+ #define ASSERT(expr) (void)(expr)
100
+ #endif
101
+
102
+ #define CONCAT_IMPL(a, b) a##b
103
+ #define CONCAT(a, b) CONCAT_IMPL(a, b)
104
+ #define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
105
+
106
+ #endif /* LIB_LIB_COMMON_H */
@@ -13,8 +13,8 @@ extern "C" {
13
13
  #endif
14
14
 
15
15
  #define LIBDEFLATE_VERSION_MAJOR 1
16
- #define LIBDEFLATE_VERSION_MINOR 21
17
- #define LIBDEFLATE_VERSION_STRING "1.21"
16
+ #define LIBDEFLATE_VERSION_MINOR 25
17
+ #define LIBDEFLATE_VERSION_STRING "1.25"
18
18
 
19
19
  /*
20
20
  * Users of libdeflate.dll on Windows can define LIBDEFLATE_DLL to cause
@@ -175,11 +175,11 @@ lz_hash(u32 seq, unsigned num_bits)
175
175
  * Return the number of bytes at @matchptr that match the bytes at @strptr, up
176
176
  * to a maximum of @max_len. Initially, @start_len bytes are matched.
177
177
  */
178
- static forceinline unsigned
178
+ static forceinline u32
179
179
  lz_extend(const u8 * const strptr, const u8 * const matchptr,
180
- const unsigned start_len, const unsigned max_len)
180
+ const u32 start_len, const u32 max_len)
181
181
  {
182
- unsigned len = start_len;
182
+ u32 len = start_len;
183
183
  machine_word_t v_word;
184
184
 
185
185
  if (UNALIGNED_ACCESS_IS_FAST) {
@@ -0,0 +1,135 @@
1
+ /*
2
+ * x86/adler32_impl.h - x86 implementations of Adler-32 checksum algorithm
3
+ *
4
+ * Copyright 2016 Eric Biggers
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person
7
+ * obtaining a copy of this software and associated documentation
8
+ * files (the "Software"), to deal in the Software without
9
+ * restriction, including without limitation the rights to use,
10
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ * copies of the Software, and to permit persons to whom the
12
+ * Software is furnished to do so, subject to the following
13
+ * conditions:
14
+ *
15
+ * The above copyright notice and this permission notice shall be
16
+ * included in all copies or substantial portions of the Software.
17
+ *
18
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
+ * OTHER DEALINGS IN THE SOFTWARE.
26
+ */
27
+
28
+ #ifndef LIB_X86_ADLER32_IMPL_H
29
+ #define LIB_X86_ADLER32_IMPL_H
30
+
31
+ #include "cpu_features.h"
32
+
33
+ /* SSE2 and AVX2 implementations. Used on older CPUs. */
34
+ #if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
35
+ # define adler32_x86_sse2 adler32_x86_sse2
36
+ # define SUFFIX _sse2
37
+ # define ATTRIBUTES _target_attribute("sse2")
38
+ # define VL 16
39
+ # define USE_VNNI 0
40
+ # define USE_AVX512 0
41
+ # include "adler32_template.h"
42
+
43
+ # define adler32_x86_avx2 adler32_x86_avx2
44
+ # define SUFFIX _avx2
45
+ # define ATTRIBUTES _target_attribute("avx2")
46
+ # define VL 32
47
+ # define USE_VNNI 0
48
+ # define USE_AVX512 0
49
+ # include "adler32_template.h"
50
+ #endif
51
+
52
+ /*
53
+ * AVX-VNNI implementation. This is used on CPUs that have AVX2 and AVX-VNNI
54
+ * but don't have AVX-512, for example Intel Alder Lake.
55
+ *
56
+ * Unusually for a new CPU feature, gcc added support for the AVX-VNNI
57
+ * intrinsics (in gcc 11.1) slightly before binutils added support for
58
+ * assembling AVX-VNNI instructions (in binutils 2.36). Distros can reasonably
59
+ * have gcc 11 with binutils 2.35. Because of this issue, we check for gcc 12
60
+ * instead of gcc 11. (libdeflate supports direct compilation without a
61
+ * configure step, so checking the binutils version is not always an option.)
62
+ */
63
+ #if (GCC_PREREQ(12, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)) && \
64
+ !defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI)
65
+ # define adler32_x86_avx2_vnni adler32_x86_avx2_vnni
66
+ # define SUFFIX _avx2_vnni
67
+ # define ATTRIBUTES _target_attribute("avx2,avxvnni")
68
+ # define VL 32
69
+ # define USE_VNNI 1
70
+ # define USE_AVX512 0
71
+ # include "adler32_template.h"
72
+ #endif
73
+
74
+ #if (GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)) && \
75
+ !defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI)
76
+ /*
77
+ * AVX512VNNI implementation using 256-bit vectors. This is very similar to the
78
+ * AVX-VNNI implementation but takes advantage of masking and more registers.
79
+ * This is used on certain older Intel CPUs, specifically Ice Lake and Tiger
80
+ * Lake, which support AVX512VNNI but downclock a bit too eagerly when ZMM
81
+ * registers are used.
82
+ */
83
+ # define adler32_x86_avx512_vl256_vnni adler32_x86_avx512_vl256_vnni
84
+ # define SUFFIX _avx512_vl256_vnni
85
+ # define ATTRIBUTES _target_attribute("avx512bw,avx512vl,avx512vnni")
86
+ # define VL 32
87
+ # define USE_VNNI 1
88
+ # define USE_AVX512 1
89
+ # include "adler32_template.h"
90
+
91
+ /*
92
+ * AVX512VNNI implementation using 512-bit vectors. This is used on CPUs that
93
+ * have a good AVX-512 implementation including AVX512VNNI.
94
+ */
95
+ # define adler32_x86_avx512_vl512_vnni adler32_x86_avx512_vl512_vnni
96
+ # define SUFFIX _avx512_vl512_vnni
97
+ # define ATTRIBUTES _target_attribute("avx512bw,avx512vnni")
98
+ # define VL 64
99
+ # define USE_VNNI 1
100
+ # define USE_AVX512 1
101
+ # include "adler32_template.h"
102
+ #endif
103
+
104
+ static inline adler32_func_t
105
+ arch_select_adler32_func(void)
106
+ {
107
+ const u32 features MAYBE_UNUSED = get_x86_cpu_features();
108
+
109
+ #ifdef adler32_x86_avx512_vl512_vnni
110
+ if ((features & X86_CPU_FEATURE_ZMM) &&
111
+ HAVE_AVX512BW(features) && HAVE_AVX512VNNI(features))
112
+ return adler32_x86_avx512_vl512_vnni;
113
+ #endif
114
+ #ifdef adler32_x86_avx512_vl256_vnni
115
+ if (HAVE_AVX512BW(features) && HAVE_AVX512VL(features) &&
116
+ HAVE_AVX512VNNI(features))
117
+ return adler32_x86_avx512_vl256_vnni;
118
+ #endif
119
+ #ifdef adler32_x86_avx2_vnni
120
+ if (HAVE_AVX2(features) && HAVE_AVXVNNI(features))
121
+ return adler32_x86_avx2_vnni;
122
+ #endif
123
+ #ifdef adler32_x86_avx2
124
+ if (HAVE_AVX2(features))
125
+ return adler32_x86_avx2;
126
+ #endif
127
+ #ifdef adler32_x86_sse2
128
+ if (HAVE_SSE2(features))
129
+ return adler32_x86_sse2;
130
+ #endif
131
+ return NULL;
132
+ }
133
+ #define arch_select_adler32_func arch_select_adler32_func
134
+
135
+ #endif /* LIB_X86_ADLER32_IMPL_H */