yencode 1.1.0 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +79 -7
- package/crcutil-1.0/code/multiword_64_64_intrinsic_i386_mmx.cc +1 -1
- package/package.json +1 -1
- package/src/common.h +88 -24
- package/src/crc.cc +59 -27
- package/src/crc.h +20 -6
- package/src/crc_arm.cc +154 -27
- package/src/crc_common.h +3 -10
- package/src/{crc_folding.c → crc_folding.cc} +53 -122
- package/src/crc_folding_256.cc +230 -0
- package/src/decoder.cc +10 -4
- package/src/decoder.h +16 -2
- package/src/decoder_avx2_base.h +32 -21
- package/src/decoder_common.h +2 -2
- package/src/decoder_neon.cc +37 -37
- package/src/decoder_neon64.cc +41 -36
- package/src/decoder_sse_base.h +21 -14
- package/src/decoder_vbmi2.cc +30 -0
- package/src/encoder.cc +9 -3
- package/src/encoder.h +17 -1
- package/src/encoder_avx_base.h +8 -8
- package/src/encoder_common.h +3 -3
- package/src/encoder_neon.cc +31 -31
- package/src/encoder_sse_base.h +7 -8
- package/src/encoder_vbmi2.cc +23 -0
- package/src/platform.cc +57 -8
- package/src/yencode.cc +33 -44
- package/test/testcrc.js +14 -0
package/binding.gyp
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
{
|
|
2
|
+
"variables": {
|
|
3
|
+
"enable_native_tuning%": 1,
|
|
4
|
+
"disable_avx256%": 0
|
|
5
|
+
},
|
|
2
6
|
"target_defaults": {
|
|
3
|
-
"variables": {
|
|
4
|
-
"enable_native_tuning%": 1,
|
|
5
|
-
"disable_avx256%": 0
|
|
6
|
-
},
|
|
7
7
|
"conditions": [
|
|
8
8
|
['target_arch=="ia32"', {
|
|
9
9
|
"msvs_settings": {"VCCLCompilerTool": {"EnableEnhancedInstructionSet": "2"}}
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
['disable_avx256!=0', {
|
|
42
42
|
"defines": ["YENC_DISABLE_AVX256=1"]
|
|
43
43
|
}],
|
|
44
|
-
['enable_native_tuning!=0', {
|
|
44
|
+
['OS!="win" and enable_native_tuning!=0', {
|
|
45
45
|
"defines": ["YENC_BUILD_NATIVE=1"]
|
|
46
46
|
}]
|
|
47
47
|
],
|
|
@@ -64,7 +64,7 @@
|
|
|
64
64
|
"targets": [
|
|
65
65
|
{
|
|
66
66
|
"target_name": "yencode",
|
|
67
|
-
"dependencies": ["crcutil", "yencode_sse2", "yencode_ssse3", "yencode_clmul", "yencode_avx", "yencode_avx2", "yencode_neon", "yencode_armcrc"],
|
|
67
|
+
"dependencies": ["crcutil", "yencode_sse2", "yencode_ssse3", "yencode_clmul", "yencode_clmul256", "yencode_avx", "yencode_avx2", "yencode_vbmi2", "yencode_neon", "yencode_armcrc"],
|
|
68
68
|
"sources": [
|
|
69
69
|
"src/yencode.cc",
|
|
70
70
|
"src/platform.cc",
|
|
@@ -128,7 +128,7 @@
|
|
|
128
128
|
"target_name": "yencode_clmul",
|
|
129
129
|
"type": "static_library",
|
|
130
130
|
"sources": [
|
|
131
|
-
"src/crc_folding.c"
|
|
131
|
+
"src/crc_folding.cc"
|
|
132
132
|
],
|
|
133
133
|
"cflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
134
134
|
"cxxflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
@@ -206,6 +206,70 @@
|
|
|
206
206
|
}]
|
|
207
207
|
]
|
|
208
208
|
},
|
|
209
|
+
{
|
|
210
|
+
"target_name": "yencode_clmul256",
|
|
211
|
+
"type": "static_library",
|
|
212
|
+
"sources": [
|
|
213
|
+
"src/crc_folding_256.cc"
|
|
214
|
+
],
|
|
215
|
+
"cflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
216
|
+
"cxxflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
217
|
+
"xcode_settings": {
|
|
218
|
+
"OTHER_CFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
219
|
+
"OTHER_CXXFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"]
|
|
220
|
+
},
|
|
221
|
+
"msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
|
|
222
|
+
"conditions": [
|
|
223
|
+
['target_arch in "ia32 x64" and OS!="win"', {
|
|
224
|
+
"variables": {"supports_vpclmul%": "<!(<!(echo ${CC_target:-${CC:-cc}}) -MM -E src/crc_folding_256.cc -mavx2 -mvpclmulqdq 2>/dev/null || true)"},
|
|
225
|
+
"conditions": [
|
|
226
|
+
['supports_vpclmul!=""', {
|
|
227
|
+
"cflags": ["-mavx2", "-mvpclmulqdq", "-mpclmul"],
|
|
228
|
+
"cxxflags": ["-mavx2", "-mvpclmulqdq", "-mpclmul"],
|
|
229
|
+
"xcode_settings": {
|
|
230
|
+
"OTHER_CFLAGS": ["-mavx2", "-mvpclmulqdq", "-mpclmul"],
|
|
231
|
+
"OTHER_CXXFLAGS": ["-mavx2", "-mvpclmulqdq", "-mpclmul"],
|
|
232
|
+
}
|
|
233
|
+
}]
|
|
234
|
+
]
|
|
235
|
+
}],
|
|
236
|
+
['target_arch in "ia32 x64" and OS=="win"', {
|
|
237
|
+
"msvs_settings": {"VCCLCompilerTool": {"EnableEnhancedInstructionSet": "3"}}
|
|
238
|
+
}]
|
|
239
|
+
]
|
|
240
|
+
},
|
|
241
|
+
{
|
|
242
|
+
"target_name": "yencode_vbmi2",
|
|
243
|
+
"type": "static_library",
|
|
244
|
+
"sources": [
|
|
245
|
+
"src/decoder_vbmi2.cc", "src/encoder_vbmi2.cc"
|
|
246
|
+
],
|
|
247
|
+
"cflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
248
|
+
"cxxflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
249
|
+
"xcode_settings": {
|
|
250
|
+
"OTHER_CFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
251
|
+
"OTHER_CXXFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"]
|
|
252
|
+
},
|
|
253
|
+
"msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
|
|
254
|
+
"conditions": [
|
|
255
|
+
['target_arch in "ia32 x64" and OS!="win"', {
|
|
256
|
+
"variables": {"supports_vbmi2%": "<!(<!(echo ${CC_target:-${CC:-cc}}) -MM -E src/encoder_vbmi2.cc -mavx512vl -mavx512vbmi2 2>/dev/null || true)"},
|
|
257
|
+
"conditions": [
|
|
258
|
+
['supports_vbmi2!=""', {
|
|
259
|
+
"cflags": ["-mavx512vbmi2", "-mavx512vl", "-mavx512bw", "-mpopcnt", "-mbmi", "-mbmi2", "-mlzcnt"],
|
|
260
|
+
"cxxflags": ["-mavx512vbmi2", "-mavx512vl", "-mavx512bw", "-mpopcnt", "-mbmi", "-mbmi2", "-mlzcnt"],
|
|
261
|
+
"xcode_settings": {
|
|
262
|
+
"OTHER_CFLAGS": ["-mavx512vbmi2", "-mavx512vl", "-mavx512bw", "-mpopcnt", "-mbmi", "-mbmi2", "-mlzcnt"],
|
|
263
|
+
"OTHER_CXXFLAGS": ["-mavx512vbmi2", "-mavx512vl", "-mavx512bw", "-mpopcnt", "-mbmi", "-mbmi2", "-mlzcnt"],
|
|
264
|
+
}
|
|
265
|
+
}]
|
|
266
|
+
]
|
|
267
|
+
}],
|
|
268
|
+
['target_arch in "ia32 x64" and OS=="win"', {
|
|
269
|
+
"msvs_settings": {"VCCLCompilerTool": {"AdditionalOptions": ["/arch:AVX512"], "EnableEnhancedInstructionSet": "0"}}
|
|
270
|
+
}]
|
|
271
|
+
]
|
|
272
|
+
},
|
|
209
273
|
{
|
|
210
274
|
"target_name": "yencode_neon",
|
|
211
275
|
"type": "static_library",
|
|
@@ -260,6 +324,14 @@
|
|
|
260
324
|
"OTHER_CFLAGS": ["-march=armv8-a+crc"],
|
|
261
325
|
"OTHER_CXXFLAGS": ["-march=armv8-a+crc"],
|
|
262
326
|
}
|
|
327
|
+
}],
|
|
328
|
+
['OS!="win" and target_arch=="arm"', {
|
|
329
|
+
"cflags": ["-mfpu=fp-armv8"],
|
|
330
|
+
"cxxflags": ["-mfpu=fp-armv8"],
|
|
331
|
+
"xcode_settings": {
|
|
332
|
+
"OTHER_CFLAGS": ["-mfpu=fp-armv8"],
|
|
333
|
+
"OTHER_CXXFLAGS": ["-mfpu=fp-armv8"]
|
|
334
|
+
}
|
|
263
335
|
}]
|
|
264
336
|
]
|
|
265
337
|
},
|
package/package.json
CHANGED
package/src/common.h
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
defined(__LP64 ) || \
|
|
9
9
|
defined(_M_X64 ) || \
|
|
10
10
|
defined(_M_AMD64 ) || \
|
|
11
|
-
defined(_WIN64 )
|
|
11
|
+
(defined(_WIN64) && !defined(_M_ARM64))
|
|
12
12
|
#define PLATFORM_AMD64 1
|
|
13
13
|
#endif
|
|
14
14
|
#if defined(PLATFORM_AMD64) || \
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
defined(__i686__ ) || \
|
|
19
19
|
defined(_M_I86 ) || \
|
|
20
20
|
defined(_M_IX86 ) || \
|
|
21
|
-
defined(_WIN32 )
|
|
21
|
+
(defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64))
|
|
22
22
|
#define PLATFORM_X86 1
|
|
23
23
|
#endif
|
|
24
24
|
#if defined(__aarch64__) || \
|
|
@@ -35,15 +35,21 @@
|
|
|
35
35
|
#endif
|
|
36
36
|
|
|
37
37
|
|
|
38
|
-
#
|
|
39
|
-
|
|
38
|
+
#include <stdlib.h>
|
|
39
|
+
#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__)
|
|
40
|
+
// MSVC doesn't support C11 aligned_alloc: https://stackoverflow.com/a/62963007
|
|
41
|
+
#define ALIGN_ALLOC(buf, len, align) *(void**)&(buf) = _aligned_malloc((len), align)
|
|
42
|
+
#define ALIGN_FREE _aligned_free
|
|
43
|
+
#elif defined(_ISOC11_SOURCE)
|
|
44
|
+
// C11 method
|
|
40
45
|
// len needs to be a multiple of alignment, although it sometimes works if it isn't...
|
|
41
|
-
#include <cstdlib>
|
|
42
46
|
#define ALIGN_ALLOC(buf, len, align) *(void**)&(buf) = aligned_alloc(align, ((len) + (align)-1) & ~((align)-1))
|
|
43
47
|
#define ALIGN_FREE free
|
|
44
|
-
#elif defined(
|
|
45
|
-
|
|
46
|
-
#
|
|
48
|
+
#elif defined(__cplusplus) && __cplusplus >= 201700
|
|
49
|
+
// C++17 method
|
|
50
|
+
#include <cstdlib>
|
|
51
|
+
#define ALIGN_ALLOC(buf, len, align) *(void**)&(buf) = std::aligned_alloc(align, ((len) + (align)-1) & ~((align)-1))
|
|
52
|
+
#define ALIGN_FREE free
|
|
47
53
|
#else
|
|
48
54
|
#define ALIGN_ALLOC(buf, len, align) if(posix_memalign((void**)&(buf), align, (len))) (buf) = NULL
|
|
49
55
|
#define ALIGN_FREE free
|
|
@@ -51,7 +57,7 @@
|
|
|
51
57
|
|
|
52
58
|
|
|
53
59
|
// MSVC compatibility
|
|
54
|
-
#if (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(_M_X64)
|
|
60
|
+
#if ((defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(_M_X64)) && !defined(__clang__)
|
|
55
61
|
#define __SSE2__ 1
|
|
56
62
|
#define __SSSE3__ 1
|
|
57
63
|
#define __SSE4_1__ 1
|
|
@@ -85,12 +91,16 @@
|
|
|
85
91
|
#define __ARM_NEON 1
|
|
86
92
|
#endif
|
|
87
93
|
#if defined(_M_ARM)
|
|
88
|
-
|
|
94
|
+
#define __ARM_NEON 1
|
|
89
95
|
#endif
|
|
90
96
|
#ifdef _MSC_VER
|
|
91
|
-
#
|
|
92
|
-
#define
|
|
93
|
-
#
|
|
97
|
+
# ifndef __BYTE_ORDER__
|
|
98
|
+
# define __BYTE_ORDER__ 1234
|
|
99
|
+
# endif
|
|
100
|
+
# ifndef __ORDER_BIG_ENDIAN__
|
|
101
|
+
# define __ORDER_BIG_ENDIAN__ 4321
|
|
102
|
+
# endif
|
|
103
|
+
# include <intrin.h>
|
|
94
104
|
#endif
|
|
95
105
|
|
|
96
106
|
|
|
@@ -137,6 +147,59 @@
|
|
|
137
147
|
|
|
138
148
|
#ifdef __ARM_NEON
|
|
139
149
|
# include <arm_neon.h>
|
|
150
|
+
|
|
151
|
+
// ARM provides no standard way to inline define a vector :(
|
|
152
|
+
static HEDLEY_ALWAYS_INLINE uint8x8_t vmake_u8(
|
|
153
|
+
uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t e, uint8_t f, uint8_t g, uint8_t h
|
|
154
|
+
) {
|
|
155
|
+
# if defined(_MSC_VER)
|
|
156
|
+
uint8_t t[] = {a,b,c,d,e,f,g,h};
|
|
157
|
+
return vld1_u8(t);
|
|
158
|
+
# else
|
|
159
|
+
return (uint8x8_t){a,b,c,d,e,f,g,h};
|
|
160
|
+
# endif
|
|
161
|
+
}
|
|
162
|
+
static HEDLEY_ALWAYS_INLINE uint8x16_t vmakeq_u8(
|
|
163
|
+
uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t e, uint8_t f, uint8_t g, uint8_t h,
|
|
164
|
+
uint8_t i, uint8_t j, uint8_t k, uint8_t l, uint8_t m, uint8_t n, uint8_t o, uint8_t p
|
|
165
|
+
) {
|
|
166
|
+
# if defined(_MSC_VER)
|
|
167
|
+
uint8_t t[] = {a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p};
|
|
168
|
+
return vld1q_u8(t);
|
|
169
|
+
# else
|
|
170
|
+
return (uint8x16_t){a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p};
|
|
171
|
+
# endif
|
|
172
|
+
}
|
|
173
|
+
static HEDLEY_ALWAYS_INLINE int8x16_t vmakeq_s8(
|
|
174
|
+
int8_t a, int8_t b, int8_t c, int8_t d, int8_t e, int8_t f, int8_t g, int8_t h,
|
|
175
|
+
int8_t i, int8_t j, int8_t k, int8_t l, int8_t m, int8_t n, int8_t o, int8_t p
|
|
176
|
+
) {
|
|
177
|
+
# if defined(_MSC_VER)
|
|
178
|
+
int8_t t[] = {a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p};
|
|
179
|
+
return vld1q_s8(t);
|
|
180
|
+
# else
|
|
181
|
+
return (int8x16_t){a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p};
|
|
182
|
+
# endif
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
# ifdef _MSC_VER
|
|
186
|
+
# define _CREATE_TUPLE(type, ...) type{{ __VA_ARGS__ }}
|
|
187
|
+
# else
|
|
188
|
+
# define _CREATE_TUPLE(type, ...) (type){{ __VA_ARGS__ }}
|
|
189
|
+
# endif
|
|
190
|
+
static HEDLEY_ALWAYS_INLINE uint8x16x2_t vcreate2_u8(uint8x16_t a, uint8x16_t b) {
|
|
191
|
+
return _CREATE_TUPLE(uint8x16x2_t, a, b);
|
|
192
|
+
}
|
|
193
|
+
static HEDLEY_ALWAYS_INLINE int8x16x2_t vcreate2_s8(int8x16_t a, int8x16_t b) {
|
|
194
|
+
return _CREATE_TUPLE(int8x16x2_t, a, b);
|
|
195
|
+
}
|
|
196
|
+
static HEDLEY_ALWAYS_INLINE uint8x16x3_t vcreate3_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
|
|
197
|
+
return _CREATE_TUPLE(uint8x16x3_t, a, b, c);
|
|
198
|
+
}
|
|
199
|
+
static HEDLEY_ALWAYS_INLINE uint8x16x4_t vcreate4_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c, uint8x16_t d) {
|
|
200
|
+
return _CREATE_TUPLE(uint8x16x4_t, a, b, c, d);
|
|
201
|
+
}
|
|
202
|
+
# undef _CREATE_TUPLE
|
|
140
203
|
#endif
|
|
141
204
|
#ifdef PLATFORM_ARM
|
|
142
205
|
bool cpu_supports_neon();
|
|
@@ -158,9 +221,9 @@ enum YEncDecIsaLevel {
|
|
|
158
221
|
ISA_LEVEL_SSE41 = 0x300,
|
|
159
222
|
ISA_LEVEL_SSE4_POPCNT = 0x301,
|
|
160
223
|
ISA_LEVEL_AVX = 0x381, // same as above, just used as a differentiator for `cpu_supports_isa`
|
|
161
|
-
ISA_LEVEL_AVX2 =
|
|
162
|
-
ISA_LEVEL_AVX3 =
|
|
163
|
-
ISA_LEVEL_VBMI2 =
|
|
224
|
+
ISA_LEVEL_AVX2 = 0x403, // also includes BMI1/2 and LZCNT
|
|
225
|
+
ISA_LEVEL_AVX3 = 0x503, // SKX variant; AVX512VL + AVX512BW
|
|
226
|
+
ISA_LEVEL_VBMI2 = 0x603 // ICL
|
|
164
227
|
};
|
|
165
228
|
#ifdef _MSC_VER
|
|
166
229
|
// native tuning not supported in MSVC
|
|
@@ -190,13 +253,6 @@ enum YEncDecIsaLevel {
|
|
|
190
253
|
# endif
|
|
191
254
|
#endif
|
|
192
255
|
|
|
193
|
-
#ifdef _MSC_VER
|
|
194
|
-
# define _cpuid1(ar) __cpuid(ar, 1)
|
|
195
|
-
#else
|
|
196
|
-
# include <cpuid.h>
|
|
197
|
-
# define _cpuid1(ar) __cpuid(1, ar[0], ar[1], ar[2], ar[3])
|
|
198
|
-
#endif
|
|
199
|
-
|
|
200
256
|
int cpu_supports_isa();
|
|
201
257
|
#endif // PLATFORM_X86
|
|
202
258
|
|
|
@@ -211,7 +267,7 @@ int cpu_supports_isa();
|
|
|
211
267
|
|
|
212
268
|
|
|
213
269
|
// GCC 8/9/10(dev) fails to optimize cases where KNOT should be used, so use intrinsic explicitly; Clang 6+ has no issue, but Clang 6/7 doesn't have the intrinsic; MSVC 2019 also fails and lacks the intrinsic
|
|
214
|
-
#if defined(__GNUC__) && __GNUC__ >= 7
|
|
270
|
+
#if (defined(__GNUC__) && __GNUC__ >= 7) || (defined(_MSC_VER) && _MSC_VER >= 1924)
|
|
215
271
|
# define KNOT16 _knot_mask16
|
|
216
272
|
# define KNOT32 _knot_mask32
|
|
217
273
|
#else
|
|
@@ -219,6 +275,14 @@ int cpu_supports_isa();
|
|
|
219
275
|
# define KNOT32(x) ((__mmask32)~(x))
|
|
220
276
|
#endif
|
|
221
277
|
|
|
278
|
+
// weird thing with Apple's Clang; doesn't seem to always occur, so assume that Clang >= 9 is fine: https://github.com/animetosho/node-yencode/issues/8#issuecomment-583385864
|
|
279
|
+
// seems that Clang < 3.6 also uses the old name
|
|
280
|
+
#if defined(__clang__) && ((defined(__APPLE__) && __clang_major__ < 9) || __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ < 6))
|
|
281
|
+
# define _lzcnt_u32 __lzcnt32
|
|
282
|
+
#endif
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
|
|
222
286
|
#ifdef __GNUC__
|
|
223
287
|
# if __GNUC__ >= 9
|
|
224
288
|
# define LIKELIHOOD(p, c) (HEDLEY_PREDICT(!!(c), 1, p))
|
package/src/crc.cc
CHANGED
|
@@ -1,54 +1,81 @@
|
|
|
1
|
-
#include "common.h"
|
|
2
1
|
#include "crc_common.h"
|
|
3
2
|
|
|
4
|
-
|
|
5
3
|
#include "interface.h"
|
|
6
4
|
crcutil_interface::CRC* crc = NULL;
|
|
7
5
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
crcutil_interface::UINT64 tmp = 0;
|
|
11
|
-
crc->Compute(data, length, &tmp);
|
|
12
|
-
UNPACK_4(out, tmp);
|
|
13
|
-
}
|
|
14
|
-
crc_func _do_crc32 = &do_crc32_generic;
|
|
15
|
-
|
|
16
|
-
static void do_crc32_incremental_generic(const void* data, size_t length, unsigned char init[4]) {
|
|
17
|
-
crcutil_interface::UINT64 tmp = PACK_4(init);
|
|
6
|
+
static uint32_t do_crc32_incremental_generic(const void* data, size_t length, uint32_t init) {
|
|
7
|
+
crcutil_interface::UINT64 tmp = init;
|
|
18
8
|
crc->Compute(data, length, &tmp);
|
|
19
|
-
|
|
9
|
+
return (uint32_t)tmp;
|
|
20
10
|
}
|
|
21
11
|
crc_func _do_crc32_incremental = &do_crc32_incremental_generic;
|
|
22
12
|
|
|
23
13
|
|
|
24
14
|
|
|
25
|
-
|
|
26
|
-
crcutil_interface::UINT64 crc1_ =
|
|
15
|
+
uint32_t do_crc32_combine(uint32_t crc1, uint32_t crc2, size_t len2) {
|
|
16
|
+
crcutil_interface::UINT64 crc1_ = crc1, crc2_ = crc2;
|
|
27
17
|
crc->Concatenate(crc2_, 0, len2, &crc1_);
|
|
28
|
-
|
|
18
|
+
return (uint32_t)crc1_;
|
|
29
19
|
}
|
|
30
20
|
|
|
31
|
-
|
|
32
|
-
crcutil_interface::UINT64 crc_ =
|
|
21
|
+
uint32_t do_crc32_zeros(uint32_t crc1, size_t len) {
|
|
22
|
+
crcutil_interface::UINT64 crc_ = crc1;
|
|
33
23
|
crc->CrcOfZeroes(len, &crc_);
|
|
34
|
-
|
|
24
|
+
return (uint32_t)crc_;
|
|
35
25
|
}
|
|
36
26
|
|
|
37
|
-
|
|
38
|
-
void
|
|
27
|
+
void crc_clmul_set_funcs(crc_func*);
|
|
28
|
+
void crc_clmul256_set_funcs(crc_func*);
|
|
29
|
+
void crc_arm_set_funcs(crc_func*);
|
|
30
|
+
|
|
31
|
+
#ifdef PLATFORM_X86
|
|
32
|
+
int cpu_supports_crc_isa();
|
|
33
|
+
#endif
|
|
39
34
|
|
|
35
|
+
#if defined(PLATFORM_ARM) && defined(_WIN32)
|
|
36
|
+
# define WIN32_LEAN_AND_MEAN
|
|
37
|
+
# include <Windows.h>
|
|
38
|
+
#endif
|
|
39
|
+
#ifdef PLATFORM_ARM
|
|
40
|
+
# ifdef __ANDROID__
|
|
41
|
+
# include <cpu-features.h>
|
|
42
|
+
# elif defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD__ >= 12)
|
|
43
|
+
# include <sys/auxv.h>
|
|
44
|
+
# include <asm/hwcap.h>
|
|
45
|
+
# elif (defined(__FreeBSD__) && __FreeBSD__ < 12)
|
|
46
|
+
# include <sys/sysctl.h>
|
|
47
|
+
# include <asm/hwcap.h>
|
|
48
|
+
# elif defined(__APPLE__)
|
|
49
|
+
# include <sys/types.h>
|
|
50
|
+
# include <sys/sysctl.h>
|
|
51
|
+
# endif
|
|
52
|
+
# ifdef __FreeBSD__
|
|
53
|
+
static unsigned long getauxval(unsigned long cap) {
|
|
54
|
+
unsigned long ret;
|
|
55
|
+
elf_aux_info(cap, &ret, sizeof(ret));
|
|
56
|
+
return ret;
|
|
57
|
+
}
|
|
58
|
+
# endif
|
|
59
|
+
#endif
|
|
40
60
|
void crc_init() {
|
|
41
61
|
crc = crcutil_interface::CRC::Create(
|
|
42
62
|
0xEDB88320, 0, 32, true, 0, 0, 0, 0, NULL);
|
|
43
63
|
// instance never deleted... oh well...
|
|
44
64
|
|
|
45
65
|
#ifdef PLATFORM_X86
|
|
46
|
-
int
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
66
|
+
int support = cpu_supports_crc_isa();
|
|
67
|
+
if(support == 2)
|
|
68
|
+
crc_clmul256_set_funcs(&_do_crc32_incremental);
|
|
69
|
+
else if(support == 1)
|
|
70
|
+
crc_clmul_set_funcs(&_do_crc32_incremental);
|
|
50
71
|
#endif
|
|
51
72
|
#ifdef PLATFORM_ARM
|
|
73
|
+
# ifdef __APPLE__
|
|
74
|
+
int supported = 0;
|
|
75
|
+
size_t len = sizeof(supported);
|
|
76
|
+
if(sysctlbyname("hw.optional.armv8_crc32", &supported, &len, NULL, 0))
|
|
77
|
+
supported = 0;
|
|
78
|
+
# endif
|
|
52
79
|
if(
|
|
53
80
|
# if defined(AT_HWCAP2) && defined(HWCAP2_CRC32)
|
|
54
81
|
getauxval(AT_HWCAP2) & HWCAP2_CRC32
|
|
@@ -56,14 +83,19 @@ void crc_init() {
|
|
|
56
83
|
getauxval(AT_HWCAP) & HWCAP_CRC32
|
|
57
84
|
# elif defined(ANDROID_CPU_FAMILY_ARM) && defined(__aarch64__)
|
|
58
85
|
android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_CRC32
|
|
59
|
-
|
|
86
|
+
# elif defined(ANDROID_CPU_FAMILY_ARM) /* aarch32 */
|
|
87
|
+
android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_CRC32
|
|
88
|
+
# elif defined(_WIN32)
|
|
89
|
+
IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)
|
|
90
|
+
# elif defined(__APPLE__)
|
|
91
|
+
supported
|
|
60
92
|
# elif defined(__ARM_FEATURE_CRC32)
|
|
61
93
|
true /* assume available if compiled as such */
|
|
62
94
|
# else
|
|
63
95
|
false
|
|
64
96
|
# endif
|
|
65
97
|
) {
|
|
66
|
-
crc_arm_set_funcs(&
|
|
98
|
+
crc_arm_set_funcs(&_do_crc32_incremental);
|
|
67
99
|
}
|
|
68
100
|
#endif
|
|
69
101
|
}
|
package/src/crc.h
CHANGED
|
@@ -1,9 +1,23 @@
|
|
|
1
|
+
#ifndef __YENC_CRC_H
|
|
2
|
+
#define __YENC_CRC_H
|
|
1
3
|
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
#define do_crc32_incremental (*_do_crc32_incremental)
|
|
4
|
+
#ifdef __cplusplus
|
|
5
|
+
extern "C" {
|
|
6
|
+
#endif
|
|
6
7
|
|
|
7
|
-
|
|
8
|
-
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
typedef uint32_t (*crc_func)(const void*, size_t, uint32_t);
|
|
11
|
+
extern crc_func _do_crc32_incremental;
|
|
12
|
+
#define do_crc32 (*_do_crc32_incremental)
|
|
13
|
+
|
|
14
|
+
uint32_t do_crc32_combine(uint32_t crc1, const uint32_t crc2, size_t len2);
|
|
15
|
+
uint32_t do_crc32_zeros(uint32_t crc1, size_t len);
|
|
9
16
|
void crc_init();
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
#ifdef __cplusplus
|
|
21
|
+
}
|
|
22
|
+
#endif
|
|
23
|
+
#endif
|