yencode 1.1.5 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +130 -189
- package/binding.gyp +115 -6
- package/index.js +2 -0
- package/package.json +1 -1
- package/src/common.h +37 -7
- package/src/crc.cc +121 -47
- package/src/crc.h +74 -10
- package/src/crc_arm.cc +51 -34
- package/src/crc_arm_pmull.cc +215 -0
- package/src/crc_common.h +22 -0
- package/src/crc_folding.cc +154 -16
- package/src/crc_folding_256.cc +7 -14
- package/src/crc_riscv.cc +251 -0
- package/src/decoder.cc +373 -13
- package/src/decoder.h +10 -14
- package/src/decoder_avx.cc +5 -6
- package/src/decoder_avx2.cc +8 -9
- package/src/decoder_avx2_base.h +7 -11
- package/src/decoder_common.h +56 -373
- package/src/decoder_neon.cc +13 -19
- package/src/decoder_neon64.cc +12 -15
- package/src/decoder_rvv.cc +280 -0
- package/src/decoder_sse2.cc +26 -5
- package/src/decoder_sse_base.h +20 -40
- package/src/decoder_ssse3.cc +5 -6
- package/src/decoder_vbmi2.cc +6 -13
- package/src/encoder.cc +42 -26
- package/src/encoder.h +5 -7
- package/src/encoder_avx.cc +3 -3
- package/src/encoder_avx2.cc +3 -3
- package/src/encoder_avx_base.h +3 -0
- package/src/encoder_common.h +26 -32
- package/src/encoder_neon.cc +6 -3
- package/src/encoder_rvv.cc +13 -26
- package/src/encoder_sse2.cc +3 -2
- package/src/encoder_sse_base.h +2 -0
- package/src/encoder_ssse3.cc +3 -3
- package/src/encoder_vbmi2.cc +6 -7
- package/src/platform.cc +24 -23
- package/src/yencode.cc +54 -11
- package/test/_speedbase.js +4 -2
- package/test/speeddec.js +25 -16
- package/test/speedenc.js +21 -17
- package/test/testcrc.js +17 -1
- package/test/testcrcfuncs.c +53 -0
- package/test/testdec.js +1 -0
package/binding.gyp
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"variables": {
|
|
3
3
|
"enable_native_tuning%": 1,
|
|
4
|
-
"disable_avx256%": 0
|
|
4
|
+
"disable_avx256%": 0,
|
|
5
|
+
"disable_crcutil%": 0
|
|
5
6
|
},
|
|
6
7
|
"target_defaults": {
|
|
7
8
|
"conditions": [
|
|
@@ -41,6 +42,9 @@
|
|
|
41
42
|
['disable_avx256!=0', {
|
|
42
43
|
"defines": ["YENC_DISABLE_AVX256=1"]
|
|
43
44
|
}],
|
|
45
|
+
['disable_crcutil!=0', {
|
|
46
|
+
"defines": ["YENC_DISABLE_CRCUTIL=1"]
|
|
47
|
+
}],
|
|
44
48
|
['OS!="win" and enable_native_tuning!=0', {
|
|
45
49
|
"defines": ["YENC_BUILD_NATIVE=1"]
|
|
46
50
|
}],
|
|
@@ -74,7 +78,7 @@
|
|
|
74
78
|
"targets": [
|
|
75
79
|
{
|
|
76
80
|
"target_name": "yencode",
|
|
77
|
-
"dependencies": ["
|
|
81
|
+
"dependencies": ["yencode_sse2", "yencode_ssse3", "yencode_clmul", "yencode_clmul256", "yencode_avx", "yencode_avx2", "yencode_vbmi2", "yencode_neon", "yencode_armcrc", "yencode_pmull", "yencode_rvv", "yencode_zbkc"],
|
|
78
82
|
"sources": [
|
|
79
83
|
"src/yencode.cc",
|
|
80
84
|
"src/platform.cc",
|
|
@@ -82,7 +86,12 @@
|
|
|
82
86
|
"src/decoder.cc",
|
|
83
87
|
"src/crc.cc"
|
|
84
88
|
],
|
|
85
|
-
"
|
|
89
|
+
"conditions": [
|
|
90
|
+
['target_arch in "ia32 x64" and disable_crcutil==0', {
|
|
91
|
+
"dependencies": ["crcutil"],
|
|
92
|
+
"include_dirs": ["crcutil-1.0/code","crcutil-1.0/examples"]
|
|
93
|
+
}]
|
|
94
|
+
]
|
|
86
95
|
},
|
|
87
96
|
{
|
|
88
97
|
"target_name": "yencode_sse2",
|
|
@@ -324,7 +333,8 @@
|
|
|
324
333
|
"target_name": "yencode_rvv",
|
|
325
334
|
"type": "static_library",
|
|
326
335
|
"sources": [
|
|
327
|
-
"src/encoder_rvv.cc"
|
|
336
|
+
"src/encoder_rvv.cc",
|
|
337
|
+
"src/decoder_rvv.cc"
|
|
328
338
|
],
|
|
329
339
|
"cflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
330
340
|
"cxxflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
@@ -338,9 +348,13 @@
|
|
|
338
348
|
"variables": {"supports_rvv%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/encoder_rvv.cc -march=rv64gcv 2>/dev/null || true)"},
|
|
339
349
|
"conditions": [
|
|
340
350
|
['supports_rvv!=""', {
|
|
351
|
+
"cflags!": ["-march=native"],
|
|
352
|
+
"cxxflags!": ["-march=native"],
|
|
341
353
|
"cflags": ["-march=rv64gcv"],
|
|
342
354
|
"cxxflags": ["-march=rv64gcv"],
|
|
343
355
|
"xcode_settings": {
|
|
356
|
+
"OTHER_CFLAGS!": ["-march=native"],
|
|
357
|
+
"OTHER_CXXFLAGS!": ["-march=native"],
|
|
344
358
|
"OTHER_CFLAGS": ["-march=rv64gcv"],
|
|
345
359
|
"OTHER_CXXFLAGS": ["-march=rv64gcv"],
|
|
346
360
|
}
|
|
@@ -351,9 +365,13 @@
|
|
|
351
365
|
"variables": {"supports_rvv%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/encoder_rvv.cc -march=rv32gcv 2>/dev/null || true)"},
|
|
352
366
|
"conditions": [
|
|
353
367
|
['supports_rvv!=""', {
|
|
368
|
+
"cflags!": ["-march=native"],
|
|
369
|
+
"cxxflags!": ["-march=native"],
|
|
354
370
|
"cflags": ["-march=rv32gcv"],
|
|
355
371
|
"cxxflags": ["-march=rv32gcv"],
|
|
356
372
|
"xcode_settings": {
|
|
373
|
+
"OTHER_CFLAGS!": ["-march=native"],
|
|
374
|
+
"OTHER_CXXFLAGS!": ["-march=native"],
|
|
357
375
|
"OTHER_CFLAGS": ["-march=rv32gcv"],
|
|
358
376
|
"OTHER_CXXFLAGS": ["-march=rv32gcv"],
|
|
359
377
|
}
|
|
@@ -399,8 +417,94 @@
|
|
|
399
417
|
]
|
|
400
418
|
},
|
|
401
419
|
{
|
|
402
|
-
"target_name": "
|
|
420
|
+
"target_name": "yencode_pmull",
|
|
403
421
|
"type": "static_library",
|
|
422
|
+
"sources": [
|
|
423
|
+
"src/crc_arm_pmull.cc"
|
|
424
|
+
],
|
|
425
|
+
"cflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
426
|
+
"cxxflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
427
|
+
"xcode_settings": {
|
|
428
|
+
"OTHER_CFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
429
|
+
"OTHER_CXXFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"]
|
|
430
|
+
},
|
|
431
|
+
"msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
|
|
432
|
+
"conditions": [
|
|
433
|
+
['target_arch in "arm arm64"', {
|
|
434
|
+
"cflags!": ["-march=native"],
|
|
435
|
+
"cxxflags!": ["-march=native"],
|
|
436
|
+
"cflags": ["-march=armv8-a+crc+crypto"],
|
|
437
|
+
"cxxflags": ["-march=armv8-a+crc+crypto"],
|
|
438
|
+
"xcode_settings": {
|
|
439
|
+
"OTHER_CFLAGS!": ["-march=native"],
|
|
440
|
+
"OTHER_CXXFLAGS!": ["-march=native"],
|
|
441
|
+
"OTHER_CFLAGS": ["-march=armv8-a+crc+crypto"],
|
|
442
|
+
"OTHER_CXXFLAGS": ["-march=armv8-a+crc+crypto"],
|
|
443
|
+
}
|
|
444
|
+
}],
|
|
445
|
+
['OS!="win" and target_arch=="arm"', {
|
|
446
|
+
"cflags": ["-mfpu=neon","-fno-lto"],
|
|
447
|
+
"cxxflags": ["-mfpu=neon","-fno-lto"],
|
|
448
|
+
"xcode_settings": {
|
|
449
|
+
"OTHER_CFLAGS": ["-mfpu=neon","-fno-lto"],
|
|
450
|
+
"OTHER_CXXFLAGS": ["-mfpu=neon","-fno-lto"]
|
|
451
|
+
}
|
|
452
|
+
}]
|
|
453
|
+
]
|
|
454
|
+
},
|
|
455
|
+
{
|
|
456
|
+
"target_name": "yencode_zbkc",
|
|
457
|
+
"type": "static_library",
|
|
458
|
+
"sources": [
|
|
459
|
+
"src/crc_riscv.cc"
|
|
460
|
+
],
|
|
461
|
+
"cflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
462
|
+
"cxxflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
463
|
+
"xcode_settings": {
|
|
464
|
+
"OTHER_CFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
465
|
+
"OTHER_CXXFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"]
|
|
466
|
+
},
|
|
467
|
+
"msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
|
|
468
|
+
"conditions": [
|
|
469
|
+
['target_arch=="riscv64" and OS!="win"', {
|
|
470
|
+
"variables": {"supports_zbkc%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/crc_riscv.cc -march=rv64gc_zbkc 2>/dev/null || true)"},
|
|
471
|
+
"conditions": [
|
|
472
|
+
['supports_zbkc!=""', {
|
|
473
|
+
"cflags!": ["-march=native"],
|
|
474
|
+
"cxxflags!": ["-march=native"],
|
|
475
|
+
"cflags": ["-march=rv64gc_zbkc"],
|
|
476
|
+
"cxxflags": ["-march=rv64gc_zbkc"],
|
|
477
|
+
"xcode_settings": {
|
|
478
|
+
"OTHER_CFLAGS!": ["-march=native"],
|
|
479
|
+
"OTHER_CXXFLAGS!": ["-march=native"],
|
|
480
|
+
"OTHER_CFLAGS": ["-march=rv64gc_zbkc"],
|
|
481
|
+
"OTHER_CXXFLAGS": ["-march=rv64gc_zbkc"],
|
|
482
|
+
}
|
|
483
|
+
}]
|
|
484
|
+
]
|
|
485
|
+
}],
|
|
486
|
+
['target_arch=="riscv32" and OS!="win"', {
|
|
487
|
+
"variables": {"supports_zbkc%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/crc_riscv.cc -march=rv32gc_zbkc 2>/dev/null || true)"},
|
|
488
|
+
"conditions": [
|
|
489
|
+
['supports_zbkc!=""', {
|
|
490
|
+
"cflags!": ["-march=native"],
|
|
491
|
+
"cxxflags!": ["-march=native"],
|
|
492
|
+
"cflags": ["-march=rv32gc_zbkc"],
|
|
493
|
+
"cxxflags": ["-march=rv32gc_zbkc"],
|
|
494
|
+
"xcode_settings": {
|
|
495
|
+
"OTHER_CFLAGS!": ["-march=native"],
|
|
496
|
+
"OTHER_CXXFLAGS!": ["-march=native"],
|
|
497
|
+
"OTHER_CFLAGS": ["-march=rv32gc_zbkc"],
|
|
498
|
+
"OTHER_CXXFLAGS": ["-march=rv32gc_zbkc"],
|
|
499
|
+
}
|
|
500
|
+
}]
|
|
501
|
+
]
|
|
502
|
+
}]
|
|
503
|
+
]
|
|
504
|
+
},
|
|
505
|
+
{
|
|
506
|
+
"target_name": "crcutil",
|
|
507
|
+
"type": "none",
|
|
404
508
|
"sources": [
|
|
405
509
|
"crcutil-1.0/code/crc32c_sse4.cc",
|
|
406
510
|
"crcutil-1.0/code/multiword_64_64_cl_i386_mmx.cc",
|
|
@@ -422,7 +526,12 @@
|
|
|
422
526
|
},
|
|
423
527
|
"msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
|
|
424
528
|
"include_dirs": ["crcutil-1.0/code", "crcutil-1.0/tests"],
|
|
425
|
-
"defines": ["CRCUTIL_USE_MM_CRC32=0"]
|
|
529
|
+
"defines": ["CRCUTIL_USE_MM_CRC32=0"],
|
|
530
|
+
"conditions": [
|
|
531
|
+
['target_arch in "ia32 x64" and disable_crcutil==0', {
|
|
532
|
+
"type": "static_library",
|
|
533
|
+
}]
|
|
534
|
+
]
|
|
426
535
|
}
|
|
427
536
|
]
|
|
428
537
|
}
|
package/index.js
CHANGED
|
@@ -190,6 +190,8 @@ module.exports = {
|
|
|
190
190
|
crc32: y.crc32,
|
|
191
191
|
crc32_combine: y.crc32_combine,
|
|
192
192
|
crc32_zeroes: y.crc32_zeroes,
|
|
193
|
+
crc32_multiply: y.crc32_multiply,
|
|
194
|
+
crc32_shift: y.crc32_shift,
|
|
193
195
|
|
|
194
196
|
post: function(filename, data, line_size) {
|
|
195
197
|
if(!line_size) line_size = 128;
|
package/package.json
CHANGED
package/src/common.h
CHANGED
|
@@ -125,7 +125,7 @@
|
|
|
125
125
|
#ifdef __POPCNT__
|
|
126
126
|
#include <nmmintrin.h>
|
|
127
127
|
// POPCNT can never return a negative result, but GCC doesn't seem to realise this, so typecast it to hint it better
|
|
128
|
-
#define popcnt32 (unsigned int)_mm_popcnt_u32
|
|
128
|
+
#define popcnt32 (unsigned int)_mm_popcnt_u32
|
|
129
129
|
#endif
|
|
130
130
|
|
|
131
131
|
#if defined(__AVX2__) || defined(__AVX512F__)
|
|
@@ -209,7 +209,9 @@ static HEDLEY_ALWAYS_INLINE uint8x16x4_t vcreate4_u8(uint8x16_t a, uint8x16_t b,
|
|
|
209
209
|
# undef _CREATE_TUPLE
|
|
210
210
|
#endif
|
|
211
211
|
#ifdef PLATFORM_ARM
|
|
212
|
-
|
|
212
|
+
namespace RapidYenc {
|
|
213
|
+
bool cpu_supports_neon();
|
|
214
|
+
}
|
|
213
215
|
#endif
|
|
214
216
|
|
|
215
217
|
#ifdef _MSC_VER
|
|
@@ -240,11 +242,13 @@ enum YEncDecIsaLevel {
|
|
|
240
242
|
enum YEncDecIsaLevel {
|
|
241
243
|
ISA_GENERIC = 0,
|
|
242
244
|
ISA_FEATURE_CRC = 8,
|
|
245
|
+
ISA_FEATURE_PMULL = 0x40,
|
|
243
246
|
ISA_LEVEL_NEON = 0x1000
|
|
244
247
|
};
|
|
245
248
|
#elif defined(__riscv)
|
|
246
249
|
enum YEncDecIsaLevel {
|
|
247
250
|
ISA_GENERIC = 0,
|
|
251
|
+
ISA_FEATURE_ZBC = 16,
|
|
248
252
|
ISA_LEVEL_RVV = 0x10000
|
|
249
253
|
};
|
|
250
254
|
#else
|
|
@@ -273,7 +277,7 @@ enum YEncDecIsaLevel {
|
|
|
273
277
|
# if defined(__POPCNT__)
|
|
274
278
|
# if defined(__LZCNT__)
|
|
275
279
|
# define ISA_NATIVE (enum YEncDecIsaLevel)(_ISA_NATIVE | ISA_FEATURE_POPCNT | ISA_FEATURE_LZCNT)
|
|
276
|
-
# else
|
|
280
|
+
# else
|
|
277
281
|
# define ISA_NATIVE (enum YEncDecIsaLevel)(_ISA_NATIVE | ISA_FEATURE_POPCNT)
|
|
278
282
|
# endif
|
|
279
283
|
# else
|
|
@@ -281,18 +285,40 @@ enum YEncDecIsaLevel {
|
|
|
281
285
|
# endif
|
|
282
286
|
#endif
|
|
283
287
|
|
|
284
|
-
|
|
288
|
+
namespace RapidYenc {
|
|
289
|
+
int cpu_supports_isa();
|
|
290
|
+
int cpu_supports_crc_isa();
|
|
291
|
+
}
|
|
285
292
|
#endif // PLATFORM_X86
|
|
286
293
|
|
|
287
294
|
|
|
288
295
|
#ifdef __riscv
|
|
289
|
-
|
|
296
|
+
namespace RapidYenc {
|
|
297
|
+
bool cpu_supports_rvv();
|
|
298
|
+
}
|
|
290
299
|
#endif
|
|
291
300
|
#if defined(__riscv_vector) && defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(13,0,0)
|
|
292
301
|
// GCC added RVV intrinsics in GCC13
|
|
293
302
|
# undef __riscv_vector
|
|
303
|
+
#elif defined(__riscv_vector) && defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(14,0,0)
|
|
304
|
+
// ...however, GCC13 lacks necessary mask<>vector vreinterpret casts, and it crashes on type punning, so I can't be bothered trying to make it work
|
|
305
|
+
# undef __riscv_vector
|
|
306
|
+
#endif
|
|
307
|
+
#ifdef __riscv_vector
|
|
308
|
+
# include <riscv_vector.h>
|
|
309
|
+
# ifdef __riscv_v_intrinsic
|
|
310
|
+
# define RV(f) __riscv_##f
|
|
311
|
+
# else
|
|
312
|
+
# define RV(f) f
|
|
313
|
+
# endif
|
|
314
|
+
# if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 12000
|
|
315
|
+
# define RV_MASK_CAST(masksz, vecsz, vec) RV(vreinterpret_v_u##vecsz##m1_b##masksz)(vec)
|
|
316
|
+
# define RV_VEC_U8MF4_CAST(vec) RV(vlmul_trunc_v_u8m1_u8mf4)(RV(vreinterpret_v_b4_u8m1)(vec))
|
|
317
|
+
# else
|
|
318
|
+
# define RV_MASK_CAST(masksz, vecsz, vec) *(vbool##masksz##_t*)(&(vec))
|
|
319
|
+
# define RV_VEC_U8MF4_CAST(vec) *(vuint8mf4_t*)(&(vec))
|
|
320
|
+
# endif
|
|
294
321
|
#endif
|
|
295
|
-
|
|
296
322
|
|
|
297
323
|
#include <string.h>
|
|
298
324
|
#if !defined(_MSC_VER) || defined(_STDINT) || _MSC_VER >= 1900
|
|
@@ -300,7 +326,11 @@ bool cpu_supports_rvv();
|
|
|
300
326
|
# include <stddef.h>
|
|
301
327
|
#else
|
|
302
328
|
/* Workaround for older MSVC not supporting stdint.h - just pull it from V8 */
|
|
303
|
-
#
|
|
329
|
+
# if defined(NODE_GYP_MODULE_NAME) || defined(V8_DEPRECATION_WARNINGS)
|
|
330
|
+
# include <v8.h>
|
|
331
|
+
# else
|
|
332
|
+
# include "stdint.h"
|
|
333
|
+
# endif
|
|
304
334
|
#endif
|
|
305
335
|
|
|
306
336
|
|
package/src/crc.cc
CHANGED
|
@@ -1,16 +1,23 @@
|
|
|
1
1
|
#include "crc_common.h"
|
|
2
2
|
|
|
3
|
+
#if defined(PLATFORM_X86) && !defined(__ILP32__) && !defined(YENC_DISABLE_CRCUTIL)
|
|
4
|
+
// Use crcutil for computing CRC32 (generic implementation)
|
|
5
|
+
|
|
3
6
|
#include "interface.h"
|
|
4
7
|
crcutil_interface::CRC* crc = NULL;
|
|
8
|
+
#define GENERIC_CRC_INIT crc = crcutil_interface::CRC::Create(0xEDB88320, 0, 32, true, 0, 0, 0, 0, NULL)
|
|
9
|
+
// instance never deleted... oh well...
|
|
5
10
|
|
|
6
|
-
#if defined(PLATFORM_X86) && !defined(__ILP32__)
|
|
7
11
|
static uint32_t do_crc32_incremental_generic(const void* data, size_t length, uint32_t init) {
|
|
8
12
|
// use optimised ASM on x86 platforms
|
|
9
13
|
crcutil_interface::UINT64 tmp = init;
|
|
10
14
|
crc->Compute(data, length, &tmp);
|
|
11
15
|
return (uint32_t)tmp;
|
|
12
16
|
}
|
|
17
|
+
|
|
13
18
|
#else
|
|
19
|
+
// don't use crcutil
|
|
20
|
+
|
|
14
21
|
static uint32_t* HEDLEY_RESTRICT crc_slice_table;
|
|
15
22
|
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
16
23
|
# if defined(__GNUC__) || defined(__clang__)
|
|
@@ -121,33 +128,70 @@ static void generate_crc32_slice_table() {
|
|
|
121
128
|
#endif
|
|
122
129
|
}
|
|
123
130
|
}
|
|
131
|
+
|
|
132
|
+
#define GENERIC_CRC_INIT generate_crc32_slice_table()
|
|
124
133
|
#endif
|
|
125
134
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
135
|
+
|
|
136
|
+
namespace RapidYenc {
|
|
137
|
+
|
|
138
|
+
// workaround MSVC complaining "unary minus operator applied to unsigned type, result still unsigned"
|
|
139
|
+
#define NEGATE(n) (uint32_t)(-((int32_t)(n)))
|
|
140
|
+
uint32_t crc32_multiply_generic(uint32_t a, uint32_t b) {
|
|
141
|
+
uint32_t res = 0;
|
|
142
|
+
for(int i=0; i<31; i++) {
|
|
143
|
+
res ^= NEGATE(b>>31) & a;
|
|
144
|
+
a = ((a >> 1) ^ (0xEDB88320 & NEGATE(a&1)));
|
|
145
|
+
b <<= 1;
|
|
146
|
+
}
|
|
147
|
+
res ^= NEGATE(b>>31) & a;
|
|
148
|
+
return res;
|
|
129
149
|
}
|
|
150
|
+
#undef NEGATE
|
|
130
151
|
|
|
152
|
+
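const uint32_t crc_power[32] = { // pre-computed x^(2^n) modulo the CRC-32 polynomial, in bit-reflected form (0x80000000 is 1, 0x40000000 is x); "2^(2^n)" when x is read as the polynomial 2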
|
|
153
|
+
0x40000000, 0x20000000, 0x08000000, 0x00800000, 0x00008000, 0xedb88320, 0xb1e6b092, 0xa06a2517,
|
|
154
|
+
0xed627dae, 0x88d14467, 0xd7bbfe6a, 0xec447f11, 0x8e7ea170, 0x6427800e, 0x4d47bae0, 0x09fe548f,
|
|
155
|
+
0x83852d0f, 0x30362f1a, 0x7b5a9cc3, 0x31fec169, 0x9fec022a, 0x6c8dedc4, 0x15d6874d, 0x5fde7a4e,
|
|
156
|
+
0xbad90e37, 0x2e4e5eef, 0x4eaba214, 0xa8a472c0, 0x429a969e, 0x148d302a, 0xc40ba6d0, 0xc4e22c3c
|
|
157
|
+
};
|
|
131
158
|
|
|
132
|
-
uint32_t
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
159
|
+
uint32_t crc32_shift_generic(uint32_t crc1, uint32_t n) {
|
|
160
|
+
uint32_t result = crc1;
|
|
161
|
+
#ifdef __GNUC__
|
|
162
|
+
while(n) {
|
|
163
|
+
result = crc32_multiply_generic(result, crc_power[__builtin_ctz(n)]);
|
|
164
|
+
n &= n-1;
|
|
165
|
+
}
|
|
166
|
+
#elif defined(_MSC_VER)
|
|
167
|
+
unsigned long power;
|
|
168
|
+
while(_BitScanForward(&power, n)) {
|
|
169
|
+
result = crc32_multiply_generic(result, crc_power[power]);
|
|
170
|
+
n &= n-1;
|
|
171
|
+
}
|
|
172
|
+
#else
|
|
173
|
+
unsigned power = 0;
|
|
174
|
+
while(n) {
|
|
175
|
+
if(n & 1) {
|
|
176
|
+
result = crc32_multiply_generic(result, crc_power[power]);
|
|
177
|
+
}
|
|
178
|
+
n >>= 1;
|
|
179
|
+
power++;
|
|
180
|
+
}
|
|
181
|
+
#endif
|
|
182
|
+
return result;
|
|
136
183
|
}
|
|
184
|
+
} // namespace
|
|
185
|
+
|
|
137
186
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
187
|
+
namespace RapidYenc {
|
|
188
|
+
crc_func _do_crc32_incremental = &do_crc32_incremental_generic;
|
|
189
|
+
crc_mul_func _crc32_shift = &crc32_shift_generic;
|
|
190
|
+
crc_mul_func _crc32_multiply = &crc32_multiply_generic;
|
|
191
|
+
int _crc32_isa = ISA_GENERIC;
|
|
142
192
|
}
|
|
143
193
|
|
|
144
|
-
void crc_clmul_set_funcs();
|
|
145
|
-
void crc_clmul256_set_funcs();
|
|
146
|
-
void crc_arm_set_funcs();
|
|
147
194
|
|
|
148
|
-
#ifdef PLATFORM_X86
|
|
149
|
-
int cpu_supports_crc_isa();
|
|
150
|
-
#endif
|
|
151
195
|
|
|
152
196
|
#if defined(PLATFORM_ARM) && defined(_WIN32)
|
|
153
197
|
# define WIN32_LEAN_AND_MEAN
|
|
@@ -175,14 +219,16 @@ static unsigned long getauxval(unsigned long cap) {
|
|
|
175
219
|
# endif
|
|
176
220
|
# endif
|
|
177
221
|
#endif
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
#
|
|
184
|
-
generate_crc32_slice_table();
|
|
222
|
+
#if defined(__riscv) && defined(__has_include)
|
|
223
|
+
# if __has_include(<asm/hwprobe.h>)
|
|
224
|
+
# include <asm/hwprobe.h>
|
|
225
|
+
# include <asm/unistd.h>
|
|
226
|
+
# include <unistd.h>
|
|
227
|
+
# endif
|
|
185
228
|
#endif
|
|
229
|
+
|
|
230
|
+
void RapidYenc::crc32_init() {
|
|
231
|
+
GENERIC_CRC_INIT;
|
|
186
232
|
|
|
187
233
|
#ifdef PLATFORM_X86
|
|
188
234
|
int support = cpu_supports_crc_isa();
|
|
@@ -193,31 +239,59 @@ void crc_init() {
|
|
|
193
239
|
#endif
|
|
194
240
|
#ifdef PLATFORM_ARM
|
|
195
241
|
# ifdef __APPLE__
|
|
196
|
-
int
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
if(
|
|
202
|
-
|
|
203
|
-
getauxval(AT_HWCAP2) & HWCAP2_CRC32
|
|
204
|
-
# elif defined(AT_HWCAP) && defined(HWCAP_CRC32)
|
|
205
|
-
getauxval(AT_HWCAP) & HWCAP_CRC32
|
|
206
|
-
# elif defined(ANDROID_CPU_FAMILY_ARM) && defined(__aarch64__)
|
|
207
|
-
android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_CRC32
|
|
208
|
-
# elif defined(ANDROID_CPU_FAMILY_ARM) /* aarch32 */
|
|
209
|
-
android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_CRC32
|
|
210
|
-
# elif defined(_WIN32)
|
|
211
|
-
IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)
|
|
212
|
-
# elif defined(__APPLE__)
|
|
213
|
-
supported
|
|
214
|
-
# elif defined(__ARM_FEATURE_CRC32)
|
|
215
|
-
true /* assume available if compiled as such */
|
|
242
|
+
int supports_crc = 0;
|
|
243
|
+
int supports_pmull = 0;
|
|
244
|
+
size_t len = sizeof(supports_crc);
|
|
245
|
+
if(sysctlbyname("hw.optional.armv8_crc32", &supports_crc, &len, NULL, 0))
|
|
246
|
+
supports_crc = 0;
|
|
247
|
+
if(sysctlbyname("hw.optional.arm.FEAT_PMULL", &supports_pmull, &len, NULL, 0))
|
|
248
|
+
supports_pmull = 0;
|
|
216
249
|
# else
|
|
217
|
-
|
|
250
|
+
bool supports_crc = false;
|
|
251
|
+
bool supports_pmull = false;
|
|
252
|
+
# if defined(AT_HWCAP2) && defined(HWCAP2_CRC32)
|
|
253
|
+
supports_crc = getauxval(AT_HWCAP2) & HWCAP2_CRC32;
|
|
254
|
+
# elif defined(AT_HWCAP) && defined(HWCAP_CRC32)
|
|
255
|
+
supports_crc = getauxval(AT_HWCAP) & HWCAP_CRC32;
|
|
256
|
+
# elif defined(ANDROID_CPU_FAMILY_ARM) && defined(__aarch64__)
|
|
257
|
+
supports_crc = android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_CRC32;
|
|
258
|
+
supports_pmull = android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_PMULL;
|
|
259
|
+
# elif defined(ANDROID_CPU_FAMILY_ARM) /* aarch32 */
|
|
260
|
+
supports_crc = android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_CRC32;
|
|
261
|
+
supports_pmull = android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_PMULL;
|
|
262
|
+
# elif defined(_WIN32)
|
|
263
|
+
supports_crc = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);
|
|
264
|
+
supports_pmull = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE);
|
|
265
|
+
# else
|
|
266
|
+
#ifdef __ARM_FEATURE_CRC32
|
|
267
|
+
supports_crc = true; /* assume available if compiled as such */
|
|
268
|
+
#endif
|
|
269
|
+
#ifdef __ARM_FEATURE_CRYPTO
|
|
270
|
+
supports_pmull = true;
|
|
271
|
+
#endif
|
|
272
|
+
# endif
|
|
273
|
+
# if defined(AT_HWCAP2) && defined(HWCAP2_PMULL)
|
|
274
|
+
supports_pmull = getauxval(AT_HWCAP2) & HWCAP2_PMULL;
|
|
275
|
+
# elif defined(AT_HWCAP) && defined(HWCAP_PMULL)
|
|
276
|
+
supports_pmull = getauxval(AT_HWCAP) & HWCAP_PMULL;
|
|
277
|
+
# endif
|
|
218
278
|
# endif
|
|
219
|
-
|
|
279
|
+
|
|
280
|
+
if(supports_crc) {
|
|
220
281
|
crc_arm_set_funcs();
|
|
282
|
+
if(supports_pmull) crc_pmull_set_funcs();
|
|
221
283
|
}
|
|
222
284
|
#endif
|
|
285
|
+
#ifdef __riscv
|
|
286
|
+
# if defined(RISCV_HWPROBE_KEY_IMA_EXT_0) && defined(__NR_riscv_hwprobe)
|
|
287
|
+
const int rv_hwprobe_ext_zbc = 1 << 7, rv_hwprobe_ext_zbkc = 1 << 9;
|
|
288
|
+
struct riscv_hwprobe p;
|
|
289
|
+
p.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
|
|
290
|
+
if(!syscall(__NR_riscv_hwprobe, &p, 1, 0, NULL, 0)) {
|
|
291
|
+
if(p.value & (rv_hwprobe_ext_zbc | rv_hwprobe_ext_zbkc)) {
|
|
292
|
+
crc_riscv_set_funcs();
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
# endif
|
|
296
|
+
#endif
|
|
223
297
|
}
|
package/src/crc.h
CHANGED
|
@@ -1,27 +1,91 @@
|
|
|
1
1
|
#ifndef __YENC_CRC_H
|
|
2
2
|
#define __YENC_CRC_H
|
|
3
|
+
#include <stdlib.h> // for llabs
|
|
3
4
|
|
|
4
|
-
#
|
|
5
|
-
|
|
5
|
+
#if !defined(__GNUC__) && defined(_MSC_VER)
|
|
6
|
+
# include <intrin.h>
|
|
6
7
|
#endif
|
|
7
8
|
|
|
9
|
+
namespace RapidYenc {
|
|
8
10
|
|
|
9
11
|
|
|
10
12
|
typedef uint32_t (*crc_func)(const void*, size_t, uint32_t);
|
|
11
13
|
extern crc_func _do_crc32_incremental;
|
|
12
|
-
extern int _crc32_isa;
|
|
13
|
-
#define do_crc32 (*_do_crc32_incremental)
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
uint32_t
|
|
17
|
-
|
|
15
|
+
extern int _crc32_isa;
|
|
16
|
+
static inline uint32_t crc32(const void* data, size_t length, uint32_t init) {
|
|
17
|
+
return (*_do_crc32_incremental)(data, length, init);
|
|
18
|
+
}
|
|
18
19
|
static inline int crc32_isa_level() {
|
|
19
20
|
return _crc32_isa;
|
|
20
21
|
}
|
|
21
22
|
|
|
22
23
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
24
|
+
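// computes `n % 0xffffffff` by adding the two 32-bit halves with end-around carry; differs from a true modulo only in that non-zero multiples of 0xffffffff yield 0xffffffff instead of 0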
|
|
25
|
+
static inline uint32_t crc32_powmod(uint64_t n) {
|
|
26
|
+
#ifdef __GNUC__
|
|
27
|
+
unsigned res;
|
|
28
|
+
unsigned carry = __builtin_uadd_overflow(n >> 32, n, &res);
|
|
29
|
+
res += carry;
|
|
30
|
+
return res;
|
|
31
|
+
#elif defined(_MSC_VER) && defined(PLATFORM_X86)
|
|
32
|
+
unsigned res;
|
|
33
|
+
unsigned char carry = _addcarry_u32(0, n >> 32, n, &res);
|
|
34
|
+
_addcarry_u32(carry, res, 0, &res);
|
|
35
|
+
return res;
|
|
36
|
+
#else
|
|
37
|
+
n = (n >> 32) + (n & 0xffffffff);
|
|
38
|
+
n += n >> 32;
|
|
39
|
+
return n;
|
|
26
40
|
#endif
|
|
41
|
+
}
|
|
42
|
+
// computes `crc32_powmod(n*8)` avoiding overflow
|
|
43
|
+
static inline uint32_t crc32_bytepow(uint64_t n) {
|
|
44
|
+
#if defined(__GNUC__) || defined(_MSC_VER)
|
|
45
|
+
unsigned res = crc32_powmod(n);
|
|
46
|
+
# ifdef _MSC_VER
|
|
47
|
+
return _rotl(res, 3);
|
|
48
|
+
# else
|
|
49
|
+
return (res << 3) | (res >> 29);
|
|
50
|
+
# endif
|
|
51
|
+
#else
|
|
52
|
+
n = (n >> 32) + (n & 0xffffffff);
|
|
53
|
+
n <<= 3;
|
|
54
|
+
n += n >> 32;
|
|
55
|
+
return n;
|
|
27
56
|
#endif
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
typedef uint32_t (*crc_mul_func)(uint32_t, uint32_t);
|
|
60
|
+
extern crc_mul_func _crc32_shift;
|
|
61
|
+
extern crc_mul_func _crc32_multiply;
|
|
62
|
+
static inline uint32_t crc32_shift(uint32_t a, uint32_t b) {
|
|
63
|
+
return (*_crc32_shift)(a, b);
|
|
64
|
+
}
|
|
65
|
+
static inline uint32_t crc32_multiply(uint32_t a, uint32_t b) {
|
|
66
|
+
return (*_crc32_multiply)(a, b);
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
static inline uint32_t crc32_combine(uint32_t crc1, uint32_t crc2, uint64_t len2) {
|
|
70
|
+
return crc32_shift(crc1, crc32_bytepow(len2)) ^ crc2;
|
|
71
|
+
}
|
|
72
|
+
static inline uint32_t crc32_zeros(uint32_t crc1, uint64_t len) {
|
|
73
|
+
return ~crc32_shift(~crc1, crc32_bytepow(len));
|
|
74
|
+
}
|
|
75
|
+
static inline uint32_t crc32_unzero(uint32_t crc1, uint64_t len) {
|
|
76
|
+
return ~crc32_shift(~crc1, ~crc32_bytepow(len));
|
|
77
|
+
}
|
|
78
|
+
static inline uint32_t crc32_2pow(int64_t n) {
|
|
79
|
+
uint32_t sign = (uint32_t)(n >> 63);
|
|
80
|
+
return crc32_shift(0x80000000, crc32_powmod(llabs(n)) ^ sign);
|
|
81
|
+
}
|
|
82
|
+
static inline uint32_t crc32_256pow(uint64_t n) {
|
|
83
|
+
return crc32_shift(0x80000000, crc32_bytepow(n));
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
void crc32_init();
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
} // namespace
|
|
91
|
+
#endif // defined(__YENC_CRC_H)
|