yencode 1.1.5 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +130 -189
  2. package/binding.gyp +115 -6
  3. package/index.js +2 -0
  4. package/package.json +1 -1
  5. package/src/common.h +37 -7
  6. package/src/crc.cc +121 -47
  7. package/src/crc.h +74 -10
  8. package/src/crc_arm.cc +51 -34
  9. package/src/crc_arm_pmull.cc +215 -0
  10. package/src/crc_common.h +22 -0
  11. package/src/crc_folding.cc +154 -16
  12. package/src/crc_folding_256.cc +7 -14
  13. package/src/crc_riscv.cc +251 -0
  14. package/src/decoder.cc +373 -13
  15. package/src/decoder.h +10 -14
  16. package/src/decoder_avx.cc +5 -6
  17. package/src/decoder_avx2.cc +8 -9
  18. package/src/decoder_avx2_base.h +7 -11
  19. package/src/decoder_common.h +56 -373
  20. package/src/decoder_neon.cc +13 -19
  21. package/src/decoder_neon64.cc +12 -15
  22. package/src/decoder_rvv.cc +280 -0
  23. package/src/decoder_sse2.cc +26 -5
  24. package/src/decoder_sse_base.h +20 -40
  25. package/src/decoder_ssse3.cc +5 -6
  26. package/src/decoder_vbmi2.cc +6 -13
  27. package/src/encoder.cc +42 -26
  28. package/src/encoder.h +5 -7
  29. package/src/encoder_avx.cc +3 -3
  30. package/src/encoder_avx2.cc +3 -3
  31. package/src/encoder_avx_base.h +3 -0
  32. package/src/encoder_common.h +26 -32
  33. package/src/encoder_neon.cc +6 -3
  34. package/src/encoder_rvv.cc +13 -26
  35. package/src/encoder_sse2.cc +3 -2
  36. package/src/encoder_sse_base.h +2 -0
  37. package/src/encoder_ssse3.cc +3 -3
  38. package/src/encoder_vbmi2.cc +6 -7
  39. package/src/platform.cc +24 -23
  40. package/src/yencode.cc +54 -11
  41. package/test/_speedbase.js +4 -2
  42. package/test/speeddec.js +25 -16
  43. package/test/speedenc.js +21 -17
  44. package/test/testcrc.js +17 -1
  45. package/test/testcrcfuncs.c +53 -0
  46. package/test/testdec.js +1 -0
package/binding.gyp CHANGED
@@ -1,7 +1,8 @@
1
1
  {
2
2
  "variables": {
3
3
  "enable_native_tuning%": 1,
4
- "disable_avx256%": 0
4
+ "disable_avx256%": 0,
5
+ "disable_crcutil%": 0
5
6
  },
6
7
  "target_defaults": {
7
8
  "conditions": [
@@ -41,6 +42,9 @@
41
42
  ['disable_avx256!=0', {
42
43
  "defines": ["YENC_DISABLE_AVX256=1"]
43
44
  }],
45
+ ['disable_crcutil!=0', {
46
+ "defines": ["YENC_DISABLE_CRCUTIL=1"]
47
+ }],
44
48
  ['OS!="win" and enable_native_tuning!=0', {
45
49
  "defines": ["YENC_BUILD_NATIVE=1"]
46
50
  }],
@@ -74,7 +78,7 @@
74
78
  "targets": [
75
79
  {
76
80
  "target_name": "yencode",
77
- "dependencies": ["crcutil", "yencode_sse2", "yencode_ssse3", "yencode_clmul", "yencode_clmul256", "yencode_avx", "yencode_avx2", "yencode_vbmi2", "yencode_neon", "yencode_armcrc", "yencode_rvv"],
81
+ "dependencies": ["yencode_sse2", "yencode_ssse3", "yencode_clmul", "yencode_clmul256", "yencode_avx", "yencode_avx2", "yencode_vbmi2", "yencode_neon", "yencode_armcrc", "yencode_pmull", "yencode_rvv", "yencode_zbkc"],
78
82
  "sources": [
79
83
  "src/yencode.cc",
80
84
  "src/platform.cc",
@@ -82,7 +86,12 @@
82
86
  "src/decoder.cc",
83
87
  "src/crc.cc"
84
88
  ],
85
- "include_dirs": ["crcutil-1.0/code","crcutil-1.0/examples"]
89
+ "conditions": [
90
+ ['target_arch in "ia32 x64" and disable_crcutil==0', {
91
+ "dependencies": ["crcutil"],
92
+ "include_dirs": ["crcutil-1.0/code","crcutil-1.0/examples"]
93
+ }]
94
+ ]
86
95
  },
87
96
  {
88
97
  "target_name": "yencode_sse2",
@@ -324,7 +333,8 @@
324
333
  "target_name": "yencode_rvv",
325
334
  "type": "static_library",
326
335
  "sources": [
327
- "src/encoder_rvv.cc"
336
+ "src/encoder_rvv.cc",
337
+ "src/decoder_rvv.cc"
328
338
  ],
329
339
  "cflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
330
340
  "cxxflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
@@ -338,9 +348,13 @@
338
348
  "variables": {"supports_rvv%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/encoder_rvv.cc -march=rv64gcv 2>/dev/null || true)"},
339
349
  "conditions": [
340
350
  ['supports_rvv!=""', {
351
+ "cflags!": ["-march=native"],
352
+ "cxxflags!": ["-march=native"],
341
353
  "cflags": ["-march=rv64gcv"],
342
354
  "cxxflags": ["-march=rv64gcv"],
343
355
  "xcode_settings": {
356
+ "OTHER_CFLAGS!": ["-march=native"],
357
+ "OTHER_CXXFLAGS!": ["-march=native"],
344
358
  "OTHER_CFLAGS": ["-march=rv64gcv"],
345
359
  "OTHER_CXXFLAGS": ["-march=rv64gcv"],
346
360
  }
@@ -351,9 +365,13 @@
351
365
  "variables": {"supports_rvv%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/encoder_rvv.cc -march=rv32gcv 2>/dev/null || true)"},
352
366
  "conditions": [
353
367
  ['supports_rvv!=""', {
368
+ "cflags!": ["-march=native"],
369
+ "cxxflags!": ["-march=native"],
354
370
  "cflags": ["-march=rv32gcv"],
355
371
  "cxxflags": ["-march=rv32gcv"],
356
372
  "xcode_settings": {
373
+ "OTHER_CFLAGS!": ["-march=native"],
374
+ "OTHER_CXXFLAGS!": ["-march=native"],
357
375
  "OTHER_CFLAGS": ["-march=rv32gcv"],
358
376
  "OTHER_CXXFLAGS": ["-march=rv32gcv"],
359
377
  }
@@ -399,8 +417,94 @@
399
417
  ]
400
418
  },
401
419
  {
402
- "target_name": "crcutil",
420
+ "target_name": "yencode_pmull",
403
421
  "type": "static_library",
422
+ "sources": [
423
+ "src/crc_arm_pmull.cc"
424
+ ],
425
+ "cflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
426
+ "cxxflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
427
+ "xcode_settings": {
428
+ "OTHER_CFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
429
+ "OTHER_CXXFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"]
430
+ },
431
+ "msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
432
+ "conditions": [
433
+ ['target_arch in "arm arm64"', {
434
+ "cflags!": ["-march=native"],
435
+ "cxxflags!": ["-march=native"],
436
+ "cflags": ["-march=armv8-a+crc+crypto"],
437
+ "cxxflags": ["-march=armv8-a+crc+crypto"],
438
+ "xcode_settings": {
439
+ "OTHER_CFLAGS!": ["-march=native"],
440
+ "OTHER_CXXFLAGS!": ["-march=native"],
441
+ "OTHER_CFLAGS": ["-march=armv8-a+crc+crypto"],
442
+ "OTHER_CXXFLAGS": ["-march=armv8-a+crc+crypto"],
443
+ }
444
+ }],
445
+ ['OS!="win" and target_arch=="arm"', {
446
+ "cflags": ["-mfpu=neon","-fno-lto"],
447
+ "cxxflags": ["-mfpu=neon","-fno-lto"],
448
+ "xcode_settings": {
449
+ "OTHER_CFLAGS": ["-mfpu=neon","-fno-lto"],
450
+ "OTHER_CXXFLAGS": ["-mfpu=neon","-fno-lto"]
451
+ }
452
+ }]
453
+ ]
454
+ },
455
+ {
456
+ "target_name": "yencode_zbkc",
457
+ "type": "static_library",
458
+ "sources": [
459
+ "src/crc_riscv.cc"
460
+ ],
461
+ "cflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
462
+ "cxxflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
463
+ "xcode_settings": {
464
+ "OTHER_CFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
465
+ "OTHER_CXXFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"]
466
+ },
467
+ "msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
468
+ "conditions": [
469
+ ['target_arch=="riscv64" and OS!="win"', {
470
+ "variables": {"supports_zbkc%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/crc_riscv.cc -march=rv64gc_zbkc 2>/dev/null || true)"},
471
+ "conditions": [
472
+ ['supports_zbkc!=""', {
473
+ "cflags!": ["-march=native"],
474
+ "cxxflags!": ["-march=native"],
475
+ "cflags": ["-march=rv64gc_zbkc"],
476
+ "cxxflags": ["-march=rv64gc_zbkc"],
477
+ "xcode_settings": {
478
+ "OTHER_CFLAGS!": ["-march=native"],
479
+ "OTHER_CXXFLAGS!": ["-march=native"],
480
+ "OTHER_CFLAGS": ["-march=rv64gc_zbkc"],
481
+ "OTHER_CXXFLAGS": ["-march=rv64gc_zbkc"],
482
+ }
483
+ }]
484
+ ]
485
+ }],
486
+ ['target_arch=="riscv32" and OS!="win"', {
487
+ "variables": {"supports_zbkc%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/crc_riscv.cc -march=rv32gc_zbkc 2>/dev/null || true)"},
488
+ "conditions": [
489
+ ['supports_zbkc!=""', {
490
+ "cflags!": ["-march=native"],
491
+ "cxxflags!": ["-march=native"],
492
+ "cflags": ["-march=rv32gc_zbkc"],
493
+ "cxxflags": ["-march=rv32gc_zbkc"],
494
+ "xcode_settings": {
495
+ "OTHER_CFLAGS!": ["-march=native"],
496
+ "OTHER_CXXFLAGS!": ["-march=native"],
497
+ "OTHER_CFLAGS": ["-march=rv32gc_zbkc"],
498
+ "OTHER_CXXFLAGS": ["-march=rv32gc_zbkc"],
499
+ }
500
+ }]
501
+ ]
502
+ }]
503
+ ]
504
+ },
505
+ {
506
+ "target_name": "crcutil",
507
+ "type": "none",
404
508
  "sources": [
405
509
  "crcutil-1.0/code/crc32c_sse4.cc",
406
510
  "crcutil-1.0/code/multiword_64_64_cl_i386_mmx.cc",
@@ -422,7 +526,12 @@
422
526
  },
423
527
  "msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
424
528
  "include_dirs": ["crcutil-1.0/code", "crcutil-1.0/tests"],
425
- "defines": ["CRCUTIL_USE_MM_CRC32=0"]
529
+ "defines": ["CRCUTIL_USE_MM_CRC32=0"],
530
+ "conditions": [
531
+ ['target_arch in "ia32 x64" and disable_crcutil==0', {
532
+ "type": "static_library",
533
+ }]
534
+ ]
426
535
  }
427
536
  ]
428
537
  }
package/index.js CHANGED
@@ -190,6 +190,8 @@ module.exports = {
190
190
  crc32: y.crc32,
191
191
  crc32_combine: y.crc32_combine,
192
192
  crc32_zeroes: y.crc32_zeroes,
193
+ crc32_multiply: y.crc32_multiply,
194
+ crc32_shift: y.crc32_shift,
193
195
 
194
196
  post: function(filename, data, line_size) {
195
197
  if(!line_size) line_size = 128;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "yencode",
3
- "version": "1.1.5",
3
+ "version": "1.2.1",
4
4
  "description": "SIMD accelerated yEnc encoder/decoder and CRC32 calculator",
5
5
  "keywords": [
6
6
  "yenc",
package/src/common.h CHANGED
@@ -125,7 +125,7 @@
125
125
  #ifdef __POPCNT__
126
126
  #include <nmmintrin.h>
127
127
  // POPCNT can never return a negative result, but GCC doesn't seem to realise this, so typecast it to hint it better
128
- #define popcnt32 (unsigned int)_mm_popcnt_u32
128
+ #define popcnt32 (unsigned int)_mm_popcnt_u32
129
129
  #endif
130
130
 
131
131
  #if defined(__AVX2__) || defined(__AVX512F__)
@@ -209,7 +209,9 @@ static HEDLEY_ALWAYS_INLINE uint8x16x4_t vcreate4_u8(uint8x16_t a, uint8x16_t b,
209
209
  # undef _CREATE_TUPLE
210
210
  #endif
211
211
  #ifdef PLATFORM_ARM
212
- bool cpu_supports_neon();
212
+ namespace RapidYenc {
213
+ bool cpu_supports_neon();
214
+ }
213
215
  #endif
214
216
 
215
217
  #ifdef _MSC_VER
@@ -240,11 +242,13 @@ enum YEncDecIsaLevel {
240
242
  enum YEncDecIsaLevel {
241
243
  ISA_GENERIC = 0,
242
244
  ISA_FEATURE_CRC = 8,
245
+ ISA_FEATURE_PMULL = 0x40,
243
246
  ISA_LEVEL_NEON = 0x1000
244
247
  };
245
248
  #elif defined(__riscv)
246
249
  enum YEncDecIsaLevel {
247
250
  ISA_GENERIC = 0,
251
+ ISA_FEATURE_ZBC = 16,
248
252
  ISA_LEVEL_RVV = 0x10000
249
253
  };
250
254
  #else
@@ -273,7 +277,7 @@ enum YEncDecIsaLevel {
273
277
  # if defined(__POPCNT__)
274
278
  # if defined(__LZCNT__)
275
279
  # define ISA_NATIVE (enum YEncDecIsaLevel)(_ISA_NATIVE | ISA_FEATURE_POPCNT | ISA_FEATURE_LZCNT)
276
- # else
280
+ # else
277
281
  # define ISA_NATIVE (enum YEncDecIsaLevel)(_ISA_NATIVE | ISA_FEATURE_POPCNT)
278
282
  # endif
279
283
  # else
@@ -281,18 +285,40 @@ enum YEncDecIsaLevel {
281
285
  # endif
282
286
  #endif
283
287
 
284
- int cpu_supports_isa();
288
+ namespace RapidYenc {
289
+ int cpu_supports_isa();
290
+ int cpu_supports_crc_isa();
291
+ }
285
292
  #endif // PLATFORM_X86
286
293
 
287
294
 
288
295
  #ifdef __riscv
289
- bool cpu_supports_rvv();
296
+ namespace RapidYenc {
297
+ bool cpu_supports_rvv();
298
+ }
290
299
  #endif
291
300
  #if defined(__riscv_vector) && defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(13,0,0)
292
301
  // GCC added RVV intrinsics in GCC13
293
302
  # undef __riscv_vector
303
+ #elif defined(__riscv_vector) && defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(14,0,0)
304
+ // ...however, GCC13 lacks necessary mask<>vector vreinterpret casts, and it crashes on type punning, so I can't be bothered trying to make it work
305
+ # undef __riscv_vector
306
+ #endif
307
+ #ifdef __riscv_vector
308
+ # include <riscv_vector.h>
309
+ # ifdef __riscv_v_intrinsic
310
+ # define RV(f) __riscv_##f
311
+ # else
312
+ # define RV(f) f
313
+ # endif
314
+ # if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 12000
315
+ # define RV_MASK_CAST(masksz, vecsz, vec) RV(vreinterpret_v_u##vecsz##m1_b##masksz)(vec)
316
+ # define RV_VEC_U8MF4_CAST(vec) RV(vlmul_trunc_v_u8m1_u8mf4)(RV(vreinterpret_v_b4_u8m1)(vec))
317
+ # else
318
+ # define RV_MASK_CAST(masksz, vecsz, vec) *(vbool##masksz##_t*)(&(vec))
319
+ # define RV_VEC_U8MF4_CAST(vec) *(vuint8mf4_t*)(&(vec))
320
+ # endif
294
321
  #endif
295
-
296
322
 
297
323
  #include <string.h>
298
324
  #if !defined(_MSC_VER) || defined(_STDINT) || _MSC_VER >= 1900
@@ -300,7 +326,11 @@ bool cpu_supports_rvv();
300
326
  # include <stddef.h>
301
327
  #else
302
328
  /* Workaround for older MSVC not supporting stdint.h - just pull it from V8 */
303
- # include <v8.h>
329
+ # if defined(NODE_GYP_MODULE_NAME) || defined(V8_DEPRECATION_WARNINGS)
330
+ # include <v8.h>
331
+ # else
332
+ # include "stdint.h"
333
+ # endif
304
334
  #endif
305
335
 
306
336
 
package/src/crc.cc CHANGED
@@ -1,16 +1,23 @@
1
1
  #include "crc_common.h"
2
2
 
3
+ #if defined(PLATFORM_X86) && !defined(__ILP32__) && !defined(YENC_DISABLE_CRCUTIL)
4
+ // Use crcutil for computing CRC32 (generic implementation)
5
+
3
6
  #include "interface.h"
4
7
  crcutil_interface::CRC* crc = NULL;
8
+ #define GENERIC_CRC_INIT crc = crcutil_interface::CRC::Create(0xEDB88320, 0, 32, true, 0, 0, 0, 0, NULL)
9
+ // instance never deleted... oh well...
5
10
 
6
- #if defined(PLATFORM_X86) && !defined(__ILP32__)
7
11
  static uint32_t do_crc32_incremental_generic(const void* data, size_t length, uint32_t init) { // feeds `length` bytes at `data` to the crcutil instance `crc`, starting from `init`, and returns the updated 32-bit value
8
12
  // use optimised ASM on x86 platforms
9
13
  crcutil_interface::UINT64 tmp = init; // Compute() updates its accumulator through a UINT64 pointer, so widen init first
10
14
  crc->Compute(data, length, &tmp); // NOTE(review): `crc` is NULL until GENERIC_CRC_INIT has run - confirm the init routine is always called first
11
15
  return (uint32_t)tmp; // narrow the accumulator back to 32 bits
12
16
  }
17
+
13
18
  #else
19
+ // don't use crcutil
20
+
14
21
  static uint32_t* HEDLEY_RESTRICT crc_slice_table;
15
22
  #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
16
23
  # if defined(__GNUC__) || defined(__clang__)
@@ -121,33 +128,70 @@ static void generate_crc32_slice_table() {
121
128
  #endif
122
129
  }
123
130
  }
131
+
132
+ #define GENERIC_CRC_INIT generate_crc32_slice_table()
124
133
  #endif
125
134
 
126
- extern "C" {
127
- crc_func _do_crc32_incremental = &do_crc32_incremental_generic;
128
- int _crc32_isa = ISA_GENERIC;
135
+
136
+ namespace RapidYenc {
137
+
138
+ // workaround MSVC complaining "unary minus operator applied to unsigned type, result still unsigned"
139
+ #define NEGATE(n) (uint32_t)(-((int32_t)(n)))
140
+ uint32_t crc32_multiply_generic(uint32_t a, uint32_t b) { // bit-by-bit shift-and-XOR product of a and b, reduced with the constant 0xEDB88320 (the same polynomial handed to crcutil above)
141
+ uint32_t res = 0;
142
+ for(int i=0; i<31; i++) { // covers 31 of b's 32 bits; the final bit is handled after the loop
143
+ res ^= NEGATE(b>>31) & a; // NEGATE turns the top bit of b into an all-ones/all-zeroes mask, so a is XORed in only when that bit is set
144
+ a = ((a >> 1) ^ (0xEDB88320 & NEGATE(a&1))); // step a along by one bit, XORing in the polynomial when a set bit is shifted out
145
+ b <<= 1; // bring the next bit of b to the top
146
+ }
147
+ res ^= NEGATE(b>>31) & a; // last bit of b: same accumulate step, without the (unneeded) update of a
148
+ return res;
129
149
  }
150
+ #undef NEGATE
130
151
 
152
+ const uint32_t crc_power[32] = { // pre-computed 2^(2^n)
153
+ 0x40000000, 0x20000000, 0x08000000, 0x00800000, 0x00008000, 0xedb88320, 0xb1e6b092, 0xa06a2517,
154
+ 0xed627dae, 0x88d14467, 0xd7bbfe6a, 0xec447f11, 0x8e7ea170, 0x6427800e, 0x4d47bae0, 0x09fe548f,
155
+ 0x83852d0f, 0x30362f1a, 0x7b5a9cc3, 0x31fec169, 0x9fec022a, 0x6c8dedc4, 0x15d6874d, 0x5fde7a4e,
156
+ 0xbad90e37, 0x2e4e5eef, 0x4eaba214, 0xa8a472c0, 0x429a969e, 0x148d302a, 0xc40ba6d0, 0xc4e22c3c
157
+ };
131
158
 
132
- uint32_t do_crc32_combine(uint32_t crc1, uint32_t crc2, size_t len2) {
133
- crcutil_interface::UINT64 crc1_ = crc1, crc2_ = crc2;
134
- crc->Concatenate(crc2_, 0, len2, &crc1_);
135
- return (uint32_t)crc1_;
159
+ uint32_t crc32_shift_generic(uint32_t crc1, uint32_t n) { // multiplies crc1 by crc_power[k] for every set bit k of n; given the table's "2^(2^n)" comment this presumably amounts to multiplying by 2^n - TODO confirm
160
+ uint32_t result = crc1; // returned unchanged when n == 0, since no loop below executes
161
+ #ifdef __GNUC__
162
+ while(n) {
163
+ result = crc32_multiply_generic(result, crc_power[__builtin_ctz(n)]); // __builtin_ctz gives the index of the lowest set bit; n is non-zero here thanks to the loop condition
164
+ n &= n-1; // clear the lowest set bit that was just consumed
165
+ }
166
+ #elif defined(_MSC_VER)
167
+ unsigned long power;
168
+ while(_BitScanForward(&power, n)) { // stores the index of the lowest set bit in `power`; the loop ends once the scan reports no set bit
169
+ result = crc32_multiply_generic(result, crc_power[power]);
170
+ n &= n-1; // clear the lowest set bit that was just consumed
171
+ }
172
+ #else
173
+ unsigned power = 0; // portable fallback: walk the bits of n from least significant upwards
174
+ while(n) {
175
+ if(n & 1) {
176
+ result = crc32_multiply_generic(result, crc_power[power]);
177
+ }
178
+ n >>= 1;
179
+ power++; // stays below 32 because n is a 32-bit value and the loop stops when it reaches zero
180
+ }
181
+ #endif
182
+ return result;
136
183
  }
184
+ } // namespace
185
+
137
186
 
138
- uint32_t do_crc32_zeros(uint32_t crc1, size_t len) {
139
- crcutil_interface::UINT64 crc_ = crc1;
140
- crc->CrcOfZeroes(len, &crc_);
141
- return (uint32_t)crc_;
187
+ namespace RapidYenc {
188
+ crc_func _do_crc32_incremental = &do_crc32_incremental_generic;
189
+ crc_mul_func _crc32_shift = &crc32_shift_generic;
190
+ crc_mul_func _crc32_multiply = &crc32_multiply_generic;
191
+ int _crc32_isa = ISA_GENERIC;
142
192
  }
143
193
 
144
- void crc_clmul_set_funcs();
145
- void crc_clmul256_set_funcs();
146
- void crc_arm_set_funcs();
147
194
 
148
- #ifdef PLATFORM_X86
149
- int cpu_supports_crc_isa();
150
- #endif
151
195
 
152
196
  #if defined(PLATFORM_ARM) && defined(_WIN32)
153
197
  # define WIN32_LEAN_AND_MEAN
@@ -175,14 +219,16 @@ static unsigned long getauxval(unsigned long cap) {
175
219
  # endif
176
220
  # endif
177
221
  #endif
178
- void crc_init() {
179
- crc = crcutil_interface::CRC::Create(
180
- 0xEDB88320, 0, 32, true, 0, 0, 0, 0, NULL);
181
- // instance never deleted... oh well...
182
-
183
- #if !defined(PLATFORM_X86) || defined(__ILP32__)
184
- generate_crc32_slice_table();
222
+ #if defined(__riscv) && defined(__has_include)
223
+ # if __has_include(<asm/hwprobe.h>)
224
+ # include <asm/hwprobe.h>
225
+ # include <asm/unistd.h>
226
+ # include <unistd.h>
227
+ # endif
185
228
  #endif
229
+
230
+ void RapidYenc::crc32_init() {
231
+ GENERIC_CRC_INIT;
186
232
 
187
233
  #ifdef PLATFORM_X86
188
234
  int support = cpu_supports_crc_isa();
@@ -193,31 +239,59 @@ void crc_init() {
193
239
  #endif
194
240
  #ifdef PLATFORM_ARM
195
241
  # ifdef __APPLE__
196
- int supported = 0;
197
- size_t len = sizeof(supported);
198
- if(sysctlbyname("hw.optional.armv8_crc32", &supported, &len, NULL, 0))
199
- supported = 0;
200
- # endif
201
- if(
202
- # if defined(AT_HWCAP2) && defined(HWCAP2_CRC32)
203
- getauxval(AT_HWCAP2) & HWCAP2_CRC32
204
- # elif defined(AT_HWCAP) && defined(HWCAP_CRC32)
205
- getauxval(AT_HWCAP) & HWCAP_CRC32
206
- # elif defined(ANDROID_CPU_FAMILY_ARM) && defined(__aarch64__)
207
- android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_CRC32
208
- # elif defined(ANDROID_CPU_FAMILY_ARM) /* aarch32 */
209
- android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_CRC32
210
- # elif defined(_WIN32)
211
- IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)
212
- # elif defined(__APPLE__)
213
- supported
214
- # elif defined(__ARM_FEATURE_CRC32)
215
- true /* assume available if compiled as such */
242
+ int supports_crc = 0;
243
+ int supports_pmull = 0;
244
+ size_t len = sizeof(supports_crc);
245
+ if(sysctlbyname("hw.optional.armv8_crc32", &supports_crc, &len, NULL, 0))
246
+ supports_crc = 0;
247
+ if(sysctlbyname("hw.optional.arm.FEAT_PMULL", &supports_pmull, &len, NULL, 0))
248
+ supports_pmull = 0;
216
249
  # else
217
- false
250
+ bool supports_crc = false;
251
+ bool supports_pmull = false;
252
+ # if defined(AT_HWCAP2) && defined(HWCAP2_CRC32)
253
+ supports_crc = getauxval(AT_HWCAP2) & HWCAP2_CRC32;
254
+ # elif defined(AT_HWCAP) && defined(HWCAP_CRC32)
255
+ supports_crc = getauxval(AT_HWCAP) & HWCAP_CRC32;
256
+ # elif defined(ANDROID_CPU_FAMILY_ARM) && defined(__aarch64__)
257
+ supports_crc = android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_CRC32;
258
+ supports_pmull = android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_PMULL;
259
+ # elif defined(ANDROID_CPU_FAMILY_ARM) /* aarch32 */
260
+ supports_crc = android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_CRC32;
261
+ supports_pmull = android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_PMULL;
262
+ # elif defined(_WIN32)
263
+ supports_crc = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);
264
+ supports_pmull = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE);
265
+ # else
266
+ #ifdef __ARM_FEATURE_CRC32
267
+ supports_crc = true; /* assume available if compiled as such */
268
+ #endif
269
+ #ifdef __ARM_FEATURE_CRYPTO
270
+ supports_pmull = true;
271
+ #endif
272
+ # endif
273
+ # if defined(AT_HWCAP2) && defined(HWCAP2_PMULL)
274
+ supports_pmull = getauxval(AT_HWCAP2) & HWCAP2_PMULL;
275
+ # elif defined(AT_HWCAP) && defined(HWCAP_PMULL)
276
+ supports_pmull = getauxval(AT_HWCAP) & HWCAP_PMULL;
277
+ # endif
218
278
  # endif
219
- ) {
279
+
280
+ if(supports_crc) {
220
281
  crc_arm_set_funcs();
282
+ if(supports_pmull) crc_pmull_set_funcs();
221
283
  }
222
284
  #endif
285
+ #ifdef __riscv
286
+ # if defined(RISCV_HWPROBE_KEY_IMA_EXT_0) && defined(__NR_riscv_hwprobe)
287
+ const int rv_hwprobe_ext_zbc = 1 << 7, rv_hwprobe_ext_zbkc = 1 << 9;
288
+ struct riscv_hwprobe p;
289
+ p.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
290
+ if(!syscall(__NR_riscv_hwprobe, &p, 1, 0, NULL, 0)) {
291
+ if(p.value & (rv_hwprobe_ext_zbc | rv_hwprobe_ext_zbkc)) {
292
+ crc_riscv_set_funcs();
293
+ }
294
+ }
295
+ # endif
296
+ #endif
223
297
  }
package/src/crc.h CHANGED
@@ -1,27 +1,91 @@
1
1
  #ifndef __YENC_CRC_H
2
2
  #define __YENC_CRC_H
3
+ #include <stdlib.h> // for llabs
3
4
 
4
- #ifdef __cplusplus
5
- extern "C" {
5
+ #if !defined(__GNUC__) && defined(_MSC_VER)
6
+ # include <intrin.h>
6
7
  #endif
7
8
 
9
+ namespace RapidYenc {
8
10
 
9
11
 
10
12
  typedef uint32_t (*crc_func)(const void*, size_t, uint32_t);
11
13
  extern crc_func _do_crc32_incremental;
12
- extern int _crc32_isa;
13
- #define do_crc32 (*_do_crc32_incremental)
14
14
 
15
- uint32_t do_crc32_combine(uint32_t crc1, const uint32_t crc2, size_t len2);
16
- uint32_t do_crc32_zeros(uint32_t crc1, size_t len);
17
- void crc_init();
15
+ extern int _crc32_isa;
16
+ static inline uint32_t crc32(const void* data, size_t length, uint32_t init) { // thin wrapper: forwards all three arguments to whichever implementation _do_crc32_incremental currently points at
17
+ return (*_do_crc32_incremental)(data, length, init);
18
+ }
18
19
  static inline int crc32_isa_level() { // accessor for the _crc32_isa global declared above
19
20
  return _crc32_isa;
20
21
  }
21
22
 
22
23
 
23
-
24
- #ifdef __cplusplus
25
- }
24
+ // computes `n % 0xffffffff` (well, almost), using some bit-hacks: the high and low 32-bit halves of n are added with an end-around carry. "Almost" because the result can be 0xffffffff where a true modulus would give 0.
25
+ static inline uint32_t crc32_powmod(uint64_t n) {
26
+ #ifdef __GNUC__
27
+ unsigned res;
28
+ unsigned carry = __builtin_uadd_overflow(n >> 32, n, &res); // both operands are converted to unsigned, so this adds the high half to the (truncated) low half and reports the carry
29
+ res += carry; // end-around carry: fold the overflow bit back into the sum
30
+ return res;
31
+ #elif defined(_MSC_VER) && defined(PLATFORM_X86)
32
+ unsigned res;
33
+ unsigned char carry = _addcarry_u32(0, n >> 32, n, &res); // same high + low addition, carry-out returned by the intrinsic
34
+ _addcarry_u32(carry, res, 0, &res); // add the carry back in
35
+ return res;
36
+ #else
37
+ n = (n >> 32) + (n & 0xffffffff); // portable form: sum of the halves in 64 bits (at most 33 bits wide)
38
+ n += n >> 32; // add the 33rd bit back into the low part
39
+ return n; // implicit narrowing to uint32_t drops the now-redundant upper bits
26
40
  #endif
41
+ }
42
+ // computes `crc32_powmod(n*8)` avoiding overflow: n is reduced first and the multiplication by 8 is applied to the reduced value, so `n*8` is never formed in 64 bits
43
+ static inline uint32_t crc32_bytepow(uint64_t n) {
44
+ #if defined(__GNUC__) || defined(_MSC_VER)
45
+ unsigned res = crc32_powmod(n);
46
+ # ifdef _MSC_VER
47
+ return _rotl(res, 3); // rotating left by 3 multiplies by 8 with the bits shifted out wrapping around to the bottom
48
+ # else
49
+ return (res << 3) | (res >> 29); // hand-written 32-bit rotate-left by 3 (assumes unsigned is 32 bits wide)
50
+ # endif
51
+ #else
52
+ n = (n >> 32) + (n & 0xffffffff); // fold the two halves together, as in crc32_powmod
53
+ n <<= 3; // multiply the folded value by 8; it still fits comfortably in 64 bits
54
+ n += n >> 32; // fold the bits above bit 31 back into the low part
55
+ return n; // implicit narrowing to uint32_t
27
56
  #endif
57
+ }
58
+
59
+ typedef uint32_t (*crc_mul_func)(uint32_t, uint32_t); // signature shared by the shift and multiply implementations
60
+ extern crc_mul_func _crc32_shift; // currently selected shift implementation; defined outside this header
61
+ extern crc_mul_func _crc32_multiply; // currently selected multiply implementation; defined outside this header
62
+ static inline uint32_t crc32_shift(uint32_t a, uint32_t b) { // thin wrapper: calls through the _crc32_shift pointer
63
+ return (*_crc32_shift)(a, b);
64
+ }
65
+ static inline uint32_t crc32_multiply(uint32_t a, uint32_t b) { // thin wrapper: calls through the _crc32_multiply pointer
66
+ return (*_crc32_multiply)(a, b);
67
+ }
68
+
69
+ static inline uint32_t crc32_combine(uint32_t crc1, uint32_t crc2, uint64_t len2) { // shifts crc1 by crc32_bytepow(len2) and XORs crc2 into the result; presumably len2 is the byte length of the data crc2 covers - TODO confirm
70
+ return crc32_shift(crc1, crc32_bytepow(len2)) ^ crc2;
71
+ }
72
+ static inline uint32_t crc32_zeros(uint32_t crc1, uint64_t len) { // complements crc1, shifts it by crc32_bytepow(len), and complements the result; presumably the CRC after appending `len` zero bytes - TODO confirm
73
+ return ~crc32_shift(~crc1, crc32_bytepow(len));
74
+ }
75
+ static inline uint32_t crc32_unzero(uint32_t crc1, uint64_t len) { // same shape as crc32_zeros but with the shift amount bitwise-complemented (~x equals 0xffffffff - x); presumably this undoes crc32_zeros for the same len - TODO confirm
76
+ return ~crc32_shift(~crc1, ~crc32_bytepow(len));
77
+ }
78
+ static inline uint32_t crc32_2pow(int64_t n) { // shifts the constant 0x80000000 by an amount derived from |n|, complementing that amount when n is negative
79
+ uint32_t sign = (uint32_t)(n >> 63); // all ones for negative n, zero otherwise - assumes >> on a negative value is an arithmetic shift (implementation-defined before C++20)
80
+ return crc32_shift(0x80000000, crc32_powmod(llabs(n)) ^ sign); // XOR with `sign` complements the reduced magnitude for negative n. NOTE(review): llabs(INT64_MIN) is undefined behaviour - confirm callers never pass it
81
+ }
82
+ static inline uint32_t crc32_256pow(uint64_t n) { // shifts the constant 0x80000000 by crc32_bytepow(n), i.e. by the reduced value of n*8
83
+ return crc32_shift(0x80000000, crc32_bytepow(n));
84
+ }
85
+
86
+ void crc32_init();
87
+
88
+
89
+
90
+ } // namespace
91
+ #endif // defined(__YENC_CRC_H)