yencode 1.1.3 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -129,7 +129,8 @@ int decodeTo(Buffer data, Buffer output, bool stripDots=false)
129
129
  Same as above, but instead of returning a Buffer, writes it to the supplied
130
130
  *output* Buffer. Returns the length of the decoded data.
131
131
  Note that the *output* Buffer must be at least large enough to hold the largest
132
- possible output size (i.e. length of the input), otherwise an error is thrown.
132
+ possible output size (i.e. length of the input), otherwise an error is thrown.
133
+ The *data* and *output* Buffers can be the same, for in-situ decoding.
133
134
 
134
135
  Object decodeChunk\(Buffer data \[, string state=null\]\[, Buffer output\]\)
135
136
  -----------------------------------------------------------------------------
@@ -142,7 +143,7 @@ designed to incrementally process a stream from the network, and will perform NN
142
143
  *state* is the current state of the incremental decode. Set it to *null* when starting the decode of a new article; otherwise, set it to the value of *state* returned by the previous invocation of *decodeChunk*.
143
144
  If *output* is supplied, the output will be written here \(see *decodeTo* for notes
144
145
  on required size\), otherwise a new buffer will be created where the output will be
145
- written to.
146
+ written to. The *data* and *output* Buffers can be the same, for in-situ decoding.
146
147
 
147
148
  Returns an object with the following keys:
148
149
 
package/binding.gyp CHANGED
@@ -43,10 +43,20 @@
43
43
  }],
44
44
  ['OS!="win" and enable_native_tuning!=0', {
45
45
  "defines": ["YENC_BUILD_NATIVE=1"]
46
+ }],
47
+ ['OS!="win"', {
48
+ "variables": {
49
+ "missing_memalign%": "<!(<!(echo ${CC_target:-${CC:-cc}}) -c src/test_alignalloc.c -o /dev/null -Werror 2>/dev/null || echo failed)",
50
+ },
51
+ "conditions": [
52
+ ['missing_memalign!=""', {
53
+ "defines": ["_POSIX_C_SOURCE=200112L"],
54
+ }]
55
+ ]
46
56
  }]
47
57
  ],
48
58
  "cflags": ["-Wno-unused-function"],
49
- "cxxflags": ["-Wno-unused-function"],
59
+ "cxxflags": ["-Wno-unused-function", "-std=c++03", "-D_POSIX_C_SOURCE=200112L"],
50
60
  "xcode_settings": {
51
61
  "OTHER_CFLAGS": ["-Wno-unused-function"],
52
62
  "OTHER_CXXFLAGS": ["-Wno-unused-function"]
@@ -64,7 +74,7 @@
64
74
  "targets": [
65
75
  {
66
76
  "target_name": "yencode",
67
- "dependencies": ["crcutil", "yencode_sse2", "yencode_ssse3", "yencode_clmul", "yencode_clmul256", "yencode_avx", "yencode_avx2", "yencode_vbmi2", "yencode_neon", "yencode_armcrc"],
77
+ "dependencies": ["crcutil", "yencode_sse2", "yencode_ssse3", "yencode_clmul", "yencode_clmul256", "yencode_avx", "yencode_avx2", "yencode_vbmi2", "yencode_neon", "yencode_armcrc", "yencode_rvv"],
68
78
  "sources": [
69
79
  "src/yencode.cc",
70
80
  "src/platform.cc",
@@ -221,7 +231,7 @@
221
231
  "msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
222
232
  "conditions": [
223
233
  ['target_arch in "ia32 x64" and OS!="win"', {
224
- "variables": {"supports_vpclmul%": "<!(<!(echo ${CC_target:-${CC:-cc}}) -MM -E src/crc_folding_256.cc -mavx2 -mvpclmulqdq 2>/dev/null || true)"},
234
+ "variables": {"supports_vpclmul%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/crc_folding_256.cc -mavx2 -mvpclmulqdq 2>/dev/null || true)"},
225
235
  "conditions": [
226
236
  ['supports_vpclmul!=""', {
227
237
  "cflags": ["-mavx2", "-mvpclmulqdq", "-mpclmul"],
@@ -253,7 +263,10 @@
253
263
  "msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
254
264
  "conditions": [
255
265
  ['target_arch in "ia32 x64" and OS!="win"', {
256
- "variables": {"supports_vbmi2%": "<!(<!(echo ${CC_target:-${CC:-cc}}) -MM -E src/encoder_vbmi2.cc -mavx512vl -mavx512vbmi2 2>/dev/null || true)"},
266
+ "variables": {
267
+ "supports_vbmi2%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/encoder_vbmi2.cc -mavx512vl -mavx512vbmi2 2>/dev/null || true)",
268
+ "supports_avx10%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/encoder_vbmi2.cc -mavx512vl -mno-evex512 2>/dev/null || true)"
269
+ },
257
270
  "conditions": [
258
271
  ['supports_vbmi2!=""', {
259
272
  "cflags": ["-mavx512vbmi2", "-mavx512vl", "-mavx512bw", "-mpopcnt", "-mbmi", "-mbmi2", "-mlzcnt"],
@@ -262,6 +275,14 @@
262
275
  "OTHER_CFLAGS": ["-mavx512vbmi2", "-mavx512vl", "-mavx512bw", "-mpopcnt", "-mbmi", "-mbmi2", "-mlzcnt"],
263
276
  "OTHER_CXXFLAGS": ["-mavx512vbmi2", "-mavx512vl", "-mavx512bw", "-mpopcnt", "-mbmi", "-mbmi2", "-mlzcnt"],
264
277
  }
278
+ }],
279
+ ['supports_avx10!=""', {
280
+ "cflags": ["-mno-evex512"],
281
+ "cxxflags": ["-mno-evex512"],
282
+ "xcode_settings": {
283
+ "OTHER_CFLAGS": ["-mno-evex512"],
284
+ "OTHER_CXXFLAGS": ["-mno-evex512"],
285
+ }
265
286
  }]
266
287
  ]
267
288
  }],
@@ -285,11 +306,11 @@
285
306
  "msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
286
307
  "conditions": [
287
308
  ['target_arch=="arm"', {
288
- "cflags": ["-mfpu=neon"],
289
- "cxxflags": ["-mfpu=neon"],
309
+ "cflags": ["-mfpu=neon","-fno-lto"],
310
+ "cxxflags": ["-mfpu=neon","-fno-lto"],
290
311
  "xcode_settings": {
291
- "OTHER_CFLAGS": ["-mfpu=neon"],
292
- "OTHER_CXXFLAGS": ["-mfpu=neon"],
312
+ "OTHER_CFLAGS": ["-mfpu=neon","-fno-lto"],
313
+ "OTHER_CXXFLAGS": ["-mfpu=neon","-fno-lto"],
293
314
  }
294
315
  }],
295
316
  ['target_arch=="arm64"', {
@@ -299,6 +320,48 @@
299
320
  }]
300
321
  ]
301
322
  },
323
+ {
324
+ "target_name": "yencode_rvv",
325
+ "type": "static_library",
326
+ "sources": [
327
+ "src/encoder_rvv.cc"
328
+ ],
329
+ "cflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
330
+ "cxxflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
331
+ "xcode_settings": {
332
+ "OTHER_CFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
333
+ "OTHER_CXXFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"]
334
+ },
335
+ "msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
336
+ "conditions": [
337
+ ['target_arch=="riscv64" and OS!="win"', {
338
+ "variables": {"supports_rvv%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/encoder_rvv.cc -march=rv64gcv 2>/dev/null || true)"},
339
+ "conditions": [
340
+ ['supports_rvv!=""', {
341
+ "cflags": ["-march=rv64gcv"],
342
+ "cxxflags": ["-march=rv64gcv"],
343
+ "xcode_settings": {
344
+ "OTHER_CFLAGS": ["-march=rv64gcv"],
345
+ "OTHER_CXXFLAGS": ["-march=rv64gcv"],
346
+ }
347
+ }]
348
+ ]
349
+ }],
350
+ ['target_arch=="riscv32" and OS!="win"', {
351
+ "variables": {"supports_rvv%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/encoder_rvv.cc -march=rv32gcv 2>/dev/null || true)"},
352
+ "conditions": [
353
+ ['supports_rvv!=""', {
354
+ "cflags": ["-march=rv32gcv"],
355
+ "cxxflags": ["-march=rv32gcv"],
356
+ "xcode_settings": {
357
+ "OTHER_CFLAGS": ["-march=rv32gcv"],
358
+ "OTHER_CXXFLAGS": ["-march=rv32gcv"],
359
+ }
360
+ }]
361
+ ]
362
+ }]
363
+ ]
364
+ },
302
365
  {
303
366
  "target_name": "yencode_armcrc",
304
367
  "type": "static_library",
@@ -326,11 +389,11 @@
326
389
  }
327
390
  }],
328
391
  ['OS!="win" and target_arch=="arm"', {
329
- "cflags": ["-mfpu=fp-armv8"],
330
- "cxxflags": ["-mfpu=fp-armv8"],
392
+ "cflags": ["-mfpu=fp-armv8","-fno-lto"],
393
+ "cxxflags": ["-mfpu=fp-armv8","-fno-lto"],
331
394
  "xcode_settings": {
332
- "OTHER_CFLAGS": ["-mfpu=fp-armv8"],
333
- "OTHER_CXXFLAGS": ["-mfpu=fp-armv8"]
395
+ "OTHER_CFLAGS": ["-mfpu=fp-armv8","-fno-lto"],
396
+ "OTHER_CXXFLAGS": ["-mfpu=fp-armv8","-fno-lto"]
334
397
  }
335
398
  }]
336
399
  ]
package/index.js CHANGED
@@ -3,6 +3,7 @@
3
3
  var y = require('./build/Release/yencode.node');
4
4
 
5
5
  var toBuffer = Buffer.alloc ? Buffer.from : Buffer;
6
+ var bufferSlice = Buffer.prototype.readBigInt64BE ? Buffer.prototype.subarray : Buffer.prototype.slice;
6
7
 
7
8
  var nl = toBuffer([13, 10]);
8
9
  var RE_BADCHAR = /\r\n\0/g;
@@ -76,28 +77,28 @@ var decoderParseLines = function(lines, ydata) {
76
77
  for(var i=0; i<lines.length; i++) {
77
78
  var yprops = {};
78
79
 
79
- var line = lines[i].substr(2); // cut off '=y'
80
+ var line = lines[i].substring(2); // cut off '=y'
80
81
  // parse tag
81
82
  var p = line.indexOf(' ');
82
- var tag = (p<0 ? line : line.substr(0, p));
83
- line = line.substr(tag.length+1).trim();
83
+ var tag = (p<0 ? line : line.substring(0, p));
84
+ line = line.substring(tag.length+1).trim();
84
85
 
85
86
  // parse props
86
87
  var m = line.match(RE_YPROP);
87
88
  while(m) {
88
89
  if(m.index != 0) {
89
- warnings.push(DecoderWarning('ignored_line_data', 'Unknown additional data ignored: "' + line.substr(0, m.index) + '"'));
90
+ warnings.push(DecoderWarning('ignored_line_data', 'Unknown additional data ignored: "' + line.substring(0, m.index) + '"'));
90
91
  }
91
92
  var prop = m[1], val;
92
93
  var valPos = m.index + m[0].length;
93
94
  if(tag == 'begin' && prop == 'name') {
94
95
  // special treatment of filename - the value is the rest of the line (can include spaces)
95
- val = line.substr(valPos);
96
+ val = line.substring(valPos);
96
97
  line = '';
97
98
  } else {
98
99
  p = line.indexOf(' ', valPos);
99
- val = (p<0 ? line.substr(valPos) : line.substr(valPos, p-valPos));
100
- line = line.substr(valPos + val.length +1);
100
+ val = (p<0 ? line.substring(valPos) : line.substring(valPos, p));
101
+ line = line.substring(valPos + val.length +1);
101
102
  }
102
103
  if(prop in yprops) {
103
104
  warnings.push(DecoderWarning('duplicate_property', 'Duplicate property encountered: `' + prop + '`'));
@@ -139,7 +140,7 @@ module.exports = {
139
140
  prev = '\r\n';
140
141
 
141
142
  if(Buffer.isBuffer(prev)) prev = prev.toString();
142
- prev = prev.substr(-4); // only care about the last 4 chars of previous state
143
+ prev = prev.slice(-4); // only care about the last 4 chars of previous state
143
144
  if(prev == '\r\n.=') prev = '\r\n='; // aliased after dot stripped
144
145
  if(data.length == 0) return {
145
146
  read: 0,
@@ -151,7 +152,7 @@ module.exports = {
151
152
  var state = decodePrev.indexOf(prev);
152
153
  if(state < 0) {
153
154
  for(var l=-3; l<0; i++) {
154
- state = decodePrev.indexOf(prev.substr(l));
155
+ state = decodePrev.indexOf(prev.slice(l));
155
156
  if(state >= 0) break;
156
157
  }
157
158
  if(state < 0) state = decodePrev.indexOf('');
@@ -195,12 +196,13 @@ module.exports = {
195
196
 
196
197
  if(!Buffer.isBuffer(data)) data = toBuffer(data);
197
198
 
198
- filename = toBuffer(filename.replace(RE_BADCHAR, '').substr(0, 256), exports.encoding);
199
+ filename = toBuffer(filename.replace(RE_BADCHAR, '').substring(0, 256), exports.encoding);
200
+ var e = encodeCrc(data, line_size);
199
201
  return Buffer.concat([
200
202
  toBuffer('=ybegin line='+line_size+' size='+data.length+' name='),
201
203
  filename, nl,
202
- y.encode(data, line_size),
203
- toBuffer('\r\n=yend size='+data.length+' crc32=' + y.crc32(data).toString('hex'))
204
+ e.output,
205
+ toBuffer('\r\n=yend size='+data.length+' crc32=' + e.crc32.toString('hex'))
204
206
  ]);
205
207
  },
206
208
  multi_post: function(filename, size, parts, line_size) {
@@ -214,7 +216,7 @@ module.exports = {
214
216
 
215
217
  // find '=ybegin' to know where the yEnc data starts
216
218
  var yencStart;
217
- if(data.slice(0, 8).toString('hex') == '3d79626567696e20' /*=ybegin */) {
219
+ if(bufferSlice.call(data, 0, 8).toString('hex') == '3d79626567696e20' /*=ybegin */) {
218
220
  // common case: starts right at the beginning
219
221
  yencStart = 0;
220
222
  } else {
@@ -231,10 +233,10 @@ module.exports = {
231
233
  var sp = yencStart;
232
234
  var p = bufferFind(data, '\r\n', yencStart+8);
233
235
  while(p > 0) {
234
- var line = data.slice(sp, p).toString(this.encoding).trim();
236
+ var line = bufferSlice.call(data, sp, p).toString(this.encoding).trim();
235
237
  lines.push(line);
236
238
  sp = p+2;
237
- if(line.substr(0, 6) == '=yend ') { // no data in post
239
+ if(line.substring(0, 6) == '=yend ') { // no data in post
238
240
  ret.yencEnd = sp;
239
241
  break;
240
242
  }
@@ -252,7 +254,7 @@ module.exports = {
252
254
  var warnings = decoderParseLines(lines, ydata);
253
255
 
254
256
  if(!ret.yencEnd) {
255
- var yencEnd = bufferFindRev(data.slice(ret.dataStart), '\r\n=yend ');
257
+ var yencEnd = bufferFindRev(bufferSlice.call(data, ret.dataStart), '\r\n=yend ');
256
258
  if(yencEnd < 0)
257
259
  return DecoderError('no_end_found', 'yEnd end marker not found');
258
260
 
@@ -265,7 +267,7 @@ module.exports = {
265
267
  ret.yencEnd = p;
266
268
  } else
267
269
  ret.yencEnd = p+2;
268
- var endLine = data.slice(yencEnd+2, p).toString(this.encoding).trim();
270
+ var endLine = bufferSlice.call(data, yencEnd+2, p).toString(this.encoding).trim();
269
271
 
270
272
  warnings = warnings.concat(decoderParseLines([endLine], ydata));
271
273
  }
@@ -321,7 +323,7 @@ module.exports = {
321
323
  warnings.push(DecoderWarning('size_mismatch', 'Size specified for part exceeds size specified for whole file'));
322
324
 
323
325
  if(ret.dataStart) {
324
- ret.data = y.decode(data.slice(ret.dataStart, ret.dataEnd), isRaw);
326
+ ret.data = y.decode(bufferSlice.call(data, ret.dataStart, ret.dataEnd), !!isRaw);
325
327
  ret.crc32 = y.crc32(ret.data);
326
328
  var hexCrc = ret.crc32.toString('hex');
327
329
 
@@ -360,7 +362,7 @@ function YEncoder(filename, size, parts, line_size) {
360
362
  this.pos = 0;
361
363
  this.crc = toBuffer([0,0,0,0]);
362
364
 
363
- filename = toBuffer(filename.replace(RE_BADCHAR, '').substr(0, 256), exports.encoding);
365
+ filename = toBuffer(filename.replace(RE_BADCHAR, '').substring(0, 256), exports.encoding);
364
366
  if(parts > 1) {
365
367
  this.yInfo = Buffer.concat([
366
368
  toBuffer(' total='+parts+' line='+line_size+' size='+size+' name='),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "yencode",
3
- "version": "1.1.3",
3
+ "version": "1.1.5",
4
4
  "description": "SIMD accelerated yEnc encoder/decoder and CRC32 calculator",
5
5
  "keywords": [
6
6
  "yenc",
@@ -21,6 +21,7 @@
21
21
  "install": "node-gyp rebuild"
22
22
  },
23
23
  "gypfile": true,
24
+ "type": "commonjs",
24
25
  "bugs": {
25
26
  "url": "https://github.com/animetosho/node-yencode/issues"
26
27
  },
package/src/common.h CHANGED
@@ -57,18 +57,18 @@
57
57
 
58
58
 
59
59
  // MSVC compatibility
60
- #if ((defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(_M_X64)) && !defined(__clang__)
60
+ #if ((defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(_M_X64)) && defined(_MSC_VER) && !defined(__clang__)
61
61
  #define __SSE2__ 1
62
62
  #define __SSSE3__ 1
63
63
  #define __SSE4_1__ 1
64
- #if defined(_MSC_VER) && _MSC_VER >= 1600
64
+ #if _MSC_VER >= 1600 && defined(__SSE2__)
65
65
  #define __POPCNT__ 1
66
66
  #define __LZCNT__ 1
67
67
  #endif
68
68
  #if !defined(__AVX__) && (_MSC_VER >= 1700 && defined(__SSE2__))
69
69
  #define __AVX__ 1
70
70
  #endif
71
- #if !defined(__AVX2__) && (_MSC_VER >= 1800 && defined(__SSE2__))
71
+ #if !defined(__AVX2__) && (_MSC_VER >= 1800 && defined(__AVX__))
72
72
  #define __AVX2__ 1
73
73
  #define __BMI2__ 1
74
74
  #endif
@@ -145,6 +145,13 @@
145
145
 
146
146
  #endif
147
147
 
148
+ #if defined(__ARM_NEON) && defined(__has_include)
149
+ # if !__has_include(<arm_neon.h>)
150
+ # undef __ARM_NEON
151
+ HEDLEY_WARNING("NEON has been disabled due to missing arm_neon.h");
152
+ # endif
153
+ #endif
154
+
148
155
  #ifdef __ARM_NEON
149
156
  # include <arm_neon.h>
150
157
 
@@ -214,17 +221,38 @@ bool cpu_supports_neon();
214
221
 
215
222
  #ifdef PLATFORM_X86
216
223
  enum YEncDecIsaLevel {
224
+ ISA_GENERIC = 0,
217
225
  ISA_FEATURE_POPCNT = 0x1,
218
226
  ISA_FEATURE_LZCNT = 0x2,
227
+ ISA_FEATURE_EVEX512 = 0x4, // AVX512 support
219
228
  ISA_LEVEL_SSE2 = 0x100,
220
229
  ISA_LEVEL_SSSE3 = 0x200,
221
230
  ISA_LEVEL_SSE41 = 0x300,
222
231
  ISA_LEVEL_SSE4_POPCNT = 0x301,
232
+ ISA_LEVEL_PCLMUL = 0x340,
223
233
  ISA_LEVEL_AVX = 0x381, // same as above, just used as a differentiator for `cpu_supports_isa`
224
234
  ISA_LEVEL_AVX2 = 0x403, // also includes BMI1/2 and LZCNT
225
- ISA_LEVEL_AVX3 = 0x503, // SKX variant; AVX512VL + AVX512BW
226
- ISA_LEVEL_VBMI2 = 0x603 // ICL
235
+ ISA_LEVEL_VPCLMUL = 0x440,
236
+ ISA_LEVEL_AVX3 = 0x507, // SKX variant; AVX512VL + AVX512BW
237
+ ISA_LEVEL_VBMI2 = 0x603 // ICL, AVX10
238
+ };
239
+ #elif defined(PLATFORM_ARM)
240
+ enum YEncDecIsaLevel {
241
+ ISA_GENERIC = 0,
242
+ ISA_FEATURE_CRC = 8,
243
+ ISA_LEVEL_NEON = 0x1000
227
244
  };
245
+ #elif defined(__riscv)
246
+ enum YEncDecIsaLevel {
247
+ ISA_GENERIC = 0,
248
+ ISA_LEVEL_RVV = 0x10000
249
+ };
250
+ #else
251
+ enum YEncDecIsaLevel {
252
+ ISA_GENERIC = 0
253
+ };
254
+ #endif
255
+ #ifdef PLATFORM_X86
228
256
  #ifdef _MSC_VER
229
257
  // native tuning not supported in MSVC
230
258
  # define ISA_NATIVE ISA_LEVEL_SSE2
@@ -256,6 +284,16 @@ enum YEncDecIsaLevel {
256
284
  int cpu_supports_isa();
257
285
  #endif // PLATFORM_X86
258
286
 
287
+
288
+ #ifdef __riscv
289
+ bool cpu_supports_rvv();
290
+ #endif
291
+ #if defined(__riscv_vector) && defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(13,0,0)
292
+ // GCC added RVV intrinsics in GCC13
293
+ # undef __riscv_vector
294
+ #endif
295
+
296
+
259
297
  #include <string.h>
260
298
  #if !defined(_MSC_VER) || defined(_STDINT) || _MSC_VER >= 1900
261
299
  # include <stdint.h>
package/src/crc.cc CHANGED
@@ -3,13 +3,130 @@
3
3
  #include "interface.h"
4
4
  crcutil_interface::CRC* crc = NULL;
5
5
 
6
+ #if defined(PLATFORM_X86) && !defined(__ILP32__)
6
7
  static uint32_t do_crc32_incremental_generic(const void* data, size_t length, uint32_t init) {
8
+ // use optimised ASM on x86 platforms
7
9
  crcutil_interface::UINT64 tmp = init;
8
10
  crc->Compute(data, length, &tmp);
9
11
  return (uint32_t)tmp;
10
12
  }
11
- crc_func _do_crc32_incremental = &do_crc32_incremental_generic;
13
+ #else
14
+ static uint32_t* HEDLEY_RESTRICT crc_slice_table;
15
+ #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
16
+ # if defined(__GNUC__) || defined(__clang__)
17
+ # define bswap32 __builtin_bswap32
18
+ # else
19
+ static inline uint32_t bswap32(uint32_t x) {
20
+ return (x >> 24) | ((x >> 8) & 0x0000FF00) | ((x << 8) & 0x00FF0000) | (x << 24);
21
+ }
22
+ # endif
23
+ #endif
12
24
 
25
+ #define CRC32_GENERIC_CHAINS 4 // newer processors may prefer 8
26
+ static uint32_t do_crc32_incremental_generic(const void* data, size_t length, uint32_t init) {
27
+ const uint32_t* crc_base_table = crc_slice_table + 4*256; // this also seems to help MSVC's optimiser, which otherwise keeps trying to add to crc_slice_table every time it's referenced
28
+ uint32_t crc[CRC32_GENERIC_CHAINS]; // Clang seems to be more spill happy with an array over individual variables :(
29
+ crc[0] = ~init;
30
+ uint8_t* current8 = (uint8_t*)data;
31
+
32
+ // align to multiple of 4
33
+ if(((uintptr_t)current8 & 1) && length >= 1) {
34
+ crc[0] = (crc[0] >> 8) ^ crc_base_table[(crc[0] & 0xFF) ^ *current8++];
35
+ length--;
36
+ }
37
+ if(((uintptr_t)current8 & 2) && length >= 2) {
38
+ crc[0] = (crc[0] >> 8) ^ crc_base_table[(crc[0] & 0xFF) ^ *current8++];
39
+ crc[0] = (crc[0] >> 8) ^ crc_base_table[(crc[0] & 0xFF) ^ *current8++];
40
+ length -= 2;
41
+ }
42
+
43
+ uint8_t* end8 = current8 + length;
44
+ uint32_t* current = (uint32_t*)current8;
45
+ if(length >= 8*CRC32_GENERIC_CHAINS-4) {
46
+ size_t lenMain = ((length-(CRC32_GENERIC_CHAINS-1)*4) / 4);
47
+ uint32_t* end = current + (lenMain / CRC32_GENERIC_CHAINS) * CRC32_GENERIC_CHAINS;
48
+ for(int c=1; c<CRC32_GENERIC_CHAINS; c++)
49
+ crc[c] = 0;
50
+ while(current != end) {
51
+ #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
52
+ #define CRC_PROC4(v, in) \
53
+ v ^= bswap32(in); \
54
+ v = crc_slice_table[v >> 24] ^ crc_slice_table[0x100L + ((v >> 16) & 0xff)] ^ crc_slice_table[0x200L + ((v >> 8) & 0xff)] ^ crc_slice_table[0x300L + (v & 0xff)]
55
+ #else
56
+ #define CRC_PROC4(v, in) \
57
+ v ^= (in); \
58
+ v = crc_slice_table[v >> 24] ^ crc_slice_table[0x100L + ((v >> 16) & 0xff)] ^ crc_slice_table[0x200L + ((v >> 8) & 0xff)] ^ crc_slice_table[0x300L + (v & 0xff)]
59
+ #endif
60
+ for(int c=0; c<CRC32_GENERIC_CHAINS; c++) {
61
+ CRC_PROC4(crc[c], *current);
62
+ current++;
63
+ }
64
+ }
65
+ // aggregate accumulators
66
+ current8 = (uint8_t*)current;
67
+ #if (CRC32_GENERIC_CHAINS & (CRC32_GENERIC_CHAINS-1)) == 0
68
+ // assume that lengths which are a multiple of 4/8/16/32 are common
69
+ if((end8 - current8) & (CRC32_GENERIC_CHAINS*4)) {
70
+ CRC_PROC4(crc[0], *current);
71
+ current8 += 4;
72
+
73
+ for(int c=1; c<CRC32_GENERIC_CHAINS; c++) {
74
+ for(int i=0; i<4; i++)
75
+ crc[c] = (crc[c] >> 8) ^ crc_base_table[(crc[c] & 0xff) ^ *current8++];
76
+ crc[(c+1) & ~CRC32_GENERIC_CHAINS] ^= crc[c];
77
+ }
78
+ } else
79
+ #endif
80
+ #undef CRC_PROC4
81
+ for(int c=1; c<CRC32_GENERIC_CHAINS; c++) {
82
+ for(int i=0; i<4; i++)
83
+ crc[0] = (crc[0] >> 8) ^ crc_base_table[(crc[0] & 0xff) ^ *current8++];
84
+ crc[0] ^= crc[c];
85
+ }
86
+ }
87
+
88
+ // tail loop
89
+ while(current8 != end8) {
90
+ crc[0] = (crc[0] >> 8) ^ crc_base_table[(crc[0] & 0xFF) ^ *current8++];
91
+ }
92
+ return ~crc[0];
93
+ }
94
+ static void generate_crc32_slice_table() {
95
+ crc_slice_table = (uint32_t*)malloc(5*256*sizeof(uint32_t));
96
+ // generate standard byte-by-byte table
97
+ uint32_t* crc_base_table = crc_slice_table + 4*256;
98
+ for(int v=0; v<256; v++) {
99
+ uint32_t crc = v;
100
+ for(int j = 0; j < 8; j++) {
101
+ crc = (crc >> 1) ^ (-(int32_t)(crc & 1) & 0xEDB88320);
102
+ }
103
+ crc_base_table[v] = crc;
104
+ }
105
+
106
+ // generate slice-by-4 shifted across for X independent chains
107
+ for(int v=0; v<256; v++) {
108
+ uint32_t crc = crc_base_table[v];
109
+ #if CRC32_GENERIC_CHAINS > 1
110
+ for(int i=0; i<4*CRC32_GENERIC_CHAINS-5; i++)
111
+ crc = (crc >> 8) ^ crc_base_table[crc & 0xff];
112
+ for(int i=0; i<4; i++) {
113
+ crc = (crc >> 8) ^ crc_base_table[crc & 0xff];
114
+ crc_slice_table[i*256 + v] = crc;
115
+ }
116
+ #else
117
+ for(int i=0; i<4; i++) {
118
+ crc_slice_table[i*256 + v] = crc;
119
+ crc = (crc >> 8) ^ crc_base_table[crc & 0xff];
120
+ }
121
+ #endif
122
+ }
123
+ }
124
+ #endif
125
+
126
+ extern "C" {
127
+ crc_func _do_crc32_incremental = &do_crc32_incremental_generic;
128
+ int _crc32_isa = ISA_GENERIC;
129
+ }
13
130
 
14
131
 
15
132
  uint32_t do_crc32_combine(uint32_t crc1, uint32_t crc2, size_t len2) {
@@ -24,9 +141,9 @@ uint32_t do_crc32_zeros(uint32_t crc1, size_t len) {
24
141
  return (uint32_t)crc_;
25
142
  }
26
143
 
27
- void crc_clmul_set_funcs(crc_func*);
28
- void crc_clmul256_set_funcs(crc_func*);
29
- void crc_arm_set_funcs(crc_func*);
144
+ void crc_clmul_set_funcs();
145
+ void crc_clmul256_set_funcs();
146
+ void crc_arm_set_funcs();
30
147
 
31
148
  #ifdef PLATFORM_X86
32
149
  int cpu_supports_crc_isa();
@@ -39,22 +156,23 @@ int cpu_supports_crc_isa();
39
156
  #ifdef PLATFORM_ARM
40
157
  # ifdef __ANDROID__
41
158
  # include <cpu-features.h>
42
- # elif defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD__ >= 12)
43
- # include <sys/auxv.h>
44
- # include <asm/hwcap.h>
45
- # elif (defined(__FreeBSD__) && __FreeBSD__ < 12)
46
- # include <sys/sysctl.h>
47
- # include <asm/hwcap.h>
48
159
  # elif defined(__APPLE__)
49
160
  # include <sys/types.h>
50
161
  # include <sys/sysctl.h>
51
- # endif
52
- # ifdef __FreeBSD__
162
+ # elif defined(__has_include)
163
+ # if __has_include(<sys/auxv.h>)
164
+ # include <sys/auxv.h>
165
+ # ifdef __FreeBSD__
53
166
  static unsigned long getauxval(unsigned long cap) {
54
167
  unsigned long ret;
55
168
  elf_aux_info(cap, &ret, sizeof(ret));
56
169
  return ret;
57
170
  }
171
+ # endif
172
+ # if __has_include(<asm/hwcap.h>)
173
+ # include <asm/hwcap.h>
174
+ # endif
175
+ # endif
58
176
  # endif
59
177
  #endif
60
178
  void crc_init() {
@@ -62,12 +180,16 @@ void crc_init() {
62
180
  0xEDB88320, 0, 32, true, 0, 0, 0, 0, NULL);
63
181
  // instance never deleted... oh well...
64
182
 
183
+ #if !defined(PLATFORM_X86) || defined(__ILP32__)
184
+ generate_crc32_slice_table();
185
+ #endif
186
+
65
187
  #ifdef PLATFORM_X86
66
188
  int support = cpu_supports_crc_isa();
67
189
  if(support == 2)
68
- crc_clmul256_set_funcs(&_do_crc32_incremental);
190
+ crc_clmul256_set_funcs();
69
191
  else if(support == 1)
70
- crc_clmul_set_funcs(&_do_crc32_incremental);
192
+ crc_clmul_set_funcs();
71
193
  #endif
72
194
  #ifdef PLATFORM_ARM
73
195
  # ifdef __APPLE__
@@ -95,7 +217,7 @@ void crc_init() {
95
217
  false
96
218
  # endif
97
219
  ) {
98
- crc_arm_set_funcs(&_do_crc32_incremental);
220
+ crc_arm_set_funcs();
99
221
  }
100
222
  #endif
101
223
  }
package/src/crc.h CHANGED
@@ -9,11 +9,15 @@ extern "C" {
9
9
 
10
10
  typedef uint32_t (*crc_func)(const void*, size_t, uint32_t);
11
11
  extern crc_func _do_crc32_incremental;
12
+ extern int _crc32_isa;
12
13
  #define do_crc32 (*_do_crc32_incremental)
13
14
 
14
15
  uint32_t do_crc32_combine(uint32_t crc1, const uint32_t crc2, size_t len2);
15
16
  uint32_t do_crc32_zeros(uint32_t crc1, size_t len);
16
17
  void crc_init();
18
+ static inline int crc32_isa_level() {
19
+ return _crc32_isa;
20
+ }
17
21
 
18
22
 
19
23
 
package/src/crc_arm.cc CHANGED
@@ -16,6 +16,12 @@ HEDLEY_WARNING("CRC32 acceleration has been disabled due to broken arm_acle.h sh
16
16
  HEDLEY_WARNING("CRC32 acceleration has been disabled due to broken arm_acle.h shipped in GCC 9.4 [https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100985]. If you need this feature, please use a different compiler or version of GCC");
17
17
  # endif
18
18
  #endif
19
+ #if defined(__ARM_FEATURE_CRC32) && defined(__has_include)
20
+ # if !__has_include(<arm_acle.h>)
21
+ # undef __ARM_FEATURE_CRC32
22
+ HEDLEY_WARNING("CRC32 acceleration has been disabled due to missing arm_acle.h");
23
+ # endif
24
+ #endif
19
25
 
20
26
  #if defined(__ARM_FEATURE_CRC32) || (defined(_M_ARM64) && !defined(__clang__)) // MSVC doesn't support CRC for ARM32
21
27
 
@@ -73,7 +79,7 @@ static HEDLEY_ALWAYS_INLINE uint32_t crc_multiply(uint32_t a, uint32_t b) {
73
79
  return res;
74
80
  }
75
81
 
76
- static const uint32_t crc_power[] = { // pre-computed 2^n, with first 3 entries removed (saves a shift)
82
+ static const uint32_t crc_power[] = { // pre-computed 2^(2^n), with first 3 entries removed (saves a shift)
77
83
  0x00800000, 0x00008000, 0xedb88320, 0xb1e6b092, 0xa06a2517, 0xed627dae, 0x88d14467, 0xd7bbfe6a,
78
84
  0xec447f11, 0x8e7ea170, 0x6427800e, 0x4d47bae0, 0x09fe548f, 0x83852d0f, 0x30362f1a, 0x7b5a9cc3,
79
85
  0x31fec169, 0x9fec022a, 0x6c8dedc4, 0x15d6874d, 0x5fde7a4e, 0xbad90e37, 0x2e4e5eef, 0x4eaba214,
@@ -194,11 +200,10 @@ static uint32_t do_crc32_incremental_arm(const void* data, size_t length, uint32
194
200
  return ~arm_crc_calc(~init, (const unsigned char*)data, (long)length);
195
201
  }
196
202
 
197
- void crc_arm_set_funcs(crc_func* _do_crc32_incremental) {
198
- *_do_crc32_incremental = &do_crc32_incremental_arm;
203
+ void crc_arm_set_funcs() {
204
+ _do_crc32_incremental = &do_crc32_incremental_arm;
205
+ _crc32_isa = ISA_FEATURE_CRC;
199
206
  }
200
207
  #else
201
- void crc_arm_set_funcs(crc_func* _do_crc32_incremental) {
202
- (void)_do_crc32_incremental;
203
- }
208
+ void crc_arm_set_funcs() {}
204
209
  #endif