yencode 1.1.2 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -2
- package/binding.gyp +141 -6
- package/index.js +21 -19
- package/package.json +2 -1
- package/src/common.h +34 -19
- package/src/crc.cc +138 -11
- package/src/crc_arm.cc +42 -7
- package/src/crc_folding.cc +18 -53
- package/src/crc_folding_256.cc +229 -0
- package/src/decoder.cc +8 -4
- package/src/decoder.h +5 -5
- package/src/decoder_avx2_base.h +30 -13
- package/src/decoder_common.h +5 -5
- package/src/decoder_neon.cc +4 -4
- package/src/decoder_neon64.cc +10 -7
- package/src/decoder_sse_base.h +26 -12
- package/src/decoder_vbmi2.cc +37 -0
- package/src/encoder.cc +10 -1
- package/src/encoder_avx_base.h +24 -16
- package/src/encoder_neon.cc +40 -41
- package/src/encoder_rvv.cc +219 -0
- package/src/encoder_sse_base.h +7 -8
- package/src/encoder_vbmi2.cc +30 -0
- package/src/hedley.h +278 -135
- package/src/platform.cc +79 -10
- package/src/test_alignalloc.c +6 -0
- package/test/_speedbase.js +12 -11
- package/test/speeddec.js +6 -5
- package/test/testcrc.js +14 -0
- package/test/testdec.js +30 -14
- package/test/testenc.js +10 -7
- package/test/testpostdec.js +6 -5
package/README.md
CHANGED
|
@@ -129,7 +129,8 @@ int decodeTo(Buffer data, Buffer output, bool stripDots=false)
|
|
|
129
129
|
Same as above, but instead of returning a Buffer, writes it to the supplied
|
|
130
130
|
*output* Buffer. Returns the length of the decoded data.
|
|
131
131
|
Note that the *output* Buffer must be at least large enough to hold the largest
|
|
132
|
-
possible output size (i.e. length of the input), otherwise an error is thrown.
|
|
132
|
+
possible output size (i.e. length of the input), otherwise an error is thrown.
|
|
133
|
+
The *data* and *output* Buffers can be the same, for in-situ decoding.
|
|
133
134
|
|
|
134
135
|
Object decodeChunk\(Buffer data \[, string state=null\]\[, Buffer output\]\)
|
|
135
136
|
-----------------------------------------------------------------------------
|
|
@@ -142,7 +143,7 @@ designed to incrementally process a stream from the network, and will perform NN
|
|
|
142
143
|
*state* is the current state of the incremental decode. Set to *null* if this is starting the decode of a new article, otherwise this should be set to the value of *state* given from the previous invocation of *decodeChunk*.
|
|
143
144
|
If *output* is supplied, the output will be written here \(see *decodeTo* for notes
|
|
144
145
|
on required size\), otherwise a new buffer will be created where the output will be
|
|
145
|
-
written to.
|
|
146
|
+
written to. The *data* and *output* Buffers can be the same, for in-situ decoding.
|
|
146
147
|
|
|
147
148
|
Returns an object with the following keys:
|
|
148
149
|
|
package/binding.gyp
CHANGED
|
@@ -43,10 +43,20 @@
|
|
|
43
43
|
}],
|
|
44
44
|
['OS!="win" and enable_native_tuning!=0', {
|
|
45
45
|
"defines": ["YENC_BUILD_NATIVE=1"]
|
|
46
|
+
}],
|
|
47
|
+
['OS!="win"', {
|
|
48
|
+
"variables": {
|
|
49
|
+
"missing_memalign%": "<!(<!(echo ${CC_target:-${CC:-cc}}) -c src/test_alignalloc.c -o /dev/null -Werror 2>/dev/null || echo failed)",
|
|
50
|
+
},
|
|
51
|
+
"conditions": [
|
|
52
|
+
['missing_memalign!=""', {
|
|
53
|
+
"defines": ["_POSIX_C_SOURCE=200112L"],
|
|
54
|
+
}]
|
|
55
|
+
]
|
|
46
56
|
}]
|
|
47
57
|
],
|
|
48
58
|
"cflags": ["-Wno-unused-function"],
|
|
49
|
-
"cxxflags": ["-Wno-unused-function"],
|
|
59
|
+
"cxxflags": ["-Wno-unused-function", "-std=c++03", "-D_POSIX_C_SOURCE=200112L"],
|
|
50
60
|
"xcode_settings": {
|
|
51
61
|
"OTHER_CFLAGS": ["-Wno-unused-function"],
|
|
52
62
|
"OTHER_CXXFLAGS": ["-Wno-unused-function"]
|
|
@@ -64,7 +74,7 @@
|
|
|
64
74
|
"targets": [
|
|
65
75
|
{
|
|
66
76
|
"target_name": "yencode",
|
|
67
|
-
"dependencies": ["crcutil", "yencode_sse2", "yencode_ssse3", "yencode_clmul", "yencode_avx", "yencode_avx2", "yencode_neon", "yencode_armcrc"],
|
|
77
|
+
"dependencies": ["crcutil", "yencode_sse2", "yencode_ssse3", "yencode_clmul", "yencode_clmul256", "yencode_avx", "yencode_avx2", "yencode_vbmi2", "yencode_neon", "yencode_armcrc", "yencode_rvv"],
|
|
68
78
|
"sources": [
|
|
69
79
|
"src/yencode.cc",
|
|
70
80
|
"src/platform.cc",
|
|
@@ -206,6 +216,81 @@
|
|
|
206
216
|
}]
|
|
207
217
|
]
|
|
208
218
|
},
|
|
219
|
+
{
|
|
220
|
+
"target_name": "yencode_clmul256",
|
|
221
|
+
"type": "static_library",
|
|
222
|
+
"sources": [
|
|
223
|
+
"src/crc_folding_256.cc"
|
|
224
|
+
],
|
|
225
|
+
"cflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
226
|
+
"cxxflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
227
|
+
"xcode_settings": {
|
|
228
|
+
"OTHER_CFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
229
|
+
"OTHER_CXXFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"]
|
|
230
|
+
},
|
|
231
|
+
"msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
|
|
232
|
+
"conditions": [
|
|
233
|
+
['target_arch in "ia32 x64" and OS!="win"', {
|
|
234
|
+
"variables": {"supports_vpclmul%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/crc_folding_256.cc -mavx2 -mvpclmulqdq 2>/dev/null || true)"},
|
|
235
|
+
"conditions": [
|
|
236
|
+
['supports_vpclmul!=""', {
|
|
237
|
+
"cflags": ["-mavx2", "-mvpclmulqdq", "-mpclmul"],
|
|
238
|
+
"cxxflags": ["-mavx2", "-mvpclmulqdq", "-mpclmul"],
|
|
239
|
+
"xcode_settings": {
|
|
240
|
+
"OTHER_CFLAGS": ["-mavx2", "-mvpclmulqdq", "-mpclmul"],
|
|
241
|
+
"OTHER_CXXFLAGS": ["-mavx2", "-mvpclmulqdq", "-mpclmul"],
|
|
242
|
+
}
|
|
243
|
+
}]
|
|
244
|
+
]
|
|
245
|
+
}],
|
|
246
|
+
['target_arch in "ia32 x64" and OS=="win"', {
|
|
247
|
+
"msvs_settings": {"VCCLCompilerTool": {"EnableEnhancedInstructionSet": "3"}}
|
|
248
|
+
}]
|
|
249
|
+
]
|
|
250
|
+
},
|
|
251
|
+
{
|
|
252
|
+
"target_name": "yencode_vbmi2",
|
|
253
|
+
"type": "static_library",
|
|
254
|
+
"sources": [
|
|
255
|
+
"src/decoder_vbmi2.cc", "src/encoder_vbmi2.cc"
|
|
256
|
+
],
|
|
257
|
+
"cflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
258
|
+
"cxxflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
259
|
+
"xcode_settings": {
|
|
260
|
+
"OTHER_CFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
261
|
+
"OTHER_CXXFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"]
|
|
262
|
+
},
|
|
263
|
+
"msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
|
|
264
|
+
"conditions": [
|
|
265
|
+
['target_arch in "ia32 x64" and OS!="win"', {
|
|
266
|
+
"variables": {
|
|
267
|
+
"supports_vbmi2%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/encoder_vbmi2.cc -mavx512vl -mavx512vbmi2 2>/dev/null || true)",
|
|
268
|
+
"supports_avx10%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/encoder_vbmi2.cc -mavx512vl -mno-evex512 2>/dev/null || true)"
|
|
269
|
+
},
|
|
270
|
+
"conditions": [
|
|
271
|
+
['supports_vbmi2!=""', {
|
|
272
|
+
"cflags": ["-mavx512vbmi2", "-mavx512vl", "-mavx512bw", "-mpopcnt", "-mbmi", "-mbmi2", "-mlzcnt"],
|
|
273
|
+
"cxxflags": ["-mavx512vbmi2", "-mavx512vl", "-mavx512bw", "-mpopcnt", "-mbmi", "-mbmi2", "-mlzcnt"],
|
|
274
|
+
"xcode_settings": {
|
|
275
|
+
"OTHER_CFLAGS": ["-mavx512vbmi2", "-mavx512vl", "-mavx512bw", "-mpopcnt", "-mbmi", "-mbmi2", "-mlzcnt"],
|
|
276
|
+
"OTHER_CXXFLAGS": ["-mavx512vbmi2", "-mavx512vl", "-mavx512bw", "-mpopcnt", "-mbmi", "-mbmi2", "-mlzcnt"],
|
|
277
|
+
}
|
|
278
|
+
}],
|
|
279
|
+
['supports_avx10!=""', {
|
|
280
|
+
"cflags": ["-mno-evex512"],
|
|
281
|
+
"cxxflags": ["-mno-evex512"],
|
|
282
|
+
"xcode_settings": {
|
|
283
|
+
"OTHER_CFLAGS": ["-mno-evex512"],
|
|
284
|
+
"OTHER_CXXFLAGS": ["-mno-evex512"],
|
|
285
|
+
}
|
|
286
|
+
}]
|
|
287
|
+
]
|
|
288
|
+
}],
|
|
289
|
+
['target_arch in "ia32 x64" and OS=="win"', {
|
|
290
|
+
"msvs_settings": {"VCCLCompilerTool": {"AdditionalOptions": ["/arch:AVX512"], "EnableEnhancedInstructionSet": "0"}}
|
|
291
|
+
}]
|
|
292
|
+
]
|
|
293
|
+
},
|
|
209
294
|
{
|
|
210
295
|
"target_name": "yencode_neon",
|
|
211
296
|
"type": "static_library",
|
|
@@ -221,11 +306,11 @@
|
|
|
221
306
|
"msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
|
|
222
307
|
"conditions": [
|
|
223
308
|
['target_arch=="arm"', {
|
|
224
|
-
"cflags": ["-mfpu=neon"],
|
|
225
|
-
"cxxflags": ["-mfpu=neon"],
|
|
309
|
+
"cflags": ["-mfpu=neon","-fno-lto"],
|
|
310
|
+
"cxxflags": ["-mfpu=neon","-fno-lto"],
|
|
226
311
|
"xcode_settings": {
|
|
227
|
-
"OTHER_CFLAGS": ["-mfpu=neon"],
|
|
228
|
-
"OTHER_CXXFLAGS": ["-mfpu=neon"],
|
|
312
|
+
"OTHER_CFLAGS": ["-mfpu=neon","-fno-lto"],
|
|
313
|
+
"OTHER_CXXFLAGS": ["-mfpu=neon","-fno-lto"],
|
|
229
314
|
}
|
|
230
315
|
}],
|
|
231
316
|
['target_arch=="arm64"', {
|
|
@@ -235,6 +320,48 @@
|
|
|
235
320
|
}]
|
|
236
321
|
]
|
|
237
322
|
},
|
|
323
|
+
{
|
|
324
|
+
"target_name": "yencode_rvv",
|
|
325
|
+
"type": "static_library",
|
|
326
|
+
"sources": [
|
|
327
|
+
"src/encoder_rvv.cc"
|
|
328
|
+
],
|
|
329
|
+
"cflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
330
|
+
"cxxflags!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
331
|
+
"xcode_settings": {
|
|
332
|
+
"OTHER_CFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"],
|
|
333
|
+
"OTHER_CXXFLAGS!": ["-fno-omit-frame-pointer", "-fno-tree-vrp", "-fno-strict-aliasing"]
|
|
334
|
+
},
|
|
335
|
+
"msvs_settings": {"VCCLCompilerTool": {"BufferSecurityCheck": "false"}},
|
|
336
|
+
"conditions": [
|
|
337
|
+
['target_arch=="riscv64" and OS!="win"', {
|
|
338
|
+
"variables": {"supports_rvv%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/encoder_rvv.cc -march=rv64gcv 2>/dev/null || true)"},
|
|
339
|
+
"conditions": [
|
|
340
|
+
['supports_rvv!=""', {
|
|
341
|
+
"cflags": ["-march=rv64gcv"],
|
|
342
|
+
"cxxflags": ["-march=rv64gcv"],
|
|
343
|
+
"xcode_settings": {
|
|
344
|
+
"OTHER_CFLAGS": ["-march=rv64gcv"],
|
|
345
|
+
"OTHER_CXXFLAGS": ["-march=rv64gcv"],
|
|
346
|
+
}
|
|
347
|
+
}]
|
|
348
|
+
]
|
|
349
|
+
}],
|
|
350
|
+
['target_arch=="riscv32" and OS!="win"', {
|
|
351
|
+
"variables": {"supports_rvv%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -MM -E src/encoder_rvv.cc -march=rv32gcv 2>/dev/null || true)"},
|
|
352
|
+
"conditions": [
|
|
353
|
+
['supports_rvv!=""', {
|
|
354
|
+
"cflags": ["-march=rv32gcv"],
|
|
355
|
+
"cxxflags": ["-march=rv32gcv"],
|
|
356
|
+
"xcode_settings": {
|
|
357
|
+
"OTHER_CFLAGS": ["-march=rv32gcv"],
|
|
358
|
+
"OTHER_CXXFLAGS": ["-march=rv32gcv"],
|
|
359
|
+
}
|
|
360
|
+
}]
|
|
361
|
+
]
|
|
362
|
+
}]
|
|
363
|
+
]
|
|
364
|
+
},
|
|
238
365
|
{
|
|
239
366
|
"target_name": "yencode_armcrc",
|
|
240
367
|
"type": "static_library",
|
|
@@ -260,6 +387,14 @@
|
|
|
260
387
|
"OTHER_CFLAGS": ["-march=armv8-a+crc"],
|
|
261
388
|
"OTHER_CXXFLAGS": ["-march=armv8-a+crc"],
|
|
262
389
|
}
|
|
390
|
+
}],
|
|
391
|
+
['OS!="win" and target_arch=="arm"', {
|
|
392
|
+
"cflags": ["-mfpu=fp-armv8","-fno-lto"],
|
|
393
|
+
"cxxflags": ["-mfpu=fp-armv8","-fno-lto"],
|
|
394
|
+
"xcode_settings": {
|
|
395
|
+
"OTHER_CFLAGS": ["-mfpu=fp-armv8","-fno-lto"],
|
|
396
|
+
"OTHER_CXXFLAGS": ["-mfpu=fp-armv8","-fno-lto"]
|
|
397
|
+
}
|
|
263
398
|
}]
|
|
264
399
|
]
|
|
265
400
|
},
|
package/index.js
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
var y = require('./build/Release/yencode.node');
|
|
4
4
|
|
|
5
5
|
var toBuffer = Buffer.alloc ? Buffer.from : Buffer;
|
|
6
|
+
var bufferSlice = Buffer.prototype.readBigInt64BE ? Buffer.prototype.subarray : Buffer.prototype.slice;
|
|
6
7
|
|
|
7
8
|
var nl = toBuffer([13, 10]);
|
|
8
9
|
var RE_BADCHAR = /[\r\n\0]/g; // any single CR, LF or NUL (character class, not the literal "\r\n\0" sequence); stripped from filenames before they are written into a header line
|
|
@@ -76,28 +77,28 @@ var decoderParseLines = function(lines, ydata) {
|
|
|
76
77
|
for(var i=0; i<lines.length; i++) {
|
|
77
78
|
var yprops = {};
|
|
78
79
|
|
|
79
|
-
var line = lines[i].
|
|
80
|
+
var line = lines[i].substring(2); // cut off '=y'
|
|
80
81
|
// parse tag
|
|
81
82
|
var p = line.indexOf(' ');
|
|
82
|
-
var tag = (p<0 ? line : line.
|
|
83
|
-
line = line.
|
|
83
|
+
var tag = (p<0 ? line : line.substring(0, p));
|
|
84
|
+
line = line.substring(tag.length+1).trim();
|
|
84
85
|
|
|
85
86
|
// parse props
|
|
86
87
|
var m = line.match(RE_YPROP);
|
|
87
88
|
while(m) {
|
|
88
89
|
if(m.index != 0) {
|
|
89
|
-
warnings.push(DecoderWarning('ignored_line_data', 'Unknown additional data ignored: "' + line.
|
|
90
|
+
warnings.push(DecoderWarning('ignored_line_data', 'Unknown additional data ignored: "' + line.substring(0, m.index) + '"'));
|
|
90
91
|
}
|
|
91
92
|
var prop = m[1], val;
|
|
92
93
|
var valPos = m.index + m[0].length;
|
|
93
94
|
if(tag == 'begin' && prop == 'name') {
|
|
94
95
|
// special treatment of filename - the value is the rest of the line (can include spaces)
|
|
95
|
-
val = line.
|
|
96
|
+
val = line.substring(valPos);
|
|
96
97
|
line = '';
|
|
97
98
|
} else {
|
|
98
99
|
p = line.indexOf(' ', valPos);
|
|
99
|
-
val = (p<0 ? line.
|
|
100
|
-
line = line.
|
|
100
|
+
val = (p<0 ? line.substring(valPos) : line.substring(valPos, p));
|
|
101
|
+
line = line.substring(valPos + val.length +1);
|
|
101
102
|
}
|
|
102
103
|
if(prop in yprops) {
|
|
103
104
|
warnings.push(DecoderWarning('duplicate_property', 'Duplicate property encountered: `' + prop + '`'));
|
|
@@ -139,7 +140,7 @@ module.exports = {
|
|
|
139
140
|
prev = '\r\n';
|
|
140
141
|
|
|
141
142
|
if(Buffer.isBuffer(prev)) prev = prev.toString();
|
|
142
|
-
prev = prev.
|
|
143
|
+
prev = prev.slice(-4); // only care about the last 4 chars of previous state
|
|
143
144
|
if(prev == '\r\n.=') prev = '\r\n='; // aliased after dot stripped
|
|
144
145
|
if(data.length == 0) return {
|
|
145
146
|
read: 0,
|
|
@@ -151,7 +152,7 @@ module.exports = {
|
|
|
151
152
|
var state = decodePrev.indexOf(prev);
|
|
152
153
|
if(state < 0) {
|
|
153
154
|
for(var l=-3; l<0; i++) {
|
|
154
|
-
state = decodePrev.indexOf(prev.
|
|
155
|
+
state = decodePrev.indexOf(prev.slice(l));
|
|
155
156
|
if(state >= 0) break;
|
|
156
157
|
}
|
|
157
158
|
if(state < 0) state = decodePrev.indexOf('');
|
|
@@ -195,12 +196,13 @@ module.exports = {
|
|
|
195
196
|
|
|
196
197
|
if(!Buffer.isBuffer(data)) data = toBuffer(data);
|
|
197
198
|
|
|
198
|
-
filename = toBuffer(filename.replace(RE_BADCHAR, '').
|
|
199
|
+
filename = toBuffer(filename.replace(RE_BADCHAR, '').substring(0, 256), exports.encoding);
|
|
200
|
+
var e = encodeCrc(data, line_size);
|
|
199
201
|
return Buffer.concat([
|
|
200
202
|
toBuffer('=ybegin line='+line_size+' size='+data.length+' name='),
|
|
201
203
|
filename, nl,
|
|
202
|
-
|
|
203
|
-
toBuffer('\r\n=yend size='+data.length+' crc32=' +
|
|
204
|
+
e.output,
|
|
205
|
+
toBuffer('\r\n=yend size='+data.length+' crc32=' + e.crc32.toString('hex'))
|
|
204
206
|
]);
|
|
205
207
|
},
|
|
206
208
|
multi_post: function(filename, size, parts, line_size) {
|
|
@@ -214,7 +216,7 @@ module.exports = {
|
|
|
214
216
|
|
|
215
217
|
// find '=ybegin' to know where the yEnc data starts
|
|
216
218
|
var yencStart;
|
|
217
|
-
if(
|
|
219
|
+
if(bufferSlice.call(data, 0, 8).toString('hex') == '3d79626567696e20' /*=ybegin */) {
|
|
218
220
|
// common case: starts right at the beginning
|
|
219
221
|
yencStart = 0;
|
|
220
222
|
} else {
|
|
@@ -231,10 +233,10 @@ module.exports = {
|
|
|
231
233
|
var sp = yencStart;
|
|
232
234
|
var p = bufferFind(data, '\r\n', yencStart+8);
|
|
233
235
|
while(p > 0) {
|
|
234
|
-
var line =
|
|
236
|
+
var line = bufferSlice.call(data, sp, p).toString(this.encoding).trim();
|
|
235
237
|
lines.push(line);
|
|
236
238
|
sp = p+2;
|
|
237
|
-
if(line.
|
|
239
|
+
if(line.substring(0, 6) == '=yend ') { // no data in post
|
|
238
240
|
ret.yencEnd = sp;
|
|
239
241
|
break;
|
|
240
242
|
}
|
|
@@ -252,7 +254,7 @@ module.exports = {
|
|
|
252
254
|
var warnings = decoderParseLines(lines, ydata);
|
|
253
255
|
|
|
254
256
|
if(!ret.yencEnd) {
|
|
255
|
-
var yencEnd = bufferFindRev(
|
|
257
|
+
var yencEnd = bufferFindRev(bufferSlice.call(data, ret.dataStart), '\r\n=yend ');
|
|
256
258
|
if(yencEnd < 0)
|
|
257
259
|
return DecoderError('no_end_found', 'yEnd end marker not found');
|
|
258
260
|
|
|
@@ -265,7 +267,7 @@ module.exports = {
|
|
|
265
267
|
ret.yencEnd = p;
|
|
266
268
|
} else
|
|
267
269
|
ret.yencEnd = p+2;
|
|
268
|
-
var endLine =
|
|
270
|
+
var endLine = bufferSlice.call(data, yencEnd+2, p).toString(this.encoding).trim();
|
|
269
271
|
|
|
270
272
|
warnings = warnings.concat(decoderParseLines([endLine], ydata));
|
|
271
273
|
}
|
|
@@ -321,7 +323,7 @@ module.exports = {
|
|
|
321
323
|
warnings.push(DecoderWarning('size_mismatch', 'Size specified for part exceeds size specified for whole file'));
|
|
322
324
|
|
|
323
325
|
if(ret.dataStart) {
|
|
324
|
-
ret.data = y.decode(
|
|
326
|
+
ret.data = y.decode(bufferSlice.call(data, ret.dataStart, ret.dataEnd), !!isRaw);
|
|
325
327
|
ret.crc32 = y.crc32(ret.data);
|
|
326
328
|
var hexCrc = ret.crc32.toString('hex');
|
|
327
329
|
|
|
@@ -360,7 +362,7 @@ function YEncoder(filename, size, parts, line_size) {
|
|
|
360
362
|
this.pos = 0;
|
|
361
363
|
this.crc = toBuffer([0,0,0,0]);
|
|
362
364
|
|
|
363
|
-
filename = toBuffer(filename.replace(RE_BADCHAR, '').
|
|
365
|
+
filename = toBuffer(filename.replace(RE_BADCHAR, '').substring(0, 256), exports.encoding);
|
|
364
366
|
if(parts > 1) {
|
|
365
367
|
this.yInfo = Buffer.concat([
|
|
366
368
|
toBuffer(' total='+parts+' line='+line_size+' size='+size+' name='),
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "yencode",
|
|
3
|
-
"version": "1.1.
|
|
3
|
+
"version": "1.1.4",
|
|
4
4
|
"description": "SIMD accelerated yEnc encoder/decoder and CRC32 calculator",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"yenc",
|
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
"install": "node-gyp rebuild"
|
|
22
22
|
},
|
|
23
23
|
"gypfile": true,
|
|
24
|
+
"type": "commonjs",
|
|
24
25
|
"bugs": {
|
|
25
26
|
"url": "https://github.com/animetosho/node-yencode/issues"
|
|
26
27
|
},
|
package/src/common.h
CHANGED
|
@@ -35,36 +35,40 @@
|
|
|
35
35
|
#endif
|
|
36
36
|
|
|
37
37
|
|
|
38
|
+
#include <stdlib.h>
|
|
38
39
|
#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__)
|
|
39
|
-
|
|
40
|
+
// MSVC doesn't support C11 aligned_alloc: https://stackoverflow.com/a/62963007
|
|
40
41
|
#define ALIGN_ALLOC(buf, len, align) *(void**)&(buf) = _aligned_malloc((len), align)
|
|
41
42
|
#define ALIGN_FREE _aligned_free
|
|
42
|
-
#elif defined(
|
|
43
|
-
//
|
|
43
|
+
#elif defined(_ISOC11_SOURCE)
|
|
44
|
+
// C11 method
|
|
44
45
|
// len needs to be a multiple of alignment, although it sometimes works if it isn't...
|
|
45
|
-
#include <cstdlib>
|
|
46
46
|
#define ALIGN_ALLOC(buf, len, align) *(void**)&(buf) = aligned_alloc(align, ((len) + (align)-1) & ~((align)-1))
|
|
47
47
|
#define ALIGN_FREE free
|
|
48
|
+
#elif defined(__cplusplus) && __cplusplus >= 201700
|
|
49
|
+
// C++17 method
|
|
50
|
+
#include <cstdlib>
|
|
51
|
+
#define ALIGN_ALLOC(buf, len, align) *(void**)&(buf) = std::aligned_alloc(align, ((len) + (align)-1) & ~((align)-1))
|
|
52
|
+
#define ALIGN_FREE free
|
|
48
53
|
#else
|
|
49
|
-
#include <stdlib.h>
|
|
50
54
|
#define ALIGN_ALLOC(buf, len, align) if(posix_memalign((void**)&(buf), align, (len))) (buf) = NULL
|
|
51
55
|
#define ALIGN_FREE free
|
|
52
56
|
#endif
|
|
53
57
|
|
|
54
58
|
|
|
55
59
|
// MSVC compatibility
|
|
56
|
-
#if ((defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(_M_X64)) && !defined(__clang__)
|
|
60
|
+
#if ((defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(_M_X64)) && defined(_MSC_VER) && !defined(__clang__)
|
|
57
61
|
#define __SSE2__ 1
|
|
58
62
|
#define __SSSE3__ 1
|
|
59
63
|
#define __SSE4_1__ 1
|
|
60
|
-
#if
|
|
64
|
+
#if _MSC_VER >= 1600 && defined(__SSE2__)
|
|
61
65
|
#define __POPCNT__ 1
|
|
62
66
|
#define __LZCNT__ 1
|
|
63
67
|
#endif
|
|
64
68
|
#if !defined(__AVX__) && (_MSC_VER >= 1700 && defined(__SSE2__))
|
|
65
69
|
#define __AVX__ 1
|
|
66
70
|
#endif
|
|
67
|
-
#if !defined(__AVX2__) && (_MSC_VER >= 1800 && defined(
|
|
71
|
+
#if !defined(__AVX2__) && (_MSC_VER >= 1800 && defined(__AVX__))
|
|
68
72
|
#define __AVX2__ 1
|
|
69
73
|
#define __BMI2__ 1
|
|
70
74
|
#endif
|
|
@@ -141,6 +145,13 @@
|
|
|
141
145
|
|
|
142
146
|
#endif
|
|
143
147
|
|
|
148
|
+
#if defined(__ARM_NEON) && defined(__has_include)
|
|
149
|
+
# if !__has_include(<arm_neon.h>)
|
|
150
|
+
# undef __ARM_NEON
|
|
151
|
+
HEDLEY_WARNING("NEON has been disabled due to missing arm_neon.h");
|
|
152
|
+
# endif
|
|
153
|
+
#endif
|
|
154
|
+
|
|
144
155
|
#ifdef __ARM_NEON
|
|
145
156
|
# include <arm_neon.h>
|
|
146
157
|
|
|
@@ -212,14 +223,15 @@ bool cpu_supports_neon();
|
|
|
212
223
|
enum YEncDecIsaLevel {
|
|
213
224
|
ISA_FEATURE_POPCNT = 0x1,
|
|
214
225
|
ISA_FEATURE_LZCNT = 0x2,
|
|
226
|
+
ISA_FEATURE_EVEX512 = 0x4, // AVX512 support
|
|
215
227
|
ISA_LEVEL_SSE2 = 0x100,
|
|
216
228
|
ISA_LEVEL_SSSE3 = 0x200,
|
|
217
229
|
ISA_LEVEL_SSE41 = 0x300,
|
|
218
230
|
ISA_LEVEL_SSE4_POPCNT = 0x301,
|
|
219
231
|
ISA_LEVEL_AVX = 0x381, // same as above, just used as a differentiator for `cpu_supports_isa`
|
|
220
|
-
ISA_LEVEL_AVX2 =
|
|
221
|
-
ISA_LEVEL_AVX3 =
|
|
222
|
-
ISA_LEVEL_VBMI2 =
|
|
232
|
+
ISA_LEVEL_AVX2 = 0x403, // also includes BMI1/2 and LZCNT
|
|
233
|
+
ISA_LEVEL_AVX3 = 0x507, // SKX variant; AVX512VL + AVX512BW
|
|
234
|
+
ISA_LEVEL_VBMI2 = 0x603 // ICL, AVX10
|
|
223
235
|
};
|
|
224
236
|
#ifdef _MSC_VER
|
|
225
237
|
// native tuning not supported in MSVC
|
|
@@ -249,16 +261,19 @@ enum YEncDecIsaLevel {
|
|
|
249
261
|
# endif
|
|
250
262
|
#endif
|
|
251
263
|
|
|
252
|
-
#ifdef _MSC_VER
|
|
253
|
-
# define _cpuid1(ar) __cpuid(ar, 1)
|
|
254
|
-
#else
|
|
255
|
-
# include <cpuid.h>
|
|
256
|
-
# define _cpuid1(ar) __cpuid(1, ar[0], ar[1], ar[2], ar[3])
|
|
257
|
-
#endif
|
|
258
|
-
|
|
259
264
|
int cpu_supports_isa();
|
|
260
265
|
#endif // PLATFORM_X86
|
|
261
266
|
|
|
267
|
+
|
|
268
|
+
#ifdef __riscv
|
|
269
|
+
bool cpu_supports_rvv();
|
|
270
|
+
#endif
|
|
271
|
+
#if defined(__riscv_vector) && defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(13,0,0)
|
|
272
|
+
// GCC added RVV intrinsics in GCC13
|
|
273
|
+
# undef __riscv_vector
|
|
274
|
+
#endif
|
|
275
|
+
|
|
276
|
+
|
|
262
277
|
#include <string.h>
|
|
263
278
|
#if !defined(_MSC_VER) || defined(_STDINT) || _MSC_VER >= 1900
|
|
264
279
|
# include <stdint.h>
|
|
@@ -270,7 +285,7 @@ int cpu_supports_isa();
|
|
|
270
285
|
|
|
271
286
|
|
|
272
287
|
// GCC 8/9/10(dev) fails to optimize cases where KNOT should be used, so use intrinsic explicitly; Clang 6+ has no issue, but Clang 6/7 doesn't have the intrinsic; MSVC 2019 also fails, and MSVC versions before _MSC_VER 1924 lack the intrinsic
|
|
273
|
-
#if defined(__GNUC__) && __GNUC__ >= 7
|
|
288
|
+
#if (defined(__GNUC__) && __GNUC__ >= 7) || (defined(_MSC_VER) && _MSC_VER >= 1924)
|
|
274
289
|
# define KNOT16 _knot_mask16
|
|
275
290
|
# define KNOT32 _knot_mask32
|
|
276
291
|
#else
|
package/src/crc.cc
CHANGED
|
@@ -3,11 +3,127 @@
|
|
|
3
3
|
#include "interface.h"
|
|
4
4
|
crcutil_interface::CRC* crc = NULL;
|
|
5
5
|
|
|
6
|
+
#if defined(PLATFORM_X86) && !defined(__ILP32__)
|
|
6
7
|
static uint32_t do_crc32_incremental_generic(const void* data, size_t length, uint32_t init) {
|
|
8
|
+
// use optimised ASM on x86 platforms
|
|
7
9
|
crcutil_interface::UINT64 tmp = init;
|
|
8
10
|
crc->Compute(data, length, &tmp);
|
|
9
11
|
return (uint32_t)tmp;
|
|
10
12
|
}
|
|
13
|
+
#else
|
|
14
|
+
static uint32_t* HEDLEY_RESTRICT crc_slice_table;
|
|
15
|
+
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
16
|
+
# if defined(__GNUC__) || defined(__clang__)
|
|
17
|
+
# define bswap32 __builtin_bswap32
|
|
18
|
+
# else
|
|
19
|
+
static inline uint32_t bswap32(uint32_t x) {
|
|
20
|
+
return (x >> 24) | ((x >> 8) & 0x0000FF00) | ((x << 8) & 0x00FF0000) | (x << 24);
|
|
21
|
+
}
|
|
22
|
+
# endif
|
|
23
|
+
#endif
|
|
24
|
+
|
|
25
|
+
#define CRC32_GENERIC_CHAINS 4 // newer processors may prefer 8
|
|
26
|
+
static uint32_t do_crc32_incremental_generic(const void* data, size_t length, uint32_t init) {
|
|
27
|
+
const uint32_t* crc_base_table = crc_slice_table + 4*256; // this also seems to help MSVC's optimiser, which otherwise keeps trying to add to crc_slice_table every time it's referenced
|
|
28
|
+
uint32_t crc[CRC32_GENERIC_CHAINS]; // Clang seems to be more spill happy with an array over individual variables :(
|
|
29
|
+
crc[0] = ~init;
|
|
30
|
+
uint8_t* current8 = (uint8_t*)data;
|
|
31
|
+
|
|
32
|
+
// align to multiple of 4
|
|
33
|
+
if(((uintptr_t)current8 & 1) && length >= 1) {
|
|
34
|
+
crc[0] = (crc[0] >> 8) ^ crc_base_table[(crc[0] & 0xFF) ^ *current8++];
|
|
35
|
+
length--;
|
|
36
|
+
}
|
|
37
|
+
if(((uintptr_t)current8 & 2) && length >= 2) {
|
|
38
|
+
crc[0] = (crc[0] >> 8) ^ crc_base_table[(crc[0] & 0xFF) ^ *current8++];
|
|
39
|
+
crc[0] = (crc[0] >> 8) ^ crc_base_table[(crc[0] & 0xFF) ^ *current8++];
|
|
40
|
+
length -= 2;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
uint8_t* end8 = current8 + length;
|
|
44
|
+
uint32_t* current = (uint32_t*)current8;
|
|
45
|
+
if(length >= 8*CRC32_GENERIC_CHAINS-4) {
|
|
46
|
+
size_t lenMain = ((length-(CRC32_GENERIC_CHAINS-1)*4) / 4);
|
|
47
|
+
uint32_t* end = current + (lenMain / CRC32_GENERIC_CHAINS) * CRC32_GENERIC_CHAINS;
|
|
48
|
+
for(int c=1; c<CRC32_GENERIC_CHAINS; c++)
|
|
49
|
+
crc[c] = 0;
|
|
50
|
+
while(current != end) {
|
|
51
|
+
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
52
|
+
#define CRC_PROC4(v, in) \
|
|
53
|
+
v ^= bswap32(in); \
|
|
54
|
+
v = crc_slice_table[v >> 24] ^ crc_slice_table[0x100L + ((v >> 16) & 0xff)] ^ crc_slice_table[0x200L + ((v >> 8) & 0xff)] ^ crc_slice_table[0x300L + (v & 0xff)]
|
|
55
|
+
#else
|
|
56
|
+
#define CRC_PROC4(v, in) \
|
|
57
|
+
v ^= (in); \
|
|
58
|
+
v = crc_slice_table[v >> 24] ^ crc_slice_table[0x100L + ((v >> 16) & 0xff)] ^ crc_slice_table[0x200L + ((v >> 8) & 0xff)] ^ crc_slice_table[0x300L + (v & 0xff)]
|
|
59
|
+
#endif
|
|
60
|
+
for(int c=0; c<CRC32_GENERIC_CHAINS; c++) {
|
|
61
|
+
CRC_PROC4(crc[c], *current);
|
|
62
|
+
current++;
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
// aggregate accumulators
|
|
66
|
+
current8 = (uint8_t*)current;
|
|
67
|
+
#if (CRC32_GENERIC_CHAINS & (CRC32_GENERIC_CHAINS-1)) == 0
|
|
68
|
+
// assume that lengths which are a multiple of 4/8/16/32 are common
|
|
69
|
+
if((end8 - current8) & (CRC32_GENERIC_CHAINS*4)) {
|
|
70
|
+
CRC_PROC4(crc[0], *current);
|
|
71
|
+
current8 += 4;
|
|
72
|
+
|
|
73
|
+
for(int c=1; c<CRC32_GENERIC_CHAINS; c++) {
|
|
74
|
+
for(int i=0; i<4; i++)
|
|
75
|
+
crc[c] = (crc[c] >> 8) ^ crc_base_table[(crc[c] & 0xff) ^ *current8++];
|
|
76
|
+
crc[(c+1) & ~CRC32_GENERIC_CHAINS] ^= crc[c];
|
|
77
|
+
}
|
|
78
|
+
} else
|
|
79
|
+
#endif
|
|
80
|
+
#undef CRC_PROC4
|
|
81
|
+
for(int c=1; c<CRC32_GENERIC_CHAINS; c++) {
|
|
82
|
+
for(int i=0; i<4; i++)
|
|
83
|
+
crc[0] = (crc[0] >> 8) ^ crc_base_table[(crc[0] & 0xff) ^ *current8++];
|
|
84
|
+
crc[0] ^= crc[c];
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
// tail loop
|
|
89
|
+
while(current8 != end8) {
|
|
90
|
+
crc[0] = (crc[0] >> 8) ^ crc_base_table[(crc[0] & 0xFF) ^ *current8++];
|
|
91
|
+
}
|
|
92
|
+
return ~crc[0];
|
|
93
|
+
}
|
|
94
|
+
static void generate_crc32_slice_table() {
|
|
95
|
+
crc_slice_table = (uint32_t*)malloc(5*256*sizeof(uint32_t));
|
|
96
|
+
// generate standard byte-by-byte table
|
|
97
|
+
uint32_t* crc_base_table = crc_slice_table + 4*256;
|
|
98
|
+
for(int v=0; v<256; v++) {
|
|
99
|
+
uint32_t crc = v;
|
|
100
|
+
for(int j = 0; j < 8; j++) {
|
|
101
|
+
crc = (crc >> 1) ^ (-(int32_t)(crc & 1) & 0xEDB88320);
|
|
102
|
+
}
|
|
103
|
+
crc_base_table[v] = crc;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// generate slice-by-4 shifted across for X independent chains
|
|
107
|
+
for(int v=0; v<256; v++) {
|
|
108
|
+
uint32_t crc = crc_base_table[v];
|
|
109
|
+
#if CRC32_GENERIC_CHAINS > 1
|
|
110
|
+
for(int i=0; i<4*CRC32_GENERIC_CHAINS-5; i++)
|
|
111
|
+
crc = (crc >> 8) ^ crc_base_table[crc & 0xff];
|
|
112
|
+
for(int i=0; i<4; i++) {
|
|
113
|
+
crc = (crc >> 8) ^ crc_base_table[crc & 0xff];
|
|
114
|
+
crc_slice_table[i*256 + v] = crc;
|
|
115
|
+
}
|
|
116
|
+
#else
|
|
117
|
+
for(int i=0; i<4; i++) {
|
|
118
|
+
crc_slice_table[i*256 + v] = crc;
|
|
119
|
+
crc = (crc >> 8) ^ crc_base_table[crc & 0xff];
|
|
120
|
+
}
|
|
121
|
+
#endif
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
#endif
|
|
125
|
+
|
|
126
|
+
|
|
11
127
|
crc_func _do_crc32_incremental = &do_crc32_incremental_generic;
|
|
12
128
|
|
|
13
129
|
|
|
@@ -25,8 +141,13 @@ uint32_t do_crc32_zeros(uint32_t crc1, size_t len) {
|
|
|
25
141
|
}
|
|
26
142
|
|
|
27
143
|
void crc_clmul_set_funcs(crc_func*);
|
|
144
|
+
void crc_clmul256_set_funcs(crc_func*);
|
|
28
145
|
void crc_arm_set_funcs(crc_func*);
|
|
29
146
|
|
|
147
|
+
#ifdef PLATFORM_X86
|
|
148
|
+
int cpu_supports_crc_isa();
|
|
149
|
+
#endif
|
|
150
|
+
|
|
30
151
|
#if defined(PLATFORM_ARM) && defined(_WIN32)
|
|
31
152
|
# define WIN32_LEAN_AND_MEAN
|
|
32
153
|
# include <Windows.h>
|
|
@@ -34,22 +155,23 @@ void crc_arm_set_funcs(crc_func*);
|
|
|
34
155
|
#ifdef PLATFORM_ARM
|
|
35
156
|
# ifdef __ANDROID__
|
|
36
157
|
# include <cpu-features.h>
|
|
37
|
-
# elif defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD__ >= 12)
|
|
38
|
-
# include <sys/auxv.h>
|
|
39
|
-
# include <asm/hwcap.h>
|
|
40
|
-
# elif (defined(__FreeBSD__) && __FreeBSD__ < 12)
|
|
41
|
-
# include <sys/sysctl.h>
|
|
42
|
-
# include <asm/hwcap.h>
|
|
43
158
|
# elif defined(__APPLE__)
|
|
44
159
|
# include <sys/types.h>
|
|
45
160
|
# include <sys/sysctl.h>
|
|
46
|
-
#
|
|
47
|
-
#
|
|
161
|
+
# elif defined(__has_include)
|
|
162
|
+
# if __has_include(<sys/auxv.h>)
|
|
163
|
+
# include <sys/auxv.h>
|
|
164
|
+
# ifdef __FreeBSD__
|
|
48
165
|
static unsigned long getauxval(unsigned long cap) {
|
|
49
166
|
unsigned long ret = 0; // start from "no capabilities" so the return value is defined even if elf_aux_info() fails
|
|
50
167
|
elf_aux_info(cap, &ret, sizeof(ret));
|
|
51
168
|
return ret;
|
|
52
169
|
}
|
|
170
|
+
# endif
|
|
171
|
+
# if __has_include(<asm/hwcap.h>)
|
|
172
|
+
# include <asm/hwcap.h>
|
|
173
|
+
# endif
|
|
174
|
+
# endif
|
|
53
175
|
# endif
|
|
54
176
|
#endif
|
|
55
177
|
void crc_init() {
|
|
@@ -57,10 +179,15 @@ void crc_init() {
|
|
|
57
179
|
0xEDB88320, 0, 32, true, 0, 0, 0, 0, NULL);
|
|
58
180
|
// instance never deleted... oh well...
|
|
59
181
|
|
|
182
|
+
#if !defined(PLATFORM_X86) || defined(__ILP32__)
|
|
183
|
+
generate_crc32_slice_table();
|
|
184
|
+
#endif
|
|
185
|
+
|
|
60
186
|
#ifdef PLATFORM_X86
|
|
61
|
-
int
|
|
62
|
-
|
|
63
|
-
|
|
187
|
+
int support = cpu_supports_crc_isa();
|
|
188
|
+
if(support == 2)
|
|
189
|
+
crc_clmul256_set_funcs(&_do_crc32_incremental);
|
|
190
|
+
else if(support == 1)
|
|
64
191
|
crc_clmul_set_funcs(&_do_crc32_incremental);
|
|
65
192
|
#endif
|
|
66
193
|
#ifdef PLATFORM_ARM
|