yencode 1.1.4 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "yencode",
3
- "version": "1.1.4",
3
+ "version": "1.1.5",
4
4
  "description": "SIMD accelerated yEnc encoder/decoder and CRC32 calculator",
5
5
  "keywords": [
6
6
  "yenc",
package/src/common.h CHANGED
@@ -221,6 +221,7 @@ bool cpu_supports_neon();
221
221
 
222
222
  #ifdef PLATFORM_X86
223
223
  enum YEncDecIsaLevel {
224
+ ISA_GENERIC = 0,
224
225
  ISA_FEATURE_POPCNT = 0x1,
225
226
  ISA_FEATURE_LZCNT = 0x2,
226
227
  ISA_FEATURE_EVEX512 = 0x4, // AVX512 support
@@ -228,11 +229,30 @@ enum YEncDecIsaLevel {
228
229
  ISA_LEVEL_SSSE3 = 0x200,
229
230
  ISA_LEVEL_SSE41 = 0x300,
230
231
  ISA_LEVEL_SSE4_POPCNT = 0x301,
232
+ ISA_LEVEL_PCLMUL = 0x340,
231
233
  ISA_LEVEL_AVX = 0x381, // same as above, just used as a differentiator for `cpu_supports_isa`
232
234
  ISA_LEVEL_AVX2 = 0x403, // also includes BMI1/2 and LZCNT
235
+ ISA_LEVEL_VPCLMUL = 0x440,
233
236
  ISA_LEVEL_AVX3 = 0x507, // SKX variant; AVX512VL + AVX512BW
234
237
  ISA_LEVEL_VBMI2 = 0x603 // ICL, AVX10
235
238
  };
239
+ #elif defined(PLATFORM_ARM)
240
+ enum YEncDecIsaLevel {
241
+ ISA_GENERIC = 0,
242
+ ISA_FEATURE_CRC = 8,
243
+ ISA_LEVEL_NEON = 0x1000
244
+ };
245
+ #elif defined(__riscv)
246
+ enum YEncDecIsaLevel {
247
+ ISA_GENERIC = 0,
248
+ ISA_LEVEL_RVV = 0x10000
249
+ };
250
+ #else
251
+ enum YEncDecIsaLevel {
252
+ ISA_GENERIC = 0
253
+ };
254
+ #endif
255
+ #ifdef PLATFORM_X86
236
256
  #ifdef _MSC_VER
237
257
  // native tuning not supported in MSVC
238
258
  # define ISA_NATIVE ISA_LEVEL_SSE2
package/src/crc.cc CHANGED
@@ -123,9 +123,10 @@ static void generate_crc32_slice_table() {
123
123
  }
124
124
  #endif
125
125
 
126
-
127
- crc_func _do_crc32_incremental = &do_crc32_incremental_generic;
128
-
126
+ extern "C" {
127
+ crc_func _do_crc32_incremental = &do_crc32_incremental_generic;
128
+ int _crc32_isa = ISA_GENERIC;
129
+ }
129
130
 
130
131
 
131
132
  uint32_t do_crc32_combine(uint32_t crc1, uint32_t crc2, size_t len2) {
@@ -140,9 +141,9 @@ uint32_t do_crc32_zeros(uint32_t crc1, size_t len) {
140
141
  return (uint32_t)crc_;
141
142
  }
142
143
 
143
- void crc_clmul_set_funcs(crc_func*);
144
- void crc_clmul256_set_funcs(crc_func*);
145
- void crc_arm_set_funcs(crc_func*);
144
+ void crc_clmul_set_funcs();
145
+ void crc_clmul256_set_funcs();
146
+ void crc_arm_set_funcs();
146
147
 
147
148
  #ifdef PLATFORM_X86
148
149
  int cpu_supports_crc_isa();
@@ -186,9 +187,9 @@ void crc_init() {
186
187
  #ifdef PLATFORM_X86
187
188
  int support = cpu_supports_crc_isa();
188
189
  if(support == 2)
189
- crc_clmul256_set_funcs(&_do_crc32_incremental);
190
+ crc_clmul256_set_funcs();
190
191
  else if(support == 1)
191
- crc_clmul_set_funcs(&_do_crc32_incremental);
192
+ crc_clmul_set_funcs();
192
193
  #endif
193
194
  #ifdef PLATFORM_ARM
194
195
  # ifdef __APPLE__
@@ -216,7 +217,7 @@ void crc_init() {
216
217
  false
217
218
  # endif
218
219
  ) {
219
- crc_arm_set_funcs(&_do_crc32_incremental);
220
+ crc_arm_set_funcs();
220
221
  }
221
222
  #endif
222
223
  }
package/src/crc.h CHANGED
@@ -9,11 +9,15 @@ extern "C" {
9
9
 
10
10
  typedef uint32_t (*crc_func)(const void*, size_t, uint32_t);
11
11
  extern crc_func _do_crc32_incremental;
12
+ extern int _crc32_isa;
12
13
  #define do_crc32 (*_do_crc32_incremental)
13
14
 
14
15
  uint32_t do_crc32_combine(uint32_t crc1, const uint32_t crc2, size_t len2);
15
16
  uint32_t do_crc32_zeros(uint32_t crc1, size_t len);
16
17
  void crc_init();
18
+ static inline int crc32_isa_level() {
19
+ return _crc32_isa;
20
+ }
17
21
 
18
22
 
19
23
 
package/src/crc_arm.cc CHANGED
@@ -200,11 +200,10 @@ static uint32_t do_crc32_incremental_arm(const void* data, size_t length, uint32
200
200
  return ~arm_crc_calc(~init, (const unsigned char*)data, (long)length);
201
201
  }
202
202
 
203
- void crc_arm_set_funcs(crc_func* _do_crc32_incremental) {
204
- *_do_crc32_incremental = &do_crc32_incremental_arm;
203
+ void crc_arm_set_funcs() {
204
+ _do_crc32_incremental = &do_crc32_incremental_arm;
205
+ _crc32_isa = ISA_FEATURE_CRC;
205
206
  }
206
207
  #else
207
- void crc_arm_set_funcs(crc_func* _do_crc32_incremental) {
208
- (void)_do_crc32_incremental;
209
- }
208
+ void crc_arm_set_funcs() {}
210
209
  #endif
@@ -365,12 +365,11 @@ static uint32_t do_crc32_incremental_clmul(const void* data, size_t length, uint
365
365
  return crc_fold((const unsigned char*)data, (long)length, init);
366
366
  }
367
367
 
368
- void crc_clmul_set_funcs(crc_func* _do_crc32_incremental) {
369
- *_do_crc32_incremental = &do_crc32_incremental_clmul;
368
+ void crc_clmul_set_funcs() {
369
+ _do_crc32_incremental = &do_crc32_incremental_clmul;
370
+ _crc32_isa = ISA_LEVEL_PCLMUL;
370
371
  }
371
372
  #else
372
- void crc_clmul_set_funcs(crc_func* _do_crc32_incremental) {
373
- (void)_do_crc32_incremental;
374
- }
373
+ void crc_clmul_set_funcs() {}
375
374
  #endif
376
375
 
@@ -217,13 +217,14 @@ static uint32_t do_crc32_incremental_clmul(const void* data, size_t length, uint
217
217
  return crc_fold((const unsigned char*)data, (long)length, init);
218
218
  }
219
219
 
220
- void crc_clmul256_set_funcs(crc_func* _do_crc32_incremental) {
221
- *_do_crc32_incremental = &do_crc32_incremental_clmul;
220
+ void crc_clmul256_set_funcs() {
221
+ _do_crc32_incremental = &do_crc32_incremental_clmul;
222
+ _crc32_isa = ISA_LEVEL_VPCLMUL;
222
223
  }
223
224
  #else
224
- void crc_clmul_set_funcs(crc_func* _do_crc32_incremental);
225
- void crc_clmul256_set_funcs(crc_func* _do_crc32_incremental) {
226
- crc_clmul_set_funcs(_do_crc32_incremental);
225
+ void crc_clmul_set_funcs();
226
+ void crc_clmul256_set_funcs() {
227
+ crc_clmul_set_funcs();
227
228
  }
228
229
  #endif
229
230
 
package/src/decoder.cc CHANGED
@@ -7,6 +7,8 @@ extern "C" {
7
7
  YencDecoderEnd (*_do_decode)(const unsigned char**, unsigned char**, size_t, YencDecoderState*) = &do_decode_scalar<false, false>;
8
8
  YencDecoderEnd (*_do_decode_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*) = &do_decode_scalar<true, false>;
9
9
  YencDecoderEnd (*_do_decode_end_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*) = &do_decode_end_scalar<true>;
10
+
11
+ int _decode_isa = ISA_GENERIC;
10
12
  }
11
13
 
12
14
  void decoder_set_sse2_funcs();
@@ -27,6 +29,7 @@ static inline void decoder_set_native_funcs() {
27
29
  _do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_NATIVE> >;
28
30
  _do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_NATIVE> >;
29
31
  _do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m256i)*2, do_decode_avx2<true, true, ISA_NATIVE> >;
32
+ _decode_isa = ISA_NATIVE;
30
33
  }
31
34
  # else
32
35
  # include "decoder_sse_base.h"
@@ -36,6 +39,7 @@ static inline void decoder_set_native_funcs() {
36
39
  _do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_NATIVE> >;
37
40
  _do_decode_raw = &do_decode_simd<true, false, sizeof(__m128i)*2, do_decode_sse<true, false, ISA_NATIVE> >;
38
41
  _do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m128i)*2, do_decode_sse<true, true, ISA_NATIVE> >;
42
+ _decode_isa = ISA_NATIVE;
39
43
  }
40
44
  # endif
41
45
  #endif
package/src/decoder.h CHANGED
@@ -32,6 +32,7 @@ typedef enum {
32
32
  extern YencDecoderEnd (*_do_decode)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
33
33
  extern YencDecoderEnd (*_do_decode_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
34
34
  extern YencDecoderEnd (*_do_decode_end_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
35
+ extern int _decode_isa;
35
36
 
36
37
  static inline size_t do_decode(int isRaw, const unsigned char* src, unsigned char* dest, size_t len, YencDecoderState* state) {
37
38
  unsigned char* ds = dest;
@@ -45,6 +46,9 @@ static inline YencDecoderEnd do_decode_end(const unsigned char** src, unsigned c
45
46
 
46
47
  void decoder_init();
47
48
 
49
+ static inline int decode_isa_level() {
50
+ return _decode_isa;
51
+ }
48
52
 
49
53
 
50
54
  #ifdef __cplusplus
@@ -9,6 +9,7 @@ void decoder_set_avx_funcs() {
9
9
  _do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_SSE4_POPCNT> >;
10
10
  _do_decode_raw = &do_decode_simd<true, false, sizeof(__m128i)*2, do_decode_sse<true, false, ISA_LEVEL_SSE4_POPCNT> >;
11
11
  _do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m128i)*2, do_decode_sse<true, true, ISA_LEVEL_SSE4_POPCNT> >;
12
+ _decode_isa = ISA_LEVEL_AVX;
12
13
  }
13
14
  #else
14
15
  void decoder_set_ssse3_funcs();
@@ -9,6 +9,7 @@ void decoder_set_avx2_funcs() {
9
9
  _do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_LEVEL_AVX2> >;
10
10
  _do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_LEVEL_AVX2> >;
11
11
  _do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m256i)*2, do_decode_avx2<true, true, ISA_LEVEL_AVX2> >;
12
+ _decode_isa = ISA_LEVEL_AVX2;
12
13
  }
13
14
  #else
14
15
  void decoder_set_avx_funcs();
@@ -67,6 +67,8 @@ HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned
67
67
  );
68
68
  }
69
69
 
70
+ decoder_set_nextMask<isRaw>(src, len, _nextMask); // set this before the loop because we can't check src after it's been overwritten
71
+
70
72
  // for some reason, MSVC Win32 seems to crash when trying to compile _mm256_mask_cmpeq_epi8_mask
71
73
  // the crash can be fixed by switching the order of the last two arguments, but it seems to generate wrong code
72
74
  // so just disable the optimisation as it seems to be problematic there
@@ -320,6 +322,7 @@ HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned
320
322
  // terminator found
321
323
  // there's probably faster ways to do this, but reverting to scalar code should be good enough
322
324
  len += (long)i;
325
+ _nextMask = decoder_set_nextMask<isRaw>(src+i, mask);
323
326
  break;
324
327
  }
325
328
  }
@@ -412,6 +415,7 @@ HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned
412
415
  }
413
416
  if(endFound) {
414
417
  len += (long)i;
418
+ _nextMask = decoder_set_nextMask<isRaw>(src+i, mask);
415
419
  break;
416
420
  }
417
421
  }
@@ -613,20 +617,6 @@ HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned
613
617
  }
614
618
  }
615
619
  _escFirst = (unsigned char)escFirst;
616
- if(isRaw) {
617
- // this would be the trivial solution, but requires the compiler holding onto minMask throughout the loop:
618
- //_nextMask = ~(uint16_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(minMask, _mm256_set1_epi8('.')));
619
- // instead, just scan the memory to determine what to set nextMask to
620
- if(len != 0) { // have to gone through at least one loop cycle
621
- if(src[i-2] == '\r' && src[i-1] == '\n' && src[i] == '.')
622
- _nextMask = 1;
623
- else if(src[i-1] == '\r' && src[i] == '\n' && src[i+1] == '.')
624
- _nextMask = 2;
625
- else
626
- _nextMask = 0;
627
- }
628
- } else
629
- _nextMask = 0;
630
620
  _mm256_zeroupper();
631
621
  }
632
622
  #endif
@@ -509,4 +509,29 @@ static inline void decoder_init_lut(uint8_t* eqFixLUT, void* compactLUT) {
509
509
  }
510
510
  #endif
511
511
  }
512
+ template<bool isRaw>
513
+ static inline void decoder_set_nextMask(const uint8_t* src, size_t len, uint16_t& nextMask) {
514
+ if(isRaw) {
515
+ if(len != 0) { // have to gone through at least one loop cycle
516
+ if(src[-2] == '\r' && src[-1] == '\n' && src[0] == '.')
517
+ nextMask = 1;
518
+ else if(src[-1] == '\r' && src[0] == '\n' && src[1] == '.')
519
+ nextMask = 2;
520
+ else
521
+ nextMask = 0;
522
+ }
523
+ } else
524
+ nextMask = 0;
525
+ }
512
526
 
527
+ // without backtracking
528
+ template<bool isRaw>
529
+ static inline uint16_t decoder_set_nextMask(const uint8_t* src, unsigned mask) {
530
+ if(isRaw) {
531
+ if(src[0] == '.')
532
+ return mask & 1;
533
+ if(src[1] == '.')
534
+ return mask & 2;
535
+ }
536
+ return 0;
537
+ }
@@ -78,6 +78,9 @@ HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned
78
78
  lfCompare = vsetq_lane_u8('.', lfCompare, 1);
79
79
  }
80
80
  #endif
81
+
82
+ decoder_set_nextMask<isRaw>(src, len, nextMask);
83
+
81
84
  long i;
82
85
  for(i = -len; i; i += sizeof(uint8x16_t)*2) {
83
86
  uint8x16x2_t data = vld1q_u8_x2_align(src+i, 32);
@@ -251,6 +254,7 @@ HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned
251
254
  // terminator found
252
255
  // there's probably faster ways to do this, but reverting to scalar code should be good enough
253
256
  len += i;
257
+ nextMask = decoder_set_nextMask<isRaw>(src+i, mask);
254
258
  break;
255
259
  }
256
260
  }
@@ -301,6 +305,7 @@ HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned
301
305
  );
302
306
  if(LIKELIHOOD(0.001, neon_vect_is_nonzero(matchEnd))) {
303
307
  len += i;
308
+ nextMask = decoder_set_nextMask<isRaw>(src+i, mask);
304
309
  break;
305
310
  }
306
311
  }
@@ -449,18 +454,6 @@ HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned
449
454
  #endif
450
455
  }
451
456
  }
452
-
453
- if(isRaw) {
454
- if(len != 0) { // have to gone through at least one loop cycle
455
- if(src[i-2] == '\r' && src[i-1] == '\n' && src[i] == '.')
456
- nextMask = 1;
457
- else if(src[i-1] == '\r' && src[i] == '\n' && src[i+1] == '.')
458
- nextMask = 2;
459
- else
460
- nextMask = 0;
461
- }
462
- } else
463
- nextMask = 0;
464
457
  }
465
458
 
466
459
  void decoder_set_neon_funcs() {
@@ -468,6 +461,7 @@ void decoder_set_neon_funcs() {
468
461
  _do_decode = &do_decode_simd<false, false, sizeof(uint8x16_t)*2, do_decode_neon<false, false> >;
469
462
  _do_decode_raw = &do_decode_simd<true, false, sizeof(uint8x16_t)*2, do_decode_neon<true, false> >;
470
463
  _do_decode_end_raw = &do_decode_simd<true, true, sizeof(uint8x16_t)*2, do_decode_neon<true, true> >;
464
+ _decode_isa = ISA_LEVEL_NEON;
471
465
  }
472
466
  #else
473
467
  void decoder_set_neon_funcs() {}
@@ -56,6 +56,9 @@ HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned
56
56
  if(nextMask == 2)
57
57
  nextMaskMix = vsetq_lane_u8(2, nextMaskMix, 1);
58
58
  uint8x16_t yencOffset = escFirst ? vmakeq_u8(42+64,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42) : vdupq_n_u8(42);
59
+
60
+ decoder_set_nextMask<isRaw>(src, len, nextMask);
61
+
59
62
  long i;
60
63
  for(i = -len; i; i += sizeof(uint8x16_t)*4) {
61
64
  uint8x16x4_t data = _vld1q_u8_x4(src+i);
@@ -227,6 +230,7 @@ HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned
227
230
  // terminator found
228
231
  // there's probably faster ways to do this, but reverting to scalar code should be good enough
229
232
  len += i;
233
+ nextMask = decoder_set_nextMask<isRaw>(src+i, mask);
230
234
  break;
231
235
  }
232
236
  }
@@ -275,6 +279,7 @@ HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned
275
279
  );
276
280
  if(LIKELIHOOD(0.001, neon_vect_is_nonzero(matchEnd))) {
277
281
  len += i;
282
+ nextMask = decoder_set_nextMask<isRaw>(src+i, mask);
278
283
  break;
279
284
  }
280
285
  }
@@ -430,17 +435,6 @@ HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned
430
435
  yencOffset = vdupq_n_u8(42);
431
436
  }
432
437
  }
433
- if(isRaw) {
434
- if(len != 0) { // have to gone through at least one loop cycle
435
- if(src[i-2] == '\r' && src[i-1] == '\n' && src[i] == '.')
436
- nextMask = 1;
437
- else if(src[i-1] == '\r' && src[i] == '\n' && src[i+1] == '.')
438
- nextMask = 2;
439
- else
440
- nextMask = 0;
441
- }
442
- } else
443
- nextMask = 0;
444
438
  }
445
439
 
446
440
  void decoder_set_neon_funcs() {
@@ -448,6 +442,7 @@ void decoder_set_neon_funcs() {
448
442
  _do_decode = &do_decode_simd<false, false, sizeof(uint8x16_t)*4, do_decode_neon<false, false> >;
449
443
  _do_decode_raw = &do_decode_simd<true, false, sizeof(uint8x16_t)*4, do_decode_neon<true, false> >;
450
444
  _do_decode_end_raw = &do_decode_simd<true, true, sizeof(uint8x16_t)*4, do_decode_neon<true, true> >;
445
+ _decode_isa = ISA_LEVEL_NEON;
451
446
  }
452
447
  #else
453
448
  void decoder_set_neon_funcs() {}
@@ -10,6 +10,7 @@ void decoder_set_sse2_funcs() {
10
10
  _do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_SSE2> >;
11
11
  _do_decode_raw = &do_decode_simd<true, false, sizeof(__m128i)*2, do_decode_sse<true, false, ISA_LEVEL_SSE2> >;
12
12
  _do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m128i)*2, do_decode_sse<true, true, ISA_LEVEL_SSE2> >;
13
+ _decode_isa = ISA_LEVEL_SSE2;
13
14
  }
14
15
  #else
15
16
  void decoder_set_sse2_funcs() {}
@@ -145,6 +145,9 @@ HEDLEY_ALWAYS_INLINE void do_decode_sse(const uint8_t* src, long& len, unsigned
145
145
  else
146
146
  lfCompare = _mm_insert_epi16(lfCompare, _nextMask == 1 ? 0x0a2e /*".\n"*/ : 0x2e0a /*"\n."*/, 0);
147
147
  }
148
+
149
+ decoder_set_nextMask<isRaw>(src, len, _nextMask); // set this before the loop because we can't check src after it's been overwritten
150
+
148
151
  intptr_t i;
149
152
  for(i = -len; i; i += sizeof(__m128i)*2) {
150
153
  __m128i oDataA = _mm_load_si128((__m128i *)(src+i));
@@ -383,6 +386,7 @@ HEDLEY_ALWAYS_INLINE void do_decode_sse(const uint8_t* src, long& len, unsigned
383
386
  // terminator found
384
387
  // there's probably faster ways to do this, but reverting to scalar code should be good enough
385
388
  len += (long)i;
389
+ _nextMask = decoder_set_nextMask<isRaw>(src+i, mask);
386
390
  break;
387
391
  }
388
392
  }
@@ -492,6 +496,7 @@ HEDLEY_ALWAYS_INLINE void do_decode_sse(const uint8_t* src, long& len, unsigned
492
496
 
493
497
  if(endFound) {
494
498
  len += (long)i;
499
+ _nextMask = decoder_set_nextMask<isRaw>(src+i, mask);
495
500
  break;
496
501
  }
497
502
  }
@@ -710,16 +715,5 @@ HEDLEY_ALWAYS_INLINE void do_decode_sse(const uint8_t* src, long& len, unsigned
710
715
  }
711
716
  }
712
717
  _escFirst = (unsigned char)escFirst;
713
- if(isRaw) {
714
- if(len != 0) { // have to gone through at least one loop cycle
715
- if(src[i-2] == '\r' && src[i-1] == '\n' && src[i] == '.')
716
- _nextMask = 1;
717
- else if(src[i-1] == '\r' && src[i] == '\n' && src[i+1] == '.')
718
- _nextMask = 2;
719
- else
720
- _nextMask = 0;
721
- }
722
- } else
723
- _nextMask = 0;
724
718
  }
725
719
  #endif
@@ -9,6 +9,7 @@ void decoder_set_ssse3_funcs() {
9
9
  _do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_SSSE3> >;
10
10
  _do_decode_raw = &do_decode_simd<true, false, sizeof(__m128i)*2, do_decode_sse<true, false, ISA_LEVEL_SSSE3> >;
11
11
  _do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m128i)*2, do_decode_sse<true, true, ISA_LEVEL_SSSE3> >;
12
+ _decode_isa = ISA_LEVEL_SSSE3;
12
13
  }
13
14
  #else
14
15
  void decoder_set_sse2_funcs();
@@ -18,6 +18,7 @@ void decoder_set_vbmi2_funcs() {
18
18
  _do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_LEVEL_VBMI2> >;
19
19
  _do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_LEVEL_VBMI2> >;
20
20
  _do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m256i)*2, do_decode_avx2<true, true, ISA_LEVEL_VBMI2> >;
21
+ _decode_isa = ISA_LEVEL_VBMI2;
21
22
  }
22
23
  # else
23
24
  # include "decoder_sse_base.h"
@@ -27,6 +28,7 @@ void decoder_set_vbmi2_funcs() {
27
28
  _do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_VBMI2> >;
28
29
  _do_decode_raw = &do_decode_simd<true, false, sizeof(__m128i)*2, do_decode_sse<true, false, ISA_LEVEL_VBMI2> >;
29
30
  _do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m128i)*2, do_decode_sse<true, true, ISA_LEVEL_VBMI2> >;
31
+ _decode_isa = ISA_LEVEL_VBMI2;
30
32
  }
31
33
  # endif
32
34
  #else
package/src/encoder.cc CHANGED
@@ -122,6 +122,7 @@ size_t do_encode_generic(int line_size, int* colOffset, const unsigned char* HED
122
122
 
123
123
  extern "C" {
124
124
  size_t (*_do_encode)(int, int*, const unsigned char* HEDLEY_RESTRICT, unsigned char* HEDLEY_RESTRICT, size_t, int) = &do_encode_generic;
125
+ int _encode_isa = ISA_GENERIC;
125
126
  }
126
127
 
127
128
  void encoder_sse2_init();
@@ -139,12 +140,14 @@ void encoder_rvv_init();
139
140
  static inline void encoder_native_init() {
140
141
  _do_encode = &do_encode_simd< do_encode_avx2<ISA_NATIVE> >;
141
142
  encoder_avx2_lut<ISA_NATIVE>();
143
+ _encode_isa = ISA_NATIVE;
142
144
  }
143
145
  # else
144
146
  # include "encoder_sse_base.h"
145
147
  static inline void encoder_native_init() {
146
148
  _do_encode = &do_encode_simd< do_encode_sse<ISA_NATIVE> >;
147
149
  encoder_sse_lut<ISA_NATIVE>();
150
+ _encode_isa = ISA_NATIVE;
148
151
  }
149
152
  # endif
150
153
  #endif
package/src/encoder.h CHANGED
@@ -10,8 +10,12 @@ extern "C" {
10
10
  #include "hedley.h"
11
11
 
12
12
  extern size_t (*_do_encode)(int, int*, const unsigned char* HEDLEY_RESTRICT, unsigned char* HEDLEY_RESTRICT, size_t, int);
13
+ extern int _encode_isa;
13
14
  #define do_encode (*_do_encode)
14
15
  void encoder_init();
16
+ static inline int encode_isa_level() {
17
+ return _encode_isa;
18
+ }
15
19
 
16
20
 
17
21
 
@@ -6,6 +6,7 @@
6
6
  void encoder_avx_init() {
7
7
  _do_encode = &do_encode_simd< do_encode_sse<ISA_LEVEL_SSE4_POPCNT> >;
8
8
  encoder_sse_lut<ISA_LEVEL_SSE4_POPCNT>();
9
+ _encode_isa = ISA_LEVEL_AVX;
9
10
  }
10
11
  #else
11
12
  void encoder_ssse3_init();
@@ -6,6 +6,7 @@
6
6
  void encoder_avx2_init() {
7
7
  _do_encode = &do_encode_simd< do_encode_avx2<ISA_LEVEL_AVX2> >;
8
8
  encoder_avx2_lut<ISA_LEVEL_AVX2>();
9
+ _encode_isa = ISA_LEVEL_AVX2;
9
10
  }
10
11
  #else
11
12
  void encoder_avx_init();
@@ -520,6 +520,7 @@ HEDLEY_ALWAYS_INLINE void do_encode_neon(int line_size, int* colOffset, const ui
520
520
 
521
521
  void encoder_neon_init() {
522
522
  _do_encode = &do_encode_simd<do_encode_neon>;
523
+ _encode_isa = ISA_LEVEL_NEON;
523
524
  // generate shuf LUT
524
525
  for(int i=0; i<256; i++) {
525
526
  int k = i;
@@ -213,6 +213,7 @@ HEDLEY_ALWAYS_INLINE void do_encode_rvv(int line_size, int* colOffset, const uin
213
213
 
214
214
  void encoder_rvv_init() {
215
215
  _do_encode = &do_encode_simd<do_encode_rvv>;
216
+ _encode_isa = ISA_LEVEL_RVV;
216
217
  }
217
218
  #else
218
219
  void encoder_rvv_init() {}
@@ -6,6 +6,7 @@
6
6
  void encoder_sse2_init() {
7
7
  _do_encode = &do_encode_simd< do_encode_sse<ISA_LEVEL_SSE2> >;
8
8
  encoder_sse_lut<ISA_LEVEL_SSE2>();
9
+ _encode_isa = ISA_LEVEL_SSE2;
9
10
  }
10
11
  #else
11
12
  void encoder_sse2_init() {}
@@ -8,6 +8,7 @@
8
8
  void encoder_ssse3_init() {
9
9
  _do_encode = &do_encode_simd< do_encode_sse<ISA_LEVEL_SSSE3> >;
10
10
  encoder_sse_lut<ISA_LEVEL_SSSE3>();
11
+ _encode_isa = ISA_LEVEL_SSSE3;
11
12
  }
12
13
  #else
13
14
  void encoder_sse2_init();
@@ -14,12 +14,14 @@ const bool encoder_has_avx10 = false;
14
14
  void encoder_vbmi2_init() {
15
15
  _do_encode = &do_encode_simd< do_encode_avx2<ISA_LEVEL_VBMI2> >;
16
16
  encoder_avx2_lut<ISA_LEVEL_VBMI2>();
17
+ _encode_isa = ISA_LEVEL_VBMI2;
17
18
  }
18
19
  # else
19
20
  # include "encoder_sse_base.h"
20
21
  void encoder_vbmi2_init() {
21
22
  _do_encode = &do_encode_simd< do_encode_sse<ISA_LEVEL_VBMI2> >;
22
23
  encoder_sse_lut<ISA_LEVEL_VBMI2>();
24
+ _encode_isa = ISA_LEVEL_VBMI2;
23
25
  }
24
26
  # endif
25
27
  #else
package/test/testcrc.js CHANGED
@@ -52,11 +52,11 @@ doTest('Random Continue', 'crc32', ['KZSHZ5EDOVAmDdakZZOrGSUGGKSpCJoWH7M0MHy6ohn
52
52
 
53
53
  // random tests
54
54
  for(var i=1; i<128; i++) {
55
- var rand = require('crypto').pseudoRandomBytes(i);
55
+ var rand = Buffer(require('crypto').randomBytes(i)); // Bun needs explicit Buffer for pseudoRandomBytes
56
56
  doTest('Random Short Buffer', 'crc32', rand);
57
57
  }
58
58
  for(var i=0; i<32; i++) {
59
- var rand = require('crypto').pseudoRandomBytes(100000);
59
+ var rand = Buffer(require('crypto').randomBytes(100000));
60
60
  doTest('Random Buffer', 'crc32', rand);
61
61
 
62
62
  var split = Math.random()*rand.length;
package/test/testdec.js CHANGED
@@ -177,7 +177,7 @@ doTest('Extra null issue', toBuffer('2e900a4fb6054c9126171cdc196dc41237bb1b76da9
177
177
 
178
178
  // random tests
179
179
  for(var i=0; i<32; i++) {
180
- var rand = require('crypto').pseudoRandomBytes(128*1024);
180
+ var rand = require('crypto').randomBytes(128*1024);
181
181
  doTest('Random', rand);
182
182
  }
183
183
 
package/test/testenc.js CHANGED
@@ -141,7 +141,7 @@ padding.fill(97); // 'a'
141
141
 
142
142
  // random tests
143
143
  for(var i=0; i<32; i++) {
144
- var rand = require('crypto').pseudoRandomBytes(4*1024);
144
+ var rand = require('crypto').randomBytes(4*1024);
145
145
  runLineSizes(function(ls, offs) {
146
146
  doTest('Random [ls='+ls+', offs='+offs+']', [rand, ls, offs]);
147
147
  });