yencode 1.2.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,15 +25,17 @@
25
25
  # define KOR16(a, b) ((a) | (b))
26
26
  #endif
27
27
 
28
- #pragma pack(16)
29
- typedef struct {
30
- unsigned char BitsSetTable256inv[256];
31
- /*align16*/ struct { char bytes[16]; } compact[32768];
32
- /*align8*/ uint64_t eqAdd[256];
33
- /*align16*/ int8_t unshufMask[32*16];
34
- } SSELookups;
35
- static SSELookups* HEDLEY_RESTRICT lookups;
36
- #pragma pack()
28
+ namespace RapidYenc {
29
+ #pragma pack(16)
30
+ typedef struct {
31
+ unsigned char BitsSetTable256inv[256];
32
+ /*align16*/ struct { char bytes[16]; } compact[32768];
33
+ /*align8*/ uint64_t eqAdd[256];
34
+ /*align16*/ int8_t unshufMask[32*16];
35
+ } SSELookups;
36
+ #pragma pack()
37
+ }
38
+ static RapidYenc::SSELookups* HEDLEY_RESTRICT lookups;
37
39
 
38
40
 
39
41
  static HEDLEY_ALWAYS_INLINE __m128i force_align_read_128(const void* p) {
@@ -45,7 +47,9 @@ static HEDLEY_ALWAYS_INLINE __m128i force_align_read_128(const void* p) {
45
47
  #endif
46
48
  }
47
49
 
48
- void decoder_sse_init(SSELookups* HEDLEY_RESTRICT& lookups); // defined in decoder_sse2.cc
50
+ namespace RapidYenc {
51
+ void decoder_sse_init(SSELookups* HEDLEY_RESTRICT& lookups); // defined in decoder_sse2.cc
52
+ }
49
53
 
50
54
 
51
55
  // for LZCNT/BSR
@@ -90,6 +94,8 @@ static HEDLEY_ALWAYS_INLINE __m128i sse2_compact_vect(uint32_t mask, __m128i dat
90
94
  return data;
91
95
  }
92
96
 
97
+ namespace RapidYenc {
98
+
93
99
  template<bool isRaw, bool searchEnd, enum YEncDecIsaLevel use_isa>
94
100
  HEDLEY_ALWAYS_INLINE void do_decode_sse(const uint8_t* src, long& len, unsigned char*& p, unsigned char& _escFirst, uint16_t& _nextMask) {
95
101
  HEDLEY_ASSUME(_escFirst == 0 || _escFirst == 1);
@@ -500,8 +506,9 @@ HEDLEY_ALWAYS_INLINE void do_decode_sse(const uint8_t* src, long& len, unsigned
500
506
  if(!_USING_BLEND_ADD)
501
507
  dataB = _mm_add_epi8(oDataB, _mm_set1_epi8(-42));
502
508
 
503
- if(LIKELIHOOD(0.0001, (mask & ((maskEq << 1) + escFirst)) != 0)) {
504
- maskEq = fix_eqMask<uint32_t>(maskEq & ~escFirst);
509
+ uint32_t maskEqShift1 = (maskEq << 1) + escFirst;
510
+ if(LIKELIHOOD(0.0001, (mask & maskEqShift1) != 0)) {
511
+ maskEq = fix_eqMask<uint32_t>(maskEq, maskEqShift1);
505
512
  mask &= ~escFirst;
506
513
  escFirst = maskEq >> 31;
507
514
  // next, eliminate anything following a `=` from the special char mask; this eliminates cases of `=\r` so that they aren't removed
@@ -688,4 +695,5 @@ HEDLEY_ALWAYS_INLINE void do_decode_sse(const uint8_t* src, long& len, unsigned
688
695
  }
689
696
  _escFirst = (unsigned char)escFirst;
690
697
  }
698
+ } // namespace
691
699
  #endif
@@ -1,9 +1,9 @@
1
1
  #include "common.h"
2
2
 
3
- #ifdef __SSSE3__
4
3
  #include "decoder_common.h"
4
+ #ifdef __SSSE3__
5
5
  #include "decoder_sse_base.h"
6
- void decoder_set_ssse3_funcs() {
6
+ void RapidYenc::decoder_set_ssse3_funcs() {
7
7
  decoder_sse_init(lookups);
8
8
  decoder_init_lut(lookups->compact);
9
9
  _do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_SSSE3> >;
@@ -12,8 +12,7 @@ void decoder_set_ssse3_funcs() {
12
12
  _decode_isa = ISA_LEVEL_SSSE3;
13
13
  }
14
14
  #else
15
- void decoder_set_sse2_funcs();
16
- void decoder_set_ssse3_funcs() {
15
+ void RapidYenc::decoder_set_ssse3_funcs() {
17
16
  decoder_set_sse2_funcs();
18
17
  }
19
18
  #endif
@@ -1,17 +1,16 @@
1
1
  #include "common.h"
2
+ # include "decoder_common.h"
2
3
 
3
- extern const bool decoder_has_avx10;
4
4
  #if !defined(__EVEX512__) && (defined(__AVX10_1__) || defined(__EVEX256__)) && defined(__AVX512VL__) && defined(__AVX512VBMI2__) && defined(__AVX512BW__)
5
- const bool decoder_has_avx10 = true;
5
+ const bool RapidYenc::decoder_has_avx10 = true;
6
6
  #else
7
- const bool decoder_has_avx10 = false;
7
+ const bool RapidYenc::decoder_has_avx10 = false;
8
8
  #endif
9
9
 
10
10
  #if defined(__AVX512VL__) && defined(__AVX512VBMI2__) && defined(__AVX512BW__)
11
- # include "decoder_common.h"
12
11
  # ifndef YENC_DISABLE_AVX256
13
12
  # include "decoder_avx2_base.h"
14
- void decoder_set_vbmi2_funcs() {
13
+ void RapidYenc::decoder_set_vbmi2_funcs() {
15
14
  _do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_LEVEL_VBMI2> >;
16
15
  _do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_LEVEL_VBMI2> >;
17
16
  _do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m256i)*2, do_decode_avx2<true, true, ISA_LEVEL_VBMI2> >;
@@ -19,7 +18,7 @@ void decoder_set_vbmi2_funcs() {
19
18
  }
20
19
  # else
21
20
  # include "decoder_sse_base.h"
22
- void decoder_set_vbmi2_funcs() {
21
+ void RapidYenc::decoder_set_vbmi2_funcs() {
23
22
  _do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_VBMI2> >;
24
23
  _do_decode_raw = &do_decode_simd<true, false, sizeof(__m128i)*2, do_decode_sse<true, false, ISA_LEVEL_VBMI2> >;
25
24
  _do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m128i)*2, do_decode_sse<true, true, ISA_LEVEL_VBMI2> >;
@@ -27,8 +26,7 @@ void decoder_set_vbmi2_funcs() {
27
26
  }
28
27
  # endif
29
28
  #else
30
- void decoder_set_avx2_funcs();
31
- void decoder_set_vbmi2_funcs() {
29
+ void RapidYenc::decoder_set_vbmi2_funcs() {
32
30
  decoder_set_avx2_funcs();
33
31
  }
34
32
  #endif
package/src/encoder.cc CHANGED
@@ -9,12 +9,12 @@
9
9
  #define _B3(n) _B2(n), _B2(n+16), _B2(n+32), _B2(n+48)
10
10
  #define _BX _B3(0), _B3(64), _B3(128), _B3(192)
11
11
 
12
- const unsigned char escapeLUT[256] = { // whether or not the character is critical
12
+ const unsigned char RapidYenc::escapeLUT[256] = { // whether or not the character is critical
13
13
  #define _B(n) ((n == 214 || n == '\r'+214 || n == '\n'+214 || n == '='-42) ? 0 : (n+42) & 0xff)
14
14
  _BX
15
15
  #undef _B
16
16
  };
17
- const uint16_t escapedLUT[256] = { // escaped sequences for characters that need escaping
17
+ const uint16_t RapidYenc::escapedLUT[256] = { // escaped sequences for characters that need escaping
18
18
  #define _B(n) ((n == 214 || n == 214+'\r' || n == 214+'\n' || n == '='-42 || n == 214+'\t' || n == 214+' ' || n == '.'-42) ? UINT16_PACK('=', ((n+42+64)&0xff)) : 0)
19
19
  _BX
20
20
  #undef _B
@@ -27,7 +27,7 @@ const uint16_t escapedLUT[256] = { // escaped sequences for characters that need
27
27
 
28
28
 
29
29
 
30
- size_t do_encode_generic(int line_size, int* colOffset, const unsigned char* HEDLEY_RESTRICT src, unsigned char* HEDLEY_RESTRICT dest, size_t len, int doEnd) {
30
+ size_t RapidYenc::do_encode_generic(int line_size, int* colOffset, const unsigned char* HEDLEY_RESTRICT src, unsigned char* HEDLEY_RESTRICT dest, size_t len, int doEnd) {
31
31
  unsigned char* es = (unsigned char*)src + len;
32
32
  unsigned char *p = dest; // destination pointer
33
33
  long i = -(long)len; // input position
@@ -36,8 +36,8 @@ size_t do_encode_generic(int line_size, int* colOffset, const unsigned char* HED
36
36
 
37
37
  if (col == 0) {
38
38
  c = es[i++];
39
- if (escapedLUT[c]) {
40
- memcpy(p, &escapedLUT[c], sizeof(uint16_t));
39
+ if (RapidYenc::escapedLUT[c]) {
40
+ memcpy(p, &RapidYenc::escapedLUT[c], sizeof(uint16_t));
41
41
  p += 2;
42
42
  col = 2;
43
43
  } else {
@@ -52,11 +52,11 @@ size_t do_encode_generic(int line_size, int* colOffset, const unsigned char* HED
52
52
  // 8 cycle unrolled version
53
53
  sp = p;
54
54
  #define DO_THING(n) \
55
- c = es[i+n], escaped = escapeLUT[c]; \
55
+ c = es[i+n], escaped = RapidYenc::escapeLUT[c]; \
56
56
  if (escaped) \
57
57
  *(p++) = escaped; \
58
58
  else { \
59
- memcpy(p, &escapedLUT[c], sizeof(uint16_t)); \
59
+ memcpy(p, &RapidYenc::escapedLUT[c], sizeof(uint16_t)); \
60
60
  p += 2; \
61
61
  }
62
62
  DO_THING(0);
@@ -80,13 +80,13 @@ size_t do_encode_generic(int line_size, int* colOffset, const unsigned char* HED
80
80
  }
81
81
  // handle remaining chars
82
82
  while(col < line_size-1) {
83
- c = es[i++], escaped = escapeLUT[c];
83
+ c = es[i++], escaped = RapidYenc::escapeLUT[c];
84
84
  if (escaped) {
85
85
  *(p++) = escaped;
86
86
  col++;
87
87
  }
88
88
  else {
89
- memcpy(p, &escapedLUT[c], sizeof(uint16_t));
89
+ memcpy(p, &RapidYenc::escapedLUT[c], sizeof(uint16_t));
90
90
  p += 2;
91
91
  col += 2;
92
92
  }
@@ -104,8 +104,8 @@ size_t do_encode_generic(int line_size, int* colOffset, const unsigned char* HED
104
104
  // last line char
105
105
  if(col < line_size) { // this can only be false if the last character was an escape sequence (or line_size is horribly small), in which case, we don't need to handle space/tab cases
106
106
  c = es[i++];
107
- if (escapedLUT[c] && c != '.'-42) {
108
- memcpy(p, &escapedLUT[c], sizeof(uint16_t));
107
+ if (RapidYenc::escapedLUT[c] && c != '.'-42) {
108
+ memcpy(p, &RapidYenc::escapedLUT[c], sizeof(uint16_t));
109
109
  p += 2;
110
110
  } else {
111
111
  *(p++) = c + 42;
@@ -115,8 +115,8 @@ size_t do_encode_generic(int line_size, int* colOffset, const unsigned char* HED
115
115
  if (i >= 0) break;
116
116
 
117
117
  c = es[i++];
118
- if (escapedLUT[c]) {
119
- uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)escapedLUT[c]);
118
+ if (RapidYenc::escapedLUT[c]) {
119
+ uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)RapidYenc::escapedLUT[c]);
120
120
  memcpy(p, &w, sizeof(w));
121
121
  p += 4;
122
122
  col = 2;
@@ -145,40 +145,31 @@ size_t do_encode_generic(int line_size, int* colOffset, const unsigned char* HED
145
145
  }
146
146
 
147
147
 
148
- extern "C" {
148
+ namespace RapidYenc {
149
149
  size_t (*_do_encode)(int, int*, const unsigned char* HEDLEY_RESTRICT, unsigned char* HEDLEY_RESTRICT, size_t, int) = &do_encode_generic;
150
150
  int _encode_isa = ISA_GENERIC;
151
151
  }
152
152
 
153
- void encoder_sse2_init();
154
- void encoder_ssse3_init();
155
- void encoder_avx_init();
156
- void encoder_avx2_init();
157
- void encoder_vbmi2_init();
158
- extern const bool encoder_has_avx10;
159
- void encoder_neon_init();
160
- void encoder_rvv_init();
161
-
162
153
  #if defined(PLATFORM_X86) && defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0
163
154
  # if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
164
155
  # include "encoder_avx_base.h"
165
156
  static inline void encoder_native_init() {
166
- _do_encode = &do_encode_simd< do_encode_avx2<ISA_NATIVE> >;
157
+ RapidYenc::_do_encode = &do_encode_simd< RapidYenc::do_encode_avx2<ISA_NATIVE> >;
167
158
  encoder_avx2_lut<ISA_NATIVE>();
168
- _encode_isa = ISA_NATIVE;
159
+ RapidYenc::_encode_isa = ISA_NATIVE;
169
160
  }
170
161
  # else
171
162
  # include "encoder_sse_base.h"
172
163
  static inline void encoder_native_init() {
173
- _do_encode = &do_encode_simd< do_encode_sse<ISA_NATIVE> >;
164
+ RapidYenc::_do_encode = &do_encode_simd< RapidYenc::do_encode_sse<ISA_NATIVE> >;
174
165
  encoder_sse_lut<ISA_NATIVE>();
175
- _encode_isa = ISA_NATIVE;
166
+ RapidYenc::_encode_isa = ISA_NATIVE;
176
167
  }
177
168
  # endif
178
169
  #endif
179
170
 
180
171
 
181
- void encoder_init() {
172
+ void RapidYenc::encoder_init() {
182
173
  #ifdef PLATFORM_X86
183
174
  # if defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0
184
175
  encoder_native_init();
package/src/encoder.h CHANGED
@@ -1,17 +1,17 @@
1
1
  #ifndef __YENC_ENCODER_H
2
2
  #define __YENC_ENCODER_H
3
3
 
4
- #ifdef __cplusplus
5
- extern "C" {
6
- #endif
4
+ #include "hedley.h"
7
5
 
6
+ namespace RapidYenc {
8
7
 
9
8
 
10
- #include "hedley.h"
11
9
 
12
10
  extern size_t (*_do_encode)(int, int*, const unsigned char* HEDLEY_RESTRICT, unsigned char* HEDLEY_RESTRICT, size_t, int);
13
11
  extern int _encode_isa;
14
- #define do_encode (*_do_encode)
12
+ static inline size_t encode(int line_size, int* colOffset, const void* HEDLEY_RESTRICT src, void* HEDLEY_RESTRICT dest, size_t len, int doEnd) {
13
+ return (*_do_encode)(line_size, colOffset, (const unsigned char* HEDLEY_RESTRICT)src, (unsigned char*)dest, len, doEnd);
14
+ }
15
15
  void encoder_init();
16
16
  static inline int encode_isa_level() {
17
17
  return _encode_isa;
@@ -19,7 +19,5 @@ static inline int encode_isa_level() {
19
19
 
20
20
 
21
21
 
22
- #ifdef __cplusplus
23
22
  }
24
23
  #endif
25
- #endif
@@ -1,16 +1,16 @@
1
1
  #include "common.h"
2
+ #include "encoder_common.h"
2
3
 
3
4
  #if defined(__AVX__) && defined(__POPCNT__)
4
5
  #include "encoder_sse_base.h"
5
6
 
6
- void encoder_avx_init() {
7
+ void RapidYenc::encoder_avx_init() {
7
8
  _do_encode = &do_encode_simd< do_encode_sse<ISA_LEVEL_SSE4_POPCNT> >;
8
9
  encoder_sse_lut<ISA_LEVEL_SSE4_POPCNT>();
9
10
  _encode_isa = ISA_LEVEL_AVX;
10
11
  }
11
12
  #else
12
- void encoder_ssse3_init();
13
- void encoder_avx_init() {
13
+ void RapidYenc::encoder_avx_init() {
14
14
  encoder_ssse3_init();
15
15
  }
16
16
  #endif
@@ -1,16 +1,16 @@
1
1
  #include "common.h"
2
+ #include "encoder_common.h"
2
3
 
3
4
  #if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
4
5
  #include "encoder_avx_base.h"
5
6
 
6
- void encoder_avx2_init() {
7
+ void RapidYenc::encoder_avx2_init() {
7
8
  _do_encode = &do_encode_simd< do_encode_avx2<ISA_LEVEL_AVX2> >;
8
9
  encoder_avx2_lut<ISA_LEVEL_AVX2>();
9
10
  _encode_isa = ISA_LEVEL_AVX2;
10
11
  }
11
12
  #else
12
- void encoder_avx_init();
13
- void encoder_avx2_init() {
13
+ void RapidYenc::encoder_avx2_init() {
14
14
  encoder_avx_init();
15
15
  }
16
16
  #endif
@@ -76,6 +76,8 @@ static void encoder_avx2_lut() {
76
76
  }
77
77
  }
78
78
 
79
+ namespace RapidYenc {
80
+
79
81
  template<enum YEncDecIsaLevel use_isa>
80
82
  HEDLEY_ALWAYS_INLINE void do_encode_avx2(int line_size, int* colOffset, const uint8_t* HEDLEY_RESTRICT srcEnd, uint8_t* HEDLEY_RESTRICT& dest, size_t& len) {
81
83
  // offset position to enable simpler loop condition checking
@@ -568,5 +570,6 @@ HEDLEY_ALWAYS_INLINE void do_encode_avx2(int line_size, int* colOffset, const ui
568
570
  dest = p;
569
571
  len = -(i - INPUT_OFFSET);
570
572
  }
573
+ } // namespace
571
574
 
572
575
  #endif
@@ -1,19 +1,31 @@
1
1
  #ifndef __YENC_ENCODER_COMMON
2
2
  #define __YENC_ENCODER_COMMON
3
3
 
4
- // lookup tables for scalar processing
5
- extern const unsigned char escapeLUT[256];
6
- extern const uint16_t escapedLUT[256];
4
+ namespace RapidYenc {
5
+ void encoder_sse2_init();
6
+ void encoder_ssse3_init();
7
+ void encoder_avx_init();
8
+ void encoder_avx2_init();
9
+ void encoder_vbmi2_init();
10
+ extern const bool encoder_has_avx10;
11
+ void encoder_neon_init();
12
+ void encoder_rvv_init();
13
+
14
+ // lookup tables for scalar processing
15
+ extern const unsigned char escapeLUT[256];
16
+ extern const uint16_t escapedLUT[256];
17
+
18
+ size_t do_encode_generic(int line_size, int* colOffset, const unsigned char* HEDLEY_RESTRICT src, unsigned char* HEDLEY_RESTRICT dest, size_t len, int doEnd);
19
+ }
7
20
 
8
21
 
9
- size_t do_encode_generic(int line_size, int* colOffset, const unsigned char* HEDLEY_RESTRICT src, unsigned char* HEDLEY_RESTRICT dest, size_t len, int doEnd);
10
22
 
11
23
  template<void(&kernel)(int, int*, const uint8_t* HEDLEY_RESTRICT, uint8_t* HEDLEY_RESTRICT&, size_t&)>
12
- static size_t do_encode_simd(int line_size, int* colOffset, const uint8_t* HEDLEY_RESTRICT src, uint8_t* HEDLEY_RESTRICT dest, size_t len, int doEnd) {
24
+ static size_t do_encode_simd(int line_size, int* colOffset, const unsigned char* HEDLEY_RESTRICT src, unsigned char* HEDLEY_RESTRICT dest, size_t len, int doEnd) {
13
25
  if(len < 1) return 0;
14
26
  if(line_size < 12) { // short lines probably not worth processing in a SIMD way
15
27
  // we assume at least the first and last char exist in the line, and since the first char could be escaped, and SIMD encoder assumes at least one non-first/last char, assumption means that line size has to be >= 4
16
- return do_encode_generic(line_size, colOffset, src, dest, len, doEnd);
28
+ return RapidYenc::do_encode_generic(line_size, colOffset, src, dest, len, doEnd);
17
29
  }
18
30
 
19
31
  const uint8_t* es = src + len;
@@ -27,8 +39,8 @@ static size_t do_encode_simd(int line_size, int* colOffset, const uint8_t* HEDLE
27
39
  long i = -(long)len;
28
40
  if(*colOffset == 0 && i < 0) {
29
41
  uint8_t c = es[i++];
30
- if (LIKELIHOOD(0.0273, escapedLUT[c] != 0)) {
31
- memcpy(p, escapedLUT + c, 2);
42
+ if (LIKELIHOOD(0.0273, RapidYenc::escapedLUT[c] != 0)) {
43
+ memcpy(p, RapidYenc::escapedLUT + c, 2);
32
44
  p += 2;
33
45
  *colOffset = 2;
34
46
  } else {
@@ -39,19 +51,19 @@ static size_t do_encode_simd(int line_size, int* colOffset, const uint8_t* HEDLE
39
51
  while(i < 0) {
40
52
  uint8_t c = es[i++];
41
53
  if(*colOffset < line_size-1) {
42
- if(!escapeLUT[c]) {
54
+ if(!RapidYenc::escapeLUT[c]) {
43
55
  p[0] = '=';
44
56
  p[1] = c+42+64;
45
57
  p += 2;
46
58
  (*colOffset) += 2;
47
59
  } else {
48
- *(p++) = escapeLUT[c];
60
+ *(p++) = RapidYenc::escapeLUT[c];
49
61
  (*colOffset) += 1;
50
62
  }
51
63
  } else {
52
64
  if(*colOffset < line_size) {
53
- if (escapedLUT[c] && c != '.'-42) {
54
- memcpy(p, escapedLUT + c, 2);
65
+ if (RapidYenc::escapedLUT[c] && c != '.'-42) {
66
+ memcpy(p, RapidYenc::escapedLUT + c, 2);
55
67
  p += 2;
56
68
  } else {
57
69
  *(p++) = c + 42;
@@ -61,8 +73,8 @@ static size_t do_encode_simd(int line_size, int* colOffset, const uint8_t* HEDLE
61
73
  }
62
74
 
63
75
  // handle EOL
64
- if (escapedLUT[c]) {
65
- uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)escapedLUT[c]);
76
+ if (RapidYenc::escapedLUT[c]) {
77
+ uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)RapidYenc::escapedLUT[c]);
66
78
  memcpy(p, &w, sizeof(w));
67
79
  p += 4;
68
80
  *colOffset = 2;
@@ -1,8 +1,8 @@
1
1
  #include "common.h"
2
+ #include "encoder_common.h"
2
3
 
3
4
  #ifdef __ARM_NEON
4
5
  #include "encoder.h"
5
- #include "encoder_common.h"
6
6
 
7
7
  // Clang wrongly assumes alignment on vst1q_u8_x2, and ARMv7 GCC doesn't support the function, so effectively, it can only be used in ARMv8 compilers
8
8
  #if defined(__aarch64__) && (defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(8,5,0))
@@ -259,6 +259,8 @@ static HEDLEY_ALWAYS_INLINE void encode_eol_handle_pre(const uint8_t* HEDLEY_RES
259
259
  }
260
260
 
261
261
 
262
+ namespace RapidYenc {
263
+
262
264
  HEDLEY_ALWAYS_INLINE void do_encode_neon(int line_size, int* colOffset, const uint8_t* HEDLEY_RESTRICT srcEnd, uint8_t* HEDLEY_RESTRICT& dest, size_t& len) {
263
265
  // offset position to enable simpler loop condition checking
264
266
  const int INPUT_OFFSET = sizeof(uint8x16_t)*4 -1; // extra chars for EOL handling, -1 to change <= to <
@@ -517,8 +519,9 @@ HEDLEY_ALWAYS_INLINE void do_encode_neon(int line_size, int* colOffset, const ui
517
519
  dest = p;
518
520
  len = -(i - INPUT_OFFSET);
519
521
  }
522
+ } // namespace
520
523
 
521
- void encoder_neon_init() {
524
+ void RapidYenc::encoder_neon_init() {
522
525
  _do_encode = &do_encode_simd<do_encode_neon>;
523
526
  _encode_isa = ISA_LEVEL_NEON;
524
527
  // generate shuf LUT
@@ -543,5 +546,5 @@ void encoder_neon_init() {
543
546
  }
544
547
  }
545
548
  #else
546
- void encoder_neon_init() {}
549
+ void RapidYenc::encoder_neon_init() {}
547
550
  #endif /* defined(__ARM_NEON) */
@@ -1,23 +1,23 @@
1
1
  #include "common.h"
2
+ #include "encoder_common.h"
2
3
 
3
4
  #ifdef __riscv_vector
4
5
  #include "encoder.h"
5
- #include "encoder_common.h"
6
6
 
7
7
 
8
8
  static HEDLEY_ALWAYS_INLINE void encode_eol_handle_pre(const uint8_t* HEDLEY_RESTRICT _src, long& inpos, uint8_t*& outp, long& col, long lineSizeOffset) {
9
9
  // TODO: vectorize
10
10
  uint8_t c = _src[inpos++];
11
- if(HEDLEY_UNLIKELY(escapedLUT[c] && c != '.'-42)) {
12
- memcpy(outp, &escapedLUT[c], sizeof(uint16_t));
11
+ if(HEDLEY_UNLIKELY(RapidYenc::escapedLUT[c] && c != '.'-42)) {
12
+ memcpy(outp, &RapidYenc::escapedLUT[c], sizeof(uint16_t));
13
13
  outp += 2;
14
14
  } else {
15
15
  *(outp++) = c + 42;
16
16
  }
17
17
 
18
18
  c = _src[inpos++];
19
- if(LIKELIHOOD(0.0273, escapedLUT[c]!=0)) {
20
- uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)escapedLUT[c]);
19
+ if(LIKELIHOOD(0.0273, RapidYenc::escapedLUT[c]!=0)) {
20
+ uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)RapidYenc::escapedLUT[c]);
21
21
  memcpy(outp, &w, sizeof(w));
22
22
  outp += 4;
23
23
  col = lineSizeOffset + 2;
@@ -29,6 +29,7 @@ static HEDLEY_ALWAYS_INLINE void encode_eol_handle_pre(const uint8_t* HEDLEY_RES
29
29
  }
30
30
  }
31
31
 
32
+ namespace RapidYenc {
32
33
 
33
34
  HEDLEY_ALWAYS_INLINE void do_encode_rvv(int line_size, int* colOffset, const uint8_t* HEDLEY_RESTRICT srcEnd, uint8_t* HEDLEY_RESTRICT& dest, size_t& len) {
34
35
  size_t vl2 = RV(vsetvlmax_e8m2)(); // TODO: limit to line length
@@ -195,11 +196,12 @@ HEDLEY_ALWAYS_INLINE void do_encode_rvv(int line_size, int* colOffset, const uin
195
196
  dest = outp;
196
197
  len = -(inpos - INPUT_OFFSET);
197
198
  }
199
+ } // namespace
198
200
 
199
- void encoder_rvv_init() {
201
+ void RapidYenc::encoder_rvv_init() {
200
202
  _do_encode = &do_encode_simd<do_encode_rvv>;
201
203
  _encode_isa = ISA_LEVEL_RVV;
202
204
  }
203
205
  #else
204
- void encoder_rvv_init() {}
206
+ void RapidYenc::encoder_rvv_init() {}
205
207
  #endif /* defined(__riscv_vector) */
@@ -1,14 +1,15 @@
1
1
  #include "common.h"
2
+ #include "encoder_common.h"
2
3
 
3
4
  #ifdef __SSE2__
4
5
  #include "encoder_sse_base.h"
5
6
 
6
- void encoder_sse2_init() {
7
+ void RapidYenc::encoder_sse2_init() {
7
8
  _do_encode = &do_encode_simd< do_encode_sse<ISA_LEVEL_SSE2> >;
8
9
  encoder_sse_lut<ISA_LEVEL_SSE2>();
9
10
  _encode_isa = ISA_LEVEL_SSE2;
10
11
  }
11
12
  #else
12
- void encoder_sse2_init() {}
13
+ void RapidYenc::encoder_sse2_init() {}
13
14
  #endif
14
15
 
@@ -147,6 +147,7 @@ static HEDLEY_ALWAYS_INLINE uintptr_t sse2_expand_store_vector(__m128i data, uns
147
147
  }
148
148
  }
149
149
 
150
+ namespace RapidYenc {
150
151
 
151
152
  template<enum YEncDecIsaLevel use_isa>
152
153
  HEDLEY_ALWAYS_INLINE void do_encode_sse(int line_size, int* colOffset, const uint8_t* HEDLEY_RESTRICT srcEnd, uint8_t* HEDLEY_RESTRICT& dest, size_t& len) {
@@ -720,4 +721,5 @@ HEDLEY_ALWAYS_INLINE void do_encode_sse(int line_size, int* colOffset, const uin
720
721
  dest = p;
721
722
  len = -(i - INPUT_OFFSET);
722
723
  }
724
+ } // namespace
723
725
 
@@ -1,18 +1,18 @@
1
1
  #include "common.h"
2
+ #include "encoder_common.h"
2
3
 
3
4
  // slightly faster version which improves the worst case scenario significantly; since worst case doesn't happen often, overall speedup is relatively minor
4
5
  // requires PSHUFB (SSSE3) instruction, but will use POPCNT (SSE4.2 (or AMD's ABM, but Phenom doesn't support SSSE3 so doesn't matter)) if available (these only seem to give minor speedups, so considered optional)
5
6
  #ifdef __SSSE3__
6
7
  #include "encoder_sse_base.h"
7
8
 
8
- void encoder_ssse3_init() {
9
+ void RapidYenc::encoder_ssse3_init() {
9
10
  _do_encode = &do_encode_simd< do_encode_sse<ISA_LEVEL_SSSE3> >;
10
11
  encoder_sse_lut<ISA_LEVEL_SSSE3>();
11
12
  _encode_isa = ISA_LEVEL_SSSE3;
12
13
  }
13
14
  #else
14
- void encoder_sse2_init();
15
- void encoder_ssse3_init() {
15
+ void RapidYenc::encoder_ssse3_init() {
16
16
  encoder_sse2_init();
17
17
  }
18
18
  #endif
@@ -1,32 +1,31 @@
1
1
  #include "common.h"
2
+ #include "encoder_common.h"
2
3
 
3
- extern const bool encoder_has_avx10;
4
4
  #if !defined(__EVEX512__) && (defined(__AVX10_1__) || defined(__EVEX256__)) && defined(__AVX512VL__) && defined(__AVX512VBMI2__) && defined(__AVX512BW__)
5
- const bool encoder_has_avx10 = true;
5
+ const bool RapidYenc::encoder_has_avx10 = true;
6
6
  #else
7
- const bool encoder_has_avx10 = false;
7
+ const bool RapidYenc::encoder_has_avx10 = false;
8
8
  #endif
9
9
 
10
10
  #if defined(__AVX512VL__) && defined(__AVX512VBMI2__) && defined(__AVX512BW__)
11
11
  # ifndef YENC_DISABLE_AVX256
12
12
  # include "encoder_avx_base.h"
13
13
 
14
- void encoder_vbmi2_init() {
14
+ void RapidYenc::encoder_vbmi2_init() {
15
15
  _do_encode = &do_encode_simd< do_encode_avx2<ISA_LEVEL_VBMI2> >;
16
16
  encoder_avx2_lut<ISA_LEVEL_VBMI2>();
17
17
  _encode_isa = ISA_LEVEL_VBMI2;
18
18
  }
19
19
  # else
20
20
  # include "encoder_sse_base.h"
21
- void encoder_vbmi2_init() {
21
+ void RapidYenc::encoder_vbmi2_init() {
22
22
  _do_encode = &do_encode_simd< do_encode_sse<ISA_LEVEL_VBMI2> >;
23
23
  encoder_sse_lut<ISA_LEVEL_VBMI2>();
24
24
  _encode_isa = ISA_LEVEL_VBMI2;
25
25
  }
26
26
  # endif
27
27
  #else
28
- void encoder_avx2_init();
29
- void encoder_vbmi2_init() {
28
+ void RapidYenc::encoder_vbmi2_init() {
30
29
  encoder_avx2_init();
31
30
  }
32
31
  #endif