yencode 1.2.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/crc_riscv.cc CHANGED
@@ -189,7 +189,7 @@ static uint32_t crc32_reduce_rv_zbc(uint64_t prod) {
189
189
  return t;
190
190
  }
191
191
  #endif
192
- uint32_t crc32_multiply_rv_zbc(uint32_t a, uint32_t b) {
192
+ static uint32_t crc32_multiply_rv_zbc(uint32_t a, uint32_t b) {
193
193
  #if __riscv_xlen == 64
194
194
  uint64_t t = crc32_reduce_rv_zbc(rv_clmul(a, b));
195
195
  #else
@@ -209,7 +209,7 @@ uint32_t crc32_multiply_rv_zbc(uint32_t a, uint32_t b) {
209
209
  }
210
210
 
211
211
  #if defined(__GNUC__) || defined(_MSC_VER)
212
- uint32_t crc32_shift_rv_zbc(uint32_t crc1, uint32_t n) {
212
+ static uint32_t crc32_shift_rv_zbc(uint32_t crc1, uint32_t n) {
213
213
  // TODO: require Zbb for ctz
214
214
  uint32_t result = crc1;
215
215
  #if __riscv_xlen == 64
@@ -221,15 +221,15 @@ uint32_t crc32_shift_rv_zbc(uint32_t crc1, uint32_t n) {
221
221
  #endif
222
222
  if(!n) return result;
223
223
 
224
- uint32_t result2 = crc_power[ctz32(n)];
224
+ uint32_t result2 = RapidYenc::crc_power[ctz32(n)];
225
225
  n &= n-1;
226
226
 
227
227
  while(n) {
228
- result = crc32_multiply_rv_zbc(result, crc_power[ctz32(n)]);
228
+ result = crc32_multiply_rv_zbc(result, RapidYenc::crc_power[ctz32(n)]);
229
229
  n &= n-1;
230
230
 
231
231
  if(n) {
232
- result2 = crc32_multiply_rv_zbc(result2, crc_power[ctz32(n)]);
232
+ result2 = crc32_multiply_rv_zbc(result2, RapidYenc::crc_power[ctz32(n)]);
233
233
  n &= n-1;
234
234
  }
235
235
  }
@@ -238,7 +238,7 @@ uint32_t crc32_shift_rv_zbc(uint32_t crc1, uint32_t n) {
238
238
  #endif
239
239
 
240
240
 
241
- void crc_riscv_set_funcs() {
241
+ void RapidYenc::crc_riscv_set_funcs() {
242
242
  _do_crc32_incremental = &do_crc32_incremental_rv_zbc;
243
243
  _crc32_multiply = &crc32_multiply_rv_zbc;
244
244
  #if defined(__GNUC__) || defined(_MSC_VER)
@@ -247,5 +247,5 @@ void crc_riscv_set_funcs() {
247
247
  _crc32_isa = ISA_FEATURE_ZBC;
248
248
  }
249
249
  #else
250
- void crc_riscv_set_funcs() {}
250
+ void RapidYenc::crc_riscv_set_funcs() {}
251
251
  #endif
package/src/decoder.cc CHANGED
@@ -3,29 +3,358 @@
3
3
  #include "decoder_common.h"
4
4
  #include "decoder.h"
5
5
 
6
- extern "C" {
6
+
7
+
8
+ // TODO: add branch probabilities
9
+
10
+
11
+ // state var: refers to the previous state - only used for incremental processing
12
+ template<bool isRaw>
13
+ static size_t do_decode_noend_scalar(const unsigned char* src, unsigned char* dest, size_t len, RapidYenc::YencDecoderState* state) {
14
+ using namespace RapidYenc;
15
+
16
+ const unsigned char *es = src + len; // end source pointer
17
+ unsigned char *p = dest; // destination pointer
18
+ long i = -(long)len; // input position
19
+ unsigned char c; // input character
20
+
21
+ if(len < 1) return 0;
22
+
23
+ if(isRaw) {
24
+
25
+ if(state) switch(*state) {
26
+ case YDEC_STATE_EQ:
27
+ c = es[i];
28
+ *p++ = c - 42 - 64;
29
+ i++;
30
+ if(c == '\r') {
31
+ *state = YDEC_STATE_CR;
32
+ if(i >= 0) return 0;
33
+ } else {
34
+ *state = YDEC_STATE_NONE;
35
+ break;
36
+ }
37
+ // fall-thru
38
+ case YDEC_STATE_CR:
39
+ if(es[i] != '\n') break;
40
+ i++;
41
+ *state = YDEC_STATE_CRLF;
42
+ if(i >= 0) return 0;
43
+ // Else fall-thru
44
+ case YDEC_STATE_CRLF:
45
+ // skip past first dot
46
+ if(es[i] == '.') i++;
47
+ // fall-thru
48
+ default: break; // silence compiler warnings
49
+ } else // treat as YDEC_STATE_CRLF
50
+ if(es[i] == '.') i++;
51
+
52
+ for(; i < -2; i++) {
53
+ c = es[i];
54
+ switch(c) {
55
+ case '\r':
56
+ // skip past \r\n. sequences
57
+ //i += (es[i+1] == '\n' && es[i+2] == '.') << 1;
58
+ if(es[i+1] == '\n' && es[i+2] == '.')
59
+ i += 2;
60
+ // fall-thru
61
+ case '\n':
62
+ continue;
63
+ case '=':
64
+ c = es[i+1];
65
+ *p++ = c - 42 - 64;
66
+ i += (c != '\r'); // if we have a \r, reprocess character to deal with \r\n. case
67
+ continue;
68
+ default:
69
+ *p++ = c - 42;
70
+ }
71
+ }
72
+ if(state) *state = YDEC_STATE_NONE;
73
+
74
+ if(i == -2) { // 2nd last char
75
+ c = es[i];
76
+ switch(c) {
77
+ case '\r':
78
+ if(state && es[i+1] == '\n') {
79
+ *state = YDEC_STATE_CRLF;
80
+ return p - dest;
81
+ }
82
+ // Else fall-thru
83
+ case '\n':
84
+ break;
85
+ case '=':
86
+ c = es[i+1];
87
+ *p++ = c - 42 - 64;
88
+ i += (c != '\r');
89
+ break;
90
+ default:
91
+ *p++ = c - 42;
92
+ }
93
+ i++;
94
+ }
95
+
96
+ // do final char; we process this separately to prevent an overflow if the final char is '='
97
+ if(i == -1) {
98
+ c = es[i];
99
+ if(c != '\n' && c != '\r' && c != '=') {
100
+ *p++ = c - 42;
101
+ } else if(state) {
102
+ if(c == '=') *state = YDEC_STATE_EQ;
103
+ else if(c == '\r') *state = YDEC_STATE_CR;
104
+ else *state = YDEC_STATE_NONE;
105
+ }
106
+ }
107
+
108
+ } else {
109
+
110
+ if(state && *state == YDEC_STATE_EQ) {
111
+ *p++ = es[i] - 42 - 64;
112
+ i++;
113
+ *state = YDEC_STATE_NONE;
114
+ }
115
+
116
+ /*for(i = 0; i < len - 1; i++) {
117
+ c = src[i];
118
+ if(c == '\n' || c == '\r') continue;
119
+ unsigned char isEquals = (c == '=');
120
+ i += isEquals;
121
+ *p++ = src[i] - (42 + (isEquals << 6));
122
+ }*/
123
+ for(; i < -1; i++) {
124
+ c = es[i];
125
+ switch(c) {
126
+ case '\n': case '\r': continue;
127
+ case '=':
128
+ i++;
129
+ c = es[i] - 64;
130
+ }
131
+ *p++ = c - 42;
132
+ }
133
+ if(state) *state = YDEC_STATE_NONE;
134
+ // do final char; we process this separately to prevent an overflow if the final char is '='
135
+ if(i == -1) {
136
+ c = es[i];
137
+ if(c != '\n' && c != '\r' && c != '=') {
138
+ *p++ = c - 42;
139
+ } else
140
+ if(state) *state = (c == '=' ? YDEC_STATE_EQ : YDEC_STATE_NONE);
141
+ }
142
+
143
+ }
144
+
145
+ return p - dest;
146
+ }
147
+
148
+ template<bool isRaw>
149
+ static RapidYenc::YencDecoderEnd do_decode_end_scalar(const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) {
150
+ using namespace RapidYenc;
151
+
152
+ const unsigned char *es = (*src) + len; // end source pointer
153
+ unsigned char *p = *dest; // destination pointer
154
+ long i = -(long)len; // input position
155
+ unsigned char c; // input character
156
+
157
+ if(len < 1) return YDEC_END_NONE;
158
+
159
+ #define YDEC_CHECK_END(s) if(i == 0) { \
160
+ *state = s; \
161
+ *src = es; \
162
+ *dest = p; \
163
+ return YDEC_END_NONE; \
164
+ }
165
+ if(state) switch(*state) {
166
+ case YDEC_STATE_CRLFEQ: do_decode_endable_scalar_ceq:
167
+ if(es[i] == 'y') {
168
+ *state = YDEC_STATE_NONE;
169
+ *src = es+i+1;
170
+ *dest = p;
171
+ return YDEC_END_CONTROL;
172
+ } // Else fall-thru
173
+ case YDEC_STATE_EQ:
174
+ c = es[i];
175
+ *p++ = c - 42 - 64;
176
+ i++;
177
+ if(c != '\r') break;
178
+ YDEC_CHECK_END(YDEC_STATE_CR)
179
+ // fall-through
180
+ case YDEC_STATE_CR:
181
+ if(es[i] != '\n') break;
182
+ i++;
183
+ YDEC_CHECK_END(YDEC_STATE_CRLF)
184
+ // fall-through
185
+ case YDEC_STATE_CRLF: do_decode_endable_scalar_c0:
186
+ if(es[i] == '.' && isRaw) {
187
+ i++;
188
+ YDEC_CHECK_END(YDEC_STATE_CRLFDT)
189
+ } else if(es[i] == '=') {
190
+ i++;
191
+ YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
192
+ goto do_decode_endable_scalar_ceq;
193
+ } else
194
+ break;
195
+ // fall-through
196
+ case YDEC_STATE_CRLFDT:
197
+ if(isRaw && es[i] == '\r') {
198
+ i++;
199
+ YDEC_CHECK_END(YDEC_STATE_CRLFDTCR)
200
+ } else if(isRaw && es[i] == '=') { // check for dot-stuffed ending: \r\n.=y
201
+ i++;
202
+ YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
203
+ goto do_decode_endable_scalar_ceq;
204
+ } else
205
+ break;
206
+ // fall-through
207
+ case YDEC_STATE_CRLFDTCR:
208
+ if(es[i] == '\n') {
209
+ if(isRaw) {
210
+ *state = YDEC_STATE_CRLF;
211
+ *src = es + i + 1;
212
+ *dest = p;
213
+ return YDEC_END_ARTICLE;
214
+ } else {
215
+ i++;
216
+ YDEC_CHECK_END(YDEC_STATE_CRLF)
217
+ goto do_decode_endable_scalar_c0; // handle as CRLF
218
+ }
219
+ } else
220
+ break;
221
+ case YDEC_STATE_NONE: break; // silence compiler warning
222
+ } else // treat as YDEC_STATE_CRLF
223
+ goto do_decode_endable_scalar_c0;
224
+
225
+ for(; i < -2; i++) {
226
+ c = es[i];
227
+ switch(c) {
228
+ case '\r': if(es[i+1] == '\n') {
229
+ if(isRaw && es[i+2] == '.') {
230
+ // skip past \r\n. sequences
231
+ i += 3;
232
+ YDEC_CHECK_END(YDEC_STATE_CRLFDT)
233
+ // check for end
234
+ if(es[i] == '\r') {
235
+ i++;
236
+ YDEC_CHECK_END(YDEC_STATE_CRLFDTCR)
237
+ if(es[i] == '\n') {
238
+ *src = es + i + 1;
239
+ *dest = p;
240
+ *state = YDEC_STATE_CRLF;
241
+ return YDEC_END_ARTICLE;
242
+ } else i--;
243
+ } else if(es[i] == '=') {
244
+ i++;
245
+ YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
246
+ if(es[i] == 'y') {
247
+ *src = es + i + 1;
248
+ *dest = p;
249
+ *state = YDEC_STATE_NONE;
250
+ return YDEC_END_CONTROL;
251
+ } else {
252
+ // escape char & continue
253
+ c = es[i];
254
+ *p++ = c - 42 - 64;
255
+ i -= (c == '\r');
256
+ }
257
+ } else i--;
258
+ }
259
+ else if(es[i+2] == '=') {
260
+ i += 3;
261
+ YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
262
+ if(es[i] == 'y') {
263
+ // ended
264
+ *src = es + i + 1;
265
+ *dest = p;
266
+ *state = YDEC_STATE_NONE;
267
+ return YDEC_END_CONTROL;
268
+ } else {
269
+ // escape char & continue
270
+ c = es[i];
271
+ *p++ = c - 42 - 64;
272
+ i -= (c == '\r');
273
+ }
274
+ }
275
+ } // fall-thru
276
+ case '\n':
277
+ continue;
278
+ case '=':
279
+ c = es[i+1];
280
+ *p++ = c - 42 - 64;
281
+ i += (c != '\r'); // if we have a \r, reprocess character to deal with \r\n. case
282
+ continue;
283
+ default:
284
+ *p++ = c - 42;
285
+ }
286
+ }
287
+ if(state) *state = YDEC_STATE_NONE;
288
+
289
+ if(i == -2) { // 2nd last char
290
+ c = es[i];
291
+ switch(c) {
292
+ case '\r':
293
+ if(state && es[i+1] == '\n') {
294
+ *state = YDEC_STATE_CRLF;
295
+ *src = es;
296
+ *dest = p;
297
+ return YDEC_END_NONE;
298
+ }
299
+ // Else fall-thru
300
+ case '\n':
301
+ break;
302
+ case '=':
303
+ c = es[i+1];
304
+ *p++ = c - 42 - 64;
305
+ i += (c != '\r');
306
+ break;
307
+ default:
308
+ *p++ = c - 42;
309
+ }
310
+ i++;
311
+ }
312
+
313
+ // do final char; we process this separately to prevent an overflow if the final char is '='
314
+ if(i == -1) {
315
+ c = es[i];
316
+ if(c != '\n' && c != '\r' && c != '=') {
317
+ *p++ = c - 42;
318
+ } else if(state) {
319
+ if(c == '=') *state = YDEC_STATE_EQ;
320
+ else if(c == '\r') *state = YDEC_STATE_CR;
321
+ else *state = YDEC_STATE_NONE;
322
+ }
323
+ }
324
+ #undef YDEC_CHECK_END
325
+
326
+ *src = es;
327
+ *dest = p;
328
+ return YDEC_END_NONE;
329
+ }
330
+
331
+ template<bool isRaw, bool searchEnd>
332
+ RapidYenc::YencDecoderEnd RapidYenc::do_decode_scalar(const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) {
333
+ if(searchEnd)
334
+ return do_decode_end_scalar<isRaw>(src, dest, len, state);
335
+ *dest += do_decode_noend_scalar<isRaw>(*src, *dest, len, state);
336
+ *src += len;
337
+ return YDEC_END_NONE;
338
+ }
339
+
340
+
341
+ namespace RapidYenc {
7
342
  YencDecoderEnd (*_do_decode)(const unsigned char**, unsigned char**, size_t, YencDecoderState*) = &do_decode_scalar<false, false>;
8
343
  YencDecoderEnd (*_do_decode_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*) = &do_decode_scalar<true, false>;
9
344
  YencDecoderEnd (*_do_decode_end_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*) = &do_decode_end_scalar<true>;
10
345
 
11
346
  int _decode_isa = ISA_GENERIC;
347
+
348
+ template YencDecoderEnd do_decode_scalar<true, true>(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
12
349
  }
13
350
 
14
- void decoder_set_sse2_funcs();
15
- void decoder_set_ssse3_funcs();
16
- void decoder_set_avx_funcs();
17
- void decoder_set_avx2_funcs();
18
- void decoder_set_vbmi2_funcs();
19
- extern const bool decoder_has_avx10;
20
- void decoder_set_neon_funcs();
21
- void decoder_set_rvv_funcs();
22
-
23
351
 
24
352
  #if defined(PLATFORM_X86) && defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0
25
353
  # if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
26
354
  # include "decoder_avx2_base.h"
27
355
  static inline void decoder_set_native_funcs() {
28
356
  ALIGN_ALLOC(lookups, sizeof(*lookups), 16);
357
+ using namespace RapidYenc;
29
358
  decoder_init_lut(lookups->compact);
30
359
  _do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_NATIVE> >;
31
360
  _do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_NATIVE> >;
@@ -35,6 +364,7 @@ static inline void decoder_set_native_funcs() {
35
364
  # else
36
365
  # include "decoder_sse_base.h"
37
366
  static inline void decoder_set_native_funcs() {
367
+ using namespace RapidYenc;
38
368
  decoder_sse_init(lookups);
39
369
  decoder_init_lut(lookups->compact);
40
370
  _do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_NATIVE> >;
@@ -47,7 +377,7 @@ static inline void decoder_set_native_funcs() {
47
377
 
48
378
 
49
379
  #if defined(PLATFORM_X86) || defined(PLATFORM_ARM)
50
- void decoder_init_lut(void* compactLUT) {
380
+ void RapidYenc::decoder_init_lut(void* compactLUT) {
51
381
  #ifdef YENC_DEC_USE_THINTABLE
52
382
  const int tableSize = 8;
53
383
  #else
@@ -70,7 +400,7 @@ void decoder_init_lut(void* compactLUT) {
70
400
  #endif
71
401
 
72
402
 
73
- void decoder_init() {
403
+ void RapidYenc::decoder_init() {
74
404
  #ifdef PLATFORM_X86
75
405
  # if defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0
76
406
  decoder_set_native_funcs();
package/src/decoder.h CHANGED
@@ -1,10 +1,9 @@
1
1
  #ifndef __YENC_DECODER_H
2
2
  #define __YENC_DECODER_H
3
3
 
4
- #ifdef __cplusplus
5
- extern "C" {
6
- #endif
4
+ #include "hedley.h"
7
5
 
6
+ namespace RapidYenc {
8
7
 
9
8
 
10
9
  // the last state that the decoder was in (i.e. last few characters processed)
@@ -27,21 +26,20 @@ typedef enum {
27
26
  YDEC_END_ARTICLE // \r\n.\r\n sequence found, src points to byte after last '\n'
28
27
  } YencDecoderEnd;
29
28
 
30
- #include "hedley.h"
31
29
 
32
30
  extern YencDecoderEnd (*_do_decode)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
33
31
  extern YencDecoderEnd (*_do_decode_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
34
32
  extern YencDecoderEnd (*_do_decode_end_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
35
33
  extern int _decode_isa;
36
34
 
37
- static inline size_t do_decode(int isRaw, const unsigned char* src, unsigned char* dest, size_t len, YencDecoderState* state) {
38
- unsigned char* ds = dest;
39
- (*(isRaw ? _do_decode_raw : _do_decode))(&src, &ds, len, state);
40
- return ds - dest;
35
+ static inline size_t decode(int isRaw, const void* src, void* dest, size_t len, YencDecoderState* state) {
36
+ unsigned char* ds = (unsigned char*)dest;
37
+ (*(isRaw ? _do_decode_raw : _do_decode))((const unsigned char**)&src, &ds, len, state);
38
+ return ds - (unsigned char*)dest;
41
39
  }
42
40
 
43
- static inline YencDecoderEnd do_decode_end(const unsigned char** src, unsigned char** dest, size_t len, YencDecoderState* state) {
44
- return _do_decode_end_raw(src, dest, len, state);
41
+ static inline YencDecoderEnd decode_end(const void** src, void** dest, size_t len, YencDecoderState* state) {
42
+ return _do_decode_end_raw((const unsigned char**)src, (unsigned char**)dest, len, state);
45
43
  }
46
44
 
47
45
  void decoder_init();
@@ -51,7 +49,5 @@ static inline int decode_isa_level() {
51
49
  }
52
50
 
53
51
 
54
- #ifdef __cplusplus
55
- }
56
- #endif
57
- #endif
52
+ } // namespace
53
+ #endif // defined(__YENC_DECODER_H)
@@ -1,9 +1,9 @@
1
1
  #include "common.h"
2
2
 
3
- #if defined(__AVX__) && defined(__POPCNT__)
4
3
  #include "decoder_common.h"
4
+ #if defined(__AVX__) && defined(__POPCNT__)
5
5
  #include "decoder_sse_base.h"
6
- void decoder_set_avx_funcs() {
6
+ void RapidYenc::decoder_set_avx_funcs() {
7
7
  decoder_sse_init(lookups);
8
8
  decoder_init_lut(lookups->compact);
9
9
  _do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_SSE4_POPCNT> >;
@@ -12,8 +12,7 @@ void decoder_set_avx_funcs() {
12
12
  _decode_isa = ISA_LEVEL_AVX;
13
13
  }
14
14
  #else
15
- void decoder_set_ssse3_funcs();
16
- void decoder_set_avx_funcs() {
15
+ void RapidYenc::decoder_set_avx_funcs() {
17
16
  decoder_set_ssse3_funcs();
18
17
  }
19
18
  #endif
@@ -1,19 +1,18 @@
1
1
  #include "common.h"
2
2
 
3
- #if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
4
3
  #include "decoder_common.h"
4
+ #if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
5
5
  #include "decoder_avx2_base.h"
6
- void decoder_set_avx2_funcs() {
6
+ void RapidYenc::decoder_set_avx2_funcs() {
7
7
  ALIGN_ALLOC(lookups, sizeof(*lookups), 16);
8
8
  decoder_init_lut(lookups->compact);
9
- _do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_LEVEL_AVX2> >;
10
- _do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_LEVEL_AVX2> >;
11
- _do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m256i)*2, do_decode_avx2<true, true, ISA_LEVEL_AVX2> >;
12
- _decode_isa = ISA_LEVEL_AVX2;
9
+ RapidYenc::_do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_LEVEL_AVX2> >;
10
+ RapidYenc::_do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_LEVEL_AVX2> >;
11
+ RapidYenc::_do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m256i)*2, do_decode_avx2<true, true, ISA_LEVEL_AVX2> >;
12
+ RapidYenc::_decode_isa = ISA_LEVEL_AVX2;
13
13
  }
14
14
  #else
15
- void decoder_set_avx_funcs();
16
- void decoder_set_avx2_funcs() {
15
+ void RapidYenc::decoder_set_avx2_funcs() {
17
16
  decoder_set_avx_funcs();
18
17
  }
19
18
  #endif
@@ -49,6 +49,8 @@ static HEDLEY_ALWAYS_INLINE __m256i force_align_read_256(const void* p) {
49
49
  # define COMPRESS_STORE(dst, mask, vec) _mm256_storeu_si256((__m256i*)(dst), _mm256_maskz_compress_epi8(mask, vec))
50
50
  #endif
51
51
 
52
+ namespace RapidYenc {
53
+
52
54
  template<bool isRaw, bool searchEnd, enum YEncDecIsaLevel use_isa>
53
55
  HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned char*& p, unsigned char& _escFirst, uint16_t& _nextMask) {
54
56
  HEDLEY_ASSUME(_escFirst == 0 || _escFirst == 1);
@@ -429,8 +431,9 @@ HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned
429
431
  if(use_isa >= ISA_LEVEL_AVX3)
430
432
  dataB = _mm256_add_epi8(oDataB, _mm256_set1_epi8(-42));
431
433
 
432
- if(LIKELIHOOD(0.0001, (mask & ((maskEq << 1) + escFirst)) != 0)) {
433
- maskEq = fix_eqMask<uint64_t>(maskEq & ~(uint64_t)escFirst);
434
+ uint64_t maskEqShift1 = (maskEq << 1) + escFirst;
435
+ if(LIKELIHOOD(0.0001, (mask & maskEqShift1) != 0)) {
436
+ maskEq = fix_eqMask<uint64_t>(maskEq, maskEqShift1);
434
437
  mask &= ~(uint64_t)escFirst;
435
438
  escFirst = maskEq>>63;
436
439
  // next, eliminate anything following a `=` from the special char mask; this eliminates cases of `=\r` so that they aren't removed
@@ -611,4 +614,5 @@ HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned
611
614
  _escFirst = (unsigned char)escFirst;
612
615
  _mm256_zeroupper();
613
616
  }
617
+ } // namespace
614
618
  #endif