yencode 1.1.5 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +130 -189
  2. package/binding.gyp +115 -6
  3. package/index.js +2 -0
  4. package/package.json +1 -1
  5. package/src/common.h +37 -7
  6. package/src/crc.cc +121 -47
  7. package/src/crc.h +74 -10
  8. package/src/crc_arm.cc +51 -34
  9. package/src/crc_arm_pmull.cc +215 -0
  10. package/src/crc_common.h +22 -0
  11. package/src/crc_folding.cc +154 -16
  12. package/src/crc_folding_256.cc +7 -14
  13. package/src/crc_riscv.cc +251 -0
  14. package/src/decoder.cc +373 -13
  15. package/src/decoder.h +10 -14
  16. package/src/decoder_avx.cc +5 -6
  17. package/src/decoder_avx2.cc +8 -9
  18. package/src/decoder_avx2_base.h +7 -11
  19. package/src/decoder_common.h +56 -373
  20. package/src/decoder_neon.cc +13 -19
  21. package/src/decoder_neon64.cc +12 -15
  22. package/src/decoder_rvv.cc +280 -0
  23. package/src/decoder_sse2.cc +26 -5
  24. package/src/decoder_sse_base.h +20 -40
  25. package/src/decoder_ssse3.cc +5 -6
  26. package/src/decoder_vbmi2.cc +6 -13
  27. package/src/encoder.cc +42 -26
  28. package/src/encoder.h +5 -7
  29. package/src/encoder_avx.cc +3 -3
  30. package/src/encoder_avx2.cc +3 -3
  31. package/src/encoder_avx_base.h +3 -0
  32. package/src/encoder_common.h +26 -32
  33. package/src/encoder_neon.cc +6 -3
  34. package/src/encoder_rvv.cc +13 -26
  35. package/src/encoder_sse2.cc +3 -2
  36. package/src/encoder_sse_base.h +2 -0
  37. package/src/encoder_ssse3.cc +3 -3
  38. package/src/encoder_vbmi2.cc +6 -7
  39. package/src/platform.cc +24 -23
  40. package/src/yencode.cc +54 -11
  41. package/test/_speedbase.js +4 -2
  42. package/test/speeddec.js +25 -16
  43. package/test/speedenc.js +21 -17
  44. package/test/testcrc.js +17 -1
  45. package/test/testcrcfuncs.c +53 -0
  46. package/test/testdec.js +1 -0
package/src/decoder.h CHANGED
@@ -1,10 +1,9 @@
1
1
  #ifndef __YENC_DECODER_H
2
2
  #define __YENC_DECODER_H
3
3
 
4
- #ifdef __cplusplus
5
- extern "C" {
6
- #endif
4
+ #include "hedley.h"
7
5
 
6
+ namespace RapidYenc {
8
7
 
9
8
 
10
9
  // the last state that the decoder was in (i.e. last few characters processed)
@@ -27,21 +26,20 @@ typedef enum {
27
26
  YDEC_END_ARTICLE // \r\n.\r\n sequence found, src points to byte after last '\n'
28
27
  } YencDecoderEnd;
29
28
 
30
- #include "hedley.h"
31
29
 
32
30
  extern YencDecoderEnd (*_do_decode)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
33
31
  extern YencDecoderEnd (*_do_decode_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
34
32
  extern YencDecoderEnd (*_do_decode_end_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
35
33
  extern int _decode_isa;
36
34
 
37
- static inline size_t do_decode(int isRaw, const unsigned char* src, unsigned char* dest, size_t len, YencDecoderState* state) {
38
- unsigned char* ds = dest;
39
- (*(isRaw ? _do_decode_raw : _do_decode))(&src, &ds, len, state);
40
- return ds - dest;
35
+ static inline size_t decode(int isRaw, const void* src, void* dest, size_t len, YencDecoderState* state) {
36
+ unsigned char* ds = (unsigned char*)dest;
37
+ (*(isRaw ? _do_decode_raw : _do_decode))((const unsigned char**)&src, &ds, len, state);
38
+ return ds - (unsigned char*)dest;
41
39
  }
42
40
 
43
- static inline YencDecoderEnd do_decode_end(const unsigned char** src, unsigned char** dest, size_t len, YencDecoderState* state) {
44
- return _do_decode_end_raw(src, dest, len, state);
41
+ static inline YencDecoderEnd decode_end(const void** src, void** dest, size_t len, YencDecoderState* state) {
42
+ return _do_decode_end_raw((const unsigned char**)src, (unsigned char**)dest, len, state);
45
43
  }
46
44
 
47
45
  void decoder_init();
@@ -51,7 +49,5 @@ static inline int decode_isa_level() {
51
49
  }
52
50
 
53
51
 
54
- #ifdef __cplusplus
55
- }
56
- #endif
57
- #endif
52
+ } // namespace
53
+ #endif // defined(__YENC_DECODER_H)
@@ -1,19 +1,18 @@
1
1
  #include "common.h"
2
2
 
3
- #if defined(__AVX__) && defined(__POPCNT__)
4
3
  #include "decoder_common.h"
4
+ #if defined(__AVX__) && defined(__POPCNT__)
5
5
  #include "decoder_sse_base.h"
6
- void decoder_set_avx_funcs() {
7
- decoder_sse_init();
8
- decoder_init_lut(lookups->eqFix, lookups->compact);
6
+ void RapidYenc::decoder_set_avx_funcs() {
7
+ decoder_sse_init(lookups);
8
+ decoder_init_lut(lookups->compact);
9
9
  _do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_SSE4_POPCNT> >;
10
10
  _do_decode_raw = &do_decode_simd<true, false, sizeof(__m128i)*2, do_decode_sse<true, false, ISA_LEVEL_SSE4_POPCNT> >;
11
11
  _do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m128i)*2, do_decode_sse<true, true, ISA_LEVEL_SSE4_POPCNT> >;
12
12
  _decode_isa = ISA_LEVEL_AVX;
13
13
  }
14
14
  #else
15
- void decoder_set_ssse3_funcs();
16
- void decoder_set_avx_funcs() {
15
+ void RapidYenc::decoder_set_avx_funcs() {
17
16
  decoder_set_ssse3_funcs();
18
17
  }
19
18
  #endif
@@ -1,19 +1,18 @@
1
1
  #include "common.h"
2
2
 
3
- #if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
4
3
  #include "decoder_common.h"
4
+ #if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
5
5
  #include "decoder_avx2_base.h"
6
- void decoder_set_avx2_funcs() {
6
+ void RapidYenc::decoder_set_avx2_funcs() {
7
7
  ALIGN_ALLOC(lookups, sizeof(*lookups), 16);
8
- decoder_init_lut(lookups->eqFix, lookups->compact);
9
- _do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_LEVEL_AVX2> >;
10
- _do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_LEVEL_AVX2> >;
11
- _do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m256i)*2, do_decode_avx2<true, true, ISA_LEVEL_AVX2> >;
12
- _decode_isa = ISA_LEVEL_AVX2;
8
+ decoder_init_lut(lookups->compact);
9
+ RapidYenc::_do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_LEVEL_AVX2> >;
10
+ RapidYenc::_do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_LEVEL_AVX2> >;
11
+ RapidYenc::_do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m256i)*2, do_decode_avx2<true, true, ISA_LEVEL_AVX2> >;
12
+ RapidYenc::_decode_isa = ISA_LEVEL_AVX2;
13
13
  }
14
14
  #else
15
- void decoder_set_avx_funcs();
16
- void decoder_set_avx2_funcs() {
15
+ void RapidYenc::decoder_set_avx2_funcs() {
17
16
  decoder_set_avx_funcs();
18
17
  }
19
18
  #endif
@@ -15,7 +15,6 @@
15
15
  #pragma pack(16)
16
16
  static struct {
17
17
  /*align16*/ struct { char bytes[16]; } compact[32768];
18
- uint8_t eqFix[256];
19
18
  } * HEDLEY_RESTRICT lookups;
20
19
  #pragma pack()
21
20
 
@@ -50,6 +49,8 @@ static HEDLEY_ALWAYS_INLINE __m256i force_align_read_256(const void* p) {
50
49
  # define COMPRESS_STORE(dst, mask, vec) _mm256_storeu_si256((__m256i*)(dst), _mm256_maskz_compress_epi8(mask, vec))
51
50
  #endif
52
51
 
52
+ namespace RapidYenc {
53
+
53
54
  template<bool isRaw, bool searchEnd, enum YEncDecIsaLevel use_isa>
54
55
  HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned char*& p, unsigned char& _escFirst, uint16_t& _nextMask) {
55
56
  HEDLEY_ASSUME(_escFirst == 0 || _escFirst == 1);
@@ -430,17 +431,11 @@ HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned
430
431
  if(use_isa >= ISA_LEVEL_AVX3)
431
432
  dataB = _mm256_add_epi8(oDataB, _mm256_set1_epi8(-42));
432
433
 
433
- if(LIKELIHOOD(0.0001, (mask & ((maskEq << 1) + escFirst)) != 0)) {
434
- unsigned tmp = lookups->eqFix[(maskEq&0xff) & ~(uint64_t)escFirst];
435
- uint64_t maskEq2 = tmp;
436
- for(int j=8; j<64; j+=8) {
437
- tmp = lookups->eqFix[(unsigned)((maskEq>>j)&0xff) & ~(tmp>>7)];
438
- maskEq2 |= (uint64_t)tmp<<j;
439
- }
440
- maskEq = maskEq2;
441
-
434
+ uint64_t maskEqShift1 = (maskEq << 1) + escFirst;
435
+ if(LIKELIHOOD(0.0001, (mask & maskEqShift1) != 0)) {
436
+ maskEq = fix_eqMask<uint64_t>(maskEq, maskEqShift1);
442
437
  mask &= ~(uint64_t)escFirst;
443
- escFirst = tmp>>7;
438
+ escFirst = maskEq>>63;
444
439
  // next, eliminate anything following a `=` from the special char mask; this eliminates cases of `=\r` so that they aren't removed
445
440
  maskEq <<= 1;
446
441
  mask &= ~maskEq;
@@ -619,4 +614,5 @@ HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned
619
614
  _escFirst = (unsigned char)escFirst;
620
615
  _mm256_zeroupper();
621
616
  }
617
+ } // namespace
622
618
  #endif
@@ -1,338 +1,32 @@
1
1
  #include "decoder.h"
2
2
 
3
- // TODO: need to support max output length somehow
4
- // TODO: add branch probabilities
5
-
6
-
7
- // state var: refers to the previous state - only used for incremental processing
8
- template<bool isRaw>
9
- size_t do_decode_noend_scalar(const unsigned char* src, unsigned char* dest, size_t len, YencDecoderState* state) {
10
- const unsigned char *es = src + len; // end source pointer
11
- unsigned char *p = dest; // destination pointer
12
- long i = -(long)len; // input position
13
- unsigned char c; // input character
14
-
15
- if(len < 1) return 0;
3
+ namespace RapidYenc {
4
+ void decoder_set_sse2_funcs();
5
+ void decoder_set_ssse3_funcs();
6
+ void decoder_set_avx_funcs();
7
+ void decoder_set_avx2_funcs();
8
+ void decoder_set_vbmi2_funcs();
9
+ extern const bool decoder_has_avx10;
10
+ void decoder_set_neon_funcs();
11
+ void decoder_set_rvv_funcs();
16
12
 
17
- if(isRaw) {
18
-
19
- if(state) switch(*state) {
20
- case YDEC_STATE_EQ:
21
- c = es[i];
22
- *p++ = c - 42 - 64;
23
- i++;
24
- if(c == '\r') {
25
- *state = YDEC_STATE_CR;
26
- if(i >= 0) return 0;
27
- } else {
28
- *state = YDEC_STATE_NONE;
29
- break;
30
- }
31
- // fall-thru
32
- case YDEC_STATE_CR:
33
- if(es[i] != '\n') break;
34
- i++;
35
- *state = YDEC_STATE_CRLF;
36
- if(i >= 0) return 0;
37
- // Else fall-thru
38
- case YDEC_STATE_CRLF:
39
- // skip past first dot
40
- if(es[i] == '.') i++;
41
- // fall-thru
42
- default: break; // silence compiler warnings
43
- } else // treat as YDEC_STATE_CRLF
44
- if(es[i] == '.') i++;
45
-
46
- for(; i < -2; i++) {
47
- c = es[i];
48
- switch(c) {
49
- case '\r':
50
- // skip past \r\n. sequences
51
- //i += (es[i+1] == '\n' && es[i+2] == '.') << 1;
52
- if(es[i+1] == '\n' && es[i+2] == '.')
53
- i += 2;
54
- // fall-thru
55
- case '\n':
56
- continue;
57
- case '=':
58
- c = es[i+1];
59
- *p++ = c - 42 - 64;
60
- i += (c != '\r'); // if we have a \r, reprocess character to deal with \r\n. case
61
- continue;
62
- default:
63
- *p++ = c - 42;
64
- }
65
- }
66
- if(state) *state = YDEC_STATE_NONE;
67
-
68
- if(i == -2) { // 2nd last char
69
- c = es[i];
70
- switch(c) {
71
- case '\r':
72
- if(state && es[i+1] == '\n') {
73
- *state = YDEC_STATE_CRLF;
74
- return p - dest;
75
- }
76
- // Else fall-thru
77
- case '\n':
78
- break;
79
- case '=':
80
- c = es[i+1];
81
- *p++ = c - 42 - 64;
82
- i += (c != '\r');
83
- break;
84
- default:
85
- *p++ = c - 42;
86
- }
87
- i++;
88
- }
89
-
90
- // do final char; we process this separately to prevent an overflow if the final char is '='
91
- if(i == -1) {
92
- c = es[i];
93
- if(c != '\n' && c != '\r' && c != '=') {
94
- *p++ = c - 42;
95
- } else if(state) {
96
- if(c == '=') *state = YDEC_STATE_EQ;
97
- else if(c == '\r') *state = YDEC_STATE_CR;
98
- else *state = YDEC_STATE_NONE;
99
- }
100
- }
101
-
102
- } else {
103
-
104
- if(state && *state == YDEC_STATE_EQ) {
105
- *p++ = es[i] - 42 - 64;
106
- i++;
107
- *state = YDEC_STATE_NONE;
108
- }
109
-
110
- /*for(i = 0; i < len - 1; i++) {
111
- c = src[i];
112
- if(c == '\n' || c == '\r') continue;
113
- unsigned char isEquals = (c == '=');
114
- i += isEquals;
115
- *p++ = src[i] - (42 + (isEquals << 6));
116
- }*/
117
- for(; i < -1; i++) {
118
- c = es[i];
119
- switch(c) {
120
- case '\n': case '\r': continue;
121
- case '=':
122
- i++;
123
- c = es[i] - 64;
124
- }
125
- *p++ = c - 42;
126
- }
127
- if(state) *state = YDEC_STATE_NONE;
128
- // do final char; we process this separately to prevent an overflow if the final char is '='
129
- if(i == -1) {
130
- c = es[i];
131
- if(c != '\n' && c != '\r' && c != '=') {
132
- *p++ = c - 42;
133
- } else
134
- if(state) *state = (c == '=' ? YDEC_STATE_EQ : YDEC_STATE_NONE);
135
- }
136
-
137
- }
138
-
139
- return p - dest;
13
+ template<bool isRaw, bool searchEnd>
14
+ YencDecoderEnd do_decode_scalar(const unsigned char** src, unsigned char** dest, size_t len, YencDecoderState* state);
140
15
  }
141
16
 
142
- template<bool isRaw>
143
- YencDecoderEnd do_decode_end_scalar(const unsigned char** src, unsigned char** dest, size_t len, YencDecoderState* state) {
144
- const unsigned char *es = (*src) + len; // end source pointer
145
- unsigned char *p = *dest; // destination pointer
146
- long i = -(long)len; // input position
147
- unsigned char c; // input character
148
-
149
- if(len < 1) return YDEC_END_NONE;
150
-
151
- #define YDEC_CHECK_END(s) if(i == 0) { \
152
- *state = s; \
153
- *src = es; \
154
- *dest = p; \
155
- return YDEC_END_NONE; \
156
- }
157
- if(state) switch(*state) {
158
- case YDEC_STATE_CRLFEQ: do_decode_endable_scalar_ceq:
159
- if(es[i] == 'y') {
160
- *state = YDEC_STATE_NONE;
161
- *src = es+i+1;
162
- *dest = p;
163
- return YDEC_END_CONTROL;
164
- } // Else fall-thru
165
- case YDEC_STATE_EQ:
166
- c = es[i];
167
- *p++ = c - 42 - 64;
168
- i++;
169
- if(c != '\r') break;
170
- YDEC_CHECK_END(YDEC_STATE_CR)
171
- // fall-through
172
- case YDEC_STATE_CR:
173
- if(es[i] != '\n') break;
174
- i++;
175
- YDEC_CHECK_END(YDEC_STATE_CRLF)
176
- // fall-through
177
- case YDEC_STATE_CRLF: do_decode_endable_scalar_c0:
178
- if(es[i] == '.' && isRaw) {
179
- i++;
180
- YDEC_CHECK_END(YDEC_STATE_CRLFDT)
181
- // fall-through
182
- } else if(es[i] == '=') {
183
- i++;
184
- YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
185
- goto do_decode_endable_scalar_ceq;
186
- } else
187
- break;
188
- case YDEC_STATE_CRLFDT:
189
- if(isRaw && es[i] == '\r') {
190
- i++;
191
- YDEC_CHECK_END(YDEC_STATE_CRLFDTCR)
192
- // fall-through
193
- } else if(isRaw && es[i] == '=') { // check for dot-stuffed ending: \r\n.=y
194
- i++;
195
- YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
196
- goto do_decode_endable_scalar_ceq;
197
- } else
198
- break;
199
- case YDEC_STATE_CRLFDTCR:
200
- if(es[i] == '\n') {
201
- if(isRaw) {
202
- *state = YDEC_STATE_CRLF;
203
- *src = es + i + 1;
204
- *dest = p;
205
- return YDEC_END_ARTICLE;
206
- } else {
207
- i++;
208
- YDEC_CHECK_END(YDEC_STATE_CRLF)
209
- goto do_decode_endable_scalar_c0; // handle as CRLF
210
- }
211
- } else
212
- break;
213
- case YDEC_STATE_NONE: break; // silence compiler warning
214
- } else // treat as YDEC_STATE_CRLF
215
- goto do_decode_endable_scalar_c0;
216
-
217
- for(; i < -2; i++) {
218
- c = es[i];
219
- switch(c) {
220
- case '\r': if(es[i+1] == '\n') {
221
- if(isRaw && es[i+2] == '.') {
222
- // skip past \r\n. sequences
223
- i += 3;
224
- YDEC_CHECK_END(YDEC_STATE_CRLFDT)
225
- // check for end
226
- if(es[i] == '\r') {
227
- i++;
228
- YDEC_CHECK_END(YDEC_STATE_CRLFDTCR)
229
- if(es[i] == '\n') {
230
- *src = es + i + 1;
231
- *dest = p;
232
- *state = YDEC_STATE_CRLF;
233
- return YDEC_END_ARTICLE;
234
- } else i--;
235
- } else if(es[i] == '=') {
236
- i++;
237
- YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
238
- if(es[i] == 'y') {
239
- *src = es + i + 1;
240
- *dest = p;
241
- *state = YDEC_STATE_NONE;
242
- return YDEC_END_CONTROL;
243
- } else {
244
- // escape char & continue
245
- c = es[i];
246
- *p++ = c - 42 - 64;
247
- i -= (c == '\r');
248
- }
249
- } else i--;
250
- }
251
- else if(es[i+2] == '=') {
252
- i += 3;
253
- YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
254
- if(es[i] == 'y') {
255
- // ended
256
- *src = es + i + 1;
257
- *dest = p;
258
- *state = YDEC_STATE_NONE;
259
- return YDEC_END_CONTROL;
260
- } else {
261
- // escape char & continue
262
- c = es[i];
263
- *p++ = c - 42 - 64;
264
- i -= (c == '\r');
265
- }
266
- }
267
- } // fall-thru
268
- case '\n':
269
- continue;
270
- case '=':
271
- c = es[i+1];
272
- *p++ = c - 42 - 64;
273
- i += (c != '\r'); // if we have a \r, reprocess character to deal with \r\n. case
274
- continue;
275
- default:
276
- *p++ = c - 42;
277
- }
278
- }
279
- if(state) *state = YDEC_STATE_NONE;
280
-
281
- if(i == -2) { // 2nd last char
282
- c = es[i];
283
- switch(c) {
284
- case '\r':
285
- if(state && es[i+1] == '\n') {
286
- *state = YDEC_STATE_CRLF;
287
- *src = es;
288
- *dest = p;
289
- return YDEC_END_NONE;
290
- }
291
- // Else fall-thru
292
- case '\n':
293
- break;
294
- case '=':
295
- c = es[i+1];
296
- *p++ = c - 42 - 64;
297
- i += (c != '\r');
298
- break;
299
- default:
300
- *p++ = c - 42;
301
- }
302
- i++;
303
- }
304
-
305
- // do final char; we process this separately to prevent an overflow if the final char is '='
306
- if(i == -1) {
307
- c = es[i];
308
- if(c != '\n' && c != '\r' && c != '=') {
309
- *p++ = c - 42;
310
- } else if(state) {
311
- if(c == '=') *state = YDEC_STATE_EQ;
312
- else if(c == '\r') *state = YDEC_STATE_CR;
313
- else *state = YDEC_STATE_NONE;
314
- }
315
- }
316
- #undef YDEC_CHECK_END
317
-
318
- *src = es;
319
- *dest = p;
320
- return YDEC_END_NONE;
321
- }
322
17
 
323
- template<bool isRaw, bool searchEnd>
324
- YencDecoderEnd do_decode_scalar(const unsigned char** src, unsigned char** dest, size_t len, YencDecoderState* state) {
325
- if(searchEnd)
326
- return do_decode_end_scalar<isRaw>(src, dest, len, state);
327
- *dest += do_decode_noend_scalar<isRaw>(*src, *dest, len, state);
328
- *src += len;
329
- return YDEC_END_NONE;
330
- }
18
+ #if defined(PLATFORM_ARM) && !defined(__aarch64__)
19
+ #define YENC_DEC_USE_THINTABLE 1
20
+ #endif
331
21
 
22
+ // TODO: need to support max output length somehow
332
23
 
333
24
 
334
- template<bool isRaw, bool searchEnd, int width, void(&kernel)(const uint8_t*, long&, unsigned char*&, unsigned char&, uint16_t&)>
335
- YencDecoderEnd do_decode_simd(const unsigned char** src, unsigned char** dest, size_t len, YencDecoderState* state) {
25
+
26
+ template<bool isRaw, bool searchEnd, void(&kernel)(const uint8_t*, long&, unsigned char*&, unsigned char&, uint16_t&)>
27
+ static inline RapidYenc::YencDecoderEnd _do_decode_simd(size_t width, const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) {
28
+ using namespace RapidYenc;
29
+
336
30
  if(len <= width*2) return do_decode_scalar<isRaw, searchEnd>(src, dest, len, state);
337
31
 
338
32
  YencDecoderState tState = YDEC_STATE_CRLF;
@@ -461,54 +155,22 @@ YencDecoderEnd do_decode_simd(const unsigned char** src, unsigned char** dest, s
461
155
  return YDEC_END_NONE;
462
156
  }
463
157
 
464
- static inline void decoder_init_lut(uint8_t* eqFixLUT, void* compactLUT) {
465
- for(int i=0; i<256; i++) {
466
- int k = i;
467
- int p = 0;
468
-
469
- // fix LUT
470
- k = i;
471
- p = 0;
472
- for(int j=0; j<8; j++) {
473
- k = i >> j;
474
- if(k & 1) {
475
- p |= 1 << j;
476
- j++;
477
- }
478
- }
479
- eqFixLUT[i] = p;
480
-
481
- #ifdef YENC_DEC_USE_THINTABLE
482
- uint8_t* res = (uint8_t*)compactLUT + i*8;
483
- k = i;
484
- p = 0;
485
- for(int j=0; j<8; j++) {
486
- if(!(k & 1)) {
487
- res[p++] = j;
488
- }
489
- k >>= 1;
490
- }
491
- for(; p<8; p++)
492
- res[p] = 0x80;
493
- #endif
494
- }
495
- #ifndef YENC_DEC_USE_THINTABLE
496
- for(int i=0; i<32768; i++) {
497
- int k = i;
498
- uint8_t* res = (uint8_t*)compactLUT + i*16;
499
- int p = 0;
500
-
501
- for(int j=0; j<16; j++) {
502
- if(!(k & 1)) {
503
- res[p++] = j;
504
- }
505
- k >>= 1;
506
- }
507
- for(; p<16; p++)
508
- res[p] = 0x80;
509
- }
510
- #endif
158
+ template<bool isRaw, bool searchEnd, size_t width, void(&kernel)(const uint8_t*, long&, unsigned char*&, unsigned char&, uint16_t&)>
159
+ static RapidYenc::YencDecoderEnd do_decode_simd(const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) {
160
+ return _do_decode_simd<isRaw, searchEnd, kernel>(width, src, dest, len, state);
511
161
  }
162
+ template<bool isRaw, bool searchEnd, size_t(&getWidth)(), void(&kernel)(const uint8_t*, long&, unsigned char*&, unsigned char&, uint16_t&)>
163
+ static RapidYenc::YencDecoderEnd do_decode_simd(const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) {
164
+ return _do_decode_simd<isRaw, searchEnd, kernel>(getWidth(), src, dest, len, state);
165
+ }
166
+
167
+
168
+ #if defined(PLATFORM_X86) || defined(PLATFORM_ARM)
169
+ namespace RapidYenc {
170
+ void decoder_init_lut(void* compactLUT);
171
+ }
172
+ #endif
173
+
512
174
  template<bool isRaw>
513
175
  static inline void decoder_set_nextMask(const uint8_t* src, size_t len, uint16_t& nextMask) {
514
176
  if(isRaw) {
@@ -535,3 +197,24 @@ static inline uint16_t decoder_set_nextMask(const uint8_t* src, unsigned mask) {
535
197
  }
536
198
  return 0;
537
199
  }
200
+
201
+ // resolve invalid sequences of = to deal with cases like '===='
202
+ // bit hack inspired from simdjson: https://youtu.be/wlvKAT7SZIQ?t=33m38s
203
+ template<typename T>
204
+ static inline T fix_eqMask(T mask, T maskShift1) {
205
+ // isolate the start of each consecutive bit group (e.g. 01011101 -> 01000101)
206
+ T start = mask & ~maskShift1;
207
+
208
+ // this strategy works by firstly separating groups that start on even/odd bits
209
+ // generally, it doesn't matter which one (even/odd) we pick, but clearing even groups specifically allows the escFirst bit in maskShift1 to work
210
+ // (this is because the start of the escFirst group is at index -1, an odd bit, but we can't clear it due to being < 0, so we just retain all odd groups instead)
211
+
212
+ const T even = (T)0x5555555555555555; // every even bit (01010101...)
213
+
214
+ // obtain groups which start on an odd bit (clear groups that start on an even bit, but this leaves an unwanted trailing bit)
215
+ T oddGroups = mask + (start & even);
216
+
217
+ // clear even bits in odd groups, whilst conversely preserving even bits in even groups
218
+ // the `& mask` also conveniently gets rid of unwanted trailing bits
219
+ return (oddGroups ^ even) & mask;
220
+ }
@@ -1,11 +1,8 @@
1
1
  #include "common.h"
2
- #ifdef __ARM_NEON
3
-
4
- #ifndef __aarch64__
5
- #define YENC_DEC_USE_THINTABLE 1
6
- #endif
7
2
  #include "decoder_common.h"
8
3
 
4
+ #ifdef __ARM_NEON
5
+
9
6
 
10
7
  #if defined(_MSC_VER) && !defined(__clang__)
11
8
  # define vld1_u8_align(p, a) vld1_u8_ex(p, a*8)
@@ -43,8 +40,6 @@ static struct { char bytes[16]; } ALIGN_TO(16, compactLUT[32768]);
43
40
  # pragma pack()
44
41
  #endif
45
42
 
46
- static uint8_t eqFixLUT[256];
47
-
48
43
 
49
44
 
50
45
  static bool neon_vect_is_nonzero(uint8x16_t v) {
@@ -58,6 +53,8 @@ static bool neon_vect_is_nonzero(uint8x16_t v) {
58
53
  }
59
54
 
60
55
 
56
+ namespace RapidYenc {
57
+
61
58
  template<bool isRaw, bool searchEnd>
62
59
  HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned char*& p, unsigned char& escFirst, uint16_t& nextMask) {
63
60
  HEDLEY_ASSUME(escFirst == 0 || escFirst == 1);
@@ -327,19 +324,15 @@ HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned
327
324
  // a spec compliant encoder should never generate sequences: ==, =\n and =\r, but we'll handle them to be spec compliant
328
325
  // the yEnc specification requires any character following = to be unescaped, not skipped over, so we'll deal with that
329
326
  // firstly, check for invalid sequences of = (we assume that these are rare, as a spec compliant yEnc encoder should not generate these)
330
- if(LIKELIHOOD(0.0001, (mask & ((maskEq << 1) | escFirst)) != 0)) {
331
- uint8_t tmp = eqFixLUT[(maskEq&0xff) & ~escFirst];
332
- uint32_t maskEq2 = tmp;
333
- for(int j=8; j<32; j+=8) {
334
- tmp = eqFixLUT[((maskEq>>j)&0xff) & ~(tmp>>7)];
335
- maskEq2 |= tmp<<j;
336
- }
337
- maskEq = maskEq2;
327
+ uint32_t maskEqShift1 = (maskEq << 1) | escFirst;
328
+ if(LIKELIHOOD(0.0001, (mask & maskEqShift1) != 0)) {
329
+ maskEq = fix_eqMask<uint32_t>(maskEq, maskEqShift1);
338
330
 
331
+ unsigned char nextEscFirst = maskEq>>31;
339
332
  // next, eliminate anything following a `=` from the special char mask; this eliminates cases of `=\r` so that they aren't removed
340
333
  maskEq = (maskEq<<1) | escFirst;
341
334
  mask &= ~maskEq;
342
- escFirst = tmp>>7;
335
+ escFirst = nextEscFirst;
343
336
 
344
337
  // unescape chars following `=`
345
338
  uint8x8_t maskEqTemp = vreinterpret_u8_u32(vmov_n_u32(maskEq));
@@ -455,14 +448,15 @@ HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned
455
448
  }
456
449
  }
457
450
  }
451
+ } // namespace
458
452
 
459
- void decoder_set_neon_funcs() {
460
- decoder_init_lut(eqFixLUT, compactLUT);
453
+ void RapidYenc::decoder_set_neon_funcs() {
454
+ decoder_init_lut(compactLUT);
461
455
  _do_decode = &do_decode_simd<false, false, sizeof(uint8x16_t)*2, do_decode_neon<false, false> >;
462
456
  _do_decode_raw = &do_decode_simd<true, false, sizeof(uint8x16_t)*2, do_decode_neon<true, false> >;
463
457
  _do_decode_end_raw = &do_decode_simd<true, true, sizeof(uint8x16_t)*2, do_decode_neon<true, true> >;
464
458
  _decode_isa = ISA_LEVEL_NEON;
465
459
  }
466
460
  #else
467
- void decoder_set_neon_funcs() {}
461
+ void RapidYenc::decoder_set_neon_funcs() {}
468
462
  #endif