yencode 1.1.5 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +130 -189
- package/binding.gyp +115 -6
- package/index.js +2 -0
- package/package.json +1 -1
- package/src/common.h +37 -7
- package/src/crc.cc +121 -47
- package/src/crc.h +74 -10
- package/src/crc_arm.cc +51 -34
- package/src/crc_arm_pmull.cc +215 -0
- package/src/crc_common.h +22 -0
- package/src/crc_folding.cc +154 -16
- package/src/crc_folding_256.cc +7 -14
- package/src/crc_riscv.cc +251 -0
- package/src/decoder.cc +373 -13
- package/src/decoder.h +10 -14
- package/src/decoder_avx.cc +5 -6
- package/src/decoder_avx2.cc +8 -9
- package/src/decoder_avx2_base.h +7 -11
- package/src/decoder_common.h +56 -373
- package/src/decoder_neon.cc +13 -19
- package/src/decoder_neon64.cc +12 -15
- package/src/decoder_rvv.cc +280 -0
- package/src/decoder_sse2.cc +26 -5
- package/src/decoder_sse_base.h +20 -40
- package/src/decoder_ssse3.cc +5 -6
- package/src/decoder_vbmi2.cc +6 -13
- package/src/encoder.cc +42 -26
- package/src/encoder.h +5 -7
- package/src/encoder_avx.cc +3 -3
- package/src/encoder_avx2.cc +3 -3
- package/src/encoder_avx_base.h +3 -0
- package/src/encoder_common.h +26 -32
- package/src/encoder_neon.cc +6 -3
- package/src/encoder_rvv.cc +13 -26
- package/src/encoder_sse2.cc +3 -2
- package/src/encoder_sse_base.h +2 -0
- package/src/encoder_ssse3.cc +3 -3
- package/src/encoder_vbmi2.cc +6 -7
- package/src/platform.cc +24 -23
- package/src/yencode.cc +54 -11
- package/test/_speedbase.js +4 -2
- package/test/speeddec.js +25 -16
- package/test/speedenc.js +21 -17
- package/test/testcrc.js +17 -1
- package/test/testcrcfuncs.c +53 -0
- package/test/testdec.js +1 -0
package/src/decoder.h
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
#ifndef __YENC_DECODER_H
|
|
2
2
|
#define __YENC_DECODER_H
|
|
3
3
|
|
|
4
|
-
#
|
|
5
|
-
extern "C" {
|
|
6
|
-
#endif
|
|
4
|
+
#include "hedley.h"
|
|
7
5
|
|
|
6
|
+
namespace RapidYenc {
|
|
8
7
|
|
|
9
8
|
|
|
10
9
|
// the last state that the decoder was in (i.e. last few characters processed)
|
|
@@ -27,21 +26,20 @@ typedef enum {
|
|
|
27
26
|
YDEC_END_ARTICLE // \r\n.\r\n sequence found, src points to byte after last '\n'
|
|
28
27
|
} YencDecoderEnd;
|
|
29
28
|
|
|
30
|
-
#include "hedley.h"
|
|
31
29
|
|
|
32
30
|
extern YencDecoderEnd (*_do_decode)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
|
|
33
31
|
extern YencDecoderEnd (*_do_decode_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
|
|
34
32
|
extern YencDecoderEnd (*_do_decode_end_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
|
|
35
33
|
extern int _decode_isa;
|
|
36
34
|
|
|
37
|
-
static inline size_t
|
|
38
|
-
unsigned char* ds = dest;
|
|
39
|
-
(*(isRaw ? _do_decode_raw : _do_decode))(&src, &ds, len, state);
|
|
40
|
-
return ds - dest;
|
|
35
|
+
static inline size_t decode(int isRaw, const void* src, void* dest, size_t len, YencDecoderState* state) {
|
|
36
|
+
unsigned char* ds = (unsigned char*)dest;
|
|
37
|
+
(*(isRaw ? _do_decode_raw : _do_decode))((const unsigned char**)&src, &ds, len, state);
|
|
38
|
+
return ds - (unsigned char*)dest;
|
|
41
39
|
}
|
|
42
40
|
|
|
43
|
-
static inline YencDecoderEnd
|
|
44
|
-
return _do_decode_end_raw(src, dest, len, state);
|
|
41
|
+
static inline YencDecoderEnd decode_end(const void** src, void** dest, size_t len, YencDecoderState* state) {
|
|
42
|
+
return _do_decode_end_raw((const unsigned char**)src, (unsigned char**)dest, len, state);
|
|
45
43
|
}
|
|
46
44
|
|
|
47
45
|
void decoder_init();
|
|
@@ -51,7 +49,5 @@ static inline int decode_isa_level() {
|
|
|
51
49
|
}
|
|
52
50
|
|
|
53
51
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
#endif
|
|
57
|
-
#endif
|
|
52
|
+
} // namespace
|
|
53
|
+
#endif // defined(__YENC_DECODER_H)
|
package/src/decoder_avx.cc
CHANGED
|
@@ -1,19 +1,18 @@
|
|
|
1
1
|
#include "common.h"
|
|
2
2
|
|
|
3
|
-
#if defined(__AVX__) && defined(__POPCNT__)
|
|
4
3
|
#include "decoder_common.h"
|
|
4
|
+
#if defined(__AVX__) && defined(__POPCNT__)
|
|
5
5
|
#include "decoder_sse_base.h"
|
|
6
|
-
void decoder_set_avx_funcs() {
|
|
7
|
-
decoder_sse_init();
|
|
8
|
-
decoder_init_lut(lookups->
|
|
6
|
+
void RapidYenc::decoder_set_avx_funcs() {
|
|
7
|
+
decoder_sse_init(lookups);
|
|
8
|
+
decoder_init_lut(lookups->compact);
|
|
9
9
|
_do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_SSE4_POPCNT> >;
|
|
10
10
|
_do_decode_raw = &do_decode_simd<true, false, sizeof(__m128i)*2, do_decode_sse<true, false, ISA_LEVEL_SSE4_POPCNT> >;
|
|
11
11
|
_do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m128i)*2, do_decode_sse<true, true, ISA_LEVEL_SSE4_POPCNT> >;
|
|
12
12
|
_decode_isa = ISA_LEVEL_AVX;
|
|
13
13
|
}
|
|
14
14
|
#else
|
|
15
|
-
void
|
|
16
|
-
void decoder_set_avx_funcs() {
|
|
15
|
+
void RapidYenc::decoder_set_avx_funcs() {
|
|
17
16
|
decoder_set_ssse3_funcs();
|
|
18
17
|
}
|
|
19
18
|
#endif
|
package/src/decoder_avx2.cc
CHANGED
|
@@ -1,19 +1,18 @@
|
|
|
1
1
|
#include "common.h"
|
|
2
2
|
|
|
3
|
-
#if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
|
|
4
3
|
#include "decoder_common.h"
|
|
4
|
+
#if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
|
|
5
5
|
#include "decoder_avx2_base.h"
|
|
6
|
-
void decoder_set_avx2_funcs() {
|
|
6
|
+
void RapidYenc::decoder_set_avx2_funcs() {
|
|
7
7
|
ALIGN_ALLOC(lookups, sizeof(*lookups), 16);
|
|
8
|
-
decoder_init_lut(lookups->
|
|
9
|
-
_do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_LEVEL_AVX2> >;
|
|
10
|
-
_do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_LEVEL_AVX2> >;
|
|
11
|
-
_do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m256i)*2, do_decode_avx2<true, true, ISA_LEVEL_AVX2> >;
|
|
12
|
-
_decode_isa = ISA_LEVEL_AVX2;
|
|
8
|
+
decoder_init_lut(lookups->compact);
|
|
9
|
+
RapidYenc::_do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_LEVEL_AVX2> >;
|
|
10
|
+
RapidYenc::_do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_LEVEL_AVX2> >;
|
|
11
|
+
RapidYenc::_do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m256i)*2, do_decode_avx2<true, true, ISA_LEVEL_AVX2> >;
|
|
12
|
+
RapidYenc::_decode_isa = ISA_LEVEL_AVX2;
|
|
13
13
|
}
|
|
14
14
|
#else
|
|
15
|
-
void
|
|
16
|
-
void decoder_set_avx2_funcs() {
|
|
15
|
+
void RapidYenc::decoder_set_avx2_funcs() {
|
|
17
16
|
decoder_set_avx_funcs();
|
|
18
17
|
}
|
|
19
18
|
#endif
|
package/src/decoder_avx2_base.h
CHANGED
|
@@ -15,7 +15,6 @@
|
|
|
15
15
|
#pragma pack(16)
|
|
16
16
|
static struct {
|
|
17
17
|
/*align16*/ struct { char bytes[16]; } compact[32768];
|
|
18
|
-
uint8_t eqFix[256];
|
|
19
18
|
} * HEDLEY_RESTRICT lookups;
|
|
20
19
|
#pragma pack()
|
|
21
20
|
|
|
@@ -50,6 +49,8 @@ static HEDLEY_ALWAYS_INLINE __m256i force_align_read_256(const void* p) {
|
|
|
50
49
|
# define COMPRESS_STORE(dst, mask, vec) _mm256_storeu_si256((__m256i*)(dst), _mm256_maskz_compress_epi8(mask, vec))
|
|
51
50
|
#endif
|
|
52
51
|
|
|
52
|
+
namespace RapidYenc {
|
|
53
|
+
|
|
53
54
|
template<bool isRaw, bool searchEnd, enum YEncDecIsaLevel use_isa>
|
|
54
55
|
HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned char*& p, unsigned char& _escFirst, uint16_t& _nextMask) {
|
|
55
56
|
HEDLEY_ASSUME(_escFirst == 0 || _escFirst == 1);
|
|
@@ -430,17 +431,11 @@ HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned
|
|
|
430
431
|
if(use_isa >= ISA_LEVEL_AVX3)
|
|
431
432
|
dataB = _mm256_add_epi8(oDataB, _mm256_set1_epi8(-42));
|
|
432
433
|
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
for(int j=8; j<64; j+=8) {
|
|
437
|
-
tmp = lookups->eqFix[(unsigned)((maskEq>>j)&0xff) & ~(tmp>>7)];
|
|
438
|
-
maskEq2 |= (uint64_t)tmp<<j;
|
|
439
|
-
}
|
|
440
|
-
maskEq = maskEq2;
|
|
441
|
-
|
|
434
|
+
uint64_t maskEqShift1 = (maskEq << 1) + escFirst;
|
|
435
|
+
if(LIKELIHOOD(0.0001, (mask & maskEqShift1) != 0)) {
|
|
436
|
+
maskEq = fix_eqMask<uint64_t>(maskEq, maskEqShift1);
|
|
442
437
|
mask &= ~(uint64_t)escFirst;
|
|
443
|
-
escFirst =
|
|
438
|
+
escFirst = maskEq>>63;
|
|
444
439
|
// next, eliminate anything following a `=` from the special char mask; this eliminates cases of `=\r` so that they aren't removed
|
|
445
440
|
maskEq <<= 1;
|
|
446
441
|
mask &= ~maskEq;
|
|
@@ -619,4 +614,5 @@ HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned
|
|
|
619
614
|
_escFirst = (unsigned char)escFirst;
|
|
620
615
|
_mm256_zeroupper();
|
|
621
616
|
}
|
|
617
|
+
} // namespace
|
|
622
618
|
#endif
|
package/src/decoder_common.h
CHANGED
|
@@ -1,338 +1,32 @@
|
|
|
1
1
|
#include "decoder.h"
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
long i = -(long)len; // input position
|
|
13
|
-
unsigned char c; // input character
|
|
14
|
-
|
|
15
|
-
if(len < 1) return 0;
|
|
3
|
+
namespace RapidYenc {
|
|
4
|
+
void decoder_set_sse2_funcs();
|
|
5
|
+
void decoder_set_ssse3_funcs();
|
|
6
|
+
void decoder_set_avx_funcs();
|
|
7
|
+
void decoder_set_avx2_funcs();
|
|
8
|
+
void decoder_set_vbmi2_funcs();
|
|
9
|
+
extern const bool decoder_has_avx10;
|
|
10
|
+
void decoder_set_neon_funcs();
|
|
11
|
+
void decoder_set_rvv_funcs();
|
|
16
12
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
if(state) switch(*state) {
|
|
20
|
-
case YDEC_STATE_EQ:
|
|
21
|
-
c = es[i];
|
|
22
|
-
*p++ = c - 42 - 64;
|
|
23
|
-
i++;
|
|
24
|
-
if(c == '\r') {
|
|
25
|
-
*state = YDEC_STATE_CR;
|
|
26
|
-
if(i >= 0) return 0;
|
|
27
|
-
} else {
|
|
28
|
-
*state = YDEC_STATE_NONE;
|
|
29
|
-
break;
|
|
30
|
-
}
|
|
31
|
-
// fall-thru
|
|
32
|
-
case YDEC_STATE_CR:
|
|
33
|
-
if(es[i] != '\n') break;
|
|
34
|
-
i++;
|
|
35
|
-
*state = YDEC_STATE_CRLF;
|
|
36
|
-
if(i >= 0) return 0;
|
|
37
|
-
// Else fall-thru
|
|
38
|
-
case YDEC_STATE_CRLF:
|
|
39
|
-
// skip past first dot
|
|
40
|
-
if(es[i] == '.') i++;
|
|
41
|
-
// fall-thru
|
|
42
|
-
default: break; // silence compiler warnings
|
|
43
|
-
} else // treat as YDEC_STATE_CRLF
|
|
44
|
-
if(es[i] == '.') i++;
|
|
45
|
-
|
|
46
|
-
for(; i < -2; i++) {
|
|
47
|
-
c = es[i];
|
|
48
|
-
switch(c) {
|
|
49
|
-
case '\r':
|
|
50
|
-
// skip past \r\n. sequences
|
|
51
|
-
//i += (es[i+1] == '\n' && es[i+2] == '.') << 1;
|
|
52
|
-
if(es[i+1] == '\n' && es[i+2] == '.')
|
|
53
|
-
i += 2;
|
|
54
|
-
// fall-thru
|
|
55
|
-
case '\n':
|
|
56
|
-
continue;
|
|
57
|
-
case '=':
|
|
58
|
-
c = es[i+1];
|
|
59
|
-
*p++ = c - 42 - 64;
|
|
60
|
-
i += (c != '\r'); // if we have a \r, reprocess character to deal with \r\n. case
|
|
61
|
-
continue;
|
|
62
|
-
default:
|
|
63
|
-
*p++ = c - 42;
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
if(state) *state = YDEC_STATE_NONE;
|
|
67
|
-
|
|
68
|
-
if(i == -2) { // 2nd last char
|
|
69
|
-
c = es[i];
|
|
70
|
-
switch(c) {
|
|
71
|
-
case '\r':
|
|
72
|
-
if(state && es[i+1] == '\n') {
|
|
73
|
-
*state = YDEC_STATE_CRLF;
|
|
74
|
-
return p - dest;
|
|
75
|
-
}
|
|
76
|
-
// Else fall-thru
|
|
77
|
-
case '\n':
|
|
78
|
-
break;
|
|
79
|
-
case '=':
|
|
80
|
-
c = es[i+1];
|
|
81
|
-
*p++ = c - 42 - 64;
|
|
82
|
-
i += (c != '\r');
|
|
83
|
-
break;
|
|
84
|
-
default:
|
|
85
|
-
*p++ = c - 42;
|
|
86
|
-
}
|
|
87
|
-
i++;
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
// do final char; we process this separately to prevent an overflow if the final char is '='
|
|
91
|
-
if(i == -1) {
|
|
92
|
-
c = es[i];
|
|
93
|
-
if(c != '\n' && c != '\r' && c != '=') {
|
|
94
|
-
*p++ = c - 42;
|
|
95
|
-
} else if(state) {
|
|
96
|
-
if(c == '=') *state = YDEC_STATE_EQ;
|
|
97
|
-
else if(c == '\r') *state = YDEC_STATE_CR;
|
|
98
|
-
else *state = YDEC_STATE_NONE;
|
|
99
|
-
}
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
} else {
|
|
103
|
-
|
|
104
|
-
if(state && *state == YDEC_STATE_EQ) {
|
|
105
|
-
*p++ = es[i] - 42 - 64;
|
|
106
|
-
i++;
|
|
107
|
-
*state = YDEC_STATE_NONE;
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
/*for(i = 0; i < len - 1; i++) {
|
|
111
|
-
c = src[i];
|
|
112
|
-
if(c == '\n' || c == '\r') continue;
|
|
113
|
-
unsigned char isEquals = (c == '=');
|
|
114
|
-
i += isEquals;
|
|
115
|
-
*p++ = src[i] - (42 + (isEquals << 6));
|
|
116
|
-
}*/
|
|
117
|
-
for(; i < -1; i++) {
|
|
118
|
-
c = es[i];
|
|
119
|
-
switch(c) {
|
|
120
|
-
case '\n': case '\r': continue;
|
|
121
|
-
case '=':
|
|
122
|
-
i++;
|
|
123
|
-
c = es[i] - 64;
|
|
124
|
-
}
|
|
125
|
-
*p++ = c - 42;
|
|
126
|
-
}
|
|
127
|
-
if(state) *state = YDEC_STATE_NONE;
|
|
128
|
-
// do final char; we process this separately to prevent an overflow if the final char is '='
|
|
129
|
-
if(i == -1) {
|
|
130
|
-
c = es[i];
|
|
131
|
-
if(c != '\n' && c != '\r' && c != '=') {
|
|
132
|
-
*p++ = c - 42;
|
|
133
|
-
} else
|
|
134
|
-
if(state) *state = (c == '=' ? YDEC_STATE_EQ : YDEC_STATE_NONE);
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
return p - dest;
|
|
13
|
+
template<bool isRaw, bool searchEnd>
|
|
14
|
+
YencDecoderEnd do_decode_scalar(const unsigned char** src, unsigned char** dest, size_t len, YencDecoderState* state);
|
|
140
15
|
}
|
|
141
16
|
|
|
142
|
-
template<bool isRaw>
|
|
143
|
-
YencDecoderEnd do_decode_end_scalar(const unsigned char** src, unsigned char** dest, size_t len, YencDecoderState* state) {
|
|
144
|
-
const unsigned char *es = (*src) + len; // end source pointer
|
|
145
|
-
unsigned char *p = *dest; // destination pointer
|
|
146
|
-
long i = -(long)len; // input position
|
|
147
|
-
unsigned char c; // input character
|
|
148
|
-
|
|
149
|
-
if(len < 1) return YDEC_END_NONE;
|
|
150
|
-
|
|
151
|
-
#define YDEC_CHECK_END(s) if(i == 0) { \
|
|
152
|
-
*state = s; \
|
|
153
|
-
*src = es; \
|
|
154
|
-
*dest = p; \
|
|
155
|
-
return YDEC_END_NONE; \
|
|
156
|
-
}
|
|
157
|
-
if(state) switch(*state) {
|
|
158
|
-
case YDEC_STATE_CRLFEQ: do_decode_endable_scalar_ceq:
|
|
159
|
-
if(es[i] == 'y') {
|
|
160
|
-
*state = YDEC_STATE_NONE;
|
|
161
|
-
*src = es+i+1;
|
|
162
|
-
*dest = p;
|
|
163
|
-
return YDEC_END_CONTROL;
|
|
164
|
-
} // Else fall-thru
|
|
165
|
-
case YDEC_STATE_EQ:
|
|
166
|
-
c = es[i];
|
|
167
|
-
*p++ = c - 42 - 64;
|
|
168
|
-
i++;
|
|
169
|
-
if(c != '\r') break;
|
|
170
|
-
YDEC_CHECK_END(YDEC_STATE_CR)
|
|
171
|
-
// fall-through
|
|
172
|
-
case YDEC_STATE_CR:
|
|
173
|
-
if(es[i] != '\n') break;
|
|
174
|
-
i++;
|
|
175
|
-
YDEC_CHECK_END(YDEC_STATE_CRLF)
|
|
176
|
-
// fall-through
|
|
177
|
-
case YDEC_STATE_CRLF: do_decode_endable_scalar_c0:
|
|
178
|
-
if(es[i] == '.' && isRaw) {
|
|
179
|
-
i++;
|
|
180
|
-
YDEC_CHECK_END(YDEC_STATE_CRLFDT)
|
|
181
|
-
// fall-through
|
|
182
|
-
} else if(es[i] == '=') {
|
|
183
|
-
i++;
|
|
184
|
-
YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
|
|
185
|
-
goto do_decode_endable_scalar_ceq;
|
|
186
|
-
} else
|
|
187
|
-
break;
|
|
188
|
-
case YDEC_STATE_CRLFDT:
|
|
189
|
-
if(isRaw && es[i] == '\r') {
|
|
190
|
-
i++;
|
|
191
|
-
YDEC_CHECK_END(YDEC_STATE_CRLFDTCR)
|
|
192
|
-
// fall-through
|
|
193
|
-
} else if(isRaw && es[i] == '=') { // check for dot-stuffed ending: \r\n.=y
|
|
194
|
-
i++;
|
|
195
|
-
YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
|
|
196
|
-
goto do_decode_endable_scalar_ceq;
|
|
197
|
-
} else
|
|
198
|
-
break;
|
|
199
|
-
case YDEC_STATE_CRLFDTCR:
|
|
200
|
-
if(es[i] == '\n') {
|
|
201
|
-
if(isRaw) {
|
|
202
|
-
*state = YDEC_STATE_CRLF;
|
|
203
|
-
*src = es + i + 1;
|
|
204
|
-
*dest = p;
|
|
205
|
-
return YDEC_END_ARTICLE;
|
|
206
|
-
} else {
|
|
207
|
-
i++;
|
|
208
|
-
YDEC_CHECK_END(YDEC_STATE_CRLF)
|
|
209
|
-
goto do_decode_endable_scalar_c0; // handle as CRLF
|
|
210
|
-
}
|
|
211
|
-
} else
|
|
212
|
-
break;
|
|
213
|
-
case YDEC_STATE_NONE: break; // silence compiler warning
|
|
214
|
-
} else // treat as YDEC_STATE_CRLF
|
|
215
|
-
goto do_decode_endable_scalar_c0;
|
|
216
|
-
|
|
217
|
-
for(; i < -2; i++) {
|
|
218
|
-
c = es[i];
|
|
219
|
-
switch(c) {
|
|
220
|
-
case '\r': if(es[i+1] == '\n') {
|
|
221
|
-
if(isRaw && es[i+2] == '.') {
|
|
222
|
-
// skip past \r\n. sequences
|
|
223
|
-
i += 3;
|
|
224
|
-
YDEC_CHECK_END(YDEC_STATE_CRLFDT)
|
|
225
|
-
// check for end
|
|
226
|
-
if(es[i] == '\r') {
|
|
227
|
-
i++;
|
|
228
|
-
YDEC_CHECK_END(YDEC_STATE_CRLFDTCR)
|
|
229
|
-
if(es[i] == '\n') {
|
|
230
|
-
*src = es + i + 1;
|
|
231
|
-
*dest = p;
|
|
232
|
-
*state = YDEC_STATE_CRLF;
|
|
233
|
-
return YDEC_END_ARTICLE;
|
|
234
|
-
} else i--;
|
|
235
|
-
} else if(es[i] == '=') {
|
|
236
|
-
i++;
|
|
237
|
-
YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
|
|
238
|
-
if(es[i] == 'y') {
|
|
239
|
-
*src = es + i + 1;
|
|
240
|
-
*dest = p;
|
|
241
|
-
*state = YDEC_STATE_NONE;
|
|
242
|
-
return YDEC_END_CONTROL;
|
|
243
|
-
} else {
|
|
244
|
-
// escape char & continue
|
|
245
|
-
c = es[i];
|
|
246
|
-
*p++ = c - 42 - 64;
|
|
247
|
-
i -= (c == '\r');
|
|
248
|
-
}
|
|
249
|
-
} else i--;
|
|
250
|
-
}
|
|
251
|
-
else if(es[i+2] == '=') {
|
|
252
|
-
i += 3;
|
|
253
|
-
YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
|
|
254
|
-
if(es[i] == 'y') {
|
|
255
|
-
// ended
|
|
256
|
-
*src = es + i + 1;
|
|
257
|
-
*dest = p;
|
|
258
|
-
*state = YDEC_STATE_NONE;
|
|
259
|
-
return YDEC_END_CONTROL;
|
|
260
|
-
} else {
|
|
261
|
-
// escape char & continue
|
|
262
|
-
c = es[i];
|
|
263
|
-
*p++ = c - 42 - 64;
|
|
264
|
-
i -= (c == '\r');
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
} // fall-thru
|
|
268
|
-
case '\n':
|
|
269
|
-
continue;
|
|
270
|
-
case '=':
|
|
271
|
-
c = es[i+1];
|
|
272
|
-
*p++ = c - 42 - 64;
|
|
273
|
-
i += (c != '\r'); // if we have a \r, reprocess character to deal with \r\n. case
|
|
274
|
-
continue;
|
|
275
|
-
default:
|
|
276
|
-
*p++ = c - 42;
|
|
277
|
-
}
|
|
278
|
-
}
|
|
279
|
-
if(state) *state = YDEC_STATE_NONE;
|
|
280
|
-
|
|
281
|
-
if(i == -2) { // 2nd last char
|
|
282
|
-
c = es[i];
|
|
283
|
-
switch(c) {
|
|
284
|
-
case '\r':
|
|
285
|
-
if(state && es[i+1] == '\n') {
|
|
286
|
-
*state = YDEC_STATE_CRLF;
|
|
287
|
-
*src = es;
|
|
288
|
-
*dest = p;
|
|
289
|
-
return YDEC_END_NONE;
|
|
290
|
-
}
|
|
291
|
-
// Else fall-thru
|
|
292
|
-
case '\n':
|
|
293
|
-
break;
|
|
294
|
-
case '=':
|
|
295
|
-
c = es[i+1];
|
|
296
|
-
*p++ = c - 42 - 64;
|
|
297
|
-
i += (c != '\r');
|
|
298
|
-
break;
|
|
299
|
-
default:
|
|
300
|
-
*p++ = c - 42;
|
|
301
|
-
}
|
|
302
|
-
i++;
|
|
303
|
-
}
|
|
304
|
-
|
|
305
|
-
// do final char; we process this separately to prevent an overflow if the final char is '='
|
|
306
|
-
if(i == -1) {
|
|
307
|
-
c = es[i];
|
|
308
|
-
if(c != '\n' && c != '\r' && c != '=') {
|
|
309
|
-
*p++ = c - 42;
|
|
310
|
-
} else if(state) {
|
|
311
|
-
if(c == '=') *state = YDEC_STATE_EQ;
|
|
312
|
-
else if(c == '\r') *state = YDEC_STATE_CR;
|
|
313
|
-
else *state = YDEC_STATE_NONE;
|
|
314
|
-
}
|
|
315
|
-
}
|
|
316
|
-
#undef YDEC_CHECK_END
|
|
317
|
-
|
|
318
|
-
*src = es;
|
|
319
|
-
*dest = p;
|
|
320
|
-
return YDEC_END_NONE;
|
|
321
|
-
}
|
|
322
17
|
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
return do_decode_end_scalar<isRaw>(src, dest, len, state);
|
|
327
|
-
*dest += do_decode_noend_scalar<isRaw>(*src, *dest, len, state);
|
|
328
|
-
*src += len;
|
|
329
|
-
return YDEC_END_NONE;
|
|
330
|
-
}
|
|
18
|
+
#if defined(PLATFORM_ARM) && !defined(__aarch64__)
|
|
19
|
+
#define YENC_DEC_USE_THINTABLE 1
|
|
20
|
+
#endif
|
|
331
21
|
|
|
22
|
+
// TODO: need to support max output length somehow
|
|
332
23
|
|
|
333
24
|
|
|
334
|
-
|
|
335
|
-
|
|
25
|
+
|
|
26
|
+
template<bool isRaw, bool searchEnd, void(&kernel)(const uint8_t*, long&, unsigned char*&, unsigned char&, uint16_t&)>
|
|
27
|
+
static inline RapidYenc::YencDecoderEnd _do_decode_simd(size_t width, const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) {
|
|
28
|
+
using namespace RapidYenc;
|
|
29
|
+
|
|
336
30
|
if(len <= width*2) return do_decode_scalar<isRaw, searchEnd>(src, dest, len, state);
|
|
337
31
|
|
|
338
32
|
YencDecoderState tState = YDEC_STATE_CRLF;
|
|
@@ -461,54 +155,22 @@ YencDecoderEnd do_decode_simd(const unsigned char** src, unsigned char** dest, s
|
|
|
461
155
|
return YDEC_END_NONE;
|
|
462
156
|
}
|
|
463
157
|
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
int p = 0;
|
|
468
|
-
|
|
469
|
-
// fix LUT
|
|
470
|
-
k = i;
|
|
471
|
-
p = 0;
|
|
472
|
-
for(int j=0; j<8; j++) {
|
|
473
|
-
k = i >> j;
|
|
474
|
-
if(k & 1) {
|
|
475
|
-
p |= 1 << j;
|
|
476
|
-
j++;
|
|
477
|
-
}
|
|
478
|
-
}
|
|
479
|
-
eqFixLUT[i] = p;
|
|
480
|
-
|
|
481
|
-
#ifdef YENC_DEC_USE_THINTABLE
|
|
482
|
-
uint8_t* res = (uint8_t*)compactLUT + i*8;
|
|
483
|
-
k = i;
|
|
484
|
-
p = 0;
|
|
485
|
-
for(int j=0; j<8; j++) {
|
|
486
|
-
if(!(k & 1)) {
|
|
487
|
-
res[p++] = j;
|
|
488
|
-
}
|
|
489
|
-
k >>= 1;
|
|
490
|
-
}
|
|
491
|
-
for(; p<8; p++)
|
|
492
|
-
res[p] = 0x80;
|
|
493
|
-
#endif
|
|
494
|
-
}
|
|
495
|
-
#ifndef YENC_DEC_USE_THINTABLE
|
|
496
|
-
for(int i=0; i<32768; i++) {
|
|
497
|
-
int k = i;
|
|
498
|
-
uint8_t* res = (uint8_t*)compactLUT + i*16;
|
|
499
|
-
int p = 0;
|
|
500
|
-
|
|
501
|
-
for(int j=0; j<16; j++) {
|
|
502
|
-
if(!(k & 1)) {
|
|
503
|
-
res[p++] = j;
|
|
504
|
-
}
|
|
505
|
-
k >>= 1;
|
|
506
|
-
}
|
|
507
|
-
for(; p<16; p++)
|
|
508
|
-
res[p] = 0x80;
|
|
509
|
-
}
|
|
510
|
-
#endif
|
|
158
|
+
template<bool isRaw, bool searchEnd, size_t width, void(&kernel)(const uint8_t*, long&, unsigned char*&, unsigned char&, uint16_t&)>
|
|
159
|
+
static RapidYenc::YencDecoderEnd do_decode_simd(const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) {
|
|
160
|
+
return _do_decode_simd<isRaw, searchEnd, kernel>(width, src, dest, len, state);
|
|
511
161
|
}
|
|
162
|
+
template<bool isRaw, bool searchEnd, size_t(&getWidth)(), void(&kernel)(const uint8_t*, long&, unsigned char*&, unsigned char&, uint16_t&)>
|
|
163
|
+
static RapidYenc::YencDecoderEnd do_decode_simd(const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) {
|
|
164
|
+
return _do_decode_simd<isRaw, searchEnd, kernel>(getWidth(), src, dest, len, state);
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
#if defined(PLATFORM_X86) || defined(PLATFORM_ARM)
|
|
169
|
+
namespace RapidYenc {
|
|
170
|
+
void decoder_init_lut(void* compactLUT);
|
|
171
|
+
}
|
|
172
|
+
#endif
|
|
173
|
+
|
|
512
174
|
template<bool isRaw>
|
|
513
175
|
static inline void decoder_set_nextMask(const uint8_t* src, size_t len, uint16_t& nextMask) {
|
|
514
176
|
if(isRaw) {
|
|
@@ -535,3 +197,24 @@ static inline uint16_t decoder_set_nextMask(const uint8_t* src, unsigned mask) {
|
|
|
535
197
|
}
|
|
536
198
|
return 0;
|
|
537
199
|
}
|
|
200
|
+
|
|
201
|
+
// resolve invalid sequences of = to deal with cases like '===='
|
|
202
|
+
// bit hack inspired from simdjson: https://youtu.be/wlvKAT7SZIQ?t=33m38s
|
|
203
|
+
template<typename T>
|
|
204
|
+
static inline T fix_eqMask(T mask, T maskShift1) {
|
|
205
|
+
// isolate the start of each consecutive bit group (e.g. 01011101 -> 01000101)
|
|
206
|
+
T start = mask & ~maskShift1;
|
|
207
|
+
|
|
208
|
+
// this strategy works by firstly separating groups that start on even/odd bits
|
|
209
|
+
// generally, it doesn't matter which one (even/odd) we pick, but clearing even groups specifically allows the escFirst bit in maskShift1 to work
|
|
210
|
+
// (this is because the start of the escFirst group is at index -1, an odd bit, but we can't clear it due to being < 0, so we just retain all odd groups instead)
|
|
211
|
+
|
|
212
|
+
const T even = (T)0x5555555555555555; // every even bit (01010101...)
|
|
213
|
+
|
|
214
|
+
// obtain groups which start on an odd bit (clear groups that start on an even bit, but this leaves an unwanted trailing bit)
|
|
215
|
+
T oddGroups = mask + (start & even);
|
|
216
|
+
|
|
217
|
+
// clear even bits in odd groups, whilst conversely preserving even bits in even groups
|
|
218
|
+
// the `& mask` also conveniently gets rid of unwanted trailing bits
|
|
219
|
+
return (oddGroups ^ even) & mask;
|
|
220
|
+
}
|
package/src/decoder_neon.cc
CHANGED
|
@@ -1,11 +1,8 @@
|
|
|
1
1
|
#include "common.h"
|
|
2
|
-
#ifdef __ARM_NEON
|
|
3
|
-
|
|
4
|
-
#ifndef __aarch64__
|
|
5
|
-
#define YENC_DEC_USE_THINTABLE 1
|
|
6
|
-
#endif
|
|
7
2
|
#include "decoder_common.h"
|
|
8
3
|
|
|
4
|
+
#ifdef __ARM_NEON
|
|
5
|
+
|
|
9
6
|
|
|
10
7
|
#if defined(_MSC_VER) && !defined(__clang__)
|
|
11
8
|
# define vld1_u8_align(p, a) vld1_u8_ex(p, a*8)
|
|
@@ -43,8 +40,6 @@ static struct { char bytes[16]; } ALIGN_TO(16, compactLUT[32768]);
|
|
|
43
40
|
# pragma pack()
|
|
44
41
|
#endif
|
|
45
42
|
|
|
46
|
-
static uint8_t eqFixLUT[256];
|
|
47
|
-
|
|
48
43
|
|
|
49
44
|
|
|
50
45
|
static bool neon_vect_is_nonzero(uint8x16_t v) {
|
|
@@ -58,6 +53,8 @@ static bool neon_vect_is_nonzero(uint8x16_t v) {
|
|
|
58
53
|
}
|
|
59
54
|
|
|
60
55
|
|
|
56
|
+
namespace RapidYenc {
|
|
57
|
+
|
|
61
58
|
template<bool isRaw, bool searchEnd>
|
|
62
59
|
HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned char*& p, unsigned char& escFirst, uint16_t& nextMask) {
|
|
63
60
|
HEDLEY_ASSUME(escFirst == 0 || escFirst == 1);
|
|
@@ -327,19 +324,15 @@ HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned
|
|
|
327
324
|
// a spec compliant encoder should never generate sequences: ==, =\n and =\r, but we'll handle them to be spec compliant
|
|
328
325
|
// the yEnc specification requires any character following = to be unescaped, not skipped over, so we'll deal with that
|
|
329
326
|
// firstly, check for invalid sequences of = (we assume that these are rare, as a spec compliant yEnc encoder should not generate these)
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
for(int j=8; j<32; j+=8) {
|
|
334
|
-
tmp = eqFixLUT[((maskEq>>j)&0xff) & ~(tmp>>7)];
|
|
335
|
-
maskEq2 |= tmp<<j;
|
|
336
|
-
}
|
|
337
|
-
maskEq = maskEq2;
|
|
327
|
+
uint32_t maskEqShift1 = (maskEq << 1) | escFirst;
|
|
328
|
+
if(LIKELIHOOD(0.0001, (mask & maskEqShift1) != 0)) {
|
|
329
|
+
maskEq = fix_eqMask<uint32_t>(maskEq, maskEqShift1);
|
|
338
330
|
|
|
331
|
+
unsigned char nextEscFirst = maskEq>>31;
|
|
339
332
|
// next, eliminate anything following a `=` from the special char mask; this eliminates cases of `=\r` so that they aren't removed
|
|
340
333
|
maskEq = (maskEq<<1) | escFirst;
|
|
341
334
|
mask &= ~maskEq;
|
|
342
|
-
escFirst =
|
|
335
|
+
escFirst = nextEscFirst;
|
|
343
336
|
|
|
344
337
|
// unescape chars following `=`
|
|
345
338
|
uint8x8_t maskEqTemp = vreinterpret_u8_u32(vmov_n_u32(maskEq));
|
|
@@ -455,14 +448,15 @@ HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned
|
|
|
455
448
|
}
|
|
456
449
|
}
|
|
457
450
|
}
|
|
451
|
+
} // namespace
|
|
458
452
|
|
|
459
|
-
void decoder_set_neon_funcs() {
|
|
460
|
-
decoder_init_lut(
|
|
453
|
+
void RapidYenc::decoder_set_neon_funcs() {
|
|
454
|
+
decoder_init_lut(compactLUT);
|
|
461
455
|
_do_decode = &do_decode_simd<false, false, sizeof(uint8x16_t)*2, do_decode_neon<false, false> >;
|
|
462
456
|
_do_decode_raw = &do_decode_simd<true, false, sizeof(uint8x16_t)*2, do_decode_neon<true, false> >;
|
|
463
457
|
_do_decode_end_raw = &do_decode_simd<true, true, sizeof(uint8x16_t)*2, do_decode_neon<true, true> >;
|
|
464
458
|
_decode_isa = ISA_LEVEL_NEON;
|
|
465
459
|
}
|
|
466
460
|
#else
|
|
467
|
-
void decoder_set_neon_funcs() {}
|
|
461
|
+
void RapidYenc::decoder_set_neon_funcs() {}
|
|
468
462
|
#endif
|