yencode 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +37 -1
- package/package.json +1 -1
- package/src/common.h +18 -6
- package/src/crc.cc +42 -33
- package/src/crc.h +16 -14
- package/src/crc_arm.cc +7 -8
- package/src/crc_arm_pmull.cc +215 -0
- package/src/crc_common.h +13 -2
- package/src/crc_folding.cc +5 -5
- package/src/crc_folding_256.cc +2 -4
- package/src/crc_riscv.cc +7 -7
- package/src/decoder.cc +342 -12
- package/src/decoder.h +10 -14
- package/src/decoder_avx.cc +3 -4
- package/src/decoder_avx2.cc +7 -8
- package/src/decoder_avx2_base.h +6 -2
- package/src/decoder_common.h +34 -338
- package/src/decoder_neon.cc +10 -6
- package/src/decoder_neon64.cc +9 -5
- package/src/decoder_rvv.cc +47 -41
- package/src/decoder_sse2.cc +4 -4
- package/src/decoder_sse_base.h +20 -12
- package/src/decoder_ssse3.cc +3 -4
- package/src/decoder_vbmi2.cc +6 -8
- package/src/encoder.cc +19 -28
- package/src/encoder.h +5 -7
- package/src/encoder_avx.cc +3 -3
- package/src/encoder_avx2.cc +3 -3
- package/src/encoder_avx_base.h +3 -0
- package/src/encoder_common.h +26 -14
- package/src/encoder_neon.cc +6 -3
- package/src/encoder_rvv.cc +9 -7
- package/src/encoder_sse2.cc +3 -2
- package/src/encoder_sse_base.h +2 -0
- package/src/encoder_ssse3.cc +3 -3
- package/src/encoder_vbmi2.cc +6 -7
- package/src/platform.cc +24 -23
- package/src/yencode.cc +9 -8
- package/test/_speedbase.js +4 -2
- package/test/speeddec.js +25 -16
- package/test/speedenc.js +21 -17
package/src/crc_riscv.cc
CHANGED
|
@@ -189,7 +189,7 @@ static uint32_t crc32_reduce_rv_zbc(uint64_t prod) {
|
|
|
189
189
|
return t;
|
|
190
190
|
}
|
|
191
191
|
#endif
|
|
192
|
-
uint32_t crc32_multiply_rv_zbc(uint32_t a, uint32_t b) {
|
|
192
|
+
static uint32_t crc32_multiply_rv_zbc(uint32_t a, uint32_t b) {
|
|
193
193
|
#if __riscv_xlen == 64
|
|
194
194
|
uint64_t t = crc32_reduce_rv_zbc(rv_clmul(a, b));
|
|
195
195
|
#else
|
|
@@ -209,7 +209,7 @@ uint32_t crc32_multiply_rv_zbc(uint32_t a, uint32_t b) {
|
|
|
209
209
|
}
|
|
210
210
|
|
|
211
211
|
#if defined(__GNUC__) || defined(_MSC_VER)
|
|
212
|
-
uint32_t crc32_shift_rv_zbc(uint32_t crc1, uint32_t n) {
|
|
212
|
+
static uint32_t crc32_shift_rv_zbc(uint32_t crc1, uint32_t n) {
|
|
213
213
|
// TODO: require Zbb for ctz
|
|
214
214
|
uint32_t result = crc1;
|
|
215
215
|
#if __riscv_xlen == 64
|
|
@@ -221,15 +221,15 @@ uint32_t crc32_shift_rv_zbc(uint32_t crc1, uint32_t n) {
|
|
|
221
221
|
#endif
|
|
222
222
|
if(!n) return result;
|
|
223
223
|
|
|
224
|
-
uint32_t result2 = crc_power[ctz32(n)];
|
|
224
|
+
uint32_t result2 = RapidYenc::crc_power[ctz32(n)];
|
|
225
225
|
n &= n-1;
|
|
226
226
|
|
|
227
227
|
while(n) {
|
|
228
|
-
result = crc32_multiply_rv_zbc(result, crc_power[ctz32(n)]);
|
|
228
|
+
result = crc32_multiply_rv_zbc(result, RapidYenc::crc_power[ctz32(n)]);
|
|
229
229
|
n &= n-1;
|
|
230
230
|
|
|
231
231
|
if(n) {
|
|
232
|
-
result2 = crc32_multiply_rv_zbc(result2, crc_power[ctz32(n)]);
|
|
232
|
+
result2 = crc32_multiply_rv_zbc(result2, RapidYenc::crc_power[ctz32(n)]);
|
|
233
233
|
n &= n-1;
|
|
234
234
|
}
|
|
235
235
|
}
|
|
@@ -238,7 +238,7 @@ uint32_t crc32_shift_rv_zbc(uint32_t crc1, uint32_t n) {
|
|
|
238
238
|
#endif
|
|
239
239
|
|
|
240
240
|
|
|
241
|
-
void crc_riscv_set_funcs() {
|
|
241
|
+
void RapidYenc::crc_riscv_set_funcs() {
|
|
242
242
|
_do_crc32_incremental = &do_crc32_incremental_rv_zbc;
|
|
243
243
|
_crc32_multiply = &crc32_multiply_rv_zbc;
|
|
244
244
|
#if defined(__GNUC__) || defined(_MSC_VER)
|
|
@@ -247,5 +247,5 @@ void crc_riscv_set_funcs() {
|
|
|
247
247
|
_crc32_isa = ISA_FEATURE_ZBC;
|
|
248
248
|
}
|
|
249
249
|
#else
|
|
250
|
-
void crc_riscv_set_funcs() {}
|
|
250
|
+
void RapidYenc::crc_riscv_set_funcs() {}
|
|
251
251
|
#endif
|
package/src/decoder.cc
CHANGED
|
@@ -3,29 +3,358 @@
|
|
|
3
3
|
#include "decoder_common.h"
|
|
4
4
|
#include "decoder.h"
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
// TODO: add branch probabilities
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
// state var: refers to the previous state - only used for incremental processing
|
|
12
|
+
template<bool isRaw>
|
|
13
|
+
static size_t do_decode_noend_scalar(const unsigned char* src, unsigned char* dest, size_t len, RapidYenc::YencDecoderState* state) {
|
|
14
|
+
using namespace RapidYenc;
|
|
15
|
+
|
|
16
|
+
const unsigned char *es = src + len; // end source pointer
|
|
17
|
+
unsigned char *p = dest; // destination pointer
|
|
18
|
+
long i = -(long)len; // input position
|
|
19
|
+
unsigned char c; // input character
|
|
20
|
+
|
|
21
|
+
if(len < 1) return 0;
|
|
22
|
+
|
|
23
|
+
if(isRaw) {
|
|
24
|
+
|
|
25
|
+
if(state) switch(*state) {
|
|
26
|
+
case YDEC_STATE_EQ:
|
|
27
|
+
c = es[i];
|
|
28
|
+
*p++ = c - 42 - 64;
|
|
29
|
+
i++;
|
|
30
|
+
if(c == '\r') {
|
|
31
|
+
*state = YDEC_STATE_CR;
|
|
32
|
+
if(i >= 0) return 0;
|
|
33
|
+
} else {
|
|
34
|
+
*state = YDEC_STATE_NONE;
|
|
35
|
+
break;
|
|
36
|
+
}
|
|
37
|
+
// fall-thru
|
|
38
|
+
case YDEC_STATE_CR:
|
|
39
|
+
if(es[i] != '\n') break;
|
|
40
|
+
i++;
|
|
41
|
+
*state = YDEC_STATE_CRLF;
|
|
42
|
+
if(i >= 0) return 0;
|
|
43
|
+
// Else fall-thru
|
|
44
|
+
case YDEC_STATE_CRLF:
|
|
45
|
+
// skip past first dot
|
|
46
|
+
if(es[i] == '.') i++;
|
|
47
|
+
// fall-thru
|
|
48
|
+
default: break; // silence compiler warnings
|
|
49
|
+
} else // treat as YDEC_STATE_CRLF
|
|
50
|
+
if(es[i] == '.') i++;
|
|
51
|
+
|
|
52
|
+
for(; i < -2; i++) {
|
|
53
|
+
c = es[i];
|
|
54
|
+
switch(c) {
|
|
55
|
+
case '\r':
|
|
56
|
+
// skip past \r\n. sequences
|
|
57
|
+
//i += (es[i+1] == '\n' && es[i+2] == '.') << 1;
|
|
58
|
+
if(es[i+1] == '\n' && es[i+2] == '.')
|
|
59
|
+
i += 2;
|
|
60
|
+
// fall-thru
|
|
61
|
+
case '\n':
|
|
62
|
+
continue;
|
|
63
|
+
case '=':
|
|
64
|
+
c = es[i+1];
|
|
65
|
+
*p++ = c - 42 - 64;
|
|
66
|
+
i += (c != '\r'); // if we have a \r, reprocess character to deal with \r\n. case
|
|
67
|
+
continue;
|
|
68
|
+
default:
|
|
69
|
+
*p++ = c - 42;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
if(state) *state = YDEC_STATE_NONE;
|
|
73
|
+
|
|
74
|
+
if(i == -2) { // 2nd last char
|
|
75
|
+
c = es[i];
|
|
76
|
+
switch(c) {
|
|
77
|
+
case '\r':
|
|
78
|
+
if(state && es[i+1] == '\n') {
|
|
79
|
+
*state = YDEC_STATE_CRLF;
|
|
80
|
+
return p - dest;
|
|
81
|
+
}
|
|
82
|
+
// Else fall-thru
|
|
83
|
+
case '\n':
|
|
84
|
+
break;
|
|
85
|
+
case '=':
|
|
86
|
+
c = es[i+1];
|
|
87
|
+
*p++ = c - 42 - 64;
|
|
88
|
+
i += (c != '\r');
|
|
89
|
+
break;
|
|
90
|
+
default:
|
|
91
|
+
*p++ = c - 42;
|
|
92
|
+
}
|
|
93
|
+
i++;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// do final char; we process this separately to prevent an overflow if the final char is '='
|
|
97
|
+
if(i == -1) {
|
|
98
|
+
c = es[i];
|
|
99
|
+
if(c != '\n' && c != '\r' && c != '=') {
|
|
100
|
+
*p++ = c - 42;
|
|
101
|
+
} else if(state) {
|
|
102
|
+
if(c == '=') *state = YDEC_STATE_EQ;
|
|
103
|
+
else if(c == '\r') *state = YDEC_STATE_CR;
|
|
104
|
+
else *state = YDEC_STATE_NONE;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
} else {
|
|
109
|
+
|
|
110
|
+
if(state && *state == YDEC_STATE_EQ) {
|
|
111
|
+
*p++ = es[i] - 42 - 64;
|
|
112
|
+
i++;
|
|
113
|
+
*state = YDEC_STATE_NONE;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
/*for(i = 0; i < len - 1; i++) {
|
|
117
|
+
c = src[i];
|
|
118
|
+
if(c == '\n' || c == '\r') continue;
|
|
119
|
+
unsigned char isEquals = (c == '=');
|
|
120
|
+
i += isEquals;
|
|
121
|
+
*p++ = src[i] - (42 + (isEquals << 6));
|
|
122
|
+
}*/
|
|
123
|
+
for(; i < -1; i++) {
|
|
124
|
+
c = es[i];
|
|
125
|
+
switch(c) {
|
|
126
|
+
case '\n': case '\r': continue;
|
|
127
|
+
case '=':
|
|
128
|
+
i++;
|
|
129
|
+
c = es[i] - 64;
|
|
130
|
+
}
|
|
131
|
+
*p++ = c - 42;
|
|
132
|
+
}
|
|
133
|
+
if(state) *state = YDEC_STATE_NONE;
|
|
134
|
+
// do final char; we process this separately to prevent an overflow if the final char is '='
|
|
135
|
+
if(i == -1) {
|
|
136
|
+
c = es[i];
|
|
137
|
+
if(c != '\n' && c != '\r' && c != '=') {
|
|
138
|
+
*p++ = c - 42;
|
|
139
|
+
} else
|
|
140
|
+
if(state) *state = (c == '=' ? YDEC_STATE_EQ : YDEC_STATE_NONE);
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
return p - dest;
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
template<bool isRaw>
|
|
149
|
+
static RapidYenc::YencDecoderEnd do_decode_end_scalar(const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) {
|
|
150
|
+
using namespace RapidYenc;
|
|
151
|
+
|
|
152
|
+
const unsigned char *es = (*src) + len; // end source pointer
|
|
153
|
+
unsigned char *p = *dest; // destination pointer
|
|
154
|
+
long i = -(long)len; // input position
|
|
155
|
+
unsigned char c; // input character
|
|
156
|
+
|
|
157
|
+
if(len < 1) return YDEC_END_NONE;
|
|
158
|
+
|
|
159
|
+
#define YDEC_CHECK_END(s) if(i == 0) { \
|
|
160
|
+
*state = s; \
|
|
161
|
+
*src = es; \
|
|
162
|
+
*dest = p; \
|
|
163
|
+
return YDEC_END_NONE; \
|
|
164
|
+
}
|
|
165
|
+
if(state) switch(*state) {
|
|
166
|
+
case YDEC_STATE_CRLFEQ: do_decode_endable_scalar_ceq:
|
|
167
|
+
if(es[i] == 'y') {
|
|
168
|
+
*state = YDEC_STATE_NONE;
|
|
169
|
+
*src = es+i+1;
|
|
170
|
+
*dest = p;
|
|
171
|
+
return YDEC_END_CONTROL;
|
|
172
|
+
} // Else fall-thru
|
|
173
|
+
case YDEC_STATE_EQ:
|
|
174
|
+
c = es[i];
|
|
175
|
+
*p++ = c - 42 - 64;
|
|
176
|
+
i++;
|
|
177
|
+
if(c != '\r') break;
|
|
178
|
+
YDEC_CHECK_END(YDEC_STATE_CR)
|
|
179
|
+
// fall-through
|
|
180
|
+
case YDEC_STATE_CR:
|
|
181
|
+
if(es[i] != '\n') break;
|
|
182
|
+
i++;
|
|
183
|
+
YDEC_CHECK_END(YDEC_STATE_CRLF)
|
|
184
|
+
// fall-through
|
|
185
|
+
case YDEC_STATE_CRLF: do_decode_endable_scalar_c0:
|
|
186
|
+
if(es[i] == '.' && isRaw) {
|
|
187
|
+
i++;
|
|
188
|
+
YDEC_CHECK_END(YDEC_STATE_CRLFDT)
|
|
189
|
+
} else if(es[i] == '=') {
|
|
190
|
+
i++;
|
|
191
|
+
YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
|
|
192
|
+
goto do_decode_endable_scalar_ceq;
|
|
193
|
+
} else
|
|
194
|
+
break;
|
|
195
|
+
// fall-through
|
|
196
|
+
case YDEC_STATE_CRLFDT:
|
|
197
|
+
if(isRaw && es[i] == '\r') {
|
|
198
|
+
i++;
|
|
199
|
+
YDEC_CHECK_END(YDEC_STATE_CRLFDTCR)
|
|
200
|
+
} else if(isRaw && es[i] == '=') { // check for dot-stuffed ending: \r\n.=y
|
|
201
|
+
i++;
|
|
202
|
+
YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
|
|
203
|
+
goto do_decode_endable_scalar_ceq;
|
|
204
|
+
} else
|
|
205
|
+
break;
|
|
206
|
+
// fall-through
|
|
207
|
+
case YDEC_STATE_CRLFDTCR:
|
|
208
|
+
if(es[i] == '\n') {
|
|
209
|
+
if(isRaw) {
|
|
210
|
+
*state = YDEC_STATE_CRLF;
|
|
211
|
+
*src = es + i + 1;
|
|
212
|
+
*dest = p;
|
|
213
|
+
return YDEC_END_ARTICLE;
|
|
214
|
+
} else {
|
|
215
|
+
i++;
|
|
216
|
+
YDEC_CHECK_END(YDEC_STATE_CRLF)
|
|
217
|
+
goto do_decode_endable_scalar_c0; // handle as CRLF
|
|
218
|
+
}
|
|
219
|
+
} else
|
|
220
|
+
break;
|
|
221
|
+
case YDEC_STATE_NONE: break; // silence compiler warning
|
|
222
|
+
} else // treat as YDEC_STATE_CRLF
|
|
223
|
+
goto do_decode_endable_scalar_c0;
|
|
224
|
+
|
|
225
|
+
for(; i < -2; i++) {
|
|
226
|
+
c = es[i];
|
|
227
|
+
switch(c) {
|
|
228
|
+
case '\r': if(es[i+1] == '\n') {
|
|
229
|
+
if(isRaw && es[i+2] == '.') {
|
|
230
|
+
// skip past \r\n. sequences
|
|
231
|
+
i += 3;
|
|
232
|
+
YDEC_CHECK_END(YDEC_STATE_CRLFDT)
|
|
233
|
+
// check for end
|
|
234
|
+
if(es[i] == '\r') {
|
|
235
|
+
i++;
|
|
236
|
+
YDEC_CHECK_END(YDEC_STATE_CRLFDTCR)
|
|
237
|
+
if(es[i] == '\n') {
|
|
238
|
+
*src = es + i + 1;
|
|
239
|
+
*dest = p;
|
|
240
|
+
*state = YDEC_STATE_CRLF;
|
|
241
|
+
return YDEC_END_ARTICLE;
|
|
242
|
+
} else i--;
|
|
243
|
+
} else if(es[i] == '=') {
|
|
244
|
+
i++;
|
|
245
|
+
YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
|
|
246
|
+
if(es[i] == 'y') {
|
|
247
|
+
*src = es + i + 1;
|
|
248
|
+
*dest = p;
|
|
249
|
+
*state = YDEC_STATE_NONE;
|
|
250
|
+
return YDEC_END_CONTROL;
|
|
251
|
+
} else {
|
|
252
|
+
// escape char & continue
|
|
253
|
+
c = es[i];
|
|
254
|
+
*p++ = c - 42 - 64;
|
|
255
|
+
i -= (c == '\r');
|
|
256
|
+
}
|
|
257
|
+
} else i--;
|
|
258
|
+
}
|
|
259
|
+
else if(es[i+2] == '=') {
|
|
260
|
+
i += 3;
|
|
261
|
+
YDEC_CHECK_END(YDEC_STATE_CRLFEQ)
|
|
262
|
+
if(es[i] == 'y') {
|
|
263
|
+
// ended
|
|
264
|
+
*src = es + i + 1;
|
|
265
|
+
*dest = p;
|
|
266
|
+
*state = YDEC_STATE_NONE;
|
|
267
|
+
return YDEC_END_CONTROL;
|
|
268
|
+
} else {
|
|
269
|
+
// escape char & continue
|
|
270
|
+
c = es[i];
|
|
271
|
+
*p++ = c - 42 - 64;
|
|
272
|
+
i -= (c == '\r');
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
} // fall-thru
|
|
276
|
+
case '\n':
|
|
277
|
+
continue;
|
|
278
|
+
case '=':
|
|
279
|
+
c = es[i+1];
|
|
280
|
+
*p++ = c - 42 - 64;
|
|
281
|
+
i += (c != '\r'); // if we have a \r, reprocess character to deal with \r\n. case
|
|
282
|
+
continue;
|
|
283
|
+
default:
|
|
284
|
+
*p++ = c - 42;
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
if(state) *state = YDEC_STATE_NONE;
|
|
288
|
+
|
|
289
|
+
if(i == -2) { // 2nd last char
|
|
290
|
+
c = es[i];
|
|
291
|
+
switch(c) {
|
|
292
|
+
case '\r':
|
|
293
|
+
if(state && es[i+1] == '\n') {
|
|
294
|
+
*state = YDEC_STATE_CRLF;
|
|
295
|
+
*src = es;
|
|
296
|
+
*dest = p;
|
|
297
|
+
return YDEC_END_NONE;
|
|
298
|
+
}
|
|
299
|
+
// Else fall-thru
|
|
300
|
+
case '\n':
|
|
301
|
+
break;
|
|
302
|
+
case '=':
|
|
303
|
+
c = es[i+1];
|
|
304
|
+
*p++ = c - 42 - 64;
|
|
305
|
+
i += (c != '\r');
|
|
306
|
+
break;
|
|
307
|
+
default:
|
|
308
|
+
*p++ = c - 42;
|
|
309
|
+
}
|
|
310
|
+
i++;
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
// do final char; we process this separately to prevent an overflow if the final char is '='
|
|
314
|
+
if(i == -1) {
|
|
315
|
+
c = es[i];
|
|
316
|
+
if(c != '\n' && c != '\r' && c != '=') {
|
|
317
|
+
*p++ = c - 42;
|
|
318
|
+
} else if(state) {
|
|
319
|
+
if(c == '=') *state = YDEC_STATE_EQ;
|
|
320
|
+
else if(c == '\r') *state = YDEC_STATE_CR;
|
|
321
|
+
else *state = YDEC_STATE_NONE;
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
#undef YDEC_CHECK_END
|
|
325
|
+
|
|
326
|
+
*src = es;
|
|
327
|
+
*dest = p;
|
|
328
|
+
return YDEC_END_NONE;
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
template<bool isRaw, bool searchEnd>
|
|
332
|
+
RapidYenc::YencDecoderEnd RapidYenc::do_decode_scalar(const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) {
|
|
333
|
+
if(searchEnd)
|
|
334
|
+
return do_decode_end_scalar<isRaw>(src, dest, len, state);
|
|
335
|
+
*dest += do_decode_noend_scalar<isRaw>(*src, *dest, len, state);
|
|
336
|
+
*src += len;
|
|
337
|
+
return YDEC_END_NONE;
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
namespace RapidYenc {
|
|
7
342
|
YencDecoderEnd (*_do_decode)(const unsigned char**, unsigned char**, size_t, YencDecoderState*) = &do_decode_scalar<false, false>;
|
|
8
343
|
YencDecoderEnd (*_do_decode_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*) = &do_decode_scalar<true, false>;
|
|
9
344
|
YencDecoderEnd (*_do_decode_end_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*) = &do_decode_end_scalar<true>;
|
|
10
345
|
|
|
11
346
|
int _decode_isa = ISA_GENERIC;
|
|
347
|
+
|
|
348
|
+
template YencDecoderEnd do_decode_scalar<true, true>(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
|
|
12
349
|
}
|
|
13
350
|
|
|
14
|
-
void decoder_set_sse2_funcs();
|
|
15
|
-
void decoder_set_ssse3_funcs();
|
|
16
|
-
void decoder_set_avx_funcs();
|
|
17
|
-
void decoder_set_avx2_funcs();
|
|
18
|
-
void decoder_set_vbmi2_funcs();
|
|
19
|
-
extern const bool decoder_has_avx10;
|
|
20
|
-
void decoder_set_neon_funcs();
|
|
21
|
-
void decoder_set_rvv_funcs();
|
|
22
|
-
|
|
23
351
|
|
|
24
352
|
#if defined(PLATFORM_X86) && defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0
|
|
25
353
|
# if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
|
|
26
354
|
# include "decoder_avx2_base.h"
|
|
27
355
|
static inline void decoder_set_native_funcs() {
|
|
28
356
|
ALIGN_ALLOC(lookups, sizeof(*lookups), 16);
|
|
357
|
+
using namespace RapidYenc;
|
|
29
358
|
decoder_init_lut(lookups->compact);
|
|
30
359
|
_do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_NATIVE> >;
|
|
31
360
|
_do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_NATIVE> >;
|
|
@@ -35,6 +364,7 @@ static inline void decoder_set_native_funcs() {
|
|
|
35
364
|
# else
|
|
36
365
|
# include "decoder_sse_base.h"
|
|
37
366
|
static inline void decoder_set_native_funcs() {
|
|
367
|
+
using namespace RapidYenc;
|
|
38
368
|
decoder_sse_init(lookups);
|
|
39
369
|
decoder_init_lut(lookups->compact);
|
|
40
370
|
_do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_NATIVE> >;
|
|
@@ -47,7 +377,7 @@ static inline void decoder_set_native_funcs() {
|
|
|
47
377
|
|
|
48
378
|
|
|
49
379
|
#if defined(PLATFORM_X86) || defined(PLATFORM_ARM)
|
|
50
|
-
void decoder_init_lut(void* compactLUT) {
|
|
380
|
+
void RapidYenc::decoder_init_lut(void* compactLUT) {
|
|
51
381
|
#ifdef YENC_DEC_USE_THINTABLE
|
|
52
382
|
const int tableSize = 8;
|
|
53
383
|
#else
|
|
@@ -70,7 +400,7 @@ void decoder_init_lut(void* compactLUT) {
|
|
|
70
400
|
#endif
|
|
71
401
|
|
|
72
402
|
|
|
73
|
-
void decoder_init() {
|
|
403
|
+
void RapidYenc::decoder_init() {
|
|
74
404
|
#ifdef PLATFORM_X86
|
|
75
405
|
# if defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0
|
|
76
406
|
decoder_set_native_funcs();
|
package/src/decoder.h
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
#ifndef __YENC_DECODER_H
|
|
2
2
|
#define __YENC_DECODER_H
|
|
3
3
|
|
|
4
|
-
#
|
|
5
|
-
extern "C" {
|
|
6
|
-
#endif
|
|
4
|
+
#include "hedley.h"
|
|
7
5
|
|
|
6
|
+
namespace RapidYenc {
|
|
8
7
|
|
|
9
8
|
|
|
10
9
|
// the last state that the decoder was in (i.e. last few characters processed)
|
|
@@ -27,21 +26,20 @@ typedef enum {
|
|
|
27
26
|
YDEC_END_ARTICLE // \r\n.\r\n sequence found, src points to byte after last '\n'
|
|
28
27
|
} YencDecoderEnd;
|
|
29
28
|
|
|
30
|
-
#include "hedley.h"
|
|
31
29
|
|
|
32
30
|
extern YencDecoderEnd (*_do_decode)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
|
|
33
31
|
extern YencDecoderEnd (*_do_decode_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
|
|
34
32
|
extern YencDecoderEnd (*_do_decode_end_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
|
|
35
33
|
extern int _decode_isa;
|
|
36
34
|
|
|
37
|
-
static inline size_t
|
|
38
|
-
unsigned char* ds = dest;
|
|
39
|
-
(*(isRaw ? _do_decode_raw : _do_decode))(&src, &ds, len, state);
|
|
40
|
-
return ds - dest;
|
|
35
|
+
static inline size_t decode(int isRaw, const void* src, void* dest, size_t len, YencDecoderState* state) {
|
|
36
|
+
unsigned char* ds = (unsigned char*)dest;
|
|
37
|
+
(*(isRaw ? _do_decode_raw : _do_decode))((const unsigned char**)&src, &ds, len, state);
|
|
38
|
+
return ds - (unsigned char*)dest;
|
|
41
39
|
}
|
|
42
40
|
|
|
43
|
-
static inline YencDecoderEnd
|
|
44
|
-
return _do_decode_end_raw(src, dest, len, state);
|
|
41
|
+
static inline YencDecoderEnd decode_end(const void** src, void** dest, size_t len, YencDecoderState* state) {
|
|
42
|
+
return _do_decode_end_raw((const unsigned char**)src, (unsigned char**)dest, len, state);
|
|
45
43
|
}
|
|
46
44
|
|
|
47
45
|
void decoder_init();
|
|
@@ -51,7 +49,5 @@ static inline int decode_isa_level() {
|
|
|
51
49
|
}
|
|
52
50
|
|
|
53
51
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
#endif
|
|
57
|
-
#endif
|
|
52
|
+
} // namespace
|
|
53
|
+
#endif // defined(__YENC_DECODER_H)
|
package/src/decoder_avx.cc
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
#include "common.h"
|
|
2
2
|
|
|
3
|
-
#if defined(__AVX__) && defined(__POPCNT__)
|
|
4
3
|
#include "decoder_common.h"
|
|
4
|
+
#if defined(__AVX__) && defined(__POPCNT__)
|
|
5
5
|
#include "decoder_sse_base.h"
|
|
6
|
-
void decoder_set_avx_funcs() {
|
|
6
|
+
void RapidYenc::decoder_set_avx_funcs() {
|
|
7
7
|
decoder_sse_init(lookups);
|
|
8
8
|
decoder_init_lut(lookups->compact);
|
|
9
9
|
_do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_SSE4_POPCNT> >;
|
|
@@ -12,8 +12,7 @@ void decoder_set_avx_funcs() {
|
|
|
12
12
|
_decode_isa = ISA_LEVEL_AVX;
|
|
13
13
|
}
|
|
14
14
|
#else
|
|
15
|
-
void
|
|
16
|
-
void decoder_set_avx_funcs() {
|
|
15
|
+
void RapidYenc::decoder_set_avx_funcs() {
|
|
17
16
|
decoder_set_ssse3_funcs();
|
|
18
17
|
}
|
|
19
18
|
#endif
|
package/src/decoder_avx2.cc
CHANGED
|
@@ -1,19 +1,18 @@
|
|
|
1
1
|
#include "common.h"
|
|
2
2
|
|
|
3
|
-
#if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
|
|
4
3
|
#include "decoder_common.h"
|
|
4
|
+
#if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
|
|
5
5
|
#include "decoder_avx2_base.h"
|
|
6
|
-
void decoder_set_avx2_funcs() {
|
|
6
|
+
void RapidYenc::decoder_set_avx2_funcs() {
|
|
7
7
|
ALIGN_ALLOC(lookups, sizeof(*lookups), 16);
|
|
8
8
|
decoder_init_lut(lookups->compact);
|
|
9
|
-
_do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_LEVEL_AVX2> >;
|
|
10
|
-
_do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_LEVEL_AVX2> >;
|
|
11
|
-
_do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m256i)*2, do_decode_avx2<true, true, ISA_LEVEL_AVX2> >;
|
|
12
|
-
_decode_isa = ISA_LEVEL_AVX2;
|
|
9
|
+
RapidYenc::_do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_LEVEL_AVX2> >;
|
|
10
|
+
RapidYenc::_do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_LEVEL_AVX2> >;
|
|
11
|
+
RapidYenc::_do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m256i)*2, do_decode_avx2<true, true, ISA_LEVEL_AVX2> >;
|
|
12
|
+
RapidYenc::_decode_isa = ISA_LEVEL_AVX2;
|
|
13
13
|
}
|
|
14
14
|
#else
|
|
15
|
-
void
|
|
16
|
-
void decoder_set_avx2_funcs() {
|
|
15
|
+
void RapidYenc::decoder_set_avx2_funcs() {
|
|
17
16
|
decoder_set_avx_funcs();
|
|
18
17
|
}
|
|
19
18
|
#endif
|
package/src/decoder_avx2_base.h
CHANGED
|
@@ -49,6 +49,8 @@ static HEDLEY_ALWAYS_INLINE __m256i force_align_read_256(const void* p) {
|
|
|
49
49
|
# define COMPRESS_STORE(dst, mask, vec) _mm256_storeu_si256((__m256i*)(dst), _mm256_maskz_compress_epi8(mask, vec))
|
|
50
50
|
#endif
|
|
51
51
|
|
|
52
|
+
namespace RapidYenc {
|
|
53
|
+
|
|
52
54
|
template<bool isRaw, bool searchEnd, enum YEncDecIsaLevel use_isa>
|
|
53
55
|
HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned char*& p, unsigned char& _escFirst, uint16_t& _nextMask) {
|
|
54
56
|
HEDLEY_ASSUME(_escFirst == 0 || _escFirst == 1);
|
|
@@ -429,8 +431,9 @@ HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned
|
|
|
429
431
|
if(use_isa >= ISA_LEVEL_AVX3)
|
|
430
432
|
dataB = _mm256_add_epi8(oDataB, _mm256_set1_epi8(-42));
|
|
431
433
|
|
|
432
|
-
|
|
433
|
-
|
|
434
|
+
uint64_t maskEqShift1 = (maskEq << 1) + escFirst;
|
|
435
|
+
if(LIKELIHOOD(0.0001, (mask & maskEqShift1) != 0)) {
|
|
436
|
+
maskEq = fix_eqMask<uint64_t>(maskEq, maskEqShift1);
|
|
434
437
|
mask &= ~(uint64_t)escFirst;
|
|
435
438
|
escFirst = maskEq>>63;
|
|
436
439
|
// next, eliminate anything following a `=` from the special char mask; this eliminates cases of `=\r` so that they aren't removed
|
|
@@ -611,4 +614,5 @@ HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned
|
|
|
611
614
|
_escFirst = (unsigned char)escFirst;
|
|
612
615
|
_mm256_zeroupper();
|
|
613
616
|
}
|
|
617
|
+
} // namespace
|
|
614
618
|
#endif
|