yencode 1.0.8 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +339 -231
- package/binding.gyp +292 -39
- package/crcutil-1.0/code/multiword_64_64_gcc_amd64_asm.cc +7 -7
- package/crcutil-1.0/code/multiword_64_64_gcc_i386_mmx.cc +14 -14
- package/crcutil-1.0/code/multiword_64_64_intrinsic_i386_mmx.cc +1 -1
- package/crcutil-1.0/code/uint128_sse2.h +2 -0
- package/index.js +329 -22
- package/package.json +2 -2
- package/src/common.h +299 -0
- package/src/crc.cc +95 -0
- package/src/crc.h +23 -0
- package/src/crc_arm.cc +175 -0
- package/src/crc_common.h +4 -0
- package/{crc_folding.c → src/crc_folding.cc} +175 -185
- package/src/decoder.cc +61 -0
- package/src/decoder.h +53 -0
- package/src/decoder_avx.cc +18 -0
- package/src/decoder_avx2.cc +18 -0
- package/src/decoder_avx2_base.h +615 -0
- package/src/decoder_common.h +512 -0
- package/src/decoder_neon.cc +474 -0
- package/src/decoder_neon64.cc +451 -0
- package/src/decoder_sse2.cc +16 -0
- package/src/decoder_sse_base.h +711 -0
- package/src/decoder_ssse3.cc +18 -0
- package/src/encoder.cc +170 -0
- package/src/encoder.h +21 -0
- package/src/encoder_avx.cc +16 -0
- package/src/encoder_avx2.cc +16 -0
- package/src/encoder_avx_base.h +564 -0
- package/src/encoder_common.h +109 -0
- package/src/encoder_neon.cc +547 -0
- package/src/encoder_sse2.cc +13 -0
- package/src/encoder_sse_base.h +724 -0
- package/src/encoder_ssse3.cc +18 -0
- package/src/hedley.h +1899 -0
- package/src/platform.cc +147 -0
- package/src/yencode.cc +449 -0
- package/test/_maxsize.js +9 -0
- package/test/_speedbase.js +147 -0
- package/test/speedcrc.js +20 -0
- package/test/speeddec.js +92 -0
- package/test/speedenc.js +44 -0
- package/{testcrc.js → test/testcrc.js} +53 -39
- package/test/testdec.js +183 -0
- package/test/testenc.js +163 -0
- package/test/testpostdec.js +126 -0
- package/test.js +0 -91
- package/yencode.cc +0 -1622
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#include "common.h"
|
|
2
|
+
|
|
3
|
+
#if defined(__SSSE3__)
#include "decoder_common.h"
#include "decoder_sse_base.h"

/*
 * Installs the SSSE3 builds of the decoder kernels into the three decoder
 * dispatch pointers (_do_decode, _do_decode_raw, _do_decode_end_raw).
 *
 * The shared SSE state is initialised first, then the lookup tables reached
 * through `lookups` are filled in by decoder_init_lut().
 */
void decoder_set_ssse3_funcs() {
	decoder_sse_init();
	decoder_init_lut(lookups->eqFix, lookups->compact);

	// Each kernel is instantiated with the same pair of boolean flags as the
	// wrapper around it, and consumes two 128-bit vectors per iteration.
	_do_decode         = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_SSSE3> >;
	_do_decode_raw     = &do_decode_simd<true,  false, sizeof(__m128i)*2, do_decode_sse<true,  false, ISA_LEVEL_SSSE3> >;
	_do_decode_end_raw = &do_decode_simd<true,  true,  sizeof(__m128i)*2, do_decode_sse<true,  true,  ISA_LEVEL_SSSE3> >;
}
#else /* this translation unit was not compiled with SSSE3 enabled */
void decoder_set_sse2_funcs();

/*
 * SSSE3 code could not be generated for this build, so requests for the
 * SSSE3 decoder are forwarded to the SSE2 one.
 */
void decoder_set_ssse3_funcs() {
	decoder_set_sse2_funcs();
}
#endif
|
package/src/encoder.cc
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
#include "common.h"
|
|
2
|
+
#include "encoder_common.h"
|
|
3
|
+
#include "encoder.h"
|
|
4
|
+
|
|
5
|
+
size_t do_encode_generic(int line_size, int* colOffset, const unsigned char* HEDLEY_RESTRICT src, unsigned char* HEDLEY_RESTRICT dest, size_t len, int doEnd) {
|
|
6
|
+
unsigned char* es = (unsigned char*)src + len;
|
|
7
|
+
unsigned char *p = dest; // destination pointer
|
|
8
|
+
long i = -(long)len; // input position
|
|
9
|
+
unsigned char c, escaped; // input character; escaped input character
|
|
10
|
+
int col = *colOffset;
|
|
11
|
+
|
|
12
|
+
if (col == 0) {
|
|
13
|
+
c = es[i++];
|
|
14
|
+
if (escapedLUT[c]) {
|
|
15
|
+
memcpy(p, &escapedLUT[c], sizeof(uint16_t));
|
|
16
|
+
p += 2;
|
|
17
|
+
col = 2;
|
|
18
|
+
} else {
|
|
19
|
+
*(p++) = c + 42;
|
|
20
|
+
col = 1;
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
while(i < 0) {
|
|
24
|
+
// main line
|
|
25
|
+
unsigned char* sp = NULL;
|
|
26
|
+
while (i < -1-8 && line_size-col-1 > 8) {
|
|
27
|
+
// 8 cycle unrolled version
|
|
28
|
+
sp = p;
|
|
29
|
+
#define DO_THING(n) \
|
|
30
|
+
c = es[i+n], escaped = escapeLUT[c]; \
|
|
31
|
+
if (escaped) \
|
|
32
|
+
*(p++) = escaped; \
|
|
33
|
+
else { \
|
|
34
|
+
memcpy(p, &escapedLUT[c], sizeof(uint16_t)); \
|
|
35
|
+
p += 2; \
|
|
36
|
+
}
|
|
37
|
+
DO_THING(0);
|
|
38
|
+
DO_THING(1);
|
|
39
|
+
DO_THING(2);
|
|
40
|
+
DO_THING(3);
|
|
41
|
+
DO_THING(4);
|
|
42
|
+
DO_THING(5);
|
|
43
|
+
DO_THING(6);
|
|
44
|
+
DO_THING(7);
|
|
45
|
+
|
|
46
|
+
i += 8;
|
|
47
|
+
col += (int)(p - sp);
|
|
48
|
+
}
|
|
49
|
+
if(sp && col >= line_size-1) {
|
|
50
|
+
// TODO: consider revert optimisation from SIMD code
|
|
51
|
+
// we overflowed - need to revert and use slower method :(
|
|
52
|
+
col -= (int)(p - sp);
|
|
53
|
+
p = sp;
|
|
54
|
+
i -= 8;
|
|
55
|
+
}
|
|
56
|
+
// handle remaining chars
|
|
57
|
+
while(col < line_size-1) {
|
|
58
|
+
c = es[i++], escaped = escapeLUT[c];
|
|
59
|
+
if (escaped) {
|
|
60
|
+
*(p++) = escaped;
|
|
61
|
+
col++;
|
|
62
|
+
}
|
|
63
|
+
else {
|
|
64
|
+
memcpy(p, &escapedLUT[c], sizeof(uint16_t));
|
|
65
|
+
p += 2;
|
|
66
|
+
col += 2;
|
|
67
|
+
}
|
|
68
|
+
/* experimental branchless version
|
|
69
|
+
*p = '=';
|
|
70
|
+
c = (es[i++] + 42) & 0xFF;
|
|
71
|
+
int cond = (c=='\0' || c=='=' || c=='\r' || c=='\n');
|
|
72
|
+
*(p+cond) = c + (cond << 6);
|
|
73
|
+
p += 1+cond;
|
|
74
|
+
col += 1+cond;
|
|
75
|
+
*/
|
|
76
|
+
if (i >= 0) goto end;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// last line char
|
|
80
|
+
if(col < line_size) { // this can only be false if the last character was an escape sequence (or line_size is horribly small), in which case, we don't need to handle space/tab cases
|
|
81
|
+
c = es[i++];
|
|
82
|
+
if (escapedLUT[c] && c != '.'-42) {
|
|
83
|
+
memcpy(p, &escapedLUT[c], sizeof(uint16_t));
|
|
84
|
+
p += 2;
|
|
85
|
+
} else {
|
|
86
|
+
*(p++) = c + 42;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
if (i >= 0) break;
|
|
91
|
+
|
|
92
|
+
c = es[i++];
|
|
93
|
+
if (escapedLUT[c]) {
|
|
94
|
+
uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)escapedLUT[c]);
|
|
95
|
+
memcpy(p, &w, sizeof(w));
|
|
96
|
+
p += 4;
|
|
97
|
+
col = 2;
|
|
98
|
+
} else {
|
|
99
|
+
// another option may be to just write the EOL and let the first char be handled by the faster methods above, but it appears that writing the extra byte here is generally faster...
|
|
100
|
+
uint32_t w = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0);
|
|
101
|
+
memcpy(p, &w, sizeof(w));
|
|
102
|
+
p += 3;
|
|
103
|
+
col = 1;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
end:
|
|
108
|
+
if(doEnd) {
|
|
109
|
+
// special case: if the last character is a space/tab, it needs to be escaped as it's the final character on the line
|
|
110
|
+
unsigned char lc = *(p-1);
|
|
111
|
+
if(lc == '\t' || lc == ' ') {
|
|
112
|
+
*(p-1) = '=';
|
|
113
|
+
*p = lc+64;
|
|
114
|
+
p++;
|
|
115
|
+
col++;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
*colOffset = col;
|
|
119
|
+
return p - dest;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
extern "C" {
|
|
124
|
+
size_t (*_do_encode)(int, int*, const unsigned char* HEDLEY_RESTRICT, unsigned char* HEDLEY_RESTRICT, size_t, int) = &do_encode_generic;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
void encoder_sse2_init();
|
|
128
|
+
void encoder_ssse3_init();
|
|
129
|
+
void encoder_avx_init();
|
|
130
|
+
void encoder_avx2_init();
|
|
131
|
+
void encoder_neon_init();
|
|
132
|
+
|
|
133
|
+
// Native x86 builds: the kernel is chosen at compile time from the ISA the
// compiler is targeting, so no runtime CPU detection is involved.
#if defined(PLATFORM_X86) && defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0
# if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
#  include "encoder_avx_base.h"
// AVX2 is available (and 256-bit code has not been disabled): use the AVX2 kernel.
static inline void encoder_native_init() {
	_do_encode = &do_encode_simd< do_encode_avx2<ISA_NATIVE> >;
	encoder_avx2_lut<ISA_NATIVE>();
}
# else
#  include "encoder_sse_base.h"
// Otherwise use the 128-bit SSE kernel, instantiated for the native ISA level.
static inline void encoder_native_init() {
	_do_encode = &do_encode_simd< do_encode_sse<ISA_NATIVE> >;
	encoder_sse_lut<ISA_NATIVE>();
}
# endif
#endif
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
/*
 * Picks the encoder implementation for this process.
 *
 * x86, native build:   defers to encoder_native_init().
 * x86, generic build:  asks cpu_supports_isa() for the ISA level and calls
 *                      the initialiser for the highest tier it reaches
 *                      (AVX2, AVX, SSSE3, otherwise SSE2).
 * ARM:                 calls the NEON initialiser when cpu_supports_neon()
 *                      reports support.
 *
 * When no branch applies, _do_encode is left as it was.
 */
void encoder_init() {
#ifdef PLATFORM_X86
# if defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0
	encoder_native_init();
# else
	const int isa_level = cpu_supports_isa();
	if(isa_level >= ISA_LEVEL_AVX2) {
		encoder_avx2_init();
	} else if(isa_level >= ISA_LEVEL_AVX) {
		encoder_avx_init();
	} else if(isa_level >= ISA_LEVEL_SSSE3) {
		encoder_ssse3_init();
	} else {
		encoder_sse2_init();
	}
# endif
#endif
#ifdef PLATFORM_ARM
	if(cpu_supports_neon()) {
		encoder_neon_init();
	}
#endif
}
|
package/src/encoder.h
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/* Public interface of the yEnc encoder. */
#ifndef __YENC_ENCODER_H
#define __YENC_ENCODER_H

#include <stddef.h> /* size_t -- keeps this header usable on its own */
#include "hedley.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Pointer to the active encoder implementation.
 * Arguments, in order: line_size, colOffset (in/out), src, dest, len, doEnd.
 * Returns the number of bytes written to dest.
 */
extern size_t (*_do_encode)(int, int*, const unsigned char* HEDLEY_RESTRICT, unsigned char* HEDLEY_RESTRICT, size_t, int);

/* Convenience alias so callers can write do_encode(...) like a function call. */
#define do_encode (*_do_encode)

/*
 * Selects the encoder implementation that _do_encode points at.
 * Declared with (void) so that C callers get a real prototype.
 */
void encoder_init(void);

#ifdef __cplusplus
}
#endif
#endif
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#include "common.h"
|
|
2
|
+
|
|
3
|
+
#if defined(__AVX__) && defined(__POPCNT__)
#include "encoder_sse_base.h"

/*
 * AVX tier of the encoder. It uses the 128-bit SSE kernel instantiated at
 * ISA_LEVEL_SSE4_POPCNT, and fills the matching lookup tables.
 */
void encoder_avx_init() {
	_do_encode = &do_encode_simd< do_encode_sse<ISA_LEVEL_SSE4_POPCNT> >;
	encoder_sse_lut<ISA_LEVEL_SSE4_POPCNT>();
}
#else /* AVX and/or POPCNT not enabled for this translation unit */
void encoder_ssse3_init();

/* No AVX code could be generated for this build: step down to the SSSE3 initialiser. */
void encoder_avx_init() {
	encoder_ssse3_init();
}
#endif
|
|
16
|
+
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#include "common.h"
|
|
2
|
+
|
|
3
|
+
#if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
#include "encoder_avx_base.h"

/*
 * AVX2 tier of the encoder: installs the AVX2 kernel and fills its lookup
 * tables.
 */
void encoder_avx2_init() {
	_do_encode = &do_encode_simd< do_encode_avx2<ISA_LEVEL_AVX2> >;
	encoder_avx2_lut<ISA_LEVEL_AVX2>();
}
#else /* AVX2 not enabled for this translation unit, or 256-bit code disabled */
void encoder_avx_init();

/* No AVX2 code could be generated for this build: step down to the AVX initialiser. */
void encoder_avx2_init() {
	encoder_avx_init();
}
#endif
|
|
16
|
+
|