yencode 1.0.8 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +339 -231
  2. package/binding.gyp +292 -39
  3. package/crcutil-1.0/code/multiword_64_64_gcc_amd64_asm.cc +7 -7
  4. package/crcutil-1.0/code/multiword_64_64_gcc_i386_mmx.cc +14 -14
  5. package/crcutil-1.0/code/multiword_64_64_intrinsic_i386_mmx.cc +1 -1
  6. package/crcutil-1.0/code/uint128_sse2.h +2 -0
  7. package/index.js +329 -22
  8. package/package.json +2 -2
  9. package/src/common.h +299 -0
  10. package/src/crc.cc +95 -0
  11. package/src/crc.h +23 -0
  12. package/src/crc_arm.cc +175 -0
  13. package/src/crc_common.h +4 -0
  14. package/{crc_folding.c → src/crc_folding.cc} +175 -185
  15. package/src/decoder.cc +61 -0
  16. package/src/decoder.h +53 -0
  17. package/src/decoder_avx.cc +18 -0
  18. package/src/decoder_avx2.cc +18 -0
  19. package/src/decoder_avx2_base.h +615 -0
  20. package/src/decoder_common.h +512 -0
  21. package/src/decoder_neon.cc +474 -0
  22. package/src/decoder_neon64.cc +451 -0
  23. package/src/decoder_sse2.cc +16 -0
  24. package/src/decoder_sse_base.h +711 -0
  25. package/src/decoder_ssse3.cc +18 -0
  26. package/src/encoder.cc +170 -0
  27. package/src/encoder.h +21 -0
  28. package/src/encoder_avx.cc +16 -0
  29. package/src/encoder_avx2.cc +16 -0
  30. package/src/encoder_avx_base.h +564 -0
  31. package/src/encoder_common.h +109 -0
  32. package/src/encoder_neon.cc +547 -0
  33. package/src/encoder_sse2.cc +13 -0
  34. package/src/encoder_sse_base.h +724 -0
  35. package/src/encoder_ssse3.cc +18 -0
  36. package/src/hedley.h +1899 -0
  37. package/src/platform.cc +147 -0
  38. package/src/yencode.cc +449 -0
  39. package/test/_maxsize.js +9 -0
  40. package/test/_speedbase.js +147 -0
  41. package/test/speedcrc.js +20 -0
  42. package/test/speeddec.js +92 -0
  43. package/test/speedenc.js +44 -0
  44. package/{testcrc.js → test/testcrc.js} +53 -39
  45. package/test/testdec.js +183 -0
  46. package/test/testenc.js +163 -0
  47. package/test/testpostdec.js +126 -0
  48. package/test.js +0 -91
  49. package/yencode.cc +0 -1622
@@ -0,0 +1,18 @@
1
+ #include "common.h"
2
+
3
// Selects the SSSE3 build of the decoder.
// When this translation unit is compiled with __SSSE3__ defined, the function
// below runs decoder_sse_init() and decoder_init_lut(), then points the three
// decoder entry points (_do_decode, _do_decode_raw, _do_decode_end_raw) at
// do_decode_simd instantiations built on do_decode_sse with ISA_LEVEL_SSSE3.
// Otherwise it simply forwards to decoder_set_sse2_funcs().
+ #ifdef __SSSE3__
4
+ #include "decoder_common.h"
5
+ #include "decoder_sse_base.h"
6
+ void decoder_set_ssse3_funcs() {
7
+ decoder_sse_init();
8
// NOTE(review): lookups is declared outside this file; presumably it is
// populated by decoder_sse_init() above - confirm in decoder_sse_base.h.
+ decoder_init_lut(lookups->eqFix, lookups->compact);
9
// The two leading boolean template arguments differ per entry point
// (false/false, true/false, true/true). Judging by the pointer names they
// presumably select raw mode and end detection - confirm in decoder_common.h.
// sizeof(__m128i)*2 is the size of two 128-bit vectors.
+ _do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_SSSE3> >;
10
+ _do_decode_raw = &do_decode_simd<true, false, sizeof(__m128i)*2, do_decode_sse<true, false, ISA_LEVEL_SSSE3> >;
11
+ _do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m128i)*2, do_decode_sse<true, true, ISA_LEVEL_SSSE3> >;
12
+ }
13
+ #else
14
// Built without SSSE3 support: delegate to the SSE2 setup routine, which is
// defined in another translation unit.
+ void decoder_set_sse2_funcs();
15
+ void decoder_set_ssse3_funcs() {
16
+ decoder_set_sse2_funcs();
17
+ }
18
+ #endif
package/src/encoder.cc ADDED
@@ -0,0 +1,170 @@
1
+ #include "common.h"
2
+ #include "encoder_common.h"
3
+ #include "encoder.h"
4
+
5
+ size_t do_encode_generic(int line_size, int* colOffset, const unsigned char* HEDLEY_RESTRICT src, unsigned char* HEDLEY_RESTRICT dest, size_t len, int doEnd) {
6
+ unsigned char* es = (unsigned char*)src + len;
7
+ unsigned char *p = dest; // destination pointer
8
+ long i = -(long)len; // input position
9
+ unsigned char c, escaped; // input character; escaped input character
10
+ int col = *colOffset;
11
+
12
+ if (col == 0) {
13
+ c = es[i++];
14
+ if (escapedLUT[c]) {
15
+ memcpy(p, &escapedLUT[c], sizeof(uint16_t));
16
+ p += 2;
17
+ col = 2;
18
+ } else {
19
+ *(p++) = c + 42;
20
+ col = 1;
21
+ }
22
+ }
23
+ while(i < 0) {
24
+ // main line
25
+ unsigned char* sp = NULL;
26
+ while (i < -1-8 && line_size-col-1 > 8) {
27
+ // 8 cycle unrolled version
28
+ sp = p;
29
+ #define DO_THING(n) \
30
+ c = es[i+n], escaped = escapeLUT[c]; \
31
+ if (escaped) \
32
+ *(p++) = escaped; \
33
+ else { \
34
+ memcpy(p, &escapedLUT[c], sizeof(uint16_t)); \
35
+ p += 2; \
36
+ }
37
+ DO_THING(0);
38
+ DO_THING(1);
39
+ DO_THING(2);
40
+ DO_THING(3);
41
+ DO_THING(4);
42
+ DO_THING(5);
43
+ DO_THING(6);
44
+ DO_THING(7);
45
+
46
+ i += 8;
47
+ col += (int)(p - sp);
48
+ }
49
+ if(sp && col >= line_size-1) {
50
+ // TODO: consider revert optimisation from SIMD code
51
+ // we overflowed - need to revert and use slower method :(
52
+ col -= (int)(p - sp);
53
+ p = sp;
54
+ i -= 8;
55
+ }
56
+ // handle remaining chars
57
+ while(col < line_size-1) {
58
+ c = es[i++], escaped = escapeLUT[c];
59
+ if (escaped) {
60
+ *(p++) = escaped;
61
+ col++;
62
+ }
63
+ else {
64
+ memcpy(p, &escapedLUT[c], sizeof(uint16_t));
65
+ p += 2;
66
+ col += 2;
67
+ }
68
+ /* experimental branchless version
69
+ *p = '=';
70
+ c = (es[i++] + 42) & 0xFF;
71
+ int cond = (c=='\0' || c=='=' || c=='\r' || c=='\n');
72
+ *(p+cond) = c + (cond << 6);
73
+ p += 1+cond;
74
+ col += 1+cond;
75
+ */
76
+ if (i >= 0) goto end;
77
+ }
78
+
79
+ // last line char
80
+ if(col < line_size) { // this can only be false if the last character was an escape sequence (or line_size is horribly small), in which case, we don't need to handle space/tab cases
81
+ c = es[i++];
82
+ if (escapedLUT[c] && c != '.'-42) {
83
+ memcpy(p, &escapedLUT[c], sizeof(uint16_t));
84
+ p += 2;
85
+ } else {
86
+ *(p++) = c + 42;
87
+ }
88
+ }
89
+
90
+ if (i >= 0) break;
91
+
92
+ c = es[i++];
93
+ if (escapedLUT[c]) {
94
+ uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)escapedLUT[c]);
95
+ memcpy(p, &w, sizeof(w));
96
+ p += 4;
97
+ col = 2;
98
+ } else {
99
+ // another option may be to just write the EOL and let the first char be handled by the faster methods above, but it appears that writing the extra byte here is generally faster...
100
+ uint32_t w = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0);
101
+ memcpy(p, &w, sizeof(w));
102
+ p += 3;
103
+ col = 1;
104
+ }
105
+ }
106
+
107
+ end:
108
+ if(doEnd) {
109
+ // special case: if the last character is a space/tab, it needs to be escaped as it's the final character on the line
110
+ unsigned char lc = *(p-1);
111
+ if(lc == '\t' || lc == ' ') {
112
+ *(p-1) = '=';
113
+ *p = lc+64;
114
+ p++;
115
+ col++;
116
+ }
117
+ }
118
+ *colOffset = col;
119
+ return p - dest;
120
+ }
121
+
122
+
123
// Currently selected encoder implementation. It starts out pointing at the
// portable do_encode_generic; the encoder_*_init routines shown in this diff
// reassign it to a SIMD variant. Given C linkage to match its declaration in
// encoder.h.
+ extern "C" {
124
+ size_t (*_do_encode)(int, int*, const unsigned char* HEDLEY_RESTRICT, unsigned char* HEDLEY_RESTRICT, size_t, int) = &do_encode_generic;
125
+ }
126
+
127
// Forward declarations of the per-ISA initialisers that encoder_init() picks
// between at run time. None of them is defined in this file.
+ void encoder_sse2_init();
128
+ void encoder_ssse3_init();
129
+ void encoder_avx_init();
130
+ void encoder_avx2_init();
131
+ void encoder_neon_init();
132
+
133
// Native build path: only compiled on x86 when YENC_BUILD_NATIVE is defined
// and non-zero. encoder_native_init() installs a single encoder instantiated
// with ISA_NATIVE instead of going through run-time CPU detection.
+ #if defined(PLATFORM_X86) && defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0
134
// AVX2 variant, unless 256-bit AVX has been disabled via YENC_DISABLE_AVX256.
+ # if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
135
+ # include "encoder_avx_base.h"
136
+ static inline void encoder_native_init() {
137
+ _do_encode = &do_encode_simd< do_encode_avx2<ISA_NATIVE> >;
138
// presumably builds the lookup tables the AVX2 encoder reads - confirm in encoder_avx_base.h
+ encoder_avx2_lut<ISA_NATIVE>();
139
+ }
140
+ # else
141
// Otherwise use the SSE encoder.
+ # include "encoder_sse_base.h"
142
+ static inline void encoder_native_init() {
143
+ _do_encode = &do_encode_simd< do_encode_sse<ISA_NATIVE> >;
144
// presumably builds the lookup tables the SSE encoder reads - confirm in encoder_sse_base.h
+ encoder_sse_lut<ISA_NATIVE>();
145
+ }
146
+ # endif
147
+ #endif
148
+
149
+
150
/**
 * Chooses the encoder implementation for the running CPU.
 *
 * On x86, a native build calls encoder_native_init(); otherwise the ISA level
 * reported by cpu_supports_isa() decides between the AVX2, AVX, SSSE3 and SSE2
 * initialisers (highest supported level wins). On ARM, the NEON initialiser
 * runs when cpu_supports_neon() reports support. If none of these apply,
 * _do_encode is left untouched.
 */
void encoder_init() {
#ifdef PLATFORM_X86
# if defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0
	encoder_native_init();
# else
	const int isa_level = cpu_supports_isa();
	// start from the baseline and upgrade to the best level the CPU reports
	void (*init_fn)() = &encoder_sse2_init;
	if(isa_level >= ISA_LEVEL_AVX2) {
		init_fn = &encoder_avx2_init;
	} else if(isa_level >= ISA_LEVEL_AVX) {
		init_fn = &encoder_avx_init;
	} else if(isa_level >= ISA_LEVEL_SSSE3) {
		init_fn = &encoder_ssse3_init;
	}
	init_fn();
# endif
#endif
#ifdef PLATFORM_ARM
	if(cpu_supports_neon()) {
		encoder_neon_init();
	}
#endif
}
package/src/encoder.h ADDED
@@ -0,0 +1,21 @@
1
// Public interface of the yEnc encoder. Everything is declared with C linkage
// when included from C++.
+ #ifndef __YENC_ENCODER_H
2
+ #define __YENC_ENCODER_H
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+
9
+
10
// hedley.h supplies HEDLEY_RESTRICT, used in the signature below.
+ #include "hedley.h"
11
+
12
// Pointer to the active encoder implementation. Arguments follow
// do_encode_generic in encoder.cc: line_size, colOffset (in/out), src, dest,
// len, doEnd. Returns the number of bytes written to dest.
+ extern size_t (*_do_encode)(int, int*, const unsigned char* HEDLEY_RESTRICT, unsigned char* HEDLEY_RESTRICT, size_t, int);
13
// Convenience alias so callers can write do_encode(...) as a normal call.
+ #define do_encode (*_do_encode)
14
// Selects the implementation that _do_encode points at.
+ void encoder_init();
15
+
16
+
17
+
18
+ #ifdef __cplusplus
19
+ }
20
+ #endif
21
+ #endif
@@ -0,0 +1,16 @@
1
+ #include "common.h"
2
+
3
// AVX encoder setup. When this translation unit is compiled with both
// __AVX__ and __POPCNT__ defined, encoder_avx_init() installs the SSE encoder
// instantiated at ISA_LEVEL_SSE4_POPCNT; otherwise it forwards to
// encoder_ssse3_init().
+ #if defined(__AVX__) && defined(__POPCNT__)
4
+ #include "encoder_sse_base.h"
5
+
6
+ void encoder_avx_init() {
7
+ _do_encode = &do_encode_simd< do_encode_sse<ISA_LEVEL_SSE4_POPCNT> >;
8
// presumably builds the lookup tables for this ISA level - confirm in encoder_sse_base.h
+ encoder_sse_lut<ISA_LEVEL_SSE4_POPCNT>();
9
+ }
10
+ #else
11
// Compiler flags do not allow AVX+POPCNT here: fall back one level.
+ void encoder_ssse3_init();
12
+ void encoder_avx_init() {
13
+ encoder_ssse3_init();
14
+ }
15
+ #endif
16
+
@@ -0,0 +1,16 @@
1
+ #include "common.h"
2
+
3
// AVX2 encoder setup. When this translation unit is compiled with __AVX2__
// defined and YENC_DISABLE_AVX256 not defined, encoder_avx2_init() installs
// the AVX2 encoder; otherwise it forwards to encoder_avx_init().
+ #if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
4
+ #include "encoder_avx_base.h"
5
+
6
+ void encoder_avx2_init() {
7
+ _do_encode = &do_encode_simd< do_encode_avx2<ISA_LEVEL_AVX2> >;
8
// presumably builds the lookup tables for the AVX2 encoder - confirm in encoder_avx_base.h
+ encoder_avx2_lut<ISA_LEVEL_AVX2>();
9
+ }
10
+ #else
11
// AVX2 unavailable or disabled at build time: fall back one level.
+ void encoder_avx_init();
12
+ void encoder_avx2_init() {
13
+ encoder_avx_init();
14
+ }
15
+ #endif
16
+