@exodus/bytes 1.0.0-rc.5 → 1.0.0-rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/base32.js +12 -5
- package/base58.js +4 -4
- package/base58check.js +49 -11
- package/base64.js +72 -40
- package/fallback/_utils.js +10 -1
- package/fallback/base32.js +71 -36
- package/fallback/base64.js +57 -27
- package/fallback/hex.js +137 -46
- package/fallback/latin1.js +113 -0
- package/fallback/utf8.js +77 -112
- package/hex.js +2 -16
- package/hex.node.js +26 -0
- package/package.json +22 -6
- package/utf8.js +10 -46
- package/utf8.node.js +33 -0
package/fallback/hex.js
CHANGED
@@ -1,37 +1,105 @@
 import { assertUint8 } from '../assert.js'
-import { nativeEncoder } from './_utils.js'
+import { nativeDecoder, nativeEncoder } from './_utils.js'
+import { encodeAscii, decodeAscii } from './latin1.js'

-let hexArray
+let hexArray // array of 256 bytes converted to two-char hex strings
+let hexCodes // hexArray converted to u16 code pairs
 let dehexArray
+const _00 = 0x30_30 // '00' string in hex, the only allowed char pair to generate 0 byte
+const _ff = 0x66_66 // 'ff' string in hex, max allowed char pair (larger than 'FF' string)
+const allowed = '0123456789ABCDEFabcdef'

 export const E_HEX = 'Input is not a hex string'

-function
+function toHexPartAddition(a, start, end) {
   let o = ''
   let i = start
-  const
-
-
-  const
-  const
-  const
-
-    o +=
-    o +=
-    o +=
-    o += hexArray[d]
+  const h = hexArray
+  for (const last3 = end - 3; i < last3; i += 4) {
+    const x0 = a[i]
+    const x1 = a[i + 1]
+    const x2 = a[i + 2]
+    const x3 = a[i + 3]
+    o += h[x0]
+    o += h[x1]
+    o += h[x2]
+    o += h[x3]
   }

-  while (i < end) o +=
+  while (i < end) o += h[a[i++]]
   return o
 }

+// Optimiziation for Hermes which is the main user of fallback
+function toHexPartTemplates(a, start, end) {
+  let o = ''
+  let i = start
+  const h = hexArray
+  for (const last15 = end - 15; i < last15; i += 16) {
+    const x0 = a[i]
+    const x1 = a[i + 1]
+    const x2 = a[i + 2]
+    const x3 = a[i + 3]
+    const x4 = a[i + 4]
+    const x5 = a[i + 5]
+    const x6 = a[i + 6]
+    const x7 = a[i + 7]
+    const x8 = a[i + 8]
+    const x9 = a[i + 9]
+    const x10 = a[i + 10]
+    const x11 = a[i + 11]
+    const x12 = a[i + 12]
+    const x13 = a[i + 13]
+    const x14 = a[i + 14]
+    const x15 = a[i + 15]
+    o += `${h[x0]}${h[x1]}${h[x2]}${h[x3]}${h[x4]}${h[x5]}${h[x6]}${h[x7]}${h[x8]}${h[x9]}${h[x10]}${h[x11]}${h[x12]}${h[x13]}${h[x14]}${h[x15]}`
+  }
+
+  while (i < end) o += h[a[i++]]
+  return o
+}
+
+// Using templates is significantly faster in Hermes and JSC
+// It's harder to detect JSC and not important anyway as it has native impl, so we detect only Hermes
+const toHexPart = globalThis.HermesInternal ? toHexPartTemplates : toHexPartAddition
+
 export function toHex(arr) {
   assertUint8(arr)

   if (!hexArray) hexArray = Array.from({ length: 256 }, (_, i) => i.toString(16).padStart(2, '0'))
   const length = arr.length // this helps Hermes

+  // Only old browsers use this, barebone engines don't have TextDecoder
+  // But Hermes can use this when it (hopefully) implements TextDecoder
+  if (nativeDecoder) {
+    if (!hexCodes) {
+      hexCodes = new Uint16Array(256)
+      const u8 = new Uint8Array(hexCodes.buffer, hexCodes.byteOffset, hexCodes.byteLength)
+      for (let i = 0; i < 256; i++) {
+        const pair = hexArray[i]
+        u8[2 * i] = pair.charCodeAt(0)
+        u8[2 * i + 1] = pair.charCodeAt(1)
+      }
+    }
+
+    const oa = new Uint16Array(length)
+    let i = 0
+    for (const last3 = arr.length - 3; ; i += 4) {
+      if (i >= last3) break // loop is fast enough for moving this here to be useful on JSC
+      const x0 = arr[i]
+      const x1 = arr[i + 1]
+      const x2 = arr[i + 2]
+      const x3 = arr[i + 3]
+      oa[i] = hexCodes[x0]
+      oa[i + 1] = hexCodes[x1]
+      oa[i + 2] = hexCodes[x2]
+      oa[i + 3] = hexCodes[x3]
+    }
+
+    for (; i < length; i++) oa[i] = hexCodes[arr[i]]
+    return decodeAscii(oa)
+  }
+
   if (length > 30_000) {
     // Limit concatenation to avoid excessive GC
     // Thresholds checked on Hermes
@@ -51,51 +119,74 @@ export function toHex(arr) {
   return toHexPart(arr, 0, length)
 }

-// TODO: can this be optimized? This only affects non-Hermes barebone engines though
-const mapSize = nativeEncoder ? 256 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array
-
 export function fromHex(str) {
   if (typeof str !== 'string') throw new TypeError('Input is not a string')
   if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)

-  if (!dehexArray) {
-    dehexArray = new Int8Array(mapSize).fill(-1) // no regex input validation here, so we map all other bytes to -1 and recheck sign
-    for (let i = 0; i < 16; i++) {
-      const s = i.toString(16)
-      dehexArray[s.charCodeAt(0)] = dehexArray[s.toUpperCase().charCodeAt(0)] = i
-    }
-  }
-
   const length = str.length / 2 // this helps Hermes in loops
   const arr = new Uint8Array(length)
-
+
+  // Native encoder path is beneficial even for small arrays in Hermes
   if (nativeEncoder) {
-
-
-
-
+    if (!dehexArray) {
+      dehexArray = new Uint8Array(_ff + 1) // 26 KiB cache, >2x perf improvement on Hermes
+      const u8 = new Uint8Array(2)
+      const u16 = new Uint16Array(u8.buffer, u8.byteOffset, 1) // for endianess-agnostic transform
+      const map = [...allowed].map((c) => [c.charCodeAt(0), parseInt(c, 16)])
+      for (const [ch, vh] of map) {
+        u8[0] = ch // first we read high hex char
+        for (const [cl, vl] of map) {
+          u8[1] = cl // then we read low hex char
+          dehexArray[u16[0]] = (vh << 4) | vl
+        }
+      }
+    }
+
+    const codes = encodeAscii(str, E_HEX)
+    const codes16 = new Uint16Array(codes.buffer, codes.byteOffset, codes.byteLength / 2)
     let i = 0
-
-    const
-    const
-    const
-    const
-
-
-
-
-
+    for (const last3 = length - 3; i < last3; i += 4) {
+      const ai = codes16[i]
+      const bi = codes16[i + 1]
+      const ci = codes16[i + 2]
+      const di = codes16[i + 3]
+      const a = dehexArray[ai]
+      const b = dehexArray[bi]
+      const c = dehexArray[ci]
+      const d = dehexArray[di]
+      if ((!a && ai !== _00) || (!b && bi !== _00) || (!c && ci !== _00) || (!d && di !== _00)) {
+        throw new SyntaxError(E_HEX)
+      }
+
+      arr[i] = a
+      arr[i + 1] = b
+      arr[i + 2] = c
+      arr[i + 3] = d
     }

     while (i < length) {
-      const
-
-
+      const ai = codes16[i]
+      const a = dehexArray[ai]
+      if (!a && ai !== _00) throw new SyntaxError(E_HEX)
+      arr[i++] = a
     }
   } else {
+    if (!dehexArray) {
+      // no regex input validation here, so we map all other bytes to -1 and recheck sign
+      // non-ASCII chars throw already though, so we should process only 0-127
+      dehexArray = new Int8Array(128).fill(-1)
+      for (let i = 0; i < 16; i++) {
+        const s = i.toString(16)
+        dehexArray[s.charCodeAt(0)] = dehexArray[s.toUpperCase().charCodeAt(0)] = i
+      }
+    }
+
+    let j = 0
     for (let i = 0; i < length; i++) {
-      const
-
+      const a = str.charCodeAt(j++)
+      const b = str.charCodeAt(j++)
+      const res = (dehexArray[a] << 4) | dehexArray[b]
+      if (res < 0 || (0x7f | a | b) !== 0x7f) throw new SyntaxError(E_HEX) // 0-127
       arr[i] = res
     }
   }
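A note on the fromHex fast path added above: each pair of hex characters is read as a single u16 code unit and resolved through a lookup table built via overlapping Uint8Array/Uint16Array views, so the table automatically matches the host's byte order. Below is a minimal standalone sketch of that technique; the names, the per-call TextEncoder, and the simplified validation are illustrative assumptions, not the package's actual exports.

// Sketch of the u16 pair-lookup technique (assumption: table indexed by the
// 16-bit code of two adjacent ASCII chars, read in host byte order).
const HEX = '0123456789ABCDEFabcdef'
const lut = new Uint8Array(0x66_66 + 1) // 'ff' is the largest valid pair code
let pair00 = 0 // the code of '00', the only pair that legitimately maps to byte 0
{
  const u8 = new Uint8Array(2)
  const u16 = new Uint16Array(u8.buffer) // same two bytes, host endianness
  for (const hi of HEX) {
    for (const lo of HEX) {
      u8[0] = hi.charCodeAt(0)
      u8[1] = lo.charCodeAt(0)
      lut[u16[0]] = (parseInt(hi, 16) << 4) | parseInt(lo, 16)
      if (hi === '0' && lo === '0') pair00 = u16[0]
    }
  }
}

function fromHexSketch(str) {
  if (str.length % 2 !== 0) throw new SyntaxError('Input is not a hex string')
  const codes = new TextEncoder().encode(str)
  if (codes.length !== str.length) throw new SyntaxError('Input is not a hex string') // non-ASCII
  const pairs = new Uint16Array(codes.buffer, 0, codes.length / 2) // two chars per element
  const out = new Uint8Array(pairs.length)
  for (let i = 0; i < pairs.length; i++) {
    const byte = lut[pairs[i]]
    // 0 (or an out-of-range undefined) means "unknown pair" unless the pair is literally '00'
    if (!byte && pairs[i] !== pair00) throw new SyntaxError('Input is not a hex string')
    out[i] = byte
  }
  return out
}

// fromHexSketch('00ffAB') -> Uint8Array [0, 255, 171]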
package/fallback/latin1.js
ADDED
@@ -0,0 +1,113 @@
+import { nativeEncoder, nativeDecoder, nativeDecoderLatin1, nativeBuffer } from './_utils.js'
+
+// See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
+// On Hermes, actual max is 0x20_000 minus current stack depth, 1/16 of that should be safe
+const maxFunctionArgs = 0x20_00
+
+export function asciiPrefix(arr) {
+  let p = 0 // verified ascii bytes
+  const length = arr.length
+  // Threshold tested on Hermes (worse on <=48, better on >=52)
+  // Also on v8 arrs of size <=64 might be on heap and using Uint32Array on them is unoptimal
+  if (length > 64) {
+    // Speedup with u32
+    const u32start = (4 - (arr.byteOffset & 3)) % 4 // offset start by this many bytes for alignment
+    for (; p < u32start; p++) if (arr[p] >= 0x80) return p
+    const u32length = ((arr.byteLength - u32start) / 4) | 0
+    const u32 = new Uint32Array(arr.buffer, arr.byteOffset + u32start, u32length)
+    let i = 0
+    for (const last3 = u32length - 3; ; p += 16, i += 4) {
+      if (i >= last3) break // loop is fast enough for moving this here to be _very_ useful, likely due to array access checks
+      const a = u32[i]
+      const b = u32[i + 1]
+      const c = u32[i + 2]
+      const d = u32[i + 3]
+      if (a & 0x80_80_80_80 || b & 0x80_80_80_80 || c & 0x80_80_80_80 || d & 0x80_80_80_80) break
+    }
+
+    for (; i < u32length; p += 4, i++) if (u32[i] & 0x80_80_80_80) break
+  }
+
+  for (; p < length; p++) if (arr[p] >= 0x80) return p
+  return length
+}
+
+// Capable of decoding Uint16Array to UTF-16 as well as Uint8Array to Latin-1
+export function decodeLatin1(arr, start = 0, stop = arr.length) {
+  start |= 0
+  stop |= 0
+  const total = stop - start
+  if (total === 0) return ''
+  if (total > maxFunctionArgs) {
+    let prefix = ''
+    for (let i = start; i < stop; ) {
+      const i1 = Math.min(stop, i + maxFunctionArgs)
+      prefix += String.fromCharCode.apply(String, arr.subarray(i, i1))
+      i = i1
+    }
+
+    return prefix
+  }
+
+  const sliced = start === 0 && stop === arr.length ? arr : arr.subarray(start, stop)
+  return String.fromCharCode.apply(String, sliced)
+}
+
+// Does not check input, uses best available method
+// Building an array for this is only faster than proper string concatenation when TextDecoder or native Buffer are available
+export const decodeAscii = nativeBuffer
+  ? (a) =>
+      // Buffer is faster on Node.js (but only for long enough data), if we know that output is ascii
+      a.byteLength >= 0x3_00
+        ? nativeBuffer.from(a.buffer, a.byteOffset, a.byteLength).latin1Slice(0, a.byteLength) // .latin1Slice is faster than .asciiSlice
+        : nativeDecoder.decode(a) // On Node.js, utf8 decoder is faster than latin1
+  : nativeDecoderLatin1
+    ? (a) => nativeDecoderLatin1.decode(a) // On browsers (specifically WebKit), latin1 decoder is faster than utf8
+    : (a) => decodeLatin1(new Uint8Array(a.buffer, a.byteOffset, a.byteLength)) // Fallback. We shouldn't get here, constructing with strings directly is faster
+
+/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
+
+export const encodeCharcodes = globalThis.HermesInternal
+  ? (str, arr) => {
+      const length = str.length
+      if (length > 64) {
+        const at = str.charCodeAt.bind(str) // faster on strings from ~64 chars on Hermes, but can be 10x slower on e.g. JSC
+        for (let i = 0; i < length; i++) arr[i] = at(i)
+      } else {
+        for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
+      }
+
+      return arr
+    }
+  : (str, arr) => {
+      const length = str.length
+      // Can be optimized with unrolling, but this is not used on non-Hermes atm
+      for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
+      return arr
+    }
+
+/* eslint-enable @exodus/mutable/no-param-reassign-prop-only */
+
+export const encodeLatin1 = (str) => encodeCharcodes(str, new Uint8Array(str.length))
+
+// Expects nativeEncoder to be present
+export const encodeAscii = globalThis.HermesInternal
+  ? (str, ERR) => {
+      // Much faster in Hermes
+      const codes = new Uint8Array(str.length + 4) // overshoot by a full utf8 char
+      const info = nativeEncoder.encodeInto(str, codes)
+      if (info.read !== str.length || info.written !== str.length) throw new SyntaxError(ERR) // non-ascii
+      return codes.subarray(0, str.length)
+    }
+  : nativeBuffer
+    ? (str, ERR) => {
+        // TextEncoder is slow on Node.js 24 / 25 (was ok on 22)
+        const codes = nativeBuffer.from(str, 'utf8') // ascii/latin1 coerces, we need to check
+        if (codes.length !== str.length) throw new SyntaxError(ERR) // non-ascii
+        return new Uint8Array(codes.buffer, codes.byteOffset, codes.byteLength)
+      }
+    : (str, ERR) => {
+        const codes = nativeEncoder.encode(str)
+        if (codes.length !== str.length) throw new SyntaxError(ERR) // non-ascii
+        return codes
+      }
package/fallback/utf8.js
CHANGED
@@ -5,72 +5,53 @@ const replacementPoint = 0xff_fd

 // https://encoding.spec.whatwg.org/#utf-8-decoder
 // We are most likely in loose mode, for non-loose escape & decodeURIComponent solved everything
-export function decode(arr, loose) {
-
+export function decode(arr, loose, start = 0) {
+  start |= 0
   const end = arr.length
   let out = ''
-  const
+  const chunkSize = 0x2_00 // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
+  const tmpSize = Math.min(end - start, chunkSize + 1) // need 1 extra slot for last codepoint, which can be 2 charcodes
+  const tmp = new Array(tmpSize).fill(0)
+  let ti = 0

   for (let i = start; i < end; i++) {
-    if (
-    //
-    // length can be off by a few as large code points produce two utf-16 char codes, also we overshoot in unrolled loop
+    if (ti >= chunkSize) {
+      tmp.length = ti // can be larger by 1 if last codepoint is two charcodes
       out += String.fromCharCode.apply(String, tmp)
-      tmp.length
+      if (tmp.length <= chunkSize) tmp.push(0) // restore 1 extra slot for last codepoint
+      ti = 0
     }

     const byte = arr[i]
     if (byte < 0x80) {
-
-
-      //
-      for (let j = 0; j < 5; j++) {
-        if (i + 1 >= end) break
-        const byte1 = arr[i + 1]
-        if (byte1 >= 0x80) break
-        tmp.push(byte1)
-        i++
-        if (i + 1 >= end) break
-        const byte2 = arr[i + 1]
-        if (byte2 >= 0x80) break
-        tmp.push(byte2)
-        i++
-        if (i + 1 >= end) break
-        const byte3 = arr[i + 1]
-        if (byte3 >= 0x80) break
-        tmp.push(byte3)
-        i++
-        if (i + 1 >= end) break
-        const byte4 = arr[i + 1]
-        if (byte4 >= 0x80) break
-        tmp.push(byte4)
-        i++
-      }
+      tmp[ti++] = byte
+      // ascii fast path is in ../utf8.js, this is called only on non-ascii input
+      // so we don't unroll this anymore
     } else if (byte < 0xc2) {
       if (!loose) throw new TypeError(E_STRICT)
-      tmp
+      tmp[ti++] = replacementPoint
     } else if (byte < 0xe0) {
       // need 1 more
       if (i + 1 >= end) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp
+        tmp[ti++] = replacementPoint
         break
       }

       const byte1 = arr[i + 1]
       if (byte1 < 0x80 || byte1 > 0xbf) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp
+        tmp[ti++] = replacementPoint
         continue
       }

       i++
-      tmp
+      tmp[ti++] = ((byte & 0x1f) << 6) | (byte1 & 0x3f)
     } else if (byte < 0xf0) {
       // need 2 more
       if (i + 1 >= end) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp
+        tmp[ti++] = replacementPoint
         break
       }

@@ -79,31 +60,31 @@ export function decode(arr, loose) {
       const byte1 = arr[i + 1]
       if (byte1 < lower || byte1 > upper) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp
+        tmp[ti++] = replacementPoint
         continue
       }

       i++
       if (i + 1 >= end) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp
+        tmp[ti++] = replacementPoint
         break
       }

       const byte2 = arr[i + 1]
       if (byte2 < 0x80 || byte2 > 0xbf) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp
+        tmp[ti++] = replacementPoint
         continue
       }

       i++
-      tmp
+      tmp[ti++] = ((byte & 0xf) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f)
     } else if (byte <= 0xf4) {
       // need 3 more
       if (i + 1 >= end) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp
+        tmp[ti++] = replacementPoint
         break
       }

@@ -112,35 +93,35 @@ export function decode(arr, loose) {
       const byte1 = arr[i + 1]
       if (byte1 < lower || byte1 > upper) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp
+        tmp[ti++] = replacementPoint
         continue
       }

       i++
       if (i + 1 >= end) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp
+        tmp[ti++] = replacementPoint
         break
       }

       const byte2 = arr[i + 1]
       if (byte2 < 0x80 || byte2 > 0xbf) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp
+        tmp[ti++] = replacementPoint
         continue
       }

       i++
       if (i + 1 >= end) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp
+        tmp[ti++] = replacementPoint
         break
       }

       const byte3 = arr[i + 1]
       if (byte3 < 0x80 || byte3 > 0xbf) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp
+        tmp[ti++] = replacementPoint
         continue
       }

@@ -150,71 +131,65 @@ export function decode(arr, loose) {
       if (codePoint > 0xff_ff) {
         // split into char codes as String.fromCharCode is faster than String.fromCodePoint
         const u = codePoint - 0x1_00_00
-        tmp
+        tmp[ti++] = 0xd8_00 + ((u >> 10) & 0x3_ff)
+        tmp[ti++] = 0xdc_00 + (u & 0x3_ff)
       } else {
-        tmp
+        tmp[ti++] = codePoint
       }
       // eslint-disable-next-line sonarjs/no-duplicated-branches
     } else {
       if (!loose) throw new TypeError(E_STRICT)
-      tmp
+      tmp[ti++] = replacementPoint
     }
   }

-  if (
-
+  if (ti === 0) return out
+  tmp.length = ti
+  return out + String.fromCharCode.apply(String, tmp)
 }

 export function encode(string, loose) {
   const length = string.length
-  let lead = null
   let small = true
   let bytes = new Uint8Array(length) // assume ascii
   let p = 0

   for (let i = 0; i < length; i++) {
-
+    let code = string.charCodeAt(i)
     if (code < 0x80) {
-      // Fast path for ascii
-      if (lead) {
-        if (!loose) throw new TypeError(E_STRICT_UNICODE)
-        bytes[p++] = 0xef
-        bytes[p++] = 0xbf
-        bytes[p++] = 0xbd
-        lead = null
-      }
-
       bytes[p++] = code
       // Unroll the loop a bit for faster ops
-
-      if (i + 1 >= length) break
-      const c1 = string.charCodeAt(i + 1)
-      if (c1 >= 0x80) break
-      bytes[p++] = c1
+      while (true) {
         i++
-      if (i
-
-      if (
-      bytes[p++] =
+        if (i >= length) break
+        code = string.charCodeAt(i)
+        if (code >= 0x80) break
+        bytes[p++] = code
         i++
-      if (i
-
-      if (
-      bytes[p++] =
+        if (i >= length) break
+        code = string.charCodeAt(i)
+        if (code >= 0x80) break
+        bytes[p++] = code
         i++
-      if (i
-
-      if (
-      bytes[p++] =
+        if (i >= length) break
+        code = string.charCodeAt(i)
+        if (code >= 0x80) break
+        bytes[p++] = code
         i++
+        if (i >= length) break
+        code = string.charCodeAt(i)
+        if (code >= 0x80) break
+        bytes[p++] = code
       }

-
+      if (i >= length) break
+      // now, code is present and >= 0x80
     }

     if (small) {
       // TODO: use resizable array buffers? will have to return a non-resizeable one
-
+      if (p !== i) throw new Error('Unreachable') // Here, p === i (only when small is still true)
+      const bytesNew = new Uint8Array(p + (length - i) * 3) // maximium can be 3x of the string length in charcodes
       bytesNew.set(bytes)
       bytes = bytesNew
       small = false
@@ -224,45 +199,35 @@ export function encode(string, loose) {
     // lead: d800 - dbff
     // trail: dc00 - dfff
     if (code >= 0xd8_00 && code < 0xe0_00) {
-
-
+      // Can't be a valid trail as we already processed that below
+
+      if (code > 0xdb_ff || i + 1 >= length) {
+        // An unexpected trail or a lead at the very end of input
         if (!loose) throw new TypeError(E_STRICT_UNICODE)
         bytes[p++] = 0xef
         bytes[p++] = 0xbf
         bytes[p++] = 0xbd
-
-        // code is still processed as a new lead
+        continue
       }

-
-
-
-
-
-
-
-
-
-
-      lead
-
+      const next = string.charCodeAt(i + 1) // Process valid pairs immediately
+      if (next >= 0xdc_00 && next < 0xe0_00) {
+        // here, codePoint is always between 0x1_00_00 and 0x11_00_00, we encode as 4 bytes
+        const codePoint = (((code - 0xd8_00) << 10) | (next - 0xdc_00)) + 0x1_00_00
+        bytes[p++] = (codePoint >> 18) | 0xf0
+        bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
+        bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
+        bytes[p++] = (codePoint & 0x3f) | 0x80
+        i++ // consume next
+      } else {
+        // Next is not a trail, leave next unconsumed but process unmatched lead error
+        if (!loose) throw new TypeError(E_STRICT_UNICODE)
+        bytes[p++] = 0xef
+        bytes[p++] = 0xbf
+        bytes[p++] = 0xbd
       }

-      // here, codePoint is always between 0x1_00_00 and 0x11_00_00, we encode as 4 bytes
-      const codePoint = (((lead - 0xd8_00) << 10) | (code - 0xdc_00)) + 0x1_00_00
-      bytes[p++] = (codePoint >> 18) | 0xf0
-      bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
-      bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
-      bytes[p++] = (codePoint & 0x3f) | 0x80
-      lead = null
       continue
-    } else if (lead) {
-      if (!loose) throw new TypeError(E_STRICT_UNICODE)
-      bytes[p++] = 0xef
-      bytes[p++] = 0xbf
-      bytes[p++] = 0xbd
-      lead = null
-      // code is still processed
     }

     // We are left with a non-pair char code above ascii, it gets encoded to 2 or 3 bytes