@exodus/bytes 1.0.0-rc.3 → 1.0.0-rc.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -2
- package/assert.js +8 -5
- package/base32.js +33 -0
- package/base58.js +212 -0
- package/base58check.js +30 -0
- package/base64.js +90 -50
- package/fallback/_utils.js +6 -0
- package/fallback/base32.js +198 -0
- package/fallback/base64.js +86 -51
- package/fallback/hex.js +31 -17
- package/fallback/utf8.js +280 -0
- package/hex.js +13 -4
- package/package.json +37 -8
- package/utf8.js +117 -0
package/fallback/utf8.js
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
// Message of the TypeError thrown by decode() in strict (non-loose) mode when the bytes are malformed
export const E_STRICT = 'Input is not well-formed utf8'
|
|
2
|
+
// Message of the TypeError thrown by encode() in strict (non-loose) mode on an unpaired surrogate
export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
|
|
3
|
+
|
|
4
|
+
// U+FFFD REPLACEMENT CHARACTER, pushed by decode() in loose mode in place of each malformed sequence
const replacementPoint = 0xff_fd
|
|
5
|
+
|
|
6
|
+
// https://encoding.spec.whatwg.org/#utf-8-decoder
// We are most likely in loose mode, for non-loose escape & decodeURIComponent solved everything

// Reports one malformed sequence: throws in strict mode, otherwise appends a single U+FFFD to `units`
function rejectOrReplace(units, loose) {
  if (!loose) throw new TypeError(E_STRICT)
  units.push(replacementPoint)
}

/**
 * Pure-JS UTF-8 decoder.
 *
 * Bytes are turned into UTF-16 char codes which are flushed to the output string in chunks
 * through String.fromCharCode. A BOM is not treated specially, it is decoded like any other sequence.
 *
 * @param {Uint8Array} arr - bytes to decode; not validated here (the caller in utf8.js runs assertUint8 first)
 * @param {boolean} [loose] - truthy: every malformed sequence becomes one U+FFFD; falsy: malformed input throws
 * @returns {string} the decoded text
 * @throws {TypeError} with message E_STRICT on malformed input when `loose` is falsy
 */
export function decode(arr, loose) {
  const end = arr.length
  let out = ''
  const tmp = []

  for (let i = 0; i < end; i++) {
    if (tmp.length > 0x2_00) {
      // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
      // length can be off by a few as large code points produce two utf-16 char codes, also we overshoot in unrolled loop
      out += String.fromCharCode.apply(String, tmp)
      tmp.length = 0
    }

    const byte = arr[i]
    if (byte < 0x80) {
      // Fast path ascii
      tmp.push(byte)
      // Unroll the loop a bit for faster ops, overshoot by 20 chars
      for (let j = 0; j < 5; j++) {
        if (i + 1 >= end) break
        const byte1 = arr[i + 1]
        if (byte1 >= 0x80) break
        tmp.push(byte1)
        i++
        if (i + 1 >= end) break
        const byte2 = arr[i + 1]
        if (byte2 >= 0x80) break
        tmp.push(byte2)
        i++
        if (i + 1 >= end) break
        const byte3 = arr[i + 1]
        if (byte3 >= 0x80) break
        tmp.push(byte3)
        i++
        if (i + 1 >= end) break
        const byte4 = arr[i + 1]
        if (byte4 >= 0x80) break
        tmp.push(byte4)
        i++
      }
    } else if (byte < 0xc2) {
      // 0x80..0xbf is a stray continuation byte, 0xc0/0xc1 could only start an overlong 2-byte form
      rejectOrReplace(tmp, loose)
    } else if (byte < 0xe0) {
      // need 1 more
      if (i + 1 >= end) {
        rejectOrReplace(tmp, loose)
        break
      }

      const byte1 = arr[i + 1]
      if (byte1 < 0x80 || byte1 > 0xbf) {
        rejectOrReplace(tmp, loose)
        continue // the offending byte is not consumed: the next iteration re-examines it
      }

      i++
      tmp.push(((byte & 0x1f) << 6) | (byte1 & 0x3f))
    } else if (byte < 0xf0) {
      // need 2 more
      if (i + 1 >= end) {
        rejectOrReplace(tmp, loose)
        break
      }

      // Narrowed bounds for the first continuation byte: 0xe0 would otherwise allow overlong forms,
      // 0xed would otherwise allow surrogates U+D800..U+DFFF
      const lower = byte === 0xe0 ? 0xa0 : 0x80
      const upper = byte === 0xed ? 0x9f : 0xbf
      const byte1 = arr[i + 1]
      if (byte1 < lower || byte1 > upper) {
        rejectOrReplace(tmp, loose)
        continue
      }

      i++
      if (i + 1 >= end) {
        rejectOrReplace(tmp, loose)
        break
      }

      const byte2 = arr[i + 1]
      if (byte2 < 0x80 || byte2 > 0xbf) {
        rejectOrReplace(tmp, loose)
        continue
      }

      i++
      tmp.push(((byte & 0xf) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f))
    } else if (byte <= 0xf4) {
      // need 3 more
      if (i + 1 >= end) {
        rejectOrReplace(tmp, loose)
        break
      }

      // Narrowed bounds for the first continuation byte: 0xf0 would otherwise allow overlong forms,
      // 0xf4 would otherwise allow code points above U+10FFFF
      const lower = byte === 0xf0 ? 0x90 : 0x80
      const upper = byte === 0xf4 ? 0x8f : 0xbf
      const byte1 = arr[i + 1]
      if (byte1 < lower || byte1 > upper) {
        rejectOrReplace(tmp, loose)
        continue
      }

      i++
      if (i + 1 >= end) {
        rejectOrReplace(tmp, loose)
        break
      }

      const byte2 = arr[i + 1]
      if (byte2 < 0x80 || byte2 > 0xbf) {
        rejectOrReplace(tmp, loose)
        continue
      }

      i++
      if (i + 1 >= end) {
        rejectOrReplace(tmp, loose)
        break
      }

      const byte3 = arr[i + 1]
      if (byte3 < 0x80 || byte3 > 0xbf) {
        rejectOrReplace(tmp, loose)
        continue
      }

      i++
      // The bounds checked above guarantee U+10000..U+10FFFF here, so the result is always a surrogate pair.
      // Split into char codes as String.fromCharCode is faster than String.fromCodePoint
      const codePoint =
        ((byte & 0x07) << 18) | ((byte1 & 0x3f) << 12) | ((byte2 & 0x3f) << 6) | (byte3 & 0x3f)
      const u = codePoint - 0x1_00_00
      tmp.push(0xd8_00 + ((u >> 10) & 0x3_ff), 0xdc_00 + (u & 0x3_ff))
      // eslint-disable-next-line sonarjs/no-duplicated-branches
    } else {
      // 0xf5..0xff never appear in well-formed UTF-8
      rejectOrReplace(tmp, loose)
    }
  }

  if (tmp.length > 0) out += String.fromCharCode.apply(String, tmp)
  return out
}
|
|
167
|
+
|
|
168
|
+
// Writes the UTF-8 form of U+FFFD (EF BF BD) into `bytes` at offset `p` and returns the next free offset
function writeReplacement(bytes, p) {
  bytes[p] = 0xef
  bytes[p + 1] = 0xbf
  bytes[p + 2] = 0xbd
  return p + 3
}

/**
 * Pure-JS UTF-8 encoder.
 *
 * Starts with a buffer sized for pure ASCII and switches once to a `length * 3` buffer on the first
 * non-ASCII char code. The result is trimmed with slice() when the buffer is larger than the output.
 *
 * @param {string} string - text to encode
 * @param {boolean} [loose] - truthy: every unpaired surrogate is encoded as U+FFFD; falsy: it throws
 * @returns {Uint8Array} the UTF-8 bytes
 * @throws {TypeError} with message E_STRICT_UNICODE on an unpaired surrogate when `loose` is falsy
 */
export function encode(string, loose) {
  const length = string.length
  let lead = null // pending lead surrogate waiting for its trail
  let small = true // true while `bytes` is still the ascii-sized buffer
  let bytes = new Uint8Array(length) // assume ascii
  let p = 0

  for (let i = 0; i < length; i++) {
    const code = string.charCodeAt(i)
    if (code < 0x80) {
      // Fast path for ascii
      if (lead) {
        // the pending lead was followed by ascii, so it is unpaired
        if (!loose) throw new TypeError(E_STRICT_UNICODE)
        p = writeReplacement(bytes, p)
        lead = null
      }

      bytes[p++] = code
      // Unroll the loop a bit for faster ops
      for (let j = 0; j < 5; j++) {
        if (i + 1 >= length) break
        const c1 = string.charCodeAt(i + 1)
        if (c1 >= 0x80) break
        bytes[p++] = c1
        i++
        if (i + 1 >= length) break
        const c2 = string.charCodeAt(i + 1)
        if (c2 >= 0x80) break
        bytes[p++] = c2
        i++
        if (i + 1 >= length) break
        const c3 = string.charCodeAt(i + 1)
        if (c3 >= 0x80) break
        bytes[p++] = c3
        i++
        if (i + 1 >= length) break
        const c4 = string.charCodeAt(i + 1)
        if (c4 >= 0x80) break
        bytes[p++] = c4
        i++
      }

      continue
    }

    if (small) {
      // TODO: use resizable array buffers? will have to return a non-resizeable one
      const bytesNew = new Uint8Array(length * 3) // maximum can be 3x of the string length in charcodes
      bytesNew.set(bytes)
      bytes = bytesNew
      small = false
    }

    // surrogate, charcodes = [d800 + a & 3ff, dc00 + b & 3ff]; codePoint = 0x1_00_00 | (a << 10) | b
    // lead: d800 - dbff
    // trail: dc00 - dfff
    if (code >= 0xd8_00 && code < 0xe0_00) {
      if (lead && code < 0xdc_00) {
        // a second lead, meaning the previous one was unpaired
        if (!loose) throw new TypeError(E_STRICT_UNICODE)
        p = writeReplacement(bytes, p)
        lead = null
        // code is still processed as a new lead
      }

      if (!lead) {
        if (code > 0xdb_ff || i + 1 >= length) {
          // lead out of range || unpaired
          if (!loose) throw new TypeError(E_STRICT_UNICODE)
          p = writeReplacement(bytes, p)
          continue
        }

        lead = code
        continue
      }

      // here, codePoint is always between 0x1_00_00 and 0x11_00_00, we encode as 4 bytes
      const codePoint = (((lead - 0xd8_00) << 10) | (code - 0xdc_00)) + 0x1_00_00
      bytes[p++] = (codePoint >> 18) | 0xf0
      bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
      bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
      bytes[p++] = (codePoint & 0x3f) | 0x80
      lead = null
      continue
    } else if (lead) {
      // the pending lead was followed by a non-surrogate, so it is unpaired
      if (!loose) throw new TypeError(E_STRICT_UNICODE)
      p = writeReplacement(bytes, p)
      lead = null
      // code is still processed
    }

    // We are left with a non-pair char code above ascii, it gets encoded to 2 or 3 bytes
    if (code < 0x8_00) {
      bytes[p++] = (code >> 6) | 0xc0
      bytes[p++] = (code & 0x3f) | 0x80
    } else {
      bytes[p++] = (code >> 12) | 0xe0
      bytes[p++] = ((code >> 6) & 0x3f) | 0x80
      bytes[p++] = (code & 0x3f) | 0x80
    }
  }

  return bytes.length === p ? bytes : bytes.slice(0, p)
}
|
package/hex.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { assertUint8 } from './assert.js'
|
|
2
2
|
import { typedView } from './array.js'
|
|
3
3
|
import * as js from './fallback/hex.js'
|
|
4
4
|
|
|
@@ -6,9 +6,10 @@ const { Buffer } = globalThis // Buffer is optional, only used when native
|
|
|
6
6
|
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
|
|
7
7
|
const { toHex: webHex } = Uint8Array.prototype // Modern engines have this
|
|
8
8
|
|
|
9
|
+
const { E_HEX } = js
|
|
10
|
+
|
|
9
11
|
export function toHex(arr) {
|
|
10
|
-
|
|
11
|
-
if (!(arr instanceof Uint8Array)) arr = new Uint8Array(arr.buffer, arr.byteOffset, arr.byteLength)
|
|
12
|
+
assertUint8(arr)
|
|
12
13
|
if (arr.length === 0) return ''
|
|
13
14
|
if (webHex && arr.toHex === webHex) return arr.toHex()
|
|
14
15
|
if (!haveNativeBuffer) return js.toHex(arr)
|
|
@@ -19,4 +20,12 @@ export function toHex(arr) {
|
|
|
19
20
|
// Unlike Buffer.from(), throws on invalid input
|
|
20
21
|
export const fromHex = Uint8Array.fromHex
|
|
21
22
|
? (str, format = 'uint8') => typedView(Uint8Array.fromHex(str), format)
|
|
22
|
-
:
|
|
23
|
+
: haveNativeBuffer
|
|
24
|
+
? (str, format = 'uint8') => {
|
|
25
|
+
if (typeof str !== 'string') throw new TypeError('Input is not a string')
|
|
26
|
+
if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
|
|
27
|
+
const buf = Buffer.from(str, 'hex') // will stop on first non-hex character, so we can just validate length
|
|
28
|
+
if (buf.length * 2 !== str.length) throw new SyntaxError(E_HEX)
|
|
29
|
+
return typedView(buf, format)
|
|
30
|
+
}
|
|
31
|
+
: (str, format = 'uint8') => typedView(js.fromHex(str), format)
|
package/package.json
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@exodus/bytes",
|
|
3
|
-
"version": "1.0.0-rc.
|
|
3
|
+
"version": "1.0.0-rc.5",
|
|
4
4
|
"description": "Various operations on Uint8Array data",
|
|
5
5
|
"scripts": {
|
|
6
6
|
"lint": "eslint .",
|
|
7
|
-
"test:v8": "npm run test:d8 --",
|
|
8
7
|
"test:javascriptcore": "npm run test:jsc --",
|
|
9
|
-
"test:
|
|
8
|
+
"test:v8": "exodus-test --engine=v8:bundle",
|
|
10
9
|
"test:jsc": "exodus-test --engine=jsc:bundle",
|
|
11
10
|
"test:spidermonkey": "exodus-test --engine=spidermonkey:bundle",
|
|
12
11
|
"test:hermes": "exodus-test --engine=hermes:bundle",
|
|
@@ -39,32 +38,62 @@
|
|
|
39
38
|
},
|
|
40
39
|
"type": "module",
|
|
41
40
|
"files": [
|
|
41
|
+
"/fallback/_utils.js",
|
|
42
|
+
"/fallback/base32.js",
|
|
42
43
|
"/fallback/base64.js",
|
|
43
44
|
"/fallback/hex.js",
|
|
45
|
+
"/fallback/utf8.js",
|
|
44
46
|
"/array.js",
|
|
45
47
|
"/assert.js",
|
|
48
|
+
"/base32.js",
|
|
49
|
+
"/base58.js",
|
|
50
|
+
"/base58check.js",
|
|
46
51
|
"/base64.js",
|
|
47
|
-
"/hex.js"
|
|
52
|
+
"/hex.js",
|
|
53
|
+
"/utf8.js"
|
|
48
54
|
],
|
|
49
55
|
"exports": {
|
|
50
56
|
"./array.js": "./array.js",
|
|
57
|
+
"./base32.js": "./base32.js",
|
|
58
|
+
"./base58.js": "./base58.js",
|
|
59
|
+
"./base58check.js": "./base58check.js",
|
|
51
60
|
"./base64.js": "./base64.js",
|
|
52
|
-
"./hex.js": "./hex.js"
|
|
61
|
+
"./hex.js": "./hex.js",
|
|
62
|
+
"./utf8.js": "./utf8.js"
|
|
63
|
+
},
|
|
64
|
+
"peerDependencies": {
|
|
65
|
+
"@exodus/crypto": "^1.0.0-rc.4"
|
|
66
|
+
},
|
|
67
|
+
"peerDependenciesMeta": {
|
|
68
|
+
"@exodus/crypto": {
|
|
69
|
+
"optional": true
|
|
70
|
+
}
|
|
53
71
|
},
|
|
54
|
-
"dependencies": {},
|
|
55
72
|
"devDependencies": {
|
|
73
|
+
"@ethersproject/strings": "^5.8.0",
|
|
74
|
+
"@exodus/crypto": "1.0.0-rc.29",
|
|
56
75
|
"@exodus/eslint-config": "^5.24.0",
|
|
57
76
|
"@exodus/prettier": "^1.0.0",
|
|
58
|
-
"@exodus/test": "^1.0.0-rc.
|
|
77
|
+
"@exodus/test": "^1.0.0-rc.107",
|
|
78
|
+
"@noble/hashes": "^2.0.1",
|
|
59
79
|
"@scure/base": "^1.2.6",
|
|
80
|
+
"@stablelib/base64": "^2.0.1",
|
|
81
|
+
"@stablelib/hex": "^2.0.1",
|
|
60
82
|
"@types/node": "^24.0.10",
|
|
61
83
|
"base-x": "^5.0.1",
|
|
62
84
|
"base32.js": "^0.1.0",
|
|
63
85
|
"base64-js": "^1.5.1",
|
|
86
|
+
"bs58": "^6.0.0",
|
|
87
|
+
"bs58check": "^4.0.0",
|
|
88
|
+
"bstring": "^0.3.9",
|
|
64
89
|
"buffer": "^6.0.3",
|
|
65
90
|
"electron": "36.5.0",
|
|
66
91
|
"eslint": "^8.44.0",
|
|
67
|
-
"
|
|
92
|
+
"fast-base64-decode": "^2.0.0",
|
|
93
|
+
"fast-base64-encode": "^1.0.0",
|
|
94
|
+
"hi-base32": "^0.5.1",
|
|
95
|
+
"jsvu": "^3.0.0",
|
|
96
|
+
"text-encoding": "^0.7.0"
|
|
68
97
|
},
|
|
69
98
|
"prettier": "@exodus/prettier",
|
|
70
99
|
"packageManager": "pnpm@10.12.1+sha256.889bac470ec93ccc3764488a19d6ba8f9c648ad5e50a9a6e4be3768a5de387a3"
|
package/utf8.js
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { assertUint8 } from './assert.js'
|
|
2
|
+
import { typedView } from './array.js'
|
|
3
|
+
import * as js from './fallback/utf8.js'
|
|
4
|
+
|
|
5
|
+
// Snapshot the globals once at module load; each may be undefined on a given runtime and is checked before use
const { Buffer, TextEncoder, TextDecoder, decodeURIComponent, escape } = globalThis // Buffer is optional
|
|
6
|
+
// Falsy when Buffer is missing or exposes TYPED_ARRAY_SUPPORT
// NOTE(review): presumably TYPED_ARRAY_SUPPORT is only defined by the userland `buffer` polyfill — confirm
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
|
|
7
|
+
// Truthy when x exists and either a native Buffer is present or x stringifies to source containing '[native code]'
const isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]')) // we consider Node.js TextDecoder/TextEncoder native
|
|
8
|
+
const haveDecoder = isNative(TextDecoder)
|
|
9
|
+
// Shared TextEncoder instance, or null when no native TextEncoder was detected
const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
|
|
10
|
+
// ignoreBOM: true means that BOM will be left as-is, i.e. will be present in the output
|
|
11
|
+
// We don't want to strip anything unexpectedly
|
|
12
|
+
// Strict decoder: fatal: true makes decode() throw on malformed input
const decoderFatal = haveDecoder ? new TextDecoder('utf8', { ignoreBOM: true, fatal: true }) : null
|
|
13
|
+
// Loose decoder: malformed input is replaced instead of throwing
const decoderLoose = haveDecoder ? new TextDecoder('utf8', { ignoreBOM: true }) : null
|
|
14
|
+
// undefined on engines without String.prototype.isWellFormed; deLoose() checks for that before calling it
const { isWellFormed } = String.prototype
|
|
15
|
+
|
|
16
|
+
// Error messages are defined by the JS fallback module so both code paths throw the same text
const { E_STRICT, E_STRICT_UNICODE } = js
|
|
17
|
+
|
|
18
|
+
const shouldUseEscapePath = Boolean(globalThis.HermesInternal) // faster only on Hermes, js path beats it on normal engines
|
|
19
|
+
|
|
20
|
+
/**
 * Applies the strict-mode check to bytes that were produced by an encoder which never throws.
 *
 * @param {string} str - the string that was encoded
 * @param {boolean} loose - when truthy no check is made
 * @param {Uint8Array} res - the encoded bytes of `str`
 * @returns {Uint8Array} `res` itself, untouched
 * @throws {TypeError} with message E_STRICT_UNICODE when `loose` is falsy and `str` is not well-formed
 */
function deLoose(str, loose, res) {
  if (loose) return res

  if (isWellFormed) {
    // The engine can answer the question directly
    if (!isWellFormed.call(str)) throw new TypeError(E_STRICT_UNICODE)
    return res
  }

  // No native check available: look for the first EF BF BD in the output.
  // Without one, nothing was replaced during encoding and the input is accepted as is.
  const limit = res.length - 2
  for (let from = 0; from < limit; ) {
    const at = res.indexOf(0xef, from)
    if (at === -1) break
    from = at + 1
    if (res[at + 1] !== 0xbf || res[at + 2] !== 0xbd) continue

    // A replacement char is present: round-trip the bytes to tell a genuine U+FFFD in the input
    // apart from one that was substituted during encoding
    if (str !== decode(res)) throw new TypeError(E_STRICT_UNICODE)
    return res
  }

  return res
}
|
|
45
|
+
|
|
46
|
+
/**
 * Encodes a string to UTF-8, picking the fastest available implementation.
 *
 * @param {string} str - text to encode
 * @param {boolean} [loose=false] - passed on to deLoose() / js.encode()
 * @returns {Uint8Array} a Buffer when the native Buffer path is taken, otherwise whatever the chosen encoder returns
 * @throws {TypeError} 'Input is not a string' for non-string input; strict-mode failures come from deLoose() / js.encode()
 */
function encode(str, loose = false) {
  if (typeof str !== 'string') throw new TypeError('Input is not a string')

  // No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder
  if (!haveNativeBuffer && !nativeEncoder) return js.encode(str, loose)

  // Buffer is faster on ascii on Node.js; TextEncoder covers Node.js, browsers, and Hermes.
  // Neither takes a `loose` flag here, so deLoose() performs the strict-mode check on their output.
  const bytes = haveNativeBuffer ? Buffer.from(str) : nativeEncoder.encode(str)
  return deLoose(str, loose, bytes)
}
|
|
53
|
+
|
|
54
|
+
// Lookup table indexed by byte value; decode() fills it on first use with escape(String.fromCharCode(byte))
let escapes

/**
 * Concatenates the `escapes` entries for arr[start] .. arr[end - 1].
 *
 * @param {Uint8Array} arr - source bytes
 * @param {number} start - first index, inclusive
 * @param {number} end - last index, exclusive
 * @returns {string} the joined table entries
 */
function toEscapesPart(arr, start, end) {
  const unrolledEnd = end - 3
  let text = ''
  let pos = start

  // Unrolled loop is faster: four bytes per round while at least four remain
  for (; pos < unrolledEnd; pos += 4) {
    const b0 = arr[pos]
    const b1 = arr[pos + 1]
    const b2 = arr[pos + 2]
    const b3 = arr[pos + 3]
    text += escapes[b0]
    text += escapes[b1]
    text += escapes[b2]
    text += escapes[b3]
  }

  // Up to three leftover bytes
  for (; pos < end; pos++) text += escapes[arr[pos]]
  return text
}
|
|
75
|
+
|
|
76
|
+
/**
 * Decodes UTF-8 bytes to a string, picking the fastest available implementation.
 *
 * @param {Uint8Array} arr - bytes to decode, checked with assertUint8()
 * @param {boolean} [loose=false] - truthy selects the non-fatal decoder / the loose JS fallback
 * @returns {string} the decoded text
 * @throws {TypeError} with message E_STRICT when the escape path rejects the input and `loose` is falsy;
 *   the native decoder and js.decode() raise their own errors
 */
function decode(arr, loose = false) {
  assertUint8(arr)

  if (haveDecoder) {
    // Node.js and browsers
    const decoder = loose ? decoderLoose : decoderFatal
    return decoder.decode(arr)
  }
  // No reason to use native Buffer: it's not faster than TextDecoder, needs rechecks in non-loose mode, and Node.js has TextDecoder

  // This codepath gives a ~2x perf boost on Hermes
  if (shouldUseEscapePath && escape && decodeURIComponent) {
    // escape() is precalculated once for every possible byte value
    if (!escapes) escapes = Array.from({ length: 256 }, (_, i) => escape(String.fromCharCode(i)))

    const length = arr.length
    let escaped
    if (length <= 30_000) {
      escaped = toEscapesPart(arr, 0, length)
    } else {
      // Limit concatenation to avoid excessive GC
      // TODO: recheck thresholds on Hermes (taken from hex)
      const parts = []
      for (let from = 0; from < length; from += 500) {
        parts.push(toEscapesPart(arr, from, Math.min(from + 500, length)))
      }

      escaped = parts.join('')
      parts.length = 0
    }

    try {
      // The percent-escaped ascii text is turned back into the string by decodeURIComponent
      return decodeURIComponent(escaped)
    } catch {
      if (!loose) throw new TypeError(E_STRICT)
      // Ok, we have to use manual implementation for loose decoder
    }
  }

  return js.decode(arr, loose)
}
|
|
113
|
+
|
|
114
|
+
// Strict string -> UTF-8 bytes: runs encode(str, false) and hands the bytes to typedView with `format` (default 'uint8')
// NOTE(review): typedView comes from ./array.js; presumably it converts the bytes to the requested view type — confirm
export const utf8fromString = (str, format = 'uint8') => typedView(encode(str, false), format)
|
|
115
|
+
// Loose variant of utf8fromString: runs encode(str, true), so the strict well-formedness check is skipped
export const utf8fromStringLoose = (str, format = 'uint8') => typedView(encode(str, true), format)
|
|
116
|
+
// Strict UTF-8 bytes -> string: runs decode(arr, false)
export const utf8toString = (arr) => decode(arr, false)
|
|
117
|
+
// Loose UTF-8 bytes -> string: runs decode(arr, true)
export const utf8toStringLoose = (arr) => decode(arr, true)
|