@exodus/bytes 1.0.0-rc.4 → 1.0.0-rc.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assert.js +0 -4
- package/base32.js +12 -5
- package/base58.js +212 -0
- package/base58check.js +68 -0
- package/base64.js +72 -40
- package/fallback/base32.js +70 -35
- package/fallback/base64.js +56 -26
- package/fallback/hex.js +137 -49
- package/fallback/latin1.js +89 -0
- package/fallback/utf8.js +77 -112
- package/hex.js +11 -1
- package/package.json +29 -6
- package/utf8.js +20 -49
package/fallback/utf8.js
CHANGED
|
@@ -5,72 +5,53 @@ const replacementPoint = 0xff_fd
|
|
|
5
5
|
|
|
6
6
|
// https://encoding.spec.whatwg.org/#utf-8-decoder
|
|
7
7
|
// We are most likely in loose mode, for non-loose escape & decodeURIComponent solved everything
|
|
8
|
-
export function decode(arr, loose) {
|
|
9
|
-
|
|
8
|
+
export function decode(arr, loose, start = 0) {
|
|
9
|
+
start |= 0
|
|
10
10
|
const end = arr.length
|
|
11
11
|
let out = ''
|
|
12
|
-
const
|
|
12
|
+
const chunkSize = 0x2_00 // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
|
|
13
|
+
const tmpSize = Math.min(end - start, chunkSize + 1) // need 1 extra slot for last codepoint, which can be 2 charcodes
|
|
14
|
+
const tmp = new Array(tmpSize).fill(0)
|
|
15
|
+
let ti = 0
|
|
13
16
|
|
|
14
17
|
for (let i = start; i < end; i++) {
|
|
15
|
-
if (
|
|
16
|
-
//
|
|
17
|
-
// length can be off by a few as large code points produce two utf-16 char codes, also we overshoot in unrolled loop
|
|
18
|
+
if (ti >= chunkSize) {
|
|
19
|
+
tmp.length = ti // can be larger by 1 if last codepoint is two charcodes
|
|
18
20
|
out += String.fromCharCode.apply(String, tmp)
|
|
19
|
-
tmp.length
|
|
21
|
+
if (tmp.length <= chunkSize) tmp.push(0) // restore 1 extra slot for last codepoint
|
|
22
|
+
ti = 0
|
|
20
23
|
}
|
|
21
24
|
|
|
22
25
|
const byte = arr[i]
|
|
23
26
|
if (byte < 0x80) {
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
//
|
|
27
|
-
for (let j = 0; j < 5; j++) {
|
|
28
|
-
if (i + 1 >= end) break
|
|
29
|
-
const byte1 = arr[i + 1]
|
|
30
|
-
if (byte1 >= 0x80) break
|
|
31
|
-
tmp.push(byte1)
|
|
32
|
-
i++
|
|
33
|
-
if (i + 1 >= end) break
|
|
34
|
-
const byte2 = arr[i + 1]
|
|
35
|
-
if (byte2 >= 0x80) break
|
|
36
|
-
tmp.push(byte2)
|
|
37
|
-
i++
|
|
38
|
-
if (i + 1 >= end) break
|
|
39
|
-
const byte3 = arr[i + 1]
|
|
40
|
-
if (byte3 >= 0x80) break
|
|
41
|
-
tmp.push(byte3)
|
|
42
|
-
i++
|
|
43
|
-
if (i + 1 >= end) break
|
|
44
|
-
const byte4 = arr[i + 1]
|
|
45
|
-
if (byte4 >= 0x80) break
|
|
46
|
-
tmp.push(byte4)
|
|
47
|
-
i++
|
|
48
|
-
}
|
|
27
|
+
tmp[ti++] = byte
|
|
28
|
+
// ascii fast path is in ../utf8.js, this is called only on non-ascii input
|
|
29
|
+
// so we don't unroll this anymore
|
|
49
30
|
} else if (byte < 0xc2) {
|
|
50
31
|
if (!loose) throw new TypeError(E_STRICT)
|
|
51
|
-
tmp
|
|
32
|
+
tmp[ti++] = replacementPoint
|
|
52
33
|
} else if (byte < 0xe0) {
|
|
53
34
|
// need 1 more
|
|
54
35
|
if (i + 1 >= end) {
|
|
55
36
|
if (!loose) throw new TypeError(E_STRICT)
|
|
56
|
-
tmp
|
|
37
|
+
tmp[ti++] = replacementPoint
|
|
57
38
|
break
|
|
58
39
|
}
|
|
59
40
|
|
|
60
41
|
const byte1 = arr[i + 1]
|
|
61
42
|
if (byte1 < 0x80 || byte1 > 0xbf) {
|
|
62
43
|
if (!loose) throw new TypeError(E_STRICT)
|
|
63
|
-
tmp
|
|
44
|
+
tmp[ti++] = replacementPoint
|
|
64
45
|
continue
|
|
65
46
|
}
|
|
66
47
|
|
|
67
48
|
i++
|
|
68
|
-
tmp
|
|
49
|
+
tmp[ti++] = ((byte & 0x1f) << 6) | (byte1 & 0x3f)
|
|
69
50
|
} else if (byte < 0xf0) {
|
|
70
51
|
// need 2 more
|
|
71
52
|
if (i + 1 >= end) {
|
|
72
53
|
if (!loose) throw new TypeError(E_STRICT)
|
|
73
|
-
tmp
|
|
54
|
+
tmp[ti++] = replacementPoint
|
|
74
55
|
break
|
|
75
56
|
}
|
|
76
57
|
|
|
@@ -79,31 +60,31 @@ export function decode(arr, loose) {
|
|
|
79
60
|
const byte1 = arr[i + 1]
|
|
80
61
|
if (byte1 < lower || byte1 > upper) {
|
|
81
62
|
if (!loose) throw new TypeError(E_STRICT)
|
|
82
|
-
tmp
|
|
63
|
+
tmp[ti++] = replacementPoint
|
|
83
64
|
continue
|
|
84
65
|
}
|
|
85
66
|
|
|
86
67
|
i++
|
|
87
68
|
if (i + 1 >= end) {
|
|
88
69
|
if (!loose) throw new TypeError(E_STRICT)
|
|
89
|
-
tmp
|
|
70
|
+
tmp[ti++] = replacementPoint
|
|
90
71
|
break
|
|
91
72
|
}
|
|
92
73
|
|
|
93
74
|
const byte2 = arr[i + 1]
|
|
94
75
|
if (byte2 < 0x80 || byte2 > 0xbf) {
|
|
95
76
|
if (!loose) throw new TypeError(E_STRICT)
|
|
96
|
-
tmp
|
|
77
|
+
tmp[ti++] = replacementPoint
|
|
97
78
|
continue
|
|
98
79
|
}
|
|
99
80
|
|
|
100
81
|
i++
|
|
101
|
-
tmp
|
|
82
|
+
tmp[ti++] = ((byte & 0xf) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f)
|
|
102
83
|
} else if (byte <= 0xf4) {
|
|
103
84
|
// need 3 more
|
|
104
85
|
if (i + 1 >= end) {
|
|
105
86
|
if (!loose) throw new TypeError(E_STRICT)
|
|
106
|
-
tmp
|
|
87
|
+
tmp[ti++] = replacementPoint
|
|
107
88
|
break
|
|
108
89
|
}
|
|
109
90
|
|
|
@@ -112,35 +93,35 @@ export function decode(arr, loose) {
|
|
|
112
93
|
const byte1 = arr[i + 1]
|
|
113
94
|
if (byte1 < lower || byte1 > upper) {
|
|
114
95
|
if (!loose) throw new TypeError(E_STRICT)
|
|
115
|
-
tmp
|
|
96
|
+
tmp[ti++] = replacementPoint
|
|
116
97
|
continue
|
|
117
98
|
}
|
|
118
99
|
|
|
119
100
|
i++
|
|
120
101
|
if (i + 1 >= end) {
|
|
121
102
|
if (!loose) throw new TypeError(E_STRICT)
|
|
122
|
-
tmp
|
|
103
|
+
tmp[ti++] = replacementPoint
|
|
123
104
|
break
|
|
124
105
|
}
|
|
125
106
|
|
|
126
107
|
const byte2 = arr[i + 1]
|
|
127
108
|
if (byte2 < 0x80 || byte2 > 0xbf) {
|
|
128
109
|
if (!loose) throw new TypeError(E_STRICT)
|
|
129
|
-
tmp
|
|
110
|
+
tmp[ti++] = replacementPoint
|
|
130
111
|
continue
|
|
131
112
|
}
|
|
132
113
|
|
|
133
114
|
i++
|
|
134
115
|
if (i + 1 >= end) {
|
|
135
116
|
if (!loose) throw new TypeError(E_STRICT)
|
|
136
|
-
tmp
|
|
117
|
+
tmp[ti++] = replacementPoint
|
|
137
118
|
break
|
|
138
119
|
}
|
|
139
120
|
|
|
140
121
|
const byte3 = arr[i + 1]
|
|
141
122
|
if (byte3 < 0x80 || byte3 > 0xbf) {
|
|
142
123
|
if (!loose) throw new TypeError(E_STRICT)
|
|
143
|
-
tmp
|
|
124
|
+
tmp[ti++] = replacementPoint
|
|
144
125
|
continue
|
|
145
126
|
}
|
|
146
127
|
|
|
@@ -150,71 +131,65 @@ export function decode(arr, loose) {
|
|
|
150
131
|
if (codePoint > 0xff_ff) {
|
|
151
132
|
// split into char codes as String.fromCharCode is faster than String.fromCodePoint
|
|
152
133
|
const u = codePoint - 0x1_00_00
|
|
153
|
-
tmp
|
|
134
|
+
tmp[ti++] = 0xd8_00 + ((u >> 10) & 0x3_ff)
|
|
135
|
+
tmp[ti++] = 0xdc_00 + (u & 0x3_ff)
|
|
154
136
|
} else {
|
|
155
|
-
tmp
|
|
137
|
+
tmp[ti++] = codePoint
|
|
156
138
|
}
|
|
157
139
|
// eslint-disable-next-line sonarjs/no-duplicated-branches
|
|
158
140
|
} else {
|
|
159
141
|
if (!loose) throw new TypeError(E_STRICT)
|
|
160
|
-
tmp
|
|
142
|
+
tmp[ti++] = replacementPoint
|
|
161
143
|
}
|
|
162
144
|
}
|
|
163
145
|
|
|
164
|
-
if (
|
|
165
|
-
|
|
146
|
+
if (ti === 0) return out
|
|
147
|
+
tmp.length = ti
|
|
148
|
+
return out + String.fromCharCode.apply(String, tmp)
|
|
166
149
|
}
|
|
167
150
|
|
|
168
151
|
export function encode(string, loose) {
|
|
169
152
|
const length = string.length
|
|
170
|
-
let lead = null
|
|
171
153
|
let small = true
|
|
172
154
|
let bytes = new Uint8Array(length) // assume ascii
|
|
173
155
|
let p = 0
|
|
174
156
|
|
|
175
157
|
for (let i = 0; i < length; i++) {
|
|
176
|
-
|
|
158
|
+
let code = string.charCodeAt(i)
|
|
177
159
|
if (code < 0x80) {
|
|
178
|
-
// Fast path for ascii
|
|
179
|
-
if (lead) {
|
|
180
|
-
if (!loose) throw new TypeError(E_STRICT_UNICODE)
|
|
181
|
-
bytes[p++] = 0xef
|
|
182
|
-
bytes[p++] = 0xbf
|
|
183
|
-
bytes[p++] = 0xbd
|
|
184
|
-
lead = null
|
|
185
|
-
}
|
|
186
|
-
|
|
187
160
|
bytes[p++] = code
|
|
188
161
|
// Unroll the loop a bit for faster ops
|
|
189
|
-
|
|
190
|
-
if (i + 1 >= length) break
|
|
191
|
-
const c1 = string.charCodeAt(i + 1)
|
|
192
|
-
if (c1 >= 0x80) break
|
|
193
|
-
bytes[p++] = c1
|
|
162
|
+
while (true) {
|
|
194
163
|
i++
|
|
195
|
-
if (i
|
|
196
|
-
|
|
197
|
-
if (
|
|
198
|
-
bytes[p++] =
|
|
164
|
+
if (i >= length) break
|
|
165
|
+
code = string.charCodeAt(i)
|
|
166
|
+
if (code >= 0x80) break
|
|
167
|
+
bytes[p++] = code
|
|
199
168
|
i++
|
|
200
|
-
if (i
|
|
201
|
-
|
|
202
|
-
if (
|
|
203
|
-
bytes[p++] =
|
|
169
|
+
if (i >= length) break
|
|
170
|
+
code = string.charCodeAt(i)
|
|
171
|
+
if (code >= 0x80) break
|
|
172
|
+
bytes[p++] = code
|
|
204
173
|
i++
|
|
205
|
-
if (i
|
|
206
|
-
|
|
207
|
-
if (
|
|
208
|
-
bytes[p++] =
|
|
174
|
+
if (i >= length) break
|
|
175
|
+
code = string.charCodeAt(i)
|
|
176
|
+
if (code >= 0x80) break
|
|
177
|
+
bytes[p++] = code
|
|
209
178
|
i++
|
|
179
|
+
if (i >= length) break
|
|
180
|
+
code = string.charCodeAt(i)
|
|
181
|
+
if (code >= 0x80) break
|
|
182
|
+
bytes[p++] = code
|
|
210
183
|
}
|
|
211
184
|
|
|
212
|
-
|
|
185
|
+
if (i >= length) break
|
|
186
|
+
// now, code is present and >= 0x80
|
|
213
187
|
}
|
|
214
188
|
|
|
215
189
|
if (small) {
|
|
216
190
|
// TODO: use resizable array buffers? will have to return a non-resizeable one
|
|
217
|
-
|
|
191
|
+
if (p !== i) throw new Error('Unreachable') // Here, p === i (only when small is still true)
|
|
192
|
+
const bytesNew = new Uint8Array(p + (length - i) * 3) // maximum can be 3x of the string length in charcodes
|
|
218
193
|
bytesNew.set(bytes)
|
|
219
194
|
bytes = bytesNew
|
|
220
195
|
small = false
|
|
@@ -224,45 +199,35 @@ export function encode(string, loose) {
|
|
|
224
199
|
// lead: d800 - dbff
|
|
225
200
|
// trail: dc00 - dfff
|
|
226
201
|
if (code >= 0xd8_00 && code < 0xe0_00) {
|
|
227
|
-
|
|
228
|
-
|
|
202
|
+
// Can't be a valid trail as we already processed that below
|
|
203
|
+
|
|
204
|
+
if (code > 0xdb_ff || i + 1 >= length) {
|
|
205
|
+
// An unexpected trail or a lead at the very end of input
|
|
229
206
|
if (!loose) throw new TypeError(E_STRICT_UNICODE)
|
|
230
207
|
bytes[p++] = 0xef
|
|
231
208
|
bytes[p++] = 0xbf
|
|
232
209
|
bytes[p++] = 0xbd
|
|
233
|
-
|
|
234
|
-
// code is still processed as a new lead
|
|
210
|
+
continue
|
|
235
211
|
}
|
|
236
212
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
lead
|
|
248
|
-
|
|
213
|
+
const next = string.charCodeAt(i + 1) // Process valid pairs immediately
|
|
214
|
+
if (next >= 0xdc_00 && next < 0xe0_00) {
|
|
215
|
+
// here, codePoint is always between 0x1_00_00 and 0x11_00_00, we encode as 4 bytes
|
|
216
|
+
const codePoint = (((code - 0xd8_00) << 10) | (next - 0xdc_00)) + 0x1_00_00
|
|
217
|
+
bytes[p++] = (codePoint >> 18) | 0xf0
|
|
218
|
+
bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
|
|
219
|
+
bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
|
|
220
|
+
bytes[p++] = (codePoint & 0x3f) | 0x80
|
|
221
|
+
i++ // consume next
|
|
222
|
+
} else {
|
|
223
|
+
// Next is not a trail, leave next unconsumed but process unmatched lead error
|
|
224
|
+
if (!loose) throw new TypeError(E_STRICT_UNICODE)
|
|
225
|
+
bytes[p++] = 0xef
|
|
226
|
+
bytes[p++] = 0xbf
|
|
227
|
+
bytes[p++] = 0xbd
|
|
249
228
|
}
|
|
250
229
|
|
|
251
|
-
// here, codePoint is always between 0x1_00_00 and 0x11_00_00, we encode as 4 bytes
|
|
252
|
-
const codePoint = (((lead - 0xd8_00) << 10) | (code - 0xdc_00)) + 0x1_00_00
|
|
253
|
-
bytes[p++] = (codePoint >> 18) | 0xf0
|
|
254
|
-
bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
|
|
255
|
-
bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
|
|
256
|
-
bytes[p++] = (codePoint & 0x3f) | 0x80
|
|
257
|
-
lead = null
|
|
258
230
|
continue
|
|
259
|
-
} else if (lead) {
|
|
260
|
-
if (!loose) throw new TypeError(E_STRICT_UNICODE)
|
|
261
|
-
bytes[p++] = 0xef
|
|
262
|
-
bytes[p++] = 0xbf
|
|
263
|
-
bytes[p++] = 0xbd
|
|
264
|
-
lead = null
|
|
265
|
-
// code is still processed
|
|
266
231
|
}
|
|
267
232
|
|
|
268
233
|
// We are left with a non-pair char code above ascii, it gets encoded to 2 or 3 bytes
|
package/hex.js
CHANGED
|
@@ -6,6 +6,8 @@ const { Buffer } = globalThis // Buffer is optional, only used when native
|
|
|
6
6
|
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
|
|
7
7
|
const { toHex: webHex } = Uint8Array.prototype // Modern engines have this
|
|
8
8
|
|
|
9
|
+
const { E_HEX } = js
|
|
10
|
+
|
|
9
11
|
export function toHex(arr) {
|
|
10
12
|
assertUint8(arr)
|
|
11
13
|
if (arr.length === 0) return ''
|
|
@@ -18,4 +20,12 @@ export function toHex(arr) {
|
|
|
18
20
|
// Unlike Buffer.from(), throws on invalid input
|
|
19
21
|
export const fromHex = Uint8Array.fromHex
|
|
20
22
|
? (str, format = 'uint8') => typedView(Uint8Array.fromHex(str), format)
|
|
21
|
-
:
|
|
23
|
+
: haveNativeBuffer
|
|
24
|
+
? (str, format = 'uint8') => {
|
|
25
|
+
if (typeof str !== 'string') throw new TypeError('Input is not a string')
|
|
26
|
+
if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
|
|
27
|
+
const buf = Buffer.from(str, 'hex') // will stop on first non-hex character, so we can just validate length
|
|
28
|
+
if (buf.length * 2 !== str.length) throw new SyntaxError(E_HEX)
|
|
29
|
+
return typedView(buf, format)
|
|
30
|
+
}
|
|
31
|
+
: (str, format = 'uint8') => typedView(js.fromHex(str), format)
|
package/package.json
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@exodus/bytes",
|
|
3
|
-
"version": "1.0.0-rc.
|
|
3
|
+
"version": "1.0.0-rc.6",
|
|
4
4
|
"description": "Various operations on Uint8Array data",
|
|
5
5
|
"scripts": {
|
|
6
6
|
"lint": "eslint .",
|
|
7
|
-
"test:v8": "npm run test:d8 --",
|
|
8
7
|
"test:javascriptcore": "npm run test:jsc --",
|
|
9
|
-
"test:
|
|
8
|
+
"test:v8": "exodus-test --engine=v8:bundle",
|
|
10
9
|
"test:jsc": "exodus-test --engine=jsc:bundle",
|
|
11
10
|
"test:spidermonkey": "exodus-test --engine=spidermonkey:bundle",
|
|
12
11
|
"test:hermes": "exodus-test --engine=hermes:bundle",
|
|
@@ -40,6 +39,7 @@
|
|
|
40
39
|
"type": "module",
|
|
41
40
|
"files": [
|
|
42
41
|
"/fallback/_utils.js",
|
|
42
|
+
"/fallback/latin1.js",
|
|
43
43
|
"/fallback/base32.js",
|
|
44
44
|
"/fallback/base64.js",
|
|
45
45
|
"/fallback/hex.js",
|
|
@@ -47,6 +47,8 @@
|
|
|
47
47
|
"/array.js",
|
|
48
48
|
"/assert.js",
|
|
49
49
|
"/base32.js",
|
|
50
|
+
"/base58.js",
|
|
51
|
+
"/base58check.js",
|
|
50
52
|
"/base64.js",
|
|
51
53
|
"/hex.js",
|
|
52
54
|
"/utf8.js"
|
|
@@ -54,26 +56,47 @@
|
|
|
54
56
|
"exports": {
|
|
55
57
|
"./array.js": "./array.js",
|
|
56
58
|
"./base32.js": "./base32.js",
|
|
59
|
+
"./base58.js": "./base58.js",
|
|
60
|
+
"./base58check.js": "./base58check.js",
|
|
57
61
|
"./base64.js": "./base64.js",
|
|
58
62
|
"./hex.js": "./hex.js",
|
|
59
63
|
"./utf8.js": "./utf8.js"
|
|
60
64
|
},
|
|
65
|
+
"peerDependencies": {
|
|
66
|
+
"@exodus/crypto": "^1.0.0-rc.4"
|
|
67
|
+
},
|
|
68
|
+
"peerDependenciesMeta": {
|
|
69
|
+
"@exodus/crypto": {
|
|
70
|
+
"optional": true
|
|
71
|
+
}
|
|
72
|
+
},
|
|
61
73
|
"devDependencies": {
|
|
74
|
+
"@ethersproject/strings": "^5.8.0",
|
|
75
|
+
"@exodus/crypto": "1.0.0-rc.29",
|
|
62
76
|
"@exodus/eslint-config": "^5.24.0",
|
|
63
77
|
"@exodus/prettier": "^1.0.0",
|
|
64
|
-
"@exodus/test": "^1.0.0-rc.
|
|
78
|
+
"@exodus/test": "^1.0.0-rc.108",
|
|
79
|
+
"@noble/hashes": "^2.0.1",
|
|
65
80
|
"@scure/base": "^1.2.6",
|
|
66
|
-
"@
|
|
81
|
+
"@stablelib/base64": "^2.0.1",
|
|
82
|
+
"@stablelib/hex": "^2.0.1",
|
|
83
|
+
"@types/node": "^22.13.0",
|
|
67
84
|
"base-x": "^5.0.1",
|
|
68
85
|
"base32.js": "^0.1.0",
|
|
69
86
|
"base64-js": "^1.5.1",
|
|
87
|
+
"bs58": "^6.0.0",
|
|
88
|
+
"bs58check": "^4.0.0",
|
|
89
|
+
"bstring": "^0.3.9",
|
|
70
90
|
"buffer": "^6.0.3",
|
|
71
91
|
"electron": "36.5.0",
|
|
72
92
|
"eslint": "^8.44.0",
|
|
73
93
|
"fast-base64-decode": "^2.0.0",
|
|
94
|
+
"fast-base64-encode": "^1.0.0",
|
|
95
|
+
"hextreme": "^1.0.7",
|
|
74
96
|
"hi-base32": "^0.5.1",
|
|
75
97
|
"jsvu": "^3.0.0",
|
|
76
|
-
"text-encoding": "^0.7.0"
|
|
98
|
+
"text-encoding": "^0.7.0",
|
|
99
|
+
"typescript": "^5.9.3"
|
|
77
100
|
},
|
|
78
101
|
"prettier": "@exodus/prettier",
|
|
79
102
|
"packageManager": "pnpm@10.12.1+sha256.889bac470ec93ccc3764488a19d6ba8f9c648ad5e50a9a6e4be3768a5de387a3"
|
package/utf8.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { assertUint8 } from './assert.js'
|
|
2
2
|
import { typedView } from './array.js'
|
|
3
3
|
import * as js from './fallback/utf8.js'
|
|
4
|
+
import { asciiPrefix, decodeLatin1 } from './fallback/latin1.js'
|
|
4
5
|
|
|
5
6
|
const { Buffer, TextEncoder, TextDecoder, decodeURIComponent, escape } = globalThis // Buffer is optional
|
|
6
7
|
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
|
|
@@ -11,13 +12,20 @@ const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
|
|
|
11
12
|
// We don't want to strip anything unexpectedly
|
|
12
13
|
const decoderFatal = haveDecoder ? new TextDecoder('utf8', { ignoreBOM: true, fatal: true }) : null
|
|
13
14
|
const decoderLoose = haveDecoder ? new TextDecoder('utf8', { ignoreBOM: true }) : null
|
|
15
|
+
const { isWellFormed } = String.prototype
|
|
14
16
|
|
|
15
17
|
const { E_STRICT, E_STRICT_UNICODE } = js
|
|
16
18
|
|
|
17
19
|
const shouldUseEscapePath = Boolean(globalThis.HermesInternal) // faster only on Hermes, js path beats it on normal engines
|
|
18
20
|
|
|
19
21
|
function deLoose(str, loose, res) {
|
|
20
|
-
if (loose) return res
|
|
22
|
+
if (loose || str.length === res.length) return res // length is equal only for ascii, which is automatically fine
|
|
23
|
+
if (isWellFormed) {
|
|
24
|
+
// We have a fast native method
|
|
25
|
+
if (isWellFormed.call(str)) return res
|
|
26
|
+
throw new TypeError(E_STRICT_UNICODE)
|
|
27
|
+
}
|
|
28
|
+
|
|
21
29
|
// Recheck if the string was encoded correctly
|
|
22
30
|
let start = 0
|
|
23
31
|
const last = res.length - 2
|
|
@@ -28,7 +36,7 @@ function deLoose(str, loose, res) {
|
|
|
28
36
|
start = pos + 1
|
|
29
37
|
if (res[pos + 1] === 0xbf && res[pos + 2] === 0xbd) {
|
|
30
38
|
// Found a replacement char in output, need to recheck if we encoded the input correctly
|
|
31
|
-
|
|
39
|
+
if (str !== decode(res)) throw new TypeError(E_STRICT_UNICODE)
|
|
32
40
|
return res
|
|
33
41
|
}
|
|
34
42
|
}
|
|
@@ -37,71 +45,34 @@ function deLoose(str, loose, res) {
|
|
|
37
45
|
}
|
|
38
46
|
|
|
39
47
|
function encode(str, loose = false) {
|
|
40
|
-
|
|
48
|
+
if (typeof str !== 'string') throw new TypeError('Input is not a string')
|
|
41
49
|
if (haveNativeBuffer) return deLoose(str, loose, Buffer.from(str)) // faster on ascii on Node.js
|
|
42
50
|
if (nativeEncoder) return deLoose(str, loose, nativeEncoder.encode(str)) // Node.js, browsers, and Hermes
|
|
43
51
|
// No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder
|
|
44
52
|
return js.encode(str, loose)
|
|
45
53
|
}
|
|
46
54
|
|
|
47
|
-
let escapes
|
|
48
|
-
|
|
49
|
-
function toEscapesPart(arr, start, end) {
|
|
50
|
-
let o = ''
|
|
51
|
-
let i = start
|
|
52
|
-
const last3 = end - 3
|
|
53
|
-
// Unrolled loop is faster
|
|
54
|
-
while (i < last3) {
|
|
55
|
-
const a = arr[i++]
|
|
56
|
-
const b = arr[i++]
|
|
57
|
-
const c = arr[i++]
|
|
58
|
-
const d = arr[i++]
|
|
59
|
-
o += escapes[a]
|
|
60
|
-
o += escapes[b]
|
|
61
|
-
o += escapes[c]
|
|
62
|
-
o += escapes[d]
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
while (i < end) o += escapes[arr[i++]]
|
|
66
|
-
return o
|
|
67
|
-
}
|
|
68
|
-
|
|
69
55
|
function decode(arr, loose = false) {
|
|
70
56
|
assertUint8(arr)
|
|
71
57
|
if (haveDecoder) return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr) // Node.js and browsers
|
|
72
58
|
// No reason to use native Buffer: it's not faster than TextDecoder, needs rechecks in non-loose mode, and Node.js has TextDecoder
|
|
73
59
|
|
|
74
|
-
//
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
const length = arr.length
|
|
78
|
-
let o
|
|
79
|
-
if (length > 30_000) {
|
|
80
|
-
// Limit concatenation to avoid excessive GC
|
|
81
|
-
// TODO: recheck thresholds on Hermes (taken from hex)
|
|
82
|
-
const concat = []
|
|
83
|
-
for (let i = 0; i < length; ) {
|
|
84
|
-
const step = i + 500
|
|
85
|
-
const end = step > length ? length : step
|
|
86
|
-
concat.push(toEscapesPart(arr, i, end))
|
|
87
|
-
i = end
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
o = concat.join('')
|
|
91
|
-
concat.length = 0
|
|
92
|
-
} else {
|
|
93
|
-
o = toEscapesPart(arr, 0, length)
|
|
94
|
-
}
|
|
60
|
+
// Fast path for ASCII prefix, this is faster than all alternatives below
|
|
61
|
+
const prefix = decodeLatin1(arr, 0, asciiPrefix(arr))
|
|
62
|
+
if (prefix.length === arr.length) return prefix
|
|
95
63
|
|
|
64
|
+
// This codepath gives a ~3x perf boost on Hermes
|
|
65
|
+
if (shouldUseEscapePath && escape && decodeURIComponent) {
|
|
66
|
+
const o = escape(decodeLatin1(arr, prefix.length, arr.length))
|
|
96
67
|
try {
|
|
97
|
-
return decodeURIComponent(o) //
|
|
68
|
+
return prefix + decodeURIComponent(o) // Latin1 to utf8
|
|
98
69
|
} catch {
|
|
99
70
|
if (!loose) throw new TypeError(E_STRICT)
|
|
100
71
|
// Ok, we have to use manual implementation for loose decoder
|
|
101
72
|
}
|
|
102
73
|
}
|
|
103
74
|
|
|
104
|
-
return js.decode(arr, loose)
|
|
75
|
+
return prefix + js.decode(arr, loose, prefix.length)
|
|
105
76
|
}
|
|
106
77
|
|
|
107
78
|
export const utf8fromString = (str, format = 'uint8') => typedView(encode(str, false), format)
|