@exodus/bytes 1.11.0 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -35
- package/base58.js +3 -3
- package/base64.js +7 -6
- package/bech32.js +3 -3
- package/encoding-browser.browser.js +43 -17
- package/fallback/_utils.js +7 -123
- package/fallback/base32.js +3 -3
- package/fallback/base58check.js +3 -3
- package/fallback/base64.js +2 -3
- package/fallback/encoding.api.js +0 -43
- package/fallback/encoding.js +41 -2
- package/fallback/encoding.labels.js +20 -16
- package/fallback/hex.js +3 -4
- package/fallback/latin1.js +6 -6
- package/fallback/multi-byte.table.js +17 -28
- package/fallback/percent.js +1 -1
- package/fallback/platform.browser.js +31 -0
- package/fallback/platform.js +2 -0
- package/fallback/platform.native.js +97 -0
- package/fallback/single-byte.encodings.js +40 -49
- package/fallback/single-byte.js +4 -4
- package/fallback/utf16.js +69 -2
- package/fallback/utf8.auto.browser.js +2 -0
- package/fallback/utf8.auto.js +1 -0
- package/fallback/utf8.auto.native.js +1 -0
- package/fallback/utf8.js +25 -3
- package/hex.js +6 -8
- package/hex.node.js +2 -3
- package/multi-byte.js +2 -2
- package/multi-byte.node.js +3 -3
- package/package.json +32 -9
- package/single-byte.js +6 -6
- package/single-byte.node.js +4 -4
- package/utf16.browser.js +8 -0
- package/utf16.js +1 -75
- package/utf16.native.js +22 -0
- package/utf16.node.js +5 -20
- package/utf8.js +9 -28
- package/utf8.node.js +3 -4
- package/whatwg.js +6 -2
package/fallback/utf16.js
CHANGED
|
@@ -1,14 +1,81 @@
|
|
|
1
1
|
import { decodeUCS2, encodeCharcodes } from './latin1.js'
|
|
2
|
-
import {
|
|
2
|
+
import { assertU8, E_STRING, E_STRICT_UNICODE } from './_utils.js'
|
|
3
|
+
import { nativeDecoder, isLE } from './platform.js'
|
|
3
4
|
|
|
4
5
|
export const E_STRICT = 'Input is not well-formed utf16'
|
|
5
|
-
|
|
6
|
+
const isWellFormedStr = String.prototype.isWellFormed
|
|
7
|
+
const toWellFormedStr = /* @__PURE__ */ (() => String.prototype.toWellFormed)()
|
|
6
8
|
|
|
7
9
|
const replacementCodepoint = 0xff_fd
|
|
8
10
|
const replacementCodepointSwapped = 0xfd_ff
|
|
9
11
|
|
|
10
12
|
const to16 = (a) => new Uint16Array(a.buffer, a.byteOffset, a.byteLength / 2) // Requires checked length and alignment!
|
|
11
13
|
|
|
14
|
+
export function encodeApi(str, loose, format) {
|
|
15
|
+
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
16
|
+
if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
|
|
17
|
+
throw new TypeError('Unknown format')
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
// On v8 and SpiderMonkey, check via isWellFormed is faster than js
|
|
21
|
+
// On JSC, check during loop is faster than isWellFormed
|
|
22
|
+
// If isWellFormed is available, we skip check during decoding and recheck after
|
|
23
|
+
// If isWellFormed is unavailable, we check in js during decoding
|
|
24
|
+
if (!loose && isWellFormedStr && !isWellFormedStr.call(str)) throw new TypeError(E_STRICT_UNICODE)
|
|
25
|
+
const shouldSwap = (isLE && format === 'uint8-be') || (!isLE && format === 'uint8-le')
|
|
26
|
+
const u16 = encode(str, loose, !loose && isWellFormedStr, shouldSwap)
|
|
27
|
+
|
|
28
|
+
// Bytes are already swapped and format is already checked, we need to just cast the view
|
|
29
|
+
return format === 'uint16' ? u16 : new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength)
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const fatalLE = nativeDecoder ? new TextDecoder('utf-16le', { ignoreBOM: true, fatal: true }) : null
|
|
33
|
+
const looseLE = nativeDecoder ? new TextDecoder('utf-16le', { ignoreBOM: true }) : null
|
|
34
|
+
const fatalBE = nativeDecoder ? new TextDecoder('utf-16be', { ignoreBOM: true, fatal: true }) : null
|
|
35
|
+
const looseBE = nativeDecoder ? new TextDecoder('utf-16be', { ignoreBOM: true }) : null
|
|
36
|
+
|
|
37
|
+
export function decodeApiDecoders(input, loose, format) {
|
|
38
|
+
if (format === 'uint16') {
|
|
39
|
+
if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
|
|
40
|
+
} else if (format === 'uint8-le' || format === 'uint8-be') {
|
|
41
|
+
assertU8(input)
|
|
42
|
+
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
43
|
+
} else {
|
|
44
|
+
throw new TypeError('Unknown format')
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
const le = format === 'uint8-le' || (format === 'uint16' && isLE)
|
|
48
|
+
return (le ? (loose ? looseLE : fatalLE) : loose ? looseBE : fatalBE).decode(input)
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
export function decodeApiJS(input, loose, format) {
|
|
52
|
+
let u16
|
|
53
|
+
switch (format) {
|
|
54
|
+
case 'uint16':
|
|
55
|
+
if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
|
|
56
|
+
u16 = input
|
|
57
|
+
break
|
|
58
|
+
case 'uint8-le':
|
|
59
|
+
assertU8(input)
|
|
60
|
+
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
61
|
+
u16 = to16input(input, true)
|
|
62
|
+
break
|
|
63
|
+
case 'uint8-be':
|
|
64
|
+
assertU8(input)
|
|
65
|
+
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
66
|
+
u16 = to16input(input, false)
|
|
67
|
+
break
|
|
68
|
+
default:
|
|
69
|
+
throw new TypeError('Unknown format')
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
const str = decode(u16, loose, (!loose && isWellFormedStr) || (loose && toWellFormedStr))
|
|
73
|
+
if (!loose && isWellFormedStr && !isWellFormedStr.call(str)) throw new TypeError(E_STRICT)
|
|
74
|
+
if (loose && toWellFormedStr) return toWellFormedStr.call(str)
|
|
75
|
+
|
|
76
|
+
return str
|
|
77
|
+
}
|
|
78
|
+
|
|
12
79
|
export function to16input(u8, le) {
|
|
13
80
|
// Assume even number of bytes
|
|
14
81
|
if (le === isLE) return to16(u8.byteOffset % 2 === 0 ? u8 : Uint8Array.from(u8))
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { decodeFast, encode } from './utf8.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { decodeFast, encode } from './utf8.js'
|
package/fallback/utf8.js
CHANGED
|
@@ -1,9 +1,31 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { E_STRICT_UNICODE } from './_utils.js'
|
|
2
|
+
import { isHermes } from './platform.js'
|
|
3
|
+
import { asciiPrefix, decodeLatin1, encodeAsciiPrefix } from './latin1.js'
|
|
2
4
|
|
|
3
5
|
export const E_STRICT = 'Input is not well-formed utf8'
|
|
4
|
-
export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
|
|
5
6
|
|
|
6
7
|
const replacementPoint = 0xff_fd
|
|
8
|
+
const shouldUseEscapePath = isHermes // faster only on Hermes, js path beats it on normal engines
|
|
9
|
+
const { decodeURIComponent, escape } = globalThis
|
|
10
|
+
|
|
11
|
+
export function decodeFast(arr, loose) {
|
|
12
|
+
// Fast path for ASCII prefix, this is faster than all alternatives below
|
|
13
|
+
const prefix = decodeLatin1(arr, 0, asciiPrefix(arr)) // No native decoder to use, so decodeAscii is useless here
|
|
14
|
+
if (prefix.length === arr.length) return prefix
|
|
15
|
+
|
|
16
|
+
// This codepath gives a ~3x perf boost on Hermes
|
|
17
|
+
if (shouldUseEscapePath && escape && decodeURIComponent) {
|
|
18
|
+
const o = escape(decodeLatin1(arr, prefix.length, arr.length))
|
|
19
|
+
try {
|
|
20
|
+
return prefix + decodeURIComponent(o) // Latin1 to utf8
|
|
21
|
+
} catch {
|
|
22
|
+
if (!loose) throw new TypeError(E_STRICT)
|
|
23
|
+
// Ok, we have to use manual implementation for loose decoder
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
return prefix + decode(arr, loose, prefix.length)
|
|
28
|
+
}
|
|
7
29
|
|
|
8
30
|
// https://encoding.spec.whatwg.org/#utf-8-decoder
|
|
9
31
|
// We are most likely in loose mode, for non-loose escape & decodeURIComponent solved everything
|
|
@@ -27,7 +49,7 @@ export function decode(arr, loose, start = 0) {
|
|
|
27
49
|
const byte = arr[i]
|
|
28
50
|
if (byte < 0x80) {
|
|
29
51
|
tmp[ti++] = byte
|
|
30
|
-
// ascii fast path is in
|
|
52
|
+
// ascii fast path is in decodeFast(), this is called only on non-ascii input
|
|
31
53
|
// so we don't unroll this anymore
|
|
32
54
|
} else if (byte < 0xc2) {
|
|
33
55
|
if (!loose) throw new TypeError(E_STRICT)
|
package/hex.js
CHANGED
|
@@ -1,19 +1,17 @@
|
|
|
1
|
-
import { assertUint8 } from './assert.js'
|
|
2
1
|
import { typedView } from './array.js'
|
|
3
|
-
import {
|
|
2
|
+
import { assertU8 } from './fallback/_utils.js'
|
|
4
3
|
import * as js from './fallback/hex.js'
|
|
5
4
|
|
|
6
5
|
const { toHex: webHex } = Uint8Array.prototype // Modern engines have this
|
|
7
6
|
|
|
8
7
|
export function toHex(arr) {
|
|
9
|
-
|
|
8
|
+
assertU8(arr)
|
|
10
9
|
if (arr.length === 0) return ''
|
|
11
|
-
if (
|
|
10
|
+
if (webHex && arr.toHex === webHex) return arr.toHex()
|
|
12
11
|
return js.toHex(arr)
|
|
13
12
|
}
|
|
14
13
|
|
|
15
14
|
// Unlike Buffer.from(), throws on invalid input
|
|
16
|
-
export const fromHex =
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
: (str, format = 'uint8') => typedView(js.fromHex(str), format)
|
|
15
|
+
export const fromHex = Uint8Array.fromHex
|
|
16
|
+
? (str, format = 'uint8') => typedView(Uint8Array.fromHex(str), format)
|
|
17
|
+
: (str, format = 'uint8') => typedView(js.fromHex(str), format)
|
package/hex.node.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import { assertUint8 } from './assert.js'
|
|
2
1
|
import { typedView } from './array.js'
|
|
3
|
-
import { E_STRING } from './fallback/_utils.js'
|
|
2
|
+
import { assertU8, E_STRING } from './fallback/_utils.js'
|
|
4
3
|
import { E_HEX } from './fallback/hex.js'
|
|
5
4
|
|
|
6
5
|
if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
|
|
@@ -9,7 +8,7 @@ const { toHex: webHex } = Uint8Array.prototype // Modern engines have this
|
|
|
9
8
|
const denoBug = Buffer.from('ag', 'hex').length > 0
|
|
10
9
|
|
|
11
10
|
export function toHex(arr) {
|
|
12
|
-
|
|
11
|
+
assertU8(arr)
|
|
13
12
|
if (arr.length === 0) return ''
|
|
14
13
|
if (webHex && arr.toHex === webHex) return arr.toHex()
|
|
15
14
|
if (arr.constructor === Buffer && Buffer.isBuffer(arr)) return arr.hexSlice(0, arr.byteLength)
|
package/multi-byte.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { assertU8 } from './fallback/_utils.js'
|
|
2
2
|
import { multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js'
|
|
3
3
|
|
|
4
4
|
export function createMultibyteDecoder(encoding, loose = false) {
|
|
5
5
|
const jsDecoder = multibyteDecoder(encoding, loose) // asserts
|
|
6
6
|
let streaming = false
|
|
7
7
|
return (arr, stream = false) => {
|
|
8
|
-
|
|
8
|
+
assertU8(arr)
|
|
9
9
|
if (!streaming && arr.byteLength === 0) return ''
|
|
10
10
|
streaming = stream
|
|
11
11
|
return jsDecoder(arr, stream)
|
package/multi-byte.node.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { isDeno
|
|
1
|
+
import { assertU8, toBuf } from './fallback/_utils.js'
|
|
2
|
+
import { isDeno } from './fallback/platform.js'
|
|
3
3
|
import { isAsciiSuperset, multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js'
|
|
4
4
|
import { isAscii } from 'node:buffer'
|
|
5
5
|
|
|
@@ -8,7 +8,7 @@ export function createMultibyteDecoder(encoding, loose = false) {
|
|
|
8
8
|
let streaming = false
|
|
9
9
|
const asciiSuperset = isAsciiSuperset(encoding)
|
|
10
10
|
return (arr, stream = false) => {
|
|
11
|
-
|
|
11
|
+
assertU8(arr)
|
|
12
12
|
if (!streaming) {
|
|
13
13
|
if (arr.byteLength === 0) return ''
|
|
14
14
|
if (asciiSuperset && isAscii(arr)) {
|
package/package.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@exodus/bytes",
|
|
3
|
-
"version": "1.11.0",
|
|
3
|
+
"version": "1.13.0",
|
|
4
4
|
"description": "Various operations on Uint8Array data",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"encoding",
|
|
7
|
-
"
|
|
8
|
-
"
|
|
9
|
-
"
|
|
7
|
+
"Uint8Array",
|
|
8
|
+
"TextDecoder",
|
|
9
|
+
"TextEncoder",
|
|
10
10
|
"utf8",
|
|
11
11
|
"utf16",
|
|
12
12
|
"hex",
|
|
@@ -27,10 +27,14 @@
|
|
|
27
27
|
"test:spidermonkey": "exodus-test --engine=spidermonkey:bundle",
|
|
28
28
|
"test:hermes": "exodus-test --engine=hermes:bundle",
|
|
29
29
|
"test:quickjs": "exodus-test --engine=quickjs:bundle",
|
|
30
|
-
"test:xs": "
|
|
30
|
+
"test:xs": "exodus-test --engine=xs:bundle",
|
|
31
31
|
"test:engine262": "exodus-test --engine=engine262:bundle",
|
|
32
|
+
"test:graaljs": "exodus-test --engine=graaljs:bundle",
|
|
33
|
+
"test:escargot": "exodus-test --engine=escargot:bundle",
|
|
34
|
+
"test:boa": "exodus-test --engine=boa:bundle",
|
|
32
35
|
"test:deno": "exodus-test --engine=deno:pure",
|
|
33
36
|
"test:bun": "exodus-test --engine=bun:pure",
|
|
37
|
+
"test:workerd": "exodus-test --engine=workerd:bundle",
|
|
34
38
|
"test:electron:bundle": "exodus-test --engine=electron:bundle",
|
|
35
39
|
"test:electron:as-node": "exodus-test --engine=electron-as-node:test",
|
|
36
40
|
"test:chrome:puppeteer": "exodus-test --engine=chrome:puppeteer",
|
|
@@ -72,6 +76,9 @@
|
|
|
72
76
|
"/fallback/hex.js",
|
|
73
77
|
"/fallback/latin1.js",
|
|
74
78
|
"/fallback/percent.js",
|
|
79
|
+
"/fallback/platform.js",
|
|
80
|
+
"/fallback/platform.browser.js",
|
|
81
|
+
"/fallback/platform.native.js",
|
|
75
82
|
"/fallback/multi-byte.encodings.cjs",
|
|
76
83
|
"/fallback/multi-byte.encodings.json",
|
|
77
84
|
"/fallback/multi-byte.js",
|
|
@@ -80,6 +87,9 @@
|
|
|
80
87
|
"/fallback/single-byte.js",
|
|
81
88
|
"/fallback/utf16.js",
|
|
82
89
|
"/fallback/utf8.js",
|
|
90
|
+
"/fallback/utf8.auto.js",
|
|
91
|
+
"/fallback/utf8.auto.browser.js",
|
|
92
|
+
"/fallback/utf8.auto.native.js",
|
|
83
93
|
"/array.js",
|
|
84
94
|
"/array.d.ts",
|
|
85
95
|
"/assert.js",
|
|
@@ -117,6 +127,8 @@
|
|
|
117
127
|
"/single-byte.node.js",
|
|
118
128
|
"/utf16.js",
|
|
119
129
|
"/utf16.d.ts",
|
|
130
|
+
"/utf16.browser.js",
|
|
131
|
+
"/utf16.native.js",
|
|
120
132
|
"/utf16.node.js",
|
|
121
133
|
"/utf8.js",
|
|
122
134
|
"/utf8.d.ts",
|
|
@@ -196,6 +208,8 @@
|
|
|
196
208
|
"./utf16.js": {
|
|
197
209
|
"types": "./utf16.d.ts",
|
|
198
210
|
"node": "./utf16.node.js",
|
|
211
|
+
"react-native": "./utf16.native.js",
|
|
212
|
+
"browser": "./utf16.browser.js",
|
|
199
213
|
"default": "./utf16.js"
|
|
200
214
|
},
|
|
201
215
|
"./utf8.js": {
|
|
@@ -212,8 +226,16 @@
|
|
|
212
226
|
"default": "./wif.js"
|
|
213
227
|
}
|
|
214
228
|
},
|
|
229
|
+
"browser": {
|
|
230
|
+
"./utf16.js": "./utf16.browser.js",
|
|
231
|
+
"./fallback/platform.js": "./fallback/platform.browser.js",
|
|
232
|
+
"./fallback/utf8.auto.js": "./fallback/utf8.auto.browser.js"
|
|
233
|
+
},
|
|
215
234
|
"react-native": {
|
|
216
|
-
"./encoding-browser.js": "./encoding-browser.native.js"
|
|
235
|
+
"./encoding-browser.js": "./encoding-browser.native.js",
|
|
236
|
+
"./utf16.js": "./utf16.native.js",
|
|
237
|
+
"./fallback/platform.js": "./fallback/platform.native.js",
|
|
238
|
+
"./fallback/utf8.auto.js": "./fallback/utf8.auto.native.js"
|
|
217
239
|
},
|
|
218
240
|
"peerDependencies": {
|
|
219
241
|
"@noble/hashes": "^1.8.0 || ^2.0.0"
|
|
@@ -228,7 +250,7 @@
|
|
|
228
250
|
"@exodus/crypto": "^1.0.0-rc.30",
|
|
229
251
|
"@exodus/eslint-config": "^5.24.0",
|
|
230
252
|
"@exodus/prettier": "^1.0.0",
|
|
231
|
-
"@exodus/test": "
|
|
253
|
+
"@exodus/test": "1.0.0-rc.115",
|
|
232
254
|
"@hexagon/base64": "^2.0.4",
|
|
233
255
|
"@noble/hashes": "^2.0.1",
|
|
234
256
|
"@oslojs/encoding": "^1.1.0",
|
|
@@ -250,7 +272,7 @@
|
|
|
250
272
|
"decode-utf8": "^1.0.1",
|
|
251
273
|
"electron": "36.5.0",
|
|
252
274
|
"encode-utf8": "^2.0.0",
|
|
253
|
-
"esbuild": "^0.27.
|
|
275
|
+
"esbuild": "^0.27.3",
|
|
254
276
|
"eslint": "^8.44.0",
|
|
255
277
|
"fast-base64-decode": "^2.0.0",
|
|
256
278
|
"fast-base64-encode": "^1.0.0",
|
|
@@ -265,7 +287,8 @@
|
|
|
265
287
|
"uint8array-tools": "^0.0.9",
|
|
266
288
|
"utf8": "^3.0.0",
|
|
267
289
|
"web-streams-polyfill": "^4.2.0",
|
|
268
|
-
"wif": "^5.0.0"
|
|
290
|
+
"wif": "^5.0.0",
|
|
291
|
+
"workerd": "^1.20260206.0"
|
|
269
292
|
},
|
|
270
293
|
"prettier": "@exodus/prettier",
|
|
271
294
|
"packageManager": "pnpm@10.12.1+sha256.889bac470ec93ccc3764488a19d6ba8f9c648ad5e50a9a6e4be3768a5de387a3"
|
package/single-byte.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { assertU8, E_STRING } from './fallback/_utils.js'
|
|
2
|
+
import { nativeDecoderLatin1, nativeEncoder } from './fallback/platform.js'
|
|
3
3
|
import { encodeAscii, encodeAsciiPrefix, encodeLatin1 } from './fallback/latin1.js'
|
|
4
4
|
import { assertEncoding, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
|
|
5
5
|
|
|
@@ -39,12 +39,12 @@ export function createSinglebyteDecoder(encoding, loose = false) {
|
|
|
39
39
|
if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
|
|
40
40
|
assertEncoding(encoding)
|
|
41
41
|
|
|
42
|
-
if (
|
|
42
|
+
if (nativeDecoderLatin1 && shouldUseNative(encoding)) {
|
|
43
43
|
// In try, as not all encodings might be implemented in all engines which have native TextDecoder
|
|
44
44
|
try {
|
|
45
45
|
const decoder = new TextDecoder(encoding, { fatal: !loose })
|
|
46
46
|
return (arr) => {
|
|
47
|
-
|
|
47
|
+
assertU8(arr)
|
|
48
48
|
if (arr.byteLength === 0) return ''
|
|
49
49
|
return decoder.decode(arr)
|
|
50
50
|
}
|
|
@@ -53,7 +53,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
|
|
|
53
53
|
|
|
54
54
|
const jsDecoder = encodingDecoder(encoding)
|
|
55
55
|
return (arr) => {
|
|
56
|
-
|
|
56
|
+
assertU8(arr)
|
|
57
57
|
if (arr.byteLength === 0) return ''
|
|
58
58
|
return jsDecoder(arr, loose)
|
|
59
59
|
}
|
|
@@ -88,7 +88,7 @@ function encode(s, m) {
|
|
|
88
88
|
}
|
|
89
89
|
|
|
90
90
|
// fromBase64+btoa path is faster on everything where fromBase64 is fast
|
|
91
|
-
const useLatin1btoa = Uint8Array.fromBase64 && btoa
|
|
91
|
+
const useLatin1btoa = Uint8Array.fromBase64 && btoa
|
|
92
92
|
|
|
93
93
|
export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
|
|
94
94
|
// TODO: replacement, truncate (replacement will need varying length)
|
package/single-byte.node.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { assertUint8 } from './assert.js'
|
|
2
1
|
import { isAscii } from 'node:buffer'
|
|
3
|
-
import {
|
|
2
|
+
import { assertU8, toBuf, E_STRING } from './fallback/_utils.js'
|
|
3
|
+
import { isDeno, isLE } from './fallback/platform.js'
|
|
4
4
|
import { asciiPrefix } from './fallback/latin1.js'
|
|
5
5
|
import { encodingMapper, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
|
|
6
6
|
|
|
@@ -26,7 +26,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
|
|
|
26
26
|
if (isDeno) {
|
|
27
27
|
const jsDecoder = encodingDecoder(encoding) // asserts
|
|
28
28
|
return (arr) => {
|
|
29
|
-
|
|
29
|
+
assertU8(arr)
|
|
30
30
|
if (arr.byteLength === 0) return ''
|
|
31
31
|
if (isAscii(arr)) return toBuf(arr).toString()
|
|
32
32
|
return jsDecoder(arr, loose) // somewhy faster on Deno anyway, TODO: optimize?
|
|
@@ -37,7 +37,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
|
|
|
37
37
|
const latin1path = encoding === 'windows-1252'
|
|
38
38
|
const { incomplete, mapper } = encodingMapper(encoding) // asserts
|
|
39
39
|
return (arr) => {
|
|
40
|
-
|
|
40
|
+
assertU8(arr)
|
|
41
41
|
if (arr.byteLength === 0) return ''
|
|
42
42
|
if (isLatin1 || isAscii(arr)) return toBuf(arr).latin1Slice() // .latin1Slice is faster than .asciiSlice
|
|
43
43
|
|
package/utf16.browser.js
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
// We trust browsers to always have correct TextDecoder for utf-16le/utf-16be with ignoreBOM without streaming
|
|
2
|
+
|
|
3
|
+
import { encodeApi, decodeApiDecoders } from './fallback/utf16.js'
|
|
4
|
+
|
|
5
|
+
export const utf16fromString = (str, format = 'uint16') => encodeApi(str, false, format)
|
|
6
|
+
export const utf16fromStringLoose = (str, format = 'uint16') => encodeApi(str, true, format)
|
|
7
|
+
export const utf16toString = (arr, format = 'uint16') => decodeApiDecoders(arr, false, format)
|
|
8
|
+
export const utf16toStringLoose = (arr, format = 'uint16') => decodeApiDecoders(arr, true, format)
|
package/utf16.js
CHANGED
|
@@ -1,75 +1 @@
|
|
|
1
|
-
import * as js from './fallback/utf16.js'
|
2
|
-
import { canDecoders, isLE, E_STRING } from './fallback/_utils.js'
|
|
3
|
-
|
|
4
|
-
const { TextDecoder } = globalThis // Buffer is optional
|
|
5
|
-
const ignoreBOM = true
|
|
6
|
-
const decoderFatalLE = canDecoders ? new TextDecoder('utf-16le', { ignoreBOM, fatal: true }) : null
|
|
7
|
-
const decoderLooseLE = canDecoders ? new TextDecoder('utf-16le', { ignoreBOM }) : null
|
|
8
|
-
const decoderFatalBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM, fatal: true }) : null
|
|
9
|
-
const decoderLooseBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM }) : null
|
|
10
|
-
const decoderFatal16 = isLE ? decoderFatalLE : decoderFatalBE
|
|
11
|
-
const decoderLoose16 = isLE ? decoderLooseLE : decoderLooseBE
|
|
12
|
-
const { isWellFormed, toWellFormed } = String.prototype
|
|
13
|
-
|
|
14
|
-
const { E_STRICT, E_STRICT_UNICODE } = js
|
|
15
|
-
|
|
16
|
-
// Unlike utf8, operates on Uint16Arrays by default
|
|
17
|
-
|
|
18
|
-
const to8 = (a) => new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
|
|
19
|
-
|
|
20
|
-
function encode(str, loose = false, format = 'uint16') {
|
|
21
|
-
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
22
|
-
if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
|
|
23
|
-
throw new TypeError('Unknown format')
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
const shouldSwap = (isLE && format === 'uint8-be') || (!isLE && format === 'uint8-le')
|
|
27
|
-
|
|
28
|
-
// On v8 and SpiderMonkey, check via isWellFormed is faster than js
|
|
29
|
-
// On JSC, check during loop is faster than isWellFormed
|
|
30
|
-
// If isWellFormed is available, we skip check during decoding and recheck after
|
|
31
|
-
// If isWellFormed is unavailable, we check in js during decoding
|
|
32
|
-
if (!loose && isWellFormed && !isWellFormed.call(str)) throw new TypeError(E_STRICT_UNICODE)
|
|
33
|
-
const u16 = js.encode(str, loose, !loose && isWellFormed, shouldSwap)
|
|
34
|
-
|
|
35
|
-
if (format === 'uint8-le' || format === 'uint8-be') return to8(u16) // Already swapped
|
|
36
|
-
if (format === 'uint16') return u16
|
|
37
|
-
/* c8 ignore next */
|
|
38
|
-
throw new Error('Unreachable')
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
function decode(input, loose = false, format = 'uint16') {
|
|
42
|
-
let u16
|
|
43
|
-
switch (format) {
|
|
44
|
-
case 'uint16':
|
|
45
|
-
if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
|
|
46
|
-
if (canDecoders) return loose ? decoderLoose16.decode(input) : decoderFatal16.decode(input)
|
|
47
|
-
u16 = input
|
|
48
|
-
break
|
|
49
|
-
case 'uint8-le':
|
|
50
|
-
if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
|
|
51
|
-
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
52
|
-
if (canDecoders) return loose ? decoderLooseLE.decode(input) : decoderFatalLE.decode(input)
|
|
53
|
-
u16 = js.to16input(input, true)
|
|
54
|
-
break
|
|
55
|
-
case 'uint8-be':
|
|
56
|
-
if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
|
|
57
|
-
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
58
|
-
if (canDecoders) return loose ? decoderLooseBE.decode(input) : decoderFatalBE.decode(input)
|
|
59
|
-
u16 = js.to16input(input, false)
|
|
60
|
-
break
|
|
61
|
-
default:
|
|
62
|
-
throw new TypeError('Unknown format')
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
const str = js.decode(u16, loose, (!loose && isWellFormed) || (loose && toWellFormed))
|
|
66
|
-
if (!loose && isWellFormed && !isWellFormed.call(str)) throw new TypeError(E_STRICT)
|
|
67
|
-
if (loose && toWellFormed) return toWellFormed.call(str)
|
|
68
|
-
|
|
69
|
-
return str
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
export const utf16fromString = (str, format = 'uint16') => encode(str, false, format)
|
|
73
|
-
export const utf16fromStringLoose = (str, format = 'uint16') => encode(str, true, format)
|
|
74
|
-
export const utf16toString = (arr, format = 'uint16') => decode(arr, false, format)
|
|
75
|
-
export const utf16toStringLoose = (arr, format = 'uint16') => decode(arr, true, format)
|
|
1
|
+
export * from './utf16.native.js'
|
package/utf16.native.js
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { encodeApi, decodeApiDecoders, decodeApiJS } from './fallback/utf16.js'
|
|
2
|
+
import { nativeDecoder } from './fallback/platform.native.js'
|
|
3
|
+
|
|
4
|
+
function checkDecoders() {
|
|
5
|
+
// Not all barebone engines with TextDecoder support something except utf-8
|
|
6
|
+
// Also workerd specifically has a broken utf-16le implementation
|
|
7
|
+
if (!nativeDecoder) return false
|
|
8
|
+
try {
|
|
9
|
+
const a = new TextDecoder('utf-16le').decode(Uint8Array.of(1, 2, 3, 0xd8))
|
|
10
|
+
const b = new TextDecoder('utf-16be').decode(Uint8Array.of(2, 1, 0xd8, 3))
|
|
11
|
+
return a === b && a === '\u0201\uFFFD'
|
|
12
|
+
} catch {}
|
|
13
|
+
|
|
14
|
+
return false
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
const decode = checkDecoders() ? decodeApiDecoders : decodeApiJS
|
|
18
|
+
|
|
19
|
+
export const utf16fromString = (str, format = 'uint16') => encodeApi(str, false, format)
|
|
20
|
+
export const utf16fromStringLoose = (str, format = 'uint16') => encodeApi(str, true, format)
|
|
21
|
+
export const utf16toString = (arr, format = 'uint16') => decode(arr, false, format)
|
|
22
|
+
export const utf16toStringLoose = (arr, format = 'uint16') => decode(arr, true, format)
|
package/utf16.node.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { assertU8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
|
|
2
|
+
import { isDeno, isLE } from './fallback/platform.js'
|
|
3
|
+
import { E_STRICT, decodeApiDecoders } from './fallback/utf16.js'
|
|
3
4
|
|
|
4
5
|
if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
|
|
5
6
|
|
|
@@ -48,7 +49,7 @@ function decodeNode(input, loose = false, format = 'uint16') {
|
|
|
48
49
|
if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
|
|
49
50
|
ble = swapped(input, !isLE)
|
|
50
51
|
} else if (format === 'uint8-le' || format === 'uint8-be') {
|
|
51
|
-
|
|
52
|
+
assertU8(input)
|
|
52
53
|
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
53
54
|
ble = swapped(input, format === 'uint8-be')
|
|
54
55
|
} else {
|
|
@@ -61,23 +62,7 @@ function decodeNode(input, loose = false, format = 'uint16') {
|
|
|
61
62
|
throw new TypeError(E_STRICT)
|
|
62
63
|
}
|
|
63
64
|
|
|
64
|
-
|
|
65
|
-
let encoding
|
|
66
|
-
if (format === 'uint16') {
|
|
67
|
-
if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
|
|
68
|
-
encoding = isLE ? 'utf-16le' : 'utf-16be'
|
|
69
|
-
} else if (format === 'uint8-le' || format === 'uint8-be') {
|
|
70
|
-
if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
|
|
71
|
-
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
72
|
-
encoding = format === 'uint8-le' ? 'utf-16le' : 'utf-16be'
|
|
73
|
-
} else {
|
|
74
|
-
throw new TypeError('Unknown format')
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
return new TextDecoder(encoding, { ignoreBOM: true, fatal: !loose }).decode(input) // TODO: cache decoder?
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
const decode = isDeno ? decodeDecoder : decodeNode
|
|
65
|
+
const decode = isDeno ? decodeApiDecoders : decodeNode
|
|
81
66
|
|
|
82
67
|
export const utf16fromString = (str, format = 'uint16') => encode(str, false, format)
|
|
83
68
|
export const utf16fromStringLoose = (str, format = 'uint16') => encode(str, true, format)
|
package/utf8.js
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
import { assertUint8 } from './assert.js'
|
|
2
1
|
import { typedView } from './array.js'
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import * as js from './fallback/utf8.js'
|
|
2
|
+
import { assertU8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
|
|
3
|
+
import { nativeDecoder, nativeEncoder } from './fallback/platform.js'
|
|
4
|
+
import * as js from './fallback/utf8.auto.js'
|
|
6
5
|
|
|
7
|
-
const { TextDecoder, decodeURIComponent, escape } = globalThis // Buffer is optional
|
|
8
6
|
// ignoreBOM: true means that BOM will be left as-is, i.e. will be present in the output
|
|
9
7
|
// We don't want to strip anything unexpectedly
|
|
10
8
|
const decoderLoose = nativeDecoder
|
|
@@ -13,10 +11,6 @@ const decoderFatal = nativeDecoder
|
|
|
13
11
|
: null
|
|
14
12
|
const { isWellFormed } = String.prototype
|
|
15
13
|
|
|
16
|
-
const { E_STRICT, E_STRICT_UNICODE } = js
|
|
17
|
-
|
|
18
|
-
const shouldUseEscapePath = isHermes // faster only on Hermes, js path beats it on normal engines
|
|
19
|
-
|
|
20
14
|
function deLoose(str, loose, res) {
|
|
21
15
|
if (loose || str.length === res.length) return res // length is equal only for ascii, which is automatically fine
|
|
22
16
|
if (isWellFormed) {
|
|
@@ -35,7 +29,7 @@ function deLoose(str, loose, res) {
|
|
|
35
29
|
start = pos + 1
|
|
36
30
|
if (res[pos + 1] === 0xbf && res[pos + 2] === 0xbd) {
|
|
37
31
|
// Found a replacement char in output, need to recheck if we encoded the input correctly
|
|
38
|
-
if (!nativeDecoder && str.length < 1e7) {
|
|
32
|
+
if (js.decodeFast && !nativeDecoder && str.length < 1e7) {
|
|
39
33
|
// This is ~2x faster than decode in Hermes
|
|
40
34
|
try {
|
|
41
35
|
if (encodeURI(str) !== null) return res // guard against optimizing out
|
|
@@ -51,32 +45,19 @@ function deLoose(str, loose, res) {
|
|
|
51
45
|
function encode(str, loose = false) {
|
|
52
46
|
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
53
47
|
if (str.length === 0) return new Uint8Array() // faster than Uint8Array.of
|
|
54
|
-
if (nativeEncoder) return deLoose(str, loose, nativeEncoder.encode(str))
|
|
48
|
+
if (nativeEncoder || !js.encode) return deLoose(str, loose, nativeEncoder.encode(str))
|
|
55
49
|
// No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder
|
|
56
50
|
return js.encode(str, loose)
|
|
57
51
|
}
|
|
58
52
|
|
|
59
53
|
function decode(arr, loose = false) {
|
|
60
|
-
|
|
54
|
+
assertU8(arr)
|
|
61
55
|
if (arr.byteLength === 0) return ''
|
|
62
|
-
if (nativeDecoder
|
|
63
|
-
|
|
64
|
-
// Fast path for ASCII prefix, this is faster than all alternatives below
|
|
65
|
-
const prefix = decodeLatin1(arr, 0, asciiPrefix(arr)) // No native decoder to use, so decodeAscii is useless here
|
|
66
|
-
if (prefix.length === arr.length) return prefix
|
|
67
|
-
|
|
68
|
-
// This codepath gives a ~3x perf boost on Hermes
|
|
69
|
-
if (shouldUseEscapePath && escape && decodeURIComponent) {
|
|
70
|
-
const o = escape(decodeLatin1(arr, prefix.length, arr.length))
|
|
71
|
-
try {
|
|
72
|
-
return prefix + decodeURIComponent(o) // Latin1 to utf8
|
|
73
|
-
} catch {
|
|
74
|
-
if (!loose) throw new TypeError(E_STRICT)
|
|
75
|
-
// Ok, we have to use manual implementation for loose decoder
|
|
76
|
-
}
|
|
56
|
+
if (nativeDecoder || !js.decodeFast) {
|
|
57
|
+
return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr) // Node.js and browsers
|
|
77
58
|
}
|
|
78
59
|
|
|
79
|
-
return
|
|
60
|
+
return js.decodeFast(arr, loose)
|
|
80
61
|
}
|
|
81
62
|
|
|
82
63
|
export const utf8fromString = (str, format = 'uint8') => typedView(encode(str, false), format)
|
package/utf8.node.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
import { assertUint8 } from './assert.js'
|
|
2
1
|
import { typedView } from './array.js'
|
|
3
|
-
import { E_STRING } from './fallback/_utils.js'
|
|
4
|
-
import { E_STRICT
|
|
2
|
+
import { assertU8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
|
|
3
|
+
import { E_STRICT } from './fallback/utf8.js'
|
|
5
4
|
import { isAscii } from 'node:buffer'
|
|
6
5
|
|
|
7
6
|
if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
|
|
@@ -40,7 +39,7 @@ function encode(str, loose = false) {
|
|
|
40
39
|
}
|
|
41
40
|
|
|
42
41
|
function decode(arr, loose = false) {
|
|
43
|
-
|
|
42
|
+
assertU8(arr)
|
|
44
43
|
const byteLength = arr.byteLength
|
|
45
44
|
if (byteLength === 0) return ''
|
|
46
45
|
if (byteLength > 0x6_00 && !(isDeno && loose) && isAscii(arr)) {
|