@exodus/bytes 1.12.0 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -29
- package/array.js +1 -1
- package/base32.js +1 -3
- package/base58.js +3 -3
- package/base58check.d.ts +2 -2
- package/base58check.js +6 -7
- package/base64.js +7 -6
- package/bech32.js +3 -3
- package/encoding-browser.browser.js +43 -17
- package/fallback/_utils.js +7 -122
- package/fallback/base32.js +3 -3
- package/fallback/base58check.js +3 -3
- package/fallback/base64.js +2 -3
- package/fallback/encoding.api.js +0 -43
- package/fallback/encoding.js +41 -2
- package/fallback/encoding.labels.js +20 -16
- package/fallback/hex.js +3 -4
- package/fallback/latin1.js +6 -25
- package/fallback/percent.js +1 -1
- package/fallback/platform.browser.js +31 -0
- package/fallback/platform.js +2 -0
- package/fallback/platform.native.js +122 -0
- package/fallback/single-byte.encodings.js +40 -49
- package/fallback/single-byte.js +4 -4
- package/fallback/utf16.js +70 -3
- package/fallback/utf8.auto.browser.js +2 -0
- package/fallback/utf8.auto.js +1 -0
- package/fallback/utf8.auto.native.js +1 -0
- package/fallback/utf8.js +25 -3
- package/hex.js +6 -8
- package/hex.node.js +2 -3
- package/multi-byte.js +2 -2
- package/multi-byte.node.js +3 -3
- package/package.json +28 -7
- package/single-byte.js +9 -9
- package/single-byte.node.js +8 -8
- package/utf16.browser.js +8 -0
- package/utf16.js +1 -90
- package/utf16.native.js +22 -0
- package/utf16.node.js +5 -20
- package/utf8.js +9 -28
- package/utf8.node.js +3 -4
- package/whatwg.js +6 -2
package/fallback/utf16.js
CHANGED
|
@@ -1,14 +1,81 @@
|
|
|
1
|
-
import { decodeUCS2
|
|
2
|
-
import {
|
|
1
|
+
import { decodeUCS2 } from './latin1.js'
|
|
2
|
+
import { assertU8, E_STRING, E_STRICT_UNICODE } from './_utils.js'
|
|
3
|
+
import { nativeDecoder, isLE, encodeCharcodes } from './platform.js'
|
|
3
4
|
|
|
4
5
|
export const E_STRICT = 'Input is not well-formed utf16'
|
|
5
|
-
|
|
6
|
+
const isWellFormedStr = /* @__PURE__ */ (() => String.prototype.isWellFormed)()
|
|
7
|
+
const toWellFormedStr = /* @__PURE__ */ (() => String.prototype.toWellFormed)()
|
|
6
8
|
|
|
7
9
|
const replacementCodepoint = 0xff_fd
|
|
8
10
|
const replacementCodepointSwapped = 0xfd_ff
|
|
9
11
|
|
|
10
12
|
const to16 = (a) => new Uint16Array(a.buffer, a.byteOffset, a.byteLength / 2) // Requires checked length and alignment!
|
|
11
13
|
|
|
14
|
+
export function encodeApi(str, loose, format) {
|
|
15
|
+
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
16
|
+
if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
|
|
17
|
+
throw new TypeError('Unknown format')
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
// On v8 and SpiderMonkey, check via isWellFormed is faster than js
|
|
21
|
+
// On JSC, check during loop is faster than isWellFormed
|
|
22
|
+
// If isWellFormed is available, we skip check during decoding and recheck after
|
|
23
|
+
// If isWellFormed is unavailable, we check in js during decoding
|
|
24
|
+
if (!loose && isWellFormedStr && !isWellFormedStr.call(str)) throw new TypeError(E_STRICT_UNICODE)
|
|
25
|
+
const shouldSwap = (isLE && format === 'uint8-be') || (!isLE && format === 'uint8-le')
|
|
26
|
+
const u16 = encode(str, loose, !loose && isWellFormedStr, shouldSwap)
|
|
27
|
+
|
|
28
|
+
// Bytes are already swapped and format is already checked, we need to just cast the view
|
|
29
|
+
return format === 'uint16' ? u16 : new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength)
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const fatalLE = nativeDecoder ? new TextDecoder('utf-16le', { ignoreBOM: true, fatal: true }) : null
|
|
33
|
+
const looseLE = nativeDecoder ? new TextDecoder('utf-16le', { ignoreBOM: true }) : null
|
|
34
|
+
const fatalBE = nativeDecoder ? new TextDecoder('utf-16be', { ignoreBOM: true, fatal: true }) : null
|
|
35
|
+
const looseBE = nativeDecoder ? new TextDecoder('utf-16be', { ignoreBOM: true }) : null
|
|
36
|
+
|
|
37
|
+
export function decodeApiDecoders(input, loose, format) {
|
|
38
|
+
if (format === 'uint16') {
|
|
39
|
+
if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
|
|
40
|
+
} else if (format === 'uint8-le' || format === 'uint8-be') {
|
|
41
|
+
assertU8(input)
|
|
42
|
+
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
43
|
+
} else {
|
|
44
|
+
throw new TypeError('Unknown format')
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
const le = format === 'uint8-le' || (format === 'uint16' && isLE)
|
|
48
|
+
return (le ? (loose ? looseLE : fatalLE) : loose ? looseBE : fatalBE).decode(input)
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
export function decodeApiJS(input, loose, format) {
|
|
52
|
+
let u16
|
|
53
|
+
switch (format) {
|
|
54
|
+
case 'uint16':
|
|
55
|
+
if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
|
|
56
|
+
u16 = input
|
|
57
|
+
break
|
|
58
|
+
case 'uint8-le':
|
|
59
|
+
assertU8(input)
|
|
60
|
+
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
61
|
+
u16 = to16input(input, true)
|
|
62
|
+
break
|
|
63
|
+
case 'uint8-be':
|
|
64
|
+
assertU8(input)
|
|
65
|
+
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
66
|
+
u16 = to16input(input, false)
|
|
67
|
+
break
|
|
68
|
+
default:
|
|
69
|
+
throw new TypeError('Unknown format')
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
const str = decode(u16, loose, (!loose && isWellFormedStr) || (loose && toWellFormedStr))
|
|
73
|
+
if (!loose && isWellFormedStr && !isWellFormedStr.call(str)) throw new TypeError(E_STRICT)
|
|
74
|
+
if (loose && toWellFormedStr) return toWellFormedStr.call(str)
|
|
75
|
+
|
|
76
|
+
return str
|
|
77
|
+
}
|
|
78
|
+
|
|
12
79
|
export function to16input(u8, le) {
|
|
13
80
|
// Assume even number of bytes
|
|
14
81
|
if (le === isLE) return to16(u8.byteOffset % 2 === 0 ? u8 : Uint8Array.from(u8))
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { decodeFast, encode } from './utf8.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { decodeFast, encode } from './utf8.js'
|
package/fallback/utf8.js
CHANGED
|
@@ -1,9 +1,31 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { E_STRICT_UNICODE } from './_utils.js'
|
|
2
|
+
import { isHermes } from './platform.js'
|
|
3
|
+
import { asciiPrefix, decodeLatin1, encodeAsciiPrefix } from './latin1.js'
|
|
2
4
|
|
|
3
5
|
export const E_STRICT = 'Input is not well-formed utf8'
|
|
4
|
-
export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
|
|
5
6
|
|
|
6
7
|
const replacementPoint = 0xff_fd
|
|
8
|
+
const shouldUseEscapePath = isHermes // faster only on Hermes, js path beats it on normal engines
|
|
9
|
+
const { decodeURIComponent, escape } = globalThis
|
|
10
|
+
|
|
11
|
+
export function decodeFast(arr, loose) {
|
|
12
|
+
// Fast path for ASCII prefix, this is faster than all alternatives below
|
|
13
|
+
const prefix = decodeLatin1(arr, 0, asciiPrefix(arr)) // No native decoder to use, so decodeAscii is useless here
|
|
14
|
+
if (prefix.length === arr.length) return prefix
|
|
15
|
+
|
|
16
|
+
// This codepath gives a ~3x perf boost on Hermes
|
|
17
|
+
if (shouldUseEscapePath && escape && decodeURIComponent) {
|
|
18
|
+
const o = escape(decodeLatin1(arr, prefix.length, arr.length))
|
|
19
|
+
try {
|
|
20
|
+
return prefix + decodeURIComponent(o) // Latin1 to utf8
|
|
21
|
+
} catch {
|
|
22
|
+
if (!loose) throw new TypeError(E_STRICT)
|
|
23
|
+
// Ok, we have to use manual implementation for loose decoder
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
return prefix + decode(arr, loose, prefix.length)
|
|
28
|
+
}
|
|
7
29
|
|
|
8
30
|
// https://encoding.spec.whatwg.org/#utf-8-decoder
|
|
9
31
|
// We are most likely in loose mode, for non-loose escape & decodeURIComponent solved everything
|
|
@@ -27,7 +49,7 @@ export function decode(arr, loose, start = 0) {
|
|
|
27
49
|
const byte = arr[i]
|
|
28
50
|
if (byte < 0x80) {
|
|
29
51
|
tmp[ti++] = byte
|
|
30
|
-
// ascii fast path is in
|
|
52
|
+
// ascii fast path is in decodeFast(), this is called only on non-ascii input
|
|
31
53
|
// so we don't unroll this anymore
|
|
32
54
|
} else if (byte < 0xc2) {
|
|
33
55
|
if (!loose) throw new TypeError(E_STRICT)
|
package/hex.js
CHANGED
|
@@ -1,19 +1,17 @@
|
|
|
1
|
-
import { assertUint8 } from './assert.js'
|
|
2
1
|
import { typedView } from './array.js'
|
|
3
|
-
import {
|
|
2
|
+
import { assertU8 } from './fallback/_utils.js'
|
|
4
3
|
import * as js from './fallback/hex.js'
|
|
5
4
|
|
|
6
5
|
const { toHex: webHex } = Uint8Array.prototype // Modern engines have this
|
|
7
6
|
|
|
8
7
|
export function toHex(arr) {
|
|
9
|
-
|
|
8
|
+
assertU8(arr)
|
|
10
9
|
if (arr.length === 0) return ''
|
|
11
|
-
if (
|
|
10
|
+
if (webHex && arr.toHex === webHex) return arr.toHex()
|
|
12
11
|
return js.toHex(arr)
|
|
13
12
|
}
|
|
14
13
|
|
|
15
14
|
// Unlike Buffer.from(), throws on invalid input
|
|
16
|
-
export const fromHex =
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
: (str, format = 'uint8') => typedView(js.fromHex(str), format)
|
|
15
|
+
export const fromHex = Uint8Array.fromHex
|
|
16
|
+
? (str, format = 'uint8') => typedView(Uint8Array.fromHex(str), format)
|
|
17
|
+
: (str, format = 'uint8') => typedView(js.fromHex(str), format)
|
package/hex.node.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import { assertUint8 } from './assert.js'
|
|
2
1
|
import { typedView } from './array.js'
|
|
3
|
-
import { E_STRING } from './fallback/_utils.js'
|
|
2
|
+
import { assertU8, E_STRING } from './fallback/_utils.js'
|
|
4
3
|
import { E_HEX } from './fallback/hex.js'
|
|
5
4
|
|
|
6
5
|
if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
|
|
@@ -9,7 +8,7 @@ const { toHex: webHex } = Uint8Array.prototype // Modern engines have this
|
|
|
9
8
|
const denoBug = Buffer.from('ag', 'hex').length > 0
|
|
10
9
|
|
|
11
10
|
export function toHex(arr) {
|
|
12
|
-
|
|
11
|
+
assertU8(arr)
|
|
13
12
|
if (arr.length === 0) return ''
|
|
14
13
|
if (webHex && arr.toHex === webHex) return arr.toHex()
|
|
15
14
|
if (arr.constructor === Buffer && Buffer.isBuffer(arr)) return arr.hexSlice(0, arr.byteLength)
|
package/multi-byte.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { assertU8 } from './fallback/_utils.js'
|
|
2
2
|
import { multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js'
|
|
3
3
|
|
|
4
4
|
export function createMultibyteDecoder(encoding, loose = false) {
|
|
5
5
|
const jsDecoder = multibyteDecoder(encoding, loose) // asserts
|
|
6
6
|
let streaming = false
|
|
7
7
|
return (arr, stream = false) => {
|
|
8
|
-
|
|
8
|
+
assertU8(arr)
|
|
9
9
|
if (!streaming && arr.byteLength === 0) return ''
|
|
10
10
|
streaming = stream
|
|
11
11
|
return jsDecoder(arr, stream)
|
package/multi-byte.node.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { isDeno
|
|
1
|
+
import { assertU8, toBuf } from './fallback/_utils.js'
|
|
2
|
+
import { isDeno } from './fallback/platform.js'
|
|
3
3
|
import { isAsciiSuperset, multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js'
|
|
4
4
|
import { isAscii } from 'node:buffer'
|
|
5
5
|
|
|
@@ -8,7 +8,7 @@ export function createMultibyteDecoder(encoding, loose = false) {
|
|
|
8
8
|
let streaming = false
|
|
9
9
|
const asciiSuperset = isAsciiSuperset(encoding)
|
|
10
10
|
return (arr, stream = false) => {
|
|
11
|
-
|
|
11
|
+
assertU8(arr)
|
|
12
12
|
if (!streaming) {
|
|
13
13
|
if (arr.byteLength === 0) return ''
|
|
14
14
|
if (asciiSuperset && isAscii(arr)) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@exodus/bytes",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.14.0",
|
|
4
4
|
"description": "Various operations on Uint8Array data",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"encoding",
|
|
@@ -40,8 +40,8 @@
|
|
|
40
40
|
"test:chrome:puppeteer": "exodus-test --engine=chrome:puppeteer",
|
|
41
41
|
"test:chromium:playwright": "exodus-test --engine=chromium:playwright",
|
|
42
42
|
"test:webkit:playwright": "exodus-test --engine=webkit:playwright",
|
|
43
|
-
"test:firefox:puppeteer": "exodus-test --engine=firefox:puppeteer",
|
|
44
|
-
"test:firefox:playwright": "exodus-test --engine=firefox:playwright",
|
|
43
|
+
"test:firefox:puppeteer": "exodus-test --engine=firefox:puppeteer --testTimeout=60000",
|
|
44
|
+
"test:firefox:playwright": "exodus-test --engine=firefox:playwright --testTimeout=60000",
|
|
45
45
|
"test:servo:bundle": "exodus-test --engine=servo:bundle",
|
|
46
46
|
"test": "exodus-test",
|
|
47
47
|
"size": "esbuild --minify --bundle",
|
|
@@ -76,6 +76,9 @@
|
|
|
76
76
|
"/fallback/hex.js",
|
|
77
77
|
"/fallback/latin1.js",
|
|
78
78
|
"/fallback/percent.js",
|
|
79
|
+
"/fallback/platform.js",
|
|
80
|
+
"/fallback/platform.browser.js",
|
|
81
|
+
"/fallback/platform.native.js",
|
|
79
82
|
"/fallback/multi-byte.encodings.cjs",
|
|
80
83
|
"/fallback/multi-byte.encodings.json",
|
|
81
84
|
"/fallback/multi-byte.js",
|
|
@@ -84,6 +87,9 @@
|
|
|
84
87
|
"/fallback/single-byte.js",
|
|
85
88
|
"/fallback/utf16.js",
|
|
86
89
|
"/fallback/utf8.js",
|
|
90
|
+
"/fallback/utf8.auto.js",
|
|
91
|
+
"/fallback/utf8.auto.browser.js",
|
|
92
|
+
"/fallback/utf8.auto.native.js",
|
|
87
93
|
"/array.js",
|
|
88
94
|
"/array.d.ts",
|
|
89
95
|
"/assert.js",
|
|
@@ -121,6 +127,8 @@
|
|
|
121
127
|
"/single-byte.node.js",
|
|
122
128
|
"/utf16.js",
|
|
123
129
|
"/utf16.d.ts",
|
|
130
|
+
"/utf16.browser.js",
|
|
131
|
+
"/utf16.native.js",
|
|
124
132
|
"/utf16.node.js",
|
|
125
133
|
"/utf8.js",
|
|
126
134
|
"/utf8.d.ts",
|
|
@@ -200,6 +208,8 @@
|
|
|
200
208
|
"./utf16.js": {
|
|
201
209
|
"types": "./utf16.d.ts",
|
|
202
210
|
"node": "./utf16.node.js",
|
|
211
|
+
"react-native": "./utf16.native.js",
|
|
212
|
+
"browser": "./utf16.browser.js",
|
|
203
213
|
"default": "./utf16.js"
|
|
204
214
|
},
|
|
205
215
|
"./utf8.js": {
|
|
@@ -216,9 +226,20 @@
|
|
|
216
226
|
"default": "./wif.js"
|
|
217
227
|
}
|
|
218
228
|
},
|
|
229
|
+
"browser": {
|
|
230
|
+
"./utf16.js": "./utf16.browser.js",
|
|
231
|
+
"./fallback/platform.js": "./fallback/platform.browser.js",
|
|
232
|
+
"./fallback/utf8.auto.js": "./fallback/utf8.auto.browser.js"
|
|
233
|
+
},
|
|
219
234
|
"react-native": {
|
|
220
|
-
"./encoding-browser.js": "./encoding-browser.native.js"
|
|
235
|
+
"./encoding-browser.js": "./encoding-browser.native.js",
|
|
236
|
+
"./utf16.js": "./utf16.native.js",
|
|
237
|
+
"./fallback/platform.js": "./fallback/platform.native.js",
|
|
238
|
+
"./fallback/utf8.auto.js": "./fallback/utf8.auto.native.js"
|
|
221
239
|
},
|
|
240
|
+
"sideEffects": [
|
|
241
|
+
"./encoding.js"
|
|
242
|
+
],
|
|
222
243
|
"peerDependencies": {
|
|
223
244
|
"@noble/hashes": "^1.8.0 || ^2.0.0"
|
|
224
245
|
},
|
|
@@ -232,7 +253,7 @@
|
|
|
232
253
|
"@exodus/crypto": "^1.0.0-rc.30",
|
|
233
254
|
"@exodus/eslint-config": "^5.24.0",
|
|
234
255
|
"@exodus/prettier": "^1.0.0",
|
|
235
|
-
"@exodus/test": "1.0.0-rc.
|
|
256
|
+
"@exodus/test": "1.0.0-rc.115",
|
|
236
257
|
"@hexagon/base64": "^2.0.4",
|
|
237
258
|
"@noble/hashes": "^2.0.1",
|
|
238
259
|
"@oslojs/encoding": "^1.1.0",
|
|
@@ -254,7 +275,7 @@
|
|
|
254
275
|
"decode-utf8": "^1.0.1",
|
|
255
276
|
"electron": "36.5.0",
|
|
256
277
|
"encode-utf8": "^2.0.0",
|
|
257
|
-
"esbuild": "^0.27.
|
|
278
|
+
"esbuild": "^0.27.3",
|
|
258
279
|
"eslint": "^8.44.0",
|
|
259
280
|
"fast-base64-decode": "^2.0.0",
|
|
260
281
|
"fast-base64-encode": "^1.0.0",
|
|
@@ -270,7 +291,7 @@
|
|
|
270
291
|
"utf8": "^3.0.0",
|
|
271
292
|
"web-streams-polyfill": "^4.2.0",
|
|
272
293
|
"wif": "^5.0.0",
|
|
273
|
-
"workerd": "^1.
|
|
294
|
+
"workerd": "^1.20260210.0"
|
|
274
295
|
},
|
|
275
296
|
"prettier": "@exodus/prettier",
|
|
276
297
|
"packageManager": "pnpm@10.12.1+sha256.889bac470ec93ccc3764488a19d6ba8f9c648ad5e50a9a6e4be3768a5de387a3"
|
package/single-byte.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { nativeDecoderLatin1, nativeEncoder
|
|
1
|
+
import { assertU8, E_STRING } from './fallback/_utils.js'
|
|
2
|
+
import { nativeDecoderLatin1, nativeEncoder } from './fallback/platform.js'
|
|
3
3
|
import { encodeAscii, encodeAsciiPrefix, encodeLatin1 } from './fallback/latin1.js'
|
|
4
4
|
import { assertEncoding, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
|
|
5
5
|
|
|
@@ -44,7 +44,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
|
|
|
44
44
|
try {
|
|
45
45
|
const decoder = new TextDecoder(encoding, { fatal: !loose })
|
|
46
46
|
return (arr) => {
|
|
47
|
-
|
|
47
|
+
assertU8(arr)
|
|
48
48
|
if (arr.byteLength === 0) return ''
|
|
49
49
|
return decoder.decode(arr)
|
|
50
50
|
}
|
|
@@ -53,7 +53,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
|
|
|
53
53
|
|
|
54
54
|
const jsDecoder = encodingDecoder(encoding)
|
|
55
55
|
return (arr) => {
|
|
56
|
-
|
|
56
|
+
assertU8(arr)
|
|
57
57
|
if (arr.byteLength === 0) return ''
|
|
58
58
|
return jsDecoder(arr, loose)
|
|
59
59
|
}
|
|
@@ -88,7 +88,7 @@ function encode(s, m) {
|
|
|
88
88
|
}
|
|
89
89
|
|
|
90
90
|
// fromBase64+btoa path is faster on everything where fromBase64 is fast
|
|
91
|
-
const useLatin1btoa = Uint8Array.fromBase64 && btoa
|
|
91
|
+
const useLatin1btoa = Uint8Array.fromBase64 && btoa
|
|
92
92
|
|
|
93
93
|
export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
|
|
94
94
|
// TODO: replacement, truncate (replacement will need varying length)
|
|
@@ -129,7 +129,7 @@ export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
|
|
|
129
129
|
}
|
|
130
130
|
}
|
|
131
131
|
|
|
132
|
-
export const latin1toString = createSinglebyteDecoder('iso-8859-1')
|
|
133
|
-
export const latin1fromString = createSinglebyteEncoder('iso-8859-1')
|
|
134
|
-
export const windows1252toString = createSinglebyteDecoder('windows-1252')
|
|
135
|
-
export const windows1252fromString = createSinglebyteEncoder('windows-1252')
|
|
132
|
+
export const latin1toString = /* @__PURE__ */ createSinglebyteDecoder('iso-8859-1')
|
|
133
|
+
export const latin1fromString = /* @__PURE__ */ createSinglebyteEncoder('iso-8859-1')
|
|
134
|
+
export const windows1252toString = /* @__PURE__ */ createSinglebyteDecoder('windows-1252')
|
|
135
|
+
export const windows1252fromString = /* @__PURE__ */ createSinglebyteEncoder('windows-1252')
|
package/single-byte.node.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { assertUint8 } from './assert.js'
|
|
2
1
|
import { isAscii } from 'node:buffer'
|
|
3
|
-
import {
|
|
2
|
+
import { assertU8, toBuf, E_STRING } from './fallback/_utils.js'
|
|
3
|
+
import { isDeno, isLE } from './fallback/platform.js'
|
|
4
4
|
import { asciiPrefix } from './fallback/latin1.js'
|
|
5
5
|
import { encodingMapper, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
|
|
6
6
|
|
|
@@ -26,7 +26,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
|
|
|
26
26
|
if (isDeno) {
|
|
27
27
|
const jsDecoder = encodingDecoder(encoding) // asserts
|
|
28
28
|
return (arr) => {
|
|
29
|
-
|
|
29
|
+
assertU8(arr)
|
|
30
30
|
if (arr.byteLength === 0) return ''
|
|
31
31
|
if (isAscii(arr)) return toBuf(arr).toString()
|
|
32
32
|
return jsDecoder(arr, loose) // somewhy faster on Deno anyway, TODO: optimize?
|
|
@@ -37,7 +37,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
|
|
|
37
37
|
const latin1path = encoding === 'windows-1252'
|
|
38
38
|
const { incomplete, mapper } = encodingMapper(encoding) // asserts
|
|
39
39
|
return (arr) => {
|
|
40
|
-
|
|
40
|
+
assertU8(arr)
|
|
41
41
|
if (arr.byteLength === 0) return ''
|
|
42
42
|
if (isLatin1 || isAscii(arr)) return toBuf(arr).latin1Slice() // .latin1Slice is faster than .asciiSlice
|
|
43
43
|
|
|
@@ -114,7 +114,7 @@ export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
|
|
|
114
114
|
}
|
|
115
115
|
}
|
|
116
116
|
|
|
117
|
-
export const latin1toString = createSinglebyteDecoder('iso-8859-1')
|
|
118
|
-
export const latin1fromString = createSinglebyteEncoder('iso-8859-1')
|
|
119
|
-
export const windows1252toString = createSinglebyteDecoder('windows-1252')
|
|
120
|
-
export const windows1252fromString = createSinglebyteEncoder('windows-1252')
|
|
117
|
+
export const latin1toString = /* @__PURE__ */ createSinglebyteDecoder('iso-8859-1')
|
|
118
|
+
export const latin1fromString = /* @__PURE__ */ createSinglebyteEncoder('iso-8859-1')
|
|
119
|
+
export const windows1252toString = /* @__PURE__ */ createSinglebyteDecoder('windows-1252')
|
|
120
|
+
export const windows1252fromString = /* @__PURE__ */ createSinglebyteEncoder('windows-1252')
|
package/utf16.browser.js
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
// We trust browsers to always have correct TextDecoder for utf-16le/utf-16be with ignoreBOM without streaming
|
|
2
|
+
|
|
3
|
+
import { encodeApi, decodeApiDecoders } from './fallback/utf16.js'
|
|
4
|
+
|
|
5
|
+
export const utf16fromString = (str, format = 'uint16') => encodeApi(str, false, format)
|
|
6
|
+
export const utf16fromStringLoose = (str, format = 'uint16') => encodeApi(str, true, format)
|
|
7
|
+
export const utf16toString = (arr, format = 'uint16') => decodeApiDecoders(arr, false, format)
|
|
8
|
+
export const utf16toStringLoose = (arr, format = 'uint16') => decodeApiDecoders(arr, true, format)
|
package/utf16.js
CHANGED
|
@@ -1,90 +1 @@
|
|
|
1
|
-
|
|
2
|
-
import { nativeDecoder, isLE, E_STRING } from './fallback/_utils.js'
|
|
3
|
-
|
|
4
|
-
const { TextDecoder } = globalThis
|
|
5
|
-
|
|
6
|
-
function checkDecoders() {
|
|
7
|
-
// Not all barebone engines with TextDecoder support something except utf-8
|
|
8
|
-
// Also workerd specifically has a broken utf-16le implementation
|
|
9
|
-
if (!nativeDecoder) return false
|
|
10
|
-
try {
|
|
11
|
-
const a = new TextDecoder('utf-16le').decode(Uint8Array.of(1, 2, 3, 0xd8))
|
|
12
|
-
const b = new TextDecoder('utf-16be').decode(Uint8Array.of(2, 1, 0xd8, 3))
|
|
13
|
-
return a === b && a === '\u0201\uFFFD'
|
|
14
|
-
} catch {}
|
|
15
|
-
|
|
16
|
-
return false
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
const canDecoders = checkDecoders()
|
|
20
|
-
const ignoreBOM = true
|
|
21
|
-
const decoderFatalLE = canDecoders ? new TextDecoder('utf-16le', { ignoreBOM, fatal: true }) : null
|
|
22
|
-
const decoderLooseLE = canDecoders ? new TextDecoder('utf-16le', { ignoreBOM }) : null
|
|
23
|
-
const decoderFatalBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM, fatal: true }) : null
|
|
24
|
-
const decoderLooseBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM }) : null
|
|
25
|
-
const decoderFatal16 = isLE ? decoderFatalLE : decoderFatalBE
|
|
26
|
-
const decoderLoose16 = isLE ? decoderLooseLE : decoderLooseBE
|
|
27
|
-
const { isWellFormed, toWellFormed } = String.prototype
|
|
28
|
-
|
|
29
|
-
const { E_STRICT, E_STRICT_UNICODE } = js
|
|
30
|
-
|
|
31
|
-
// Unlike utf8, operates on Uint16Arrays by default
|
|
32
|
-
|
|
33
|
-
const to8 = (a) => new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
|
|
34
|
-
|
|
35
|
-
function encode(str, loose = false, format = 'uint16') {
|
|
36
|
-
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
37
|
-
if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
|
|
38
|
-
throw new TypeError('Unknown format')
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
const shouldSwap = (isLE && format === 'uint8-be') || (!isLE && format === 'uint8-le')
|
|
42
|
-
|
|
43
|
-
// On v8 and SpiderMonkey, check via isWellFormed is faster than js
|
|
44
|
-
// On JSC, check during loop is faster than isWellFormed
|
|
45
|
-
// If isWellFormed is available, we skip check during decoding and recheck after
|
|
46
|
-
// If isWellFormed is unavailable, we check in js during decoding
|
|
47
|
-
if (!loose && isWellFormed && !isWellFormed.call(str)) throw new TypeError(E_STRICT_UNICODE)
|
|
48
|
-
const u16 = js.encode(str, loose, !loose && isWellFormed, shouldSwap)
|
|
49
|
-
|
|
50
|
-
if (format === 'uint8-le' || format === 'uint8-be') return to8(u16) // Already swapped
|
|
51
|
-
if (format === 'uint16') return u16
|
|
52
|
-
/* c8 ignore next */
|
|
53
|
-
throw new Error('Unreachable')
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
function decode(input, loose = false, format = 'uint16') {
|
|
57
|
-
let u16
|
|
58
|
-
switch (format) {
|
|
59
|
-
case 'uint16':
|
|
60
|
-
if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
|
|
61
|
-
if (canDecoders) return loose ? decoderLoose16.decode(input) : decoderFatal16.decode(input)
|
|
62
|
-
u16 = input
|
|
63
|
-
break
|
|
64
|
-
case 'uint8-le':
|
|
65
|
-
if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
|
|
66
|
-
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
67
|
-
if (canDecoders) return loose ? decoderLooseLE.decode(input) : decoderFatalLE.decode(input)
|
|
68
|
-
u16 = js.to16input(input, true)
|
|
69
|
-
break
|
|
70
|
-
case 'uint8-be':
|
|
71
|
-
if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
|
|
72
|
-
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
73
|
-
if (canDecoders) return loose ? decoderLooseBE.decode(input) : decoderFatalBE.decode(input)
|
|
74
|
-
u16 = js.to16input(input, false)
|
|
75
|
-
break
|
|
76
|
-
default:
|
|
77
|
-
throw new TypeError('Unknown format')
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
const str = js.decode(u16, loose, (!loose && isWellFormed) || (loose && toWellFormed))
|
|
81
|
-
if (!loose && isWellFormed && !isWellFormed.call(str)) throw new TypeError(E_STRICT)
|
|
82
|
-
if (loose && toWellFormed) return toWellFormed.call(str)
|
|
83
|
-
|
|
84
|
-
return str
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
export const utf16fromString = (str, format = 'uint16') => encode(str, false, format)
|
|
88
|
-
export const utf16fromStringLoose = (str, format = 'uint16') => encode(str, true, format)
|
|
89
|
-
export const utf16toString = (arr, format = 'uint16') => decode(arr, false, format)
|
|
90
|
-
export const utf16toStringLoose = (arr, format = 'uint16') => decode(arr, true, format)
|
|
1
|
+
export * from './utf16.native.js'
|
package/utf16.native.js
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { encodeApi, decodeApiDecoders, decodeApiJS } from './fallback/utf16.js'
|
|
2
|
+
import { nativeDecoder } from './fallback/platform.native.js'
|
|
3
|
+
|
|
4
|
+
function checkDecoders() {
|
|
5
|
+
// Not all barebone engines with TextDecoder support something except utf-8
|
|
6
|
+
// Also workerd specifically has a broken utf-16le implementation
|
|
7
|
+
if (!nativeDecoder) return false
|
|
8
|
+
try {
|
|
9
|
+
const a = new TextDecoder('utf-16le').decode(Uint8Array.of(1, 2, 3, 0xd8))
|
|
10
|
+
const b = new TextDecoder('utf-16be').decode(Uint8Array.of(2, 1, 0xd8, 3))
|
|
11
|
+
return a === b && a === '\u0201\uFFFD'
|
|
12
|
+
} catch {}
|
|
13
|
+
|
|
14
|
+
return false
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
const decode = checkDecoders() ? decodeApiDecoders : decodeApiJS
|
|
18
|
+
|
|
19
|
+
export const utf16fromString = (str, format = 'uint16') => encodeApi(str, false, format)
|
|
20
|
+
export const utf16fromStringLoose = (str, format = 'uint16') => encodeApi(str, true, format)
|
|
21
|
+
export const utf16toString = (arr, format = 'uint16') => decode(arr, false, format)
|
|
22
|
+
export const utf16toStringLoose = (arr, format = 'uint16') => decode(arr, true, format)
|
package/utf16.node.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { assertU8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
|
|
2
|
+
import { isDeno, isLE } from './fallback/platform.js'
|
|
3
|
+
import { E_STRICT, decodeApiDecoders } from './fallback/utf16.js'
|
|
3
4
|
|
|
4
5
|
if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
|
|
5
6
|
|
|
@@ -48,7 +49,7 @@ function decodeNode(input, loose = false, format = 'uint16') {
|
|
|
48
49
|
if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
|
|
49
50
|
ble = swapped(input, !isLE)
|
|
50
51
|
} else if (format === 'uint8-le' || format === 'uint8-be') {
|
|
51
|
-
|
|
52
|
+
assertU8(input)
|
|
52
53
|
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
53
54
|
ble = swapped(input, format === 'uint8-be')
|
|
54
55
|
} else {
|
|
@@ -61,23 +62,7 @@ function decodeNode(input, loose = false, format = 'uint16') {
|
|
|
61
62
|
throw new TypeError(E_STRICT)
|
|
62
63
|
}
|
|
63
64
|
|
|
64
|
-
|
|
65
|
-
let encoding
|
|
66
|
-
if (format === 'uint16') {
|
|
67
|
-
if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
|
|
68
|
-
encoding = isLE ? 'utf-16le' : 'utf-16be'
|
|
69
|
-
} else if (format === 'uint8-le' || format === 'uint8-be') {
|
|
70
|
-
if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
|
|
71
|
-
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
72
|
-
encoding = format === 'uint8-le' ? 'utf-16le' : 'utf-16be'
|
|
73
|
-
} else {
|
|
74
|
-
throw new TypeError('Unknown format')
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
return new TextDecoder(encoding, { ignoreBOM: true, fatal: !loose }).decode(input) // TODO: cache decoder?
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
const decode = isDeno ? decodeDecoder : decodeNode
|
|
65
|
+
const decode = isDeno ? decodeApiDecoders : decodeNode
|
|
81
66
|
|
|
82
67
|
export const utf16fromString = (str, format = 'uint16') => encode(str, false, format)
|
|
83
68
|
export const utf16fromStringLoose = (str, format = 'uint16') => encode(str, true, format)
|
package/utf8.js
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
import { assertUint8 } from './assert.js'
|
|
2
1
|
import { typedView } from './array.js'
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import * as js from './fallback/utf8.js'
|
|
2
|
+
import { assertU8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
|
|
3
|
+
import { nativeDecoder, nativeEncoder } from './fallback/platform.js'
|
|
4
|
+
import * as js from './fallback/utf8.auto.js'
|
|
6
5
|
|
|
7
|
-
const { TextDecoder, decodeURIComponent, escape } = globalThis // Buffer is optional
|
|
8
6
|
// ignoreBOM: true means that BOM will be left as-is, i.e. will be present in the output
|
|
9
7
|
// We don't want to strip anything unexpectedly
|
|
10
8
|
const decoderLoose = nativeDecoder
|
|
@@ -13,10 +11,6 @@ const decoderFatal = nativeDecoder
|
|
|
13
11
|
: null
|
|
14
12
|
const { isWellFormed } = String.prototype
|
|
15
13
|
|
|
16
|
-
const { E_STRICT, E_STRICT_UNICODE } = js
|
|
17
|
-
|
|
18
|
-
const shouldUseEscapePath = isHermes // faster only on Hermes, js path beats it on normal engines
|
|
19
|
-
|
|
20
14
|
function deLoose(str, loose, res) {
|
|
21
15
|
if (loose || str.length === res.length) return res // length is equal only for ascii, which is automatically fine
|
|
22
16
|
if (isWellFormed) {
|
|
@@ -35,7 +29,7 @@ function deLoose(str, loose, res) {
|
|
|
35
29
|
start = pos + 1
|
|
36
30
|
if (res[pos + 1] === 0xbf && res[pos + 2] === 0xbd) {
|
|
37
31
|
// Found a replacement char in output, need to recheck if we encoded the input correctly
|
|
38
|
-
if (!nativeDecoder && str.length < 1e7) {
|
|
32
|
+
if (js.decodeFast && !nativeDecoder && str.length < 1e7) {
|
|
39
33
|
// This is ~2x faster than decode in Hermes
|
|
40
34
|
try {
|
|
41
35
|
if (encodeURI(str) !== null) return res // guard against optimizing out
|
|
@@ -51,32 +45,19 @@ function deLoose(str, loose, res) {
|
|
|
51
45
|
function encode(str, loose = false) {
|
|
52
46
|
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
53
47
|
if (str.length === 0) return new Uint8Array() // faster than Uint8Array.of
|
|
54
|
-
if (nativeEncoder) return deLoose(str, loose, nativeEncoder.encode(str))
|
|
48
|
+
if (nativeEncoder || !js.encode) return deLoose(str, loose, nativeEncoder.encode(str))
|
|
55
49
|
// No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder
|
|
56
50
|
return js.encode(str, loose)
|
|
57
51
|
}
|
|
58
52
|
|
|
59
53
|
function decode(arr, loose = false) {
|
|
60
|
-
|
|
54
|
+
assertU8(arr)
|
|
61
55
|
if (arr.byteLength === 0) return ''
|
|
62
|
-
if (nativeDecoder
|
|
63
|
-
|
|
64
|
-
// Fast path for ASCII prefix, this is faster than all alternatives below
|
|
65
|
-
const prefix = decodeLatin1(arr, 0, asciiPrefix(arr)) // No native decoder to use, so decodeAscii is useless here
|
|
66
|
-
if (prefix.length === arr.length) return prefix
|
|
67
|
-
|
|
68
|
-
// This codepath gives a ~3x perf boost on Hermes
|
|
69
|
-
if (shouldUseEscapePath && escape && decodeURIComponent) {
|
|
70
|
-
const o = escape(decodeLatin1(arr, prefix.length, arr.length))
|
|
71
|
-
try {
|
|
72
|
-
return prefix + decodeURIComponent(o) // Latin1 to utf8
|
|
73
|
-
} catch {
|
|
74
|
-
if (!loose) throw new TypeError(E_STRICT)
|
|
75
|
-
// Ok, we have to use manual implementation for loose decoder
|
|
76
|
-
}
|
|
56
|
+
if (nativeDecoder || !js.decodeFast) {
|
|
57
|
+
return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr) // Node.js and browsers
|
|
77
58
|
}
|
|
78
59
|
|
|
79
|
-
return
|
|
60
|
+
return js.decodeFast(arr, loose)
|
|
80
61
|
}
|
|
81
62
|
|
|
82
63
|
export const utf8fromString = (str, format = 'uint8') => typedView(encode(str, false), format)
|