@exodus/bytes 1.8.0 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +370 -90
- package/array.d.ts +41 -4
- package/base32.d.ts +83 -0
- package/base58.d.ts +62 -0
- package/base58.js +1 -1
- package/base58check.d.ts +131 -0
- package/base58check.js +3 -3
- package/base64.d.ts +40 -20
- package/bech32.d.ts +76 -0
- package/bigint.d.ts +48 -0
- package/encoding-browser.browser.js +29 -0
- package/encoding-browser.d.ts +24 -0
- package/encoding-browser.js +1 -0
- package/encoding-browser.native.js +1 -0
- package/encoding-lite.d.ts +61 -0
- package/encoding.d.ts +93 -11
- package/encoding.js +4 -3
- package/fallback/_utils.js +15 -11
- package/fallback/encoding.api.js +81 -0
- package/fallback/encoding.js +37 -121
- package/fallback/encoding.util.js +34 -0
- package/fallback/latin1.js +1 -0
- package/fallback/multi-byte.encodings.json +1 -0
- package/fallback/multi-byte.js +527 -71
- package/fallback/multi-byte.table.js +23 -15
- package/fallback/single-byte.js +1 -1
- package/fallback/utf16.js +45 -26
- package/fallback/utf8.js +1 -1
- package/hex.d.ts +22 -9
- package/index.d.ts +43 -0
- package/index.js +5 -0
- package/multi-byte.d.ts +57 -0
- package/multi-byte.js +7 -1
- package/multi-byte.node.js +7 -1
- package/package.json +83 -10
- package/single-byte.d.ts +149 -0
- package/single-byte.js +9 -11
- package/single-byte.node.js +29 -26
- package/utf16.d.ts +92 -0
- package/utf16.js +1 -0
- package/utf16.node.js +6 -2
- package/utf8.d.ts +52 -18
- package/utf8.js +7 -2
- package/utf8.node.js +1 -1
- package/wif.d.ts +76 -0
package/encoding-lite.d.ts
CHANGED
|
@@ -1 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The exact same exports as `@exodus/bytes/encoding.js` are also exported as
|
|
3
|
+
* `@exodus/bytes/encoding-lite.js`, with the difference that the lite version does not load
|
|
4
|
+
* multi-byte `TextDecoder` encodings by default to reduce bundle size 10x.
|
|
5
|
+
*
|
|
6
|
+
* ```js
|
|
7
|
+
* import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-lite.js'
|
|
8
|
+
* import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-lite.js' // Requires Streams
|
|
9
|
+
*
|
|
10
|
+
* // Hooks for standards
|
|
11
|
+
* import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding-lite.js'
|
|
12
|
+
* ```
|
|
13
|
+
*
|
|
14
|
+
* The only affected encodings are: `gbk`, `gb18030`, `big5`, `euc-kr`, `euc-jp`, `iso-2022-jp`, `shift_jis`
|
|
15
|
+
* and their [labels](https://encoding.spec.whatwg.org/#names-and-labels) when used with `TextDecoder`.
|
|
16
|
+
*
|
|
17
|
+
* Legacy single-byte encodings are loaded by default in both cases.
|
|
18
|
+
*
|
|
19
|
+
* `TextEncoder` and hooks for standards (including `labelToName` / `normalizeEncoding`) do not have any behavior
|
|
20
|
+
* differences in the lite version and support the full range of inputs.
|
|
21
|
+
*
|
|
22
|
+
* To avoid inconsistencies, the exported classes and methods are exactly the same objects.
|
|
23
|
+
*
|
|
24
|
+
* ```console
|
|
25
|
+
* > lite = require('@exodus/bytes/encoding-lite.js')
|
|
26
|
+
* [Module: null prototype] {
|
|
27
|
+
* TextDecoder: [class TextDecoder],
|
|
28
|
+
* TextDecoderStream: [class TextDecoderStream],
|
|
29
|
+
* TextEncoder: [class TextEncoder],
|
|
30
|
+
* TextEncoderStream: [class TextEncoderStream],
|
|
31
|
+
* getBOMEncoding: [Function: getBOMEncoding],
|
|
32
|
+
* labelToName: [Function: labelToName],
|
|
33
|
+
* legacyHookDecode: [Function: legacyHookDecode],
|
|
34
|
+
* normalizeEncoding: [Function: normalizeEncoding]
|
|
35
|
+
* }
|
|
36
|
+
* > new lite.TextDecoder('big5').decode(Uint8Array.of(0x25))
|
|
37
|
+
* Uncaught:
|
|
38
|
+
* Error: import '@exodus/bytes/encoding.js' for legacy multi-byte encodings support
|
|
39
|
+
*
|
|
40
|
+
* > full = require('@exodus/bytes/encoding.js')
|
|
41
|
+
* [Module: null prototype] {
|
|
42
|
+
* TextDecoder: [class TextDecoder],
|
|
43
|
+
* TextDecoderStream: [class TextDecoderStream],
|
|
44
|
+
* TextEncoder: [class TextEncoder],
|
|
45
|
+
* TextEncoderStream: [class TextEncoderStream],
|
|
46
|
+
* getBOMEncoding: [Function: getBOMEncoding],
|
|
47
|
+
* labelToName: [Function: labelToName],
|
|
48
|
+
* legacyHookDecode: [Function: legacyHookDecode],
|
|
49
|
+
* normalizeEncoding: [Function: normalizeEncoding]
|
|
50
|
+
* }
|
|
51
|
+
* > full.TextDecoder === lite.TextDecoder
|
|
52
|
+
* true
|
|
53
|
+
* > new full.TextDecoder('big5').decode(Uint8Array.of(0x25))
|
|
54
|
+
* '%'
|
|
55
|
+
* > new lite.TextDecoder('big5').decode(Uint8Array.of(0x25))
|
|
56
|
+
* '%'
|
|
57
|
+
* ```
|
|
58
|
+
*
|
|
59
|
+
* @module @exodus/bytes/encoding-lite.js
|
|
60
|
+
*/
|
|
61
|
+
|
|
1
62
|
export * from './encoding.js'
|
package/encoding.d.ts
CHANGED
|
@@ -1,14 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Implements the [Encoding standard](https://encoding.spec.whatwg.org/):
|
|
3
|
+
* [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder),
|
|
4
|
+
* [TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder),
|
|
5
|
+
* [TextDecoderStream](https://encoding.spec.whatwg.org/#interface-textdecoderstream),
|
|
6
|
+
* [TextEncoderStream](https://encoding.spec.whatwg.org/#interface-textencoderstream),
|
|
7
|
+
* some [hooks](https://encoding.spec.whatwg.org/#specification-hooks).
|
|
8
|
+
*
|
|
9
|
+
* ```js
|
|
10
|
+
* import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
|
|
11
|
+
* import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding.js' // Requires Streams
|
|
12
|
+
*
|
|
13
|
+
* // Hooks for standards
|
|
14
|
+
* import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding.js'
|
|
15
|
+
* ```
|
|
16
|
+
*
|
|
17
|
+
* @module @exodus/bytes/encoding.js
|
|
18
|
+
*/
|
|
19
|
+
|
|
1
20
|
/// <reference types="node" />
|
|
2
21
|
|
|
3
22
|
/**
|
|
4
|
-
*
|
|
23
|
+
* Convert an encoding [label](https://encoding.spec.whatwg.org/#names-and-labels) to its name,
|
|
24
|
+
* as an ASCII-lowercased string.
|
|
25
|
+
*
|
|
26
|
+
* If an encoding with that label does not exist, returns `null`.
|
|
27
|
+
*
|
|
28
|
+
* This is the same as [`decoder.encoding` getter](https://encoding.spec.whatwg.org/#dom-textdecoder-encoding),
|
|
29
|
+
* except that it:
|
|
30
|
+
* 1. Supports [`replacement` encoding](https://encoding.spec.whatwg.org/#replacement) and its
|
|
31
|
+
* [labels](https://encoding.spec.whatwg.org/#ref-for-replacement%E2%91%A1)
|
|
32
|
+
* 2. Does not throw for invalid labels and instead returns `null`
|
|
33
|
+
*
|
|
34
|
+
* It is identical to:
|
|
35
|
+
* ```js
|
|
36
|
+
* labelToName(label)?.toLowerCase() ?? null
|
|
37
|
+
* ```
|
|
38
|
+
*
|
|
39
|
+
* All encoding names are also valid labels for corresponding encodings.
|
|
40
|
+
*
|
|
5
41
|
* @param label - The encoding label to normalize
|
|
6
42
|
* @returns The normalized encoding name, or null if invalid
|
|
7
43
|
*/
|
|
8
44
|
export function normalizeEncoding(label: string): string | null;
|
|
9
45
|
|
|
10
46
|
/**
|
|
11
|
-
* Implements BOM sniff
|
|
47
|
+
* Implements [BOM sniff](https://encoding.spec.whatwg.org/#bom-sniff) legacy hook.
|
|
48
|
+
*
|
|
49
|
+
* Given a `TypedArray` or an `ArrayBuffer` instance `input`, returns one of:
|
|
50
|
+
* - `'utf-8'`, if `input` starts with UTF-8 byte order mark.
|
|
51
|
+
* - `'utf-16le'`, if `input` starts with UTF-16LE byte order mark.
|
|
52
|
+
* - `'utf-16be'`, if `input` starts with UTF-16BE byte order mark.
|
|
53
|
+
* - `null` otherwise.
|
|
54
|
+
*
|
|
12
55
|
* @param input - The bytes to check for BOM
|
|
13
56
|
* @returns The encoding ('utf-8', 'utf-16le', 'utf-16be'), or null if no BOM found
|
|
14
57
|
*/
|
|
@@ -17,7 +60,27 @@ export function getBOMEncoding(
|
|
|
17
60
|
): 'utf-8' | 'utf-16le' | 'utf-16be' | null;
|
|
18
61
|
|
|
19
62
|
/**
|
|
20
|
-
* Implements decode
|
|
63
|
+
* Implements [decode](https://encoding.spec.whatwg.org/#decode) legacy hook.
|
|
64
|
+
*
|
|
65
|
+
* Given a `TypedArray` or an `ArrayBuffer` instance `input` and an optional `fallbackEncoding`
|
|
66
|
+
* encoding [label](https://encoding.spec.whatwg.org/#names-and-labels),
|
|
67
|
+
* sniffs encoding from BOM with `fallbackEncoding` fallback and then
|
|
68
|
+
* decodes the `input` using that encoding, skipping BOM if it was present.
|
|
69
|
+
*
|
|
70
|
+
* Notes:
|
|
71
|
+
*
|
|
72
|
+
* - BOM-sniffed encoding takes precedence over `fallbackEncoding` option per spec.
|
|
73
|
+
* Use with care.
|
|
74
|
+
* - Always operates in non-fatal [mode](https://encoding.spec.whatwg.org/#textdecoder-error-mode),
|
|
75
|
+
* aka replacement. It can convert different byte sequences to equal strings.
|
|
76
|
+
*
|
|
77
|
+
* This method is similar to the following code, except that it doesn't support encoding labels and
|
|
78
|
+
* only expects a lowercased encoding name:
|
|
79
|
+
*
|
|
80
|
+
* ```js
|
|
81
|
+
* new TextDecoder(getBOMEncoding(input) ?? fallbackEncoding).decode(input)
|
|
82
|
+
* ```
|
|
83
|
+
*
|
|
21
84
|
* @param input - The bytes to decode
|
|
22
85
|
* @param fallbackEncoding - The encoding to use if no BOM detected (default: 'utf-8')
|
|
23
86
|
* @returns The decoded string
|
|
@@ -28,31 +91,50 @@ export function legacyHookDecode(
|
|
|
28
91
|
): string;
|
|
29
92
|
|
|
30
93
|
/**
|
|
31
|
-
*
|
|
94
|
+
* Implements [get an encoding from a string `label`](https://encoding.spec.whatwg.org/#concept-encoding-get).
|
|
95
|
+
*
|
|
96
|
+
* Convert an encoding [label](https://encoding.spec.whatwg.org/#names-and-labels) to its name,
|
|
97
|
+
* as a case-sensitive string.
|
|
98
|
+
*
|
|
99
|
+
* If an encoding with that label does not exist, returns `null`.
|
|
100
|
+
*
|
|
101
|
+
* All encoding names are also valid labels for corresponding encodings.
|
|
102
|
+
*
|
|
32
103
|
* @param label - The encoding label
|
|
33
104
|
* @returns The proper case encoding name, or null if invalid
|
|
34
105
|
*/
|
|
35
106
|
export function labelToName(label: string): string | null;
|
|
36
107
|
|
|
37
108
|
/**
|
|
38
|
-
*
|
|
39
|
-
*
|
|
109
|
+
* [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder) implementation/polyfill.
|
|
110
|
+
*
|
|
111
|
+
* Decode bytes to strings according to [WHATWG Encoding](https://encoding.spec.whatwg.org) specification.
|
|
40
112
|
*/
|
|
41
113
|
export const TextDecoder: typeof globalThis.TextDecoder;
|
|
42
114
|
|
|
43
115
|
/**
|
|
44
|
-
*
|
|
116
|
+
* [TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder) implementation/polyfill.
|
|
117
|
+
*
|
|
118
|
+
* Encode strings to UTF-8 bytes according to [WHATWG Encoding](https://encoding.spec.whatwg.org) specification.
|
|
45
119
|
*/
|
|
46
120
|
export const TextEncoder: typeof globalThis.TextEncoder;
|
|
47
121
|
|
|
48
122
|
/**
|
|
49
|
-
*
|
|
50
|
-
*
|
|
123
|
+
* [TextDecoderStream](https://encoding.spec.whatwg.org/#interface-textdecoderstream) implementation/polyfill.
|
|
124
|
+
*
|
|
125
|
+
* A [Streams](https://streams.spec.whatwg.org/) wrapper for `TextDecoder`.
|
|
126
|
+
*
|
|
127
|
+
* Requires [Streams](https://streams.spec.whatwg.org/) to be either supported by the platform or
|
|
128
|
+
* [polyfilled](https://npmjs.com/package/web-streams-polyfill).
|
|
51
129
|
*/
|
|
52
130
|
export const TextDecoderStream: typeof globalThis.TextDecoderStream;
|
|
53
131
|
|
|
54
132
|
/**
|
|
55
|
-
*
|
|
56
|
-
*
|
|
133
|
+
* [TextEncoderStream](https://encoding.spec.whatwg.org/#interface-textencoderstream) implementation/polyfill.
|
|
134
|
+
*
|
|
135
|
+
* A [Streams](https://streams.spec.whatwg.org/) wrapper for `TextEncoder`.
|
|
136
|
+
*
|
|
137
|
+
* Requires [Streams](https://streams.spec.whatwg.org/) to be either supported by the platform or
|
|
138
|
+
* [polyfilled](https://npmjs.com/package/web-streams-polyfill).
|
|
57
139
|
*/
|
|
58
140
|
export const TextEncoderStream: typeof globalThis.TextEncoderStream;
|
package/encoding.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js'
|
|
2
|
-
import {
|
|
1
|
+
import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js'
|
|
2
|
+
import { multibyteEncoder } from './fallback/multi-byte.js'
|
|
3
|
+
import { setMultibyte } from './fallback/encoding.js'
|
|
3
4
|
|
|
4
|
-
|
|
5
|
+
setMultibyte(createMultibyteDecoder, multibyteEncoder)
|
|
5
6
|
|
|
6
7
|
export {
|
|
7
8
|
TextDecoder,
|
package/fallback/_utils.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
const { Buffer, TextEncoder, TextDecoder } = globalThis
|
|
2
2
|
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
|
|
3
3
|
export const nativeBuffer = haveNativeBuffer ? Buffer : null
|
|
4
|
-
export const isHermes =
|
|
5
|
-
export const isDeno =
|
|
6
|
-
export const isLE = new Uint8Array(Uint16Array.of(258).buffer)[0] === 2
|
|
4
|
+
export const isHermes = !!globalThis.HermesInternal
|
|
5
|
+
export const isDeno = !!globalThis.Deno
|
|
6
|
+
export const isLE = /* @__PURE__ */ (() => new Uint8Array(Uint16Array.of(258).buffer)[0] === 2)()
|
|
7
7
|
|
|
8
8
|
// We consider Node.js TextDecoder/TextEncoder native
|
|
9
9
|
let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]'))
|
|
@@ -17,16 +17,19 @@ export const nativeDecoder = isNative(TextDecoder)
|
|
|
17
17
|
// Actually windows-1252, compatible with ascii and latin1 decoding
|
|
18
18
|
// Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
|
|
19
19
|
// in 2025 due to a regression, so we call it Latin1 as it's usable only for that
|
|
20
|
-
|
|
21
|
-
if (nativeDecoder) {
|
|
20
|
+
const getNativeLain1 = () => {
|
|
22
21
|
// Not all barebone engines with TextDecoder support something except utf-8, detect
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
22
|
+
if (!nativeDecoder) {
|
|
23
|
+
try {
|
|
24
|
+
return new TextDecoder('latin1', { ignoreBOM: true })
|
|
25
|
+
} catch {}
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
return null
|
|
26
29
|
}
|
|
27
30
|
|
|
28
|
-
export const nativeDecoderLatin1 =
|
|
29
|
-
export const canDecoders =
|
|
31
|
+
export const nativeDecoderLatin1 = /* @__PURE__ */ getNativeLain1()
|
|
32
|
+
export const canDecoders = !!nativeDecoderLatin1
|
|
30
33
|
|
|
31
34
|
// Block Firefox < 146 specifically from using native hex/base64, as it's very slow there
|
|
32
35
|
// Refs: https://bugzilla.mozilla.org/show_bug.cgi?id=1994067 (and linked issues), fixed in 146
|
|
@@ -47,10 +50,11 @@ function shouldSkipBuiltins() {
|
|
|
47
50
|
return /firefox/i.test(g.navigator.userAgent || '') // as simple as we can
|
|
48
51
|
}
|
|
49
52
|
|
|
53
|
+
/* c8 ignore next */
|
|
50
54
|
return false // eslint-disable-line no-unreachable
|
|
51
55
|
}
|
|
52
56
|
|
|
53
|
-
export const skipWeb = shouldSkipBuiltins()
|
|
57
|
+
export const skipWeb = /* @__PURE__ */ shouldSkipBuiltins()
|
|
54
58
|
|
|
55
59
|
function decodePartAddition(a, start, end, m) {
|
|
56
60
|
let o = ''
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import labels from './encoding.labels.js'
|
|
2
|
+
|
|
3
|
+
let labelsMap
|
|
4
|
+
|
|
5
|
+
export const E_ENCODING = 'Unknown encoding'
|
|
6
|
+
|
|
7
|
+
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
8
|
+
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
9
|
+
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
10
|
+
export function normalizeEncoding(label) {
|
|
11
|
+
// fast path
|
|
12
|
+
if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
|
|
13
|
+
if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
|
|
14
|
+
// full map
|
|
15
|
+
if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
|
|
16
|
+
const low = `${label}`.trim().toLowerCase()
|
|
17
|
+
if (Object.hasOwn(labels, low)) return low
|
|
18
|
+
if (!labelsMap) {
|
|
19
|
+
labelsMap = new Map()
|
|
20
|
+
for (const [label, aliases] of Object.entries(labels)) {
|
|
21
|
+
for (const alias of aliases) labelsMap.set(alias, label)
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
const mapped = labelsMap.get(low)
|
|
26
|
+
if (mapped) return mapped
|
|
27
|
+
return null
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
// TODO: make this more strict against Symbol.toStringTag
|
|
31
|
+
// Is not very significant though, anything faking Symbol.toStringTag could as well override
|
|
32
|
+
// prototypes, which is not something we protect against
|
|
33
|
+
|
|
34
|
+
function isAnyArrayBuffer(x) {
|
|
35
|
+
if (x instanceof ArrayBuffer) return true
|
|
36
|
+
if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return true
|
|
37
|
+
if (!x || typeof x.byteLength !== 'number') return false
|
|
38
|
+
const s = Object.prototype.toString.call(x)
|
|
39
|
+
return s === '[object ArrayBuffer]' || s === '[object SharedArrayBuffer]'
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export function fromSource(x) {
|
|
43
|
+
if (x instanceof Uint8Array) return x
|
|
44
|
+
if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
|
|
45
|
+
if (isAnyArrayBuffer(x)) {
|
|
46
|
+
if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
|
|
47
|
+
// Old engines without .detached, try-catch
|
|
48
|
+
try {
|
|
49
|
+
return new Uint8Array(x)
|
|
50
|
+
} catch {
|
|
51
|
+
return new Uint8Array()
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
59
|
+
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
60
|
+
export function getBOMEncoding(input) {
|
|
61
|
+
const u8 = fromSource(input) // asserts
|
|
62
|
+
if (u8.length >= 3 && u8[0] === 0xef && u8[1] === 0xbb && u8[2] === 0xbf) return 'utf-8'
|
|
63
|
+
if (u8.length < 2) return null
|
|
64
|
+
if (u8[0] === 0xff && u8[1] === 0xfe) return 'utf-16le'
|
|
65
|
+
if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be'
|
|
66
|
+
return null
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
|
|
70
|
+
|
|
71
|
+
// Unlike normalizeEncoding, case-sensitive
|
|
72
|
+
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
73
|
+
export function labelToName(label) {
|
|
74
|
+
const enc = normalizeEncoding(label)
|
|
75
|
+
if (enc === 'utf-8') return 'UTF-8' // fast path
|
|
76
|
+
if (!enc) return enc
|
|
77
|
+
if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
|
|
78
|
+
if (enc === 'big5') return 'Big5'
|
|
79
|
+
if (enc === 'shift_jis') return 'Shift_JIS'
|
|
80
|
+
return enc
|
|
81
|
+
}
|
package/fallback/encoding.js
CHANGED
|
@@ -5,82 +5,36 @@ import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
|
|
|
5
5
|
import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
|
|
6
6
|
import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
|
|
7
7
|
import labels from './encoding.labels.js'
|
|
8
|
-
import {
|
|
8
|
+
import { fromSource, getBOMEncoding, normalizeEncoding, E_ENCODING } from './encoding.api.js'
|
|
9
|
+
import { unfinishedBytes, mergePrefix } from './encoding.util.js'
|
|
9
10
|
|
|
11
|
+
export { labelToName, getBOMEncoding, normalizeEncoding } from './encoding.api.js'
|
|
12
|
+
|
|
13
|
+
const E_MULTI = "import '@exodus/bytes/encoding.js' for legacy multi-byte encodings support"
|
|
10
14
|
const E_OPTIONS = 'The "options" argument must be of type object'
|
|
11
|
-
const E_ENCODING = 'Unknown encoding'
|
|
12
15
|
const replacementChar = '\uFFFD'
|
|
13
|
-
|
|
14
|
-
const E_MULTI =
|
|
15
|
-
'Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encoding.js for full encodings range support'
|
|
16
16
|
const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
|
|
17
|
-
let createMultibyteDecoder
|
|
17
|
+
let createMultibyteDecoder, multibyteEncoder
|
|
18
18
|
|
|
19
|
-
export
|
|
19
|
+
export const isMultibyte = (enc) => multibyteSet.has(enc)
|
|
20
|
+
export function setMultibyte(createDecoder, createEncoder) {
|
|
20
21
|
createMultibyteDecoder = createDecoder
|
|
22
|
+
multibyteEncoder = createEncoder
|
|
21
23
|
}
|
|
22
24
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
27
|
-
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
28
|
-
export function normalizeEncoding(label) {
|
|
29
|
-
// fast path
|
|
30
|
-
if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
|
|
31
|
-
if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
|
|
32
|
-
// full map
|
|
33
|
-
if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
|
|
34
|
-
const low = `${label}`.trim().toLowerCase()
|
|
35
|
-
if (Object.hasOwn(labels, low)) return low
|
|
36
|
-
if (!labelsMap) {
|
|
37
|
-
labelsMap = new Map()
|
|
38
|
-
for (const [label, aliases] of Object.entries(labels)) {
|
|
39
|
-
for (const alias of aliases) labelsMap.set(alias, label)
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
const mapped = labelsMap.get(low)
|
|
44
|
-
if (mapped) return mapped
|
|
45
|
-
return null
|
|
25
|
+
export function getMultibyteEncoder() {
|
|
26
|
+
if (!multibyteEncoder) throw new Error(E_MULTI)
|
|
27
|
+
return multibyteEncoder
|
|
46
28
|
}
|
|
47
29
|
|
|
48
30
|
const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
|
|
49
31
|
|
|
50
|
-
// TODO: make this more strict against Symbol.toStringTag
|
|
51
|
-
// Is not very significant though, anything faking Symbol.toStringTag could as well override
|
|
52
|
-
// prototypes, which is not something we protect against
|
|
53
|
-
|
|
54
|
-
function isAnyArrayBuffer(x) {
|
|
55
|
-
if (x instanceof ArrayBuffer) return true
|
|
56
|
-
if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return true
|
|
57
|
-
if (!x || typeof x.byteLength !== 'number') return false
|
|
58
|
-
const s = Object.prototype.toString.call(x)
|
|
59
|
-
return s === '[object ArrayBuffer]' || s === '[object SharedArrayBuffer]'
|
|
60
|
-
}
|
|
61
|
-
|
|
62
32
|
function isAnyUint8Array(x) {
|
|
63
33
|
if (x instanceof Uint8Array) return true
|
|
64
34
|
if (!x || !ArrayBuffer.isView(x) || x.BYTES_PER_ELEMENT !== 1) return false
|
|
65
35
|
return Object.prototype.toString.call(x) === '[object Uint8Array]'
|
|
66
36
|
}
|
|
67
37
|
|
|
68
|
-
const fromSource = (x) => {
|
|
69
|
-
if (x instanceof Uint8Array) return x
|
|
70
|
-
if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
|
|
71
|
-
if (isAnyArrayBuffer(x)) {
|
|
72
|
-
if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
|
|
73
|
-
// Old engines without .detached, try-catch
|
|
74
|
-
try {
|
|
75
|
-
return new Uint8Array(x)
|
|
76
|
-
} catch {
|
|
77
|
-
return new Uint8Array()
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
|
|
82
|
-
}
|
|
83
|
-
|
|
84
38
|
function unicodeDecoder(encoding, loose) {
|
|
85
39
|
if (encoding === 'utf-8') return loose ? utf8toStringLoose : utf8toString // likely
|
|
86
40
|
const form = encoding === 'utf-16le' ? 'uint8-le' : 'uint8-be'
|
|
@@ -99,10 +53,10 @@ export class TextDecoder {
|
|
|
99
53
|
const enc = normalizeEncoding(encoding)
|
|
100
54
|
if (!enc || enc === 'replacement') throw new RangeError(E_ENCODING)
|
|
101
55
|
define(this, 'encoding', enc)
|
|
102
|
-
define(this, 'fatal',
|
|
103
|
-
define(this, 'ignoreBOM',
|
|
56
|
+
define(this, 'fatal', !!options.fatal)
|
|
57
|
+
define(this, 'ignoreBOM', !!options.ignoreBOM)
|
|
104
58
|
this.#unicode = enc === 'utf-8' || enc === 'utf-16le' || enc === 'utf-16be'
|
|
105
|
-
this.#multibyte = !this.#unicode &&
|
|
59
|
+
this.#multibyte = !this.#unicode && isMultibyte(enc)
|
|
106
60
|
this.#canBOM = this.#unicode && !this.ignoreBOM
|
|
107
61
|
}
|
|
108
62
|
|
|
@@ -112,44 +66,26 @@ export class TextDecoder {
|
|
|
112
66
|
|
|
113
67
|
decode(input, options = {}) {
|
|
114
68
|
if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
|
|
115
|
-
const stream =
|
|
69
|
+
const stream = !!options.stream
|
|
116
70
|
let u = input === undefined ? new Uint8Array() : fromSource(input)
|
|
71
|
+
const empty = u.length === 0 // also can't be streaming after next line
|
|
72
|
+
if (empty && stream) return '' // no state change
|
|
117
73
|
|
|
118
74
|
if (this.#unicode) {
|
|
119
75
|
let prefix
|
|
120
76
|
if (this.#chunk) {
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
u =
|
|
124
|
-
} else if (u.length < 3) {
|
|
125
|
-
// No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
|
|
126
|
-
const a = new Uint8Array(u.length + this.#chunk.length)
|
|
127
|
-
a.set(this.#chunk)
|
|
128
|
-
a.set(u, this.#chunk.length)
|
|
129
|
-
u = a
|
|
77
|
+
const merged = mergePrefix(u, this.#chunk, this.encoding)
|
|
78
|
+
if (u.length < 3) {
|
|
79
|
+
u = merged // might be unfinished, but fully consumed old u
|
|
130
80
|
} else {
|
|
131
|
-
|
|
132
|
-
const
|
|
133
|
-
|
|
134
|
-
t.set(u.subarray(0, 3), this.#chunk.length)
|
|
135
|
-
|
|
136
|
-
// Stop at the first offset where unfinished bytes reaches 0 or fits into u
|
|
137
|
-
// If that doesn't happen (u too short), just concat chunk and u completely
|
|
138
|
-
for (let i = 1; i <= 3; i++) {
|
|
139
|
-
const unfinished = unfinishedBytes(t, this.#chunk.length + i, this.encoding) // 0-3
|
|
140
|
-
if (unfinished <= i) {
|
|
141
|
-
// Always reachable at 3, but we still need 'unfinished' value for it
|
|
142
|
-
const add = i - unfinished // 0-3
|
|
143
|
-
prefix = add > 0 ? t.subarray(0, this.#chunk.length + add) : this.#chunk
|
|
144
|
-
if (add > 0) u = u.subarray(add)
|
|
145
|
-
break
|
|
146
|
-
}
|
|
147
|
-
}
|
|
81
|
+
prefix = merged // stops at complete chunk
|
|
82
|
+
const add = prefix.length - this.#chunk.length
|
|
83
|
+
if (add > 0) u = u.subarray(add)
|
|
148
84
|
}
|
|
149
85
|
|
|
150
86
|
this.#chunk = null
|
|
151
|
-
} else if (
|
|
152
|
-
|
|
87
|
+
} else if (empty) {
|
|
88
|
+
this.#canBOM = !this.ignoreBOM // not streaming
|
|
153
89
|
return ''
|
|
154
90
|
}
|
|
155
91
|
|
|
@@ -170,27 +106,31 @@ export class TextDecoder {
|
|
|
170
106
|
}
|
|
171
107
|
}
|
|
172
108
|
|
|
109
|
+
let seenBOM = false
|
|
173
110
|
if (this.#canBOM) {
|
|
174
111
|
const bom = this.#findBom(prefix ?? u)
|
|
175
112
|
if (bom) {
|
|
176
|
-
|
|
113
|
+
seenBOM = true
|
|
177
114
|
if (prefix) {
|
|
178
115
|
prefix = prefix.subarray(bom)
|
|
179
116
|
} else {
|
|
180
117
|
u = u.subarray(bom)
|
|
181
118
|
}
|
|
182
119
|
}
|
|
120
|
+
} else if (!stream && !this.ignoreBOM) {
|
|
121
|
+
this.#canBOM = true
|
|
183
122
|
}
|
|
184
123
|
|
|
185
124
|
if (!this.#decode) this.#decode = unicodeDecoder(this.encoding, !this.fatal)
|
|
186
125
|
try {
|
|
187
126
|
const res = (prefix ? this.#decode(prefix) : '') + this.#decode(u) + suffix
|
|
188
|
-
if
|
|
189
|
-
|
|
190
|
-
if (!stream) this.#canBOM = !this.ignoreBOM
|
|
127
|
+
// "BOM seen" is set on the current decode call only if it did not error, in "serialize I/O queue" after decoding
|
|
128
|
+
if (stream && (seenBOM || res.length > 0)) this.#canBOM = false
|
|
191
129
|
return res
|
|
192
130
|
} catch (err) {
|
|
193
131
|
this.#chunk = null // reset unfinished chunk on errors
|
|
132
|
+
// The correct way per spec seems to be not destroying the decoder state (aka BOM here) in stream mode
|
|
133
|
+
// See also multi-byte.js
|
|
194
134
|
throw err
|
|
195
135
|
}
|
|
196
136
|
|
|
@@ -215,6 +155,7 @@ export class TextDecoder {
|
|
|
215
155
|
return u.byteLength >= 2 && u[0] === 0xfe && u[1] === 0xff ? 2 : 0
|
|
216
156
|
}
|
|
217
157
|
|
|
158
|
+
/* c8 ignore next */
|
|
218
159
|
throw new Error('Unreachable')
|
|
219
160
|
}
|
|
220
161
|
}
|
|
@@ -341,17 +282,6 @@ export class TextEncoderStream {
|
|
|
341
282
|
}
|
|
342
283
|
}
|
|
343
284
|
|
|
344
|
-
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
345
|
-
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
346
|
-
export function getBOMEncoding(input) {
|
|
347
|
-
const u8 = fromSource(input) // asserts
|
|
348
|
-
if (u8.length >= 3 && u8[0] === 0xef && u8[1] === 0xbb && u8[2] === 0xbf) return 'utf-8'
|
|
349
|
-
if (u8.length < 2) return null
|
|
350
|
-
if (u8[0] === 0xff && u8[1] === 0xfe) return 'utf-16le'
|
|
351
|
-
if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be'
|
|
352
|
-
return null
|
|
353
|
-
}
|
|
354
|
-
|
|
355
285
|
// https://encoding.spec.whatwg.org/#decode
|
|
356
286
|
// Warning: encoding sniffed from BOM takes precedence over the supplied one
|
|
357
287
|
// Warning: lossy, performs replacement, no option of throwing
|
|
@@ -368,7 +298,7 @@ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
|
|
|
368
298
|
let suffix = ''
|
|
369
299
|
if (u8.byteLength % 2 !== 0) {
|
|
370
300
|
suffix = replacementChar
|
|
371
|
-
u8 = u8.subarray(0, -
|
|
301
|
+
u8 = u8.subarray(0, -unfinishedBytes(u8, u8.byteLength, enc))
|
|
372
302
|
}
|
|
373
303
|
|
|
374
304
|
return utf16toStringLoose(u8, enc === 'utf-16le' ? 'uint8-le' : 'uint8-be') + suffix
|
|
@@ -376,7 +306,7 @@ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
|
|
|
376
306
|
|
|
377
307
|
if (!Object.hasOwn(labels, enc)) throw new RangeError(E_ENCODING)
|
|
378
308
|
|
|
379
|
-
if (
|
|
309
|
+
if (isMultibyte(enc)) {
|
|
380
310
|
if (!createMultibyteDecoder) throw new Error(E_MULTI)
|
|
381
311
|
return createMultibyteDecoder(enc, true)(u8)
|
|
382
312
|
}
|
|
@@ -387,17 +317,3 @@ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
|
|
|
387
317
|
|
|
388
318
|
return createSinglebyteDecoder(enc, true)(u8)
|
|
389
319
|
}
|
|
390
|
-
|
|
391
|
-
const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
|
|
392
|
-
|
|
393
|
-
// Unlike normalizeEncoding, case-sensitive
|
|
394
|
-
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
395
|
-
export function labelToName(label) {
|
|
396
|
-
const enc = normalizeEncoding(label)
|
|
397
|
-
if (enc === 'utf-8') return 'UTF-8' // fast path
|
|
398
|
-
if (!enc) return enc
|
|
399
|
-
if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
|
|
400
|
-
if (enc === 'big5') return 'Big5'
|
|
401
|
-
if (enc === 'shift_jis') return 'Shift_JIS'
|
|
402
|
-
return enc
|
|
403
|
-
}
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
// Get the number of trailing bytes in a Uint8Array `u` ending at `len` that don't
|
|
2
|
+
// form a codepoint yet, but can be a part of a single codepoint on more data
|
|
1
3
|
export function unfinishedBytes(u, len, enc) {
|
|
2
4
|
switch (enc) {
|
|
3
5
|
case 'utf-8': {
|
|
@@ -32,3 +34,35 @@ export function unfinishedBytes(u, len, enc) {
|
|
|
32
34
|
|
|
33
35
|
throw new Error('Unsupported encoding')
|
|
34
36
|
}
|
|
37
|
+
|
|
38
|
+
// Merge prefix `chunk` with `u` and return new combined prefix
|
|
39
|
+
// For u.length < 3, fully consumes u and can return unfinished data,
|
|
40
|
+
// otherwise returns a prefix with no unfinished bytes
|
|
41
|
+
export function mergePrefix(u, chunk, enc) {
|
|
42
|
+
if (u.length === 0) return chunk
|
|
43
|
+
if (u.length < 3) {
|
|
44
|
+
// No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
|
|
45
|
+
const a = new Uint8Array(u.length + chunk.length)
|
|
46
|
+
a.set(chunk)
|
|
47
|
+
a.set(u, chunk.length)
|
|
48
|
+
return a
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
|
|
52
|
+
const t = new Uint8Array(chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
|
|
53
|
+
t.set(chunk)
|
|
54
|
+
t.set(u.subarray(0, 3), chunk.length)
|
|
55
|
+
|
|
56
|
+
// Stop at the first offset where unfinished bytes reaches 0 or fits into u
|
|
57
|
+
// If that doesn't happen (u too short), just concat chunk and u completely (above)
|
|
58
|
+
for (let i = 1; i <= 3; i++) {
|
|
59
|
+
const unfinished = unfinishedBytes(t, chunk.length + i, enc) // 0-3
|
|
60
|
+
if (unfinished <= i) {
|
|
61
|
+
// Always reachable at 3, but we still need 'unfinished' value for it
|
|
62
|
+
const add = i - unfinished // 0-3
|
|
63
|
+
return add > 0 ? t.subarray(0, chunk.length + add) : chunk
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// Unreachable
|
|
68
|
+
}
|
package/fallback/latin1.js
CHANGED
|
@@ -37,6 +37,7 @@ export function asciiPrefix(arr) {
|
|
|
37
37
|
const b = u32[i + 1]
|
|
38
38
|
const c = u32[i + 2]
|
|
39
39
|
const d = u32[i + 3]
|
|
40
|
+
// "(a | b | c | d) & mask" is slower on Hermes though faster on v8
|
|
40
41
|
if (a & 0x80_80_80_80 || b & 0x80_80_80_80 || c & 0x80_80_80_80 || d & 0x80_80_80_80) break
|
|
41
42
|
}
|
|
42
43
|
|