@exodus/bytes 1.9.0 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +341 -89
- package/array.d.ts +41 -3
- package/base32.d.ts +83 -0
- package/base58.d.ts +62 -0
- package/base58check.d.ts +131 -0
- package/base58check.js +2 -1
- package/base64.d.ts +40 -19
- package/bech32.d.ts +76 -0
- package/bigint.d.ts +48 -0
- package/encoding-browser.d.ts +23 -0
- package/encoding-lite.d.ts +61 -0
- package/encoding.d.ts +93 -11
- package/encoding.js +4 -3
- package/fallback/_utils.js +14 -11
- package/fallback/encoding.js +34 -42
- package/fallback/encoding.util.js +34 -0
- package/fallback/multi-byte.encodings.json +1 -0
- package/fallback/multi-byte.js +87 -16
- package/fallback/multi-byte.table.js +3 -0
- package/hex.d.ts +22 -8
- package/index.d.ts +1 -1
- package/multi-byte.d.ts +57 -0
- package/package.json +52 -8
- package/single-byte.d.ts +149 -0
- package/utf16.d.ts +92 -0
- package/utf8.d.ts +52 -18
- package/utf8.js +7 -2
- package/utf8.node.js +1 -1
- package/wif.d.ts +76 -0
package/encoding.d.ts
CHANGED
|
@@ -1,14 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Implements the [Encoding standard](https://encoding.spec.whatwg.org/):
|
|
3
|
+
* [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder),
|
|
4
|
+
* [TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder),
|
|
5
|
+
* [TextDecoderStream](https://encoding.spec.whatwg.org/#interface-textdecoderstream),
|
|
6
|
+
* [TextEncoderStream](https://encoding.spec.whatwg.org/#interface-textencoderstream),
|
|
7
|
+
* some [hooks](https://encoding.spec.whatwg.org/#specification-hooks).
|
|
8
|
+
*
|
|
9
|
+
* ```js
|
|
10
|
+
* import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
|
|
11
|
+
* import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding.js' // Requires Streams
|
|
12
|
+
*
|
|
13
|
+
* // Hooks for standards
|
|
14
|
+
* import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding.js'
|
|
15
|
+
* ```
|
|
16
|
+
*
|
|
17
|
+
* @module @exodus/bytes/encoding.js
|
|
18
|
+
*/
|
|
19
|
+
|
|
1
20
|
/// <reference types="node" />
|
|
2
21
|
|
|
3
22
|
/**
|
|
4
|
-
*
|
|
23
|
+
* Convert an encoding [label](https://encoding.spec.whatwg.org/#names-and-labels) to its name,
|
|
24
|
+
* as an ASCII-lowercased string.
|
|
25
|
+
*
|
|
26
|
+
* If an encoding with that label does not exist, returns `null`.
|
|
27
|
+
*
|
|
28
|
+
* This is the same as [`decoder.encoding` getter](https://encoding.spec.whatwg.org/#dom-textdecoder-encoding),
|
|
29
|
+
* except that it:
|
|
30
|
+
* 1. Supports [`replacement` encoding](https://encoding.spec.whatwg.org/#replacement) and its
|
|
31
|
+
* [labels](https://encoding.spec.whatwg.org/#ref-for-replacement%E2%91%A1)
|
|
32
|
+
* 2. Does not throw for invalid labels and instead returns `null`
|
|
33
|
+
*
|
|
34
|
+
* It is identical to:
|
|
35
|
+
* ```js
|
|
36
|
+
* labelToName(label)?.toLowerCase() ?? null
|
|
37
|
+
* ```
|
|
38
|
+
*
|
|
39
|
+
* All encoding names are also valid labels for corresponding encodings.
|
|
40
|
+
*
|
|
5
41
|
* @param label - The encoding label to normalize
|
|
6
42
|
* @returns The normalized encoding name, or null if invalid
|
|
7
43
|
*/
|
|
8
44
|
export function normalizeEncoding(label: string): string | null;
|
|
9
45
|
|
|
10
46
|
/**
|
|
11
|
-
* Implements BOM sniff
|
|
47
|
+
* Implements [BOM sniff](https://encoding.spec.whatwg.org/#bom-sniff) legacy hook.
|
|
48
|
+
*
|
|
49
|
+
* Given a `TypedArray` or an `ArrayBuffer` instance `input`, returns one of:
|
|
50
|
+
* - `'utf-8'`, if `input` starts with UTF-8 byte order mark.
|
|
51
|
+
* - `'utf-16le'`, if `input` starts with UTF-16LE byte order mark.
|
|
52
|
+
* - `'utf-16be'`, if `input` starts with UTF-16BE byte order mark.
|
|
53
|
+
* - `null` otherwise.
|
|
54
|
+
*
|
|
12
55
|
* @param input - The bytes to check for BOM
|
|
13
56
|
* @returns The encoding ('utf-8', 'utf-16le', 'utf-16be'), or null if no BOM found
|
|
14
57
|
*/
|
|
@@ -17,7 +60,27 @@ export function getBOMEncoding(
|
|
|
17
60
|
): 'utf-8' | 'utf-16le' | 'utf-16be' | null;
|
|
18
61
|
|
|
19
62
|
/**
|
|
20
|
-
* Implements decode
|
|
63
|
+
* Implements [decode](https://encoding.spec.whatwg.org/#decode) legacy hook.
|
|
64
|
+
*
|
|
65
|
+
* Given a `TypedArray` or an `ArrayBuffer` instance `input` and an optional `fallbackEncoding`
|
|
66
|
+
* encoding [label](https://encoding.spec.whatwg.org/#names-and-labels),
|
|
67
|
+
* sniffs encoding from BOM with `fallbackEncoding` fallback and then
|
|
68
|
+
* decodes the `input` using that encoding, skipping BOM if it was present.
|
|
69
|
+
*
|
|
70
|
+
* Notes:
|
|
71
|
+
*
|
|
72
|
+
* - BOM-sniffed encoding takes precedence over `fallbackEncoding` option per spec.
|
|
73
|
+
* Use with care.
|
|
74
|
+
* - Always operates in non-fatal [mode](https://encoding.spec.whatwg.org/#textdecoder-error-mode),
|
|
75
|
+
* aka replacement. It can convert different byte sequences to equal strings.
|
|
76
|
+
*
|
|
77
|
+
* This method is similar to the following code, except that it doesn't support encoding labels and
|
|
78
|
+
* only expects lowercased encoding name:
|
|
79
|
+
*
|
|
80
|
+
* ```js
|
|
81
|
+
* new TextDecoder(getBOMEncoding(input) ?? fallbackEncoding).decode(input)
|
|
82
|
+
* ```
|
|
83
|
+
*
|
|
21
84
|
* @param input - The bytes to decode
|
|
22
85
|
* @param fallbackEncoding - The encoding to use if no BOM detected (default: 'utf-8')
|
|
23
86
|
* @returns The decoded string
|
|
@@ -28,31 +91,50 @@ export function legacyHookDecode(
|
|
|
28
91
|
): string;
|
|
29
92
|
|
|
30
93
|
/**
|
|
31
|
-
*
|
|
94
|
+
* Implements [get an encoding from a string `label`](https://encoding.spec.whatwg.org/#concept-encoding-get).
|
|
95
|
+
*
|
|
96
|
+
* Convert an encoding [label](https://encoding.spec.whatwg.org/#names-and-labels) to its name,
|
|
97
|
+
* as a case-sensitive string.
|
|
98
|
+
*
|
|
99
|
+
* If an encoding with that label does not exist, returns `null`.
|
|
100
|
+
*
|
|
101
|
+
* All encoding names are also valid labels for corresponding encodings.
|
|
102
|
+
*
|
|
32
103
|
* @param label - The encoding label
|
|
33
104
|
* @returns The proper case encoding name, or null if invalid
|
|
34
105
|
*/
|
|
35
106
|
export function labelToName(label: string): string | null;
|
|
36
107
|
|
|
37
108
|
/**
|
|
38
|
-
*
|
|
39
|
-
*
|
|
109
|
+
* [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder) implementation/polyfill.
|
|
110
|
+
*
|
|
111
|
+
* Decode bytes to strings according to [WHATWG Encoding](https://encoding.spec.whatwg.org) specification.
|
|
40
112
|
*/
|
|
41
113
|
export const TextDecoder: typeof globalThis.TextDecoder;
|
|
42
114
|
|
|
43
115
|
/**
|
|
44
|
-
*
|
|
116
|
+
* [TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder) implementation/polyfill.
|
|
117
|
+
*
|
|
118
|
+
* Encode strings to UTF-8 bytes according to [WHATWG Encoding](https://encoding.spec.whatwg.org) specification.
|
|
45
119
|
*/
|
|
46
120
|
export const TextEncoder: typeof globalThis.TextEncoder;
|
|
47
121
|
|
|
48
122
|
/**
|
|
49
|
-
*
|
|
50
|
-
*
|
|
123
|
+
* [TextDecoderStream](https://encoding.spec.whatwg.org/#interface-textdecoderstream) implementation/polyfill.
|
|
124
|
+
*
|
|
125
|
+
* A [Streams](https://streams.spec.whatwg.org/) wrapper for `TextDecoder`.
|
|
126
|
+
*
|
|
127
|
+
* Requires [Streams](https://streams.spec.whatwg.org/) to be either supported by the platform or
|
|
128
|
+
* [polyfilled](https://npmjs.com/package/web-streams-polyfill).
|
|
51
129
|
*/
|
|
52
130
|
export const TextDecoderStream: typeof globalThis.TextDecoderStream;
|
|
53
131
|
|
|
54
132
|
/**
|
|
55
|
-
*
|
|
56
|
-
*
|
|
133
|
+
* [TextEncoderStream](https://encoding.spec.whatwg.org/#interface-textencoderstream) implementation/polyfill.
|
|
134
|
+
*
|
|
135
|
+
* A [Streams](https://streams.spec.whatwg.org/) wrapper for `TextEncoder`.
|
|
136
|
+
*
|
|
137
|
+
* Requires [Streams](https://streams.spec.whatwg.org/) to be either supported by the platform or
|
|
138
|
+
* [polyfilled](https://npmjs.com/package/web-streams-polyfill).
|
|
57
139
|
*/
|
|
58
140
|
export const TextEncoderStream: typeof globalThis.TextEncoderStream;
|
package/encoding.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js'
|
|
2
|
-
import {
|
|
1
|
+
import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js'
|
|
2
|
+
import { multibyteEncoder } from './fallback/multi-byte.js'
|
|
3
|
+
import { setMultibyte } from './fallback/encoding.js'
|
|
3
4
|
|
|
4
|
-
|
|
5
|
+
setMultibyte(createMultibyteDecoder, multibyteEncoder)
|
|
5
6
|
|
|
6
7
|
export {
|
|
7
8
|
TextDecoder,
|
package/fallback/_utils.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
const { Buffer, TextEncoder, TextDecoder } = globalThis
|
|
2
2
|
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
|
|
3
3
|
export const nativeBuffer = haveNativeBuffer ? Buffer : null
|
|
4
|
-
export const isHermes =
|
|
5
|
-
export const isDeno =
|
|
6
|
-
export const isLE = new Uint8Array(Uint16Array.of(258).buffer)[0] === 2
|
|
4
|
+
export const isHermes = !!globalThis.HermesInternal
|
|
5
|
+
export const isDeno = !!globalThis.Deno
|
|
6
|
+
export const isLE = /* @__PURE__ */ (() => new Uint8Array(Uint16Array.of(258).buffer)[0] === 2)()
|
|
7
7
|
|
|
8
8
|
// We consider Node.js TextDecoder/TextEncoder native
|
|
9
9
|
let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]'))
|
|
@@ -17,16 +17,19 @@ export const nativeDecoder = isNative(TextDecoder)
|
|
|
17
17
|
// Actually windows-1252, compatible with ascii and latin1 decoding
|
|
18
18
|
// Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
|
|
19
19
|
// in 2025 due to a regression, so we call it Latin1 as it's usable only for that
|
|
20
|
-
|
|
21
|
-
if (nativeDecoder) {
|
|
20
|
+
const getNativeLain1 = () => {
|
|
22
21
|
// Not all barebone engines with TextDecoder support something except utf-8, detect
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
22
|
+
if (!nativeDecoder) {
|
|
23
|
+
try {
|
|
24
|
+
return new TextDecoder('latin1', { ignoreBOM: true })
|
|
25
|
+
} catch {}
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
return null
|
|
26
29
|
}
|
|
27
30
|
|
|
28
|
-
export const nativeDecoderLatin1 =
|
|
29
|
-
export const canDecoders =
|
|
31
|
+
export const nativeDecoderLatin1 = /* @__PURE__ */ getNativeLain1()
|
|
32
|
+
export const canDecoders = !!nativeDecoderLatin1
|
|
30
33
|
|
|
31
34
|
// Block Firefox < 146 specifically from using native hex/base64, as it's very slow there
|
|
32
35
|
// Refs: https://bugzilla.mozilla.org/show_bug.cgi?id=1994067 (and linked issues), fixed in 146
|
|
@@ -51,7 +54,7 @@ function shouldSkipBuiltins() {
|
|
|
51
54
|
return false // eslint-disable-line no-unreachable
|
|
52
55
|
}
|
|
53
56
|
|
|
54
|
-
export const skipWeb = shouldSkipBuiltins()
|
|
57
|
+
export const skipWeb = /* @__PURE__ */ shouldSkipBuiltins()
|
|
55
58
|
|
|
56
59
|
function decodePartAddition(a, start, end, m) {
|
|
57
60
|
let o = ''
|
package/fallback/encoding.js
CHANGED
|
@@ -6,19 +6,25 @@ import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/by
|
|
|
6
6
|
import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
|
|
7
7
|
import labels from './encoding.labels.js'
|
|
8
8
|
import { fromSource, getBOMEncoding, normalizeEncoding, E_ENCODING } from './encoding.api.js'
|
|
9
|
-
import { unfinishedBytes } from './encoding.util.js'
|
|
9
|
+
import { unfinishedBytes, mergePrefix } from './encoding.util.js'
|
|
10
10
|
|
|
11
11
|
export { labelToName, getBOMEncoding, normalizeEncoding } from './encoding.api.js'
|
|
12
12
|
|
|
13
|
+
const E_MULTI = "import '@exodus/bytes/encoding.js' for legacy multi-byte encodings support"
|
|
13
14
|
const E_OPTIONS = 'The "options" argument must be of type object'
|
|
14
|
-
const E_MULTI =
|
|
15
|
-
'Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encoding.js for full encodings range support'
|
|
16
15
|
const replacementChar = '\uFFFD'
|
|
17
16
|
const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
|
|
18
|
-
let createMultibyteDecoder
|
|
17
|
+
let createMultibyteDecoder, multibyteEncoder
|
|
19
18
|
|
|
20
|
-
export
|
|
19
|
+
export const isMultibyte = (enc) => multibyteSet.has(enc)
|
|
20
|
+
export function setMultibyte(createDecoder, createEncoder) {
|
|
21
21
|
createMultibyteDecoder = createDecoder
|
|
22
|
+
multibyteEncoder = createEncoder
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
export function getMultibyteEncoder() {
|
|
26
|
+
if (!multibyteEncoder) throw new Error(E_MULTI)
|
|
27
|
+
return multibyteEncoder
|
|
22
28
|
}
|
|
23
29
|
|
|
24
30
|
const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
|
|
@@ -47,10 +53,10 @@ export class TextDecoder {
|
|
|
47
53
|
const enc = normalizeEncoding(encoding)
|
|
48
54
|
if (!enc || enc === 'replacement') throw new RangeError(E_ENCODING)
|
|
49
55
|
define(this, 'encoding', enc)
|
|
50
|
-
define(this, 'fatal',
|
|
51
|
-
define(this, 'ignoreBOM',
|
|
56
|
+
define(this, 'fatal', !!options.fatal)
|
|
57
|
+
define(this, 'ignoreBOM', !!options.ignoreBOM)
|
|
52
58
|
this.#unicode = enc === 'utf-8' || enc === 'utf-16le' || enc === 'utf-16be'
|
|
53
|
-
this.#multibyte = !this.#unicode &&
|
|
59
|
+
this.#multibyte = !this.#unicode && isMultibyte(enc)
|
|
54
60
|
this.#canBOM = this.#unicode && !this.ignoreBOM
|
|
55
61
|
}
|
|
56
62
|
|
|
@@ -60,44 +66,26 @@ export class TextDecoder {
|
|
|
60
66
|
|
|
61
67
|
decode(input, options = {}) {
|
|
62
68
|
if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
|
|
63
|
-
const stream =
|
|
69
|
+
const stream = !!options.stream
|
|
64
70
|
let u = input === undefined ? new Uint8Array() : fromSource(input)
|
|
71
|
+
const empty = u.length === 0 // also can't be streaming after next line
|
|
72
|
+
if (empty && stream) return '' // no state change
|
|
65
73
|
|
|
66
74
|
if (this.#unicode) {
|
|
67
75
|
let prefix
|
|
68
76
|
if (this.#chunk) {
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
u =
|
|
72
|
-
} else if (u.length < 3) {
|
|
73
|
-
// No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
|
|
74
|
-
const a = new Uint8Array(u.length + this.#chunk.length)
|
|
75
|
-
a.set(this.#chunk)
|
|
76
|
-
a.set(u, this.#chunk.length)
|
|
77
|
-
u = a
|
|
77
|
+
const merged = mergePrefix(u, this.#chunk, this.encoding)
|
|
78
|
+
if (u.length < 3) {
|
|
79
|
+
u = merged // might be unfinished, but fully consumed old u
|
|
78
80
|
} else {
|
|
79
|
-
|
|
80
|
-
const
|
|
81
|
-
|
|
82
|
-
t.set(u.subarray(0, 3), this.#chunk.length)
|
|
83
|
-
|
|
84
|
-
// Stop at the first offset where unfinished bytes reaches 0 or fits into u
|
|
85
|
-
// If that doesn't happen (u too short), just concat chunk and u completely
|
|
86
|
-
for (let i = 1; i <= 3; i++) {
|
|
87
|
-
const unfinished = unfinishedBytes(t, this.#chunk.length + i, this.encoding) // 0-3
|
|
88
|
-
if (unfinished <= i) {
|
|
89
|
-
// Always reachable at 3, but we still need 'unfinished' value for it
|
|
90
|
-
const add = i - unfinished // 0-3
|
|
91
|
-
prefix = add > 0 ? t.subarray(0, this.#chunk.length + add) : this.#chunk
|
|
92
|
-
if (add > 0) u = u.subarray(add)
|
|
93
|
-
break
|
|
94
|
-
}
|
|
95
|
-
}
|
|
81
|
+
prefix = merged // stops at complete chunk
|
|
82
|
+
const add = prefix.length - this.#chunk.length
|
|
83
|
+
if (add > 0) u = u.subarray(add)
|
|
96
84
|
}
|
|
97
85
|
|
|
98
86
|
this.#chunk = null
|
|
99
|
-
} else if (
|
|
100
|
-
|
|
87
|
+
} else if (empty) {
|
|
88
|
+
this.#canBOM = !this.ignoreBOM // not streaming
|
|
101
89
|
return ''
|
|
102
90
|
}
|
|
103
91
|
|
|
@@ -118,27 +106,31 @@ export class TextDecoder {
|
|
|
118
106
|
}
|
|
119
107
|
}
|
|
120
108
|
|
|
109
|
+
let seenBOM = false
|
|
121
110
|
if (this.#canBOM) {
|
|
122
111
|
const bom = this.#findBom(prefix ?? u)
|
|
123
112
|
if (bom) {
|
|
124
|
-
|
|
113
|
+
seenBOM = true
|
|
125
114
|
if (prefix) {
|
|
126
115
|
prefix = prefix.subarray(bom)
|
|
127
116
|
} else {
|
|
128
117
|
u = u.subarray(bom)
|
|
129
118
|
}
|
|
130
119
|
}
|
|
120
|
+
} else if (!stream && !this.ignoreBOM) {
|
|
121
|
+
this.#canBOM = true
|
|
131
122
|
}
|
|
132
123
|
|
|
133
124
|
if (!this.#decode) this.#decode = unicodeDecoder(this.encoding, !this.fatal)
|
|
134
125
|
try {
|
|
135
126
|
const res = (prefix ? this.#decode(prefix) : '') + this.#decode(u) + suffix
|
|
136
|
-
if
|
|
137
|
-
|
|
138
|
-
if (!stream) this.#canBOM = !this.ignoreBOM
|
|
127
|
+
// "BOM seen" is set on the current decode call only if it did not error, in "serialize I/O queue" after decoding
|
|
128
|
+
if (stream && (seenBOM || res.length > 0)) this.#canBOM = false
|
|
139
129
|
return res
|
|
140
130
|
} catch (err) {
|
|
141
131
|
this.#chunk = null // reset unfinished chunk on errors
|
|
132
|
+
// The correct way per spec seems to be not destroying the decoder state (aka BOM here) in stream mode
|
|
133
|
+
// See also multi-byte.js
|
|
142
134
|
throw err
|
|
143
135
|
}
|
|
144
136
|
|
|
@@ -314,7 +306,7 @@ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
|
|
|
314
306
|
|
|
315
307
|
if (!Object.hasOwn(labels, enc)) throw new RangeError(E_ENCODING)
|
|
316
308
|
|
|
317
|
-
if (
|
|
309
|
+
if (isMultibyte(enc)) {
|
|
318
310
|
if (!createMultibyteDecoder) throw new Error(E_MULTI)
|
|
319
311
|
return createMultibyteDecoder(enc, true)(u8)
|
|
320
312
|
}
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
// Get the number of trailing bytes in a Uint8Array `u` ending at `len` that don't
|
|
2
|
+
// form a codepoint yet, but can be a part of a single codepoint on more data
|
|
1
3
|
export function unfinishedBytes(u, len, enc) {
|
|
2
4
|
switch (enc) {
|
|
3
5
|
case 'utf-8': {
|
|
@@ -32,3 +34,35 @@ export function unfinishedBytes(u, len, enc) {
|
|
|
32
34
|
|
|
33
35
|
throw new Error('Unsupported encoding')
|
|
34
36
|
}
|
|
37
|
+
|
|
38
|
+
// Merge prefix `chunk` with `u` and return new combined prefix
|
|
39
|
+
// For u.length < 3, fully consumes u and can return unfinished data,
|
|
40
|
+
// otherwise returns a prefix with no unfinished bytes
|
|
41
|
+
export function mergePrefix(u, chunk, enc) {
|
|
42
|
+
if (u.length === 0) return chunk
|
|
43
|
+
if (u.length < 3) {
|
|
44
|
+
// No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
|
|
45
|
+
const a = new Uint8Array(u.length + chunk.length)
|
|
46
|
+
a.set(chunk)
|
|
47
|
+
a.set(u, chunk.length)
|
|
48
|
+
return a
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
|
|
52
|
+
const t = new Uint8Array(chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
|
|
53
|
+
t.set(chunk)
|
|
54
|
+
t.set(u.subarray(0, 3), chunk.length)
|
|
55
|
+
|
|
56
|
+
// Stop at the first offset where unfinished bytes reaches 0 or fits into u
|
|
57
|
+
// If that doesn't happen (u too short), just concat chunk and u completely (above)
|
|
58
|
+
for (let i = 1; i <= 3; i++) {
|
|
59
|
+
const unfinished = unfinishedBytes(t, chunk.length + i, enc) // 0-3
|
|
60
|
+
if (unfinished <= i) {
|
|
61
|
+
// Always reachable at 3, but we still need 'unfinished' value for it
|
|
62
|
+
const add = i - unfinished // 0-3
|
|
63
|
+
return add > 0 ? t.subarray(0, chunk.length + add) : chunk
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// Unreachable
|
|
68
|
+
}
|
|
@@ -255,6 +255,7 @@
|
|
|
255
255
|
0,96,"jTDyPYHPZsuW6DaOAK_MYbVV1Cc3hQPtxUtKV3vkuxWVbAKIJVtbpKg5G_YAYBudaZjpT-o5ZSoE-Ts1a64cjtcxYm6cmFG_wfn_U3Wf0xu-7iPUmOmX1uo8Q9XtOHkACQEHAQELY2oBEwAC1AcEAgMCAwj7AgYDAwkTCgLRAQUZCbwNAQEAAQAFAAMDAgABAgMCBQEHBAkGCLUDCAEBAAEFAQMDAgEDCwMCAwQCxiHoDyrAAgUKAAECBQA",
|
|
256
256
|
0,96,"O3jA12FpgHMtTDt5TLAodifVF3BpNYprtGJ0H_7cxeYXTyeXYzCavakroFIokJoA_OmpTq4RLcHYQw10PC11jQ4JHAjxN8yXMs0Hvc5hHhwCjP6YLIeyHwDs_1ixvXIOEQm1AQMBAAEACQEICgIdwgAGABkAABQF3B_QEQsTzwIW9AAEAwsZBQ-xFQwFEdorErQAAhoTHsgFBQXfCCEN4QAA6QwNEgPOAQQBAAMCAwMAAQQACwAAAQcBBQs"
|
|
257
257
|
],
|
|
258
|
+
"iso-2022-jp-katakana":[12290,9,0,-13,249,-10,-82,1,1,1,1,57,1,1,-37,56,-91,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,0,0,0,0,0,2,2,2,2,2,0,0,0,0,1,1,1,0,0,0,0,0,1,3,-89,0],
|
|
258
259
|
"gb18030-ranges": [0,128,36,37,2,4,7,9,5,6,31,32,8,10,6,9,1,3,4,6,3,4,1,3,1,2,4,5,17,18,7,8,15,16,24,25,3,4,4,5,29,30,98,99,1,2,1,2,1,2,1,2,1,2,1,2,1,2,28,29,87,88,15,16,101,102,1,4,13,14,183,200,1,8,7,24,1,8,55,56,14,78,1,2,7102,7103,2,6,1,3,2,4,7,9,9,10,1,3,1,2,5,6,112,113,86,87,1,2,3,4,12,13,10,11,62,74,4,14,22,26,2,6,110,111,6,7,1,2,3,4,4,5,2,6,2,3,1,2,1,6,2,3,5,9,5,6,10,11,3,4,5,6,13,15,2,6,6,8,37,38,3,4,11,12,25,26,82,83,333,343,10,50,100,176,4,40,13,28,3,6,10,12,16,18,8,10,8,10,3,4,2,4,18,22,31,33,2,3,54,55,1,2,2110,2111,2,3,3,4,2,4,10,11,15,16,2,3,3,4,4,5,2,4,3,4,14,15,293,305,4,8,1,20,5,7,2,11,20,21,2,85,7,11,2,88,5,8,6,43,246,256,7,8,113,114,234,236,12,15,2,3,34,35,9,10,2,4,2,3,113,114,43,44,298,299,111,112,11,12,765,766,85,86,96,98,14,15,147,148,218,219,287,288,113,114,885,886,264,265,471,472,116,117,4,5,43,44,248,249,373,374,20,21,193,194,5,6,82,83,16,17,441,442,50,51,2,3,4,6,1,3,20,21,3,4,22,24,703,704,39,44,111,118,148,149,81,20983,14426,18374,1,92,1,31,13,46,1,4,5,6,7,8,4,6,4,6,8,9,7,8,16,18,14,15,4295,4296,76,77,27,28,81,82,9,10,26,30,1,2,1,3,3,4,6,9,1,3,2,5,1030,1032,1,19,4,14,1,5,1,15,1,5,149,243,129,135,149606,26],
|
|
259
260
|
"gb18030": [
|
|
260
261
|
1,19970,3,1,1,8,-2,1,4,3,7,-1,-2,-2,2,4,-1,-1,-1,-1,1,4,3,3,-1,-1,-3,1,6,-3,-1,2,2,4,6,2,1,6,1,-2,10,1,7,1,-1,-2,1,5,2,5,-1,3,2,1,4,1,6,3,4,-2,4,1,3,2,1,9,-3,2,2,-1,3,7,-3,-1,2,3,-1,3,3,-1,-2,3,3,
|
package/fallback/multi-byte.js
CHANGED
|
@@ -688,6 +688,7 @@ const preencoders = {
|
|
|
688
688
|
const t = p % 188
|
|
689
689
|
return ((l + (l < 0x1f ? 0x81 : 0xc1)) << 8) | ((t < 0x3f ? 0x40 : 0x41) + t)
|
|
690
690
|
},
|
|
691
|
+
'iso-2022-jp': (p) => ((((p / 94) | 0) + 0x21) << 8) | ((p % 94) + 0x21),
|
|
691
692
|
'euc-jp': (p) => ((((p / 94) | 0) + 0xa1) << 8) | ((p % 94) + 0xa1),
|
|
692
693
|
'euc-kr': (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190) + 0x41),
|
|
693
694
|
gb18030: (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190 < 0x3f ? 0x40 : 0x41) + (p % 190)),
|
|
@@ -697,11 +698,13 @@ preencoders.gbk = preencoders.gb18030
|
|
|
697
698
|
|
|
698
699
|
// We accept that encoders use non-trivial amount of mem, for perf
|
|
699
700
|
// most are 128 KiB mem, big5 is 380 KiB, lazy-loaded at first use
|
|
700
|
-
function getMap(id, size) {
|
|
701
|
+
function getMap(id, size, ascii) {
|
|
701
702
|
const cached = maps.get(id)
|
|
702
703
|
if (cached) return cached
|
|
703
704
|
let tname = id
|
|
704
705
|
const sjis = id === 'shift_jis'
|
|
706
|
+
const iso2022jp = id === 'iso-2022-jp'
|
|
707
|
+
if (iso2022jp) tname = 'jis0208'
|
|
705
708
|
if (id === 'gbk') tname = 'gb18030'
|
|
706
709
|
if (id === 'euc-jp' || sjis) tname = 'jis0208'
|
|
707
710
|
const table = getTable(tname)
|
|
@@ -738,7 +741,7 @@ function getMap(id, size) {
|
|
|
738
741
|
}
|
|
739
742
|
}
|
|
740
743
|
|
|
741
|
-
for (let i = 0; i < 0x80; i++) map[i] = i
|
|
744
|
+
if (ascii) for (let i = 0; i < 0x80; i++) map[i] = i
|
|
742
745
|
if (sjis || id === 'euc-jp') {
|
|
743
746
|
if (sjis) map[0x80] = 0x80
|
|
744
747
|
const d = sjis ? 0xfe_c0 : 0x70_c0
|
|
@@ -757,32 +760,38 @@ function getMap(id, size) {
|
|
|
757
760
|
return map
|
|
758
761
|
}
|
|
759
762
|
|
|
760
|
-
const encoders = new Set(['big5', 'euc-kr', 'euc-jp', 'shift_jis', 'gbk', 'gb18030'])
|
|
761
763
|
const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
|
|
762
|
-
let gb18030r
|
|
764
|
+
let gb18030r, katakana
|
|
763
765
|
|
|
764
766
|
export function multibyteEncoder(enc, onError) {
|
|
765
|
-
if (!
|
|
767
|
+
if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding')
|
|
766
768
|
const size = enc === 'big5' ? 0x2_f8_a7 : 0x1_00_00 // for big5, max codepoint in table + 1
|
|
767
|
-
const
|
|
768
|
-
const
|
|
769
|
-
|
|
770
|
-
|
|
769
|
+
const iso2022jp = enc === 'iso-2022-jp'
|
|
770
|
+
const gb18030 = enc === 'gb18030'
|
|
771
|
+
const ascii = isAsciiSuperset(enc)
|
|
772
|
+
const width = iso2022jp ? 5 : gb18030 ? 4 : 2
|
|
773
|
+
const tailsize = iso2022jp ? 3 : 0
|
|
774
|
+
const map = getMap(enc, size, ascii)
|
|
775
|
+
if (gb18030 && !gb18030r) gb18030r = getTable('gb18030-ranges')
|
|
776
|
+
if (iso2022jp && !katakana) katakana = getTable('iso-2022-jp-katakana')
|
|
771
777
|
return (str) => {
|
|
772
778
|
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
773
|
-
if (!NON_LATIN.test(str)) {
|
|
779
|
+
if (ascii && !NON_LATIN.test(str)) {
|
|
774
780
|
try {
|
|
775
781
|
return encodeAscii(str, E_STRICT)
|
|
776
782
|
} catch {}
|
|
777
783
|
}
|
|
778
784
|
|
|
779
785
|
const length = str.length
|
|
780
|
-
const u8 = new Uint8Array(length * width)
|
|
786
|
+
const u8 = new Uint8Array(length * width + tailsize)
|
|
781
787
|
let i = 0
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
788
|
+
|
|
789
|
+
if (ascii) {
|
|
790
|
+
while (i < length) {
|
|
791
|
+
const x = str.charCodeAt(i)
|
|
792
|
+
if (x >= 128) break
|
|
793
|
+
u8[i++] = x
|
|
794
|
+
}
|
|
786
795
|
}
|
|
787
796
|
|
|
788
797
|
// eslint-disable-next-line unicorn/consistent-function-scoping
|
|
@@ -793,7 +802,69 @@ export function multibyteEncoder(enc, onError) {
|
|
|
793
802
|
|
|
794
803
|
if (!map || map.length < size) /* c8 ignore next */ throw new Error('Unreachable') // Important for perf
|
|
795
804
|
|
|
796
|
-
if (
|
|
805
|
+
if (iso2022jp) {
|
|
806
|
+
let state = 0 // 0 = ASCII, 1 = Roman, 2 = jis0208
|
|
807
|
+
const restore = () => {
|
|
808
|
+
state = 0
|
|
809
|
+
u8[i++] = 0x1b
|
|
810
|
+
u8[i++] = 0x28
|
|
811
|
+
u8[i++] = 0x42
|
|
812
|
+
}
|
|
813
|
+
|
|
814
|
+
for (let j = 0; j < length; j++) {
|
|
815
|
+
let x = str.charCodeAt(j)
|
|
816
|
+
if (x >= 0xd8_00 && x < 0xe0_00) {
|
|
817
|
+
if (state === 2) restore()
|
|
818
|
+
if (x >= 0xdc_00 || j + 1 === length) {
|
|
819
|
+
i += err(x) // lone
|
|
820
|
+
} else {
|
|
821
|
+
const x1 = str.charCodeAt(j + 1)
|
|
822
|
+
if (x1 < 0xdc_00 || x1 >= 0xe0_00) {
|
|
823
|
+
i += err(x) // lone
|
|
824
|
+
} else {
|
|
825
|
+
j++ // consume x1
|
|
826
|
+
i += err(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10)))
|
|
827
|
+
}
|
|
828
|
+
}
|
|
829
|
+
} else if (x < 0x80) {
|
|
830
|
+
if (state === 2 || (state === 1 && (x === 0x5c || x === 0x7e))) restore()
|
|
831
|
+
if (x === 0xe || x === 0xf || x === 0x1b) {
|
|
832
|
+
i += err(0xff_fd) // 12.2.2. step 3: This returns U+FFFD rather than codePoint to prevent attacks
|
|
833
|
+
} else {
|
|
834
|
+
u8[i++] = x
|
|
835
|
+
}
|
|
836
|
+
} else if (x === 0xa5 || x === 0x20_3e) {
|
|
837
|
+
if (state !== 1) {
|
|
838
|
+
state = 1
|
|
839
|
+
u8[i++] = 0x1b
|
|
840
|
+
u8[i++] = 0x28
|
|
841
|
+
u8[i++] = 0x4a
|
|
842
|
+
}
|
|
843
|
+
|
|
844
|
+
u8[i++] = x === 0xa5 ? 0x5c : 0x7e
|
|
845
|
+
} else {
|
|
846
|
+
if (x === 0x22_12) x = 0xff_0d
|
|
847
|
+
if (x >= 0xff_61 && x <= 0xff_9f) x = katakana[x - 0xff_61]
|
|
848
|
+
const e = map[x]
|
|
849
|
+
if (e) {
|
|
850
|
+
if (state !== 2) {
|
|
851
|
+
state = 2
|
|
852
|
+
u8[i++] = 0x1b
|
|
853
|
+
u8[i++] = 0x24
|
|
854
|
+
u8[i++] = 0x42
|
|
855
|
+
}
|
|
856
|
+
|
|
857
|
+
u8[i++] = e >> 8
|
|
858
|
+
u8[i++] = e & 0xff
|
|
859
|
+
} else {
|
|
860
|
+
if (state === 2) restore()
|
|
861
|
+
i += err(x)
|
|
862
|
+
}
|
|
863
|
+
}
|
|
864
|
+
}
|
|
865
|
+
|
|
866
|
+
if (state) restore()
|
|
867
|
+
} else if (gb18030) {
|
|
797
868
|
// Deduping this branch hurts other encoders perf
|
|
798
869
|
const encode = (cp) => {
|
|
799
870
|
let a = 0, b = 0 // prettier-ignore
|
|
@@ -104,6 +104,9 @@ export function getTable(id) {
|
|
|
104
104
|
let a = 0, b = 0 // prettier-ignore
|
|
105
105
|
const idx = indices[id]
|
|
106
106
|
while (idx.length > 0) res.push([(a += idx.shift()), (b += idx.shift())]) // destroying, we remove it later anyway
|
|
107
|
+
} else if (id.endsWith('-katakana')) {
|
|
108
|
+
let a = -1
|
|
109
|
+
res = new Uint16Array(indices[id].map((x) => (a += x + 1)))
|
|
107
110
|
} else if (id === 'big5') {
|
|
108
111
|
if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding')
|
|
109
112
|
res = new Uint32Array(sizes[id]) // array of strings or undefined
|
package/hex.d.ts
CHANGED
|
@@ -1,21 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Implements Base16 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648)
|
|
3
|
+
* (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
|
|
4
|
+
*
|
|
5
|
+
* ```js
|
|
6
|
+
* import { fromHex, toHex } from '@exodus/bytes/hex.js'
|
|
7
|
+
* ```
|
|
8
|
+
*
|
|
9
|
+
* @module @exodus/bytes/hex.js
|
|
10
|
+
*/
|
|
11
|
+
|
|
1
12
|
/// <reference types="node" />
|
|
2
13
|
|
|
3
14
|
import type { OutputFormat, Uint8ArrayBuffer } from './array.js';
|
|
4
15
|
|
|
5
16
|
/**
|
|
6
|
-
*
|
|
17
|
+
* Encode a `Uint8Array` to a lowercase hex string
|
|
18
|
+
*
|
|
7
19
|
* @param arr - The input bytes
|
|
8
20
|
* @returns The hex encoded string
|
|
9
21
|
*/
|
|
10
|
-
export function toHex(arr:
|
|
22
|
+
export function toHex(arr: Uint8Array): string;
|
|
11
23
|
|
|
12
24
|
/**
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
25
|
+
* Decode a hex string to bytes
|
|
26
|
+
*
|
|
27
|
+
* Unlike `Buffer.from()`, throws on invalid input
|
|
28
|
+
*
|
|
29
|
+
* @param string - The hex encoded string (case-insensitive)
|
|
16
30
|
* @param format - Output format (default: 'uint8')
|
|
17
31
|
* @returns The decoded bytes
|
|
18
32
|
*/
|
|
19
|
-
export function fromHex(
|
|
20
|
-
export function fromHex(
|
|
21
|
-
export function fromHex(
|
|
33
|
+
export function fromHex(string: string, format?: 'uint8'): Uint8ArrayBuffer;
|
|
34
|
+
export function fromHex(string: string, format: 'buffer'): Buffer;
|
|
35
|
+
export function fromHex(string: string, format?: OutputFormat): Uint8ArrayBuffer | Buffer;
|
package/index.d.ts
CHANGED