@exodus/bytes 1.0.0-rc.9 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2024 Exodus Movement
3
+ Copyright (c) 2024-2025 Exodus Movement
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
package/README.md CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  `Uint8Array` conversion to and from `base64`, `base32`, `base58`, `hex`, `utf8`, `utf16`, `bech32` and `wif`
4
4
 
5
+ And a [`TextEncoder` / `TextDecoder` polyfill](#textencoder--textdecoder-polyfill)
6
+
5
7
  ## Strict
6
8
 
7
9
  Performs proper input validation, ensures no garbage-in-garbage-out
@@ -32,6 +34,7 @@ import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
32
34
  ```
33
35
 
34
36
  Less than half the bundle size of [text-encoding](https://npmjs.com/text-encoding), [whatwg-encoding](https://npmjs.com/whatwg-encoding) or [iconv-lite](https://npmjs.com/iconv-lite) (gzipped or not), and [is much faster](#fast).
37
+ See also [lite version](#lite-version).
35
38
 
36
39
  Spec compliant, passing WPT and covered with extra tests.
37
40
 
@@ -54,6 +57,22 @@ _These are only provided as a compatibility layer, prefer hardened APIs instead
54
57
  Use strict APIs in new applications, see `utf8fromString` / `utf16fromString` below.\
55
58
  Those throw on non-well-formed strings by default.
56
59
 
60
+ ### Lite version
61
+
62
+ If you don't need support for legacy multi-byte encodings, you can use the lite import:
63
+ ```js
64
+ import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-lite.js'
65
+ ```
66
+
67
+ This reduces the bundle size 10x:\
68
+ from 90 KiB gzipped for `@exodus/bytes/encoding.js` to 9 KiB gzipped for `@exodus/bytes/encoding-lite.js`.\
69
+ (For comparison, `text-encoding` module is 190 KiB gzipped, and `iconv-lite` is 194 KiB gzipped).
70
+
71
+ It still supports `utf-8`, `utf-16le`, `utf-16be` and all single-byte encodings specified by the spec,
72
+ the only difference is that legacy multi-byte encodings are not supported.
73
+
74
+ See [the list of encodings](https://encoding.spec.whatwg.org/#names-and-labels).
75
+
57
76
  ## API
58
77
 
59
78
  ### `@exodus/bytes/utf8.js`
@@ -98,6 +117,11 @@ There is no loose variant for this encoding, all bytes can be decoded.
98
117
 
99
118
  Same as `windows1252toString = createSinglebyteDecoder('windows-1252')`.
100
119
 
120
+ ### `@exodus/bytes/bigint.js`
121
+
122
+ ##### `fromBigInt(bigint, { length, format = 'uint8' })`
123
+ ##### `toBigInt(arr)`
124
+
101
125
  ### `@exodus/bytes/hex.js`
102
126
 
103
127
  ##### `toHex(arr)`
@@ -131,6 +155,9 @@ Same as `windows1252toString = createSinglebyteDecoder('windows-1252')`.
131
155
  ##### `toBase58(arr)`
132
156
  ##### `fromBase58(str, format = 'uint8')`
133
157
 
158
+ ##### `toBase58xrp(arr)`
159
+ ##### `fromBase58xrp(str, format = 'uint8')`
160
+
134
161
  ### `@exodus/bytes/base58check.js`
135
162
 
136
163
  ##### `async toBase58check(arr)`
@@ -146,6 +173,130 @@ Same as `windows1252toString = createSinglebyteDecoder('windows-1252')`.
146
173
  ##### `async toWifString({ version, privateKey, compressed })`
147
174
  ##### `toWifStringSync({ version, privateKey, compressed })`
148
175
 
176
+ ### `@exodus/bytes/encoding.js`
177
+
178
+ Implements the [Encoding standard](https://encoding.spec.whatwg.org/):
179
+ [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder),
180
+ [TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder),
181
+ some [hooks](https://encoding.spec.whatwg.org/#specification-hooks) (see below).
182
+
183
+ ```js
184
+ import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
185
+
186
+ // Hooks for standards
187
+ import { getBOMEncoding, legacyHookDecode, normalizeEncoding } from '@exodus/bytes/encoding.js'
188
+ ```
189
+
190
+ #### `new TextDecoder(label = 'utf-8', { fatal = false, ignoreBOM = false })`
191
+
192
+ [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder) implementation/polyfill.
193
+
194
+ #### `new TextEncoder()`
195
+
196
+ [TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder) implementation/polyfill.
197
+
198
+ #### `normalizeEncoding(label)`
199
+
200
+ Implements [get an encoding from a string `label`](https://encoding.spec.whatwg.org/#concept-encoding-get).
201
+
202
+ Converts an encoding [label](https://encoding.spec.whatwg.org/#names-and-labels) to its name,
203
+ as an ASCII-lowercased string.
204
+
205
+ If an encoding with that label does not exist, returns `null`.
206
+
207
+ This is the same as [`decoder.encoding` getter](https://encoding.spec.whatwg.org/#dom-textdecoder-encoding),
208
+ except that it:
209
+ 1. Supports [`replacement` encoding](https://encoding.spec.whatwg.org/#replacement) and its
210
+ [labels](https://encoding.spec.whatwg.org/#ref-for-replacement%E2%91%A1)
211
+ 2. Does not throw for invalid labels and instead returns `null`
212
+
213
+ All encoding names are also valid labels for corresponding encodings.
214
+
215
+ #### `getBOMEncoding(input)`
216
+
217
+ Implements [BOM sniff](https://encoding.spec.whatwg.org/#bom-sniff) legacy hook.
218
+
219
+ Given a `TypedArray` or an `ArrayBuffer` instance `input`, returns one of:
220
+ * `'utf-8'`, if `input` starts with UTF-8 byte order mark.
221
+ * `'utf-16le'`, if `input` starts with UTF-16LE byte order mark.
222
+ * `'utf-16be'`, if `input` starts with UTF-16BE byte order mark.
223
+ * `null` otherwise.
224
+
225
+ #### `legacyHookDecode(input, fallbackEncoding = 'utf-8')`
226
+
227
+ Implements [decode](https://encoding.spec.whatwg.org/#decode) legacy hook.
228
+
229
+ Given a `TypedArray` or an `ArrayBuffer` instance `input` and an optional `fallbackEncoding`
230
+ encoding [label](https://encoding.spec.whatwg.org/#names-and-labels),
231
+ sniffs encoding from BOM with `fallbackEncoding` fallback and then
232
+ decodes the `input` using that encoding, skipping BOM if it was present.
233
+
234
+ Notes:
235
+
236
+ * BOM-sniffed encoding takes precedence over `fallbackEncoding` option per spec.
237
+ Use with care.
238
+ * Always operates in non-fatal [mode](https://encoding.spec.whatwg.org/#textdecoder-error-mode),
239
+ aka replacement. It can convert different byte sequences to equal strings.
240
+
241
+ This method is similar to the following code, except that it doesn't support encoding labels and
242
+ only expects lowercased encoding name:
243
+
244
+ ```js
245
+ new TextDecoder(getBOMEncoding(input) ?? fallbackEncoding).decode(input)
246
+ ```
247
+
248
+ ### `@exodus/bytes/encoding-lite.js`
249
+
250
+ ```js
251
+ import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-lite.js'
252
+
253
+ // Hooks for standards
254
+ import { getBOMEncoding, legacyHookDecode, normalizeEncoding } from '@exodus/bytes/encoding-lite.js'
255
+ ```
256
+
257
+ The exact same exports as `@exodus/bytes/encoding.js` are also exported as
258
+ `@exodus/bytes/encoding-lite.js`, with the difference that the lite version does not load
259
+ multi-byte `TextDecoder` encodings by default to reduce bundle size 10x.
260
+
261
+ The only affected encodings are: `gbk`, `gb18030`, `big5`, `euc-jp`, `iso-2022-jp`, `shift_jis`
262
+ and their [labels](https://encoding.spec.whatwg.org/#names-and-labels) when used with `TextDecoder`.
263
+
264
+ Legacy single-byte encodings are loaded by default in both cases.
265
+
266
+ `TextEncoder` and hooks for standards (including `normalizeEncoding`) do not have any behavior
267
+ differences in the lite version and support the full range of inputs.
268
+
269
+ To avoid inconsistencies, the exported classes and methods are exactly the same objects.
270
+
271
+ ```console
272
+ > lite = require('@exodus/bytes/encoding-lite.js')
273
+ [Module: null prototype] {
274
+ TextDecoder: [class TextDecoder],
275
+ TextEncoder: [class TextEncoder],
276
+ getBOMEncoding: [Function: getBOMEncoding],
277
+ legacyHookDecode: [Function: legacyHookDecode],
278
+ normalizeEncoding: [Function: normalizeEncoding]
279
+ }
280
+ > new lite.TextDecoder('big5').decode(Uint8Array.of(0x25))
281
+ Uncaught:
282
+ Error: Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encoding.js for full encodings range support
283
+
284
+ > full = require('@exodus/bytes/encoding.js')
285
+ [Module: null prototype] {
286
+ TextDecoder: [class TextDecoder],
287
+ TextEncoder: [class TextEncoder],
288
+ getBOMEncoding: [Function: getBOMEncoding],
289
+ legacyHookDecode: [Function: legacyHookDecode],
290
+ normalizeEncoding: [Function: normalizeEncoding]
291
+ }
292
+ > full.TextDecoder === lite.TextDecoder
293
+ true
294
+ > new full.TextDecoder('big5').decode(Uint8Array.of(0x25))
295
+ '%'
296
+ > new lite.TextDecoder('big5').decode(Uint8Array.of(0x25))
297
+ '%'
298
+ ```
299
+
149
300
  ## License
150
301
 
151
302
  [MIT](./LICENSE)
package/array.d.ts ADDED
@@ -0,0 +1,24 @@
1
+ /// <reference types="node" />
2
+
3
+ // >= TypeScript 5.9 made Uint8Array templated with <> and defaulted to ArrayBufferLike
4
+ // which would incorrectly accept SharedArrayBuffer instances.
5
+ // < TypeScript 5.7 doesn't support templates for Uint8Array.
6
+ // So this type is defined as a workaround to evaluate to Uint8Array<ArrayBuffer> on all versions of TypeScript.
7
+ export type Uint8ArrayBuffer = ReturnType<typeof Uint8Array.from>;
8
+
9
+ /**
10
+ * Output format for typed array conversions
11
+ */
12
+ export type OutputFormat = 'uint8' | 'buffer';
13
+
14
+ /**
15
+ * Creates a view of a TypedArray in the specified format
16
+ * Note: This does not copy data - returns a view on the same underlying buffer
17
+ * @param arr - The input TypedArray
18
+ * @param format - The desired output format ('uint8' or 'buffer')
19
+ * @returns A view on the same underlying buffer
20
+ */
21
+ export function typedView(arr: ArrayBufferView, format: 'uint8'): Uint8Array;
22
+ export function typedView(arr: ArrayBufferView, format: 'buffer'): Buffer;
23
+ export function typedView(arr: ArrayBufferView, format: OutputFormat): Uint8Array | Buffer;
24
+
package/base58.js CHANGED
@@ -3,10 +3,10 @@ import { assertUint8 } from './assert.js'
3
3
  import { nativeDecoder, nativeEncoder, isHermes } from './fallback/_utils.js'
4
4
  import { encodeAscii, decodeAscii } from './fallback/latin1.js'
5
5
 
6
- const alphabet = [...'123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz']
7
- const codes = new Uint8Array(alphabet.map((x) => x.charCodeAt(0)))
8
- const ZERO = alphabet[0]
9
- const zeroC = codes[0]
6
+ const alphabet58 = [...'123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz']
7
+ const alphabetXRP = [...'rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz']
8
+ const codes58 = new Uint8Array(alphabet58.map((x) => x.charCodeAt(0)))
9
+ const codesXRP = new Uint8Array(alphabetXRP.map((x) => x.charCodeAt(0)))
10
10
 
11
11
  const _0n = BigInt(0)
12
12
  const _1n = BigInt(1)
@@ -16,17 +16,18 @@ const _58n = BigInt(58)
16
16
  const _0xffffffffn = BigInt(0xff_ff_ff_ff)
17
17
 
18
18
  let table // 15 * 82, diagonal, <1kb
19
- let fromMap
19
+ const fromMaps = new Map()
20
20
 
21
21
  const E_CHAR = 'Invalid character in base58 input'
22
22
 
23
23
  const shouldUseBigIntFrom = isHermes // faster only on Hermes, numbers path beats it on normal engines
24
24
 
25
- export function toBase58(arr) {
25
+ function toBase58core(arr, alphabet, codes) {
26
26
  assertUint8(arr)
27
27
  const length = arr.length
28
28
  if (length === 0) return ''
29
29
 
30
+ const ZERO = alphabet[0]
30
31
  let zeros = 0
31
32
  while (zeros < length && arr[zeros] === 0) zeros++
32
33
 
@@ -120,18 +121,20 @@ export function toBase58(arr) {
120
121
  return ZERO.repeat(zeros) + out
121
122
  }
122
123
 
123
- // TODO: test on 'z'.repeat(from 1 to smth)
124
- export function fromBase58(str, format = 'uint8') {
124
+ function fromBase58core(str, alphabet, codes, format = 'uint8') {
125
125
  if (typeof str !== 'string') throw new TypeError('Input is not a string')
126
126
  const length = str.length
127
127
  if (length === 0) return typedView(new Uint8Array(), format)
128
128
 
129
+ const zeroC = codes[0]
129
130
  let zeros = 0
130
131
  while (zeros < length && str.charCodeAt(zeros) === zeroC) zeros++
131
132
 
133
+ let fromMap = fromMaps.get(alphabet)
132
134
  if (!fromMap) {
133
135
  fromMap = new Int8Array(256).fill(-1)
134
136
  for (let i = 0; i < 58; i++) fromMap[alphabet[i].charCodeAt(0)] = i
137
+ fromMaps.set(alphabet, fromMap)
135
138
  }
136
139
 
137
140
  const size = zeros + (((length - zeros + 1) * 3) >> 2) // 3/4 rounded up, larger than ~0.73 coef to fit everything
@@ -210,3 +213,8 @@ export function fromBase58(str, format = 'uint8') {
210
213
 
211
214
  return typedView(res.slice(at - zeros), format) // slice is faster for small sizes than subarray
212
215
  }
216
+
217
+ export const toBase58 = (arr) => toBase58core(arr, alphabet58, codes58)
218
+ export const fromBase58 = (str, format) => fromBase58core(str, alphabet58, codes58, format)
219
+ export const toBase58xrp = (arr) => toBase58core(arr, alphabetXRP, codesXRP)
220
+ export const fromBase58xrp = (str, format) => fromBase58core(str, alphabetXRP, codesXRP, format)
package/base64.d.ts ADDED
@@ -0,0 +1,76 @@
1
+ /// <reference types="node" />
2
+
3
+ import type { OutputFormat, Uint8ArrayBuffer } from './array.js';
4
+
5
+ /**
6
+ * Options for base64 encoding
7
+ */
8
+ export interface ToBase64Options {
9
+ /** Whether to include padding characters (default: true for base64, false for base64url) */
10
+ padding?: boolean;
11
+ }
12
+
13
+ /**
14
+ * Padding mode for base64 decoding
15
+ * - true: padding is required
16
+ * - false: padding is not allowed
17
+ * - 'both': padding is optional (default for base64)
18
+ */
19
+ export type PaddingMode = boolean | 'both';
20
+
21
+ /**
22
+ * Options for base64 decoding
23
+ */
24
+ export interface FromBase64Options {
25
+ /** Output format (default: 'uint8') */
26
+ format?: OutputFormat;
27
+ /** Padding mode */
28
+ padding?: PaddingMode;
29
+ }
30
+
31
+ /**
32
+ * Encodes a Uint8Array to a base64 string (RFC 4648)
33
+ * @param arr - The input bytes
34
+ * @param options - Encoding options
35
+ * @returns The base64 encoded string
36
+ */
37
+ export function toBase64(arr: Uint8ArrayBuffer, options?: ToBase64Options): string;
38
+
39
+ /**
40
+ * Encodes a Uint8Array to a base64url string (RFC 4648)
41
+ * @param arr - The input bytes
42
+ * @param options - Encoding options (padding defaults to false)
43
+ * @returns The base64url encoded string
44
+ */
45
+ export function toBase64url(arr: Uint8ArrayBuffer, options?: ToBase64Options): string;
46
+
47
+ /**
48
+ * Decodes a base64 string to bytes
49
+ * Operates in strict mode for last chunk, does not allow whitespace
50
+ * @param str - The base64 encoded string
51
+ * @param options - Decoding options
52
+ * @returns The decoded bytes
53
+ */
54
+ export function fromBase64(str: string, options?: FromBase64Options): Uint8ArrayBuffer;
55
+ export function fromBase64(str: string, options: FromBase64Options & { format: 'buffer' }): Buffer;
56
+
57
+ /**
58
+ * Decodes a base64url string to bytes
59
+ * Operates in strict mode for last chunk, does not allow whitespace
60
+ * @param str - The base64url encoded string
61
+ * @param options - Decoding options (padding defaults to false)
62
+ * @returns The decoded bytes
63
+ */
64
+ export function fromBase64url(str: string, options?: FromBase64Options): Uint8ArrayBuffer;
65
+ export function fromBase64url(str: string, options: FromBase64Options & { format: 'buffer' }): Buffer;
66
+
67
+ /**
68
+ * Decodes either base64 or base64url string to bytes
69
+ * Automatically detects the variant based on characters present
70
+ * @param str - The base64 or base64url encoded string
71
+ * @param options - Decoding options
72
+ * @returns The decoded bytes
73
+ */
74
+ export function fromBase64any(str: string, options?: FromBase64Options): Uint8ArrayBuffer;
75
+ export function fromBase64any(str: string, options: FromBase64Options & { format: 'buffer' }): Buffer;
76
+
package/bigint.js ADDED
@@ -0,0 +1,14 @@
1
+ import { toHex, fromHex } from '@exodus/bytes/hex.js'
2
+ import { assert } from './fallback/_utils.js'
3
+
4
+ const _0n = BigInt(0)
5
+
6
+ export function fromBigInt(x, { length, format } = {}) {
7
+ assert(Number.isSafeInteger(length) && length > 0, 'Expected length arg to be a positive integer')
8
+ assert(typeof x === 'bigint' && x >= _0n, 'Expected a non-negative bigint')
9
+ const hex = x.toString(16)
10
+ assert(length * 2 >= hex.length, `Can not fit supplied number into ${length} bytes`)
11
+ return fromHex(hex.padStart(length * 2, '0'), format)
12
+ }
13
+
14
+ export const toBigInt = (a) => BigInt('0x' + (toHex(a) || '0'))
@@ -0,0 +1,7 @@
1
+ export {
2
+ TextDecoder,
3
+ TextEncoder,
4
+ normalizeEncoding,
5
+ getBOMEncoding,
6
+ legacyHookDecode,
7
+ } from './fallback/encoding.js'
package/encoding.js CHANGED
@@ -1,234 +1,12 @@
1
- // A limited subset of TextEncoder / TextDecoder API
2
-
3
- // We can't return native TextDecoder if it's present, as Node.js one is broken on windows-1252 and we fix that
4
- // We are also faster than Node.js built-in on both TextEncoder and TextDecoder
5
-
6
- /* eslint-disable @exodus/import/no-unresolved */
7
-
8
- import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
9
- import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
10
- import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js'
11
- import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
12
- import { multibyteSupported } from './fallback/multi-byte.js'
13
- import labels from './fallback/encoding.labels.js'
14
- import { unfinishedBytes } from './fallback/encoding.util.js'
15
-
16
- const E_OPTIONS = 'The "options" argument must be of type object'
17
- const replacementChar = '\uFFFD'
18
-
19
- let labelsMap
20
- const normalizeEncoding = (enc) => {
21
- // fast path
22
- if (enc === 'utf-8' || enc === 'utf8') return 'utf-8'
23
- if (enc === 'windows-1252' || enc === 'ascii' || enc === 'latin1') return 'windows-1252'
24
- // full map
25
- let low = `${enc}`.toLowerCase()
26
- if (low !== low.trim()) low = low.replace(/^[\t\n\f\r ]+/, '').replace(/[\t\n\f\r ]+$/, '') // only ASCII whitespace
27
- if (Object.hasOwn(labels, low) && low !== 'replacement') return low
28
- if (!labelsMap) {
29
- labelsMap = new Map()
30
- for (const [label, aliases] of Object.entries(labels)) {
31
- for (const alias of aliases) labelsMap.set(alias, label)
32
- }
33
- }
34
-
35
- const mapped = labelsMap.get(low)
36
- if (mapped && mapped !== 'replacement') return mapped
37
- throw new RangeError('Unknown encoding')
38
- }
39
-
40
- const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
41
-
42
- const fromSource = (x) => {
43
- if (x instanceof Uint8Array) return x
44
- if (x instanceof ArrayBuffer) return new Uint8Array(x)
45
- if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
46
- if (globalThis.SharedArrayBuffer && x instanceof globalThis.SharedArrayBuffer) {
47
- return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
48
- }
49
-
50
- throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
51
- }
52
-
53
- function unicodeDecoder(encoding, loose) {
54
- if (encoding === 'utf-8') return loose ? utf8toStringLoose : utf8toString // likely
55
- const form = encoding === 'utf-16le' ? 'uint8-le' : 'uint8-be'
56
- return loose ? (u) => utf16toStringLoose(u, form) : (u) => utf16toString(u, form)
57
- }
58
-
59
- export class TextDecoder {
60
- #decode
61
- #unicode
62
- #multibyte
63
- #chunk
64
- #canBOM
65
-
66
- constructor(encoding = 'utf-8', options = {}) {
67
- if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
68
- const enc = normalizeEncoding(encoding)
69
- define(this, 'encoding', enc)
70
- define(this, 'fatal', Boolean(options.fatal))
71
- define(this, 'ignoreBOM', Boolean(options.ignoreBOM))
72
- this.#unicode = enc === 'utf-8' || enc === 'utf-16le' || enc === 'utf-16be'
73
- this.#multibyte = !this.#unicode && enc !== 'windows-1252' && multibyteSupported(enc)
74
- this.#canBOM = this.#unicode && !this.ignoreBOM
75
- }
76
-
77
- get [Symbol.toStringTag]() {
78
- return 'TextDecoder'
79
- }
80
-
81
- decode(input, options = {}) {
82
- if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
83
- const stream = Boolean(options.stream)
84
- let u = input === undefined ? new Uint8Array() : fromSource(input)
85
-
86
- if (this.#unicode) {
87
- let prefix
88
- if (this.#chunk) {
89
- if (u.length === 0) {
90
- if (stream) return '' // no change
91
- u = this.#chunk // process as final chunk to handle errors and state changes
92
- } else if (u.length < 3) {
93
- // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
94
- const a = new Uint8Array(u.length + this.#chunk.length)
95
- a.set(this.#chunk)
96
- a.set(u, this.#chunk.length)
97
- u = a
98
- } else {
99
- // Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
100
- const t = new Uint8Array(this.#chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
101
- t.set(this.#chunk)
102
- t.set(u.subarray(0, 3), this.#chunk.length)
103
-
104
- // Stop at the first offset where unfinished bytes reaches 0 or fits into u
105
- // If that doesn't happen (u too short), just concat chunk and u completely
106
- for (let i = 1; i <= 3; i++) {
107
- const unfinished = unfinishedBytes(t, this.#chunk.length + i, this.encoding) // 0-3
108
- if (unfinished <= i) {
109
- // Always reachable at 3, but we still need 'unfinished' value for it
110
- const add = i - unfinished // 0-3
111
- prefix = add > 0 ? t.subarray(0, this.#chunk.length + add) : this.#chunk
112
- if (add > 0) u = u.subarray(add)
113
- break
114
- }
115
- }
116
- }
117
-
118
- this.#chunk = null
119
- } else if (u.byteLength === 0) {
120
- if (!stream) this.#canBOM = !this.ignoreBOM
121
- return ''
122
- }
123
-
124
- // For non-stream utf-8 we don't have to do this as it matches utf8toStringLoose already
125
- // For non-stream loose utf-16 we still have to do this as this API supports uneven byteLength unlike utf16toStringLoose
126
- let suffix = ''
127
- if (stream || (!this.fatal && this.encoding !== 'utf-8')) {
128
- const trail = unfinishedBytes(u, u.byteLength, this.encoding)
129
- if (trail > 0) {
130
- if (stream) {
131
- this.#chunk = Uint8Array.from(u.subarray(-trail)) // copy
132
- } else {
133
- // non-fatal mode as already checked
134
- suffix = replacementChar
135
- }
136
-
137
- u = u.subarray(0, -trail)
138
- }
139
- }
140
-
141
- if (this.#canBOM) {
142
- const bom = this.#findBom(prefix ?? u)
143
- if (bom) {
144
- if (stream) this.#canBOM = false
145
- if (prefix) {
146
- prefix = prefix.subarray(bom)
147
- } else {
148
- u = u.subarray(bom)
149
- }
150
- }
151
- }
152
-
153
- if (!this.#decode) this.#decode = unicodeDecoder(this.encoding, !this.fatal)
154
- try {
155
- const res = (prefix ? this.#decode(prefix) : '') + this.#decode(u) + suffix
156
- if (res.length > 0 && stream) this.#canBOM = false
157
-
158
- if (!stream) this.#canBOM = !this.ignoreBOM
159
- return res
160
- } catch (err) {
161
- this.#chunk = null // reset unfinished chunk on errors
162
- throw err
163
- }
164
-
165
- // eslint-disable-next-line no-else-return
166
- } else if (this.#multibyte) {
167
- if (!this.#decode) this.#decode = createMultibyteDecoder(this.encoding, !this.fatal) // can contain state!
168
- return this.#decode(u, stream)
169
- } else {
170
- if (!this.#decode) this.#decode = createSinglebyteDecoder(this.encoding, !this.fatal)
171
- return this.#decode(u)
172
- }
173
- }
174
-
175
- #findBom(u) {
176
- switch (this.encoding) {
177
- case 'utf-8':
178
- return u.byteLength >= 3 && u[0] === 0xef && u[1] === 0xbb && u[2] === 0xbf ? 3 : 0
179
- case 'utf-16le':
180
- return u.byteLength >= 2 && u[0] === 0xff && u[1] === 0xfe ? 2 : 0
181
- case 'utf-16be':
182
- return u.byteLength >= 2 && u[0] === 0xfe && u[1] === 0xff ? 2 : 0
183
- }
184
-
185
- throw new Error('Unreachable')
186
- }
187
- }
188
-
189
- export class TextEncoder {
190
- constructor() {
191
- define(this, 'encoding', 'utf-8')
192
- }
193
-
194
- get [Symbol.toStringTag]() {
195
- return 'TextEncoder'
196
- }
197
-
198
- encode(str = '') {
199
- if (typeof str !== 'string') str = `${str}`
200
- const res = utf8fromStringLoose(str)
201
- return res.byteOffset === 0 ? res : res.slice(0) // Ensure 0-offset. TODO: do we need this?
202
- }
203
-
204
- encodeInto(str, target) {
205
- if (typeof str !== 'string') str = `${str}`
206
- if (!(target instanceof Uint8Array)) throw new TypeError('Target must be an Uint8Array')
207
- if (target.buffer.detached) return { read: 0, written: 0 } // Until https://github.com/whatwg/encoding/issues/324 is resolved
208
-
209
- let u8 = utf8fromStringLoose(str) // TODO: perf?
210
- let read
211
- if (target.length >= u8.length) {
212
- read = str.length
213
- } else if (u8.length === str.length) {
214
- if (u8.length > target.length) u8 = u8.subarray(0, target.length) // ascii can be truncated
215
- read = u8.length
216
- } else {
217
- u8 = u8.subarray(0, target.length)
218
- const unfinished = unfinishedBytes(u8, u8.length, 'utf-8')
219
- if (unfinished > 0) u8 = u8.subarray(0, u8.length - unfinished)
220
-
221
- // We can do this because loose str -> u8 -> str preserves length, unlike loose u8 -> str -> u8
222
- // Each unpaired surrogate (1 charcode) is replaced with a single charcode
223
- read = utf8toStringLoose(u8).length // FIXME: Converting back is very inefficient
224
- }
225
-
226
- try {
227
- target.set(u8)
228
- } catch {
229
- return { read: 0, written: 0 } // see above, likely detached but no .detached property support
230
- }
231
-
232
- return { read, written: u8.length }
233
- }
234
- }
1
+ import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js' // eslint-disable-line @exodus/import/no-unresolved
2
+ import { setMultibyteDecoder } from './fallback/encoding.js'
3
+
4
+ setMultibyteDecoder(createMultibyteDecoder)
5
+
6
+ export {
7
+ TextDecoder,
8
+ TextEncoder,
9
+ normalizeEncoding,
10
+ getBOMEncoding,
11
+ legacyHookDecode,
12
+ } from './fallback/encoding.js'