npm - @exodus/bytes - Versions diffs - 1.8.0 → 1.10.0 - Mend

@exodus/bytes 1.8.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

package/README.md +370 -90
package/array.d.ts +41 -4
package/base32.d.ts +83 -0
package/base58.d.ts +62 -0
package/base58.js +1 -1
package/base58check.d.ts +131 -0
package/base58check.js +3 -3
package/base64.d.ts +40 -20
package/bech32.d.ts +76 -0
package/bigint.d.ts +48 -0
package/encoding-browser.browser.js +29 -0
package/encoding-browser.d.ts +24 -0
package/encoding-browser.js +1 -0
package/encoding-browser.native.js +1 -0
package/encoding-lite.d.ts +61 -0
package/encoding.d.ts +93 -11
package/encoding.js +4 -3
package/fallback/_utils.js +15 -11
package/fallback/encoding.api.js +81 -0
package/fallback/encoding.js +37 -121
package/fallback/encoding.util.js +34 -0
package/fallback/latin1.js +1 -0
package/fallback/multi-byte.encodings.json +1 -0
package/fallback/multi-byte.js +527 -71
package/fallback/multi-byte.table.js +23 -15
package/fallback/single-byte.js +1 -1
package/fallback/utf16.js +45 -26
package/fallback/utf8.js +1 -1
package/hex.d.ts +22 -9
package/index.d.ts +43 -0
package/index.js +5 -0
package/multi-byte.d.ts +57 -0
package/multi-byte.js +7 -1
package/multi-byte.node.js +7 -1
package/package.json +83 -10
package/single-byte.d.ts +149 -0
package/single-byte.js +9 -11
package/single-byte.node.js +29 -26
package/utf16.d.ts +92 -0
package/utf16.js +1 -0
package/utf16.node.js +6 -2
package/utf8.d.ts +52 -18
package/utf8.js +7 -2
package/utf8.node.js +1 -1
package/wif.d.ts +76 -0

package/encoding-lite.d.ts CHANGED Viewed

@@ -1 +1,62 @@
+/**
+ * The exact same exports as `@exodus/bytes/encoding.js` are also exported as
+ * `@exodus/bytes/encoding-lite.js`, with the difference that the lite version does not load
+ * multi-byte `TextDecoder` encodings by default to reduce bundle size 10x.
+ *
+ * ```js
+ * import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-lite.js'
+ * import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-lite.js' // Requires Streams
+ *
+ * // Hooks for standards
+ * import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding-lite.js'
+ * ```
+ *
+ * The only affected encodings are: `gbk`, `gb18030`, `big5`, `euc-kr`, `euc-jp`, `iso-2022-jp`, `shift_jis`
+ * and their [labels](https://encoding.spec.whatwg.org/#names-and-labels) when used with `TextDecoder`.
+ *
+ * Legacy single-byte encodings are loaded by default in both cases.
+ *
+ * `TextEncoder` and hooks for standards (including `labelToName` / `normalizeEncoding`) do not have any behavior
+ * differences in the lite version and support the full range of inputs.
+ *
+ * To avoid inconsistencies, the exported classes and methods are exactly the same objects.
+ *
+ * ```console
+ * > lite = require('@exodus/bytes/encoding-lite.js')
+ * [Module: null prototype] {
+ *   TextDecoder: [class TextDecoder],
+ *   TextDecoderStream: [class TextDecoderStream],
+ *   TextEncoder: [class TextEncoder],
+ *   TextEncoderStream: [class TextEncoderStream],
+ *   getBOMEncoding: [Function: getBOMEncoding],
+ *   labelToName: [Function: labelToName],
+ *   legacyHookDecode: [Function: legacyHookDecode],
+ *   normalizeEncoding: [Function: normalizeEncoding]
+ * }
+ * > new lite.TextDecoder('big5').decode(Uint8Array.of(0x25))
+ * Uncaught:
+ * Error: import '@exodus/bytes/encoding.js' for legacy multi-byte encodings support
+ *
+ * > full = require('@exodus/bytes/encoding.js')
+ * [Module: null prototype] {
+ *   TextDecoder: [class TextDecoder],
+ *   TextDecoderStream: [class TextDecoderStream],
+ *   TextEncoder: [class TextEncoder],
+ *   TextEncoderStream: [class TextEncoderStream],
+ *   getBOMEncoding: [Function: getBOMEncoding],
+ *   labelToName: [Function: labelToName],
+ *   legacyHookDecode: [Function: legacyHookDecode],
+ *   normalizeEncoding: [Function: normalizeEncoding]
+ * }
+ * > full.TextDecoder === lite.TextDecoder
+ * true
+ * > new full.TextDecoder('big5').decode(Uint8Array.of(0x25))
+ * '%'
+ * > new lite.TextDecoder('big5').decode(Uint8Array.of(0x25))
+ * '%'
+ * ```
+ *
+ * @module @exodus/bytes/encoding-lite.js
+ */
 export * from './encoding.js'

package/encoding.d.ts CHANGED Viewed

@@ -1,14 +1,57 @@
+/**
+ * Implements the [Encoding standard](https://encoding.spec.whatwg.org/):
+ * [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder),
+ * [TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder),
+ * [TextDecoderStream](https://encoding.spec.whatwg.org/#interface-textdecoderstream),
+ * [TextEncoderStream](https://encoding.spec.whatwg.org/#interface-textencoderstream),
+ * some [hooks](https://encoding.spec.whatwg.org/#specification-hooks).
+ *
+ * ```js
+ * import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
+ * import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding.js' // Requires Streams
+ *
+ * // Hooks for standards
+ * import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding.js'
+ * ```
+ *
+ * @module @exodus/bytes/encoding.js
+ */
 /// <reference types="node" />
 /**
- * Converts an encoding label to its name, as an ASCII-lowercased string
+ * Convert an encoding [label](https://encoding.spec.whatwg.org/#names-and-labels) to its name,
+ * as an ASCII-lowercased string.
+ *
+ * If an encoding with that label does not exist, returns `null`.
+ *
+ * This is the same as [`decoder.encoding` getter](https://encoding.spec.whatwg.org/#dom-textdecoder-encoding),
+ * except that it:
+ *  1. Supports [`replacement` encoding](https://encoding.spec.whatwg.org/#replacement) and its
+ *     [labels](https://encoding.spec.whatwg.org/#ref-for-replacement%E2%91%A1)
+ *  2. Does not throw for invalid labels and instead returns `null`
+ *
+ * It is identical to:
+ * ```js
+ * labelToName(label)?.toLowerCase() ?? null
+ * ```
+ *
+ * All encoding names are also valid labels for corresponding encodings.
+ *
  * @param label - The encoding label to normalize
  * @returns The normalized encoding name, or null if invalid
  */
 export function normalizeEncoding(label: string): string | null;
 /**
- * Implements BOM sniff (https://encoding.spec.whatwg.org/#bom-sniff) legacy hook.
+ * Implements [BOM sniff](https://encoding.spec.whatwg.org/#bom-sniff) legacy hook.
+ *
+ * Given a `TypedArray` or an `ArrayBuffer` instance `input`, returns either of:
+ * - `'utf-8'`, if `input` starts with UTF-8 byte order mark.
+ * - `'utf-16le'`, if `input` starts with UTF-16LE byte order mark.
+ * - `'utf-16be'`, if `input` starts with UTF-16BE byte order mark.
+ * - `null` otherwise.
+ *
  * @param input - The bytes to check for BOM
  * @returns The encoding ('utf-8', 'utf-16le', 'utf-16be'), or null if no BOM found
  */
@@ -17,7 +60,27 @@ export function getBOMEncoding(
 ): 'utf-8' | 'utf-16le' | 'utf-16be' | null;
 /**
- * Implements decode (https://encoding.spec.whatwg.org/#decode) legacy hook.
+ * Implements [decode](https://encoding.spec.whatwg.org/#decode) legacy hook.
+ *
+ * Given a `TypedArray` or an `ArrayBuffer` instance `input` and an optional `fallbackEncoding`
+ * encoding [label](https://encoding.spec.whatwg.org/#names-and-labels),
+ * sniffs encoding from BOM with `fallbackEncoding` fallback and then
+ * decodes the `input` using that encoding, skipping BOM if it was present.
+ *
+ * Notes:
+ *
+ * - BOM-sniffed encoding takes precedence over `fallbackEncoding` option per spec.
+ *   Use with care.
+ * - Always operates in non-fatal [mode](https://encoding.spec.whatwg.org/#textdecoder-error-mode),
+ *   aka replacement. It can convert different byte sequences to equal strings.
+ *
+ * This method is similar to the following code, except that it doesn't support encoding labels and
+ * only expects a lowercased encoding name:
+ *
+ * ```js
+ * new TextDecoder(getBOMEncoding(input) ?? fallbackEncoding).decode(input)
+ * ```
+ *
  * @param input - The bytes to decode
  * @param fallbackEncoding - The encoding to use if no BOM detected (default: 'utf-8')
  * @returns The decoded string
@@ -28,31 +91,50 @@ export function legacyHookDecode(
 ): string;
 /**
- * Converts an encoding label to its name, as a case-sensitive string.
+ * Implements [get an encoding from a string `label`](https://encoding.spec.whatwg.org/#concept-encoding-get).
+ *
+ * Convert an encoding [label](https://encoding.spec.whatwg.org/#names-and-labels) to its name,
+ * as a case-sensitive string.
+ *
+ * If an encoding with that label does not exist, returns `null`.
+ *
+ * All encoding names are also valid labels for corresponding encodings.
+ *
  * @param label - The encoding label
  * @returns The proper case encoding name, or null if invalid
  */
 export function labelToName(label: string): string | null;
 /**
- * Text decoder for decoding bytes to strings in various encodings
- * Supports strict and lossy modes
+ * [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder) implementation/polyfill.
+ *
+ * Decode bytes to strings according to [WHATWG Encoding](https://encoding.spec.whatwg.org) specification.
  */
 export const TextDecoder: typeof globalThis.TextDecoder;
 /**
- * Text encoder for encoding strings to UTF-8 bytes
+ * [TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder) implementation/polyfill.
+ *
+ * Encode strings to UTF-8 bytes according to [WHATWG Encoding](https://encoding.spec.whatwg.org) specification.
  */
 export const TextEncoder: typeof globalThis.TextEncoder;
 /**
- * Transform stream wrapper for TextDecoder
- * Decodes chunks of bytes to strings
+ * [TextDecoderStream](https://encoding.spec.whatwg.org/#interface-textdecoderstream) implementation/polyfill.
+ *
+ * A [Streams](https://streams.spec.whatwg.org/) wrapper for `TextDecoder`.
+ *
+ * Requires [Streams](https://streams.spec.whatwg.org/) to be either supported by the platform or
+ * [polyfilled](https://npmjs.com/package/web-streams-polyfill).
  */
 export const TextDecoderStream: typeof globalThis.TextDecoderStream;
 /**
- * Transform stream wrapper for TextEncoder
- * Encodes chunks of strings to UTF-8 bytes
+ * [TextEncoderStream](https://encoding.spec.whatwg.org/#interface-textencoderstream) implementation/polyfill.
+ *
+ * A [Streams](https://streams.spec.whatwg.org/) wrapper for `TextEncoder`.
+ *
+ * Requires [Streams](https://streams.spec.whatwg.org/) to be either supported by the platform or
+ * [polyfilled](https://npmjs.com/package/web-streams-polyfill).
  */
 export const TextEncoderStream: typeof globalThis.TextEncoderStream;

package/encoding.js CHANGED Viewed

@@ -1,7 +1,8 @@
-import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js' // eslint-disable-line @exodus/import/no-unresolved
-import { setMultibyteDecoder } from './fallback/encoding.js'
+import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js'
+import { multibyteEncoder } from './fallback/multi-byte.js'
+import { setMultibyte } from './fallback/encoding.js'
-setMultibyteDecoder(createMultibyteDecoder)
+setMultibyte(createMultibyteDecoder, multibyteEncoder)
 export {
   TextDecoder,

package/fallback/_utils.js CHANGED Viewed

@@ -1,9 +1,9 @@
 const { Buffer, TextEncoder, TextDecoder } = globalThis
 const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
 export const nativeBuffer = haveNativeBuffer ? Buffer : null
-export const isHermes = Boolean(globalThis.HermesInternal)
-export const isDeno = Boolean(globalThis.Deno)
-export const isLE = new Uint8Array(Uint16Array.of(258).buffer)[0] === 2
+export const isHermes = !!globalThis.HermesInternal
+export const isDeno = !!globalThis.Deno
+export const isLE = /* @__PURE__ */ (() => new Uint8Array(Uint16Array.of(258).buffer)[0] === 2)()
 // We consider Node.js TextDecoder/TextEncoder native
 let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]'))
@@ -17,16 +17,19 @@ export const nativeDecoder = isNative(TextDecoder)
 // Actually windows-1252, compatible with ascii and latin1 decoding
 // Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
 // in 2025 due to a regression, so we call it Latin1 as it's usable only for that
-let nativeDecoderLatin1impl = null
-if (nativeDecoder) {
+const getNativeLain1 = () => {
   // Not all barebone engines with TextDecoder support something except utf-8, detect
-  try {
-    nativeDecoderLatin1impl = new TextDecoder('latin1', { ignoreBOM: true })
-  } catch {}
+  if (nativeDecoder) {
+    try {
+      return new TextDecoder('latin1', { ignoreBOM: true })
+    } catch {}
+  }
+  return null
 }
-export const nativeDecoderLatin1 = nativeDecoderLatin1impl
-export const canDecoders = Boolean(nativeDecoderLatin1impl)
+export const nativeDecoderLatin1 = /* @__PURE__ */ getNativeLain1()
+export const canDecoders = !!nativeDecoderLatin1
 // Block Firefox < 146 specifically from using native hex/base64, as it's very slow there
 // Refs: https://bugzilla.mozilla.org/show_bug.cgi?id=1994067 (and linked issues), fixed in 146
@@ -47,10 +50,11 @@ function shouldSkipBuiltins() {
     return /firefox/i.test(g.navigator.userAgent || '') // as simple as we can
   }
+  /* c8 ignore next */
   return false // eslint-disable-line no-unreachable
 }
-export const skipWeb = shouldSkipBuiltins()
+export const skipWeb = /* @__PURE__ */ shouldSkipBuiltins()
 function decodePartAddition(a, start, end, m) {
   let o = ''

package/fallback/encoding.api.js ADDED Viewed

@@ -0,0 +1,81 @@
+import labels from './encoding.labels.js'
+let labelsMap
+export const E_ENCODING = 'Unknown encoding'
+// Warning: unlike whatwg-encoding, returns lowercased labels
+// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
+// https://encoding.spec.whatwg.org/#names-and-labels
+export function normalizeEncoding(label) {
+  // fast path
+  if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
+  if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
+  // full map
+  if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
+  const low = `${label}`.trim().toLowerCase()
+  if (Object.hasOwn(labels, low)) return low
+  if (!labelsMap) {
+    labelsMap = new Map()
+    for (const [label, aliases] of Object.entries(labels)) {
+      for (const alias of aliases) labelsMap.set(alias, label)
+    }
+  }
+  const mapped = labelsMap.get(low)
+  if (mapped) return mapped
+  return null
+}
+// TODO: make this more strict against Symbol.toStringTag
+// Is not very significant though, anything faking Symbol.toStringTag could as well override
+// prototypes, which is not something we protect against
+function isAnyArrayBuffer(x) {
+  if (x instanceof ArrayBuffer) return true
+  if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return true
+  if (!x || typeof x.byteLength !== 'number') return false
+  const s = Object.prototype.toString.call(x)
+  return s === '[object ArrayBuffer]' || s === '[object SharedArrayBuffer]'
+}
+export function fromSource(x) {
+  if (x instanceof Uint8Array) return x
+  if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
+  if (isAnyArrayBuffer(x)) {
+    if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
+    // Old engines without .detached, try-catch
+    try {
+      return new Uint8Array(x)
+    } catch {
+      return new Uint8Array()
+    }
+  }
+  throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
+}
+// Warning: unlike whatwg-encoding, returns lowercased labels
+// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
+export function getBOMEncoding(input) {
+  const u8 = fromSource(input) // asserts
+  if (u8.length >= 3 && u8[0] === 0xef && u8[1] === 0xbb && u8[2] === 0xbf) return 'utf-8'
+  if (u8.length < 2) return null
+  if (u8[0] === 0xff && u8[1] === 0xfe) return 'utf-16le'
+  if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be'
+  return null
+}
+const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
+// Unlike normalizeEncoding, case-sensitive
+// https://encoding.spec.whatwg.org/#names-and-labels
+export function labelToName(label) {
+  const enc = normalizeEncoding(label)
+  if (enc === 'utf-8') return 'UTF-8' // fast path
+  if (!enc) return enc
+  if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
+  if (enc === 'big5') return 'Big5'
+  if (enc === 'shift_jis') return 'Shift_JIS'
+  return enc
+}

package/fallback/encoding.js CHANGED Viewed

@@ -5,82 +5,36 @@ import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
 import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
 import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
 import labels from './encoding.labels.js'
-import { unfinishedBytes } from './encoding.util.js'
+import { fromSource, getBOMEncoding, normalizeEncoding, E_ENCODING } from './encoding.api.js'
+import { unfinishedBytes, mergePrefix } from './encoding.util.js'
+export { labelToName, getBOMEncoding, normalizeEncoding } from './encoding.api.js'
+const E_MULTI = "import '@exodus/bytes/encoding.js' for legacy multi-byte encodings support"
 const E_OPTIONS = 'The "options" argument must be of type object'
-const E_ENCODING = 'Unknown encoding'
 const replacementChar = '\uFFFD'
-const E_MULTI =
-  'Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encoding.js for full encodings range support'
 const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
-let createMultibyteDecoder
+let createMultibyteDecoder, multibyteEncoder
-export function setMultibyteDecoder(createDecoder) {
+export const isMultibyte = (enc) => multibyteSet.has(enc)
+export function setMultibyte(createDecoder, createEncoder) {
   createMultibyteDecoder = createDecoder
+  multibyteEncoder = createEncoder
 }
-let labelsMap
-// Warning: unlike whatwg-encoding, returns lowercased labels
-// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
-// https://encoding.spec.whatwg.org/#names-and-labels
-export function normalizeEncoding(label) {
-  // fast path
-  if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
-  if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
-  // full map
-  if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
-  const low = `${label}`.trim().toLowerCase()
-  if (Object.hasOwn(labels, low)) return low
-  if (!labelsMap) {
-    labelsMap = new Map()
-    for (const [label, aliases] of Object.entries(labels)) {
-      for (const alias of aliases) labelsMap.set(alias, label)
-    }
-  }
-  const mapped = labelsMap.get(low)
-  if (mapped) return mapped
-  return null
+export function getMultibyteEncoder() {
+  if (!multibyteEncoder) throw new Error(E_MULTI)
+  return multibyteEncoder
 }
 const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
-// TODO: make this more strict against Symbol.toStringTag
-// Is not very significant though, anything faking Symbol.toStringTag could as well override
-// prototypes, which is not something we protect against
-function isAnyArrayBuffer(x) {
-  if (x instanceof ArrayBuffer) return true
-  if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return true
-  if (!x || typeof x.byteLength !== 'number') return false
-  const s = Object.prototype.toString.call(x)
-  return s === '[object ArrayBuffer]' || s === '[object SharedArrayBuffer]'
-}
 function isAnyUint8Array(x) {
   if (x instanceof Uint8Array) return true
   if (!x || !ArrayBuffer.isView(x) || x.BYTES_PER_ELEMENT !== 1) return false
   return Object.prototype.toString.call(x) === '[object Uint8Array]'
 }
-const fromSource = (x) => {
-  if (x instanceof Uint8Array) return x
-  if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
-  if (isAnyArrayBuffer(x)) {
-    if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
-    // Old engines without .detached, try-catch
-    try {
-      return new Uint8Array(x)
-    } catch {
-      return new Uint8Array()
-    }
-  }
-  throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
-}
 function unicodeDecoder(encoding, loose) {
   if (encoding === 'utf-8') return loose ? utf8toStringLoose : utf8toString // likely
   const form = encoding === 'utf-16le' ? 'uint8-le' : 'uint8-be'
@@ -99,10 +53,10 @@ export class TextDecoder {
     const enc = normalizeEncoding(encoding)
     if (!enc || enc === 'replacement') throw new RangeError(E_ENCODING)
     define(this, 'encoding', enc)
-    define(this, 'fatal', Boolean(options.fatal))
-    define(this, 'ignoreBOM', Boolean(options.ignoreBOM))
+    define(this, 'fatal', !!options.fatal)
+    define(this, 'ignoreBOM', !!options.ignoreBOM)
     this.#unicode = enc === 'utf-8' || enc === 'utf-16le' || enc === 'utf-16be'
-    this.#multibyte = !this.#unicode && multibyteSet.has(enc)
+    this.#multibyte = !this.#unicode && isMultibyte(enc)
     this.#canBOM = this.#unicode && !this.ignoreBOM
   }
@@ -112,44 +66,26 @@ export class TextDecoder {
   decode(input, options = {}) {
     if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
-    const stream = Boolean(options.stream)
+    const stream = !!options.stream
     let u = input === undefined ? new Uint8Array() : fromSource(input)
+    const empty = u.length === 0 // also can't be streaming after next line
+    if (empty && stream) return '' // no state change
     if (this.#unicode) {
       let prefix
       if (this.#chunk) {
-        if (u.length === 0) {
-          if (stream) return '' // no change
-          u = this.#chunk // process as final chunk to handle errors and state changes
-        } else if (u.length < 3) {
-          // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
-          const a = new Uint8Array(u.length + this.#chunk.length)
-          a.set(this.#chunk)
-          a.set(u, this.#chunk.length)
-          u = a
+        const merged = mergePrefix(u, this.#chunk, this.encoding)
+        if (u.length < 3) {
+          u = merged // might be unfinished, but fully consumed old u
         } else {
-          // Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
-          const t = new Uint8Array(this.#chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
-          t.set(this.#chunk)
-          t.set(u.subarray(0, 3), this.#chunk.length)
-          // Stop at the first offset where unfinished bytes reaches 0 or fits into u
-          // If that doesn't happen (u too short), just concat chunk and u completely
-          for (let i = 1; i <= 3; i++) {
-            const unfinished = unfinishedBytes(t, this.#chunk.length + i, this.encoding) // 0-3
-            if (unfinished <= i) {
-              // Always reachable at 3, but we still need 'unfinished' value for it
-              const add = i - unfinished // 0-3
-              prefix = add > 0 ? t.subarray(0, this.#chunk.length + add) : this.#chunk
-              if (add > 0) u = u.subarray(add)
-              break
-            }
-          }
+          prefix = merged // stops at complete chunk
+          const add = prefix.length - this.#chunk.length
+          if (add > 0) u = u.subarray(add)
         }
         this.#chunk = null
-      } else if (u.byteLength === 0) {
-        if (!stream) this.#canBOM = !this.ignoreBOM
+      } else if (empty) {
+        this.#canBOM = !this.ignoreBOM // not streaming
         return ''
       }
@@ -170,27 +106,31 @@ export class TextDecoder {
         }
       }
+      let seenBOM = false
       if (this.#canBOM) {
         const bom = this.#findBom(prefix ?? u)
         if (bom) {
-          if (stream) this.#canBOM = false
+          seenBOM = true
           if (prefix) {
             prefix = prefix.subarray(bom)
           } else {
             u = u.subarray(bom)
           }
         }
+      } else if (!stream && !this.ignoreBOM) {
+        this.#canBOM = true
       }
       if (!this.#decode) this.#decode = unicodeDecoder(this.encoding, !this.fatal)
       try {
         const res = (prefix ? this.#decode(prefix) : '') + this.#decode(u) + suffix
-        if (res.length > 0 && stream) this.#canBOM = false
-        if (!stream) this.#canBOM = !this.ignoreBOM
+        // "BOM seen" is set on the current decode call only if it did not error, in "serialize I/O queue" after decoding
+        if (stream && (seenBOM || res.length > 0)) this.#canBOM = false
         return res
       } catch (err) {
         this.#chunk = null // reset unfinished chunk on errors
+        // The correct way per spec seems to be not destroying the decoder state (aka BOM here) in stream mode
+        // See also multi-byte.js
         throw err
       }
@@ -215,6 +155,7 @@ export class TextDecoder {
         return u.byteLength >= 2 && u[0] === 0xfe && u[1] === 0xff ? 2 : 0
     }
+    /* c8 ignore next */
     throw new Error('Unreachable')
   }
 }
@@ -341,17 +282,6 @@ export class TextEncoderStream {
   }
 }
-// Warning: unlike whatwg-encoding, returns lowercased labels
-// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
-export function getBOMEncoding(input) {
-  const u8 = fromSource(input) // asserts
-  if (u8.length >= 3 && u8[0] === 0xef && u8[1] === 0xbb && u8[2] === 0xbf) return 'utf-8'
-  if (u8.length < 2) return null
-  if (u8[0] === 0xff && u8[1] === 0xfe) return 'utf-16le'
-  if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be'
-  return null
-}
 // https://encoding.spec.whatwg.org/#decode
 // Warning: encoding sniffed from BOM takes preference over the supplied one
 // Warning: lossy, performs replacement, no option of throwing
@@ -368,7 +298,7 @@ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
     let suffix = ''
     if (u8.byteLength % 2 !== 0) {
       suffix = replacementChar
-      u8 = u8.subarray(0, -1)
+      u8 = u8.subarray(0, -unfinishedBytes(u8, u8.byteLength, enc))
     }
     return utf16toStringLoose(u8, enc === 'utf-16le' ? 'uint8-le' : 'uint8-be') + suffix
@@ -376,7 +306,7 @@ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
   if (!Object.hasOwn(labels, enc)) throw new RangeError(E_ENCODING)
-  if (multibyteSet.has(enc)) {
+  if (isMultibyte(enc)) {
     if (!createMultibyteDecoder) throw new Error(E_MULTI)
     return createMultibyteDecoder(enc, true)(u8)
   }
@@ -387,17 +317,3 @@ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
   return createSinglebyteDecoder(enc, true)(u8)
 }
-const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
-// Unlike normalizeEncoding, case-sensitive
-// https://encoding.spec.whatwg.org/#names-and-labels
-export function labelToName(label) {
-  const enc = normalizeEncoding(label)
-  if (enc === 'utf-8') return 'UTF-8' // fast path
-  if (!enc) return enc
-  if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
-  if (enc === 'big5') return 'Big5'
-  if (enc === 'shift_jis') return 'Shift_JIS'
-  return enc
-}

package/fallback/encoding.util.js CHANGED Viewed

@@ -1,3 +1,5 @@
+// Get the number of trailing bytes of the Uint8Array `u` (ending at `len`) that don't yet
+// form a complete codepoint, but could become part of a single codepoint once more data arrives
 export function unfinishedBytes(u, len, enc) {
   switch (enc) {
     case 'utf-8': {
@@ -32,3 +34,35 @@ export function unfinishedBytes(u, len, enc) {
   throw new Error('Unsupported encoding')
 }
+// Merge prefix `chunk` with `u` and return new combined prefix
+// For u.length < 3, fully consumes u and can return unfinished data,
+// otherwise returns a prefix with no unfinished bytes
+export function mergePrefix(u, chunk, enc) {
+  if (u.length === 0) return chunk
+  if (u.length < 3) {
+    // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
+    const a = new Uint8Array(u.length + chunk.length)
+    a.set(chunk)
+    a.set(u, chunk.length)
+    return a
+  }
+  // Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
+  const t = new Uint8Array(chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
+  t.set(chunk)
+  t.set(u.subarray(0, 3), chunk.length)
+  // Stop at the first offset where unfinished bytes reaches 0 or fits into u
+  // If that doesn't happen (u too short), just concat chunk and u completely (above)
+  for (let i = 1; i <= 3; i++) {
+    const unfinished = unfinishedBytes(t, chunk.length + i, enc) // 0-3
+    if (unfinished <= i) {
+      // Always reachable at 3, but we still need 'unfinished' value for it
+      const add = i - unfinished // 0-3
+      return add > 0 ? t.subarray(0, chunk.length + add) : chunk
+    }
+  }
+  // Unreachable
+}

package/fallback/latin1.js CHANGED Viewed

@@ -37,6 +37,7 @@ export function asciiPrefix(arr) {
       const b = u32[i + 1]
       const c = u32[i + 2]
       const d = u32[i + 3]
+      // "(a | b | c | d) & mask" is slower on Hermes though faster on v8
       if (a & 0x80_80_80_80 || b & 0x80_80_80_80 || c & 0x80_80_80_80 || d & 0x80_80_80_80) break
     }