npm - @exodus/bytes - Versions diffs - 1.9.0 → 1.11.0 - Mend

@exodus/bytes 1.9.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

package/README.md +417 -90
package/array.d.ts +42 -3
package/base32.d.ts +83 -0
package/base58.d.ts +62 -0
package/base58check.d.ts +131 -0
package/base58check.js +2 -1
package/base64.d.ts +40 -19
package/bech32.d.ts +76 -0
package/bigint.d.ts +48 -0
package/encoding-browser.d.ts +23 -0
package/encoding-lite.d.ts +61 -0
package/encoding.d.ts +93 -11
package/encoding.js +4 -3
package/fallback/_utils.js +14 -11
package/fallback/encoding.js +34 -42
package/fallback/encoding.util.js +38 -8
package/fallback/multi-byte.encodings.json +4 -3
package/fallback/multi-byte.js +87 -16
package/fallback/multi-byte.table.js +3 -0
package/fallback/percent.js +31 -0
package/hex.d.ts +22 -8
package/index.d.ts +1 -1
package/multi-byte.d.ts +64 -0
package/package.json +63 -9
package/single-byte.d.ts +159 -0
package/utf16.d.ts +92 -0
package/utf16.js +1 -1
package/utf8.d.ts +72 -18
package/utf8.js +11 -6
package/utf8.node.js +1 -1
package/whatwg.d.ts +48 -0
package/whatwg.js +76 -0
package/wif.d.ts +76 -0
package/wif.js +1 -2

package/encoding.d.ts CHANGED Viewed

@@ -1,14 +1,57 @@
+/**
+ * Implements the [Encoding standard](https://encoding.spec.whatwg.org/):
+ * [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder),
+ * [TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder),
+ * [TextDecoderStream](https://encoding.spec.whatwg.org/#interface-textdecoderstream),
+ * [TextEncoderStream](https://encoding.spec.whatwg.org/#interface-textencoderstream),
+ * some [hooks](https://encoding.spec.whatwg.org/#specification-hooks).
+ *
+ * ```js
+ * import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
+ * import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding.js' // Requires Streams
+ *
+ * // Hooks for standards
+ * import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding.js'
+ * ```
+ *
+ * @module @exodus/bytes/encoding.js
+ */
 /// <reference types="node" />
 /**
- * Converts an encoding label to its name, as an ASCII-lowercased string
+ * Convert an encoding [label](https://encoding.spec.whatwg.org/#names-and-labels) to its name,
+ * as an ASCII-lowercased string.
+ *
+ * If an encoding with that label does not exist, returns `null`.
+ *
+ * This is the same as [`decoder.encoding` getter](https://encoding.spec.whatwg.org/#dom-textdecoder-encoding),
+ * except that it:
+ *  1. Supports [`replacement` encoding](https://encoding.spec.whatwg.org/#replacement) and its
+ *     [labels](https://encoding.spec.whatwg.org/#ref-for-replacement%E2%91%A1)
+ *  2. Does not throw for invalid labels and instead returns `null`
+ *
+ * It is identical to:
+ * ```js
+ * labelToName(label)?.toLowerCase() ?? null
+ * ```
+ *
+ * All encoding names are also valid labels for corresponding encodings.
+ *
  * @param label - The encoding label to normalize
  * @returns The normalized encoding name, or null if invalid
  */
 export function normalizeEncoding(label: string): string | null;
 /**
- * Implements BOM sniff (https://encoding.spec.whatwg.org/#bom-sniff) legacy hook.
+ * Implements [BOM sniff](https://encoding.spec.whatwg.org/#bom-sniff) legacy hook.
+ *
+ * Given a `TypedArray` or an `ArrayBuffer` instance `input`, returns one of:
+ * - `'utf-8'`, if `input` starts with UTF-8 byte order mark.
+ * - `'utf-16le'`, if `input` starts with UTF-16LE byte order mark.
+ * - `'utf-16be'`, if `input` starts with UTF-16BE byte order mark.
+ * - `null` otherwise.
+ *
  * @param input - The bytes to check for BOM
  * @returns The encoding ('utf-8', 'utf-16le', 'utf-16be'), or null if no BOM found
  */
@@ -17,7 +60,27 @@ export function getBOMEncoding(
 ): 'utf-8' | 'utf-16le' | 'utf-16be' | null;
 /**
- * Implements decode (https://encoding.spec.whatwg.org/#decode) legacy hook.
+ * Implements [decode](https://encoding.spec.whatwg.org/#decode) legacy hook.
+ *
+ * Given a `TypedArray` or an `ArrayBuffer` instance `input` and an optional `fallbackEncoding`
+ * encoding [label](https://encoding.spec.whatwg.org/#names-and-labels),
+ * sniffs encoding from BOM with `fallbackEncoding` fallback and then
+ * decodes the `input` using that encoding, skipping BOM if it was present.
+ *
+ * Notes:
+ *
+ * - BOM-sniffed encoding takes precedence over `fallbackEncoding` option per spec.
+ *   Use with care.
+ * - Always operates in non-fatal [mode](https://encoding.spec.whatwg.org/#textdecoder-error-mode),
+ *   aka replacement. It can convert different byte sequences to equal strings.
+ *
+ * This method is similar to the following code, except that it doesn't support encoding labels and
+ * only expects a lowercased encoding name:
+ *
+ * ```js
+ * new TextDecoder(getBOMEncoding(input) ?? fallbackEncoding).decode(input)
+ * ```
+ *
  * @param input - The bytes to decode
  * @param fallbackEncoding - The encoding to use if no BOM detected (default: 'utf-8')
  * @returns The decoded string
@@ -28,31 +91,50 @@ export function legacyHookDecode(
 ): string;
 /**
- * Converts an encoding label to its name, as a case-sensitive string.
+ * Implements [get an encoding from a string `label`](https://encoding.spec.whatwg.org/#concept-encoding-get).
+ *
+ * Convert an encoding [label](https://encoding.spec.whatwg.org/#names-and-labels) to its name,
+ * as a case-sensitive string.
+ *
+ * If an encoding with that label does not exist, returns `null`.
+ *
+ * All encoding names are also valid labels for corresponding encodings.
+ *
  * @param label - The encoding label
  * @returns The proper case encoding name, or null if invalid
  */
 export function labelToName(label: string): string | null;
 /**
- * Text decoder for decoding bytes to strings in various encodings
- * Supports strict and lossy modes
+ * [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder) implementation/polyfill.
+ *
+ * Decode bytes to strings according to [WHATWG Encoding](https://encoding.spec.whatwg.org) specification.
  */
 export const TextDecoder: typeof globalThis.TextDecoder;
 /**
- * Text encoder for encoding strings to UTF-8 bytes
+ * [TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder) implementation/polyfill.
+ *
+ * Encode strings to UTF-8 bytes according to [WHATWG Encoding](https://encoding.spec.whatwg.org) specification.
  */
 export const TextEncoder: typeof globalThis.TextEncoder;
 /**
- * Transform stream wrapper for TextDecoder
- * Decodes chunks of bytes to strings
+ * [TextDecoderStream](https://encoding.spec.whatwg.org/#interface-textdecoderstream) implementation/polyfill.
+ *
+ * A [Streams](https://streams.spec.whatwg.org/) wrapper for `TextDecoder`.
+ *
+ * Requires [Streams](https://streams.spec.whatwg.org/) to be either supported by the platform or
+ * [polyfilled](https://npmjs.com/package/web-streams-polyfill).
  */
 export const TextDecoderStream: typeof globalThis.TextDecoderStream;
 /**
- * Transform stream wrapper for TextEncoder
- * Encodes chunks of strings to UTF-8 bytes
+ * [TextEncoderStream](https://encoding.spec.whatwg.org/#interface-textencoderstream) implementation/polyfill.
+ *
+ * A [Streams](https://streams.spec.whatwg.org/) wrapper for `TextEncoder`.
+ *
+ * Requires [Streams](https://streams.spec.whatwg.org/) to be either supported by the platform or
+ * [polyfilled](https://npmjs.com/package/web-streams-polyfill).
  */
 export const TextEncoderStream: typeof globalThis.TextEncoderStream;

package/encoding.js CHANGED Viewed

@@ -1,7 +1,8 @@
-import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js' // eslint-disable-line @exodus/import/no-unresolved
-import { setMultibyteDecoder } from './fallback/encoding.js'
+import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js'
+import { multibyteEncoder } from './fallback/multi-byte.js'
+import { setMultibyte } from './fallback/encoding.js'
-setMultibyteDecoder(createMultibyteDecoder)
+setMultibyte(createMultibyteDecoder, multibyteEncoder)
 export {
   TextDecoder,

package/fallback/_utils.js CHANGED Viewed

@@ -1,9 +1,9 @@
 const { Buffer, TextEncoder, TextDecoder } = globalThis
 const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
 export const nativeBuffer = haveNativeBuffer ? Buffer : null
-export const isHermes = Boolean(globalThis.HermesInternal)
-export const isDeno = Boolean(globalThis.Deno)
-export const isLE = new Uint8Array(Uint16Array.of(258).buffer)[0] === 2
+export const isHermes = !!globalThis.HermesInternal
+export const isDeno = !!globalThis.Deno
+export const isLE = /* @__PURE__ */ (() => new Uint8Array(Uint16Array.of(258).buffer)[0] === 2)()
 // We consider Node.js TextDecoder/TextEncoder native
 let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]'))
@@ -17,16 +17,19 @@ export const nativeDecoder = isNative(TextDecoder)
 // Actually windows-1252, compatible with ascii and latin1 decoding
 // Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
 // in 2025 due to a regression, so we call it Latin1 as it's usable only for that
-let nativeDecoderLatin1impl = null
-if (nativeDecoder) {
+const getNativeLatin1 = () => {
   // Not all barebone engines with TextDecoder support anything other than utf-8, so detect
-  try {
-    nativeDecoderLatin1impl = new TextDecoder('latin1', { ignoreBOM: true })
-  } catch {}
+  if (nativeDecoder) {
+    try {
+      return new TextDecoder('latin1', { ignoreBOM: true })
+    } catch {}
+  }
+  return null
 }
-export const nativeDecoderLatin1 = nativeDecoderLatin1impl
-export const canDecoders = Boolean(nativeDecoderLatin1impl)
+export const nativeDecoderLatin1 = /* @__PURE__ */ getNativeLatin1()
+export const canDecoders = !!nativeDecoderLatin1
 // Block Firefox < 146 specifically from using native hex/base64, as it's very slow there
 // Refs: https://bugzilla.mozilla.org/show_bug.cgi?id=1994067 (and linked issues), fixed in 146
@@ -51,7 +54,7 @@ function shouldSkipBuiltins() {
   return false // eslint-disable-line no-unreachable
 }
-export const skipWeb = shouldSkipBuiltins()
+export const skipWeb = /* @__PURE__ */ shouldSkipBuiltins()
 function decodePartAddition(a, start, end, m) {
   let o = ''

package/fallback/encoding.js CHANGED Viewed

@@ -6,19 +6,25 @@ import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/by
 import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
 import labels from './encoding.labels.js'
 import { fromSource, getBOMEncoding, normalizeEncoding, E_ENCODING } from './encoding.api.js'
-import { unfinishedBytes } from './encoding.util.js'
+import { unfinishedBytes, mergePrefix } from './encoding.util.js'
 export { labelToName, getBOMEncoding, normalizeEncoding } from './encoding.api.js'
+const E_MULTI = "import '@exodus/bytes/encoding.js' for legacy multi-byte encodings support"
 const E_OPTIONS = 'The "options" argument must be of type object'
-const E_MULTI =
-  'Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encoding.js for full encodings range support'
 const replacementChar = '\uFFFD'
 const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
-let createMultibyteDecoder
+let createMultibyteDecoder, multibyteEncoder
-export function setMultibyteDecoder(createDecoder) {
+export const isMultibyte = (enc) => multibyteSet.has(enc)
+export function setMultibyte(createDecoder, createEncoder) {
   createMultibyteDecoder = createDecoder
+  multibyteEncoder = createEncoder
+}
+export function getMultibyteEncoder() {
+  if (!multibyteEncoder) throw new Error(E_MULTI)
+  return multibyteEncoder
 }
 const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
@@ -47,10 +53,10 @@ export class TextDecoder {
     const enc = normalizeEncoding(encoding)
     if (!enc || enc === 'replacement') throw new RangeError(E_ENCODING)
     define(this, 'encoding', enc)
-    define(this, 'fatal', Boolean(options.fatal))
-    define(this, 'ignoreBOM', Boolean(options.ignoreBOM))
+    define(this, 'fatal', !!options.fatal)
+    define(this, 'ignoreBOM', !!options.ignoreBOM)
     this.#unicode = enc === 'utf-8' || enc === 'utf-16le' || enc === 'utf-16be'
-    this.#multibyte = !this.#unicode && multibyteSet.has(enc)
+    this.#multibyte = !this.#unicode && isMultibyte(enc)
     this.#canBOM = this.#unicode && !this.ignoreBOM
   }
@@ -60,44 +66,26 @@ export class TextDecoder {
   decode(input, options = {}) {
     if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
-    const stream = Boolean(options.stream)
+    const stream = !!options.stream
     let u = input === undefined ? new Uint8Array() : fromSource(input)
+    const empty = u.length === 0 // also can't be streaming after next line
+    if (empty && stream) return '' // no state change
     if (this.#unicode) {
       let prefix
       if (this.#chunk) {
-        if (u.length === 0) {
-          if (stream) return '' // no change
-          u = this.#chunk // process as final chunk to handle errors and state changes
-        } else if (u.length < 3) {
-          // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
-          const a = new Uint8Array(u.length + this.#chunk.length)
-          a.set(this.#chunk)
-          a.set(u, this.#chunk.length)
-          u = a
+        const merged = mergePrefix(u, this.#chunk, this.encoding)
+        if (u.length < 3) {
+          u = merged // might be unfinished, but fully consumed old u
         } else {
-          // Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
-          const t = new Uint8Array(this.#chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
-          t.set(this.#chunk)
-          t.set(u.subarray(0, 3), this.#chunk.length)
-          // Stop at the first offset where unfinished bytes reaches 0 or fits into u
-          // If that doesn't happen (u too short), just concat chunk and u completely
-          for (let i = 1; i <= 3; i++) {
-            const unfinished = unfinishedBytes(t, this.#chunk.length + i, this.encoding) // 0-3
-            if (unfinished <= i) {
-              // Always reachable at 3, but we still need 'unfinished' value for it
-              const add = i - unfinished // 0-3
-              prefix = add > 0 ? t.subarray(0, this.#chunk.length + add) : this.#chunk
-              if (add > 0) u = u.subarray(add)
-              break
-            }
-          }
+          prefix = merged // stops at complete chunk
+          const add = prefix.length - this.#chunk.length
+          if (add > 0) u = u.subarray(add)
         }
         this.#chunk = null
-      } else if (u.byteLength === 0) {
-        if (!stream) this.#canBOM = !this.ignoreBOM
+      } else if (empty) {
+        this.#canBOM = !this.ignoreBOM // not streaming
         return ''
       }
@@ -118,27 +106,31 @@ export class TextDecoder {
         }
       }
+      let seenBOM = false
       if (this.#canBOM) {
         const bom = this.#findBom(prefix ?? u)
         if (bom) {
-          if (stream) this.#canBOM = false
+          seenBOM = true
           if (prefix) {
             prefix = prefix.subarray(bom)
           } else {
             u = u.subarray(bom)
           }
         }
+      } else if (!stream && !this.ignoreBOM) {
+        this.#canBOM = true
       }
       if (!this.#decode) this.#decode = unicodeDecoder(this.encoding, !this.fatal)
       try {
         const res = (prefix ? this.#decode(prefix) : '') + this.#decode(u) + suffix
-        if (res.length > 0 && stream) this.#canBOM = false
-        if (!stream) this.#canBOM = !this.ignoreBOM
+        // "BOM seen" is set on the current decode call only if it did not error, in "serialize I/O queue" after decoding
+        if (stream && (seenBOM || res.length > 0)) this.#canBOM = false
         return res
       } catch (err) {
         this.#chunk = null // reset unfinished chunk on errors
+        // The correct way per spec seems to be not destroying the decoder state (aka BOM here) in stream mode
+        // See also multi-byte.js
         throw err
       }
@@ -314,7 +306,7 @@ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
   if (!Object.hasOwn(labels, enc)) throw new RangeError(E_ENCODING)
-  if (multibyteSet.has(enc)) {
+  if (isMultibyte(enc)) {
     if (!createMultibyteDecoder) throw new Error(E_MULTI)
     return createMultibyteDecoder(enc, true)(u8)
   }

package/fallback/encoding.util.js CHANGED Viewed

@@ -1,3 +1,5 @@
+// Get the number of trailing bytes in a Uint8Array `u`, ending at `len`, that don't
+// form a codepoint yet, but can become part of a single codepoint given more data
 export function unfinishedBytes(u, len, enc) {
   switch (enc) {
     case 'utf-8': {
@@ -18,17 +20,45 @@ export function unfinishedBytes(u, len, enc) {
     case 'utf-16le':
     case 'utf-16be': {
       // 0-3
-      let p = 0
-      if (len % 2 !== 0) p++ // uneven bytes
+      const p = len % 2 // uneven byte length adds 1
+      if (len < 2) return p
       const l = len - p - 1
-      if (len - p >= 2) {
-        const last = enc === 'utf-16le' ? (u[l] << 8) ^ u[l - 1] : (u[l - 1] << 8) ^ u[l]
-        if (last >= 0xd8_00 && last < 0xdc_00) p += 2 // lone lead
-      }
-      return p
+      const last = enc === 'utf-16le' ? (u[l] << 8) ^ u[l - 1] : (u[l - 1] << 8) ^ u[l]
+      return last >= 0xd8_00 && last < 0xdc_00 ? p + 2 : p // lone lead adds 2
     }
   }
   throw new Error('Unsupported encoding')
 }
+// Merge prefix `chunk` with `u` and return new combined prefix
+// For u.length < 3, fully consumes u and can return unfinished data,
+// otherwise returns a prefix with no unfinished bytes
+export function mergePrefix(u, chunk, enc) {
+  if (u.length === 0) return chunk
+  if (u.length < 3) {
+    // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
+    const a = new Uint8Array(u.length + chunk.length)
+    a.set(chunk)
+    a.set(u, chunk.length)
+    return a
+  }
+  // Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
+  const t = new Uint8Array(chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
+  t.set(chunk)
+  t.set(u.subarray(0, 3), chunk.length)
+  // Stop at the first offset where unfinished bytes reaches 0 or fits into u
+  // If that doesn't happen (u too short), just concat chunk and u completely (above)
+  for (let i = 1; i <= 3; i++) {
+    const unfinished = unfinishedBytes(t, chunk.length + i, enc) // 0-3
+    if (unfinished <= i) {
+      // Always reachable at 3, but we still need 'unfinished' value for it
+      const add = i - unfinished // 0-3
+      return add > 0 ? t.subarray(0, chunk.length + add) : chunk
+    }
+  }
+  // Unreachable
+}

package/fallback/multi-byte.encodings.json CHANGED Viewed

@@ -10,7 +10,7 @@
   "$7": [2,12541,2,-98,1,-156,1,7897,3,-7897,1,244],
   "$8": ["E9UxzELB4htgpd4feI7ZJNwBAQEfAf8B_wH_Af8B_wH_Af8B_wEAAAD_"],
   "$9": [1,26142,4,2,1,1,4,2,1,1,1,1],
-  "$jis0208tail": [
+  "$j": [
    "ipErP1Ps8XWWMAFJ4rgaAwI1HDv3D_k4cuHcHicp0VFf43EZOCAtKIYJGRokBhxNIB4qMI3tHlWG0gtGG_5HAI0TWEcHAQVAFZtpbqgTMYcTOjErvTAqSgAWUBIyTyZ-JwRT9krRHiX4Z3qSTmo8MH-xFCXNJO8FQPEBuGAlBhEMOhMaPFSWbUBCikNUq4NJTTraLApjAfFoHCnoaimC5yYVIij5CTwiyhSCyCw_DwEgXCVj9FfpAM2rPLIMZfFgRQsMDO407TAD_gQzJhVhbRIZAfwKcC5ocSwVFbV-Cwr_8ssh9gIq1PnvAAABAAAAAAAAAAABAP8BAAAAAAAAAQAAAAABAAAAAAEAAAAAAACnWgAAAAECAAAAAKMAXgABAAAAAAAAAgAa5gABAAAAAQCdYwAAAAACAAAAAAEAAAAAAf8BAAAAAQABAQAAAQEAAAAAAAAAAACUbAAAAAAAAZJuAAH_AQAAkm-RbwABAAAAAAAAAQEAAAAAAQAAAAAAAAAAAQAAAAABAAABAAAAAAEAAAAAiXcAAQABh3kAAAAAAAH_AQAAAAAAAAEAAQAAAACEfdsmAAAAAQ",
     3,32999,
     "lIZ_NRU0zrJ-KhNa6DV79Fl84mAcRy5Ra54FEbOQbwDl7RwkQS0WIELTXCtwAx1jrKtUAEF2R-4RsvwGDgD1ACAJ-S8F-xEK9-ctP88Abu8B9latCvJR-9ks9eAd5G3mTCEXGTgTAklJTHMRgwcHCQEBAwENxAD7BHGvigKY_BwhCURv-sHrt3mBfwEAgIABf4MAAAAAAQEAe4kAAAAAAQABAAB1i3UAjgACAAAAAABwkAEAAQABAG2UbACWAGqXAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAABomAAAAQD_aJn_AQAAAAAAAAABAAABZAGcAAEAAAAAAAABAGIAn2GfAQAAAQAAAAABAQBdpAA"
@@ -39,8 +39,8 @@
     1,8,1,13,1,-11,1,-36,1,54,1,17,1,11,1,13,1,-4,1,-3,-2,1,-6,1,22,1,-9,1,13,1,-5,1,7,-2,-2,1,-2,1,4,-1,1,-2,1,66,-2,1,11,-3,1,14,-1,2,1,2,9,2,2,-2,1,8,1,-5,-2,1,5,1,-4,1,5,-3,1,10,1,17,-1,1,4,2,2,
     -1,4,1,2,3,
     "MhH-C_0AAgYbHu8NBAAHAAcLCwnuDAgBABD-Ae8MWLf-_iT-9An4-QEJ9_kp7An-_A4W6hP3GwUGDgFuBAMY_uIXAiP7iHP7JND8Lv3-BwEUAAXs_ggYFv0jwwofCQQIEfz5_AELCPIW-STtBgENHAACVgMDAwYCAAUC-wsHAw7xArNqmwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAf8AAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAP8BAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAPkH",
-    3,1,-3,1,7,-1,1,-2,-2,1,4,-2,1,6,1,4,2,1,-1,3,1,2,9,"t10LChELBwEOdikAAAAAAAAA",3,-5,2,4,"bP0M-gMeBgOOl5EKd7yfAAAAAAAAALkRJ-TdIA",0,464,"$jis0208tail",0,2,10,-20522,"4gEi-v8AAAA",0,2068,10,-56723,
-    10,-26,"4gEi-i7kChP_AAAAM-8AAQ","$jis0208tail"
+    3,1,-3,1,7,-1,1,-2,-2,1,4,-2,1,6,1,4,2,1,-1,3,1,2,9,"t10LChELBwEOdikAAAAAAAAA",3,-5,2,4,"bP0M-gMeBgOOl5EKd7yfAAAAAAAAALkRJ-TdIA",0,464,"$j",0,2,10,-20522,"4gEi-v8AAAA",0,2068,10,-56723,
+    10,-26,"4gEi-i7kChP_AAAAM-8AAQ","$j"
   ],
   "jis0212": [
     0,108,"2O7wIAPRK_6DJQACAP4CAP4CAP0EAA",0,8,1,-741,1,4,1,24,0,38,"uu_-BHOBcQAAAAAh3yE",0,359,1,-7569,3,1,1,31,0,1,1,-31,0,1,-1,1,28,0,1,1,-29,0,4,4,28,"ysU79Qr95B0DAAAAAAAAAA",0,35,11,51,2,1,0,35,
@@ -255,6 +255,7 @@
     0,96,"jTDyPYHPZsuW6DaOAK_MYbVV1Cc3hQPtxUtKV3vkuxWVbAKIJVtbpKg5G_YAYBudaZjpT-o5ZSoE-Ts1a64cjtcxYm6cmFG_wfn_U3Wf0xu-7iPUmOmX1uo8Q9XtOHkACQEHAQELY2oBEwAC1AcEAgMCAwj7AgYDAwkTCgLRAQUZCbwNAQEAAQAFAAMDAgABAgMCBQEHBAkGCLUDCAEBAAEFAQMDAgEDCwMCAwQCxiHoDyrAAgUKAAECBQA",
     0,96,"O3jA12FpgHMtTDt5TLAodifVF3BpNYprtGJ0H_7cxeYXTyeXYzCavakroFIokJoA_OmpTq4RLcHYQw10PC11jQ4JHAjxN8yXMs0Hvc5hHhwCjP6YLIeyHwDs_1ixvXIOEQm1AQMBAAEACQEICgIdwgAGABkAABQF3B_QEQsTzwIW9AAEAwsZBQ-xFQwFEdorErQAAhoTHsgFBQXfCCEN4QAA6QwNEgPOAQQBAAMCAwMAAQQACwAAAQcBBQs"
   ],
+  "iso-2022-jp-katakana":[12290,9,0,-13,249,-10,-82,1,1,1,1,57,1,1,-37,56,-91,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,0,0,0,0,0,2,2,2,2,2,0,0,0,0,1,1,1,0,0,0,0,0,1,3,-89,0],
   "gb18030-ranges": [0,128,36,37,2,4,7,9,5,6,31,32,8,10,6,9,1,3,4,6,3,4,1,3,1,2,4,5,17,18,7,8,15,16,24,25,3,4,4,5,29,30,98,99,1,2,1,2,1,2,1,2,1,2,1,2,1,2,28,29,87,88,15,16,101,102,1,4,13,14,183,200,1,8,7,24,1,8,55,56,14,78,1,2,7102,7103,2,6,1,3,2,4,7,9,9,10,1,3,1,2,5,6,112,113,86,87,1,2,3,4,12,13,10,11,62,74,4,14,22,26,2,6,110,111,6,7,1,2,3,4,4,5,2,6,2,3,1,2,1,6,2,3,5,9,5,6,10,11,3,4,5,6,13,15,2,6,6,8,37,38,3,4,11,12,25,26,82,83,333,343,10,50,100,176,4,40,13,28,3,6,10,12,16,18,8,10,8,10,3,4,2,4,18,22,31,33,2,3,54,55,1,2,2110,2111,2,3,3,4,2,4,10,11,15,16,2,3,3,4,4,5,2,4,3,4,14,15,293,305,4,8,1,20,5,7,2,11,20,21,2,85,7,11,2,88,5,8,6,43,246,256,7,8,113,114,234,236,12,15,2,3,34,35,9,10,2,4,2,3,113,114,43,44,298,299,111,112,11,12,765,766,85,86,96,98,14,15,147,148,218,219,287,288,113,114,885,886,264,265,471,472,116,117,4,5,43,44,248,249,373,374,20,21,193,194,5,6,82,83,16,17,441,442,50,51,2,3,4,6,1,3,20,21,3,4,22,24,703,704,39,44,111,118,148,149,81,20983,14426,18374,1,92,1,31,13,46,1,4,5,6,7,8,4,6,4,6,8,9,7,8,16,18,14,15,4295,4296,76,77,27,28,81,82,9,10,26,30,1,2,1,3,3,4,6,9,1,3,2,5,1030,1032,1,19,4,14,1,5,1,15,1,5,149,243,129,135,149606,26],
   "gb18030": [
     1,19970,3,1,1,8,-2,1,4,3,7,-1,-2,-2,2,4,-1,-1,-1,-1,1,4,3,3,-1,-1,-3,1,6,-3,-1,2,2,4,6,2,1,6,1,-2,10,1,7,1,-1,-2,1,5,2,5,-1,3,2,1,4,1,6,3,4,-2,4,1,3,2,1,9,-3,2,2,-1,3,7,-3,-1,2,3,-1,3,3,-1,-2,3,3,

package/fallback/multi-byte.js CHANGED Viewed

@@ -688,6 +688,7 @@ const preencoders = {
     const t = p % 188
     return ((l + (l < 0x1f ? 0x81 : 0xc1)) << 8) | ((t < 0x3f ? 0x40 : 0x41) + t)
   },
+  'iso-2022-jp': (p) => ((((p / 94) | 0) + 0x21) << 8) | ((p % 94) + 0x21),
   'euc-jp': (p) => ((((p / 94) | 0) + 0xa1) << 8) | ((p % 94) + 0xa1),
   'euc-kr': (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190) + 0x41),
   gb18030: (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190 < 0x3f ? 0x40 : 0x41) + (p % 190)),
@@ -697,11 +698,13 @@ preencoders.gbk = preencoders.gb18030
 // We accept that encoders use non-trivial amount of mem, for perf
 // most are are 128 KiB mem, big5 is 380 KiB, lazy-loaded at first use
-function getMap(id, size) {
+function getMap(id, size, ascii) {
   const cached = maps.get(id)
   if (cached) return cached
   let tname = id
   const sjis = id === 'shift_jis'
+  const iso2022jp = id === 'iso-2022-jp'
+  if (iso2022jp) tname = 'jis0208'
   if (id === 'gbk') tname = 'gb18030'
   if (id === 'euc-jp' || sjis) tname = 'jis0208'
   const table = getTable(tname)
@@ -738,7 +741,7 @@ function getMap(id, size) {
     }
   }
-  for (let i = 0; i < 0x80; i++) map[i] = i
+  if (ascii) for (let i = 0; i < 0x80; i++) map[i] = i
   if (sjis || id === 'euc-jp') {
     if (sjis) map[0x80] = 0x80
     const d = sjis ? 0xfe_c0 : 0x70_c0
@@ -757,32 +760,38 @@ function getMap(id, size) {
   return map
 }
-const encoders = new Set(['big5', 'euc-kr', 'euc-jp', 'shift_jis', 'gbk', 'gb18030'])
 const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
-let gb18030r
+let gb18030r, katakana
 export function multibyteEncoder(enc, onError) {
-  if (!encoders.has(enc)) throw new RangeError('Unsupported encoding')
+  if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding')
   const size = enc === 'big5' ? 0x2_f8_a7 : 0x1_00_00 // for big5, max codepoint in table + 1
-  const width = enc === 'gb18030' ? 4 : 2
-  const map = getMap(enc, size)
-  if (enc === 'gb18030' && !gb18030r) gb18030r = getTable('gb18030-ranges')
+  const iso2022jp = enc === 'iso-2022-jp'
+  const gb18030 = enc === 'gb18030'
+  const ascii = isAsciiSuperset(enc)
+  const width = iso2022jp ? 5 : gb18030 ? 4 : 2
+  const tailsize = iso2022jp ? 3 : 0
+  const map = getMap(enc, size, ascii)
+  if (gb18030 && !gb18030r) gb18030r = getTable('gb18030-ranges')
+  if (iso2022jp && !katakana) katakana = getTable('iso-2022-jp-katakana')
   return (str) => {
     if (typeof str !== 'string') throw new TypeError(E_STRING)
-    if (!NON_LATIN.test(str)) {
+    if (ascii && !NON_LATIN.test(str)) {
       try {
         return encodeAscii(str, E_STRICT)
       } catch {}
     }
     const length = str.length
-    const u8 = new Uint8Array(length * width)
+    const u8 = new Uint8Array(length * width + tailsize)
     let i = 0
-    while (i < length) {
-      const x = str.charCodeAt(i)
-      if (x >= 128) break
-      u8[i++] = x
+    if (ascii) {
+      while (i < length) {
+        const x = str.charCodeAt(i)
+        if (x >= 128) break
+        u8[i++] = x
+      }
     }
     // eslint-disable-next-line unicorn/consistent-function-scoping
@@ -793,7 +802,69 @@ export function multibyteEncoder(enc, onError) {
     if (!map || map.length < size) /* c8 ignore next */ throw new Error('Unreachable') // Important for perf
-    if (enc === 'gb18030') {
+    if (iso2022jp) {
+      let state = 0 // 0 = ASCII, 1 = Roman, 2 = jis0208
+      const restore = () => {
+        state = 0
+        u8[i++] = 0x1b
+        u8[i++] = 0x28
+        u8[i++] = 0x42
+      }
+      for (let j = 0; j < length; j++) {
+        let x = str.charCodeAt(j)
+        if (x >= 0xd8_00 && x < 0xe0_00) {
+          if (state === 2) restore()
+          if (x >= 0xdc_00 || j + 1 === length) {
+            i += err(x) // lone
+          } else {
+            const x1 = str.charCodeAt(j + 1)
+            if (x1 < 0xdc_00 || x1 >= 0xe0_00) {
+              i += err(x) // lone
+            } else {
+              j++ // consume x1
+              i += err(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10)))
+            }
+          }
+        } else if (x < 0x80) {
+          if (state === 2 || (state === 1 && (x === 0x5c || x === 0x7e))) restore()
+          if (x === 0xe || x === 0xf || x === 0x1b) {
+            i += err(0xff_fd) // 12.2.2. step 3: This returns U+FFFD rather than codePoint to prevent attacks
+          } else {
+            u8[i++] = x
+          }
+        } else if (x === 0xa5 || x === 0x20_3e) {
+          if (state !== 1) {
+            state = 1
+            u8[i++] = 0x1b
+            u8[i++] = 0x28
+            u8[i++] = 0x4a
+          }
+          u8[i++] = x === 0xa5 ? 0x5c : 0x7e
+        } else {
+          if (x === 0x22_12) x = 0xff_0d
+          if (x >= 0xff_61 && x <= 0xff_9f) x = katakana[x - 0xff_61]
+          const e = map[x]
+          if (e) {
+            if (state !== 2) {
+              state = 2
+              u8[i++] = 0x1b
+              u8[i++] = 0x24
+              u8[i++] = 0x42
+            }
+            u8[i++] = e >> 8
+            u8[i++] = e & 0xff
+          } else {
+            if (state === 2) restore()
+            i += err(x)
+          }
+        }
+      }
+      if (state) restore()
+    } else if (gb18030) {
       // Deduping this branch hurts other encoders perf
       const encode = (cp) => {
         let a = 0, b = 0 // prettier-ignore

package/fallback/multi-byte.table.js CHANGED Viewed

@@ -104,6 +104,9 @@ export function getTable(id) {
     let a = 0, b = 0 // prettier-ignore
     const idx = indices[id]
     while (idx.length > 0) res.push([(a += idx.shift()), (b += idx.shift())]) // destroying, we remove it later anyway
+  } else if (id.endsWith('-katakana')) {
+    let a = -1
+    res = new Uint16Array(indices[id].map((x) => (a += x + 1)))
   } else if (id === 'big5') {
     if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding')
     res = new Uint32Array(sizes[id]) // array of strings or undefined

package/fallback/percent.js ADDED Viewed

@@ -0,0 +1,31 @@
+import { decodeAscii, encodeLatin1 } from './latin1.js'
+import { decode2string } from './_utils.js'
+const ERR = 'percentEncodeSet must be a string of unique increasing codepoints in range 0x20 - 0x7e'
+const percentMap = new Map()
+let hex, base
+export function percentEncoder(set, spaceAsPlus = false) {
+  if (typeof set !== 'string' || /[^\x20-\x7E]/.test(set)) throw new TypeError(ERR)
+  if (typeof spaceAsPlus !== 'boolean') throw new TypeError('spaceAsPlus must be boolean')
+  const id = set + +spaceAsPlus
+  const cached = percentMap.get(id)
+  if (cached) return cached
+  const n = encodeLatin1(set).sort() // string checked above to be ascii
+  if (decodeAscii(n) !== set || new Set(n).size !== n.length) throw new TypeError(ERR)
+  if (!base) {
+    hex = Array.from({ length: 256 }, (_, i) => `%${i.toString(16).padStart(2, '0').toUpperCase()}`)
+    base = hex.map((h, i) => (i < 0x20 || i > 0x7e ? h : String.fromCharCode(i)))
+  }
+  const map = base.slice() // copy
+  for (const c of n) map[c] = hex[c]
+  if (spaceAsPlus) map[0x20] = '+' // overrides whatever percentEncodeSet thinks about it
+  // Input is not typechecked, for internal use only
+  const percentEncode = (u8, start = 0, end = u8.length) => decode2string(u8, start, end, map)
+  percentMap.set(id, percentEncode)
+  return percentEncode
+}