npm - @exodus/bytes - Versions diffs - 1.10.0 → 1.11.0 - Mend

@exodus/bytes 1.10.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/README.md +78 -3
package/array.d.ts +2 -1
package/fallback/_utils.js +3 -3
package/fallback/encoding.util.js +4 -8
package/fallback/multi-byte.encodings.json +3 -3
package/fallback/percent.js +31 -0
package/multi-byte.d.ts +7 -0
package/package.json +12 -2
package/single-byte.d.ts +12 -2
package/utf16.js +1 -1
package/utf8.d.ts +20 -0
package/utf8.js +4 -4
package/whatwg.d.ts +48 -0
package/whatwg.js +76 -0
package/wif.js +1 -2

package/README.md CHANGED Viewed

@@ -10,6 +10,8 @@
 And a [`TextEncoder` / `TextDecoder` polyfill](#textencoder--textdecoder-polyfill)
+See [documentation](https://exodusoss.github.io/bytes).
 ## Strict
 Performs proper input validation, ensures no garbage-in-garbage-out
@@ -118,6 +120,13 @@ Encode a string to UTF-8 bytes (strict mode)
 Throws on invalid Unicode (unpaired surrogates)
+This is similar to the following snippet (but works on all engines):
+```js
+// Strict encode, requiring Unicode codepoints to be valid
+if (typeof string !== 'string' || !string.isWellFormed()) throw new TypeError()
+return new TextEncoder().encode(string)
+```
 #### `utf8fromStringLoose(string, format = 'uint8')`
 Encode a string to UTF-8 bytes (loose mode)
@@ -128,12 +137,22 @@ per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification.
 _Such replacement is a non-injective function, is irreversible and causes collisions.\
 Prefer using strict throwing methods for cryptography applications._
+This is similar to the following snippet (but works on all engines):
+```js
+// Loose encode, replacing invalid Unicode codepoints with U+FFFD
+if (typeof string !== 'string') throw new TypeError()
+return new TextEncoder().encode(string)
+```
 #### `utf8toString(arr)`
 Decode UTF-8 bytes to a string (strict mode)
 Throws on invalid UTF-8 byte sequences
+This is similar to `new TextDecoder('utf-8', { fatal: true, ignoreBOM: true }).decode(arr)`,
+but works on all engines.
 #### `utf8toStringLoose(arr)`
 Decode UTF-8 bytes to a string (loose mode)
@@ -144,6 +163,9 @@ per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification.
 _Such replacement is a non-injective function, is irreversible and causes collisions.\
 Prefer using strict throwing methods for cryptography applications._
+This is similar to `new TextDecoder('utf-8', { ignoreBOM: true }).decode(arr)`,
+but works on all engines.
 ### `@exodus/bytes/utf16.js`
 UTF-16 encoding/decoding
@@ -208,6 +230,15 @@ import { windows1252toString, windows1252fromString } from '@exodus/bytes/single
 import { latin1toString, latin1fromString } from '@exodus/bytes/single-byte.js'
 ```
+> [!WARNING]
+> This is a lower-level API for single-byte encodings.
+> It might not match what you expect, as it supports both WHATWG and unicode.org encodings under
+> different names, with the main intended use case for the latter being either non-web or legacy contexts.
+>
+> For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it).
+>
+> Be sure to know what you are doing and check documentation when directly using encodings from this file.
 Supports all single-byte encodings listed in the WHATWG Encoding standard:
 `ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`,
 `iso-8859-8-i`, `iso-8859-10`, `iso-8859-13`, `iso-8859-14`, `iso-8859-15`, `iso-8859-16`, `koi8-r`, `koi8-u`,
@@ -269,8 +300,9 @@ Same as:
 const latin1toString = createSinglebyteDecoder('iso-8859-1')
 ```
-Note: this is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as
-those alias to `new TextDecoder('windows-1252')`.
+> [!NOTE]
+> This is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as those
+> alias to `new TextDecoder('windows-1252')`.
 #### `latin1fromString(string)`
@@ -318,6 +350,13 @@ Decode / encode the legacy multi-byte encodings according to the
 import { createMultibyteDecoder, createMultibyteEncoder } from '@exodus/bytes/multi-byte.js'
 ```
+> [!WARNING]
+> This is a lower-level API for legacy multi-byte encodings.
+>
+> For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it).
+>
+> Be sure to know what you are doing and check documentation when directly using encodings from this file.
 Supports all legacy multi-byte encodings listed in the WHATWG Encoding standard:
 `gbk`, `gb18030`, `big5`, `euc-jp`, `iso-2022-jp`, `shift_jis`, `euc-kr`.
@@ -608,7 +647,8 @@ import { typedView } from '@exodus/bytes/array.js'
 Create a view of a TypedArray in the specified format (`'uint8'` or `'buffer'`)
-Important: does not copy data, returns a view on the same underlying buffer
+> [!IMPORTANT]
+> Does not copy data, returns a view on the same underlying buffer
 ### `@exodus/bytes/encoding.js`
@@ -801,6 +841,41 @@ do not provide sufficiently complete / non-buggy `TextDecoder` APIs.
 > but they are fixing them and the expected update window is short.\
 > If you want to circumvent browser bugs, use full `@exodus/bytes/encoding.js` import.
+### `@exodus/bytes/whatwg.js`
+WHATWG helpers
+```js
+import '@exodus/bytes/encoding.js' // For full legacy multi-byte encodings support
+import { percentEncodeAfterEncoding } from '@exodus/bytes/whatwg.js'
+```
+#### `percentEncodeAfterEncoding(encoding, input, percentEncodeSet, spaceAsPlus = false)`
+Implements [percent-encode after encoding](https://url.spec.whatwg.org/#string-percent-encode-after-encoding)
+per WHATWG URL specification.
+> [!IMPORTANT]
+> You must import `@exodus/bytes/encoding.js` for this API to accept legacy multi-byte encodings.
+Encodings `utf-16le`, `utf-16be`, and `replacement` are not accepted.
+[C0 control percent-encode set](https://url.spec.whatwg.org/#c0-control-percent-encode-set) is
+always percent-encoded.
+`percentEncodeSet` is an addition to that, and must be a string of unique increasing codepoints
+in range 0x20 - 0x7e, e.g. `' "#<>'`.
+This method accepts [DOMStrings](https://webidl.spec.whatwg.org/#idl-DOMString) and converts them
+to [USVStrings](https://webidl.spec.whatwg.org/#idl-USVString).
+This is different from e.g. `encodeURI` and `encodeURIComponent` which throw on unpaired surrogates:
+```js
+> percentEncodeAfterEncoding('utf8', '\ud800', ' "#$%&+,/:;<=>?@[\\]^`{|}') // component
+'%EF%BF%BD'
+> encodeURIComponent('\ud800')
+Uncaught URIError: URI malformed
+```
 ## Changelog
 See [GitHub Releases](https://github.com/ExodusOSS/bytes/releases) tab

package/array.d.ts CHANGED Viewed

@@ -50,7 +50,8 @@ export type OutputFormat = 'uint8' | 'buffer';
 /**
  * Create a view of a TypedArray in the specified format (`'uint8'` or `'buffer'`)
  *
- * Important: does not copy data, returns a view on the same underlying buffer
+ * > [!IMPORTANT]
+ * > Does not copy data, returns a view on the same underlying buffer
  *
  * @param arr - The input TypedArray
  * @param format - The desired output format (`'uint8'` or `'buffer'`)

package/fallback/_utils.js CHANGED Viewed

@@ -17,9 +17,9 @@ export const nativeDecoder = isNative(TextDecoder)
 // Actually windows-1252, compatible with ascii and latin1 decoding
 // Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
 // in 2025 due to a regression, so we call it Latin1 as it's usable only for that
-const getNativeLain1 = () => {
+const getNativeLatin1 = () => {
   // Not all barebone engines with TextDecoder support something except utf-8, detect
-  if (!nativeDecoder) {
+  if (nativeDecoder) {
     try {
       return new TextDecoder('latin1', { ignoreBOM: true })
     } catch {}
@@ -28,7 +28,7 @@ const getNativeLain1 = () => {
   return null
 }
-export const nativeDecoderLatin1 = /* @__PURE__ */ getNativeLain1()
+export const nativeDecoderLatin1 = /* @__PURE__ */ getNativeLatin1()
 export const canDecoders = !!nativeDecoderLatin1
 // Block Firefox < 146 specifically from using native hex/base64, as it's very slow there

package/fallback/encoding.util.js CHANGED Viewed

@@ -20,15 +20,11 @@ export function unfinishedBytes(u, len, enc) {
     case 'utf-16le':
     case 'utf-16be': {
       // 0-3
-      let p = 0
-      if (len % 2 !== 0) p++ // uneven bytes
+      const p = len % 2 // uneven byte length adds 1
+      if (len < 2) return p
       const l = len - p - 1
-      if (len - p >= 2) {
-        const last = enc === 'utf-16le' ? (u[l] << 8) ^ u[l - 1] : (u[l - 1] << 8) ^ u[l]
-        if (last >= 0xd8_00 && last < 0xdc_00) p += 2 // lone lead
-      }
-      return p
+      const last = enc === 'utf-16le' ? (u[l] << 8) ^ u[l - 1] : (u[l - 1] << 8) ^ u[l]
+      return last >= 0xd8_00 && last < 0xdc_00 ? p + 2 : p // lone lead adds 2
     }
   }

package/fallback/multi-byte.encodings.json CHANGED Viewed

@@ -10,7 +10,7 @@
   "$7": [2,12541,2,-98,1,-156,1,7897,3,-7897,1,244],
   "$8": ["E9UxzELB4htgpd4feI7ZJNwBAQEfAf8B_wH_Af8B_wH_Af8B_wEAAAD_"],
   "$9": [1,26142,4,2,1,1,4,2,1,1,1,1],
-  "$jis0208tail": [
+  "$j": [
    "ipErP1Ps8XWWMAFJ4rgaAwI1HDv3D_k4cuHcHicp0VFf43EZOCAtKIYJGRokBhxNIB4qMI3tHlWG0gtGG_5HAI0TWEcHAQVAFZtpbqgTMYcTOjErvTAqSgAWUBIyTyZ-JwRT9krRHiX4Z3qSTmo8MH-xFCXNJO8FQPEBuGAlBhEMOhMaPFSWbUBCikNUq4NJTTraLApjAfFoHCnoaimC5yYVIij5CTwiyhSCyCw_DwEgXCVj9FfpAM2rPLIMZfFgRQsMDO407TAD_gQzJhVhbRIZAfwKcC5ocSwVFbV-Cwr_8ssh9gIq1PnvAAABAAAAAAAAAAABAP8BAAAAAAAAAQAAAAABAAAAAAEAAAAAAACnWgAAAAECAAAAAKMAXgABAAAAAAAAAgAa5gABAAAAAQCdYwAAAAACAAAAAAEAAAAAAf8BAAAAAQABAQAAAQEAAAAAAAAAAACUbAAAAAAAAZJuAAH_AQAAkm-RbwABAAAAAAAAAQEAAAAAAQAAAAAAAAAAAQAAAAABAAABAAAAAAEAAAAAiXcAAQABh3kAAAAAAAH_AQAAAAAAAAEAAQAAAACEfdsmAAAAAQ",
     3,32999,
     "lIZ_NRU0zrJ-KhNa6DV79Fl84mAcRy5Ra54FEbOQbwDl7RwkQS0WIELTXCtwAx1jrKtUAEF2R-4RsvwGDgD1ACAJ-S8F-xEK9-ctP88Abu8B9latCvJR-9ks9eAd5G3mTCEXGTgTAklJTHMRgwcHCQEBAwENxAD7BHGvigKY_BwhCURv-sHrt3mBfwEAgIABf4MAAAAAAQEAe4kAAAAAAQABAAB1i3UAjgACAAAAAABwkAEAAQABAG2UbACWAGqXAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAABomAAAAQD_aJn_AQAAAAAAAAABAAABZAGcAAEAAAAAAAABAGIAn2GfAQAAAQAAAAABAQBdpAA"
@@ -39,8 +39,8 @@
     1,8,1,13,1,-11,1,-36,1,54,1,17,1,11,1,13,1,-4,1,-3,-2,1,-6,1,22,1,-9,1,13,1,-5,1,7,-2,-2,1,-2,1,4,-1,1,-2,1,66,-2,1,11,-3,1,14,-1,2,1,2,9,2,2,-2,1,8,1,-5,-2,1,5,1,-4,1,5,-3,1,10,1,17,-1,1,4,2,2,
     -1,4,1,2,3,
     "MhH-C_0AAgYbHu8NBAAHAAcLCwnuDAgBABD-Ae8MWLf-_iT-9An4-QEJ9_kp7An-_A4W6hP3GwUGDgFuBAMY_uIXAiP7iHP7JND8Lv3-BwEUAAXs_ggYFv0jwwofCQQIEfz5_AELCPIW-STtBgENHAACVgMDAwYCAAUC-wsHAw7xArNqmwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAf8AAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAP8BAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAPkH",
-    3,1,-3,1,7,-1,1,-2,-2,1,4,-2,1,6,1,4,2,1,-1,3,1,2,9,"t10LChELBwEOdikAAAAAAAAA",3,-5,2,4,"bP0M-gMeBgOOl5EKd7yfAAAAAAAAALkRJ-TdIA",0,464,"$jis0208tail",0,2,10,-20522,"4gEi-v8AAAA",0,2068,10,-56723,
-    10,-26,"4gEi-i7kChP_AAAAM-8AAQ","$jis0208tail"
+    3,1,-3,1,7,-1,1,-2,-2,1,4,-2,1,6,1,4,2,1,-1,3,1,2,9,"t10LChELBwEOdikAAAAAAAAA",3,-5,2,4,"bP0M-gMeBgOOl5EKd7yfAAAAAAAAALkRJ-TdIA",0,464,"$j",0,2,10,-20522,"4gEi-v8AAAA",0,2068,10,-56723,
+    10,-26,"4gEi-i7kChP_AAAAM-8AAQ","$j"
   ],
   "jis0212": [
     0,108,"2O7wIAPRK_6DJQACAP4CAP4CAP0EAA",0,8,1,-741,1,4,1,24,0,38,"uu_-BHOBcQAAAAAh3yE",0,359,1,-7569,3,1,1,31,0,1,1,-31,0,1,-1,1,28,0,1,1,-29,0,4,4,28,"ysU79Qr95B0DAAAAAAAAAA",0,35,11,51,2,1,0,35,

package/fallback/percent.js ADDED Viewed

@@ -0,0 +1,31 @@
+import { decodeAscii, encodeLatin1 } from './latin1.js'
+import { decode2string } from './_utils.js'
+const ERR = 'percentEncodeSet must be a string of unique increasing codepoints in range 0x20 - 0x7e'
+const percentMap = new Map()
+let hex, base
+export function percentEncoder(set, spaceAsPlus = false) {
+  if (typeof set !== 'string' || /[^\x20-\x7E]/.test(set)) throw new TypeError(ERR)
+  if (typeof spaceAsPlus !== 'boolean') throw new TypeError('spaceAsPlus must be boolean')
+  const id = set + +spaceAsPlus
+  const cached = percentMap.get(id)
+  if (cached) return cached
+  const n = encodeLatin1(set).sort() // string checked above to be ascii
+  if (decodeAscii(n) !== set || new Set(n).size !== n.length) throw new TypeError(ERR)
+  if (!base) {
+    hex = Array.from({ length: 256 }, (_, i) => `%${i.toString(16).padStart(2, '0').toUpperCase()}`)
+    base = hex.map((h, i) => (i < 0x20 || i > 0x7e ? h : String.fromCharCode(i)))
+  }
+  const map = base.slice() // copy
+  for (const c of n) map[c] = hex[c]
+  if (spaceAsPlus) map[0x20] = '+' // overrides whatever percentEncodeSet thinks about it
+  // Input is not typechecked, for internal use only
+  const percentEncode = (u8, start = 0, end = u8.length) => decode2string(u8, start, end, map)
+  percentMap.set(id, percentEncode)
+  return percentEncode
+}

package/multi-byte.d.ts CHANGED Viewed

@@ -10,6 +10,13 @@
  * import { createMultibyteDecoder, createMultibyteEncoder } from '@exodus/bytes/multi-byte.js'
  * ```
  *
+ * > [!WARNING]
+ * > This is a lower-level API for legacy multi-byte encodings.
+ * >
+ * > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it).
+ * >
+ * > Be sure to know what you are doing and check documentation when directly using encodings from this file.
+ *
  * Supports all legacy multi-byte encodings listed in the WHATWG Encoding standard:
  * `gbk`, `gb18030`, `big5`, `euc-jp`, `iso-2022-jp`, `shift_jis`, `euc-kr`.
  *

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@exodus/bytes",
-  "version": "1.10.0",
+  "version": "1.11.0",
   "description": "Various operations on Uint8Array data",
   "keywords": [
     "encoding",
@@ -20,13 +20,14 @@
   ],
   "scripts": {
     "lint": "eslint .",
+    "typedoc": "typedoc && mkdir -p doc/assets && cp -r theme/styles doc/assets/",
     "test:javascriptcore": "npm run test:jsc --",
     "test:v8": "exodus-test --engine=v8:bundle",
     "test:jsc": "exodus-test --engine=jsc:bundle",
     "test:spidermonkey": "exodus-test --engine=spidermonkey:bundle",
     "test:hermes": "exodus-test --engine=hermes:bundle",
     "test:quickjs": "exodus-test --engine=quickjs:bundle",
-    "test:xs": "exodus-test --engine=xs:bundle",
+    "test:xs": "EXODUS_TEST_IGNORE='tests/whatwg.browser.test.js' exodus-test --engine=xs:bundle",
     "test:engine262": "exodus-test --engine=engine262:bundle",
     "test:deno": "exodus-test --engine=deno:pure",
     "test:bun": "exodus-test --engine=bun:pure",
@@ -70,6 +71,7 @@
     "/fallback/encoding.util.js",
     "/fallback/hex.js",
     "/fallback/latin1.js",
+    "/fallback/percent.js",
     "/fallback/multi-byte.encodings.cjs",
     "/fallback/multi-byte.encodings.json",
     "/fallback/multi-byte.js",
@@ -119,6 +121,8 @@
     "/utf8.js",
     "/utf8.d.ts",
     "/utf8.node.js",
+    "/whatwg.js",
+    "/whatwg.d.ts",
     "/wif.js",
     "/wif.d.ts"
   ],
@@ -199,6 +203,10 @@
       "node": "./utf8.node.js",
       "default": "./utf8.js"
     },
+    "./whatwg.js": {
+      "types": "./whatwg.d.ts",
+      "default": "./whatwg.js"
+    },
     "./wif.js": {
       "types": "./wif.d.ts",
       "default": "./wif.js"
@@ -221,6 +229,7 @@
     "@exodus/eslint-config": "^5.24.0",
     "@exodus/prettier": "^1.0.0",
     "@exodus/test": "^1.0.0-rc.109",
+    "@hexagon/base64": "^2.0.4",
     "@noble/hashes": "^2.0.1",
     "@oslojs/encoding": "^1.1.0",
     "@petamoriken/float16": "^3.9.3",
@@ -251,6 +260,7 @@
     "jsvu": "^3.0.3",
     "punycode": "^2.3.1",
     "text-encoding": "^0.7.0",
+    "typedoc": "^0.28.16",
     "typescript": "^5.9.3",
     "uint8array-tools": "^0.0.9",
     "utf8": "^3.0.0",

package/single-byte.d.ts CHANGED Viewed

@@ -11,6 +11,15 @@
  * import { latin1toString, latin1fromString } from '@exodus/bytes/single-byte.js'
  * ```
  *
+ * > [!WARNING]
+ * > This is a lower-level API for single-byte encodings.
+ * > It might not match what you expect, as it supports both WHATWG and unicode.org encodings under
+ * > different names, with the main intended use case for the latter being either non-web or legacy contexts.
+ * >
+ * > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it).
+ * >
+ * > Be sure to know what you are doing and check documentation when directly using encodings from this file.
+ *
  * Supports all single-byte encodings listed in the WHATWG Encoding standard:
  * `ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`,
  * `iso-8859-8-i`, `iso-8859-10`, `iso-8859-13`, `iso-8859-14`, `iso-8859-15`, `iso-8859-16`, `koi8-r`, `koi8-u`,
@@ -95,8 +104,9 @@ export function createSinglebyteEncoder(
  * const latin1toString = createSinglebyteDecoder('iso-8859-1')
  * ```
  *
- * Note: this is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as
- * those alias to `new TextDecoder('windows-1252')`.
+ * > [!NOTE]
+ * > This is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as those
+ * > alias to `new TextDecoder('windows-1252')`.
  *
  * @param arr - The bytes to decode
  * @returns The decoded string

package/utf16.js CHANGED Viewed

@@ -8,7 +8,7 @@ const decoderLooseLE = canDecoders ? new TextDecoder('utf-16le', { ignoreBOM })
 const decoderFatalBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM, fatal: true }) : null
 const decoderLooseBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM }) : null
 const decoderFatal16 = isLE ? decoderFatalLE : decoderFatalBE
-const decoderLoose16 = isLE ? decoderLooseLE : decoderFatalBE
+const decoderLoose16 = isLE ? decoderLooseLE : decoderLooseBE
 const { isWellFormed, toWellFormed } = String.prototype
 const { E_STRICT, E_STRICT_UNICODE } = js

package/utf8.d.ts CHANGED Viewed

@@ -23,6 +23,13 @@ import type { OutputFormat, Uint8ArrayBuffer } from './array.js';
  *
  * Throws on invalid Unicode (unpaired surrogates)
  *
+ * This is similar to the following snippet (but works on all engines):
+ * ```js
+ * // Strict encode, requiring Unicode codepoints to be valid
+ * if (typeof string !== 'string' || !string.isWellFormed()) throw new TypeError()
+ * return new TextEncoder().encode(string)
+ * ```
+ *
  * @param string - The string to encode
  * @param format - Output format (default: 'uint8')
  * @returns The encoded bytes
@@ -40,6 +47,13 @@ export function utf8fromString(string: string, format?: OutputFormat): Uint8Arra
  * _Such replacement is a non-injective function, is irreversible and causes collisions.\
  * Prefer using strict throwing methods for cryptography applications._
  *
+ * This is similar to the following snippet (but works on all engines):
+ * ```js
+ * // Loose encode, replacing invalid Unicode codepoints with U+FFFD
+ * if (typeof string !== 'string') throw new TypeError()
+ * return new TextEncoder().encode(string)
+ * ```
+ *
  * @param string - The string to encode
  * @param format - Output format (default: 'uint8')
  * @returns The encoded bytes
@@ -56,6 +70,9 @@ export function utf8fromStringLoose(
  *
  * Throws on invalid UTF-8 byte sequences
  *
+ * This is similar to `new TextDecoder('utf-8', { fatal: true, ignoreBOM: true }).decode(arr)`,
+ * but works on all engines.
+ *
  * @param arr - The bytes to decode
  * @returns The decoded string
  */
@@ -70,6 +87,9 @@ export function utf8toString(arr: Uint8Array): string;
  * _Such replacement is a non-injective function, is irreversible and causes collisions.\
  * Prefer using strict throwing methods for cryptography applications._
  *
+ * This is similar to `new TextDecoder('utf-8', { ignoreBOM: true }).decode(arr)`,
+ * but works on all engines.
+ *
  * @param arr - The bytes to decode
  * @returns The decoded string
  */

package/utf8.js CHANGED Viewed

@@ -27,11 +27,11 @@ function deLoose(str, loose, res) {
   // Recheck if the string was encoded correctly
   let start = 0
-  const last = res.length - 2
-  // Search for EFBFBD
-  while (start < last) {
+  const last = res.length - 3
+  // Search for EFBFBD (3-byte sequence)
+  while (start <= last) {
     const pos = res.indexOf(0xef, start)
-    if (pos === -1) break
+    if (pos === -1 || pos > last) break
     start = pos + 1
     if (res[pos + 1] === 0xbf && res[pos + 2] === 0xbd) {
       // Found a replacement char in output, need to recheck if we encoded the input correctly

package/whatwg.d.ts ADDED Viewed

@@ -0,0 +1,48 @@
+/**
+ * WHATWG helpers
+ *
+ * ```js
+ * import '@exodus/bytes/encoding.js' // For full legacy multi-byte encodings support
+ * import { percentEncodeAfterEncoding } from '@exodus/bytes/whatwg.js'
+ * ```
+ *
+ * @module @exodus/bytes/whatwg.js
+ */
+/**
+ * Implements [percent-encode after encoding](https://url.spec.whatwg.org/#string-percent-encode-after-encoding)
+ * per WHATWG URL specification.
+ *
+ * > [!IMPORTANT]
+ * > You must import `@exodus/bytes/encoding.js` for this API to accept legacy multi-byte encodings.
+ *
+ * Encodings `utf-16le`, `utf-16be`, and `replacement` are not accepted.
+ *
+ * [C0 control percent-encode set](https://url.spec.whatwg.org/#c0-control-percent-encode-set) is
+ * always percent-encoded.
+ *
+ * `percentEncodeSet` is an addition to that, and must be a string of unique increasing codepoints
+ * in range 0x20 - 0x7e, e.g. `' "#<>'`.
+ *
+ * This method accepts [DOMStrings](https://webidl.spec.whatwg.org/#idl-DOMString) and converts them
+ * to [USVStrings](https://webidl.spec.whatwg.org/#idl-USVString).
+ * This is different from e.g. `encodeURI` and `encodeURIComponent` which throw on unpaired surrogates:
+ * ```js
+ * > percentEncodeAfterEncoding('utf8', '\ud800', ' "#$%&+,/:;<=>?@[\\]^`{|}') // component
+ * '%EF%BF%BD'
+ * > encodeURIComponent('\ud800')
+ * Uncaught URIError: URI malformed
+ * ```
+ *
+ * @param encoding - The encoding label per WHATWG Encoding spec
+ * @param input - Input scalar-value string to encode
+ * @param percentEncodeSet - A string of ASCII chars to escape in addition to C0 control percent-encode set
+ * @param spaceAsPlus - Whether to encode space as `'+'` instead of `'%20'` or `' '` (default: false)
+ * @returns The percent-encoded string
+ */
+export function percentEncodeAfterEncoding(
+  encoding: string,
+  input: string,
+  percentEncodeSet: string,
+  spaceAsPlus?: boolean
+): string;

package/whatwg.js ADDED Viewed

@@ -0,0 +1,76 @@
+import { utf8fromStringLoose } from '@exodus/bytes/utf8.js'
+import { createSinglebyteEncoder } from '@exodus/bytes/single-byte.js'
+import { isMultibyte, getMultibyteEncoder } from './fallback/encoding.js'
+import { normalizeEncoding, E_ENCODING } from './fallback/encoding.api.js'
+import { percentEncoder } from './fallback/percent.js'
+import { encodeMap } from './fallback/single-byte.js'
+import { E_STRING } from './fallback/_utils.js'
+// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
+// Codepoints below 0x20, 0x7F specifically, and above 0x7F (non-ASCII) are always encoded
+// > A C0 control is a code point in the range U+0000 NULL to U+001F INFORMATION SEPARATOR ONE, inclusive.
+// > The C0 control percent-encode set are the C0 controls and all code points greater than U+007E (~).
+export function percentEncodeAfterEncoding(encoding, input, percentEncodeSet, spaceAsPlus = false) {
+  const enc = normalizeEncoding(encoding)
+  // Ref: https://encoding.spec.whatwg.org/#get-an-encoder
+  if (!enc || enc === 'replacement' || enc === 'utf-16le' || enc === 'utf-16be') {
+    throw new RangeError(E_ENCODING)
+  }
+  const percent = percentEncoder(percentEncodeSet, spaceAsPlus)
+  if (enc === 'utf-8') return percent(utf8fromStringLoose(input))
+  const multi = isMultibyte(enc)
+  const encoder = multi ? getMultibyteEncoder() : createSinglebyteEncoder
+  const fatal = encoder(enc)
+  try {
+    return percent(fatal(input))
+  } catch {}
+  let res = ''
+  let last = 0
+  if (multi) {
+    const rep = enc === 'gb18030' ? percent(fatal('\uFFFD')) : `%26%23${0xff_fd}%3B` // only gb18030 can encode it
+    const escaping = encoder(enc, (cp, u, i) => {
+      res += percent(u, last, i)
+      res += cp >= 0xd8_00 && cp < 0xe0_00 ? rep : `%26%23${cp}%3B` // &#cp;
+      last = i
+      return 0 // no bytes emitted
+    })
+    const u = escaping(input) // has side effects on res
+    res += percent(u, last)
+  } else {
+    if (typeof input !== 'string') throw new TypeError(E_STRING) // all other paths have their own validation
+    const m = encodeMap(enc)
+    const len = input.length
+    const u = new Uint8Array(len)
+    for (let i = 0; i < len; i++) {
+      const x = input.charCodeAt(i)
+      const b = m[x]
+      if (!b && x) {
+        let cp = x
+        const i0 = i
+        if (x >= 0xd8_00 && x < 0xe0_00) {
+          cp = 0xff_fd
+          if (x < 0xdc_00 && i + 1 < len) {
+            const x1 = input.charCodeAt(i + 1)
+            if (x1 >= 0xdc_00 && x1 < 0xe0_00) {
+              cp = 0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10))
+              i++
+            }
+          }
+        }
+        res += `${percent(u, last, i0)}%26%23${cp}%3B` // &#cp;
+        last = i + 1 // skip current
+      } else {
+        u[i] = b
+      }
+    }
+    res += percent(u, last)
+  }
+  return res
+}

package/wif.js CHANGED Viewed

@@ -6,6 +6,7 @@ import { assertUint8 } from './assert.js'
 function from(arr, expectedVersion) {
   assertUint8(arr)
+  if (arr.length !== 33 && arr.length !== 34) throw new Error('Invalid WIF length')
   const version = arr[0]
   if (expectedVersion !== undefined && version !== expectedVersion) {
     throw new Error('Invalid network version')
@@ -14,7 +15,6 @@ function from(arr, expectedVersion) {
   // Makes a copy, regardless of input being a Buffer or a Uint8Array (unlike .slice)
   const privateKey = Uint8Array.from(arr.subarray(1, 33))
   if (arr.length === 33) return { version, privateKey, compressed: false }
-  if (arr.length !== 34) throw new Error('Invalid WIF length')
   if (arr[33] !== 1) throw new Error('Invalid compression flag')
   return { version, privateKey, compressed: true }
 }
@@ -22,7 +22,6 @@ function from(arr, expectedVersion) {
 function to({ version: v, privateKey, compressed }) {
   if (!Number.isSafeInteger(v) || v < 0 || v > 0xff) throw new Error('Missing or invalid version')
   assertUint8(privateKey, { length: 32, name: 'privateKey' })
-  if (privateKey.length !== 32) throw new TypeError('Invalid privateKey length')
   const out = new Uint8Array(compressed ? 34 : 33)
   out[0] = v
   out.set(privateKey, 1)