npm - @exodus/bytes - Versions diffs - 1.6.0 → 1.8.0 - Mend

@exodus/bytes 1.6.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/README.md +121 -16
package/base32.js +2 -1
package/base58.js +2 -2
package/base64.js +2 -2
package/bech32.js +5 -2
package/encoding-lite.d.ts +1 -0
package/encoding-lite.js +2 -0
package/encoding.d.ts +58 -0
package/encoding.js +2 -0
package/fallback/_utils.js +4 -8
package/fallback/encoding.js +83 -1
package/fallback/hex.js +2 -2
package/fallback/latin1.js +34 -0
package/fallback/multi-byte.js +1 -0
package/fallback/single-byte.encodings.js +64 -43
package/fallback/single-byte.js +27 -4
package/fallback/utf8.js +5 -2
package/hex.node.js +2 -1
package/package.json +13 -4
package/single-byte.js +82 -4
package/single-byte.node.js +61 -4
package/utf16.js +2 -2
package/utf16.node.js +2 -2
package/utf8.js +2 -2
package/utf8.node.js +2 -1

package/README.md CHANGED Viewed

@@ -1,5 +1,9 @@
 # `@exodus/bytes`
+[![](https://flat.badgen.net/npm/v/@exodus/bytes)](https://npmjs.org/package/@exodus/bytes)
+![](https://flat.badgen.net/npm/dm/@exodus/bytes)
+[![](https://flat.badgen.net/npm/license/@exodus/bytes)](https://github.com/ExodusOSS/bytes/blob/HEAD/LICENSE)
 `Uint8Array` conversion to and from `base64`, `base32`, `base58`, `hex`, `utf8`, `utf16`, `bech32` and `wif`
 And a [`TextEncoder` / `TextDecoder` polyfill](#textencoder--textdecoder-polyfill)
@@ -31,13 +35,14 @@ See [Performance](./Performance.md) for more info
 ```js
 import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
+import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding.js' // Requires Streams
 ```
 Less than half the bundle size of [text-encoding](https://npmjs.com/text-encoding), [whatwg-encoding](https://npmjs.com/whatwg-encoding) or [iconv-lite](https://npmjs.com/iconv-lite) (gzipped or not).\
 Also [much faster](#fast) than all of those.
 > [!TIP]
-> See also the [lite version](#lite-version) to get this down to 9 KiB gzipped.
+> See also the [lite version](#lite-version) to get this down to 10 KiB gzipped.
 Spec compliant, passing WPT and covered with extra tests.\
 Moreover, tests for this library uncovered [bugs in all major implementations](https://docs.google.com/spreadsheets/d/1pdEefRG6r9fZy61WHGz0TKSt8cO4ISWqlpBN5KntIvQ/edit).\
@@ -77,11 +82,12 @@ _These are only provided as a compatibility layer, prefer hardened APIs instead
 If you don't need support for legacy multi-byte encodings, you can use the lite import:
 ```js
 import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-lite.js'
+import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-lite.js' // Requires Streams
 ```
-This reduces the bundle size 10x:\
-from 90 KiB gzipped for `@exodus/bytes/encoding.js` to 9 KiB gzipped for `@exodus/bytes/encoding-lite.js`.\
-(For comparison, `text-encoding` module is 190 KiB gzipped, and `iconv-lite` is 194 KiB gzipped).
+This reduces the bundle size 9x:\
+from 90 KiB gzipped for `@exodus/bytes/encoding.js` to 10 KiB gzipped for `@exodus/bytes/encoding-lite.js`.\
+(For comparison, `text-encoding` module is 190 KiB gzipped, and `iconv-lite` is 194 KiB gzipped):
 It still supports `utf-8`, `utf-16le`, `utf-16be` and all single-byte encodings specified by the spec,
 the only difference is support for legacy multi-byte encodings.
@@ -121,32 +127,90 @@ import { utf16fromStringLoose, utf16toStringLoose } from '@exodus/bytes/utf16.js
 ### `@exodus/bytes/single-byte.js`
 ```js
-import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
-import { windows1252toString } from '@exodus/bytes/single-byte.js'
+import { createSinglebyteDecoder, createSinglebyteEncoder } from '@exodus/bytes/single-byte.js'
+import { windows1252toString, windows1252fromString } from '@exodus/bytes/single-byte.js'
 ```
-Decode the legacy single-byte encodings according to the [Encoding standard](https://encoding.spec.whatwg.org/)
-([§9](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings) and
-[§14.5](https://encoding.spec.whatwg.org/#x-user-defined)).
+Decode / encode the legacy single-byte encodings according to the
+[Encoding standard](https://encoding.spec.whatwg.org/)
+([§9](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings),
+[§14.5](https://encoding.spec.whatwg.org/#x-user-defined)),
+and [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859) `iso-8859-*` mappings.
-Supports all single-byte encodings listed in the standard:
+Supports all single-byte encodings listed in the WHATWG Encoding standard:
 `ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`,
 `iso-8859-8-i`, `iso-8859-10`, `iso-8859-13`, `iso-8859-14`, `iso-8859-15`, `iso-8859-16`, `koi8-r`, `koi8-u`,
 `macintosh`, `windows-874`, `windows-1250`, `windows-1251`, `windows-1252`, `windows-1253`, `windows-1254`,
 `windows-1255`, `windows-1256`, `windows-1257`, `windows-1258`, `x-mac-cyrillic` and `x-user-defined`.
+Also supports `iso-8859-1`, `iso-8859-9`, `iso-8859-11` as defined at
+[unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859)
+(and all other `iso-8859-*` encodings there as they match WHATWG).
+> [!NOTE]
+> While all `iso-8859-*` encodings supported by the [WHATWG Encoding standard](https://encoding.spec.whatwg.org/) match
+> [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859), the WHATWG Encoding spec doesn't support
+> `iso-8859-1`, `iso-8859-9`, `iso-8859-11`, and instead maps them as labels to `windows-1252`, `windows-1254`, `windows-874`.\
+> `createSinglebyteDecoder()` (unlike `TextDecoder` or `legacyHookDecode()`) does not do such mapping,
+> so its results will differ from `TextDecoder` for those encoding names.
+```js
+> new TextDecoder('iso-8859-1').encoding
+'windows-1252'
+> new TextDecoder('iso-8859-9').encoding
+'windows-1254'
+> new TextDecoder('iso-8859-11').encoding
+'windows-874'
+> new TextDecoder('iso-8859-9').decode(Uint8Array.of(0x80, 0x81, 0xd0))
+'€\x81Ğ' // this is actually decoded according to windows-1254 per TextDecoder spec
+> createSinglebyteDecoder('iso-8859-9')(Uint8Array.of(0x80, 0x81, 0xd0))
+'\x80\x81Ğ' // this is iso-8859-9 as defined at https://unicode.org/Public/MAPPINGS/ISO8859/8859-9.txt
+```
 ##### `createSinglebyteDecoder(encoding, loose = false)`
-Create a decoder for a supported one-byte `encoding`, given it's lowercased name `encoding`.
+Create a decoder for a supported one-byte `encoding`, given its lowercased name `encoding`.
 Returns a function `decode(arr)` that decodes bytes to a string.
+##### `createSinglebyteEncoder(encoding, { mode = 'fatal' })`
+Create an encoder for a supported one-byte `encoding`, given its lowercased name `encoding`.
+Returns a function `encode(string)` that encodes a string to bytes.
+In `'fatal'` mode (default), will throw on non well-formed strings or any codepoints which could
+not be encoded in the target encoding.
+##### `latin1toString(arr)`
+Decode `iso-8859-1` bytes to a string.
+There is no loose variant for this encoding, all bytes can be decoded.
+Same as:
+```js
+const latin1toString = createSinglebyteDecoder('iso-8859-1')
+```
+Note: this is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as
+those alias to `new TextDecoder('windows-1252')`.
+##### `latin1fromString(string)`
+Encode a string to `iso-8859-1` bytes.
+Will throw on non well-formed strings or any codepoints which could not be encoded in `iso-8859-1`.
+Same as:
+```js
+const latin1fromString = createSinglebyteEncoder('iso-8859-1', { mode: 'fatal' })
+```
 ##### `windows1252toString(arr)`
 Decode `windows-1252` bytes to a string.
-Also supports `ascii` and `latin-1` as those are strict subsets of `windows-1252`.
 There is no loose variant for this encoding, all bytes can be decoded.
 Same as:
@@ -154,6 +218,17 @@ Same as:
 const windows1252toString = createSinglebyteDecoder('windows-1252')
 ```
+##### `windows1252fromString(string)`
+Encode a string to `windows-1252` bytes.
+Will throw on non well-formed strings or any codepoints which could not be encoded in `windows-1252`.
+Same as:
+```js
+const windows1252fromString = createSinglebyteEncoder('windows-1252', { mode: 'fatal' })
+```
 ### `@exodus/bytes/multi-byte.js`
 ```js
@@ -171,7 +246,7 @@ Supports all legacy multi-byte encodings listed in the standard:
 ##### `createMultibyteDecoder(encoding, loose = false)`
-Create a decoder for a supported legacy multi-byte `encoding`, given it's lowercased name `encoding`.
+Create a decoder for a supported legacy multi-byte `encoding`, given its lowercased name `encoding`.
 Returns a function `decode(arr, stream = false)` that decodes bytes to a string.
@@ -188,6 +263,8 @@ import { fromBigInt, toBigInt } from '@exodus/bytes/bigint.js'
 ### `@exodus/bytes/hex.js`
+Implements Base16 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
 ```js
 import { fromHex, toHex } from '@exodus/bytes/hex.js'
 ```
@@ -197,6 +274,8 @@ import { fromHex, toHex } from '@exodus/bytes/hex.js'
 ### `@exodus/bytes/base64.js`
+Implements Base64 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
 ```js
 import { fromBase64, toBase64 } from '@exodus/bytes/base64.js'
 import { fromBase64url, toBase64url } from '@exodus/bytes/base64.js'
@@ -211,6 +290,8 @@ import { fromBase64any } from '@exodus/bytes/base64.js'
 ### `@exodus/bytes/base32.js`
+Implements Base32 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
 ```js
 import { fromBase32, toBase32 } from '@exodus/bytes/base32.js'
 import { fromBase32hex, toBase32hex } from '@exodus/bytes/base32.js'
@@ -223,6 +304,8 @@ import { fromBase32hex, toBase32hex } from '@exodus/bytes/base32.js'
 ### `@exodus/bytes/bech32.js`
+Implements [BIP-0173](https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki#specification) and [BIP-0350](https://github.com/bitcoin/bips/blob/master/bip-0350.mediawiki#specification).
 ```js
 import { fromBech32, toBech32 } from '@exodus/bytes/bech32.js'
 import { fromBech32m, toBech32m } from '@exodus/bytes/bech32.js'
@@ -284,6 +367,7 @@ On non-Node.js, requires peer dependency [@exodus/crypto](https://www.npmjs.com/
 ```js
 import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
+import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding.js' // Requires Streams
 // Hooks for standards
 import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding.js'
@@ -291,7 +375,9 @@ import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from
 Implements the [Encoding standard](https://encoding.spec.whatwg.org/):
 [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder),
-[TextEncoder](https://encoding.spec.whatwg.org/#interface-textdecoder),
+[TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder),
+[TextDecoderStream](https://encoding.spec.whatwg.org/#interface-textdecoderstream),
+[TextEncoderStream](https://encoding.spec.whatwg.org/#interface-textencoderstream),
 some [hooks](https://encoding.spec.whatwg.org/#specification-hooks) (see below).
 #### `new TextDecoder(label = 'utf-8', { fatal = false, ignoreBOM = false })`
@@ -300,7 +386,21 @@ some [hooks](https://encoding.spec.whatwg.org/#specification-hooks) (see below).
 #### `new TextEncoder()`
-[TextEncoder](https://encoding.spec.whatwg.org/#interface-textdecoder) implementation/polyfill.
+[TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder) implementation/polyfill.
+#### `new TextDecoderStream(label = 'utf-8', { fatal = false, ignoreBOM = false })`
+[TextDecoderStream](https://encoding.spec.whatwg.org/#interface-textdecoderstream) implementation/polyfill.
+Requires [Streams](https://streams.spec.whatwg.org/) to be either supported by the platform or
+[polyfilled](https://npmjs.com/package/web-streams-polyfill).
+#### `new TextEncoderStream()`
+[TextEncoderStream](https://encoding.spec.whatwg.org/#interface-textencoderstream) implementation/polyfill.
+Requires [Streams](https://streams.spec.whatwg.org/) to be either supported by the platform or
+[polyfilled](https://npmjs.com/package/web-streams-polyfill).
 #### `labelToName(label)`
@@ -370,6 +470,7 @@ new TextDecoder(getBOMEncoding(input) ?? fallbackEncoding).decode(input)
 ```js
 import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-lite.js'
+import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-lite.js' // Requires Streams
 // Hooks for standards
 import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding-lite.js'
@@ -393,7 +494,9 @@ To avoid inconsistencies, the exported classes and methods are exactly the same
 > lite = require('@exodus/bytes/encoding-lite.js')
 [Module: null prototype] {
   TextDecoder: [class TextDecoder],
+  TextDecoderStream: [class TextDecoderStream],
   TextEncoder: [class TextEncoder],
+  TextEncoderStream: [class TextEncoderStream],
   getBOMEncoding: [Function: getBOMEncoding],
   labelToName: [Function: labelToName],
   legacyHookDecode: [Function: legacyHookDecode],
@@ -406,7 +509,9 @@ Error: Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encod
 > full = require('@exodus/bytes/encoding.js')
 [Module: null prototype] {
   TextDecoder: [class TextDecoder],
+  TextDecoderStream: [class TextDecoderStream],
   TextEncoder: [class TextEncoder],
+  TextEncoderStream: [class TextEncoderStream],
   getBOMEncoding: [Function: getBOMEncoding],
   labelToName: [Function: labelToName],
   legacyHookDecode: [Function: legacyHookDecode],

package/base32.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { assertEmptyRest } from './assert.js'
 import { typedView } from './array.js'
+import { E_STRING } from './fallback/_utils.js'
 import * as js from './fallback/base32.js'
 // See https://datatracker.ietf.org/doc/html/rfc4648
@@ -25,7 +26,7 @@ export function fromBase32hex(str, options) {
 }
 function fromBase32common(str, isBase32Hex, padding, format, rest) {
-  if (typeof str !== 'string') throw new TypeError('Input is not a string')
+  if (typeof str !== 'string') throw new TypeError(E_STRING)
   if (rest !== null) assertEmptyRest(rest)
   if (padding === true) {

package/base58.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { typedView } from './array.js'
 import { assertUint8 } from './assert.js'
-import { nativeDecoder, nativeEncoder, isHermes } from './fallback/_utils.js'
+import { nativeDecoder, nativeEncoder, isHermes, E_STRING } from './fallback/_utils.js'
 import { encodeAscii, decodeAscii } from './fallback/latin1.js'
 const alphabet58 = [...'123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz']
@@ -122,7 +122,7 @@ function toBase58core(arr, alphabet, codes) {
 }
 function fromBase58core(str, alphabet, codes, format = 'uint8') {
-  if (typeof str !== 'string') throw new TypeError('Input is not a string')
+  if (typeof str !== 'string') throw new TypeError(E_STRING)
   const length = str.length
   if (length === 0) return typedView(new Uint8Array(), format)

package/base64.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { assertUint8, assertEmptyRest } from './assert.js'
 import { typedView } from './array.js'
-import { isHermes, skipWeb } from './fallback/_utils.js'
+import { isHermes, skipWeb, E_STRING } from './fallback/_utils.js'
 import { decodeLatin1, encodeLatin1 } from './fallback/latin1.js'
 import * as js from './fallback/base64.js'
@@ -79,7 +79,7 @@ export function fromBase64any(str, { format = 'uint8', padding = 'both', ...rest
 }
 function fromBase64common(str, isBase64url, padding, format, rest) {
-  if (typeof str !== 'string') throw new TypeError('Input is not a string')
+  if (typeof str !== 'string') throw new TypeError(E_STRING)
   if (rest !== null) assertEmptyRest(rest)
   const auto = padding === 'both' ? str.endsWith('=') : undefined
   // Older JSC supporting Uint8Array.fromBase64 lacks proper checks

package/bech32.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { assertUint8 } from './assert.js'
-import { nativeEncoder } from './fallback/_utils.js'
+import { nativeEncoder, E_STRING } from './fallback/_utils.js'
 import { decodeAscii, encodeAscii, encodeLatin1 } from './fallback/latin1.js'
 const alphabet = [...'qpzry9x8gf2tvdw0s3jn54khce6mua7l']
@@ -12,7 +12,6 @@ const E_MIXED = 'Mixed-case string'
 const E_PADDING = 'Padding is invalid'
 const E_CHECKSUM = 'Invalid checksum'
 const E_CHARACTER = 'Non-bech32 character'
-const E_STRING = 'Input is not a string'
 // nativeEncoder path uses encodeAscii which asserts ascii, otherwise we have 0-255 bytes from encodeLatin1
 const c2x = new Int8Array(nativeEncoder ? 128 : 256).fill(-1)
@@ -179,6 +178,9 @@ function assertDecodeArgs(str, limit) {
   if (typeof limit !== 'number' || str.length < 8 || !(str.length <= limit)) throw new Error(E_SIZE)
 }
+// this is instant on 8-bit strings
+const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
 function fromBech32enc(str, limit, encoding) {
   assertDecodeArgs(str, limit)
   const lower = str.toLowerCase()
@@ -195,6 +197,7 @@ function fromBech32enc(str, limit, encoding) {
   if (wordsLength < 0) throw new Error(E_SIZE)
   const bytesLength = (wordsLength * 5) >> 3
   const slice = str.slice(split + 1)
+  if (!nativeEncoder && NON_LATIN.test(slice)) throw new SyntaxError(E_CHARACTER) // otherwise can't use encodeLatin1
   const c = nativeEncoder ? encodeAscii(slice, E_CHARACTER) : encodeLatin1(slice) // suboptimal, but only affects non-Hermes barebones
   const bytes = new Uint8Array(bytesLength)

package/encoding-lite.d.ts ADDED Viewed

	@@ -0,0 +1 @@
1	+ export * from './encoding.js'

package/encoding-lite.js CHANGED Viewed

@@ -1,6 +1,8 @@
 export {
   TextDecoder,
   TextEncoder,
+  TextDecoderStream,
+  TextEncoderStream,
   normalizeEncoding,
   getBOMEncoding,
   labelToName,

package/encoding.d.ts ADDED Viewed

@@ -0,0 +1,58 @@
+/// <reference types="node" />
+/**
+ * Converts an encoding label to its name, as an ASCII-lowercased string
+ * @param label - The encoding label to normalize
+ * @returns The normalized encoding name, or null if invalid
+ */
+export function normalizeEncoding(label: string): string | null;
+/**
+ * Implements BOM sniff (https://encoding.spec.whatwg.org/#bom-sniff) legacy hook.
+ * @param input - The bytes to check for BOM
+ * @returns The encoding ('utf-8', 'utf-16le', 'utf-16be'), or null if no BOM found
+ */
+export function getBOMEncoding(
+  input: ArrayBufferLike | ArrayBufferView
+): 'utf-8' | 'utf-16le' | 'utf-16be' | null;
+/**
+ * Implements decode (https://encoding.spec.whatwg.org/#decode) legacy hook.
+ * @param input - The bytes to decode
+ * @param fallbackEncoding - The encoding to use if no BOM detected (default: 'utf-8')
+ * @returns The decoded string
+ */
+export function legacyHookDecode(
+  input: ArrayBufferLike | ArrayBufferView,
+  fallbackEncoding?: string
+): string;
+/**
+ * Converts an encoding label to its name, as a case-sensitive string.
+ * @param label - The encoding label
+ * @returns The proper case encoding name, or null if invalid
+ */
+export function labelToName(label: string): string | null;
+/**
+ * Text decoder for decoding bytes to strings in various encodings
+ * Supports strict and lossy modes
+ */
+export const TextDecoder: typeof globalThis.TextDecoder;
+/**
+ * Text encoder for encoding strings to UTF-8 bytes
+ */
+export const TextEncoder: typeof globalThis.TextEncoder;
+/**
+ * Transform stream wrapper for TextDecoder
+ * Decodes chunks of bytes to strings
+ */
+export const TextDecoderStream: typeof globalThis.TextDecoderStream;
+/**
+ * Transform stream wrapper for TextEncoder
+ * Encodes chunks of strings to UTF-8 bytes
+ */
+export const TextEncoderStream: typeof globalThis.TextEncoderStream;

package/encoding.js CHANGED Viewed

@@ -6,6 +6,8 @@ setMultibyteDecoder(createMultibyteDecoder)
 export {
   TextDecoder,
   TextEncoder,
+  TextDecoderStream,
+  TextEncoderStream,
   normalizeEncoding,
   getBOMEncoding,
   labelToName,

package/fallback/_utils.js CHANGED Viewed

@@ -5,14 +5,8 @@ export const isHermes = Boolean(globalThis.HermesInternal)
 export const isDeno = Boolean(globalThis.Deno)
 export const isLE = new Uint8Array(Uint16Array.of(258).buffer)[0] === 2
-let isNative = (x) => {
-  if (!x) return false
-  if (haveNativeBuffer) return true // we consider Node.js TextDecoder/TextEncoder native
-  const s = `${x}`
-  // See https://github.com/facebook/hermes/pull/1855#issuecomment-3659386410
-  return s.includes('[native code]') || s.includes(`[bytecode]`) // Static Hermes has [bytecode] for contrib, which includes TextEncoder/TextDecoder
-}
+// We consider Node.js TextDecoder/TextEncoder native
+let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]'))
 if (!haveNativeBuffer && isNative(() => {})) isNative = () => false // e.g. XS, we don't want false positives
 export const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
@@ -134,3 +128,5 @@ export const toBuf = (x) =>
   x.byteLength <= 64 && x.BYTES_PER_ELEMENT === 1
     ? Buffer.from(x)
     : Buffer.from(x.buffer, x.byteOffset, x.byteLength)
+export const E_STRING = 'Input is not a string'

package/fallback/encoding.js CHANGED Viewed

@@ -68,7 +68,16 @@ function isAnyUint8Array(x) {
 const fromSource = (x) => {
   if (x instanceof Uint8Array) return x
   if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
-  if (isAnyArrayBuffer(x)) return new Uint8Array(x)
+  if (isAnyArrayBuffer(x)) {
+    if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
+    // Old engines without .detached, try-catch
+    try {
+      return new Uint8Array(x)
+    } catch {
+      return new Uint8Array()
+    }
+  }
   throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
 }
@@ -259,6 +268,79 @@ export class TextEncoder {
   }
 }
+const E_NO_STREAMS = 'TransformStream global not present in the environment'
+// https://encoding.spec.whatwg.org/#interface-textdecoderstream
+export class TextDecoderStream {
+  constructor(encoding = 'utf-8', options = {}) {
+    if (!globalThis.TransformStream) throw new Error(E_NO_STREAMS)
+    const decoder = new TextDecoder(encoding, options)
+    const transform = new TransformStream({
+      transform: (chunk, controller) => {
+        const value = decoder.decode(fromSource(chunk), { stream: true })
+        if (value) controller.enqueue(value)
+      },
+      flush: (controller) => {
+        // https://streams.spec.whatwg.org/#dom-transformer-flush
+        const value = decoder.decode()
+        if (value) controller.enqueue(value)
+        // No need to call .terminate() (Node.js is wrong)
+      },
+    })
+    define(this, 'encoding', decoder.encoding)
+    define(this, 'fatal', decoder.fatal)
+    define(this, 'ignoreBOM', decoder.ignoreBOM)
+    define(this, 'readable', transform.readable)
+    define(this, 'writable', transform.writable)
+  }
+  get [Symbol.toStringTag]() {
+    return 'TextDecoderStream'
+  }
+}
+// https://encoding.spec.whatwg.org/#interface-textencoderstream
+// Only UTF-8 per spec
+export class TextEncoderStream {
+  constructor() {
+    if (!globalThis.TransformStream) throw new Error(E_NO_STREAMS)
+    let lead
+    const transform = new TransformStream({
+      // https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk
+      // Not identical in code, but reuses loose mode to have identical behavior
+      transform: (chunk, controller) => {
+        let s = String(chunk) // DOMString, might contain unpaired surrogates
+        if (s.length === 0) return
+        if (lead) {
+          s = lead + s
+          lead = null
+        }
+        const last = s.charCodeAt(s.length - 1) // Can't come from previous lead due to length check
+        if ((last & 0xfc_00) === 0xd8_00) {
+          lead = s[s.length - 1]
+          s = s.slice(0, -1)
+        }
+        if (s) controller.enqueue(utf8fromStringLoose(s))
+      },
+      // https://encoding.spec.whatwg.org/#encode-and-flush
+      flush: (controller) => {
+        if (lead) controller.enqueue(Uint8Array.of(0xef, 0xbf, 0xbd))
+      },
+    })
+    define(this, 'encoding', 'utf-8')
+    define(this, 'readable', transform.readable)
+    define(this, 'writable', transform.writable)
+  }
+  get [Symbol.toStringTag]() {
+    return 'TextEncoderStream'
+  }
+}
 // Warning: unlike whatwg-encoding, returns lowercased labels
 // Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
 export function getBOMEncoding(input) {

package/fallback/hex.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { assertUint8 } from '../assert.js'
-import { nativeDecoder, nativeEncoder, decode2string } from './_utils.js'
+import { nativeDecoder, nativeEncoder, decode2string, E_STRING } from './_utils.js'
 import { encodeAscii, decodeAscii } from './latin1.js'
 let hexArray // array of 256 bytes converted to two-char hex strings
@@ -52,7 +52,7 @@ export function toHex(arr) {
 }
 export function fromHex(str) {
-  if (typeof str !== 'string') throw new TypeError('Input is not a string')
+  if (typeof str !== 'string') throw new TypeError(E_STRING)
   if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
   const length = str.length / 2 // this helps Hermes in loops

package/fallback/latin1.js CHANGED Viewed

@@ -6,12 +6,19 @@ import {
   isHermes,
   isDeno,
   isLE,
+  skipWeb,
 } from './_utils.js'
+const { atob } = globalThis
+const { toBase64: web64 } = Uint8Array.prototype
 // See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
 // On Hermes, actual max is 0x20_000 minus current stack depth, 1/16 of that should be safe
 const maxFunctionArgs = 0x20_00
+// toBase64+atob path is faster on everything where fromBase64 is fast
+const useLatin1atob = web64 && atob && !skipWeb
 export function asciiPrefix(arr) {
   let p = 0 // verified ascii bytes
   const length = arr.length
@@ -46,6 +53,18 @@ export function decodeLatin1(arr, start = 0, stop = arr.length) {
   stop |= 0
   const total = stop - start
   if (total === 0) return ''
+  if (
+    useLatin1atob &&
+    total >= 256 &&
+    total < 1e8 &&
+    arr.toBase64 === web64 &&
+    arr.BYTES_PER_ELEMENT === 1
+  ) {
+    const sliced = start === 0 && stop === arr.length ? arr : arr.subarray(start, stop)
+    return atob(sliced.toBase64())
+  }
   if (total > maxFunctionArgs) {
     let prefix = ''
     for (let i = start; i < stop; ) {
@@ -107,8 +126,23 @@ export const encodeCharcodes = isHermes
       return arr
     }
+export function encodeAsciiPrefix(x, s) {
+  let i = 0
+  for (const len3 = s.length - 3; i < len3; i += 4) {
+    const x0 = s.charCodeAt(i), x1 = s.charCodeAt(i + 1), x2 = s.charCodeAt(i + 2), x3 = s.charCodeAt(i + 3) // prettier-ignore
+    if ((x0 | x1 | x2 | x3) >= 128) break
+    x[i] = x0
+    x[i + 1] = x1
+    x[i + 2] = x2
+    x[i + 3] = x3
+  }
+  return i
+}
 /* eslint-enable @exodus/mutable/no-param-reassign-prop-only */
+// Warning: can be used only on checked strings, converts strings to 8-bit
 export const encodeLatin1 = (str) => encodeCharcodes(str, new Uint8Array(str.length))
 // Expects nativeEncoder to be present

package/fallback/multi-byte.js CHANGED Viewed

@@ -474,6 +474,7 @@ const mappers = {
 export const isAsciiSuperset = (enc) => enc !== 'iso-2022-jp' // all others are ASCII supersets and can use fast path
 export function multibyteDecoder(enc, loose = false) {
+  if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
   if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding')
   // Input is assumed to be typechecked already

package/fallback/single-byte.encodings.js CHANGED Viewed

@@ -8,54 +8,75 @@ const h = (x) => new Array(x).fill(r)
 // Common ranges
-// prettier-ignore
-const k8a = [9345,2,10,4,4,4,4,8,8,8,8,68,4,4,4,4,1,1,1,-627,640,-903,1,46,28,1,-8645,8833,-8817,2,5,64,9305,1,1,-8449]
-// prettier-ignore
-const k8b = [-30,1,21,-18,1,15,-17,18,-13,...e(7),16,-15,1,1,1,-13,-4,26,-1,-20,17,5,-4,-2,3]
-const p1 = [8237, -8235, 8089, -7816, 7820, 8, -6, 1]
-const p2 = [-99, 12, 20, -12, 17, 37, -29, 2]
-// prettier-ignore
-const p3 = [1,1,65,-63,158,-156,1,1,1,40,30,42,-46,6,-66,1,83,-6,-6,-67,176,...p2,-114,121,-119,1,1,155,-49,25,16,-142,159,2,-158,38,42,-46,6,-35,1,52,-6,-6,-36,145,...p2,-83,90,-88,1,1,124,-49,25,16,-111,128,2]
-const i0 = e(33)
 // prettier-ignore
 const i2 = [-40,-147,1,64,-62,117,-51,-63,69,-67,79,-77,79,-77,1,64,2,51,4,-116,1,124,-122,1,129,22,-148,150,-148,1,133,-131,118,-116,1,33,-31,86,-51,-32,38,-36,48,-46,48,-46,1,33,2,51,4,-85,1,93,-91,1,98,22,-117,119,-117,1,102,374]
-const i4a = [-75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1]
-const i4b = [34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1]
-const i7 = [721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r]
-const i8 = [...e(26), r, r, 6692, 1, r]
+const i4a = [-75, -63, e(5), 104, -34, -67, 79, -77, 75, -73, 1]
+const i4b = [34, -32, e(5), 73, -34, -36, 48, -46, 44, -42, 1]
+const i7 = [721, 1, 1, -719, 721, -719, 721, e(19), r, 2, e(43), r]
+const i8 = [e(26), r, r, 6692, 1, r]
+const i9 = [79, -77, e(11), 84, 46, -127, e(16), 48, -46, e(11), 53, 46]
+const iB = [3425, e(57), h(4), 5, e(28), h(4)]
+const p2 = [-99, 12, 20, -12, 17, 37, -29, 2]
+const p1 = [8237, -8235, 8089, -7816, 7820, 8, -6, 1]
 const w0 = [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104]
 const w8 = [8072, 1, 3, 1, 5, -15, 1]
-const w1 = [...w8, -7480, 7750, -8129, 7897, -7911, -182]
-const w3 = [...w8, -8060, 8330, -8328, 8096, -8094]
+const w1 = [w8, -7480, 7750, -8129, 7897, -7911, -182]
+const w3 = [w8, -8060, 8330, -8328, 8096, -8094]
 const m0 = [8558, -8328, 8374, -66, -8539, 16, 8043, -8070]
+// prettier-ignore
+const p3 = [1,1,65,-63,158,-156,1,1,1,40,30,42,-46,6,-66,1,83,-6,-6,-67,176,p2,-114,121,-119,1,1,155,-49,25,16,-142,159,2,-158,38,42,-46,6,-35,1,52,-6,-6,-36,145,p2,-83,90,-88,1,1,124,-49,25,16,-111,128,2]
+// prettier-ignore
+const k8a = [9345,2,10,4,4,4,4,8,8,8,8,68,4,4,4,4,1,1,1,-627,640,-903,1,46,28,1,-8645,8833,-8817,2,5,64,9305,1,1,-8449]
+// prettier-ignore
+const k8b = [-30,1,21,-18,1,15,-17,18,-13,e(7),16,-15,1,1,1,-13,-4,26,-1,-20,17,5,-4,-2,3]
 // prettier-ignore
-export default {
-  ibm866: [913,...e(47),8530,1,1,-145,34,61,1,-12,-1,14,-18,6,6,-1,-1,-75,4,32,-8,-16,-28,60,34,1,-5,-6,21,-3,-6,-16,28,-5,1,-4,1,-12,-1,-6,1,24,-1,-82,-12,124,-4,8,4,-16,-8512,...e(15),-78,80,-77,80,-77,80,-73,80,-942,8553,-8546,8547,-260,-8306,9468,-9472],
-  'iso-8859-10': [...i0,100,14,16,8,-2,14,-143,148,-43,80,6,23,-208,189,-32,-154,85,14,16,8,-2,14,-128,133,-43,80,6,23,7831,-7850,-32,...i4a,1,1,117,7,-121,1,1,1,146,-144,154,-152,...e(5),...i4b,1,1,86,7,-90,1,1,1,115,-113,123,-121,1,1,1,1,58],
-  'iso-8859-13': [...i0,8061,-8059,1,1,8058,-8056,1,49,-47,173,-171,1,1,1,24,-22,1,1,1,8041,-8039,...p3,7835],
-  'iso-8859-14': [...i0,7522,1,-7520,103,1,7423,-7523,7641,-7639,7641,-119,231,-7749,1,202,7334,1,-7423,1,7455,1,-7563,7584,43,-42,44,-35,147,-111,1,-36,-7585,...e(15),165,-163,...e(5),7572,-7570,...e(5),153,-151,...e(16),134,-132,...e(5),7541,-7539,...e(5),122],
-  'iso-8859-15': [...i0,1,1,1,8201,-8199,187,-185,186,-184,...e(10),202,-200,1,1,199,-197,1,1,151,1,37],
-  'iso-8859-16': [...i0,100,1,60,8043,-142,-7870,-185,186,-184,367,-365,206,-204,205,1,-203,1,91,54,59,7840,-8039,1,199,-113,268,-350,151,1,37,4,-188,1,1,64,-62,66,-64,...e(9),65,51,-113,1,1,124,-122,132,22,-151,1,1,1,60,258,-315,1,1,1,33,-31,35,-33,...e(9),34,51,-82,1,1,93,-91,101,22,-120,1,1,1,29,258],
-  'iso-8859-2': [...i0,100,468,-407,-157,153,29,-179,1,184,-2,6,21,-204,208,-2,-203,85,470,-409,-142,138,29,364,-527,169,-2,6,21,355,-351,-2,...i2],
-  'iso-8859-3': [...i0,134,434,-565,1,r,128,-125,1,136,46,-64,22,-135,r,206,-203,119,-117,1,1,1,112,-110,1,121,46,-64,22,-120,r,191,-188,1,1,r,2,70,-2,-65,...e(8),r,2,1,1,1,76,-74,1,69,-67,1,1,1,144,-16,-125,1,1,1,r,2,39,-2,-34,...e(8),r,2,1,1,1,45,-43,1,38,-36,1,1,1,113,-16,380],
-  'iso-8859-4': [...i0,100,52,30,-178,132,19,-148,1,184,-78,16,68,-185,208,-206,1,85,470,-388,-163,117,19,395,-527,169,-78,16,68,-29,52,-51,...i4a,92,-26,53,7,-22,-98,1,1,1,1,154,-152,1,1,140,2,-139,...i4b,61,-26,53,7,-22,-67,1,1,1,1,123,-121,1,1,109,2,366],
-  'iso-8859-5': [...i0,865,...e(11),-863,865,...e(65),7367,-7365,...e(11),-949,951,1],
-  'iso-8859-6': [...i0,r,r,r,4,...h(7),1384,-1375,...h(13),1390,r,r,r,4,r,2,...e(25),r,r,r,r,r,6,...e(18),...h(13)],
-  'iso-8859-7': [...i0,8056,1,-8054,8201,3,-8201,1,1,1,721,-719,1,1,r,8040,-8037,1,1,1,721,1,1,-719,...i7],
-  'iso-8859-8': [...i0,r,2,...e(7),46,-44,...e(14),62,-60,1,1,1,...h(32),8025,-6727,...i8],
-  'koi8-r': [...k8a,8450,...e(14),-8544,8545,...e(10),-9411,933,...k8b,-28,...k8b],
-  'koi8-u': [...k8a,3,8448,-8446,1,8448,1,1,1,1,-8394,-51,8448,1,1,1,-8544,3,8543,-8541,1,8543,1,1,1,1,-8410,-130,-869,933,...k8b,-28,...k8b],
-  macintosh: [69,1,2,2,8,5,6,5,-1,2,2,-1,2,2,2,-1,2,1,2,-1,2,1,2,2,-1,2,2,-1,5,-1,2,1,7972,-8048,-14,1,4,8059,-8044,41,-49,-5,8313,-8302,-12,8632,-8602,18,8518,-8557,8627,1,-8640,16,8525,15,-2,-7759,7787,-8577,16,751,-707,18,-57,-30,11,...m0,32,3,18,125,1,7872,1,8,1,-5,1,-7970,9427,-9419,121,7884,104,-115,1,56007,1,-56033,-8042,8035,4,18,-8046,8,-9,10,-3,5,1,1,-3,7,1,63531,-63533,8,1,-2,88,405,22,-557,553,1,1,-546,549,-2,-20],
-  'windows-1250': [...w0,-7888,7897,-7903,10,25,-4,-233,...w8,-8060,8330,-8129,7897,-7903,10,25,-4,-218,551,17,-407,-157,96,-94,1,1,1,181,-179,1,1,1,205,-203,1,554,-409,-142,1,1,1,1,77,90,-164,130,416,-415,62,...i2],
-  'windows-1251': [899,1,7191,-7111,7115,8,-6,1,139,-124,-7207,7216,-7215,2,-1,4,67,7110,1,3,1,5,-15,1,-8060,8330,-7369,7137,-7136,2,-1,4,-959,878,80,-86,-868,1004,-1002,1,858,-856,859,-857,1,1,1,857,-855,1,853,80,59,-988,1,1,922,7365,-7362,-921,925,-83,80,2,-71,...e(63)],
-  'windows-1252': [...p1,-7515,7530,-7888,7897,-7911,-197,240,-238,1,...w1,225,-6],
-  'windows-1253': [...p1,-8089,8104,-8102,8111,-8109,1,1,1,1,...w3,1,1,1,1,741,1,-739,1,1,1,1,1,1,r,2,1,1,1,8039,-8037,1,1,1,721,-719,1,1,...i7],
-  'windows-1254': [...p1,-7515,7530,-7888,7897,-7911,-197,1,1,1,...w1,1,218,-216,...e(47),79,-77,...e(11),84,46,-127,...e(16),48,-46,...e(11),53,46],
-  'windows-1255': [...p1,-7515,7530,-8102,8111,-8109,1,1,1,1,...w8,-7480,7750,-8328,8096,-8094,...e(7),8199,-8197,1,1,1,1,46,-44,...e(14),62,-60,1,1,1,1,1265,...e(19),45,1,1,1,1,...h(7),-36,...i8],
-  'windows-1256': [8237,-6702,6556,-7816,7820,8,-6,1,-7515,7530,-6583,6592,-7911,1332,18,-16,39,6505,1,3,1,5,-15,1,-6507,6777,-6801,6569,-7911,7865,1,-6483,-1562,1388,-1386,...e(7),1557,-1555,...e(14),1378,-1376,1,1,1,1377,162,-160,...e(21),-1375,1376,1,1,1,6,1,1,1,-1379,1380,-1378,1379,1,1,1,-1377,1,1,1,1,1374,1,-1372,1,1372,1,1,1,-1370,1371,1,-1369,1370,-1368,1369,-1367,1,7954,1,-6461],
-  'windows-1257': [...w0,-8102,8111,-8109,28,543,-527,-40,...w3,19,556,-572,1,r,2,1,1,r,2,1,49,-47,173,-171,1,1,1,24,-22,...e(5),...p3,347],
-  'windows-1258': [...p1,-7515,7530,-8102,8111,-7911,-197,1,1,1,...w8,-7480,7750,-8328,8096,-7911,-182,1,218,-216,...e(34),64,-62,...e(7),565,-563,1,1,65,-63,568,-566,1,204,-202,1,1,1,1,1,1,211,340,-548,1,1,1,33,-31,...e(7),534,-532,1,1,34,-32,562,-560,1,173,-171,1,1,1,1,1,1,180,7931],
-  'windows-874': [8237,-8235,1,1,1,8098,-8096,...e(10),...w8,-8060,...e(8),3425,...e(57),r,r,r,r,5,...e(28),r,r,r,r],
-  'x-mac-cyrillic': [913,...e(31),7153,-8048,992,-1005,4,8059,-8044,848,-856,-5,8313,-7456,80,7694,-7773,80,7627,-8557,8627,1,-7695,-929,988,-137,-4,80,-77,80,-78,80,-79,80,-2,-83,-857,...m0,875,80,-79,80,-7,7102,1,8,1,-5,1,-7970,7975,-7184,80,-79,80,7351,-7445,80,-2,-31,...e(30),7262]
+const maps = {
+  ibm866: [913,e(47),8530,1,1,-145,34,61,1,-12,-1,14,-18,6,6,-1,-1,-75,4,32,-8,-16,-28,60,34,1,-5,-6,21,-3,-6,-16,28,-5,1,-4,1,-12,-1,-6,1,24,-1,-82,-12,124,-4,8,4,-16,-8512,e(15),-78,80,-77,80,-77,80,-73,80,-942,8553,-8546,8547,-260,-8306,9468,-9472],
+  'koi8-r': [k8a,8450,e(14),-8544,8545,e(10),-9411,933,k8b,-28,k8b],
+  'koi8-u': [k8a,3,8448,-8446,1,8448,1,1,1,1,-8394,-51,8448,1,1,1,-8544,3,8543,-8541,1,8543,1,1,1,1,-8410,-130,-869,933,k8b,-28,k8b],
+  'x-mac-cyrillic': [913,e(31),7153,-8048,992,-1005,4,8059,-8044,848,-856,-5,8313,-7456,80,7694,-7773,80,7627,-8557,8627,1,-7695,-929,988,-137,-4,80,-77,80,-78,80,-79,80,-2,-83,-857,m0,875,80,-79,80,-7,7102,1,8,1,-5,1,-7970,7975,-7184,80,-79,80,7351,-7445,80,-2,-31,e(30),7262],
+  macintosh: [69,1,2,2,8,5,6,5,-1,2,2,-1,2,2,2,-1,2,1,2,-1,2,1,2,2,-1,2,2,-1,5,-1,2,1,7972,-8048,-14,1,4,8059,-8044,41,-49,-5,8313,-8302,-12,8632,-8602,18,8518,-8557,8627,1,-8640,16,8525,15,-2,-7759,7787,-8577,16,751,-707,18,-57,-30,11,m0,32,3,18,125,1,7872,1,8,1,-5,1,-7970,9427,-9419,121,7884,104,-115,1,56007,1,-56033,-8042,8035,4,18,-8046,8,-9,10,-3,5,1,1,-3,7,1,63531,-63533,8,1,-2,88,405,22,-557,553,1,1,-546,549,-2,-20],
+  'windows-874': [8237,-8235,1,1,1,8098,-8096,e(10),w8,-8060,e(8),iB],
 }
+// windows-1250 - windows-1258
+// prettier-ignore
+;[
+  [w0,-7888,7897,-7903,10,25,-4,-233,w8,-8060,8330,-8129,7897,-7903,10,25,-4,-218,551,17,-407,-157,96,-94,1,1,1,181,-179,1,1,1,205,-203,1,554,-409,-142,1,1,1,1,77,90,-164,130,416,-415,62,i2],
+  [899,1,7191,-7111,7115,8,-6,1,139,-124,-7207,7216,-7215,2,-1,4,67,7110,1,3,1,5,-15,1,-8060,8330,-7369,7137,-7136,2,-1,4,-959,878,80,-86,-868,1004,-1002,1,858,-856,859,-857,1,1,1,857,-855,1,853,80,59,-988,1,1,922,7365,-7362,-921,925,-83,80,2,-71,e(63)],
+  [p1,-7515,7530,-7888,7897,-7911,-197,240,-238,1,w1,225,-6],
+  [p1,-8089,8104,-8102,8111,-8109,1,1,1,1,w3,1,1,1,1,741,1,-739,e(6),r,2,1,1,1,8039,-8037,1,1,1,721,-719,1,1,i7],
+  [p1,-7515,7530,-7888,7897,-7911,-197,1,1,1,w1,1,218,-216,e(47),i9],
+  [p1,-7515,7530,-8102,8111,-8109,1,1,1,1,w8,-7480,7750,-8328,8096,-8094,e(7),8199,-8197,1,1,1,1,46,-44,e(14),62,-60,1,1,1,1,1265,e(19),45,1,1,1,1,h(7),-36,i8],
+  [8237,-6702,6556,-7816,7820,8,-6,1,-7515,7530,-6583,6592,-7911,1332,18,-16,39,6505,1,3,1,5,-15,1,-6507,6777,-6801,6569,-7911,7865,1,-6483,-1562,1388,-1386,e(7),1557,-1555,e(14),1378,-1376,1,1,1,1377,162,-160,e(21),-1375,1376,1,1,1,6,1,1,1,-1379,1380,-1378,1379,1,1,1,-1377,1,1,1,1,1374,1,-1372,1,1372,1,1,1,-1370,1371,1,-1369,1370,-1368,1369,-1367,1,7954,1,-6461],
+  [w0,-8102,8111,-8109,28,543,-527,-40,w3,19,556,-572,1,r,2,1,1,r,2,1,49,-47,173,-171,1,1,1,24,-22,e(5),p3,347],
+  [p1,-7515,7530,-8102,8111,-7911,-197,1,1,1,w8,-7480,7750,-8328,8096,-7911,-182,1,218,-216,e(34),64,-62,e(7),565,-563,1,1,65,-63,568,-566,1,204,-202,e(6),211,340,-548,1,1,1,33,-31,e(7),534,-532,1,1,34,-32,562,-560,1,173,-171,e(6),180,7931],
+].forEach((m, i) => {
+  maps[`windows-${i + 1250}`] = m
+});
+// iso-8859-1 - iso-8859-16
+// prettier-ignore
+;[
+  [], // Actual Latin1 / Unicode subset, non-WHATWG, which maps iso-8859-1 to windows-1252
+  [100,468,-407,-157,153,29,-179,1,184,-2,6,21,-204,208,-2,-203,85,470,-409,-142,138,29,364,-527,169,-2,6,21,355,-351,-2,i2],
+  [134,434,-565,1,r,128,-125,1,136,46,-64,22,-135,r,206,-203,119,-117,1,1,1,112,-110,1,121,46,-64,22,-120,r,191,-188,1,1,r,2,70,-2,-65,e(8),r,2,1,1,1,76,-74,1,69,-67,1,1,1,144,-16,-125,1,1,1,r,2,39,-2,-34,e(8),r,2,1,1,1,45,-43,1,38,-36,1,1,1,113,-16,380],
+  [100,52,30,-178,132,19,-148,1,184,-78,16,68,-185,208,-206,1,85,470,-388,-163,117,19,395,-527,169,-78,16,68,-29,52,-51,i4a,92,-26,53,7,-22,-98,1,1,1,1,154,-152,1,1,140,2,-139,i4b,61,-26,53,7,-22,-67,1,1,1,1,123,-121,1,1,109,2,366],
+  [865,e(11),-863,865,e(65),7367,-7365,e(11),-949,951,1],
+  [r,r,r,4,h(7),1384,-1375,h(13),1390,r,r,r,4,r,2,e(25),h(5),6,e(18),h(13)],
+  [8056,1,-8054,8201,3,-8201,1,1,1,721,-719,1,1,r,8040,-8037,1,1,1,721,1,1,-719,i7],
+  [r,2,e(7),46,-44,e(14),62,-60,1,1,1,h(32),8025,-6727,i8],
+  [e(47),i9], // non-WHATWG, which maps iso-8859-9 to windows-1254
+  [100,14,16,8,-2,14,-143,148,-43,80,6,23,-208,189,-32,-154,85,14,16,8,-2,14,-128,133,-43,80,6,23,7831,-7850,-32,i4a,1,1,117,7,-121,1,1,1,146,-144,154,-152,e(5),i4b,1,1,86,7,-90,1,1,1,115,-113,123,-121,1,1,1,1,58],
+  iB, // non-WHATWG, which maps iso-8859-11 to windows-874
+  null, // no 12
+  [8061,-8059,1,1,8058,-8056,1,49,-47,173,-171,1,1,1,24,-22,1,1,1,8041,-8039,p3,7835],
+  [7522,1,-7520,103,1,7423,-7523,7641,-7639,7641,-119,231,-7749,1,202,7334,1,-7423,1,7455,1,-7563,7584,43,-42,44,-35,147,-111,1,-36,-7585,e(15),165,-163,e(5),7572,-7570,e(5),153,-151,e(16),134,-132,e(5),7541,-7539,e(5),122],
+  [1,1,1,8201,-8199,187,-185,186,-184,e(10),202,-200,1,1,199,-197,1,1,151,1,37],
+  [100,1,60,8043,-142,-7870,-185,186,-184,367,-365,206,-204,205,1,-203,1,91,54,59,7840,-8039,1,199,-113,268,-350,151,1,37,4,-188,1,1,64,-62,66,-64,e(9),65,51,-113,1,1,124,-122,132,22,-151,1,1,1,60,258,-315,1,1,1,33,-31,35,-33,e(9),34,51,-82,1,1,93,-91,101,22,-120,1,1,1,29,258],
+].forEach((m, i) => {
+  if (m) maps[`iso-8859-${i + 1}`] = [e(33), m]
+})
+export default maps

package/fallback/single-byte.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { asciiPrefix, decodeAscii, decodeLatin1 } from './latin1.js'
 import encodings from './single-byte.encodings.js'
-import { decode2string } from './_utils.js'
+import { decode2string, nativeDecoder } from './_utils.js'
 export const E_STRICT = 'Input is not well-formed for this encoding'
 const xUserDefined = 'x-user-defined'
@@ -18,11 +18,13 @@ function getEncoding(encoding) {
   if (encoding === xUserDefined) return Array.from({ length: 128 }, (_, i) => 0xf7_80 + i)
   if (encoding === iso8i) encoding = 'iso-8859-8'
   let prev = 127
-  return encodings[encoding].map((x) => (x === r ? x : (prev += x))) // eslint-disable-line no-return-assign
+  const enc = encodings[encoding].flat().flat().flat() // max depth is 3, rechecked by tests
+  return enc.map((x) => (x === r ? x : (prev += x))) // eslint-disable-line no-return-assign
 }
 const mappers = new Map()
 const decoders = new Map()
+const encmaps = new Map()
 // Used only on Node.js, no reason to optimize for anything else
 // E.g. avoiding .from and filling zero-initialized arr manually is faster on Hermes, but we avoid this codepath on Hermes completely
@@ -31,7 +33,7 @@ export function encodingMapper(encoding) {
   if (cached) return cached
   const codes = getEncoding(encoding)
-  const incomplete = codes.includes(0xff_fd)
+  const incomplete = codes.includes(r)
   let map
   const mapper = (arr, start = 0) => {
     if (!map) {
@@ -63,10 +65,12 @@ export function encodingMapper(encoding) {
 export function encodingDecoder(encoding) {
   const cached = decoders.get(encoding)
   if (cached) return cached
+  const isLatin1 = encoding === 'iso-8859-1'
+  if (isLatin1 && !nativeDecoder) return (arr, loose = false) => decodeLatin1(arr) // native decoder is faster for ascii below
   let strings
   const codes = getEncoding(encoding)
-  const incomplete = codes.includes(0xff_fd)
+  const incomplete = codes.includes(r)
   const decoder = (arr, loose = false) => {
     if (!strings) {
       const allCodes = Array.from({ length: 128 }, (_, i) => i).concat(codes)
@@ -76,6 +80,7 @@ export function encodingDecoder(encoding) {
     const prefixLen = asciiPrefix(arr)
     if (prefixLen === arr.length) return decodeAscii(arr)
+    if (isLatin1) return decodeLatin1(arr) // TODO: check if decodeAscii with subarray is faster for small prefixes too
     const prefix = decodeLatin1(arr, 0, prefixLen) // TODO: check if decodeAscii with subarray is faster for small prefixes too
     const suffix = decode2string(arr, prefix.length, arr.length, strings)
     if (!loose && incomplete && suffix.includes('\uFFFD')) throw new TypeError(E_STRICT)
@@ -85,3 +90,21 @@ export function encodingDecoder(encoding) {
   decoders.set(encoding, decoder)
   return decoder
 }
+export function encodeMap(encoding) { // returns a per-encoding cached Uint8Array lookup: index = UTF-16 char code, value = byte (0 = no mapping, except index 0 itself)
+  const cached = encmaps.get(encoding)
+  if (cached) return cached
+  const codes = getEncoding(encoding) // char codes for the upper byte range; entry k belongs to byte 128 + k
+  let max = 128
+  while (codes.length < 128) codes.push(128 + codes.length) // pad short tables so each remaining upper byte maps to the char code equal to its own value
+  for (const code of codes) if (code > max && code !== r) max = code // highest char code that needs a slot; r is skipped because it is never stored below
+  const map = new Uint8Array(max + 1) // < 10 KiB for all except macintosh, 63 KiB for macintosh
+  for (let i = 0; i < 128; i++) {
+    map[i] = i // bytes 0-127 encode the char code with the same value
+    if (codes[i] !== r) map[codes[i]] = 128 + i // invert the decode table; entries equal to r get no reverse mapping
+  }
+  encmaps.set(encoding, map)
+  return map
+}

package/fallback/utf8.js CHANGED Viewed

@@ -1,3 +1,5 @@
+import { encodeAsciiPrefix } from './latin1.js'
 export const E_STRICT = 'Input is not well-formed utf8'
 export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
@@ -152,9 +154,10 @@ export function encode(string, loose) {
   const length = string.length
   let small = true
   let bytes = new Uint8Array(length) // assume ascii
-  let p = 0
-  for (let i = 0; i < length; i++) {
+  let i = encodeAsciiPrefix(bytes, string)
+  let p = i
+  for (; i < length; i++) {
     let code = string.charCodeAt(i)
     if (code < 0x80) {
       bytes[p++] = code

package/hex.node.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { assertUint8 } from './assert.js'
 import { typedView } from './array.js'
+import { E_STRING } from './fallback/_utils.js'
 import { E_HEX } from './fallback/hex.js'
 if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
@@ -19,7 +20,7 @@ export function toHex(arr) {
 export const fromHex = Uint8Array.fromHex
   ? (str, format = 'uint8') => typedView(Uint8Array.fromHex(str), format)
   : (str, format = 'uint8') => {
-      if (typeof str !== 'string') throw new TypeError('Input is not a string')
+      if (typeof str !== 'string') throw new TypeError(E_STRING)
       if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
       if (denoBug && /[^\dA-Fa-f]/.test(str)) throw new SyntaxError(E_HEX)
       const buf = Buffer.from(str, 'hex') // will stop on first non-hex character, so we can just validate length

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@exodus/bytes",
-  "version": "1.6.0",
+  "version": "1.8.0",
   "description": "Various operations on Uint8Array data",
   "scripts": {
     "lint": "eslint .",
@@ -72,7 +72,9 @@
     "/bech32.js",
     "/bigint.js",
     "/encoding.js",
+    "/encoding.d.ts",
     "/encoding-lite.js",
+    "/encoding-lite.d.ts",
     "/hex.js",
     "/hex.d.ts",
     "/hex.node.js",
@@ -117,8 +119,14 @@
       "node": "./single-byte.node.js",
       "default": "./single-byte.js"
     },
-    "./encoding.js": "./encoding.js",
-    "./encoding-lite.js": "./encoding-lite.js",
+    "./encoding.js": {
+      "types": "./encoding.d.ts",
+      "default": "./encoding.js"
+    },
+    "./encoding-lite.js": {
+      "types": "./encoding-lite.d.ts",
+      "default": "./encoding-lite.js"
+    },
     "./utf16.js": {
       "node": "./utf16.node.js",
       "default": "./utf16.js"
@@ -145,6 +153,7 @@
     "@exodus/prettier": "^1.0.0",
     "@exodus/test": "^1.0.0-rc.109",
     "@noble/hashes": "^2.0.1",
+    "@petamoriken/float16": "^3.9.3",
     "@scure/base": "^1.2.6",
     "@stablelib/base64": "^2.0.1",
     "@stablelib/hex": "^2.0.1",
@@ -172,7 +181,7 @@
     "typescript": "^5.9.3",
     "uint8array-tools": "^0.0.9",
     "utf8": "^3.0.0",
-    "whatwg-encoding": "^3.1.1",
+    "web-streams-polyfill": "^4.2.0",
     "wif": "^5.0.0"
   },
   "prettier": "@exodus/prettier",

package/single-byte.js CHANGED Viewed

@@ -1,15 +1,17 @@
 import { assertUint8 } from './assert.js'
-import { canDecoders } from './fallback/_utils.js'
-import { assertEncoding, encodingDecoder } from './fallback/single-byte.js'
+import { canDecoders, nativeEncoder, isHermes, skipWeb, E_STRING } from './fallback/_utils.js'
+import { encodeAscii, encodeAsciiPrefix, encodeLatin1 } from './fallback/latin1.js'
+import { assertEncoding, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
-const { TextDecoder } = globalThis
+const { TextDecoder, btoa } = globalThis
 let windows1252works
 // prettier-ignore
 const skipNative = new Set([
-  'iso-8859-16', // iso-8859-16 is somehow broken in WebKit, at least on CI
+  'iso-8859-1', 'iso-8859-9', 'iso-8859-11', // non-WHATWG
   'iso-8859-6', 'iso-8859-8', 'iso-8859-8-i', // slow in all 3 engines
+  'iso-8859-16', // iso-8859-16 is somehow broken in WebKit, at least on CI
 ])
 function shouldUseNative(enc) {
@@ -34,6 +36,7 @@ function shouldUseNative(enc) {
 }
 export function createSinglebyteDecoder(encoding, loose = false) {
+  if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
   assertEncoding(encoding)
   if (canDecoders && shouldUseNative(encoding)) {
@@ -56,4 +59,79 @@ export function createSinglebyteDecoder(encoding, loose = false) {
   }
 }
+const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
+function encode(s, m) { // encodes string s through lookup table m (char code -> byte, as built by encodeMap) into a new Uint8Array
+  const len = s.length
+  const x = new Uint8Array(len) // one output byte per UTF-16 code unit of s
+  let i = nativeEncoder ? 0 : encodeAsciiPrefix(x, s) // without nativeEncoder, copy the leading ASCII run first and start mapping after it
+  if (!isHermes) { // the 4-way unrolled loop is skipped on Hermes; the plain loop below then does all the work
+    for (const len3 = len - 3; i < len3; i += 4) {
+      const x0 = s.charCodeAt(i), x1 = s.charCodeAt(i + 1), x2 = s.charCodeAt(i + 2), x3 = s.charCodeAt(i + 3) // prettier-ignore
+      const c0 = m[x0], c1 = m[x1], c2 = m[x2], c3 = m[x3] // prettier-ignore
+      if ((!c0 && x0) || (!c1 && x1) || (!c2 && x2) || (!c3 && x3)) throw new TypeError(E_STRICT) // a zero or missing table entry means unmappable, unless the char code itself is 0
+      x[i] = c0
+      x[i + 1] = c1
+      x[i + 2] = c2
+      x[i + 3] = c3
+    }
+  }
+  for (; i < len; i++) { // remaining chars, one at a time
+    const x0 = s.charCodeAt(i)
+    const c0 = m[x0]
+    if (!c0 && x0) return null // NOTE(review): reports failure as null while the loop above throws; createSinglebyteEncoder turns null into the same TypeError
+    x[i] = c0
+  }
+  return x
+}
+// fromBase64+btoa path is faster on everything where fromBase64 is fast
+const useLatin1btoa = Uint8Array.fromBase64 && btoa && !skipWeb
+export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) { // returns (s) => Uint8Array for the given single-byte encoding; unencodable or non-string input throws TypeError
+  // TODO: replacement, truncate (replacement will need varying length)
+  if (mode !== 'fatal') throw new Error('Unsupported mode')
+  const m = encodeMap(encoding) // asserts
+  const isLatin1 = encoding === 'iso-8859-1'
+  // No single-byte encoder produces surrogate pairs, so any surrogate is invalid
+  // This needs special treatment only to decide how many replacement chars to output, one or two
+  // Not much use in running isWellFormed, most likely cause of error is unmapped chars, not surrogate pairs
+  return (s) => {
+    if (typeof s !== 'string') throw new TypeError(E_STRING)
+    if (isLatin1) {
+      // max limit is to not produce base64 strings that are too long
+      if (useLatin1btoa && s.length >= 1024 && s.length < 1e8) {
+        try {
+          return Uint8Array.fromBase64(btoa(s)) // fails on non-latin1
+        } catch { // any failure of the btoa/fromBase64 pair is reported as a strict-encoding error
+          throw new TypeError(E_STRICT)
+        }
+      }
+      if (NON_LATIN.test(s)) throw new TypeError(E_STRICT) // encodeLatin1 does not validate, so reject char codes above 0xFF first
+      return encodeLatin1(s)
+    }
+    // Instead of an ASCII regex check, encode optimistically - this is faster
+    // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
+    if (nativeEncoder && !NON_LATIN.test(s)) {
+      try {
+        return encodeAscii(s, E_STRICT)
+      } catch {} // deliberate: if the optimistic ASCII attempt throws, fall through to the table-based encode
+    }
+    const res = encode(s, m)
+    if (!res) throw new TypeError(E_STRICT) // encode() returns null when its tail loop meets an unmappable char
+    return res
+  }
+}
+export const latin1toString = createSinglebyteDecoder('iso-8859-1')
+export const latin1fromString = createSinglebyteEncoder('iso-8859-1')
 export const windows1252toString = createSinglebyteDecoder('windows-1252')
+export const windows1252fromString = createSinglebyteEncoder('windows-1252')

package/single-byte.node.js CHANGED Viewed

@@ -1,8 +1,8 @@
 import { assertUint8 } from './assert.js'
 import { isAscii } from 'node:buffer'
-import { isDeno, isLE, toBuf } from './fallback/_utils.js'
+import { isDeno, isLE, toBuf, E_STRING } from './fallback/_utils.js'
 import { asciiPrefix } from './fallback/latin1.js'
-import { encodingMapper, encodingDecoder, E_STRICT } from './fallback/single-byte.js'
+import { encodingMapper, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
 function latin1Prefix(arr, start) {
   let p = start | 0
@@ -22,7 +22,7 @@ function latin1Prefix(arr, start) {
 }
 export function createSinglebyteDecoder(encoding, loose = false) {
-  const latin1path = encoding === 'windows-1252'
+  if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
   if (isDeno) {
     const jsDecoder = encodingDecoder(encoding) // asserts
     return (arr) => {
@@ -33,11 +33,13 @@ export function createSinglebyteDecoder(encoding, loose = false) {
     }
   }
+  const isLatin1 = encoding === 'iso-8859-1'
+  const latin1path = encoding === 'windows-1252'
   const { incomplete, mapper } = encodingMapper(encoding) // asserts
   return (arr) => {
     assertUint8(arr)
     if (arr.byteLength === 0) return ''
-    if (isAscii(arr)) return toBuf(arr).latin1Slice(0, arr.byteLength) // .latin1Slice is faster than .asciiSlice
+    if (isLatin1 || isAscii(arr)) return toBuf(arr).latin1Slice() // .latin1Slice is faster than .asciiSlice
     // Node.js TextDecoder is broken, so we can't use it. It's also slow anyway
@@ -57,4 +59,59 @@ export function createSinglebyteDecoder(encoding, loose = false) {
   }
 }
+const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
+export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) { // returns (s) => Uint8Array for the given single-byte encoding; unencodable or non-string input throws TypeError
+  // TODO: replacement, truncate (replacement will need varying length)
+  if (mode !== 'fatal') throw new Error('Unsupported mode')
+  const m = encodeMap(encoding) // asserts
+  const isLatin1 = encoding === 'iso-8859-1'
+  return (s) => {
+    if (typeof s !== 'string') throw new TypeError(E_STRING)
+    if (isLatin1) {
+      if (NON_LATIN.test(s)) throw new TypeError(E_STRICT) // Buffer's 'latin1' encoding coerces higher char codes instead of failing, so reject them up front
+      const b = Buffer.from(s, 'latin1')
+      return new Uint8Array(b.buffer, b.byteOffset, b.byteLength) // plain Uint8Array view over the Buffer's memory, no copy
+    }
+    // Instead of an ASCII regex check, encode optimistically - this is faster
+    // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
+    if (!NON_LATIN.test(s)) {
+      const b = Buffer.from(s, 'utf8') // ascii/latin1 coerces, we need to check
+      if (b.length === s.length) return new Uint8Array(b.buffer, b.byteOffset, b.byteLength) // equal lengths mean every char took one UTF-8 byte, i.e. the string is pure ASCII
+    }
+    const len = s.length
+    let i = 0
+    const b = Buffer.from(s, 'utf-16le') // aligned
+    if (!isLE) b.swap16() // flip the little-endian bytes before viewing them as 16-bit units (isLE presumably reflects host byte order)
+    const x = new Uint16Array(b.buffer, b.byteOffset, b.byteLength / 2) // the string's UTF-16 code units; overwritten in place with mapped bytes below
+    for (const len3 = len - 3; i < len3; i += 4) {
+      const x0 = x[i], x1 = x[i + 1], x2 = x[i + 2], x3 = x[i + 3] // prettier-ignore
+      const c0 = m[x0], c1 = m[x1], c2 = m[x2], c3 = m[x3] // prettier-ignore
+      if (!(c0 && c1 && c2 && c3) && ((!c0 && x0) || (!c1 && x1) || (!c2 && x2) || (!c3 && x3))) { // cheap all-nonzero test first; the per-char check only runs when some entry is 0 or missing
+        throw new TypeError(E_STRICT)
+      }
+      x[i] = c0
+      x[i + 1] = c1
+      x[i + 2] = c2
+      x[i + 3] = c3
+    }
+    for (; i < len; i++) { // remaining chars, one at a time
+      const x0 = x[i]
+      const c0 = m[x0]
+      if (!c0 && x0) throw new TypeError(E_STRICT) // zero or missing table entry means unmappable, unless the char code itself is 0
+      x[i] = c0
+    }
+    return new Uint8Array(x) // copies element by element, narrowing each 16-bit slot (now holding a byte value) to 8 bits
+  }
+}
+export const latin1toString = createSinglebyteDecoder('iso-8859-1')
+export const latin1fromString = createSinglebyteEncoder('iso-8859-1')
 export const windows1252toString = createSinglebyteDecoder('windows-1252')
+export const windows1252fromString = createSinglebyteEncoder('windows-1252')

package/utf16.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import * as js from './fallback/utf16.js'
-import { canDecoders, isLE } from './fallback/_utils.js'
+import { canDecoders, isLE, E_STRING } from './fallback/_utils.js'
 const { TextDecoder } = globalThis // Buffer is optional
 const ignoreBOM = true
@@ -18,7 +18,7 @@ const { E_STRICT, E_STRICT_UNICODE } = js
 const to8 = (a) => new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
 function encode(str, loose = false, format = 'uint16') {
-  if (typeof str !== 'string') throw new TypeError('Input is not a string')
+  if (typeof str !== 'string') throw new TypeError(E_STRING)
   if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
     throw new TypeError('Unknown format')
   }

package/utf16.node.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { isDeno, isLE } from './fallback/_utils.js'
+import { isDeno, isLE, E_STRING } from './fallback/_utils.js'
 import { E_STRICT, E_STRICT_UNICODE } from './fallback/utf16.js'
 if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
@@ -9,7 +9,7 @@ const to8 = (a) => new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
 // Unlike utf8, operates on Uint16Arrays by default
 function encode(str, loose = false, format = 'uint16') {
-  if (typeof str !== 'string') throw new TypeError('Input is not a string')
+  if (typeof str !== 'string') throw new TypeError(E_STRING)
   if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
     throw new TypeError('Unknown format')
   }

package/utf8.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { assertUint8 } from './assert.js'
 import { typedView } from './array.js'
-import { isHermes, nativeDecoder, nativeEncoder } from './fallback/_utils.js'
+import { isHermes, nativeDecoder, nativeEncoder, E_STRING } from './fallback/_utils.js'
 import { asciiPrefix, decodeLatin1 } from './fallback/latin1.js'
 import * as js from './fallback/utf8.js'
@@ -44,7 +44,7 @@ function deLoose(str, loose, res) {
 }
 function encode(str, loose = false) {
-  if (typeof str !== 'string') throw new TypeError('Input is not a string')
+  if (typeof str !== 'string') throw new TypeError(E_STRING)
   if (str.length === 0) return new Uint8Array() // faster than Uint8Array.of
   if (nativeEncoder) return deLoose(str, loose, nativeEncoder.encode(str))
   // No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder

package/utf8.node.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { assertUint8 } from './assert.js'
 import { typedView } from './array.js'
+import { E_STRING } from './fallback/_utils.js'
 import { E_STRICT, E_STRICT_UNICODE } from './fallback/utf8.js'
 import { isAscii } from 'node:buffer'
@@ -17,7 +18,7 @@ try {
 }
 function encode(str, loose = false) {
-  if (typeof str !== 'string') throw new TypeError('Input is not a string')
+  if (typeof str !== 'string') throw new TypeError(E_STRING)
   const strLength = str.length
   if (strLength === 0) return new Uint8Array() // faster than Uint8Array.of
   let res