@exodus/bytes 1.0.0-rc.8 → 1.0.0-rc.9

package/README.md CHANGED
@@ -1,27 +1,151 @@
1
1
  # `@exodus/bytes`
2
2
 
3
- `Uint8Array` conversion to and from `base64`, `base32`, `base58`, `hex`, and `utf8`
3
+ `Uint8Array` conversion to and from `base64`, `base32`, `base58`, `hex`, `utf8`, `utf16`, `bech32` and `wif`
4
4
 
5
- [Fast](./Performance.md)
5
+ ## Strict
6
6
 
7
- Performs proper input validation
7
+ Performs proper input validation, ensuring no garbage-in-garbage-out
8
+
9
+ Tested on Node.js, Deno, Bun, browsers (including Servo), Hermes, QuickJS, and bare-bones engines in CI [(how?)](https://github.com/ExodusMovement/test#exodustest)
10
+
11
+ ## Fast
12
+
13
+ * `10-20x` faster than `Buffer` polyfill
14
+ * `2-10x` faster than `iconv-lite`
15
+
16
+ The numbers above are for the pure JS fallback
17
+
18
+ It's up to `100x` faster when a native implementation is available \
19
+ e.g. in `utf8fromString` on Hermes / React Native or `fromHex` in Chrome
20
+
21
+ Also:
22
+ * `3-8x` faster than `bs58`
23
+ * `10-30x` faster than `@scure/base` (or `>100x` on Node.js <25)
24
+ * Faster in `utf8toString` / `utf8fromString` than `Buffer` or `TextDecoder` / `TextEncoder` on Node.js
25
+
26
+ See [Performance](./Performance.md) for more info
27
+
28
+ ## TextEncoder / TextDecoder polyfill
29
+
30
+ ```js
31
+ import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
32
+ ```
33
+
34
+ Less than half the bundle size of [text-encoding](https://npmjs.com/text-encoding), [whatwg-encoding](https://npmjs.com/whatwg-encoding) or [iconv-lite](https://npmjs.com/iconv-lite) (gzipped or not), and [much faster](#fast).
35
+
36
+ Spec compliant, passing WPT and covered with extra tests.
37
+
38
+ Moreover, tests for this library uncovered [bugs in all major implementations](https://docs.google.com/spreadsheets/d/1pdEefRG6r9fZy61WHGz0TKSt8cO4ISWqlpBN5KntIvQ/edit).
39
+
40
+ [Faster than Node.js native implementation on Node.js](https://github.com/nodejs/node/issues/61041#issuecomment-3649242024).
41
+
42
+ ### Caveat: `TextDecoder` / `TextEncoder` APIs are lossy by default per spec
43
+
44
+ _These are provided only as a compatibility layer; prefer the hardened APIs in new code._
45
+
46
+ * `TextDecoder` can (and should) be used with the `{ fatal: true }` option wherever correctness / lossless transforms are required
47
+
48
+ * `TextEncoder` does not support a fatal mode per spec; it always performs replacement.
49
+
50
+ That is not suitable for hashing, cryptography or consensus applications.\
51
+ Otherwise there would be non-equal strings with equal signatures and hashes: the collision is caused by the lossy transform of a JS string to bytes.
52
+ Such strings also survive e.g. `JSON.stringify` / `JSON.parse` or being sent over the network.
53
+
54
+ Use the strict APIs in new applications; see `utf8fromString` / `utf16fromString` below.\
55
+ Those throw on non-well-formed strings by default.
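+
+ For illustration, a minimal sketch of the difference (exact error types are left out, as they are implementation details):
+
+ ```js
+ import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
+ import { utf8fromString } from '@exodus/bytes/utf8.js'
+
+ // Lossy per spec: an unpaired surrogate is silently replaced with U+FFFD
+ new TextEncoder().encode('\uD800') // Uint8Array [ 0xef, 0xbf, 0xbd ]
+
+ // TextDecoder only errors on invalid bytes when { fatal: true } is passed
+ new TextDecoder('utf-8', { fatal: true }).decode(Uint8Array.of(0xff)) // throws
+
+ // Strict API: rejects non-well-formed strings instead of replacing
+ utf8fromString('\uD800') // throws
+ ```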
8
56
 
9
57
  ## API
10
58
 
59
+ ### `@exodus/bytes/utf8.js`
60
+
61
+ ##### `utf8fromString(str, format = 'uint8')`
62
+ ##### `utf8fromStringLoose(str, format = 'uint8')`
63
+ ##### `utf8toString(arr)`
64
+ ##### `utf8toStringLoose(arr)`
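+
+ A usage sketch (only the default `'uint8'` format is shown; other `format` values are not covered here):
+
+ ```js
+ import { utf8fromString, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
+
+ const bytes = utf8fromString('héllo') // Uint8Array of UTF-8 bytes, throws on lone surrogates
+ utf8toString(bytes) // 'héllo', throws on invalid UTF-8
+ utf8toStringLoose(Uint8Array.of(0xff)) // '\uFFFD', replaces invalid bytes instead of throwing
+ ```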
65
+
66
+ ### `@exodus/bytes/utf16.js`
67
+
68
+ ##### `utf16fromString(str, format = 'uint16')`
69
+ ##### `utf16fromStringLoose(str, format = 'uint16')`
70
+ ##### `utf16toString(arr, 'uint16')`
71
+ ##### `utf16toStringLoose(arr, 'uint16')`
72
+
73
+ ### `@exodus/bytes/single-byte.js`
74
+
75
+ ##### `createSinglebyteDecoder(encoding, loose = false)`
76
+
77
+ Create a decoder for a supported one-byte `encoding`.
78
+
79
+ Returns a function `decode(arr)` that decodes bytes to a string.
80
+
81
+ ### `@exodus/bytes/multi-byte.js`
82
+
83
+ ##### `createMultibyteDecoder(encoding, loose = false)`
84
+
85
+ Create a decoder for a supported legacy multi-byte `encoding`.
86
+
87
+ Returns a function `decode(arr, stream = false)` that decodes bytes to a string.
88
+
89
+ The returned function keeps internal state across calls while `stream = true` is used.
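+
+ A streaming sketch (`0x83 0x65` is assumed to form one valid `shift_jis` byte pair):
+
+ ```js
+ import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js'
+
+ const decode = createMultibyteDecoder('shift_jis')
+
+ // A multi-byte sequence split across chunks: pass stream = true for every chunk except the last
+ decode(Uint8Array.of(0x83), true) // '' (the lead byte is buffered in the decoder state)
+ decode(Uint8Array.of(0x65)) // the buffered lead byte and 0x65 decode together as one character
+ ```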
90
+
91
+ ##### `windows1252toString(arr)`
92
+
93
+ Decode `windows-1252` bytes to a string.
94
+
95
+ Also supports `ascii` and `latin-1` as those are strict subsets of `windows-1252`.
96
+
97
+ There is no loose variant for this encoding, all bytes can be decoded.
98
+
99
+ Same as `windows1252toString = createSinglebyteDecoder('windows-1252')`.
100
+
11
101
  ### `@exodus/bytes/hex.js`
12
102
 
103
+ ##### `toHex(arr)`
104
+ ##### `fromHex(string)`
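+
+ A sketch (lowercase output assumed):
+
+ ```js
+ import { toHex, fromHex } from '@exodus/bytes/hex.js'
+
+ toHex(Uint8Array.of(0xde, 0xad, 0xbe, 0xef)) // 'deadbeef'
+ fromHex('deadbeef') // Uint8Array [ 0xde, 0xad, 0xbe, 0xef ]
+ fromHex('not hex!') // throws, invalid input is rejected rather than ignored
+ ```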
105
+
13
106
  ### `@exodus/bytes/base64.js`
14
107
 
108
+ ##### `toBase64(arr, { padding = true })`
109
+ ##### `toBase64url(arr, { padding = false })`
110
+ ##### `fromBase64(str, { format = 'uint8', padding = 'both' })`
111
+ ##### `fromBase64url(str, { format = 'uint8', padding = false })`
112
+ ##### `fromBase64any(str, { format = 'uint8', padding = 'both' })`
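+
+ A sketch; the `padding: 'both'` default is read here as accepting both padded and unpadded input:
+
+ ```js
+ import { toBase64, toBase64url, fromBase64 } from '@exodus/bytes/base64.js'
+
+ toBase64(Uint8Array.of(0xff)) // '/w==' (padded by default)
+ toBase64url(Uint8Array.of(0xff)) // '_w' (URL-safe alphabet, unpadded by default)
+ fromBase64('/w==') // Uint8Array [ 0xff ]
+ ```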
113
+
15
114
  ### `@exodus/bytes/base32.js`
16
115
 
17
- ### `@exodus/bytes/hex.js`
116
+ ##### `toBase32(arr, { padding = false })`
117
+ ##### `toBase32hex(arr, { padding = false })`
118
+ ##### `fromBase32(str, { format = 'uint8', padding = 'both' })`
119
+ ##### `fromBase32hex(str, { format = 'uint8', padding = 'both' })`
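+
+ A sketch using the RFC 4648 base32 alphabet:
+
+ ```js
+ import { toBase32, fromBase32 } from '@exodus/bytes/base32.js'
+
+ toBase32(Uint8Array.of(0xff)) // '74' (unpadded by default)
+ fromBase32('74') // Uint8Array [ 0xff ]
+ ```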
120
+
121
+ ### `@exodus/bytes/bech32.js`
122
+
123
+ ##### `getPrefix(str, limit = 90)`
124
+ ##### `toBech32(prefix, bytes, limit = 90)`
125
+ ##### `fromBech32(str, limit = 90)`
126
+ ##### `toBech32m(prefix, bytes, limit = 90)`
127
+ ##### `fromBech32m(str, limit = 90)`
18
128
 
19
129
  ### `@exodus/bytes/base58.js`
20
130
 
131
+ ##### `toBase58(arr)`
132
+ ##### `fromBase58(str, format = 'uint8')`
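+
+ A sketch, assuming the Bitcoin base58 alphabet:
+
+ ```js
+ import { toBase58, fromBase58 } from '@exodus/bytes/base58.js'
+
+ const encoded = toBase58(Uint8Array.of(0x00, 0x01, 0x02)) // '15T' (a leading zero byte becomes '1')
+ fromBase58(encoded) // Uint8Array [ 0x00, 0x01, 0x02 ]
+ fromBase58('0OIl') // throws, these characters are not in the base58 alphabet
+ ```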
133
+
21
134
  ### `@exodus/bytes/base58check.js`
22
135
 
136
+ ##### `async toBase58check(arr)`
137
+ ##### `toBase58checkSync(arr)`
138
+ ##### `async fromBase58check(str, format = 'uint8')`
139
+ ##### `fromBase58checkSync(str, format = 'uint8')`
140
+ ##### `makeBase58check(hashAlgo, hashAlgoSync)`
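+
+ A sketch; the checksum is assumed to be the conventional 4-byte double-SHA-256, and the async variants are assumed to exist for environments where hashing is only available asynchronously:
+
+ ```js
+ import { toBase58checkSync, fromBase58checkSync } from '@exodus/bytes/base58check.js'
+
+ const payload = Uint8Array.of(0x00, 0x01, 0x02)
+ const encoded = toBase58checkSync(payload) // payload + checksum, base58-encoded
+ fromBase58checkSync(encoded) // Uint8Array [ 0x00, 0x01, 0x02 ], throws if the checksum does not match
+ ```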
141
+
23
142
  ### `@exodus/bytes/wif.js`
24
143
 
144
+ ##### `async fromWifString(string, version)`
145
+ ##### `fromWifStringSync(string, version)`
146
+ ##### `async toWifString({ version, privateKey, compressed })`
147
+ ##### `toWifStringSync({ version, privateKey, compressed })`
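+
+ A sketch; the return shape of `fromWifString` / `fromWifStringSync` is assumed from the `toWifString` parameters:
+
+ ```js
+ import { toWifStringSync, fromWifStringSync } from '@exodus/bytes/wif.js'
+
+ const privateKey = new Uint8Array(32).fill(1) // placeholder 32-byte key
+ const wif = toWifStringSync({ version: 0x80, privateKey, compressed: true })
+ fromWifStringSync(wif, 0x80) // expected to yield the { privateKey, compressed } pair back
+ ```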
148
+
25
149
  ## License
26
150
 
27
151
  [MIT](./LICENSE)
package/encoding.js ADDED
@@ -0,0 +1,234 @@
1
+ // A limited subset of TextEncoder / TextDecoder API
2
+
3
+ // We can't return native TextDecoder if it's present, as Node.js one is broken on windows-1252 and we fix that
4
+ // We are also faster than Node.js built-in on both TextEncoder and TextDecoder
5
+
6
+ /* eslint-disable @exodus/import/no-unresolved */
7
+
8
+ import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
9
+ import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
10
+ import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js'
11
+ import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
12
+ import { multibyteSupported } from './fallback/multi-byte.js'
13
+ import labels from './fallback/encoding.labels.js'
14
+ import { unfinishedBytes } from './fallback/encoding.util.js'
15
+
16
+ const E_OPTIONS = 'The "options" argument must be of type object'
17
+ const replacementChar = '\uFFFD'
18
+
19
+ let labelsMap
20
+ const normalizeEncoding = (enc) => {
21
+ // fast path
22
+ if (enc === 'utf-8' || enc === 'utf8') return 'utf-8'
23
+ if (enc === 'windows-1252' || enc === 'ascii' || enc === 'latin1') return 'windows-1252'
24
+ // full map
25
+ let low = `${enc}`.toLowerCase()
26
+ if (low !== low.trim()) low = low.replace(/^[\t\n\f\r ]+/, '').replace(/[\t\n\f\r ]+$/, '') // only ASCII whitespace
27
+ if (Object.hasOwn(labels, low) && low !== 'replacement') return low
28
+ if (!labelsMap) {
29
+ labelsMap = new Map()
30
+ for (const [label, aliases] of Object.entries(labels)) {
31
+ for (const alias of aliases) labelsMap.set(alias, label)
32
+ }
33
+ }
34
+
35
+ const mapped = labelsMap.get(low)
36
+ if (mapped && mapped !== 'replacement') return mapped
37
+ throw new RangeError('Unknown encoding')
38
+ }
39
+
40
+ const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
41
+
42
+ const fromSource = (x) => {
43
+ if (x instanceof Uint8Array) return x
44
+ if (x instanceof ArrayBuffer) return new Uint8Array(x)
45
+ if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
46
+ if (globalThis.SharedArrayBuffer && x instanceof globalThis.SharedArrayBuffer) {
47
+ return new Uint8Array(x) // a SharedArrayBuffer has no .buffer / .byteOffset, construct the view over it directly
48
+ }
49
+
50
+ throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
51
+ }
52
+
53
+ function unicodeDecoder(encoding, loose) {
54
+ if (encoding === 'utf-8') return loose ? utf8toStringLoose : utf8toString // likely
55
+ const form = encoding === 'utf-16le' ? 'uint8-le' : 'uint8-be'
56
+ return loose ? (u) => utf16toStringLoose(u, form) : (u) => utf16toString(u, form)
57
+ }
58
+
59
+ export class TextDecoder {
60
+ #decode
61
+ #unicode
62
+ #multibyte
63
+ #chunk
64
+ #canBOM
65
+
66
+ constructor(encoding = 'utf-8', options = {}) {
67
+ if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
68
+ const enc = normalizeEncoding(encoding)
69
+ define(this, 'encoding', enc)
70
+ define(this, 'fatal', Boolean(options.fatal))
71
+ define(this, 'ignoreBOM', Boolean(options.ignoreBOM))
72
+ this.#unicode = enc === 'utf-8' || enc === 'utf-16le' || enc === 'utf-16be'
73
+ this.#multibyte = !this.#unicode && enc !== 'windows-1252' && multibyteSupported(enc)
74
+ this.#canBOM = this.#unicode && !this.ignoreBOM
75
+ }
76
+
77
+ get [Symbol.toStringTag]() {
78
+ return 'TextDecoder'
79
+ }
80
+
81
+ decode(input, options = {}) {
82
+ if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
83
+ const stream = Boolean(options.stream)
84
+ let u = input === undefined ? new Uint8Array() : fromSource(input)
85
+
86
+ if (this.#unicode) {
87
+ let prefix
88
+ if (this.#chunk) {
89
+ if (u.length === 0) {
90
+ if (stream) return '' // no change
91
+ u = this.#chunk // process as final chunk to handle errors and state changes
92
+ } else if (u.length < 3) {
93
+ // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
94
+ const a = new Uint8Array(u.length + this.#chunk.length)
95
+ a.set(this.#chunk)
96
+ a.set(u, this.#chunk.length)
97
+ u = a
98
+ } else {
99
+ // Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
100
+ const t = new Uint8Array(this.#chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
101
+ t.set(this.#chunk)
102
+ t.set(u.subarray(0, 3), this.#chunk.length)
103
+
104
+ // Stop at the first offset where unfinished bytes reaches 0 or fits into u
105
+ // If that doesn't happen (u too short), just concat chunk and u completely
106
+ for (let i = 1; i <= 3; i++) {
107
+ const unfinished = unfinishedBytes(t, this.#chunk.length + i, this.encoding) // 0-3
108
+ if (unfinished <= i) {
109
+ // Always reachable at 3, but we still need 'unfinished' value for it
110
+ const add = i - unfinished // 0-3
111
+ prefix = add > 0 ? t.subarray(0, this.#chunk.length + add) : this.#chunk
112
+ if (add > 0) u = u.subarray(add)
113
+ break
114
+ }
115
+ }
116
+ }
117
+
118
+ this.#chunk = null
119
+ } else if (u.byteLength === 0) {
120
+ if (!stream) this.#canBOM = !this.ignoreBOM
121
+ return ''
122
+ }
123
+
124
+ // For non-stream utf-8 we don't have to do this as it matches utf8toStringLoose already
125
+ // For non-stream loose utf-16 we still have to do this as this API supports uneven byteLength unlike utf16toStringLoose
126
+ let suffix = ''
127
+ if (stream || (!this.fatal && this.encoding !== 'utf-8')) {
128
+ const trail = unfinishedBytes(u, u.byteLength, this.encoding)
129
+ if (trail > 0) {
130
+ if (stream) {
131
+ this.#chunk = Uint8Array.from(u.subarray(-trail)) // copy
132
+ } else {
133
+ // non-fatal mode as already checked
134
+ suffix = replacementChar
135
+ }
136
+
137
+ u = u.subarray(0, -trail)
138
+ }
139
+ }
140
+
141
+ if (this.#canBOM) {
142
+ const bom = this.#findBom(prefix ?? u)
143
+ if (bom) {
144
+ if (stream) this.#canBOM = false
145
+ if (prefix) {
146
+ prefix = prefix.subarray(bom)
147
+ } else {
148
+ u = u.subarray(bom)
149
+ }
150
+ }
151
+ }
152
+
153
+ if (!this.#decode) this.#decode = unicodeDecoder(this.encoding, !this.fatal)
154
+ try {
155
+ const res = (prefix ? this.#decode(prefix) : '') + this.#decode(u) + suffix
156
+ if (res.length > 0 && stream) this.#canBOM = false
157
+
158
+ if (!stream) this.#canBOM = !this.ignoreBOM
159
+ return res
160
+ } catch (err) {
161
+ this.#chunk = null // reset unfinished chunk on errors
162
+ throw err
163
+ }
164
+
165
+ // eslint-disable-next-line no-else-return
166
+ } else if (this.#multibyte) {
167
+ if (!this.#decode) this.#decode = createMultibyteDecoder(this.encoding, !this.fatal) // can contain state!
168
+ return this.#decode(u, stream)
169
+ } else {
170
+ if (!this.#decode) this.#decode = createSinglebyteDecoder(this.encoding, !this.fatal)
171
+ return this.#decode(u)
172
+ }
173
+ }
174
+
175
+ #findBom(u) {
176
+ switch (this.encoding) {
177
+ case 'utf-8':
178
+ return u.byteLength >= 3 && u[0] === 0xef && u[1] === 0xbb && u[2] === 0xbf ? 3 : 0
179
+ case 'utf-16le':
180
+ return u.byteLength >= 2 && u[0] === 0xff && u[1] === 0xfe ? 2 : 0
181
+ case 'utf-16be':
182
+ return u.byteLength >= 2 && u[0] === 0xfe && u[1] === 0xff ? 2 : 0
183
+ }
184
+
185
+ throw new Error('Unreachable')
186
+ }
187
+ }
188
+
189
+ export class TextEncoder {
190
+ constructor() {
191
+ define(this, 'encoding', 'utf-8')
192
+ }
193
+
194
+ get [Symbol.toStringTag]() {
195
+ return 'TextEncoder'
196
+ }
197
+
198
+ encode(str = '') {
199
+ if (typeof str !== 'string') str = `${str}`
200
+ const res = utf8fromStringLoose(str)
201
+ return res.byteOffset === 0 ? res : res.slice(0) // Ensure 0-offset. TODO: do we need this?
202
+ }
203
+
204
+ encodeInto(str, target) {
205
+ if (typeof str !== 'string') str = `${str}`
206
+ if (!(target instanceof Uint8Array)) throw new TypeError('Target must be an Uint8Array')
207
+ if (target.buffer.detached) return { read: 0, written: 0 } // Until https://github.com/whatwg/encoding/issues/324 is resolved
208
+
209
+ let u8 = utf8fromStringLoose(str) // TODO: perf?
210
+ let read
211
+ if (target.length >= u8.length) {
212
+ read = str.length
213
+ } else if (u8.length === str.length) {
214
+ if (u8.length > target.length) u8 = u8.subarray(0, target.length) // ascii can be truncated
215
+ read = u8.length
216
+ } else {
217
+ u8 = u8.subarray(0, target.length)
218
+ const unfinished = unfinishedBytes(u8, u8.length, 'utf-8')
219
+ if (unfinished > 0) u8 = u8.subarray(0, u8.length - unfinished)
220
+
221
+ // We can do this because loose str -> u8 -> str preserves length, unlike loose u8 -> str -> u8
222
+ // Each unpaired surrogate (1 charcode) is replaced with a single charcode
223
+ read = utf8toStringLoose(u8).length // FIXME: Converting back is very inefficient
224
+ }
225
+
226
+ try {
227
+ target.set(u8)
228
+ } catch {
229
+ return { read: 0, written: 0 } // see above, likely detached but no .detached property support
230
+ }
231
+
232
+ return { read, written: u8.length }
233
+ }
234
+ }
@@ -1,17 +1,30 @@
1
1
  const { Buffer, TextEncoder, TextDecoder } = globalThis
2
2
  const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
3
- const isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]')) // we consider Node.js TextDecoder/TextEncoder native
4
- const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
5
- const nativeDecoder = isNative(TextDecoder) ? new TextDecoder('utf8', { ignoreBOM: true }) : null
6
- const nativeBuffer = haveNativeBuffer ? Buffer : null
7
- const isHermes = Boolean(globalThis.HermesInternal)
3
+ let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]')) // we consider Node.js TextDecoder/TextEncoder native
4
+ if (!haveNativeBuffer && isNative(() => {})) isNative = () => false // e.g. XS, we don't want false positives
5
+
6
+ export const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
7
+ export const nativeDecoder = isNative(TextDecoder)
8
+ ? new TextDecoder('utf-8', { ignoreBOM: true })
9
+ : null
10
+ export const nativeBuffer = haveNativeBuffer ? Buffer : null
11
+ export const isHermes = Boolean(globalThis.HermesInternal)
12
+ export const isDeno = Boolean(globalThis.Deno)
13
+ export const isLE = new Uint8Array(Uint16Array.of(258).buffer)[0] === 2
8
14
 
9
15
  // Actually windows-1252, compatible with ascii and latin1 decoding
10
16
  // Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
11
17
  // in 2025 due to a regression, so we call it Latin1 as it's usable only for that
12
- const nativeDecoderLatin1 = isNative(TextDecoder)
13
- ? new TextDecoder('latin1', { ignoreBOM: true })
14
- : null
18
+ let nativeDecoderLatin1impl = null
19
+ if (isNative(TextDecoder)) {
20
+ // Not all bare-bones engines with TextDecoder support encodings other than utf-8, so detect
21
+ try {
22
+ nativeDecoderLatin1impl = new TextDecoder('latin1', { ignoreBOM: true })
23
+ } catch {}
24
+ }
25
+
26
+ export const nativeDecoderLatin1 = nativeDecoderLatin1impl
27
+ export const canDecoders = Boolean(nativeDecoderLatin1impl)
15
28
 
16
29
  // Block Firefox < 146 specifically from using native hex/base64, as it's very slow there
17
30
  // Refs: https://bugzilla.mozilla.org/show_bug.cgi?id=1994067 (and linked issues), fixed in 146
@@ -35,6 +48,71 @@ function shouldSkipBuiltins() {
35
48
  return false // eslint-disable-line no-unreachable
36
49
  }
37
50
 
38
- const skipWeb = shouldSkipBuiltins()
51
+ export const skipWeb = shouldSkipBuiltins()
52
+
53
+ function decodePartAddition(a, start, end, m) {
54
+ let o = ''
55
+ let i = start
56
+ for (const last3 = end - 3; i < last3; i += 4) {
57
+ const x0 = a[i]
58
+ const x1 = a[i + 1]
59
+ const x2 = a[i + 2]
60
+ const x3 = a[i + 3]
61
+ o += m[x0]
62
+ o += m[x1]
63
+ o += m[x2]
64
+ o += m[x3]
65
+ }
66
+
67
+ while (i < end) o += m[a[i++]]
68
+ return o
69
+ }
70
+
71
+ // Decoding with templates is faster on Hermes
72
+ function decodePartTemplates(a, start, end, m) {
73
+ let o = ''
74
+ let i = start
75
+ for (const last15 = end - 15; i < last15; i += 16) {
76
+ const x0 = a[i]
77
+ const x1 = a[i + 1]
78
+ const x2 = a[i + 2]
79
+ const x3 = a[i + 3]
80
+ const x4 = a[i + 4]
81
+ const x5 = a[i + 5]
82
+ const x6 = a[i + 6]
83
+ const x7 = a[i + 7]
84
+ const x8 = a[i + 8]
85
+ const x9 = a[i + 9]
86
+ const x10 = a[i + 10]
87
+ const x11 = a[i + 11]
88
+ const x12 = a[i + 12]
89
+ const x13 = a[i + 13]
90
+ const x14 = a[i + 14]
91
+ const x15 = a[i + 15]
92
+ o += `${m[x0]}${m[x1]}${m[x2]}${m[x3]}${m[x4]}${m[x5]}${m[x6]}${m[x7]}${m[x8]}${m[x9]}${m[x10]}${m[x11]}${m[x12]}${m[x13]}${m[x14]}${m[x15]}`
93
+ }
94
+
95
+ while (i < end) o += m[a[i++]]
96
+ return o
97
+ }
39
98
 
40
- export { nativeEncoder, nativeDecoder, nativeDecoderLatin1, nativeBuffer, isHermes, skipWeb }
99
+ const decodePart = isHermes ? decodePartTemplates : decodePartAddition
100
+ export function decode2string(arr, start, end, m) {
101
+ if (end - start > 30_000) {
102
+ // Limit concatenation to avoid excessive GC
103
+ // Thresholds checked on Hermes for toHex
104
+ const concat = []
105
+ for (let i = start; i < end; ) {
106
+ const step = i + 500
107
+ const iNext = step > end ? end : step
108
+ concat.push(decodePart(arr, i, iNext, m))
109
+ i = iNext
110
+ }
111
+
112
+ const res = concat.join('')
113
+ concat.length = 0
114
+ return res
115
+ }
116
+
117
+ return decodePart(arr, start, end, m)
118
+ }
@@ -0,0 +1,46 @@
1
+ // See https://encoding.spec.whatwg.org/#names-and-labels
2
+
3
+ /* eslint-disable @exodus/export-default/named */
4
+ // prettier-ignore
5
+ export default {
6
+ 'utf-8': ['unicode-1-1-utf-8', 'unicode11utf8', 'unicode20utf8', 'utf8', 'x-unicode20utf8'],
7
+ ibm866: ['866', 'cp866', 'csibm866'],
8
+ 'iso-8859-2': ['csisolatin2', 'iso-ir-101', 'iso8859-2', 'iso88592', 'iso_8859-2', 'iso_8859-2:1987', 'l2', 'latin2'],
9
+ 'iso-8859-3': ['csisolatin3', 'iso-ir-109', 'iso8859-3', 'iso88593', 'iso_8859-3', 'iso_8859-3:1988', 'l3', 'latin3'],
10
+ 'iso-8859-4': ['csisolatin4', 'iso-ir-110', 'iso8859-4', 'iso88594', 'iso_8859-4', 'iso_8859-4:1988', 'l4', 'latin4'],
11
+ 'iso-8859-5': ['csisolatincyrillic', 'cyrillic', 'iso-ir-144', 'iso8859-5', 'iso88595', 'iso_8859-5', 'iso_8859-5:1988'],
12
+ 'iso-8859-6': ['arabic', 'asmo-708', 'csiso88596e', 'csiso88596i', 'csisolatinarabic', 'ecma-114', 'iso-8859-6-e', 'iso-8859-6-i', 'iso-ir-127', 'iso8859-6', 'iso88596', 'iso_8859-6', 'iso_8859-6:1987'],
13
+ 'iso-8859-7': ['csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8', 'iso-ir-126', 'iso8859-7', 'iso88597', 'iso_8859-7', 'iso_8859-7:1987', 'sun_eu_greek'],
14
+ 'iso-8859-8': ['csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8-e', 'iso-ir-138', 'iso8859-8', 'iso88598', 'iso_8859-8', 'iso_8859-8:1988', 'visual'],
15
+ 'iso-8859-8-i': ['csiso88598i', 'logical'],
16
+ 'iso-8859-10': ['csisolatin6', 'iso-ir-157', 'iso8859-10', 'iso885910', 'l6', 'latin6'],
17
+ 'iso-8859-13': ['iso8859-13', 'iso885913'],
18
+ 'iso-8859-14': ['iso8859-14', 'iso885914'],
19
+ 'iso-8859-15': ['csisolatin9', 'iso8859-15', 'iso885915', 'iso_8859-15', 'l9'],
20
+ 'iso-8859-16': [],
21
+ 'koi8-r': ['cskoi8r', 'koi', 'koi8', 'koi8_r'],
22
+ 'koi8-u': ['koi8-ru'],
23
+ macintosh: ['csmacintosh', 'mac', 'x-mac-roman'],
24
+ 'windows-874': ['dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620'],
25
+ 'windows-1250': ['cp1250', 'x-cp1250'],
26
+ 'windows-1251': ['cp1251', 'x-cp1251'],
27
+ 'windows-1252': ['ansi_x3.4-1968', 'ascii', 'cp1252', 'cp819', 'csisolatin1', 'ibm819', 'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1', 'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii', 'x-cp1252'],
28
+ 'windows-1253': ['cp1253', 'x-cp1253'],
29
+ 'windows-1254': ['cp1254', 'csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9', 'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5', 'x-cp1254'],
30
+ 'windows-1255': ['cp1255', 'x-cp1255'],
31
+ 'windows-1256': ['cp1256', 'x-cp1256'],
32
+ 'windows-1257': ['cp1257', 'x-cp1257'],
33
+ 'windows-1258': ['cp1258', 'x-cp1258'],
34
+ 'x-mac-cyrillic': ['x-mac-ukrainian'],
35
+ gbk: ['chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312', 'gb_2312-80', 'iso-ir-58', 'x-gbk'],
36
+ gb18030: [],
37
+ big5: ['big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
38
+ 'euc-jp': ['cseucpkdfmtjapanese', 'x-euc-jp'],
39
+ 'iso-2022-jp': ['csiso2022jp'],
40
+ shift_jis: ['csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'sjis', 'windows-31j', 'x-sjis'],
41
+ 'euc-kr': ['cseuckr', 'csksc56011987', 'iso-ir-149', 'korean', 'ks_c_5601-1987', 'ks_c_5601-1989', 'ksc5601', 'ksc_5601', 'windows-949'],
42
+ replacement: ['csiso2022kr', 'hz-gb-2312', 'iso-2022-cn', 'iso-2022-cn-ext', 'iso-2022-kr'],
43
+ 'utf-16be': ['unicodefffe'],
44
+ 'utf-16le': ['csunicode', 'iso-10646-ucs-2', 'ucs-2', 'unicode', 'unicodefeff', 'utf-16'],
45
+ 'x-user-defined': [],
46
+ }
@@ -0,0 +1,34 @@
1
+ export function unfinishedBytes(u, len, enc) {
2
+ switch (enc) {
3
+ case 'utf-8': {
4
+ // 0-3
5
+ let p = 0
6
+ while (p < 2 && p < len && (u[len - p - 1] & 0xc0) === 0x80) p++ // go back 0-2 trailing bytes
7
+ if (p === len) return 0 // no space for lead
8
+ const l = u[len - p - 1]
9
+ if (l < 0xc2 || l > 0xf4) return 0 // not a lead
10
+ if (p === 0) return 1 // nothing to recheck, we have only lead, return it. 2-byte must return here
11
+ if (l < 0xe0 || (l < 0xf0 && p >= 2)) return 0 // 2-byte, or 3-byte or less and we already have 2 trailing
12
+ const lower = l === 0xf0 ? 0x90 : l === 0xe0 ? 0xa0 : 0x80
13
+ const upper = l === 0xf4 ? 0x8f : l === 0xed ? 0x9f : 0xbf
14
+ const n = u[len - p]
15
+ return n >= lower && n <= upper ? p + 1 : 0
16
+ }
17
+
18
+ case 'utf-16le':
19
+ case 'utf-16be': {
20
+ // 0-3
21
+ let p = 0
22
+ if (len % 2 !== 0) p++ // uneven bytes
23
+ const l = len - p - 1
24
+ if (len - p >= 2) {
25
+ const last = enc === 'utf-16le' ? (u[l] << 8) ^ u[l - 1] : (u[l - 1] << 8) ^ u[l]
26
+ if (last >= 0xd8_00 && last < 0xdc_00) p += 2 // lone lead
27
+ }
28
+
29
+ return p
30
+ }
31
+ }
32
+
33
+ throw new Error('Unsupported encoding')
34
+ }
package/fallback/hex.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { assertUint8 } from '../assert.js'
2
- import { nativeDecoder, nativeEncoder, isHermes } from './_utils.js'
2
+ import { nativeDecoder, nativeEncoder, decode2string } from './_utils.js'
3
3
  import { encodeAscii, decodeAscii } from './latin1.js'
4
4
 
5
5
  let hexArray // array of 256 bytes converted to two-char hex strings
@@ -11,58 +11,6 @@ const allowed = '0123456789ABCDEFabcdef'
11
11
 
12
12
  export const E_HEX = 'Input is not a hex string'
13
13
 
14
- function toHexPartAddition(a, start, end) {
15
- let o = ''
16
- let i = start
17
- const h = hexArray
18
- for (const last3 = end - 3; i < last3; i += 4) {
19
- const x0 = a[i]
20
- const x1 = a[i + 1]
21
- const x2 = a[i + 2]
22
- const x3 = a[i + 3]
23
- o += h[x0]
24
- o += h[x1]
25
- o += h[x2]
26
- o += h[x3]
27
- }
28
-
29
- while (i < end) o += h[a[i++]]
30
- return o
31
- }
32
-
33
- // Optimiziation for Hermes which is the main user of fallback
34
- function toHexPartTemplates(a, start, end) {
35
- let o = ''
36
- let i = start
37
- const h = hexArray
38
- for (const last15 = end - 15; i < last15; i += 16) {
39
- const x0 = a[i]
40
- const x1 = a[i + 1]
41
- const x2 = a[i + 2]
42
- const x3 = a[i + 3]
43
- const x4 = a[i + 4]
44
- const x5 = a[i + 5]
45
- const x6 = a[i + 6]
46
- const x7 = a[i + 7]
47
- const x8 = a[i + 8]
48
- const x9 = a[i + 9]
49
- const x10 = a[i + 10]
50
- const x11 = a[i + 11]
51
- const x12 = a[i + 12]
52
- const x13 = a[i + 13]
53
- const x14 = a[i + 14]
54
- const x15 = a[i + 15]
55
- o += `${h[x0]}${h[x1]}${h[x2]}${h[x3]}${h[x4]}${h[x5]}${h[x6]}${h[x7]}${h[x8]}${h[x9]}${h[x10]}${h[x11]}${h[x12]}${h[x13]}${h[x14]}${h[x15]}`
56
- }
57
-
58
- while (i < end) o += h[a[i++]]
59
- return o
60
- }
61
-
62
- // Using templates is significantly faster in Hermes and JSC
63
- // It's harder to detect JSC and not important anyway as it has native impl, so we detect only Hermes
64
- const toHexPart = isHermes ? toHexPartTemplates : toHexPartAddition
65
-
66
14
  export function toHex(arr) {
67
15
  assertUint8(arr)
68
16
 
@@ -100,23 +48,7 @@ export function toHex(arr) {
100
48
  return decodeAscii(oa)
101
49
  }
102
50
 
103
- if (length > 30_000) {
104
- // Limit concatenation to avoid excessive GC
105
- // Thresholds checked on Hermes
106
- const concat = []
107
- for (let i = 0; i < length; ) {
108
- const step = i + 500
109
- const end = step > length ? length : step
110
- concat.push(toHexPart(arr, i, end))
111
- i = end
112
- }
113
-
114
- const res = concat.join('')
115
- concat.length = 0
116
- return res
117
- }
118
-
119
- return toHexPart(arr, 0, length)
51
+ return decode2string(arr, 0, length, hexArray)
120
52
  }
121
53
 
122
54
  export function fromHex(str) {