npm - @exodus/bytes - Versions diffs - 1.3.0 → 1.5.0 - Mend

@exodus/bytes 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md +119 -29
package/fallback/_utils.js +1 -1
package/fallback/encoding.js +21 -3
package/fallback/multi-byte.js +285 -270
package/package.json +1 -1
package/wif.js +1 -1

package/README.md CHANGED Viewed

@@ -78,6 +78,13 @@ See [the list of encodings](https://encoding.spec.whatwg.org/#names-and-labels).
 ### `@exodus/bytes/utf8.js`
+```js
+import { utf8fromString, utf8toString } from '@exodus/bytes/utf8.js'
+// loose
+import { utf8fromStringLoose, utf8toStringLoose } from '@exodus/bytes/utf8.js'
+```
 ##### `utf8fromString(str, format = 'uint8')`
 ##### `utf8fromStringLoose(str, format = 'uint8')`
 ##### `utf8toString(arr)`
@@ -85,6 +92,13 @@ See [the list of encodings](https://encoding.spec.whatwg.org/#names-and-labels).
 ### `@exodus/bytes/utf16.js`
+```js
+import { utf16fromString, utf16toString } from '@exodus/bytes/utf16.js'
+// loose
+import { utf16fromStringLoose, utf16toStringLoose } from '@exodus/bytes/utf16.js'
+```
 ##### `utf16fromString(str, format = 'uint16')`
 ##### `utf16fromStringLoose(str, format = 'uint16')`
 ##### `utf16toString(arr, 'uint16')`
@@ -92,21 +106,26 @@ See [the list of encodings](https://encoding.spec.whatwg.org/#names-and-labels).
 ### `@exodus/bytes/single-byte.js`
-##### `createSinglebyteDecoder(encoding, loose = false)`
-Create a decoder for a supported one-byte `encoding`.
-Returns a function `decode(arr)` that decodes bytes to a string.
+```js
+import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
+import { windows1252toString } from '@exodus/bytes/single-byte.js'
+```
-### `@exodus/bytes/multi-byte.js`
+Decode the legacy single-byte encodings according to the [Encoding standard](https://encoding.spec.whatwg.org/)
+([§9](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings) and
+[§14.5](https://encoding.spec.whatwg.org/#x-user-defined)).
-##### `createMultibyteDecoder(encoding, loose = false)`
+Supports all single-byte encodings listed in the standard:
+`ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`,
+`iso-8859-8-i`, `iso-8859-10`, `iso-8859-13`, `iso-8859-14`, `iso-8859-15`, `iso-8859-16`, `koi8-r`, `koi8-u`,
+`macintosh`, `windows-874`, `windows-1250`, `windows-1251`, `windows-1252`, `windows-1253`, `windows-1254`,
+`windows-1255`, `windows-1256`, `windows-1257`, `windows-1258`, `x-mac-cyrillic` and `x-user-defined`.
-Create a decoder for a supported legacy multi-byte `encoding`.
+##### `createSinglebyteDecoder(encoding, loose = false)`
-Returns a function `decode(arr, stream = false)` that decodes bytes to a string.
+Create a decoder for a supported one-byte `encoding`, given its lowercased name `encoding`.
-That function will have state while `stream = true` is used.
+Returns a function `decode(arr)` that decodes bytes to a string.
 ##### `windows1252toString(arr)`
@@ -116,61 +135,132 @@ Also supports `ascii` and `latin-1` as those are strict subsets of `windows-1252
 There is no loose variant for this encoding, all bytes can be decoded.
-Same as `windows1252toString = createSinglebyteDecoder('windows-1252')`.
+Same as:
+```js
+const windows1252toString = createSinglebyteDecoder('windows-1252')
+```
+### `@exodus/bytes/multi-byte.js`
+```js
+import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js'
+```
+Decode the legacy multi-byte encodings according to the [Encoding standard](https://encoding.spec.whatwg.org/)
+([§10](https://encoding.spec.whatwg.org/#legacy-multi-byte-chinese-(simplified)-encodings),
+[§11](https://encoding.spec.whatwg.org/#legacy-multi-byte-chinese-(traditional)-encodings),
+[§12](https://encoding.spec.whatwg.org/#legacy-multi-byte-japanese-encodings),
+[§13](https://encoding.spec.whatwg.org/#legacy-multi-byte-korean-encodings)).
+Supports all legacy multi-byte encodings listed in the standard:
+`gbk`, `gb18030`, `big5`, `euc-jp`, `iso-2022-jp`, `shift_jis`, `euc-kr`.
+##### `createMultibyteDecoder(encoding, loose = false)`
+Create a decoder for a supported legacy multi-byte `encoding`, given its lowercased name `encoding`.
+Returns a function `decode(arr, stream = false)` that decodes bytes to a string.
+That function will have state while `stream = true` is used.
 ### `@exodus/bytes/bigint.js`
+```js
+import { fromBigInt, toBigInt } from '@exodus/bytes/bigint.js'
+```
 ##### `fromBigInt(bigint, { length, format = 'uint8' })`
 ##### `toBigInt(arr)`
 ### `@exodus/bytes/hex.js`
-##### `toHex(arr)`
+```js
+import { fromHex, toHex } from '@exodus/bytes/hex.js'
+```
 ##### `fromHex(string)`
+##### `toHex(arr)`
 ### `@exodus/bytes/base64.js`
-##### `toBase64(arr, { padding = true })`
-##### `toBase64url(arr, { padding = false })`
+```js
+import { fromBase64, toBase64 } from '@exodus/bytes/base64.js'
+import { fromBase64url, toBase64url } from '@exodus/bytes/base64.js'
+import { fromBase64any } from '@exodus/bytes/base64.js'
+```
 ##### `fromBase64(str, { format = 'uint8', padding = 'both' })`
 ##### `fromBase64url(str, { format = 'uint8', padding = false })`
 ##### `fromBase64any(str, { format = 'uint8', padding = 'both' })`
+##### `toBase64(arr, { padding = true })`
+##### `toBase64url(arr, { padding = false })`
 ### `@exodus/bytes/base32.js`
-##### `toBase32(arr, { padding = false })`
-##### `toBase32hex(arr, { padding = false })`
+```js
+import { fromBase32, toBase32 } from '@exodus/bytes/base32.js'
+import { fromBase32hex, toBase32hex } from '@exodus/bytes/base32.js'
+```
 ##### `fromBase32(str, { format = 'uint8', padding = 'both' })`
 ##### `fromBase32hex(str, { format = 'uint8', padding = 'both' })`
+##### `toBase32(arr, { padding = false })`
+##### `toBase32hex(arr, { padding = false })`
 ### `@exodus/bytes/bech32.js`
+```js
+import { fromBech32, toBech32 } from '@exodus/bytes/bech32.js'
+import { fromBech32m, toBech32m } from '@exodus/bytes/bech32.js'
+import { getPrefix } from '@exodus/bytes/bech32.js'
+```
 ##### `getPrefix(str, limit = 90)`
-##### `toBech32(prefix, bytes, limit = 90)`
 ##### `fromBech32(str, limit = 90)`
-##### `toBech32m(prefix, bytes, limit = 90)`
+##### `toBech32(prefix, bytes, limit = 90)`
 ##### `fromBech32m(str, limit = 90)`
+##### `toBech32m(prefix, bytes, limit = 90)`
 ### `@exodus/bytes/base58.js`
-##### `toBase58(arr)`
+```js
+import { fromBase58, toBase58 } from '@exodus/bytes/base58.js'
+import { fromBase58xrp, toBase58xrp } from '@exodus/bytes/base58.js'
+```
 ##### `fromBase58(str, format = 'uint8')`
+##### `toBase58(arr)`
-##### `toBase58xrp(arr)`
 ##### `fromBase58xrp(str, format = 'uint8')`
+##### `toBase58xrp(arr)`
 ### `@exodus/bytes/base58check.js`
+```js
+import { fromBase58check, toBase58check } from '@exodus/bytes/base58check.js'
+import { fromBase58checkSync, toBase58checkSync } from '@exodus/bytes/base58check.js'
+import { makeBase58check } from '@exodus/bytes/base58check.js'
+```
 On non-Node.js, requires peer dependency [@exodus/crypto](https://www.npmjs.com/package/@exodus/crypto) to be installed.
-##### `async toBase58check(arr)`
-##### `toBase58checkSync(arr)`
 ##### `async fromBase58check(str, format = 'uint8')`
+##### `async toBase58check(arr)`
 ##### `fromBase58checkSync(str, format = 'uint8')`
+##### `toBase58checkSync(arr)`
 ##### `makeBase58check(hashAlgo, hashAlgoSync)`
 ### `@exodus/bytes/wif.js`
+```js
+import { fromWifString, toWifString } from '@exodus/bytes/wif.js'
+import { fromWifStringSync, toWifStringSync } from '@exodus/bytes/wif.js'
+```
+On non-Node.js, requires peer dependency [@exodus/crypto](https://www.npmjs.com/package/@exodus/crypto) to be installed.
 ##### `async fromWifString(string, version)`
 ##### `fromWifStringSync(string, version)`
 ##### `async toWifString({ version, privateKey, compressed })`
@@ -178,18 +268,18 @@ On non-Node.js, requires peer dependency [@exodus/crypto](https://www.npmjs.com/
 ### `@exodus/bytes/encoding.js`
-Implements the [Encoding standard](https://encoding.spec.whatwg.org/):
-[TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder),
-[TextEncoder](https://encoding.spec.whatwg.org/#interface-textdecoder),
-some [hooks](https://encoding.spec.whatwg.org/#specification-hooks) (see below).
 ```js
-import { TextDecoder, TextDecoder } from '@exodus/bytes/encoding.js'
+import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
 // Hooks for standards
 import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding.js'
 ```
+Implements the [Encoding standard](https://encoding.spec.whatwg.org/):
+[TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder),
+[TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder),
+some [hooks](https://encoding.spec.whatwg.org/#specification-hooks) (see below).
 #### `new TextDecoder(label = 'utf-8', { fatal = false, ignoreBOM = false })`
 [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder) implementation/polyfill.
@@ -265,7 +355,7 @@ new TextDecoder(getBOMEncoding(input) ?? fallbackEncoding).decode(input)
 ### `@exodus/bytes/encoding-lite.js`
 ```js
-import { TextDecoder, TextDecoder } from '@exodus/bytes/encoding-lite.js'
+import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-lite.js'
 // Hooks for standards
 import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding-lite.js'

package/fallback/_utils.js CHANGED Viewed

@@ -106,7 +106,7 @@ function decodePartTemplates(a, start, end, m) {
 const decodePart = isHermes ? decodePartTemplates : decodePartAddition
 export function decode2string(arr, start, end, m) {
-  if (start - end > 30_000) {
+  if (end - start > 30_000) {
     // Limit concatenation to avoid excessive GC
     // Thresholds checked on Hermes for toHex
     const concat = []

package/fallback/encoding.js CHANGED Viewed

@@ -47,11 +47,28 @@ export function normalizeEncoding(label) {
 const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
+// TODO: make this more strict against Symbol.toStringTag
+// Is not very significant though, anything faking Symbol.toStringTag could as well override
+// prototypes, which is not something we protect against
+function isAnyArrayBuffer(x) {
+  if (x instanceof ArrayBuffer) return true
+  if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return true
+  if (!x || typeof x.byteLength !== 'number') return false
+  const s = Object.prototype.toString.call(x)
+  return s === '[object ArrayBuffer]' || s === '[object SharedArrayBuffer]'
+}
+function isAnyUint8Array(x) {
+  if (x instanceof Uint8Array) return true
+  if (!x || !ArrayBuffer.isView(x) || x.BYTES_PER_ELEMENT !== 1) return false
+  return Object.prototype.toString.call(x) === '[object Uint8Array]'
+}
 const fromSource = (x) => {
   if (x instanceof Uint8Array) return x
-  if (x instanceof ArrayBuffer) return new Uint8Array(x)
   if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
-  if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return new Uint8Array(x)
+  if (isAnyArrayBuffer(x)) return new Uint8Array(x)
   throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
 }
@@ -210,7 +227,7 @@ export class TextEncoder {
   encodeInto(str, target) {
     if (typeof str !== 'string') str = `${str}`
-    if (!(target instanceof Uint8Array)) throw new TypeError('Target must be an Uint8Array')
+    if (!isAnyUint8Array(target)) throw new TypeError('Target must be an Uint8Array')
     if (target.buffer.detached) return { read: 0, written: 0 } // Until https://github.com/whatwg/encoding/issues/324 is resolved
     const tlen = target.length
@@ -295,6 +312,7 @@ const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
 // https://encoding.spec.whatwg.org/#names-and-labels
 export function labelToName(label) {
   const enc = normalizeEncoding(label)
+  if (enc === 'utf-8') return 'UTF-8' // fast path
   if (!enc) return enc
   if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
   if (enc === 'big5') return 'Big5'

package/fallback/multi-byte.js CHANGED Viewed

@@ -8,105 +8,130 @@ export const E_STRICT = 'Input is not well-formed for this encoding'
 // If the decoder is not cleared properly, state can be preserved between non-streaming calls!
 // See comment about fatal stream
-// All except iso-2022-jp are ASCII supersets
-// When adding something that is not an ASCII superset, ajust the ASCII fast path
-const REP = 0xff_fd
-const mappers = {
-  // https://encoding.spec.whatwg.org/#euc-kr-decoder
-  'euc-kr': () => {
-    const euc = getTable('euc-kr')
-    let lead = 0
+// Common between euc-kr and big5
+function bigDecoder(err, pair) {
+  let lead = 0
-    const pushback = []
-    const bytes = (b) => {
-      if (lead) {
-        const cp = b >= 0x41 && b <= 0xfe ? euc[(lead - 0x81) * 190 + b - 0x41] : undefined
-        lead = 0
-        if (cp !== undefined && cp !== REP) return cp
-        if (b < 128) pushback.push(b)
-        return -2
-      }
+  const decodeLead = (b) => {
+    const str = pair(lead, b)
+    lead = 0
+    if (str) return str
+    return b < 128 ? String.fromCharCode(err(), b) : String.fromCharCode(err())
+  }
+  const decode = (arr, start, end, stream) => {
+    let res = ''
+    let i = start
-      if (b < 128) return b
-      if (b < 0x81 || b === 0xff) return -2
-      lead = b
-      return -1
+    if (lead && i < end) res += decodeLead(arr[i++])
+    while (i < end) {
+      const b = arr[i++]
+      if (b < 128) {
+        res += String.fromCharCode(b)
+      } else if (b === 0x80 || b === 0xff) {
+        res += String.fromCharCode(err())
+      } else {
+        lead = b
+        if (i < end) res += decodeLead(arr[i++])
+      }
     }
-    const eof = () => {
-      if (!lead) return null
+    if (lead && !stream) {
       lead = 0
-      return -2
+      res += String.fromCharCode(err())
     }
-    return { bytes, eof, pushback }
+    return res
+  }
+  return { decode, isAscii: () => lead === 0 }
+}
+// All except iso-2022-jp are ASCII supersets
+// When adding something that is not an ASCII superset, adjust the ASCII fast path
+const REP = 0xff_fd
+const mappers = {
+  // https://encoding.spec.whatwg.org/#euc-kr-decoder
+  'euc-kr': (err) => {
+    const euc = getTable('euc-kr')
+    return bigDecoder(err, (l, b) => {
+      if (b < 0x41 || b > 0xfe) return
+      const cp = euc[(l - 0x81) * 190 + b - 0x41]
+      return cp !== undefined && cp !== REP ? String.fromCharCode(cp) : undefined
+    })
   },
   // https://encoding.spec.whatwg.org/#euc-jp-decoder
-  'euc-jp': () => {
+  'euc-jp': (err) => {
     const jis0208 = getTable('jis0208')
     const jis0212 = getTable('jis0212')
     let j12 = false
     let lead = 0
-    const pushback = []
-    const bytes = (b) => {
+    const decodeLead = (b) => {
       if (lead === 0x8e && b >= 0xa1 && b <= 0xdf) {
         lead = 0
-        return 0xfe_c0 + b
+        return String.fromCharCode(0xfe_c0 + b)
       }
       if (lead === 0x8f && b >= 0xa1 && b <= 0xfe) {
         j12 = true
         lead = b
-        return -1
+        return ''
+      }
+      let cp
+      if (lead >= 0xa1 && lead <= 0xfe && b >= 0xa1 && b <= 0xfe) {
+        cp = (j12 ? jis0212 : jis0208)[(lead - 0xa1) * 94 + b - 0xa1]
       }
-      if (lead) {
-        let cp
-        if (lead >= 0xa1 && lead <= 0xfe && b >= 0xa1 && b <= 0xfe) {
-          cp = (j12 ? jis0212 : jis0208)[(lead - 0xa1) * 94 + b - 0xa1]
+      lead = 0
+      j12 = false
+      if (cp !== undefined && cp !== REP) return String.fromCharCode(cp)
+      return b < 128 ? String.fromCharCode(err(), b) : String.fromCharCode(err())
+    }
+    const decode = (arr, start, end, stream) => {
+      let res = ''
+      let i = start
+      if (lead && i < end) res += decodeLead(arr[i++])
+      if (lead && i < end) res += decodeLead(arr[i++]) // could be two leads, but no more
+      while (i < end) {
+        const b = arr[i++]
+        if (b < 128) {
+          res += String.fromCharCode(b)
+        } else if ((b < 0xa1 && b !== 0x8e && b !== 0x8f) || b === 0xff) {
+          res += String.fromCharCode(err())
+        } else {
+          lead = b
+          if (i < end) res += decodeLead(arr[i++])
+          if (lead && i < end) res += decodeLead(arr[i++]) // could be two leads
         }
+      }
+      if (lead && !stream) {
         lead = 0
-        j12 = false
-        if (cp !== undefined && cp !== REP) return cp
-        if (b < 128) pushback.push(b)
-        return -2
+        j12 = false // can be true only when lead is non-zero
+        res += String.fromCharCode(err())
       }
-      if (b < 128) return b
-      if ((b < 0xa1 && b !== 0x8e && b !== 0x8f) || b === 0xff) return -2
-      lead = b
-      return -1
+      return res
     }
-    // eslint-disable-next-line sonarjs/no-identical-functions
-    const eof = () => {
-      if (!lead) return null
-      lead = 0
-      return -2
-    }
-    return { bytes, eof, pushback }
+    return { decode, isAscii: () => lead === 0 } // j12 can be true only when lead is non-zero
   },
   // https://encoding.spec.whatwg.org/#iso-2022-jp-decoder
-  // Per-letter of the spec, don't shortcut on state changes on EOF. Some code is regrouped but preserving the logic
-  'iso-2022-jp': () => {
+  'iso-2022-jp': (err) => {
     const jis0208 = getTable('jis0208')
-    const EOF = -1
     let dState = 1
     let oState = 1
-    let lead = 0
+    let lead = 0 // 0 or 0x21-0x7e
     let out = false
-    const pushback = []
-    const bytes = (b) => {
-      if (dState < 5) {
-        if (b === EOF) return null
-        if (b === 0x1b) {
-          dState = 6 // escape start
-          return -1
-        }
+    const bytes = (pushback, b) => {
+      if (dState < 5 && b === 0x1b) {
+        dState = 6 // escape start
+        return
       }
       switch (dState) {
@@ -120,49 +145,46 @@ const mappers = {
           }
           if (b <= 0x7f && b !== 0x0e && b !== 0x0f) return b
-          return -2
+          return err()
         case 3:
           // Katakana
           out = false
           if (b >= 0x21 && b <= 0x5f) return 0xff_40 + b
-          return -2
+          return err()
         case 4:
           // Leading byte
           out = false
-          if ((b >= 0x21) & (b <= 0x7e)) {
-            lead = b
-            dState = 5
-            return -1
-          }
-          return -2
+          if (b < 0x21 || b > 0x7e) return err()
+          lead = b
+          dState = 5
+          return
         case 5:
           // Trailing byte
           out = false
           if (b === 0x1b) {
             dState = 6 // escape start
-            return -2
+            return err()
           }
           dState = 4
           if (b >= 0x21 && b <= 0x7e) {
             const cp = jis0208[(lead - 0x21) * 94 + b - 0x21]
-            return cp !== undefined && cp !== REP ? cp : -2
+            if (cp !== undefined && cp !== REP) return cp
           }
-          return -2
+          return err()
         case 6:
           // Escape start
           if (b === 0x24 || b === 0x28) {
             lead = b
             dState = 7
-            return -1
+            return
           }
           out = false
           dState = oState
-          if (b !== EOF) pushback.push(b)
-          return -2
+          pushback.push(b)
+          return err()
         case 7: {
           // Escape
           const l = lead
@@ -185,63 +207,127 @@ const mappers = {
             dState = oState = s
             const output = out
             out = true
-            return output ? -2 : -1
+            return output ? err() : undefined
           }
           out = false
           dState = oState
-          if (b !== EOF) pushback.push(b)
-          pushback.push(l)
-          return -2
+          pushback.push(b, l)
+          return err()
+        }
+      }
+    }
+    const eof = (pushback) => {
+      if (dState < 5) return null
+      out = false
+      switch (dState) {
+        case 5:
+          dState = 4
+          return err()
+        case 6:
+          dState = oState
+          return err()
+        case 7: {
+          dState = oState
+          pushback.push(lead)
+          lead = 0
+          return err()
         }
       }
     }
-    const eof = () => bytes(EOF)
+    const decode = (arr, start, end, stream) => {
+      let res = ''
+      let i = start
+      const pushback = [] // local and auto-cleared
-    return { bytes, eof, pushback }
+      // First, dump everything until EOF
+      // Same as the full loop, but without EOF handling
+      while (i < end || pushback.length > 0) {
+        const c = bytes(pushback, pushback.length > 0 ? pushback.pop() : arr[i++])
+        if (c !== undefined) res += String.fromCodePoint(c)
+      }
+      // Then, dump EOF. This needs the same loop as the characters can be pushed back
+      if (!stream) {
+        while (i <= end || pushback.length > 0) {
+          if (i < end || pushback.length > 0) {
+            const c = bytes(pushback, pushback.length > 0 ? pushback.pop() : arr[i++])
+            if (c !== undefined) res += String.fromCodePoint(c)
+          } else {
+            const c = eof(pushback)
+            if (c === null) break // clean exit
+            res += String.fromCodePoint(c)
+          }
+        }
+      }
+      // Chrome and WebKit fail on this, we don't: completely destroy the old decoder state when finished streaming
+      // > If this’s do not flush is false, then set this’s decoder to a new instance of this’s encoding’s decoder,
+      // > Set this’s do not flush to options["stream"]
+      if (!stream) {
+        dState = oState = 1
+        lead = 0
+        out = false
+      }
+      return res
+    }
+    return { decode, isAscii: () => false }
   },
   // https://encoding.spec.whatwg.org/#shift_jis-decoder
-  shift_jis: () => {
+  shift_jis: (err) => {
     const jis0208 = getTable('jis0208')
     let lead = 0
-    const pushback = []
-    const bytes = (b) => {
-      if (lead) {
-        const l = lead
-        lead = 0
-        if (b >= 0x40 && b <= 0xfc && b !== 0x7f) {
-          const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
-          if (p >= 8836 && p <= 10_715) return 0xe0_00 - 8836 + p // 16-bit
-          const cp = jis0208[p]
-          if (cp !== undefined && cp !== REP) return cp
-        }
-        if (b < 128) pushback.push(b)
-        return -2
+    const decodeLead = (b) => {
+      const l = lead
+      lead = 0
+      if (b >= 0x40 && b <= 0xfc && b !== 0x7f) {
+        const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
+        if (p >= 8836 && p <= 10_715) return String.fromCharCode(0xe0_00 - 8836 + p)
+        const cp = jis0208[p]
+        if (cp !== undefined && cp !== REP) return String.fromCharCode(cp)
       }
-      if (b <= 0x80) return b // 0x80 is allowed
-      if (b >= 0xa1 && b <= 0xdf) return 0xff_61 - 0xa1 + b
-      if (b < 0x81 || (b > 0x9f && b < 0xe0) || b > 0xfc) return -2
-      lead = b
-      return -1
+      return b < 128 ? String.fromCharCode(err(), b) : String.fromCharCode(err())
     }
-    // eslint-disable-next-line sonarjs/no-identical-functions
-    const eof = () => {
-      if (!lead) return null
-      lead = 0 // this clears state completely on EOF
-      return -2
+    const decode = (arr, start, end, stream) => {
+      let res = ''
+      let i = start
+      if (lead && i < end) res += decodeLead(arr[i++])
+      while (i < end) {
+        const b = arr[i++]
+        if (b <= 0x80) {
+          res += String.fromCharCode(b) // 0x80 is allowed
+        } else if (b >= 0xa1 && b <= 0xdf) {
+          res += String.fromCharCode(0xfe_c0 + b)
+        } else if (b === 0xa0 || b > 0xfc) {
+          res += String.fromCharCode(err())
+        } else {
+          lead = b
+          if (i < end) res += decodeLead(arr[i++])
+        }
+      }
+      if (lead && !stream) {
+        lead = 0
+        res += String.fromCharCode(err())
+      }
+      return res
     }
-    return { bytes, eof, pushback }
+    return { decode, isAscii: () => lead === 0 }
   },
   // https://encoding.spec.whatwg.org/#gbk-decoder
-  gbk: () => mappers.gb18030(), // 10.1.1. GBK’s decoder is gb18030’s decoder
+  gbk: (err) => mappers.gb18030(err), // 10.1.1. GBK’s decoder is gb18030’s decoder
   // https://encoding.spec.whatwg.org/#gb18030-decoder
-  gb18030: () => {
+  gb18030: (err) => {
     const gb18030 = getTable('gb18030')
     const gb18030r = getTable('gb18030-ranges')
     let g1 = 0, g2 = 0, g3 = 0 // prettier-ignore
@@ -258,191 +344,120 @@ const mappers = {
       return b + p - a
     }
-    const pushback = []
-    const bytes = (b) => {
-      if (g3) {
-        if (b < 0x30 || b > 0x39) {
-          pushback.push(b, g3, g2)
-          g1 = g2 = g3 = 0
-          return -2
-        }
-        const cp = index((g1 - 0x81) * 12_600 + (g2 - 0x30) * 1260 + (g3 - 0x81) * 10 + b - 0x30)
-        g1 = g2 = g3 = 0
-        if (cp !== undefined) return cp // Can validly return replacement
-        return -2
-      }
+    // g1 is 0 or 0x81-0xfe
+    // g2 is 0 or 0x30-0x39
+    // g3 is 0 or 0x81-0xfe
+    const decode = (arr, start, end, stream) => {
+      let res = ''
+      let i = start
+      const pushback = [] // local and auto-cleared
+      // First, dump everything until EOF
+      // Same as the full loop, but without EOF handling
+      while (i < end || pushback.length > 0) {
+        const b = pushback.length > 0 ? pushback.pop() : arr[i++]
+        if (g3) {
+          if (b < 0x30 || b > 0x39) {
+            pushback.push(b, g3, g2)
+            g1 = g2 = g3 = 0
+            res += String.fromCharCode(err())
+          } else {
+            const p = index((g1 - 0x81) * 12_600 + (g2 - 0x30) * 1260 + (g3 - 0x81) * 10 + b - 0x30)
+            g1 = g2 = g3 = 0
+            if (p === undefined) {
+              res += String.fromCharCode(err())
+            } else {
+              res += String.fromCodePoint(p) // Can validly return replacement
+            }
+          }
+        } else if (g2) {
+          if (b >= 0x81 && b <= 0xfe) {
+            g3 = b
+          } else {
+            pushback.push(b, g2)
+            g1 = g2 = 0
+            res += String.fromCharCode(err())
+          }
+        } else if (g1) {
+          if (b >= 0x30 && b <= 0x39) {
+            g2 = b
+          } else {
+            let cp
+            if (b >= 0x40 && b <= 0xfe && b !== 0x7f) {
+              cp = gb18030[(g1 - 0x81) * 190 + b - (b < 0x7f ? 0x40 : 0x41)]
+            }
-      if (g2) {
-        if (b >= 0x81 && b <= 0xfe) {
-          g3 = b
-          return -1
+            g1 = 0
+            if (cp !== undefined && cp !== REP) {
+              res += String.fromCodePoint(cp)
+            } else {
+              res += String.fromCharCode(err())
+              if (b < 128) res += String.fromCharCode(b) // can be processed immediately
+            }
+          }
+        } else if (b < 128) {
+          res += String.fromCharCode(b)
+        } else if (b === 0x80) {
+          res += '\u20AC'
+        } else if (b === 0xff) {
+          res += String.fromCharCode(err())
+        } else {
+          g1 = b
         }
-        pushback.push(b, g2)
-        g1 = g2 = 0
-        return -2
       }
-      if (g1) {
-        if (b >= 0x30 && b <= 0x39) {
-          g2 = b
-          return -1
-        }
-        let cp
-        if (b >= 0x40 && b <= 0xfe && b !== 0x7f) {
-          cp = gb18030[(g1 - 0x81) * 190 + b - (b < 0x7f ? 0x40 : 0x41)]
-        }
-        g1 = 0
-        if (cp !== undefined && cp !== REP) return cp
-        if (b < 128) pushback.push(b)
-        return -2
+      // if g1 = 0 then g2 = g3 = 0
+      if (g1 && !stream) {
+        g1 = g2 = g3 = 0
+        res += String.fromCharCode(err())
       }
-      if (b < 128) return b
-      if (b === 0x80) return 0x20_ac
-      if (b === 0xff) return -2
-      g1 = b
-      return -1
+      return res
     }
-    const eof = () => {
-      if (!g1 && !g2 && !g3) return null
-      g1 = g2 = g3 = 0
-      return -2
-    }
-    return { bytes, eof, pushback }
+    return { decode, isAscii: () => g1 === 0 } // if g1 = 0 then g2 = g3 = 0
+  },
+  // https://encoding.spec.whatwg.org/#big5
+  big5: (err) => {
+    // The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints
+    // We store that as strings
+    const big5 = getTable('big5')
+    return bigDecoder(err, (l, b) => {
+      if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) return
+      return big5[(l - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)] // strings
+    })
   },
 }
 export const isAsciiSuperset = (enc) => enc !== 'iso-2022-jp' // all others are ASCII supersets and can use fast path
 export function multibyteDecoder(enc, loose = false) {
-  if (enc === 'big5') return big5decoder(loose)
   if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding')
   // Input is assumed to be typechecked already
   let mapper
   const asciiSuperset = isAsciiSuperset(enc)
-  return (arr, stream = false) => {
-    const onErr = loose
-      ? () => '\uFFFD'
-      : () => {
-          mapper.pushback.length = 0 // the queue is cleared on returning an error
-          // The correct way per spec seems to be not destoying the decoder state in stream mode, even when fatal
-          // Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
-          // iso-2022-jp is the only tricky one one where this !stream check matters in non-stream mode
-          if (!stream) mapper = null // destroy state, effectively the same as 'do not flush' = false, but early
-          throw new TypeError(E_STRICT)
-        }
-    let res = ''
-    const length = arr.length
-    if (asciiSuperset && !mapper) {
-      res = decodeLatin1(arr, 0, asciiPrefix(arr))
-      if (res.length === arr.length) return res // ascii
-    }
-    if (!mapper) mapper = mappers[enc]()
-    const { bytes, eof, pushback } = mapper
-    let i = res.length
-    // First, dump everything until EOF
-    // Same as the full loop, but without EOF handling
-    while (i < length || pushback.length > 0) {
-      const c = bytes(pushback.length > 0 ? pushback.pop() : arr[i++])
-      if (c >= 0) {
-        res += String.fromCodePoint(c) // gb18030 returns codepoints above 0xFFFF from ranges
-      } else if (c === -2) {
-        res += onErr()
+  let streaming // because onErr is cached in mapper
+  const onErr = loose
+    ? () => REP
+    : () => {
+        // The correct way per spec seems to be not destroying the decoder state in stream mode, even when fatal
+        // Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
+        // iso-2022-jp is the only tricky one where this !streaming check matters in non-stream mode
+        if (!streaming) mapper = null // destroy state, effectively the same as 'do not flush' = false, but early
+        throw new TypeError(E_STRICT)
       }
-    }
-    // Then, dump EOF. This needs the same loop as the characters can be pushed back
-    // TODO: only some encodings need this, most can be optimized
-    if (!stream) {
-      while (i <= length || pushback.length > 0) {
-        const isEOF = i === length && pushback.length === 0
-        const c = isEOF ? eof() : bytes(pushback.length > 0 ? pushback.pop() : arr[i++])
-        if (isEOF && c === null) break // clean exit
-        if (c === -1) continue // consuming
-        if (c === -2) {
-          res += onErr()
-        } else {
-          res += String.fromCodePoint(c) // gb18030 returns codepoints above 0xFFFF from ranges
-        }
-      }
-    }
-    // Chrome and WebKit fail on this, we don't: completely destroy the old decoder instance when finished streaming
-    // > If this’s do not flush is false, then set this’s decoder to a new instance of this’s encoding’s decoder,
-    // > Set this’s do not flush to options["stream"]
-    if (!stream) mapper = null
-    return res
-  }
-}
-// The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints
-// We store that as strings
-function big5decoder(loose) {
-  // Input is assumed to be typechecked already
-  let lead = 0
-  let big5
-  const pushback = []
   return (arr, stream = false) => {
-    const onErr = loose
-      ? () => '\uFFFD'
-      : () => {
-          pushback.length = 0 // the queue is cleared on returning an error
-          // Lead is always already cleared before throwing
-          throw new TypeError(E_STRICT)
-        }
     let res = ''
-    const length = arr.length
-    if (!lead) {
+    if (asciiSuperset && (!mapper || mapper.isAscii?.())) {
       res = decodeLatin1(arr, 0, asciiPrefix(arr))
       if (res.length === arr.length) return res // ascii
     }
-    if (!big5) big5 = getTable('big5')
-    for (let i = res.length; i < length || pushback.length > 0; ) {
-      const b = pushback.length > 0 ? pushback.pop() : arr[i++]
-      if (lead) {
-        let cp
-        if ((b >= 0x40 && b <= 0x7e) || (b >= 0xa1 && b !== 0xff)) {
-          cp = big5[(lead - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)]
-        }
-        lead = 0
-        if (cp) {
-          res += cp // strings
-        } else {
-          res += onErr()
-          if (b < 128) pushback.push(b)
-        }
-      } else if (b < 128) {
-        res += String.fromCharCode(b)
-      } else if (b < 0x81 || b === 0xff) {
-        res += onErr()
-      } else {
-        lead = b
-      }
-    }
-    if (!stream) {
-      // Destroy decoder state
-      pushback.length = 0
-      if (lead) {
-        lead = 0
-        res += onErr()
-      }
-    }
-    return res
+    streaming = stream // affects onErr
+    if (!mapper) mapper = mappers[enc](onErr)
+    return res + mapper.decode(arr, res.length, arr.length, stream)
   }
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@exodus/bytes",
-  "version": "1.3.0",
+  "version": "1.5.0",
   "description": "Various operations on Uint8Array data",
   "scripts": {
     "lint": "eslint .",

package/wif.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { toBase58checkSync, fromBase58checkSync } from './base58check.js'
+import { toBase58checkSync, fromBase58checkSync } from '@exodus/bytes/base58check.js'
 import { assertUint8 } from './assert.js'
 // Mostly matches npmjs.com/wif, but with extra checks + using our base58check