npm - @exodus/bytes - Versions diffs - 1.0.0-rc.4 → 1.0.0-rc.6 - Mend

@exodus/bytes 1.0.0-rc.4 → 1.0.0-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/fallback/utf8.js CHANGED Viewed

@@ -5,72 +5,53 @@ const replacementPoint = 0xff_fd
 // https://encoding.spec.whatwg.org/#utf-8-decoder
 // We are most likely in loose mode, for non-loose escape & decodeURIComponent solved everything
-export function decode(arr, loose) {
-  const start = 0
+export function decode(arr, loose, start = 0) {
+  start |= 0
   const end = arr.length
   let out = ''
-  const tmp = []
+  const chunkSize = 0x2_00 // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
+  const tmpSize = Math.min(end - start, chunkSize + 1) // need 1 extra slot for last codepoint, which can be 2 charcodes
+  const tmp = new Array(tmpSize).fill(0)
+  let ti = 0
   for (let i = start; i < end; i++) {
-    if (tmp.length > 0x2_00) {
-      // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
-      // length can be off by a few as large code points produce two utf-16 char codes, also we overshoot in unrolled loop
+    if (ti >= chunkSize) {
+      tmp.length = ti // can be larger by 1 if last codepoint is two charcodes
       out += String.fromCharCode.apply(String, tmp)
-      tmp.length = 0
+      if (tmp.length <= chunkSize) tmp.push(0) // restore 1 extra slot for last codepoint
+      ti = 0
     }
     const byte = arr[i]
     if (byte < 0x80) {
-      // Fast path ascii
-      tmp.push(byte)
-      // Unroll the loop a bit for faster ops, overshoot by 20 chars
-      for (let j = 0; j < 5; j++) {
-        if (i + 1 >= end) break
-        const byte1 = arr[i + 1]
-        if (byte1 >= 0x80) break
-        tmp.push(byte1)
-        i++
-        if (i + 1 >= end) break
-        const byte2 = arr[i + 1]
-        if (byte2 >= 0x80) break
-        tmp.push(byte2)
-        i++
-        if (i + 1 >= end) break
-        const byte3 = arr[i + 1]
-        if (byte3 >= 0x80) break
-        tmp.push(byte3)
-        i++
-        if (i + 1 >= end) break
-        const byte4 = arr[i + 1]
-        if (byte4 >= 0x80) break
-        tmp.push(byte4)
-        i++
-      }
+      tmp[ti++] = byte
+      // ascii fast path is in ../utf8.js, this is called only on non-ascii input
+      // so we don't unroll this anymore
     } else if (byte < 0xc2) {
       if (!loose) throw new TypeError(E_STRICT)
-      tmp.push(replacementPoint)
+      tmp[ti++] = replacementPoint
     } else if (byte < 0xe0) {
       // need 1 more
       if (i + 1 >= end) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp.push(replacementPoint)
+        tmp[ti++] = replacementPoint
         break
       }
       const byte1 = arr[i + 1]
       if (byte1 < 0x80 || byte1 > 0xbf) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp.push(replacementPoint)
+        tmp[ti++] = replacementPoint
         continue
       }
       i++
-      tmp.push(((byte & 0x1f) << 6) | (byte1 & 0x3f))
+      tmp[ti++] = ((byte & 0x1f) << 6) | (byte1 & 0x3f)
     } else if (byte < 0xf0) {
       // need 2 more
       if (i + 1 >= end) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp.push(replacementPoint)
+        tmp[ti++] = replacementPoint
         break
       }
@@ -79,31 +60,31 @@ export function decode(arr, loose) {
       const byte1 = arr[i + 1]
       if (byte1 < lower || byte1 > upper) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp.push(replacementPoint)
+        tmp[ti++] = replacementPoint
         continue
       }
       i++
       if (i + 1 >= end) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp.push(replacementPoint)
+        tmp[ti++] = replacementPoint
         break
       }
       const byte2 = arr[i + 1]
       if (byte2 < 0x80 || byte2 > 0xbf) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp.push(replacementPoint)
+        tmp[ti++] = replacementPoint
         continue
       }
       i++
-      tmp.push(((byte & 0xf) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f))
+      tmp[ti++] = ((byte & 0xf) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f)
     } else if (byte <= 0xf4) {
       // need 3 more
       if (i + 1 >= end) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp.push(replacementPoint)
+        tmp[ti++] = replacementPoint
         break
       }
@@ -112,35 +93,35 @@ export function decode(arr, loose) {
       const byte1 = arr[i + 1]
       if (byte1 < lower || byte1 > upper) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp.push(replacementPoint)
+        tmp[ti++] = replacementPoint
         continue
       }
       i++
       if (i + 1 >= end) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp.push(replacementPoint)
+        tmp[ti++] = replacementPoint
         break
       }
       const byte2 = arr[i + 1]
       if (byte2 < 0x80 || byte2 > 0xbf) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp.push(replacementPoint)
+        tmp[ti++] = replacementPoint
         continue
       }
       i++
       if (i + 1 >= end) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp.push(replacementPoint)
+        tmp[ti++] = replacementPoint
         break
       }
       const byte3 = arr[i + 1]
       if (byte3 < 0x80 || byte3 > 0xbf) {
         if (!loose) throw new TypeError(E_STRICT)
-        tmp.push(replacementPoint)
+        tmp[ti++] = replacementPoint
         continue
       }
@@ -150,71 +131,65 @@ export function decode(arr, loose) {
       if (codePoint > 0xff_ff) {
         // split into char codes as String.fromCharCode is faster than String.fromCodePoint
         const u = codePoint - 0x1_00_00
-        tmp.push(0xd8_00 + ((u >> 10) & 0x3_ff), 0xdc_00 + (u & 0x3_ff))
+        tmp[ti++] = 0xd8_00 + ((u >> 10) & 0x3_ff)
+        tmp[ti++] = 0xdc_00 + (u & 0x3_ff)
       } else {
-        tmp.push(codePoint)
+        tmp[ti++] = codePoint
       }
       // eslint-disable-next-line sonarjs/no-duplicated-branches
     } else {
       if (!loose) throw new TypeError(E_STRICT)
-      tmp.push(replacementPoint)
+      tmp[ti++] = replacementPoint
     }
   }
-  if (tmp.length > 0) out += String.fromCharCode.apply(String, tmp)
-  return out
+  if (ti === 0) return out
+  tmp.length = ti
+  return out + String.fromCharCode.apply(String, tmp)
 }
 export function encode(string, loose) {
   const length = string.length
-  let lead = null
   let small = true
   let bytes = new Uint8Array(length) // assume ascii
   let p = 0
   for (let i = 0; i < length; i++) {
-    const code = string.charCodeAt(i)
+    let code = string.charCodeAt(i)
     if (code < 0x80) {
-      // Fast path for ascii
-      if (lead) {
-        if (!loose) throw new TypeError(E_STRICT_UNICODE)
-        bytes[p++] = 0xef
-        bytes[p++] = 0xbf
-        bytes[p++] = 0xbd
-        lead = null
-      }
       bytes[p++] = code
       // Unroll the loop a bit for faster ops
-      for (let j = 0; j < 5; j++) {
-        if (i + 1 >= length) break
-        const c1 = string.charCodeAt(i + 1)
-        if (c1 >= 0x80) break
-        bytes[p++] = c1
+      while (true) {
         i++
-        if (i + 1 >= length) break
-        const c2 = string.charCodeAt(i + 1)
-        if (c2 >= 0x80) break
-        bytes[p++] = c2
+        if (i >= length) break
+        code = string.charCodeAt(i)
+        if (code >= 0x80) break
+        bytes[p++] = code
         i++
-        if (i + 1 >= length) break
-        const c3 = string.charCodeAt(i + 1)
-        if (c3 >= 0x80) break
-        bytes[p++] = c3
+        if (i >= length) break
+        code = string.charCodeAt(i)
+        if (code >= 0x80) break
+        bytes[p++] = code
         i++
-        if (i + 1 >= length) break
-        const c4 = string.charCodeAt(i + 1)
-        if (c4 >= 0x80) break
-        bytes[p++] = c4
+        if (i >= length) break
+        code = string.charCodeAt(i)
+        if (code >= 0x80) break
+        bytes[p++] = code
         i++
+        if (i >= length) break
+        code = string.charCodeAt(i)
+        if (code >= 0x80) break
+        bytes[p++] = code
       }
-      continue
+      if (i >= length) break
+      // now, code is present and >= 0x80
     }
     if (small) {
       // TODO: use resizable array buffers? will have to return a non-resizeable one
-      const bytesNew = new Uint8Array(length * 3) // maximium can be 3x of the string length in charcodes
+      if (p !== i) throw new Error('Unreachable') // Here, p === i (only when small is still true)
+      const bytesNew = new Uint8Array(p + (length - i) * 3) // maximium can be 3x of the string length in charcodes
       bytesNew.set(bytes)
       bytes = bytesNew
       small = false
@@ -224,45 +199,35 @@ export function encode(string, loose) {
     // lead: d800 - dbff
     // trail: dc00 - dfff
     if (code >= 0xd8_00 && code < 0xe0_00) {
-      if (lead && code < 0xdc_00) {
-        // a second lead, meaning the previous one was unpaired
+      // Can't be a valid trail as we already processed that below
+      if (code > 0xdb_ff || i + 1 >= length) {
+        // An unexpected trail or a lead at the very end of input
         if (!loose) throw new TypeError(E_STRICT_UNICODE)
         bytes[p++] = 0xef
         bytes[p++] = 0xbf
         bytes[p++] = 0xbd
-        lead = null
-        // code is still processed as a new lead
+        continue
       }
-      if (!lead) {
-        if (code > 0xdb_ff || i + 1 >= length) {
-          // lead out of range || unpaired
-          if (!loose) throw new TypeError(E_STRICT_UNICODE)
-          bytes[p++] = 0xef
-          bytes[p++] = 0xbf
-          bytes[p++] = 0xbd
-          continue
-        }
-        lead = code
-        continue
+      const next = string.charCodeAt(i + 1) // Process valid pairs immediately
+      if (next >= 0xdc_00 && next < 0xe0_00) {
+        // here, codePoint is always between 0x1_00_00 and 0x11_00_00, we encode as 4 bytes
+        const codePoint = (((code - 0xd8_00) << 10) | (next - 0xdc_00)) + 0x1_00_00
+        bytes[p++] = (codePoint >> 18) | 0xf0
+        bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
+        bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
+        bytes[p++] = (codePoint & 0x3f) | 0x80
+        i++ // consume next
+      } else {
+        // Next is not a trail, leave next unconsumed but process unmatched lead error
+        if (!loose) throw new TypeError(E_STRICT_UNICODE)
+        bytes[p++] = 0xef
+        bytes[p++] = 0xbf
+        bytes[p++] = 0xbd
       }
-      // here, codePoint is always between 0x1_00_00 and 0x11_00_00, we encode as 4 bytes
-      const codePoint = (((lead - 0xd8_00) << 10) | (code - 0xdc_00)) + 0x1_00_00
-      bytes[p++] = (codePoint >> 18) | 0xf0
-      bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
-      bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
-      bytes[p++] = (codePoint & 0x3f) | 0x80
-      lead = null
       continue
-    } else if (lead) {
-      if (!loose) throw new TypeError(E_STRICT_UNICODE)
-      bytes[p++] = 0xef
-      bytes[p++] = 0xbf
-      bytes[p++] = 0xbd
-      lead = null
-      // code is still processed
     }
     // We are left with a non-pair char code above ascii, it gets encoded to 2 or 3 bytes

package/hex.js CHANGED Viewed

@@ -6,6 +6,8 @@ const { Buffer } = globalThis // Buffer is optional, only used when native
 const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
 const { toHex: webHex } = Uint8Array.prototype // Modern engines have this
+const { E_HEX } = js
 export function toHex(arr) {
   assertUint8(arr)
   if (arr.length === 0) return ''
@@ -18,4 +20,12 @@ export function toHex(arr) {
 // Unlike Buffer.from(), throws on invalid input
 export const fromHex = Uint8Array.fromHex
   ? (str, format = 'uint8') => typedView(Uint8Array.fromHex(str), format)
-  : (str, format = 'uint8') => typedView(js.fromHex(str), format)
+  : haveNativeBuffer
+    ? (str, format = 'uint8') => {
+        if (typeof str !== 'string') throw new TypeError('Input is not a string')
+        if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
+        const buf = Buffer.from(str, 'hex') // will stop on first non-hex character, so we can just validate length
+        if (buf.length * 2 !== str.length) throw new SyntaxError(E_HEX)
+        return typedView(buf, format)
+      }
+    : (str, format = 'uint8') => typedView(js.fromHex(str), format)

package/package.json CHANGED Viewed

@@ -1,12 +1,11 @@
 {
   "name": "@exodus/bytes",
-  "version": "1.0.0-rc.4",
+  "version": "1.0.0-rc.6",
   "description": "Various operations on Uint8Array data",
   "scripts": {
     "lint": "eslint .",
-    "test:v8": "npm run test:d8 --",
     "test:javascriptcore": "npm run test:jsc --",
-    "test:d8": "exodus-test --engine=d8:bundle",
+    "test:v8": "exodus-test --engine=v8:bundle",
     "test:jsc": "exodus-test --engine=jsc:bundle",
     "test:spidermonkey": "exodus-test --engine=spidermonkey:bundle",
     "test:hermes": "exodus-test --engine=hermes:bundle",
@@ -40,6 +39,7 @@
   "type": "module",
   "files": [
     "/fallback/_utils.js",
+    "/fallback/latin1.js",
     "/fallback/base32.js",
     "/fallback/base64.js",
     "/fallback/hex.js",
@@ -47,6 +47,8 @@
     "/array.js",
     "/assert.js",
     "/base32.js",
+    "/base58.js",
+    "/base58check.js",
     "/base64.js",
     "/hex.js",
     "/utf8.js"
@@ -54,26 +56,47 @@
   "exports": {
     "./array.js": "./array.js",
     "./base32.js": "./base32.js",
+    "./base58.js": "./base58.js",
+    "./base58check.js": "./base58check.js",
     "./base64.js": "./base64.js",
     "./hex.js": "./hex.js",
     "./utf8.js": "./utf8.js"
   },
+  "peerDependencies": {
+    "@exodus/crypto": "^1.0.0-rc.4"
+  },
+  "peerDependenciesMeta": {
+    "@exodus/crypto": {
+      "optional": true
+    }
+  },
   "devDependencies": {
+    "@ethersproject/strings": "^5.8.0",
+    "@exodus/crypto": "1.0.0-rc.29",
     "@exodus/eslint-config": "^5.24.0",
     "@exodus/prettier": "^1.0.0",
-    "@exodus/test": "^1.0.0-rc.105",
+    "@exodus/test": "^1.0.0-rc.108",
+    "@noble/hashes": "^2.0.1",
     "@scure/base": "^1.2.6",
-    "@types/node": "^24.0.10",
+    "@stablelib/base64": "^2.0.1",
+    "@stablelib/hex": "^2.0.1",
+    "@types/node": "^22.13.0",
     "base-x": "^5.0.1",
     "base32.js": "^0.1.0",
     "base64-js": "^1.5.1",
+    "bs58": "^6.0.0",
+    "bs58check": "^4.0.0",
+    "bstring": "^0.3.9",
     "buffer": "^6.0.3",
     "electron": "36.5.0",
     "eslint": "^8.44.0",
     "fast-base64-decode": "^2.0.0",
+    "fast-base64-encode": "^1.0.0",
+    "hextreme": "^1.0.7",
     "hi-base32": "^0.5.1",
     "jsvu": "^3.0.0",
-    "text-encoding": "^0.7.0"
+    "text-encoding": "^0.7.0",
+    "typescript": "^5.9.3"
   },
   "prettier": "@exodus/prettier",
   "packageManager": "pnpm@10.12.1+sha256.889bac470ec93ccc3764488a19d6ba8f9c648ad5e50a9a6e4be3768a5de387a3"

package/utf8.js CHANGED Viewed

@@ -1,6 +1,7 @@
-import { assert, assertUint8 } from './assert.js'
+import { assertUint8 } from './assert.js'
 import { typedView } from './array.js'
 import * as js from './fallback/utf8.js'
+import { asciiPrefix, decodeLatin1 } from './fallback/latin1.js'
 const { Buffer, TextEncoder, TextDecoder, decodeURIComponent, escape } = globalThis // Buffer is optional
 const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
@@ -11,13 +12,20 @@ const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
 // We don't want to strip anything unexpectedly
 const decoderFatal = haveDecoder ? new TextDecoder('utf8', { ignoreBOM: true, fatal: true }) : null
 const decoderLoose = haveDecoder ? new TextDecoder('utf8', { ignoreBOM: true }) : null
+const { isWellFormed } = String.prototype
 const { E_STRICT, E_STRICT_UNICODE } = js
 const shouldUseEscapePath = Boolean(globalThis.HermesInternal) // faster only on Hermes, js path beats it on normal engines
 function deLoose(str, loose, res) {
-  if (loose) return res
+  if (loose || str.length === res.length) return res // length is equal only for ascii, which is automatically fine
+  if (isWellFormed) {
+    // We have a fast native method
+    if (isWellFormed.call(str)) return res
+    throw new TypeError(E_STRICT_UNICODE)
+  }
   // Recheck if the string was encoded correctly
   let start = 0
   const last = res.length - 2
@@ -28,7 +36,7 @@ function deLoose(str, loose, res) {
     start = pos + 1
     if (res[pos + 1] === 0xbf && res[pos + 2] === 0xbd) {
       // Found a replacement char in output, need to recheck if we encoded the input correctly
-      assert(str === decode(res), E_STRICT_UNICODE)
+      if (str !== decode(res)) throw new TypeError(E_STRICT_UNICODE)
       return res
     }
   }
@@ -37,71 +45,34 @@ function deLoose(str, loose, res) {
 }
 function encode(str, loose = false) {
-  assert(typeof str === 'string')
+  if (typeof str !== 'string') throw new TypeError('Input is not a string')
   if (haveNativeBuffer) return deLoose(str, loose, Buffer.from(str)) // faster on ascii on Node.js
   if (nativeEncoder) return deLoose(str, loose, nativeEncoder.encode(str)) // Node.js, browsers, and Hermes
   // No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder
   return js.encode(str, loose)
 }
-let escapes
-function toEscapesPart(arr, start, end) {
-  let o = ''
-  let i = start
-  const last3 = end - 3
-  // Unrolled loop is faster
-  while (i < last3) {
-    const a = arr[i++]
-    const b = arr[i++]
-    const c = arr[i++]
-    const d = arr[i++]
-    o += escapes[a]
-    o += escapes[b]
-    o += escapes[c]
-    o += escapes[d]
-  }
-  while (i < end) o += escapes[arr[i++]]
-  return o
-}
 function decode(arr, loose = false) {
   assertUint8(arr)
   if (haveDecoder) return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr) // Node.js and browsers
   // No reason to use native Buffer: it's not faster than TextDecoder, needs rechecks in non-loose mode, and Node.js has TextDecoder
-  // This codepath gives a ~2x perf boost on Hermes
-  if (shouldUseEscapePath && escape && decodeURIComponent) {
-    if (!escapes) escapes = Array.from({ length: 256 }, (_, i) => escape(String.fromCharCode(i)))
-    const length = arr.length
-    let o
-    if (length > 30_000) {
-      // Limit concatenation to avoid excessive GC
-      // TODO: recheck thresholds on Hermes (taken from hex)
-      const concat = []
-      for (let i = 0; i < length; ) {
-        const step = i + 500
-        const end = step > length ? length : step
-        concat.push(toEscapesPart(arr, i, end))
-        i = end
-      }
-      o = concat.join('')
-      concat.length = 0
-    } else {
-      o = toEscapesPart(arr, 0, length)
-    }
+  // Fast path for ASCII prefix, this is faster than all alternatives below
+  const prefix = decodeLatin1(arr, 0, asciiPrefix(arr))
+  if (prefix.length === arr.length) return prefix
+  // This codepath gives a ~3x perf boost on Hermes
+  if (shouldUseEscapePath && escape && decodeURIComponent) {
+    const o = escape(decodeLatin1(arr, prefix.length, arr.length))
     try {
-      return decodeURIComponent(o) // asci to utf8, escape() is precalucated
+      return prefix + decodeURIComponent(o) // Latin1 to utf8
     } catch {
       if (!loose) throw new TypeError(E_STRICT)
       // Ok, we have to use manual implementation for loose decoder
     }
   }
-  return js.decode(arr, loose)
+  return prefix + js.decode(arr, loose, prefix.length)
 }
 export const utf8fromString = (str, format = 'uint8') => typedView(encode(str, false), format)