@exodus/bytes 1.0.0-rc.5 → 1.0.0-rc.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/fallback/hex.js CHANGED
@@ -1,37 +1,105 @@
1
1
  import { assertUint8 } from '../assert.js'
2
- import { nativeEncoder } from './_utils.js'
2
+ import { nativeDecoder, nativeEncoder } from './_utils.js'
3
+ import { encodeAscii, decodeAscii } from './latin1.js'
3
4
 
4
- let hexArray
5
+ let hexArray // array of 256 bytes converted to two-char hex strings
6
+ let hexCodes // hexArray converted to u16 code pairs
5
7
  let dehexArray
8
+ const _00 = 0x30_30 // '00' string in hex, the only allowed char pair to generate 0 byte
9
+ const _ff = 0x66_66 // 'ff' string in hex, max allowed char pair (larger than 'FF' string)
10
+ const allowed = '0123456789ABCDEFabcdef'
6
11
 
7
12
  export const E_HEX = 'Input is not a hex string'
8
13
 
9
- function toHexPart(arr, start, end) {
14
+ function toHexPartAddition(a, start, end) {
10
15
  let o = ''
11
16
  let i = start
12
- const last3 = end - 3
13
- // Unrolled loop is faster
14
- while (i < last3) {
15
- const a = arr[i++]
16
- const b = arr[i++]
17
- const c = arr[i++]
18
- const d = arr[i++]
19
- o += hexArray[a]
20
- o += hexArray[b]
21
- o += hexArray[c]
22
- o += hexArray[d]
17
+ const h = hexArray
18
+ for (const last3 = end - 3; i < last3; i += 4) {
19
+ const x0 = a[i]
20
+ const x1 = a[i + 1]
21
+ const x2 = a[i + 2]
22
+ const x3 = a[i + 3]
23
+ o += h[x0]
24
+ o += h[x1]
25
+ o += h[x2]
26
+ o += h[x3]
23
27
  }
24
28
 
25
- while (i < end) o += hexArray[arr[i++]]
29
+ while (i < end) o += h[a[i++]]
26
30
  return o
27
31
  }
28
32
 
33
+ // Optimization for Hermes which is the main user of fallback
34
+ function toHexPartTemplates(a, start, end) {
35
+ let o = ''
36
+ let i = start
37
+ const h = hexArray
38
+ for (const last15 = end - 15; i < last15; i += 16) {
39
+ const x0 = a[i]
40
+ const x1 = a[i + 1]
41
+ const x2 = a[i + 2]
42
+ const x3 = a[i + 3]
43
+ const x4 = a[i + 4]
44
+ const x5 = a[i + 5]
45
+ const x6 = a[i + 6]
46
+ const x7 = a[i + 7]
47
+ const x8 = a[i + 8]
48
+ const x9 = a[i + 9]
49
+ const x10 = a[i + 10]
50
+ const x11 = a[i + 11]
51
+ const x12 = a[i + 12]
52
+ const x13 = a[i + 13]
53
+ const x14 = a[i + 14]
54
+ const x15 = a[i + 15]
55
+ o += `${h[x0]}${h[x1]}${h[x2]}${h[x3]}${h[x4]}${h[x5]}${h[x6]}${h[x7]}${h[x8]}${h[x9]}${h[x10]}${h[x11]}${h[x12]}${h[x13]}${h[x14]}${h[x15]}`
56
+ }
57
+
58
+ while (i < end) o += h[a[i++]]
59
+ return o
60
+ }
61
+
62
+ // Using templates is significantly faster in Hermes and JSC
63
+ // It's harder to detect JSC and not important anyway as it has native impl, so we detect only Hermes
64
+ const toHexPart = globalThis.HermesInternal ? toHexPartTemplates : toHexPartAddition
65
+
29
66
  export function toHex(arr) {
30
67
  assertUint8(arr)
31
68
 
32
69
  if (!hexArray) hexArray = Array.from({ length: 256 }, (_, i) => i.toString(16).padStart(2, '0'))
33
70
  const length = arr.length // this helps Hermes
34
71
 
72
+ // Only old browsers use this, barebone engines don't have TextDecoder
73
+ // But Hermes can use this when it (hopefully) implements TextDecoder
74
+ if (nativeDecoder) {
75
+ if (!hexCodes) {
76
+ hexCodes = new Uint16Array(256)
77
+ const u8 = new Uint8Array(hexCodes.buffer, hexCodes.byteOffset, hexCodes.byteLength)
78
+ for (let i = 0; i < 256; i++) {
79
+ const pair = hexArray[i]
80
+ u8[2 * i] = pair.charCodeAt(0)
81
+ u8[2 * i + 1] = pair.charCodeAt(1)
82
+ }
83
+ }
84
+
85
+ const oa = new Uint16Array(length)
86
+ let i = 0
87
+ for (const last3 = arr.length - 3; ; i += 4) {
88
+ if (i >= last3) break // loop is fast enough for moving this here to be useful on JSC
89
+ const x0 = arr[i]
90
+ const x1 = arr[i + 1]
91
+ const x2 = arr[i + 2]
92
+ const x3 = arr[i + 3]
93
+ oa[i] = hexCodes[x0]
94
+ oa[i + 1] = hexCodes[x1]
95
+ oa[i + 2] = hexCodes[x2]
96
+ oa[i + 3] = hexCodes[x3]
97
+ }
98
+
99
+ for (; i < length; i++) oa[i] = hexCodes[arr[i]]
100
+ return decodeAscii(oa)
101
+ }
102
+
35
103
  if (length > 30_000) {
36
104
  // Limit concatenation to avoid excessive GC
37
105
  // Thresholds checked on Hermes
@@ -51,51 +119,74 @@ export function toHex(arr) {
51
119
  return toHexPart(arr, 0, length)
52
120
  }
53
121
 
54
- // TODO: can this be optimized? This only affects non-Hermes barebone engines though
55
- const mapSize = nativeEncoder ? 256 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array
56
-
57
122
  export function fromHex(str) {
58
123
  if (typeof str !== 'string') throw new TypeError('Input is not a string')
59
124
  if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
60
125
 
61
- if (!dehexArray) {
62
- dehexArray = new Int8Array(mapSize).fill(-1) // no regex input validation here, so we map all other bytes to -1 and recheck sign
63
- for (let i = 0; i < 16; i++) {
64
- const s = i.toString(16)
65
- dehexArray[s.charCodeAt(0)] = dehexArray[s.toUpperCase().charCodeAt(0)] = i
66
- }
67
- }
68
-
69
126
  const length = str.length / 2 // this helps Hermes in loops
70
127
  const arr = new Uint8Array(length)
71
- let j = 0
128
+
129
+ // Native encoder path is beneficial even for small arrays in Hermes
72
130
  if (nativeEncoder) {
73
- // Native encoder path is beneficial even for small arrays in Hermes
74
- const codes = nativeEncoder.encode(str)
75
- if (codes.length !== str.length) throw new SyntaxError(E_HEX) // non-ascii
76
- const last3 = length - 3 // Unroll nativeEncoder path as this is what modern Hermes takes and a small perf improvement is nice there
131
+ if (!dehexArray) {
132
+ dehexArray = new Uint8Array(_ff + 1) // 26 KiB cache, >2x perf improvement on Hermes
133
+ const u8 = new Uint8Array(2)
134
+ const u16 = new Uint16Array(u8.buffer, u8.byteOffset, 1) // for endianness-agnostic transform
135
+ const map = [...allowed].map((c) => [c.charCodeAt(0), parseInt(c, 16)])
136
+ for (const [ch, vh] of map) {
137
+ u8[0] = ch // first we read high hex char
138
+ for (const [cl, vl] of map) {
139
+ u8[1] = cl // then we read low hex char
140
+ dehexArray[u16[0]] = (vh << 4) | vl
141
+ }
142
+ }
143
+ }
144
+
145
+ const codes = encodeAscii(str, E_HEX)
146
+ const codes16 = new Uint16Array(codes.buffer, codes.byteOffset, codes.byteLength / 2)
77
147
  let i = 0
78
- while (i < last3) {
79
- const a = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
80
- const b = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
81
- const c = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
82
- const d = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
83
- if (a < 0 || b < 0 || c < 0 || d < 0) throw new SyntaxError(E_HEX)
84
- arr[i++] = a
85
- arr[i++] = b
86
- arr[i++] = c
87
- arr[i++] = d
148
+ for (const last3 = length - 3; i < last3; i += 4) {
149
+ const ai = codes16[i]
150
+ const bi = codes16[i + 1]
151
+ const ci = codes16[i + 2]
152
+ const di = codes16[i + 3]
153
+ const a = dehexArray[ai]
154
+ const b = dehexArray[bi]
155
+ const c = dehexArray[ci]
156
+ const d = dehexArray[di]
157
+ if ((!a && ai !== _00) || (!b && bi !== _00) || (!c && ci !== _00) || (!d && di !== _00)) {
158
+ throw new SyntaxError(E_HEX)
159
+ }
160
+
161
+ arr[i] = a
162
+ arr[i + 1] = b
163
+ arr[i + 2] = c
164
+ arr[i + 3] = d
88
165
  }
89
166
 
90
167
  while (i < length) {
91
- const res = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
92
- if (res < 0) throw new SyntaxError(E_HEX)
93
- arr[i++] = res
168
+ const ai = codes16[i]
169
+ const a = dehexArray[ai]
170
+ if (!a && ai !== _00) throw new SyntaxError(E_HEX)
171
+ arr[i++] = a
94
172
  }
95
173
  } else {
174
+ if (!dehexArray) {
175
+ // no regex input validation here, so we map all other bytes to -1 and recheck sign
176
+ // non-ASCII chars throw already though, so we should process only 0-127
177
+ dehexArray = new Int8Array(128).fill(-1)
178
+ for (let i = 0; i < 16; i++) {
179
+ const s = i.toString(16)
180
+ dehexArray[s.charCodeAt(0)] = dehexArray[s.toUpperCase().charCodeAt(0)] = i
181
+ }
182
+ }
183
+
184
+ let j = 0
96
185
  for (let i = 0; i < length; i++) {
97
- const res = (dehexArray[str.charCodeAt(j++)] << 4) | dehexArray[str.charCodeAt(j++)]
98
- if (res < 0) throw new SyntaxError(E_HEX)
186
+ const a = str.charCodeAt(j++)
187
+ const b = str.charCodeAt(j++)
188
+ const res = (dehexArray[a] << 4) | dehexArray[b]
189
+ if (res < 0 || (0x7f | a | b) !== 0x7f) throw new SyntaxError(E_HEX) // 0-127
99
190
  arr[i] = res
100
191
  }
101
192
  }
@@ -0,0 +1,113 @@
1
+ import { nativeEncoder, nativeDecoder, nativeDecoderLatin1, nativeBuffer } from './_utils.js'
2
+
3
+ // See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
4
+ // On Hermes, actual max is 0x20_000 minus current stack depth, 1/16 of that should be safe
5
+ const maxFunctionArgs = 0x20_00
6
+
7
+ export function asciiPrefix(arr) {
8
+ let p = 0 // verified ascii bytes
9
+ const length = arr.length
10
+ // Threshold tested on Hermes (worse on <=48, better on >=52)
11
+ // Also on v8 arrs of size <=64 might be on heap and using Uint32Array on them is unoptimal
12
+ if (length > 64) {
13
+ // Speedup with u32
14
+ const u32start = (4 - (arr.byteOffset & 3)) % 4 // offset start by this many bytes for alignment
15
+ for (; p < u32start; p++) if (arr[p] >= 0x80) return p
16
+ const u32length = ((arr.byteLength - u32start) / 4) | 0
17
+ const u32 = new Uint32Array(arr.buffer, arr.byteOffset + u32start, u32length)
18
+ let i = 0
19
+ for (const last3 = u32length - 3; ; p += 16, i += 4) {
20
+ if (i >= last3) break // loop is fast enough for moving this here to be _very_ useful, likely due to array access checks
21
+ const a = u32[i]
22
+ const b = u32[i + 1]
23
+ const c = u32[i + 2]
24
+ const d = u32[i + 3]
25
+ if (a & 0x80_80_80_80 || b & 0x80_80_80_80 || c & 0x80_80_80_80 || d & 0x80_80_80_80) break
26
+ }
27
+
28
+ for (; i < u32length; p += 4, i++) if (u32[i] & 0x80_80_80_80) break
29
+ }
30
+
31
+ for (; p < length; p++) if (arr[p] >= 0x80) return p
32
+ return length
33
+ }
34
+
35
+ // Capable of decoding Uint16Array to UTF-16 as well as Uint8Array to Latin-1
36
+ export function decodeLatin1(arr, start = 0, stop = arr.length) {
37
+ start |= 0
38
+ stop |= 0
39
+ const total = stop - start
40
+ if (total === 0) return ''
41
+ if (total > maxFunctionArgs) {
42
+ let prefix = ''
43
+ for (let i = start; i < stop; ) {
44
+ const i1 = Math.min(stop, i + maxFunctionArgs)
45
+ prefix += String.fromCharCode.apply(String, arr.subarray(i, i1))
46
+ i = i1
47
+ }
48
+
49
+ return prefix
50
+ }
51
+
52
+ const sliced = start === 0 && stop === arr.length ? arr : arr.subarray(start, stop)
53
+ return String.fromCharCode.apply(String, sliced)
54
+ }
55
+
56
+ // Does not check input, uses best available method
57
+ // Building an array for this is only faster than proper string concatenation when TextDecoder or native Buffer are available
58
+ export const decodeAscii = nativeBuffer
59
+ ? (a) =>
60
+ // Buffer is faster on Node.js (but only for long enough data), if we know that output is ascii
61
+ a.byteLength >= 0x3_00
62
+ ? nativeBuffer.from(a.buffer, a.byteOffset, a.byteLength).latin1Slice(0, a.byteLength) // .latin1Slice is faster than .asciiSlice
63
+ : nativeDecoder.decode(a) // On Node.js, utf8 decoder is faster than latin1
64
+ : nativeDecoderLatin1
65
+ ? (a) => nativeDecoderLatin1.decode(a) // On browsers (specifically WebKit), latin1 decoder is faster than utf8
66
+ : (a) => decodeLatin1(new Uint8Array(a.buffer, a.byteOffset, a.byteLength)) // Fallback. We shouldn't get here, constructing with strings directly is faster
67
+
68
+ /* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
69
+
70
+ export const encodeCharcodes = globalThis.HermesInternal
71
+ ? (str, arr) => {
72
+ const length = str.length
73
+ if (length > 64) {
74
+ const at = str.charCodeAt.bind(str) // faster on strings from ~64 chars on Hermes, but can be 10x slower on e.g. JSC
75
+ for (let i = 0; i < length; i++) arr[i] = at(i)
76
+ } else {
77
+ for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
78
+ }
79
+
80
+ return arr
81
+ }
82
+ : (str, arr) => {
83
+ const length = str.length
84
+ // Can be optimized with unrolling, but this is not used on non-Hermes atm
85
+ for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
86
+ return arr
87
+ }
88
+
89
+ /* eslint-enable @exodus/mutable/no-param-reassign-prop-only */
90
+
91
+ export const encodeLatin1 = (str) => encodeCharcodes(str, new Uint8Array(str.length))
92
+
93
+ // Expects nativeEncoder to be present
94
+ export const encodeAscii = globalThis.HermesInternal
95
+ ? (str, ERR) => {
96
+ // Much faster in Hermes
97
+ const codes = new Uint8Array(str.length + 4) // overshoot by a full utf8 char
98
+ const info = nativeEncoder.encodeInto(str, codes)
99
+ if (info.read !== str.length || info.written !== str.length) throw new SyntaxError(ERR) // non-ascii
100
+ return codes.subarray(0, str.length)
101
+ }
102
+ : nativeBuffer
103
+ ? (str, ERR) => {
104
+ // TextEncoder is slow on Node.js 24 / 25 (was ok on 22)
105
+ const codes = nativeBuffer.from(str, 'utf8') // ascii/latin1 coerces, we need to check
106
+ if (codes.length !== str.length) throw new SyntaxError(ERR) // non-ascii
107
+ return new Uint8Array(codes.buffer, codes.byteOffset, codes.byteLength)
108
+ }
109
+ : (str, ERR) => {
110
+ const codes = nativeEncoder.encode(str)
111
+ if (codes.length !== str.length) throw new SyntaxError(ERR) // non-ascii
112
+ return codes
113
+ }
package/fallback/utf8.js CHANGED
@@ -5,72 +5,53 @@ const replacementPoint = 0xff_fd
5
5
 
6
6
  // https://encoding.spec.whatwg.org/#utf-8-decoder
7
7
  // We are most likely in loose mode, for non-loose escape & decodeURIComponent solved everything
8
- export function decode(arr, loose) {
9
- const start = 0
8
+ export function decode(arr, loose, start = 0) {
9
+ start |= 0
10
10
  const end = arr.length
11
11
  let out = ''
12
- const tmp = []
12
+ const chunkSize = 0x2_00 // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
13
+ const tmpSize = Math.min(end - start, chunkSize + 1) // need 1 extra slot for last codepoint, which can be 2 charcodes
14
+ const tmp = new Array(tmpSize).fill(0)
15
+ let ti = 0
13
16
 
14
17
  for (let i = start; i < end; i++) {
15
- if (tmp.length > 0x2_00) {
16
- // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
17
- // length can be off by a few as large code points produce two utf-16 char codes, also we overshoot in unrolled loop
18
+ if (ti >= chunkSize) {
19
+ tmp.length = ti // can be larger by 1 if last codepoint is two charcodes
18
20
  out += String.fromCharCode.apply(String, tmp)
19
- tmp.length = 0
21
+ if (tmp.length <= chunkSize) tmp.push(0) // restore 1 extra slot for last codepoint
22
+ ti = 0
20
23
  }
21
24
 
22
25
  const byte = arr[i]
23
26
  if (byte < 0x80) {
24
- // Fast path ascii
25
- tmp.push(byte)
26
- // Unroll the loop a bit for faster ops, overshoot by 20 chars
27
- for (let j = 0; j < 5; j++) {
28
- if (i + 1 >= end) break
29
- const byte1 = arr[i + 1]
30
- if (byte1 >= 0x80) break
31
- tmp.push(byte1)
32
- i++
33
- if (i + 1 >= end) break
34
- const byte2 = arr[i + 1]
35
- if (byte2 >= 0x80) break
36
- tmp.push(byte2)
37
- i++
38
- if (i + 1 >= end) break
39
- const byte3 = arr[i + 1]
40
- if (byte3 >= 0x80) break
41
- tmp.push(byte3)
42
- i++
43
- if (i + 1 >= end) break
44
- const byte4 = arr[i + 1]
45
- if (byte4 >= 0x80) break
46
- tmp.push(byte4)
47
- i++
48
- }
27
+ tmp[ti++] = byte
28
+ // ascii fast path is in ../utf8.js, this is called only on non-ascii input
29
+ // so we don't unroll this anymore
49
30
  } else if (byte < 0xc2) {
50
31
  if (!loose) throw new TypeError(E_STRICT)
51
- tmp.push(replacementPoint)
32
+ tmp[ti++] = replacementPoint
52
33
  } else if (byte < 0xe0) {
53
34
  // need 1 more
54
35
  if (i + 1 >= end) {
55
36
  if (!loose) throw new TypeError(E_STRICT)
56
- tmp.push(replacementPoint)
37
+ tmp[ti++] = replacementPoint
57
38
  break
58
39
  }
59
40
 
60
41
  const byte1 = arr[i + 1]
61
42
  if (byte1 < 0x80 || byte1 > 0xbf) {
62
43
  if (!loose) throw new TypeError(E_STRICT)
63
- tmp.push(replacementPoint)
44
+ tmp[ti++] = replacementPoint
64
45
  continue
65
46
  }
66
47
 
67
48
  i++
68
- tmp.push(((byte & 0x1f) << 6) | (byte1 & 0x3f))
49
+ tmp[ti++] = ((byte & 0x1f) << 6) | (byte1 & 0x3f)
69
50
  } else if (byte < 0xf0) {
70
51
  // need 2 more
71
52
  if (i + 1 >= end) {
72
53
  if (!loose) throw new TypeError(E_STRICT)
73
- tmp.push(replacementPoint)
54
+ tmp[ti++] = replacementPoint
74
55
  break
75
56
  }
76
57
 
@@ -79,31 +60,31 @@ export function decode(arr, loose) {
79
60
  const byte1 = arr[i + 1]
80
61
  if (byte1 < lower || byte1 > upper) {
81
62
  if (!loose) throw new TypeError(E_STRICT)
82
- tmp.push(replacementPoint)
63
+ tmp[ti++] = replacementPoint
83
64
  continue
84
65
  }
85
66
 
86
67
  i++
87
68
  if (i + 1 >= end) {
88
69
  if (!loose) throw new TypeError(E_STRICT)
89
- tmp.push(replacementPoint)
70
+ tmp[ti++] = replacementPoint
90
71
  break
91
72
  }
92
73
 
93
74
  const byte2 = arr[i + 1]
94
75
  if (byte2 < 0x80 || byte2 > 0xbf) {
95
76
  if (!loose) throw new TypeError(E_STRICT)
96
- tmp.push(replacementPoint)
77
+ tmp[ti++] = replacementPoint
97
78
  continue
98
79
  }
99
80
 
100
81
  i++
101
- tmp.push(((byte & 0xf) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f))
82
+ tmp[ti++] = ((byte & 0xf) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f)
102
83
  } else if (byte <= 0xf4) {
103
84
  // need 3 more
104
85
  if (i + 1 >= end) {
105
86
  if (!loose) throw new TypeError(E_STRICT)
106
- tmp.push(replacementPoint)
87
+ tmp[ti++] = replacementPoint
107
88
  break
108
89
  }
109
90
 
@@ -112,35 +93,35 @@ export function decode(arr, loose) {
112
93
  const byte1 = arr[i + 1]
113
94
  if (byte1 < lower || byte1 > upper) {
114
95
  if (!loose) throw new TypeError(E_STRICT)
115
- tmp.push(replacementPoint)
96
+ tmp[ti++] = replacementPoint
116
97
  continue
117
98
  }
118
99
 
119
100
  i++
120
101
  if (i + 1 >= end) {
121
102
  if (!loose) throw new TypeError(E_STRICT)
122
- tmp.push(replacementPoint)
103
+ tmp[ti++] = replacementPoint
123
104
  break
124
105
  }
125
106
 
126
107
  const byte2 = arr[i + 1]
127
108
  if (byte2 < 0x80 || byte2 > 0xbf) {
128
109
  if (!loose) throw new TypeError(E_STRICT)
129
- tmp.push(replacementPoint)
110
+ tmp[ti++] = replacementPoint
130
111
  continue
131
112
  }
132
113
 
133
114
  i++
134
115
  if (i + 1 >= end) {
135
116
  if (!loose) throw new TypeError(E_STRICT)
136
- tmp.push(replacementPoint)
117
+ tmp[ti++] = replacementPoint
137
118
  break
138
119
  }
139
120
 
140
121
  const byte3 = arr[i + 1]
141
122
  if (byte3 < 0x80 || byte3 > 0xbf) {
142
123
  if (!loose) throw new TypeError(E_STRICT)
143
- tmp.push(replacementPoint)
124
+ tmp[ti++] = replacementPoint
144
125
  continue
145
126
  }
146
127
 
@@ -150,71 +131,65 @@ export function decode(arr, loose) {
150
131
  if (codePoint > 0xff_ff) {
151
132
  // split into char codes as String.fromCharCode is faster than String.fromCodePoint
152
133
  const u = codePoint - 0x1_00_00
153
- tmp.push(0xd8_00 + ((u >> 10) & 0x3_ff), 0xdc_00 + (u & 0x3_ff))
134
+ tmp[ti++] = 0xd8_00 + ((u >> 10) & 0x3_ff)
135
+ tmp[ti++] = 0xdc_00 + (u & 0x3_ff)
154
136
  } else {
155
- tmp.push(codePoint)
137
+ tmp[ti++] = codePoint
156
138
  }
157
139
  // eslint-disable-next-line sonarjs/no-duplicated-branches
158
140
  } else {
159
141
  if (!loose) throw new TypeError(E_STRICT)
160
- tmp.push(replacementPoint)
142
+ tmp[ti++] = replacementPoint
161
143
  }
162
144
  }
163
145
 
164
- if (tmp.length > 0) out += String.fromCharCode.apply(String, tmp)
165
- return out
146
+ if (ti === 0) return out
147
+ tmp.length = ti
148
+ return out + String.fromCharCode.apply(String, tmp)
166
149
  }
167
150
 
168
151
  export function encode(string, loose) {
169
152
  const length = string.length
170
- let lead = null
171
153
  let small = true
172
154
  let bytes = new Uint8Array(length) // assume ascii
173
155
  let p = 0
174
156
 
175
157
  for (let i = 0; i < length; i++) {
176
- const code = string.charCodeAt(i)
158
+ let code = string.charCodeAt(i)
177
159
  if (code < 0x80) {
178
- // Fast path for ascii
179
- if (lead) {
180
- if (!loose) throw new TypeError(E_STRICT_UNICODE)
181
- bytes[p++] = 0xef
182
- bytes[p++] = 0xbf
183
- bytes[p++] = 0xbd
184
- lead = null
185
- }
186
-
187
160
  bytes[p++] = code
188
161
  // Unroll the loop a bit for faster ops
189
- for (let j = 0; j < 5; j++) {
190
- if (i + 1 >= length) break
191
- const c1 = string.charCodeAt(i + 1)
192
- if (c1 >= 0x80) break
193
- bytes[p++] = c1
162
+ while (true) {
194
163
  i++
195
- if (i + 1 >= length) break
196
- const c2 = string.charCodeAt(i + 1)
197
- if (c2 >= 0x80) break
198
- bytes[p++] = c2
164
+ if (i >= length) break
165
+ code = string.charCodeAt(i)
166
+ if (code >= 0x80) break
167
+ bytes[p++] = code
199
168
  i++
200
- if (i + 1 >= length) break
201
- const c3 = string.charCodeAt(i + 1)
202
- if (c3 >= 0x80) break
203
- bytes[p++] = c3
169
+ if (i >= length) break
170
+ code = string.charCodeAt(i)
171
+ if (code >= 0x80) break
172
+ bytes[p++] = code
204
173
  i++
205
- if (i + 1 >= length) break
206
- const c4 = string.charCodeAt(i + 1)
207
- if (c4 >= 0x80) break
208
- bytes[p++] = c4
174
+ if (i >= length) break
175
+ code = string.charCodeAt(i)
176
+ if (code >= 0x80) break
177
+ bytes[p++] = code
209
178
  i++
179
+ if (i >= length) break
180
+ code = string.charCodeAt(i)
181
+ if (code >= 0x80) break
182
+ bytes[p++] = code
210
183
  }
211
184
 
212
- continue
185
+ if (i >= length) break
186
+ // now, code is present and >= 0x80
213
187
  }
214
188
 
215
189
  if (small) {
216
190
  // TODO: use resizable array buffers? will have to return a non-resizeable one
217
- const bytesNew = new Uint8Array(length * 3) // maximium can be 3x of the string length in charcodes
191
+ if (p !== i) throw new Error('Unreachable') // Here, p === i (only when small is still true)
192
+ const bytesNew = new Uint8Array(p + (length - i) * 3) // maximum can be 3x of the string length in charcodes
218
193
  bytesNew.set(bytes)
219
194
  bytes = bytesNew
220
195
  small = false
@@ -224,45 +199,35 @@ export function encode(string, loose) {
224
199
  // lead: d800 - dbff
225
200
  // trail: dc00 - dfff
226
201
  if (code >= 0xd8_00 && code < 0xe0_00) {
227
- if (lead && code < 0xdc_00) {
228
- // a second lead, meaning the previous one was unpaired
202
+ // Can't be a valid trail as we already processed that below
203
+
204
+ if (code > 0xdb_ff || i + 1 >= length) {
205
+ // An unexpected trail or a lead at the very end of input
229
206
  if (!loose) throw new TypeError(E_STRICT_UNICODE)
230
207
  bytes[p++] = 0xef
231
208
  bytes[p++] = 0xbf
232
209
  bytes[p++] = 0xbd
233
- lead = null
234
- // code is still processed as a new lead
210
+ continue
235
211
  }
236
212
 
237
- if (!lead) {
238
- if (code > 0xdb_ff || i + 1 >= length) {
239
- // lead out of range || unpaired
240
- if (!loose) throw new TypeError(E_STRICT_UNICODE)
241
- bytes[p++] = 0xef
242
- bytes[p++] = 0xbf
243
- bytes[p++] = 0xbd
244
- continue
245
- }
246
-
247
- lead = code
248
- continue
213
+ const next = string.charCodeAt(i + 1) // Process valid pairs immediately
214
+ if (next >= 0xdc_00 && next < 0xe0_00) {
215
+ // here, codePoint is always between 0x1_00_00 and 0x11_00_00, we encode as 4 bytes
216
+ const codePoint = (((code - 0xd8_00) << 10) | (next - 0xdc_00)) + 0x1_00_00
217
+ bytes[p++] = (codePoint >> 18) | 0xf0
218
+ bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
219
+ bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
220
+ bytes[p++] = (codePoint & 0x3f) | 0x80
221
+ i++ // consume next
222
+ } else {
223
+ // Next is not a trail, leave next unconsumed but process unmatched lead error
224
+ if (!loose) throw new TypeError(E_STRICT_UNICODE)
225
+ bytes[p++] = 0xef
226
+ bytes[p++] = 0xbf
227
+ bytes[p++] = 0xbd
249
228
  }
250
229
 
251
- // here, codePoint is always between 0x1_00_00 and 0x11_00_00, we encode as 4 bytes
252
- const codePoint = (((lead - 0xd8_00) << 10) | (code - 0xdc_00)) + 0x1_00_00
253
- bytes[p++] = (codePoint >> 18) | 0xf0
254
- bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
255
- bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
256
- bytes[p++] = (codePoint & 0x3f) | 0x80
257
- lead = null
258
230
  continue
259
- } else if (lead) {
260
- if (!loose) throw new TypeError(E_STRICT_UNICODE)
261
- bytes[p++] = 0xef
262
- bytes[p++] = 0xbf
263
- bytes[p++] = 0xbd
264
- lead = null
265
- // code is still processed
266
231
  }
267
232
 
268
233
  // We are left with a non-pair char code above ascii, it gets encoded to 2 or 3 bytes