npm - @bigdreamsweb3/wordbin - Versions diffs - 1.0.6 → 1.0.8 - Mend

@bigdreamsweb3/wordbin 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/CONTRIBUTING.md +156 -67
package/README.md +364 -149
package/dist/{dictionary-D3gr2Ala.js → builder-vFphFQMU.js} +16 -19
package/dist/builder-vFphFQMU.js.map +1 -0
package/dist/cli.mjs +1 -1
package/dist/core/binary-payload.d.ts +6 -0
package/dist/core/comp/latin1-compressor.d.ts +9 -0
package/dist/core/comp/onebyte-encoder.d.ts +2 -0
package/dist/core/index.d.ts +58 -0
package/dist/data/wordbin-v1-bip39.json +13 -11
package/dist/{dictionary.d.ts → dict/builder.d.ts} +1 -1
package/dist/{dictionary-loader.d.ts → dict/dictionary-loader.d.ts} +1 -1
package/dist/index.d.ts +3 -3
package/dist/index.mjs +425 -142
package/dist/index.mjs.map +1 -1
package/dist/types.d.ts +7 -3
package/package.json +6 -2
package/dist/core.d.ts +0 -19
package/dist/dictionary-D3gr2Ala.js.map +0 -1

package/dist/index.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { b as buildDictionary, t as toBase64, a as toHex, g as generateWordId, u as utf8Encode, e as encodeVarint, f as fromBase64, d as decodeVarint, c as utf8Decode } from "./dictionary-D3gr2Ala.js";
+import { b as buildDictionary, t as toBase64, u as utf8Encode, e as encodeVarint, d as decodeVarint, a as utf8Decode, c as toHex } from "./builder-vFphFQMU.js";
 import fs from "fs/promises";
 import path from "path";
 import { fileURLToPath } from "url";
@@ -81,225 +81,508 @@ async function loadLatestDictionary() {
   );
   return loadDictionaryByVersion(latestVersion);
 }
+function base(ALPHABET2) {
+  if (ALPHABET2.length >= 255) {
+    throw new TypeError("Alphabet too long");
+  }
+  const BASE_MAP = new Uint8Array(256);
+  for (let j = 0; j < BASE_MAP.length; j++) {
+    BASE_MAP[j] = 255;
+  }
+  for (let i = 0; i < ALPHABET2.length; i++) {
+    const x = ALPHABET2.charAt(i);
+    const xc = x.charCodeAt(0);
+    if (BASE_MAP[xc] !== 255) {
+      throw new TypeError(x + " is ambiguous");
+    }
+    BASE_MAP[xc] = i;
+  }
+  const BASE = ALPHABET2.length;
+  const LEADER = ALPHABET2.charAt(0);
+  const FACTOR = Math.log(BASE) / Math.log(256);
+  const iFACTOR = Math.log(256) / Math.log(BASE);
+  function encode(source) {
+    if (source instanceof Uint8Array) ;
+    else if (ArrayBuffer.isView(source)) {
+      source = new Uint8Array(source.buffer, source.byteOffset, source.byteLength);
+    } else if (Array.isArray(source)) {
+      source = Uint8Array.from(source);
+    }
+    if (!(source instanceof Uint8Array)) {
+      throw new TypeError("Expected Uint8Array");
+    }
+    if (source.length === 0) {
+      return "";
+    }
+    let zeroes = 0;
+    let length = 0;
+    let pbegin = 0;
+    const pend = source.length;
+    while (pbegin !== pend && source[pbegin] === 0) {
+      pbegin++;
+      zeroes++;
+    }
+    const size = (pend - pbegin) * iFACTOR + 1 >>> 0;
+    const b58 = new Uint8Array(size);
+    while (pbegin !== pend) {
+      let carry = source[pbegin];
+      let i = 0;
+      for (let it1 = size - 1; (carry !== 0 || i < length) && it1 !== -1; it1--, i++) {
+        carry += 256 * b58[it1] >>> 0;
+        b58[it1] = carry % BASE >>> 0;
+        carry = carry / BASE >>> 0;
+      }
+      if (carry !== 0) {
+        throw new Error("Non-zero carry");
+      }
+      length = i;
+      pbegin++;
+    }
+    let it2 = size - length;
+    while (it2 !== size && b58[it2] === 0) {
+      it2++;
+    }
+    let str = LEADER.repeat(zeroes);
+    for (; it2 < size; ++it2) {
+      str += ALPHABET2.charAt(b58[it2]);
+    }
+    return str;
+  }
+  function decodeUnsafe(source) {
+    if (typeof source !== "string") {
+      throw new TypeError("Expected String");
+    }
+    if (source.length === 0) {
+      return new Uint8Array();
+    }
+    let psz = 0;
+    let zeroes = 0;
+    let length = 0;
+    while (source[psz] === LEADER) {
+      zeroes++;
+      psz++;
+    }
+    const size = (source.length - psz) * FACTOR + 1 >>> 0;
+    const b256 = new Uint8Array(size);
+    while (psz < source.length) {
+      const charCode = source.charCodeAt(psz);
+      if (charCode > 255) {
+        return;
+      }
+      let carry = BASE_MAP[charCode];
+      if (carry === 255) {
+        return;
+      }
+      let i = 0;
+      for (let it3 = size - 1; (carry !== 0 || i < length) && it3 !== -1; it3--, i++) {
+        carry += BASE * b256[it3] >>> 0;
+        b256[it3] = carry % 256 >>> 0;
+        carry = carry / 256 >>> 0;
+      }
+      if (carry !== 0) {
+        throw new Error("Non-zero carry");
+      }
+      length = i;
+      psz++;
+    }
+    let it4 = size - length;
+    while (it4 !== size && b256[it4] === 0) {
+      it4++;
+    }
+    const vch = new Uint8Array(zeroes + (size - it4));
+    let j = zeroes;
+    while (it4 !== size) {
+      vch[j++] = b256[it4++];
+    }
+    return vch;
+  }
+  function decode(string) {
+    const buffer = decodeUnsafe(string);
+    if (buffer) {
+      return buffer;
+    }
+    throw new Error("Non-base" + BASE + " character");
+  }
+  return {
+    encode,
+    decodeUnsafe,
+    decode
+  };
+}
+var ALPHABET = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
+const bs58 = base(ALPHABET);
+function bytesToHex(bytes) {
+  return Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("");
+}
+function detectAndConvert(payload) {
+  if (/^[0-9a-fA-F]+$/.test(payload) && payload.length % 2 === 0) {
+    const bytes2 = Uint8Array.from(
+      payload.match(/.{1,2}/g).map((h) => parseInt(h, 16))
+    );
+    return { buffer: bytes2, detectedFormat: "hex" };
+  }
+  const base58Re = /^[123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz]+$/;
+  if (base58Re.test(payload)) {
+    try {
+      return { buffer: bs58.decode(payload), detectedFormat: "base58" };
+    } catch {
+    }
+  }
+  const b64Re = /^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{4})$/;
+  const b64urlRe = /^(?:[A-Za-z0-9\-_]{4})*(?:[A-Za-z0-9\-_]{2}(?:==)?|[A-Za-z0-9\-_]{3}=?|[A-Za-z0-9\-_]{4})$/;
+  const norm = payload.replace(/-/g, "+").replace(/_/g, "/");
+  const padded = norm + (norm.length % 4 ? "=".repeat(4 - norm.length % 4) : "");
+  if (b64Re.test(payload) || b64urlRe.test(payload)) {
+    try {
+      const bin = atob(padded);
+      return {
+        buffer: Uint8Array.from(bin, (c) => c.charCodeAt(0)),
+        detectedFormat: "base64"
+      };
+    } catch {
+    }
+  }
+  const bytes = new Uint8Array(payload.length);
+  for (let i = 0; i < payload.length; i++) bytes[i] = payload.charCodeAt(i);
+  return { buffer: bytes, detectedFormat: "bin21" };
+}
 class WordBin {
   constructor(initialDict, options) {
-    this.primaryDictVersion = initialDict?.version ?? 2;
+    this.primaryDictVersion = initialDict?.version ?? 1;
     this.log = options?.debug ? (...args) => console.log("[WordBin]", ...args) : () => {
     };
   }
   static async createFromWords(words) {
-    console.warn("Building dictionary from scratch – consider pre-built files");
-    const dict = await buildDictionary(words);
-    return new WordBin(dict);
+    console.warn(
+      "Building dictionary from scratch – consider using pre-built files"
+    );
+    return new WordBin(await buildDictionary(words));
   }
   static async createFromJson(dictJson) {
     return new WordBin(dictJson);
   }
   static async create(options) {
-    const latestDict = await loadLatestDictionary();
-    return new WordBin(latestDict, options);
+    return new WordBin(await loadLatestDictionary(), options);
   }
-  async getReverseMapForVersion(version) {
+  async getMapsForVersion(version) {
     const dict = await loadDictionaryByVersion(version);
     const reverseMap = /* @__PURE__ */ new Map();
+    const forwardMap = /* @__PURE__ */ new Map();
+    const idLengths = /* @__PURE__ */ new Set();
     for (const [hex, words] of Object.entries(dict.words)) {
-      if (words.length > 0) reverseMap.set(hex, words[0]);
+      if (!words.length) continue;
+      if (words.length > 1) {
+        throw new Error(
+          `Dictionary corruption: ID ${hex} maps to multiple words`
+        );
+      }
+      const word = words[0];
+      const bytes = Buffer.from(hex, "hex");
+      idLengths.add(bytes.length);
+      reverseMap.set(hex, word);
+      forwardMap.set(word, bytes);
     }
-    return reverseMap;
+    return {
+      reverseMap,
+      forwardMap,
+      sortedIdLengths: Array.from(idLengths).sort((a, b) => b - a)
+    };
   }
-  async encode(text, options = {}) {
+  // ── encode ──────────────────────────────────────────────────────────────────
+  async encode(text, options) {
     let textStr;
-    if (typeof text === "string") textStr = text;
-    else if (text instanceof Uint8Array) textStr = toBase64(text);
-    else textStr = text.encodedBase64;
-    if (!textStr.trim()) {
+    if (typeof text === "string") {
+      textStr = text;
+    } else if (text instanceof Uint8Array) {
+      textStr = toBase64(text);
+    } else {
+      textStr = text.base64Payload;
+    }
+    const trimmed = textStr.trim();
+    if (!trimmed) {
       return {
         originalText: "",
-        dictVersion: 0,
+        dictVersion: this.primaryDictVersion,
         encoded: new Uint8Array(0),
         payload: "",
-        encodedBase64: "",
+        bin21: "",
+        bin21Payload: "",
+        base64Payload: "",
+        hexPayload: "",
+        base58Payload: "",
         originalBytes: 0,
         encodedBytes: 0,
         bytesSaved: 0,
         ratioPercent: 100
       };
     }
-    const words = textStr.split(/\s+/).filter(Boolean);
-    this.log(`[encode] Input words (${words.length}):`, words);
-    const useVersion = options.dictVersion ?? this.primaryDictVersion;
-    this.log(`[encode] Using dictionary version: ${useVersion}`);
-    const header = new Uint8Array([useVersion]);
-    this.log(`[encode] Header bytes: [${[...header].join(", ")}]`);
-    this.log(`[encode] Header hex: ${toHex(header)}`);
-    const chunks = [header];
-    const reverseMap = await this.getReverseMapForVersion(useVersion);
-    this.log(`[encode] Reverse map loaded — size: ${reverseMap.size} entries`);
-    this.log("[encode] Word → ID mapping:");
-    for (const w of words) {
-      const id = await generateWordId(w);
-      const key = toHex(id);
-      this.log(`  "${w}" → ID bytes: [${[...id].join(", ")}] | hex: ${key}`);
-      if (reverseMap.has(key)) {
-        reverseMap.get(key);
-        this.log(`    → Found in dictionary → using ${id.length}-byte ID`);
+    const useVersion = options?.dictVersion ?? this.primaryDictVersion;
+    const { forwardMap } = await this.getMapsForVersion(useVersion);
+    const chunks = [new Uint8Array([useVersion])];
+    for (const w of trimmed.split(/\s+/).filter(Boolean)) {
+      const id = forwardMap.get(w);
+      if (id) {
         chunks.push(id);
       } else {
         const utf8 = utf8Encode(w);
         const lenVarint = encodeVarint(utf8.length);
-        this.log(`    → NOT in dictionary → literal mode`);
-        this.log(
-          `      Literal length varint bytes: [${[...lenVarint].join(", ")}] (value = ${utf8.length})`
-        );
-        this.log(`      Word UTF-8 bytes length: ${utf8.length}`);
         const out = new Uint8Array(1 + lenVarint.length + utf8.length);
         out[0] = LITERAL;
         out.set(lenVarint, 1);
         out.set(utf8, 1 + lenVarint.length);
-        this.log(`      Literal chunk bytes: [${[...out].join(", ")}]`);
         chunks.push(out);
       }
     }
-    const totalLength = chunks.reduce((n, c) => n + c.length, 0);
+    const totalLength = chunks.reduce((sum, c) => sum + c.length, 0);
     const result = new Uint8Array(totalLength);
-    this.log(`[encode] Total encoded length: ${totalLength} bytes`);
-    let off = 0;
-    chunks.forEach((chunk, i) => {
-      result.set(chunk, off);
-      off += chunk.length;
-      this.log(
-        `  Chunk ${i}: ${chunk.length} bytes → offset ${off - chunk.length}`
-      );
-    });
-    this.log(
-      `[encode] Final encoded bytes (first 32): [${[...result.subarray(0, Math.min(32, result.length))].join(", ")}]`
-    );
+    let offset = 0;
+    for (const chunk of chunks) {
+      result.set(chunk, offset);
+      offset += chunk.length;
+    }
     const originalBytes = new TextEncoder().encode(textStr).length;
-    const base64Result = toBase64(result);
-    this.log(`[encode] Base64 starts with: ${base64Result.slice(0, 12)}...`);
+    const hexPayload = bytesToHex(result);
+    const bin21Payload = Array.from(result).map((b) => String.fromCharCode(b)).join("");
+    const base64Payload = toBase64(result);
+    const base58Payload = bs58.encode(result);
     return {
       originalText: textStr,
-      dictVersion: result[0],
+      dictVersion: useVersion,
       encoded: result,
-      payload: base64Result,
-      encodedBase64: base64Result,
+      bin21: bin21Payload,
+      payload: bin21Payload,
+      bin21Payload,
+      hexPayload,
+      base64Payload,
+      base58Payload,
       originalBytes,
-      encodedBytes: totalLength,
-      bytesSaved: originalBytes - totalLength,
-      ratioPercent: totalLength === 0 ? 100 : Math.round(totalLength / originalBytes * 100)
+      encodedBytes: bin21Payload.length,
+      bytesSaved: originalBytes - bin21Payload.length,
+      ratioPercent: Math.round(bin21Payload.length / originalBytes * 1e4) / 100
     };
   }
-  async decode(data) {
+  // ── decode ───────────────────────────────────────────────────────────────────
+  /**
+   * Decodes any supported payload format back to human-readable text.
+   *
+   * For valid WordBin payloads:  returns the exact original words.
+   * For non-WordBin payloads:    scans byte-by-byte, extracts dictionary words
+   *                               wherever possible, and preserves unrecognised
+   *                               bytes as "[0xXX]" markers.
+   */
+  async decode(payload) {
     let buffer;
-    if (typeof data === "string") {
-      this.log(
-        `[decode] Input is base64 string: "${data.substring(0, 20)}..."`
-      );
-      buffer = fromBase64(data);
-      this.log(`[decode] Decoded to ${buffer.length} bytes`);
+    let detectedFormat;
+    if (payload instanceof Uint8Array) {
+      buffer = payload;
+      detectedFormat = "bytes";
     } else {
-      buffer = data;
-      this.log(`[decode] Input is Uint8Array with ${buffer.length} bytes`);
+      ({ buffer, detectedFormat } = detectAndConvert(payload));
     }
-    this.log(`[decode] Full buffer hex: ${toHex(buffer)}`);
     this.log(
-      `[decode] First 16 bytes: [${[...buffer.subarray(0, Math.min(16, buffer.length))].join(", ")}]`
+      `[decode] format=${detectedFormat} bufLen=${buffer.length} firstBytes=[${Array.from(buffer.slice(0, 8)).join(",")}]`
     );
     if (buffer.length < 1) {
-      throw new Error("Data too short");
-    }
-    const version = buffer[0];
-    this.log(`[decode] Dictionary version from header: ${version}`);
-    if (version < 1 || version > 100) {
-      this.log(`[decode] Warning: unusual dictionary version ${version}`);
+      return {
+        text: "",
+        isWordBin: false,
+        detectedFormat,
+        notice: "Payload is empty — nothing to decode."
+      };
     }
-    let pos = 1;
-    this.log(`[decode] Starting decode at position ${pos}`);
-    const reverseMap = await this.getReverseMapForVersion(version);
+    const availableVersions = await getAllAvailableDictionaryVersions();
+    const versionByte = buffer[0];
+    const versionIsHeader = availableVersions.includes(versionByte);
     this.log(
-      `[decode] Reverse map loaded for v${version} — size: ${reverseMap.size} entries`
+      `[decode] availableVersions=[${availableVersions.join(",")}] versionByte=${versionByte} isKnownHeader=${versionIsHeader}`
     );
-    this.log(`[decode] ===== STARTING DECODE LOOP =====`);
-    const result = [];
-    const decoded = this.tryDecode(pos, buffer, reverseMap, result, 0);
-    if (decoded === null) {
-      throw new Error(
-        "No valid decode path found — possible corruption or dictionary mismatch"
+    const tryOrder = versionIsHeader ? [versionByte, ...availableVersions.filter((v) => v !== versionByte)] : [...availableVersions];
+    for (const ver of tryOrder) {
+      let maps;
+      try {
+        maps = await this.getMapsForVersion(ver);
+      } catch (err) {
+        this.log(`[decode] v${ver}: getMapsForVersion threw — ${err}`);
+        continue;
+      }
+      const { reverseMap, sortedIdLengths } = maps;
+      const r1 = this.greedyDecode(buffer, 1, reverseMap, sortedIdLengths) ?? this.tryDecode(1, buffer, reverseMap, [], 0, sortedIdLengths);
+      this.log(
+        `[decode] v${ver} strict(pos=1): ${r1 !== null ? `"${r1}"` : "null"}`
       );
+      if (r1 !== null) {
+        const notice2 = versionByte === ver ? void 0 : `Byte[0]=${versionByte} is not a recognised version header but decoded successfully with dictionary v${ver}.`;
+        return { text: r1, isWordBin: true, detectedFormat, notice: notice2 };
+      }
+      const r0 = this.greedyDecode(buffer, 0, reverseMap, sortedIdLengths) ?? this.tryDecode(0, buffer, reverseMap, [], 0, sortedIdLengths);
+      this.log(
+        `[decode] v${ver} strict(pos=0): ${r0 !== null ? `"${r0}"` : "null"}`
+      );
+      if (r0 !== null) {
+        return {
+          text: r0,
+          isWordBin: true,
+          detectedFormat,
+          notice: `Payload had no version header. Decoded using dictionary v${ver}.`
+        };
+      }
     }
-    this.log(`
-[decode] ===== DECODE COMPLETE =====`);
-    this.log(`[decode] Total words decoded: ${result.length}`);
-    this.log(`[decode] Final result: "${decoded}"`);
-    return decoded;
+    this.log(`[decode] strict parse failed — falling back to partial scan`);
+    if (availableVersions.length > 0) {
+      const scanVersion = availableVersions[availableVersions.length - 1];
+      try {
+        const { reverseMap, sortedIdLengths } = await this.getMapsForVersion(scanVersion);
+        const scan1 = this.partialScan(buffer, 1, reverseMap, sortedIdLengths);
+        const scan0 = this.partialScan(buffer, 0, reverseMap, sortedIdLengths);
+        const best = scan1.wordCount >= scan0.wordCount ? scan1 : scan0;
+        this.log(
+          `[decode] partial scan(pos=1) words=${scan1.wordCount} raw=${scan1.rawSegments.length} | scan(pos=0) words=${scan0.wordCount} raw=${scan0.rawSegments.length}`
+        );
+        const notice2 = `This does not appear to be a valid WordBin payload. Partial scan using dictionary v${scanVersion} extracted ${best.wordCount} word(s); ${best.rawSegments.length} byte sequence(s) had no dictionary match and are shown as [0xXX] markers.`;
+        return {
+          text: best.text,
+          isWordBin: false,
+          detectedFormat,
+          rawSegments: best.rawSegments,
+          notice: notice2
+        };
+      } catch {
+      }
+    }
+    const notice = `Could not decode with any available dictionary (tried: ${availableVersions.join(", ") || "none"}). Falling back to UTF-8 text decoding.`;
+    this.log(`[decode] ${notice}`);
+    return {
+      text: new TextDecoder("utf-8", { fatal: false }).decode(buffer),
+      isWordBin: false,
+      detectedFormat,
+      notice
+    };
   }
-  tryDecode(pos, buffer, reverseMap, result, depth) {
-    const indent = "  ".repeat(depth);
-    this.log(`${indent}[tryDecode] At position ${pos} (depth ${depth})`);
-    if (pos === buffer.length) {
-      this.log(`${indent}[tryDecode] Reached end successfully`);
-      return result.join(" ");
-    }
-    const previewLen = Math.min(8, buffer.length - pos);
-    const preview = [...buffer.subarray(pos, pos + previewLen)].map((b) => `0x${b.toString(16).padStart(2, "0")}`).join(" ");
-    this.log(`${indent}[tryDecode] Next ${previewLen} bytes: ${preview}`);
+  // ── Private: greedy linear decode ────────────────────────────────────────────
+  /**
+   * O(n) longest-match-first decode. Returns null if any byte has no match.
+   * This is the fast path; tryDecode is used as a backtracking fallback.
+   */
+  greedyDecode(buffer, startPos, reverseMap, sortedIdLengths) {
+    const words = [];
+    let pos = startPos;
+    while (pos < buffer.length) {
+      if (buffer[pos] === LITERAL) {
+        const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
+        if (byteLen > 1e6 || byteLen < 0) return null;
+        const start = pos + 1 + bytesRead;
+        const end = start + byteLen;
+        if (end > buffer.length) return null;
+        words.push(utf8Decode(buffer.subarray(start, end)));
+        pos = end;
+        continue;
+      }
+      let matched = false;
+      for (const len of sortedIdLengths) {
+        if (pos + len > buffer.length) continue;
+        const key = toHex(buffer.subarray(pos, pos + len));
+        if (reverseMap.has(key)) {
+          words.push(reverseMap.get(key));
+          pos += len;
+          matched = true;
+          break;
+        }
+      }
+      if (!matched) return null;
+    }
+    return words.join(" ");
+  }
+  // ── Private: partial / best-effort scan ──────────────────────────────────────
+  /**
+   * Scans through the buffer extracting any recognised dictionary words.
+   * Unrecognised bytes are collected as raw segments and rendered as [0xXX].
+   * Always consumes the entire buffer — never returns null.
+   */
+  partialScan(buffer, startPos, reverseMap, sortedIdLengths) {
+    const parts = [];
+    const rawSegments = [];
+    let wordCount = 0;
+    let pos = startPos;
+    while (pos < buffer.length) {
+      if (buffer[pos] === LITERAL && pos + 1 < buffer.length) {
+        try {
+          const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
+          if (byteLen > 0 && byteLen <= 1e6) {
+            const start = pos + 1 + bytesRead;
+            const end = start + byteLen;
+            if (end <= buffer.length) {
+              const word = utf8Decode(buffer.subarray(start, end));
+              parts.push(word);
+              wordCount++;
+              pos = end;
+              continue;
+            }
+          }
+        } catch {
+        }
+      }
+      let matched = false;
+      for (const len of sortedIdLengths) {
+        if (pos + len > buffer.length) continue;
+        const key = toHex(buffer.subarray(pos, pos + len));
+        if (reverseMap.has(key)) {
+          parts.push(reverseMap.get(key));
+          wordCount++;
+          pos += len;
+          matched = true;
+          break;
+        }
+      }
+      if (!matched) {
+        const marker = `[0x${buffer[pos].toString(16).padStart(2, "0")}]`;
+        parts.push(marker);
+        rawSegments.push(marker);
+        this.log(
+          `[decode] partial scan: no match at pos=${pos} byte=${buffer[pos]}`
+        );
+        pos++;
+      }
+    }
+    return { text: parts.join(" "), wordCount, rawSegments };
+  }
+  // ── Private: backtracking decode ─────────────────────────────────────────────
+  tryDecode(pos, buffer, reverseMap, result, depth, sortedIdLengths) {
+    if (pos === buffer.length) return result.join(" ");
     if (buffer[pos] === LITERAL) {
-      this.log(
-        `${indent}[tryDecode] Found LITERAL marker (0x${LITERAL.toString(16)})`
-      );
       const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
-      this.log(
-        `${indent}[tryDecode] Varint: value=${byteLen}, bytesRead=${bytesRead}`
-      );
+      if (byteLen > 1e6 || byteLen < 0) return null;
       const start = pos + 1 + bytesRead;
       const end = start + byteLen;
-      this.log(
-        `${indent}[tryDecode] Literal range: [${start}..${end}) (${byteLen} bytes)`
+      if (end > buffer.length) return null;
+      result.push(utf8Decode(buffer.subarray(start, end)));
+      const res = this.tryDecode(
+        end,
+        buffer,
+        reverseMap,
+        result,
+        depth + 1,
+        sortedIdLengths
       );
-      if (end > buffer.length) {
-        this.log(`${indent}[tryDecode] Truncated literal — failing path`);
-        return null;
-      }
-      const literalBytes = buffer.subarray(start, end);
-      const word = utf8Decode(literalBytes);
-      this.log(`${indent}[tryDecode] Decoded literal: "${word}"`);
-      result.push(word);
-      const res = this.tryDecode(end, buffer, reverseMap, result, depth + 1);
       if (res !== null) return res;
       result.pop();
-      this.log(`${indent}[tryDecode] Backtracking from literal`);
-      return null;
     }
-    for (const len of [4, 3, 2]) {
-      if (pos + len > buffer.length) {
-        this.log(`${indent}[tryDecode] Skipping ${len}-byte (would exceed)`);
-        continue;
-      }
-      const slice = buffer.subarray(pos, pos + len);
-      const key = toHex(slice);
-      const keyBytes = [...slice].map((b) => `0x${b.toString(16).padStart(2, "0")}`).join(" ");
-      this.log(
-        `${indent}[tryDecode] Trying ${len}-byte: [${keyBytes}] hex=${key}`
-      );
+    for (const len of sortedIdLengths) {
+      if (pos + len > buffer.length) continue;
+      const key = toHex(buffer.subarray(pos, pos + len));
       if (reverseMap.has(key)) {
-        const word = reverseMap.get(key);
-        this.log(`${indent}[tryDecode] Match: "${word}" (ID: ${key})`);
-        result.push(word);
+        result.push(reverseMap.get(key));
         const res = this.tryDecode(
           pos + len,
           buffer,
           reverseMap,
           result,
-          depth + 1
+          depth + 1,
+          sortedIdLengths
         );
         if (res !== null) return res;
         result.pop();
-        this.log(`${indent}[tryDecode] Backtracking from "${word}"`);
-      } else {
-        this.log(`${indent}[tryDecode] No match for ${key}`);
       }
     }
-    this.log(`${indent}[tryDecode] No valid branches — failing path`);
     return null;
   }
 }