npm - @bigdreamsweb3/wordbin - Versions diffs - 1.2.0 → 1.3.0 - Mend

@bigdreamsweb3/wordbin 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/CONTRIBUTING.md +18 -18
package/README.md +33 -31
package/dist/builder-vFphFQMU.js.map +1 -1
package/dist/cli.mjs +3 -3
package/dist/cli.mjs.map +1 -1
package/dist/core/format-detection.d.ts +5 -0
package/dist/core/helpers.d.ts +1 -0
package/dist/core/index.d.ts +3 -32
package/dist/data/dict-v1-bip39.json +2054 -0
package/dist/index.mjs +233 -179
package/dist/index.mjs.map +1 -1
package/dist/types.d.ts +1 -3
package/package.json +2 -1
package/dist/core/binary-payload.d.ts +0 -6
package/dist/core/comp/latin1-compressor.d.ts +0 -9
package/dist/core/comp/onebyte-encoder.d.ts +0 -2
package/dist/data/wordbin-v1-bip39.json +0 -6150

package/dist/index.mjs CHANGED

@@ -29,7 +29,7 @@ async function getAllAvailableDictionaryVersions() {
     try {
       const files = await fs.readdir(dir);
       for (const file of files) {
-        const match = file.match(/wordbin-v(\d+)/i);
+        const match = file.match(/dict-v(\d+)/i);
         if (match) {
           versions.add(parseInt(match[1], 10));
         }
@@ -49,7 +49,7 @@ async function loadDictionaryByVersion(version) {
   for (const dir of dirs) {
     const files = await fs.readdir(dir);
     const versionFile = files.find(
-      (f) => f.match(new RegExp(`wordbin-v${version}(?:\\.|-)`, "i"))
+      (f) => f.match(new RegExp(`dict-v${version}(?:\\.|-)`, "i"))
     );
     if (versionFile) {
       const filePath = path.join(dir, versionFile);
@@ -211,15 +211,12 @@ function base(ALPHABET2) {
 }
 var ALPHABET = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
 const bs58 = base(ALPHABET);
-function bytesToHex(bytes) {
-  return Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("");
-}
 function detectAndConvert(payload) {
   if (/^[0-9a-fA-F]+$/.test(payload) && payload.length % 2 === 0) {
-    const bytes2 = Uint8Array.from(
+    const bytes = Uint8Array.from(
       payload.match(/.{1,2}/g).map((h) => parseInt(h, 16))
     );
-    return { buffer: bytes2, detectedFormat: "hex" };
+    return { buffer: bytes, detectedFormat: "hex" };
   }
   const base58Re = /^[123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz]+$/;
   if (base58Re.test(payload)) {
@@ -234,17 +231,19 @@ function detectAndConvert(payload) {
   const padded = norm + (norm.length % 4 ? "=".repeat(4 - norm.length % 4) : "");
   if (b64Re.test(payload) || b64urlRe.test(payload)) {
     try {
-      const bin = atob(padded);
+      const bin2 = atob(padded);
       return {
-        buffer: Uint8Array.from(bin, (c) => c.charCodeAt(0)),
+        buffer: Uint8Array.from(bin2, (c) => c.charCodeAt(0)),
         detectedFormat: "base64"
       };
     } catch {
     }
   }
-  const bytes = new Uint8Array(payload.length);
-  for (let i = 0; i < payload.length; i++) bytes[i] = payload.charCodeAt(i);
-  return { buffer: bytes, detectedFormat: "bin21" };
+  const bin = Array.from(payload).map((c) => c.charCodeAt(0));
+  return { buffer: Uint8Array.from(bin), detectedFormat: "bytes" };
+}
+function bytesToHex(bytes) {
+  return Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("");
 }
 class WordBin {
   constructor(initialDict, options) {
@@ -288,7 +287,69 @@ class WordBin {
       sortedIdLengths: Array.from(idLengths).sort((a, b) => b - a)
     };
   }
-  // ── encode ──────────────────────────────────────────────────────────────────
+  tryRecoverWordsFromHex(hex, reverseMap, sortedIdLengths) {
+    const bytes = Buffer.from(hex, "hex");
+    const recovered = this.greedyDecode(bytes, 0, reverseMap, sortedIdLengths);
+    if (recovered && recovered.trim().length > 0) {
+      return recovered;
+    }
+    return null;
+  }
+  validateDecodedWords(text, forwardMap, reverseMap, sortedIdLengths) {
+    const parts = [];
+    const rawSegments = [];
+    const tokens = text.match(/[a-zA-Z]+|[^\w\s]+|\d+|\s+/g) || [];
+    for (const token of tokens) {
+      if (/^\s+$/.test(token)) {
+        parts.push(token);
+        continue;
+      }
+      if (/^[a-zA-Z]+$/.test(token)) {
+        const normalized = token.toLowerCase();
+        if (forwardMap.has(normalized)) {
+          parts.push(normalized);
+          continue;
+        }
+        const hex2 = bytesToHex(new TextEncoder().encode(token));
+        const recovered2 = this.tryRecoverWordsFromHex(
+          hex2,
+          reverseMap,
+          sortedIdLengths
+        );
+        if (recovered2) {
+          parts.push(recovered2);
+        } else {
+          const raw = `[hex:${hex2}]`;
+          parts.push(raw);
+          rawSegments.push(raw);
+        }
+        continue;
+      }
+      if (/^[^\w\s]+$/.test(token)) {
+        const raw = `[raw:${token}]`;
+        parts.push(raw);
+        rawSegments.push(raw);
+        continue;
+      }
+      const hex = bytesToHex(new TextEncoder().encode(token));
+      const recovered = this.tryRecoverWordsFromHex(
+        hex,
+        reverseMap,
+        sortedIdLengths
+      );
+      if (recovered) {
+        parts.push(recovered);
+      } else {
+        const raw = `[hex:${hex}]`;
+        parts.push(raw);
+        rawSegments.push(raw);
+      }
+    }
+    return {
+      text: parts.join(""),
+      rawSegments
+    };
+  }
   async encode(text, options) {
     let textStr;
     if (typeof text === "string") {
@@ -305,8 +366,6 @@ class WordBin {
         dictVersion: this.primaryDictVersion,
         encoded: new Uint8Array(0),
         payload: "",
-        bin21: "",
-        bin21Payload: "",
         base64Payload: "",
         hexPayload: "",
         base58Payload: "",
@@ -342,34 +401,23 @@ class WordBin {
     }
     const originalBytes = new TextEncoder().encode(textStr).length;
     const hexPayload = bytesToHex(result);
-    const bin21Payload = Array.from(result).map((b) => String.fromCharCode(b)).join("");
     const base64Payload = toBase64(result);
     const base58Payload = bs58.encode(result);
+    const encodedBytes = Math.floor(hexPayload.length / 2);
     return {
       originalText: textStr,
       dictVersion: useVersion,
       encoded: result,
-      bin21: bin21Payload,
-      payload: bin21Payload,
-      bin21Payload,
+      payload: hexPayload,
       hexPayload,
       base64Payload,
       base58Payload,
       originalBytes,
-      encodedBytes: bin21Payload.length,
-      bytesSaved: originalBytes - bin21Payload.length,
-      ratioPercent: Math.round(bin21Payload.length / originalBytes * 1e4) / 100
+      encodedBytes,
+      bytesSaved: originalBytes - encodedBytes,
+      ratioPercent: Math.round(encodedBytes / originalBytes * 1e4) / 100
     };
   }
-  // ── decode ───────────────────────────────────────────────────────────────────
-  /**
-   * Decodes any supported payload format back to human-readable text.
-   *
-   * For valid WordBin payloads:  returns the exact original words.
-   * For non-WordBin payloads:    scans byte-by-byte, extracts dictionary words
-   *                               wherever possible, and preserves unrecognised
-   *                               bytes as "[0xXX]" markers.
-   */
   async decode(payload) {
     let buffer;
     let detectedFormat;
@@ -401,124 +449,73 @@ class WordBin {
       let maps;
       try {
         maps = await this.getMapsForVersion(ver);
-      } catch (err) {
-        this.log(`[decode] v${ver}: getMapsForVersion threw — ${err}`);
+      } catch {
         continue;
       }
       const { reverseMap, sortedIdLengths } = maps;
-      const r1 = this.greedyDecode(buffer, 1, reverseMap, sortedIdLengths) ?? this.tryDecode(1, buffer, reverseMap, [], 0, sortedIdLengths);
-      this.log(
-        `[decode] v${ver} strict(pos=1): ${r1 !== null ? `"${r1}"` : "null"}`
-      );
+      const r1 = this.greedyDecode(buffer, 1, reverseMap, sortedIdLengths);
       if (r1 !== null) {
-        const notice2 = versionByte === ver ? void 0 : `Byte[0]=${versionByte} is not a recognised version header but decoded successfully with dictionary v${ver}.`;
-        return { text: r1, isWordBin: true, detectedFormat, notice: notice2 };
+        const notice = versionByte === ver ? void 0 : `Byte[0]=${versionByte} is not a recognised version header but decoded successfully with dictionary v${ver}.`;
+        return { text: r1, isWordBin: true, detectedFormat, notice };
       }
-      const r0 = this.greedyDecode(buffer, 0, reverseMap, sortedIdLengths) ?? this.tryDecode(0, buffer, reverseMap, [], 0, sortedIdLengths);
-      this.log(
-        `[decode] v${ver} strict(pos=0): ${r0 !== null ? `"${r0}"` : "null"}`
-      );
+      const r0 = this.greedyDecode(buffer, 0, reverseMap, sortedIdLengths);
       if (r0 !== null) {
         return {
           text: r0,
-          isWordBin: true,
+          isWordBin: false,
           detectedFormat,
           notice: `Payload had no version header. Decoded using dictionary v${ver}.`
         };
       }
     }
-    this.log(`[decode] strict parse failed — falling back to partial scan`);
-    if (availableVersions.length > 0) {
-      const scanVersion = availableVersions[availableVersions.length - 1];
-      try {
-        const { reverseMap, sortedIdLengths } = await this.getMapsForVersion(scanVersion);
-        const scan1 = this.partialScan(buffer, 1, reverseMap, sortedIdLengths);
-        const scan0 = this.partialScan(buffer, 0, reverseMap, sortedIdLengths);
-        const best = scan1.wordCount >= scan0.wordCount ? scan1 : scan0;
-        this.log(
-          `[decode] partial scan(pos=1) words=${scan1.wordCount} raw=${scan1.rawSegments.length} | scan(pos=0) words=${scan0.wordCount} raw=${scan0.rawSegments.length}`
-        );
-        const notice2 = `This does not appear to be a valid WordBin payload. Partial scan using dictionary v${scanVersion} extracted ${best.wordCount} word(s); ${best.rawSegments.length} byte sequence(s) had no dictionary match and are shown as [0xXX] markers.`;
-        return {
-          text: best.text,
-          isWordBin: false,
-          detectedFormat,
-          rawSegments: best.rawSegments,
-          notice: notice2
-        };
-      } catch {
-      }
+    this.log(`[decode] strict parse failed — falling back to UTF-8 validation`);
+    const utf8Text = new TextDecoder("utf-8", { fatal: false }).decode(buffer);
+    try {
+      const latest = availableVersions[availableVersions.length - 1];
+      const { forwardMap, reverseMap, sortedIdLengths } = await this.getMapsForVersion(latest);
+      const validated = this.validateDecodedWords(
+        utf8Text,
+        forwardMap,
+        reverseMap,
+        sortedIdLengths
+      );
+      return {
+        text: validated.text,
+        isWordBin: false,
+        detectedFormat,
+        rawSegments: validated.rawSegments,
+        notice: "Payload is not WordBin. UTF-8 text was recovered and dictionary validation applied."
+      };
+    } catch {
+      return {
+        text: utf8Text,
+        isWordBin: false,
+        detectedFormat,
+        notice: "Payload decoded as plain UTF-8 text."
+      };
     }
-    const notice = `Could not decode with any available dictionary (tried: ${availableVersions.join(", ") || "none"}). Falling back to UTF-8 text decoding.`;
-    this.log(`[decode] ${notice}`);
-    return {
-      text: new TextDecoder("utf-8", { fatal: false }).decode(buffer),
-      isWordBin: false,
-      detectedFormat,
-      notice
-    };
   }
-  // ── Private: greedy linear decode ────────────────────────────────────────────
-  /**
-   * O(n) longest-match-first decode. Returns null if any byte has no match.
-   * This is the fast path; tryDecode is used as a backtracking fallback.
-   */
   greedyDecode(buffer, startPos, reverseMap, sortedIdLengths) {
     const words = [];
     let pos = startPos;
     while (pos < buffer.length) {
       if (buffer[pos] === LITERAL) {
-        const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
-        if (byteLen > 1e6 || byteLen < 0) return null;
-        const start = pos + 1 + bytesRead;
-        const end = start + byteLen;
-        if (end > buffer.length) return null;
-        words.push(utf8Decode(buffer.subarray(start, end)));
-        pos = end;
-        continue;
-      }
-      let matched = false;
-      for (const len of sortedIdLengths) {
-        if (pos + len > buffer.length) continue;
-        const key = toHex(buffer.subarray(pos, pos + len));
-        if (reverseMap.has(key)) {
-          words.push(reverseMap.get(key));
-          pos += len;
-          matched = true;
-          break;
-        }
-      }
-      if (!matched) return null;
-    }
-    return words.join(" ");
-  }
-  // ── Private: partial / best-effort scan ──────────────────────────────────────
-  /**
-   * Scans through the buffer extracting any recognised dictionary words.
-   * Unrecognised bytes are collected as raw segments and rendered as [0xXX].
-   * Always consumes the entire buffer — never returns null.
-   */
-  partialScan(buffer, startPos, reverseMap, sortedIdLengths) {
-    const parts = [];
-    const rawSegments = [];
-    let wordCount = 0;
-    let pos = startPos;
-    while (pos < buffer.length) {
-      if (buffer[pos] === LITERAL && pos + 1 < buffer.length) {
+        let byteLen;
+        let bytesRead;
         try {
-          const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
-          if (byteLen > 0 && byteLen <= 1e6) {
-            const start = pos + 1 + bytesRead;
-            const end = start + byteLen;
-            if (end <= buffer.length) {
-              const word = utf8Decode(buffer.subarray(start, end));
-              parts.push(word);
-              wordCount++;
-              pos = end;
-              continue;
-            }
-          }
+          ({ value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1));
         } catch {
+          byteLen = -1;
+          bytesRead = 0;
+        }
+        if (byteLen > 0) {
+          if (byteLen > 1e6 || byteLen < 0) return null;
+          const start = pos + 1 + bytesRead;
+          const end = start + byteLen;
+          if (end > buffer.length) return null;
+          words.push(utf8Decode(buffer.subarray(start, end)));
+          pos = end;
+          continue;
         }
       }
       let matched = false;
@@ -526,65 +523,122 @@ class WordBin {
         if (pos + len > buffer.length) continue;
         const key = toHex(buffer.subarray(pos, pos + len));
         if (reverseMap.has(key)) {
-          parts.push(reverseMap.get(key));
-          wordCount++;
+          words.push(reverseMap.get(key));
           pos += len;
           matched = true;
           break;
         }
       }
-      if (!matched) {
-        const marker = `[0x${buffer[pos].toString(16).padStart(2, "0")}]`;
-        parts.push(marker);
-        rawSegments.push(marker);
-        this.log(
-          `[decode] partial scan: no match at pos=${pos} byte=${buffer[pos]}`
-        );
-        pos++;
-      }
-    }
-    return { text: parts.join(" "), wordCount, rawSegments };
-  }
-  // ── Private: backtracking decode ─────────────────────────────────────────────
-  tryDecode(pos, buffer, reverseMap, result, depth, sortedIdLengths) {
-    if (pos === buffer.length) return result.join(" ");
-    if (buffer[pos] === LITERAL) {
-      const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
-      if (byteLen > 1e6 || byteLen < 0) return null;
-      const start = pos + 1 + bytesRead;
-      const end = start + byteLen;
-      if (end > buffer.length) return null;
-      result.push(utf8Decode(buffer.subarray(start, end)));
-      const res = this.tryDecode(
-        end,
-        buffer,
-        reverseMap,
-        result,
-        depth + 1,
-        sortedIdLengths
-      );
-      if (res !== null) return res;
-      result.pop();
-    }
-    for (const len of sortedIdLengths) {
-      if (pos + len > buffer.length) continue;
-      const key = toHex(buffer.subarray(pos, pos + len));
-      if (reverseMap.has(key)) {
-        result.push(reverseMap.get(key));
-        const res = this.tryDecode(
-          pos + len,
-          buffer,
-          reverseMap,
-          result,
-          depth + 1,
-          sortedIdLengths
-        );
-        if (res !== null) return res;
-        result.pop();
-      }
+      if (!matched) return null;
     }
-    return null;
+    return words.join(" ");
   }
+  // private partialScan(
+  //   buffer: Uint8Array,
+  //   startPos: number,
+  //   reverseMap: Map<string, string>,
+  //   sortedIdLengths: number[],
+  // ): { text: string; wordCount: number; rawSegments: string[] } {
+  //   const parts: string[] = [];
+  //   const rawSegments: string[] = [];
+  //   let wordCount = 0;
+  //   let pos = startPos;
+  //   while (pos < buffer.length) {
+  //     if (buffer[pos] === LITERAL && pos + 1 < buffer.length) {
+  //       try {
+  //         const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
+  //         if (byteLen > 0 && byteLen <= 1_000_000) {
+  //           const start = pos + 1 + bytesRead;
+  //           const end = start + byteLen;
+  //           if (end <= buffer.length) {
+  //             const word = utf8Decode(buffer.subarray(start, end));
+  //             parts.push(word);
+  //             wordCount++;
+  //             pos = end;
+  //             continue;
+  //           }
+  //         }
+  //       } catch {}
+  //     }
+  //     let matched = false;
+  //     for (const len of sortedIdLengths) {
+  //       if (pos + len > buffer.length) continue;
+  //       const key = toHex(buffer.subarray(pos, pos + len));
+  //       if (reverseMap.has(key)) {
+  //         parts.push(reverseMap.get(key)!);
+  //         wordCount++;
+  //         pos += len;
+  //         matched = true;
+  //         break;
+  //       }
+  //     }
+  //     if (!matched) {
+  //       const marker = `[0x${buffer[pos].toString(16).padStart(2, "0")}]`;
+  //       parts.push(marker);
+  //       rawSegments.push(marker);
+  //       this.log(
+  //         `[decode] partial scan: no match at pos=${pos} byte=${buffer[pos]}`,
+  //       );
+  //       pos++;
+  //     }
+  //   }
+  //   return { text: parts.join(" "), wordCount, rawSegments };
+  // }
+  // private tryDecode(
+  //   pos: number,
+  //   buffer: Uint8Array,
+  //   reverseMap: Map<string, string>,
+  //   result: string[],
+  //   depth: number,
+  //   sortedIdLengths: number[],
+  // ): string | null {
+  //   if (pos === buffer.length) return result.join(" ");
+  //   if (buffer[pos] === LITERAL) {
+  //     let byteLen: number;
+  //     let bytesRead: number;
+  //     try {
+  //       ({ value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1));
+  //     } catch {
+  //       byteLen = -1;
+  //       bytesRead = 0;
+  //     }
+  //     if (byteLen > 0) {
+  //       if (byteLen > 1_000_000 || byteLen < 0) return null;
+  //       const start = pos + 1 + bytesRead;
+  //       const end = start + byteLen;
+  //       if (end > buffer.length) return null;
+  //       result.push(utf8Decode(buffer.subarray(start, end)));
+  //       const res = this.tryDecode(
+  //         end,
+  //         buffer,
+  //         reverseMap,
+  //         result,
+  //         depth + 1,
+  //         sortedIdLengths,
+  //       );
+  //       if (res !== null) return res;
+  //       result.pop();
+  //     }
+  //   }
+  //   for (const len of sortedIdLengths) {
+  //     if (pos + len > buffer.length) continue;
+  //     const key = toHex(buffer.subarray(pos, pos + len));
+  //     if (reverseMap.has(key)) {
+  //       result.push(reverseMap.get(key)!);
+  //       const res = this.tryDecode(
+  //         pos + len,
+  //         buffer,
+  //         reverseMap,
+  //         result,
+  //         depth + 1,
+  //         sortedIdLengths,
+  //       );
+  //       if (res !== null) return res;
+  //       result.pop();
+  //     }
+  //   }
+  //   return null;
+  // }
 }
 export {
   MAGIC,