@bigdreamsweb3/wordbin 1.1.8 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -29,7 +29,7 @@ async function getAllAvailableDictionaryVersions() {
29
29
  try {
30
30
  const files = await fs.readdir(dir);
31
31
  for (const file of files) {
32
- const match = file.match(/wordbin-v(\d+)/i);
32
+ const match = file.match(/dict-v(\d+)/i);
33
33
  if (match) {
34
34
  versions.add(parseInt(match[1], 10));
35
35
  }
@@ -49,7 +49,7 @@ async function loadDictionaryByVersion(version) {
49
49
  for (const dir of dirs) {
50
50
  const files = await fs.readdir(dir);
51
51
  const versionFile = files.find(
52
- (f) => f.match(new RegExp(`wordbin-v${version}(?:\\.|-)`, "i"))
52
+ (f) => f.match(new RegExp(`dict-v${version}(?:\\.|-)`, "i"))
53
53
  );
54
54
  if (versionFile) {
55
55
  const filePath = path.join(dir, versionFile);
@@ -211,15 +211,12 @@ function base(ALPHABET2) {
211
211
  }
212
212
  var ALPHABET = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
213
213
  const bs58 = base(ALPHABET);
214
- function bytesToHex(bytes) {
215
- return Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("");
216
- }
217
214
  function detectAndConvert(payload) {
218
215
  if (/^[0-9a-fA-F]+$/.test(payload) && payload.length % 2 === 0) {
219
- const bytes2 = Uint8Array.from(
216
+ const bytes = Uint8Array.from(
220
217
  payload.match(/.{1,2}/g).map((h) => parseInt(h, 16))
221
218
  );
222
- return { buffer: bytes2, detectedFormat: "hex" };
219
+ return { buffer: bytes, detectedFormat: "hex" };
223
220
  }
224
221
  const base58Re = /^[123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz]+$/;
225
222
  if (base58Re.test(payload)) {
@@ -234,17 +231,19 @@ function detectAndConvert(payload) {
234
231
  const padded = norm + (norm.length % 4 ? "=".repeat(4 - norm.length % 4) : "");
235
232
  if (b64Re.test(payload) || b64urlRe.test(payload)) {
236
233
  try {
237
- const bin = atob(padded);
234
+ const bin2 = atob(padded);
238
235
  return {
239
- buffer: Uint8Array.from(bin, (c) => c.charCodeAt(0)),
236
+ buffer: Uint8Array.from(bin2, (c) => c.charCodeAt(0)),
240
237
  detectedFormat: "base64"
241
238
  };
242
239
  } catch {
243
240
  }
244
241
  }
245
- const bytes = new Uint8Array(payload.length);
246
- for (let i = 0; i < payload.length; i++) bytes[i] = payload.charCodeAt(i);
247
- return { buffer: bytes, detectedFormat: "bin21" };
242
+ const bin = Array.from(payload).map((c) => c.charCodeAt(0));
243
+ return { buffer: Uint8Array.from(bin), detectedFormat: "bytes" };
244
+ }
245
/**
 * Render a sequence of byte values as one hexadecimal string.
 *
 * Each element is written in base 16 and left-padded with "0" to at least
 * two characters; the pieces are concatenated with no separator.
 *
 * @param {Iterable<number>|ArrayLike<number>} bytes - values to render.
 * @returns {string} the concatenated hex digits ("" for empty input).
 */
function bytesToHex(bytes) {
  let hex = "";
  for (const value of Array.from(bytes)) {
    hex += value.toString(16).padStart(2, "0");
  }
  return hex;
}
249
248
  class WordBin {
250
249
  constructor(initialDict, options) {
@@ -288,7 +287,69 @@ class WordBin {
288
287
  sortedIdLengths: Array.from(idLengths).sort((a, b) => b - a)
289
288
  };
290
289
  }
291
- // ── encode ──────────────────────────────────────────────────────────────────
290
+ tryRecoverWordsFromHex(hex, reverseMap, sortedIdLengths) {
291
+ const bytes = Buffer.from(hex, "hex");
292
+ const recovered = this.greedyDecode(bytes, 0, reverseMap, sortedIdLengths);
293
+ if (recovered && recovered.trim().length > 0) {
294
+ return recovered;
295
+ }
296
+ return null;
297
+ }
298
+ validateDecodedWords(text, forwardMap, reverseMap, sortedIdLengths) {
299
+ const parts = [];
300
+ const rawSegments = [];
301
+ const tokens = text.match(/[a-zA-Z]+|[^\w\s]+|\d+|\s+/g) || [];
302
+ for (const token of tokens) {
303
+ if (/^\s+$/.test(token)) {
304
+ parts.push(token);
305
+ continue;
306
+ }
307
+ if (/^[a-zA-Z]+$/.test(token)) {
308
+ const normalized = token.toLowerCase();
309
+ if (forwardMap.has(normalized)) {
310
+ parts.push(normalized);
311
+ continue;
312
+ }
313
+ const hex2 = bytesToHex(new TextEncoder().encode(token));
314
+ const recovered2 = this.tryRecoverWordsFromHex(
315
+ hex2,
316
+ reverseMap,
317
+ sortedIdLengths
318
+ );
319
+ if (recovered2) {
320
+ parts.push(recovered2);
321
+ } else {
322
+ const raw = `[hex:${hex2}]`;
323
+ parts.push(raw);
324
+ rawSegments.push(raw);
325
+ }
326
+ continue;
327
+ }
328
+ if (/^[^\w\s]+$/.test(token)) {
329
+ const raw = `[raw:${token}]`;
330
+ parts.push(raw);
331
+ rawSegments.push(raw);
332
+ continue;
333
+ }
334
+ const hex = bytesToHex(new TextEncoder().encode(token));
335
+ const recovered = this.tryRecoverWordsFromHex(
336
+ hex,
337
+ reverseMap,
338
+ sortedIdLengths
339
+ );
340
+ if (recovered) {
341
+ parts.push(recovered);
342
+ } else {
343
+ const raw = `[hex:${hex}]`;
344
+ parts.push(raw);
345
+ rawSegments.push(raw);
346
+ }
347
+ }
348
+ return {
349
+ text: parts.join(""),
350
+ rawSegments
351
+ };
352
+ }
292
353
  async encode(text, options) {
293
354
  let textStr;
294
355
  if (typeof text === "string") {
@@ -305,8 +366,6 @@ class WordBin {
305
366
  dictVersion: this.primaryDictVersion,
306
367
  encoded: new Uint8Array(0),
307
368
  payload: "",
308
- bin21: "",
309
- bin21Payload: "",
310
369
  base64Payload: "",
311
370
  hexPayload: "",
312
371
  base58Payload: "",
@@ -342,34 +401,23 @@ class WordBin {
342
401
  }
343
402
  const originalBytes = new TextEncoder().encode(textStr).length;
344
403
  const hexPayload = bytesToHex(result);
345
- const bin21Payload = Array.from(result).map((b) => String.fromCharCode(b)).join("");
346
404
  const base64Payload = toBase64(result);
347
405
  const base58Payload = bs58.encode(result);
406
+ const encodedBytes = Math.floor(hexPayload.length / 2);
348
407
  return {
349
408
  originalText: textStr,
350
409
  dictVersion: useVersion,
351
410
  encoded: result,
352
- bin21: bin21Payload,
353
- payload: bin21Payload,
354
- bin21Payload,
411
+ payload: hexPayload,
355
412
  hexPayload,
356
413
  base64Payload,
357
414
  base58Payload,
358
415
  originalBytes,
359
- encodedBytes: bin21Payload.length,
360
- bytesSaved: originalBytes - bin21Payload.length,
361
- ratioPercent: Math.round(bin21Payload.length / originalBytes * 1e4) / 100
416
+ encodedBytes,
417
+ bytesSaved: originalBytes - encodedBytes,
418
+ ratioPercent: Math.round(encodedBytes / originalBytes * 1e4) / 100
362
419
  };
363
420
  }
364
- // ── decode ───────────────────────────────────────────────────────────────────
365
- /**
366
- * Decodes any supported payload format back to human-readable text.
367
- *
368
- * For valid WordBin payloads: returns the exact original words.
369
- * For non-WordBin payloads: scans byte-by-byte, extracts dictionary words
370
- * wherever possible, and preserves unrecognised
371
- * bytes as "[0xXX]" markers.
372
- */
373
421
  async decode(payload) {
374
422
  let buffer;
375
423
  let detectedFormat;
@@ -401,124 +449,73 @@ class WordBin {
401
449
  let maps;
402
450
  try {
403
451
  maps = await this.getMapsForVersion(ver);
404
- } catch (err) {
405
- this.log(`[decode] v${ver}: getMapsForVersion threw — ${err}`);
452
+ } catch {
406
453
  continue;
407
454
  }
408
455
  const { reverseMap, sortedIdLengths } = maps;
409
- const r1 = this.greedyDecode(buffer, 1, reverseMap, sortedIdLengths) ?? this.tryDecode(1, buffer, reverseMap, [], 0, sortedIdLengths);
410
- this.log(
411
- `[decode] v${ver} strict(pos=1): ${r1 !== null ? `"${r1}"` : "null"}`
412
- );
456
+ const r1 = this.greedyDecode(buffer, 1, reverseMap, sortedIdLengths);
413
457
  if (r1 !== null) {
414
- const notice2 = versionByte === ver ? void 0 : `Byte[0]=${versionByte} is not a recognised version header but decoded successfully with dictionary v${ver}.`;
415
- return { text: r1, isWordBin: true, detectedFormat, notice: notice2 };
458
+ const notice = versionByte === ver ? void 0 : `Byte[0]=${versionByte} is not a recognised version header but decoded successfully with dictionary v${ver}.`;
459
+ return { text: r1, isWordBin: true, detectedFormat, notice };
416
460
  }
417
- const r0 = this.greedyDecode(buffer, 0, reverseMap, sortedIdLengths) ?? this.tryDecode(0, buffer, reverseMap, [], 0, sortedIdLengths);
418
- this.log(
419
- `[decode] v${ver} strict(pos=0): ${r0 !== null ? `"${r0}"` : "null"}`
420
- );
461
+ const r0 = this.greedyDecode(buffer, 0, reverseMap, sortedIdLengths);
421
462
  if (r0 !== null) {
422
463
  return {
423
464
  text: r0,
424
- isWordBin: true,
465
+ isWordBin: false,
425
466
  detectedFormat,
426
467
  notice: `Payload had no version header. Decoded using dictionary v${ver}.`
427
468
  };
428
469
  }
429
470
  }
430
- this.log(`[decode] strict parse failed — falling back to partial scan`);
431
- if (availableVersions.length > 0) {
432
- const scanVersion = availableVersions[availableVersions.length - 1];
433
- try {
434
- const { reverseMap, sortedIdLengths } = await this.getMapsForVersion(scanVersion);
435
- const scan1 = this.partialScan(buffer, 1, reverseMap, sortedIdLengths);
436
- const scan0 = this.partialScan(buffer, 0, reverseMap, sortedIdLengths);
437
- const best = scan1.wordCount >= scan0.wordCount ? scan1 : scan0;
438
- this.log(
439
- `[decode] partial scan(pos=1) words=${scan1.wordCount} raw=${scan1.rawSegments.length} | scan(pos=0) words=${scan0.wordCount} raw=${scan0.rawSegments.length}`
440
- );
441
- const notice2 = `This does not appear to be a valid WordBin payload. Partial scan using dictionary v${scanVersion} extracted ${best.wordCount} word(s); ${best.rawSegments.length} byte sequence(s) had no dictionary match and are shown as [0xXX] markers.`;
442
- return {
443
- text: best.text,
444
- isWordBin: false,
445
- detectedFormat,
446
- rawSegments: best.rawSegments,
447
- notice: notice2
448
- };
449
- } catch {
450
- }
471
+ this.log(`[decode] strict parse failed — falling back to UTF-8 validation`);
472
+ const utf8Text = new TextDecoder("utf-8", { fatal: false }).decode(buffer);
473
+ try {
474
+ const latest = availableVersions[availableVersions.length - 1];
475
+ const { forwardMap, reverseMap, sortedIdLengths } = await this.getMapsForVersion(latest);
476
+ const validated = this.validateDecodedWords(
477
+ utf8Text,
478
+ forwardMap,
479
+ reverseMap,
480
+ sortedIdLengths
481
+ );
482
+ return {
483
+ text: validated.text,
484
+ isWordBin: false,
485
+ detectedFormat,
486
+ rawSegments: validated.rawSegments,
487
+ notice: "Payload is not WordBin. UTF-8 text was recovered and dictionary validation applied."
488
+ };
489
+ } catch {
490
+ return {
491
+ text: utf8Text,
492
+ isWordBin: false,
493
+ detectedFormat,
494
+ notice: "Payload decoded as plain UTF-8 text."
495
+ };
451
496
  }
452
- const notice = `Could not decode with any available dictionary (tried: ${availableVersions.join(", ") || "none"}). Falling back to UTF-8 text decoding.`;
453
- this.log(`[decode] ${notice}`);
454
- return {
455
- text: new TextDecoder("utf-8", { fatal: false }).decode(buffer),
456
- isWordBin: false,
457
- detectedFormat,
458
- notice
459
- };
460
497
  }
461
- // ── Private: greedy linear decode ────────────────────────────────────────────
462
- /**
463
- * O(n) longest-match-first decode. Returns null if any byte has no match.
464
- * This is the fast path; tryDecode is used as a backtracking fallback.
465
- */
466
498
  greedyDecode(buffer, startPos, reverseMap, sortedIdLengths) {
467
499
  const words = [];
468
500
  let pos = startPos;
469
501
  while (pos < buffer.length) {
470
502
  if (buffer[pos] === LITERAL) {
471
- const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
472
- if (byteLen > 1e6 || byteLen < 0) return null;
473
- const start = pos + 1 + bytesRead;
474
- const end = start + byteLen;
475
- if (end > buffer.length) return null;
476
- words.push(utf8Decode(buffer.subarray(start, end)));
477
- pos = end;
478
- continue;
479
- }
480
- let matched = false;
481
- for (const len of sortedIdLengths) {
482
- if (pos + len > buffer.length) continue;
483
- const key = toHex(buffer.subarray(pos, pos + len));
484
- if (reverseMap.has(key)) {
485
- words.push(reverseMap.get(key));
486
- pos += len;
487
- matched = true;
488
- break;
489
- }
490
- }
491
- if (!matched) return null;
492
- }
493
- return words.join(" ");
494
- }
495
- // ── Private: partial / best-effort scan ──────────────────────────────────────
496
- /**
497
- * Scans through the buffer extracting any recognised dictionary words.
498
- * Unrecognised bytes are collected as raw segments and rendered as [0xXX].
499
- * Always consumes the entire buffer — never returns null.
500
- */
501
- partialScan(buffer, startPos, reverseMap, sortedIdLengths) {
502
- const parts = [];
503
- const rawSegments = [];
504
- let wordCount = 0;
505
- let pos = startPos;
506
- while (pos < buffer.length) {
507
- if (buffer[pos] === LITERAL && pos + 1 < buffer.length) {
503
+ let byteLen;
504
+ let bytesRead;
508
505
  try {
509
- const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
510
- if (byteLen > 0 && byteLen <= 1e6) {
511
- const start = pos + 1 + bytesRead;
512
- const end = start + byteLen;
513
- if (end <= buffer.length) {
514
- const word = utf8Decode(buffer.subarray(start, end));
515
- parts.push(word);
516
- wordCount++;
517
- pos = end;
518
- continue;
519
- }
520
- }
506
+ ({ value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1));
521
507
  } catch {
508
+ byteLen = -1;
509
+ bytesRead = 0;
510
+ }
511
+ if (byteLen > 0) {
512
+ if (byteLen > 1e6 || byteLen < 0) return null;
513
+ const start = pos + 1 + bytesRead;
514
+ const end = start + byteLen;
515
+ if (end > buffer.length) return null;
516
+ words.push(utf8Decode(buffer.subarray(start, end)));
517
+ pos = end;
518
+ continue;
522
519
  }
523
520
  }
524
521
  let matched = false;
@@ -526,65 +523,122 @@ class WordBin {
526
523
  if (pos + len > buffer.length) continue;
527
524
  const key = toHex(buffer.subarray(pos, pos + len));
528
525
  if (reverseMap.has(key)) {
529
- parts.push(reverseMap.get(key));
530
- wordCount++;
526
+ words.push(reverseMap.get(key));
531
527
  pos += len;
532
528
  matched = true;
533
529
  break;
534
530
  }
535
531
  }
536
- if (!matched) {
537
- const marker = `[0x${buffer[pos].toString(16).padStart(2, "0")}]`;
538
- parts.push(marker);
539
- rawSegments.push(marker);
540
- this.log(
541
- `[decode] partial scan: no match at pos=${pos} byte=${buffer[pos]}`
542
- );
543
- pos++;
544
- }
545
- }
546
- return { text: parts.join(" "), wordCount, rawSegments };
547
- }
548
- // ── Private: backtracking decode ─────────────────────────────────────────────
549
- tryDecode(pos, buffer, reverseMap, result, depth, sortedIdLengths) {
550
- if (pos === buffer.length) return result.join(" ");
551
- if (buffer[pos] === LITERAL) {
552
- const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
553
- if (byteLen > 1e6 || byteLen < 0) return null;
554
- const start = pos + 1 + bytesRead;
555
- const end = start + byteLen;
556
- if (end > buffer.length) return null;
557
- result.push(utf8Decode(buffer.subarray(start, end)));
558
- const res = this.tryDecode(
559
- end,
560
- buffer,
561
- reverseMap,
562
- result,
563
- depth + 1,
564
- sortedIdLengths
565
- );
566
- if (res !== null) return res;
567
- result.pop();
568
- }
569
- for (const len of sortedIdLengths) {
570
- if (pos + len > buffer.length) continue;
571
- const key = toHex(buffer.subarray(pos, pos + len));
572
- if (reverseMap.has(key)) {
573
- result.push(reverseMap.get(key));
574
- const res = this.tryDecode(
575
- pos + len,
576
- buffer,
577
- reverseMap,
578
- result,
579
- depth + 1,
580
- sortedIdLengths
581
- );
582
- if (res !== null) return res;
583
- result.pop();
584
- }
532
+ if (!matched) return null;
585
533
  }
586
- return null;
534
+ return words.join(" ");
587
535
  }
536
+ // private partialScan(
537
+ // buffer: Uint8Array,
538
+ // startPos: number,
539
+ // reverseMap: Map<string, string>,
540
+ // sortedIdLengths: number[],
541
+ // ): { text: string; wordCount: number; rawSegments: string[] } {
542
+ // const parts: string[] = [];
543
+ // const rawSegments: string[] = [];
544
+ // let wordCount = 0;
545
+ // let pos = startPos;
546
+ // while (pos < buffer.length) {
547
+ // if (buffer[pos] === LITERAL && pos + 1 < buffer.length) {
548
+ // try {
549
+ // const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
550
+ // if (byteLen > 0 && byteLen <= 1_000_000) {
551
+ // const start = pos + 1 + bytesRead;
552
+ // const end = start + byteLen;
553
+ // if (end <= buffer.length) {
554
+ // const word = utf8Decode(buffer.subarray(start, end));
555
+ // parts.push(word);
556
+ // wordCount++;
557
+ // pos = end;
558
+ // continue;
559
+ // }
560
+ // }
561
+ // } catch {}
562
+ // }
563
+ // let matched = false;
564
+ // for (const len of sortedIdLengths) {
565
+ // if (pos + len > buffer.length) continue;
566
+ // const key = toHex(buffer.subarray(pos, pos + len));
567
+ // if (reverseMap.has(key)) {
568
+ // parts.push(reverseMap.get(key)!);
569
+ // wordCount++;
570
+ // pos += len;
571
+ // matched = true;
572
+ // break;
573
+ // }
574
+ // }
575
+ // if (!matched) {
576
+ // const marker = `[0x${buffer[pos].toString(16).padStart(2, "0")}]`;
577
+ // parts.push(marker);
578
+ // rawSegments.push(marker);
579
+ // this.log(
580
+ // `[decode] partial scan: no match at pos=${pos} byte=${buffer[pos]}`,
581
+ // );
582
+ // pos++;
583
+ // }
584
+ // }
585
+ // return { text: parts.join(" "), wordCount, rawSegments };
586
+ // }
587
+ // private tryDecode(
588
+ // pos: number,
589
+ // buffer: Uint8Array,
590
+ // reverseMap: Map<string, string>,
591
+ // result: string[],
592
+ // depth: number,
593
+ // sortedIdLengths: number[],
594
+ // ): string | null {
595
+ // if (pos === buffer.length) return result.join(" ");
596
+ // if (buffer[pos] === LITERAL) {
597
+ // let byteLen: number;
598
+ // let bytesRead: number;
599
+ // try {
600
+ // ({ value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1));
601
+ // } catch {
602
+ // byteLen = -1;
603
+ // bytesRead = 0;
604
+ // }
605
+ // if (byteLen > 0) {
606
+ // if (byteLen > 1_000_000 || byteLen < 0) return null;
607
+ // const start = pos + 1 + bytesRead;
608
+ // const end = start + byteLen;
609
+ // if (end > buffer.length) return null;
610
+ // result.push(utf8Decode(buffer.subarray(start, end)));
611
+ // const res = this.tryDecode(
612
+ // end,
613
+ // buffer,
614
+ // reverseMap,
615
+ // result,
616
+ // depth + 1,
617
+ // sortedIdLengths,
618
+ // );
619
+ // if (res !== null) return res;
620
+ // result.pop();
621
+ // }
622
+ // }
623
+ // for (const len of sortedIdLengths) {
624
+ // if (pos + len > buffer.length) continue;
625
+ // const key = toHex(buffer.subarray(pos, pos + len));
626
+ // if (reverseMap.has(key)) {
627
+ // result.push(reverseMap.get(key)!);
628
+ // const res = this.tryDecode(
629
+ // pos + len,
630
+ // buffer,
631
+ // reverseMap,
632
+ // result,
633
+ // depth + 1,
634
+ // sortedIdLengths,
635
+ // );
636
+ // if (res !== null) return res;
637
+ // result.pop();
638
+ // }
639
+ // }
640
+ // return null;
641
+ // }
588
642
  }
589
643
  export {
590
644
  MAGIC,