@bigdreamsweb3/wordbin 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,58 @@
1
+ import { EncodeResult, WordBinDictionary } from '../types.js';
2
/** Wire formats the decoder can report for an incoming payload. */
type PayloadFormat =
  | "bytes"
  | "base58"
  | "base64"
  | "hex"
  | "bin21";

/** Result object produced by a decode call. */
export interface DecodeResult {
  /** Decoded text: the words for WordBin payloads, a best-effort rendering otherwise. */
  text: string;

  /** `true` only if the payload parsed completely as a valid WordBin stream. */
  isWordBin: boolean;

  /** Wire format that was auto-detected for the input. */
  detectedFormat: PayloadFormat;

  /**
   * Human-readable explanation, set when the payload was not a valid WordBin
   * stream; describes which fallback the decoder used.
   */
  notice?: string;

  /**
   * Set when partial scanning was used (non-WordBin payloads): the raw byte
   * sequences that had no dictionary match, in the order they were found.
   */
  rawSegments?: string[];
}
21
/** Type declaration for the WordBin encoder/decoder class. */
export declare class WordBin {
  private primaryDictVersion;
  private log;

  /**
   * @param initialDict Optional dictionary to start from.
   * @param options Pass `debug: true` to enable debug logging.
   */
  constructor(initialDict?: WordBinDictionary, options?: { debug?: boolean });

  /** Resolves to an instance backed by a dictionary built from `words`. */
  static createFromWords(words: string[]): Promise<WordBin>;

  /** Resolves to an instance backed by an already-built dictionary object. */
  static createFromJson(dictJson: WordBinDictionary): Promise<WordBin>;

  /** Resolves to an instance backed by the latest available dictionary. */
  static create(options?: { debug?: boolean }): Promise<WordBin>;

  private getMapsForVersion;

  /**
   * Encodes the input and resolves to the encoding result.
   *
   * @param text Plain text, raw bytes, or a previous `EncodeResult`.
   * @param options `dictVersion` selects the dictionary version to encode with.
   */
  encode(
    text: string | EncodeResult | Uint8Array,
    options?: { dictVersion?: number },
  ): Promise<EncodeResult>;

  /**
   * Decodes any supported payload format back to human-readable text.
   *
   * Valid WordBin payloads yield the exact original words. Anything else is
   * scanned byte-by-byte: dictionary words are extracted where possible and
   * unrecognised bytes are kept as "[0xXX]" markers.
   */
  decode(payload: Uint8Array | string): Promise<DecodeResult>;

  /**
   * Longest-match-first decode; yields null when some byte has no match.
   * Fast path; `tryDecode` serves as the backtracking fallback.
   */
  private greedyDecode;

  /**
   * Walks the buffer pulling out every recognised dictionary word. Bytes
   * without a match are collected as raw segments and rendered as [0xXX].
   * Always consumes the whole buffer and never yields null.
   */
  private partialScan;

  private tryDecode;
}
58
+ export {};
@@ -446,15 +446,15 @@
446
446
  "988180": [
447
447
  "offer"
448
448
  ],
449
- "b0ad": [
450
- "able"
451
- ],
452
449
  "df864c": [
453
450
  "abandon"
454
451
  ],
455
452
  "bcffaa": [
456
453
  "ability"
457
454
  ],
455
+ "b0ad": [
456
+ "able"
457
+ ],
458
458
  "a4262e": [
459
459
  "about"
460
460
  ],
@@ -1647,8 +1647,7 @@
1647
1647
  "crush"
1648
1648
  ],
1649
1649
  "58a6": [
1650
- "cry",
1651
- "math"
1650
+ "cry"
1652
1651
  ],
1653
1652
  "60a0f7": [
1654
1653
  "crystal"
@@ -2574,24 +2573,24 @@
2574
2573
  "5a5770": [
2575
2574
  "furnace"
2576
2575
  ],
2577
- "f44a": [
2578
- "fury"
2579
- ],
2580
2576
  "ebb3de": [
2581
2577
  "future"
2582
2578
  ],
2579
+ "f44a": [
2580
+ "fury"
2581
+ ],
2583
2582
  "4e5aa6": [
2584
2583
  "gadget"
2585
2584
  ],
2586
2585
  "66dd": [
2587
2586
  "gain"
2588
2587
  ],
2589
- "eba4ae": [
2590
- "galaxy"
2591
- ],
2592
2588
  "ce387d": [
2593
2589
  "gallery"
2594
2590
  ],
2591
+ "eba4ae": [
2592
+ "galaxy"
2593
+ ],
2595
2594
  "6ca5": [
2596
2595
  "game"
2597
2596
  ],
@@ -6143,6 +6142,9 @@
6143
6142
  ],
6144
6143
  "24fe": [
6145
6144
  "zoo"
6145
+ ],
6146
+ "ad06fa": [
6147
+ "math"
6146
6148
  ]
6147
6149
  }
6148
6150
  }
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- export { MAGIC } from './constants.js';
1
+ export { MAGIC } from './constants';
2
2
  export { buildDictionary } from './dict/builder';
3
- export { WordBin } from './core.js';
3
+ export { WordBin } from './core/index';
4
4
  export type { EncodeResult, WordBinDictionary } from './types';
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { b as buildDictionary, t as toBase64, u as utf8Encode, e as encodeVarint, f as fromBase64, d as decodeVarint, a as utf8Decode, c as toHex } from "./builder-e2OwBYJh.js";
1
+ import { b as buildDictionary, t as toBase64, u as utf8Encode, e as encodeVarint, d as decodeVarint, a as utf8Decode, c as toHex } from "./builder-vFphFQMU.js";
2
2
  import fs from "fs/promises";
3
3
  import path from "path";
4
4
  import { fileURLToPath } from "url";
@@ -81,9 +81,174 @@ async function loadLatestDictionary() {
81
81
  );
82
82
  return loadDictionaryByVersion(latestVersion);
83
83
  }
84
/**
 * Creates a codec that converts between bytes and text written in a custom
 * positional alphabet.
 *
 * The index of each character in `ALPHABET2` is its digit value, and the first
 * character is the "leader" that stands for a leading zero byte.
 *
 * @param ALPHABET2 Digit characters; must be shorter than 255 and contain no
 *   repeated character.
 * @returns An object with `encode`, `decodeUnsafe` (returns undefined when it
 *   meets a character outside the alphabet) and `decode` (throws instead).
 * @throws TypeError if the alphabet is too long or a character is repeated.
 */
function base(ALPHABET2) {
  if (ALPHABET2.length >= 255) {
    throw new TypeError("Alphabet too long");
  }

  // char code -> digit value; 255 is the "not part of the alphabet" sentinel.
  const digitOf = new Uint8Array(256).fill(255);
  for (let digit = 0; digit < ALPHABET2.length; digit++) {
    const symbol = ALPHABET2.charAt(digit);
    const code = symbol.charCodeAt(0);
    if (digitOf[code] !== 255) {
      throw new TypeError(symbol + " is ambiguous");
    }
    digitOf[code] = digit;
  }

  const radix = ALPHABET2.length;
  const leader = ALPHABET2.charAt(0);
  // Size ratios used to pre-allocate the working buffers.
  const bytesPerDigit = Math.log(radix) / Math.log(256);
  const digitsPerByte = Math.log(256) / Math.log(radix);

  /** Encodes a Uint8Array (or any array / array-buffer view) as a string. */
  function encode(source) {
    if (!(source instanceof Uint8Array)) {
      if (ArrayBuffer.isView(source)) {
        source = new Uint8Array(source.buffer, source.byteOffset, source.byteLength);
      } else if (Array.isArray(source)) {
        source = Uint8Array.from(source);
      }
    }
    if (!(source instanceof Uint8Array)) {
      throw new TypeError("Expected Uint8Array");
    }
    if (source.length === 0) {
      return "";
    }

    // Leading zero bytes are emitted as leader characters, not as digits.
    const end = source.length;
    let cursor = 0;
    while (cursor !== end && source[cursor] === 0) {
      cursor++;
    }
    const leadingZeroes = cursor;

    const capacity = ((end - cursor) * digitsPerByte + 1) >>> 0;
    const digits = new Uint8Array(capacity);
    let used = 0;
    for (; cursor !== end; cursor++) {
      // Multiply the number accumulated so far by 256 and add the next byte.
      let carry = source[cursor];
      let touched = 0;
      for (
        let slot = capacity - 1;
        (carry !== 0 || touched < used) && slot !== -1;
        slot--, touched++
      ) {
        carry += (256 * digits[slot]) >>> 0;
        digits[slot] = (carry % radix) >>> 0;
        carry = (carry / radix) >>> 0;
      }
      if (carry !== 0) {
        throw new Error("Non-zero carry");
      }
      used = touched;
    }

    // Skip the unused high-order zero digits of the working buffer.
    let first = capacity - used;
    while (first !== capacity && digits[first] === 0) {
      first++;
    }

    let text = leader.repeat(leadingZeroes);
    for (let k = first; k < capacity; k++) {
      text += ALPHABET2.charAt(digits[k]);
    }
    return text;
  }

  /**
   * Decodes a string to bytes. Returns undefined (instead of throwing) when
   * the string contains a character that is not in the alphabet.
   */
  function decodeUnsafe(source) {
    if (typeof source !== "string") {
      throw new TypeError("Expected String");
    }
    if (source.length === 0) {
      return new Uint8Array();
    }

    // Each leading leader character maps back to one zero byte.
    let cursor = 0;
    while (source[cursor] === leader) {
      cursor++;
    }
    const leadingZeroes = cursor;

    const capacity = ((source.length - cursor) * bytesPerDigit + 1) >>> 0;
    const bytes = new Uint8Array(capacity);
    let used = 0;
    for (; cursor < source.length; cursor++) {
      const code = source.charCodeAt(cursor);
      if (code > 255) {
        return;
      }
      let carry = digitOf[code];
      if (carry === 255) {
        return;
      }
      // Multiply the number accumulated so far by the radix and add the digit.
      let touched = 0;
      for (
        let slot = capacity - 1;
        (carry !== 0 || touched < used) && slot !== -1;
        slot--, touched++
      ) {
        carry += (radix * bytes[slot]) >>> 0;
        bytes[slot] = (carry % 256) >>> 0;
        carry = (carry / 256) >>> 0;
      }
      if (carry !== 0) {
        throw new Error("Non-zero carry");
      }
      used = touched;
    }

    // Skip the unused high-order zero bytes of the working buffer.
    let first = capacity - used;
    while (first !== capacity && bytes[first] === 0) {
      first++;
    }

    const result = new Uint8Array(leadingZeroes + (capacity - first));
    result.set(bytes.subarray(first), leadingZeroes);
    return result;
  }

  /** Like `decodeUnsafe`, but throws on a character outside the alphabet. */
  function decode(string) {
    const buffer = decodeUnsafe(string);
    if (!buffer) {
      throw new Error("Non-base" + radix + " character");
    }
    return buffer;
  }

  return { encode, decodeUnsafe, decode };
}
212
// Base58 digit set: digits and letters with 0, O, I and l left out.
var ALPHABET = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
// Shared Base58 codec built from the alphabet above.
const bs58 = base(ALPHABET);
214
/**
 * Renders a byte sequence as a hex string, two zero-padded digits per byte
 * (lowercase, as produced by `Number.prototype.toString(16)`).
 */
function bytesToHex(bytes) {
  let hex = "";
  for (const byte of Array.from(bytes)) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
217
/**
 * Guesses how a payload string is encoded and converts it to bytes.
 *
 * Candidates are tried in a fixed order and the first one that matches wins:
 *   1. hex (even number of hex digits)
 *   2. Base58
 *   3. Base64 / Base64url (padding optional for the URL-safe form)
 *   4. fallback: one byte per UTF-16 code unit, reported as "bin21"
 *
 * @param payload The encoded text.
 * @returns `{ buffer, detectedFormat }`.
 */
function detectAndConvert(payload) {
  // 1. Hex
  const looksHex = /^[0-9a-fA-F]+$/.test(payload) && payload.length % 2 === 0;
  if (looksHex) {
    const hexBytes = new Uint8Array(payload.length / 2);
    for (let i = 0; i < hexBytes.length; i++) {
      hexBytes[i] = parseInt(payload.slice(2 * i, 2 * i + 2), 16);
    }
    return { buffer: hexBytes, detectedFormat: "hex" };
  }

  // 2. Base58
  if (/^[123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz]+$/.test(payload)) {
    try {
      return { buffer: bs58.decode(payload), detectedFormat: "base58" };
    } catch {
      // Not decodable as Base58 after all; try the next format.
    }
  }

  // 3. Base64 (standard or URL-safe). The URL-safe characters are mapped back
  //    to the standard ones and padding is restored before calling atob().
  const standardB64 = /^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{4})$/;
  const urlSafeB64 = /^(?:[A-Za-z0-9\-_]{4})*(?:[A-Za-z0-9\-_]{2}(?:==)?|[A-Za-z0-9\-_]{3}=?|[A-Za-z0-9\-_]{4})$/;
  const standardised = payload.replace(/-/g, "+").replace(/_/g, "/");
  const remainder = standardised.length % 4;
  const padded = remainder ? standardised + "=".repeat(4 - remainder) : standardised;
  if (standardB64.test(payload) || urlSafeB64.test(payload)) {
    try {
      const binary = atob(padded);
      const b64Bytes = new Uint8Array(binary.length);
      for (let i = 0; i < binary.length; i++) {
        b64Bytes[i] = binary.charCodeAt(i);
      }
      return { buffer: b64Bytes, detectedFormat: "base64" };
    } catch {
      // atob() rejected the text; fall through to the raw interpretation.
    }
  }

  // 4. Fallback: each UTF-16 code unit becomes one byte (stored modulo 256).
  const rawBytes = Uint8Array.from(
    { length: payload.length },
    (_, i) => payload.charCodeAt(i)
  );
  return { buffer: rawBytes, detectedFormat: "bin21" };
}
84
249
  class WordBin {
85
250
  constructor(initialDict, options) {
86
- this.primaryDictVersion = initialDict?.version ?? 2;
251
+ this.primaryDictVersion = initialDict?.version ?? 1;
87
252
  this.log = options?.debug ? (...args) => console.log("[WordBin]", ...args) : () => {
88
253
  };
89
254
  }
@@ -91,15 +256,13 @@ class WordBin {
91
256
  console.warn(
92
257
  "Building dictionary from scratch – consider using pre-built files"
93
258
  );
94
- const dict = await buildDictionary(words);
95
- return new WordBin(dict);
259
+ return new WordBin(await buildDictionary(words));
96
260
  }
97
261
  static async createFromJson(dictJson) {
98
262
  return new WordBin(dictJson);
99
263
  }
100
264
  static async create(options) {
101
- const latestDict = await loadLatestDictionary();
102
- return new WordBin(latestDict, options);
265
+ return new WordBin(await loadLatestDictionary(), options);
103
266
  }
104
267
  async getMapsForVersion(version) {
105
268
  const dict = await loadDictionaryByVersion(version);
@@ -119,9 +282,13 @@ class WordBin {
119
282
  reverseMap.set(hex, word);
120
283
  forwardMap.set(word, bytes);
121
284
  }
122
- const sortedIdLengths = Array.from(idLengths).sort((a, b) => b - a);
123
- return { reverseMap, forwardMap, sortedIdLengths };
285
+ return {
286
+ reverseMap,
287
+ forwardMap,
288
+ sortedIdLengths: Array.from(idLengths).sort((a, b) => b - a)
289
+ };
124
290
  }
291
+ // ── encode ──────────────────────────────────────────────────────────────────
125
292
  async encode(text, options) {
126
293
  let textStr;
127
294
  if (typeof text === "string") {
@@ -129,7 +296,7 @@ class WordBin {
129
296
  } else if (text instanceof Uint8Array) {
130
297
  textStr = toBase64(text);
131
298
  } else {
132
- textStr = text.encodedBase64;
299
+ textStr = text.base64Payload;
133
300
  }
134
301
  const trimmed = textStr.trim();
135
302
  if (!trimmed) {
@@ -138,19 +305,21 @@ class WordBin {
138
305
  dictVersion: this.primaryDictVersion,
139
306
  encoded: new Uint8Array(0),
140
307
  payload: "",
141
- encodedBase64: "",
308
+ bin21: "",
309
+ bin21Payload: "",
310
+ base64Payload: "",
311
+ hexPayload: "",
312
+ base58Payload: "",
142
313
  originalBytes: 0,
143
314
  encodedBytes: 0,
144
315
  bytesSaved: 0,
145
316
  ratioPercent: 100
146
317
  };
147
318
  }
148
- const words = trimmed.split(/\s+/).filter(Boolean);
149
319
  const useVersion = options?.dictVersion ?? this.primaryDictVersion;
150
- const header = new Uint8Array([useVersion]);
151
- const chunks = [header];
152
320
  const { forwardMap } = await this.getMapsForVersion(useVersion);
153
- for (const w of words) {
321
+ const chunks = [new Uint8Array([useVersion])];
322
+ for (const w of trimmed.split(/\s+/).filter(Boolean)) {
154
323
  const id = forwardMap.get(w);
155
324
  if (id) {
156
325
  chunks.push(id);
@@ -172,65 +341,220 @@ class WordBin {
172
341
  offset += chunk.length;
173
342
  }
174
343
  const originalBytes = new TextEncoder().encode(textStr).length;
175
- const base64Result = toBase64(result);
344
+ const hexPayload = bytesToHex(result);
345
+ const bin21Payload = Array.from(result).map((b) => String.fromCharCode(b)).join("");
346
+ const base64Payload = toBase64(result);
347
+ const base58Payload = bs58.encode(result);
176
348
  return {
177
349
  originalText: textStr,
178
350
  dictVersion: useVersion,
179
351
  encoded: result,
180
- payload: base64Result,
181
- encodedBase64: base64Result,
352
+ bin21: bin21Payload,
353
+ payload: bin21Payload,
354
+ bin21Payload,
355
+ hexPayload,
356
+ base64Payload,
357
+ base58Payload,
182
358
  originalBytes,
183
- encodedBytes: totalLength,
184
- bytesSaved: originalBytes - totalLength,
185
- ratioPercent: totalLength === 0 ? 100 : Math.round(totalLength / originalBytes * 100 * 100) / 100
359
+ encodedBytes: bin21Payload.length,
360
+ bytesSaved: originalBytes - bin21Payload.length,
361
+ ratioPercent: Math.round(bin21Payload.length / originalBytes * 1e4) / 100
186
362
  };
187
363
  }
188
- async decode(data) {
364
+ // ── decode ───────────────────────────────────────────────────────────────────
365
+ /**
366
+ * Decodes any supported payload format back to human-readable text.
367
+ *
368
+ * For valid WordBin payloads: returns the exact original words.
369
+ * For non-WordBin payloads: scans byte-by-byte, extracts dictionary words
370
+ * wherever possible, and preserves unrecognised
371
+ * bytes as "[0xXX]" markers.
372
+ */
373
+ async decode(payload) {
189
374
  let buffer;
190
- if (typeof data === "string") {
191
- buffer = fromBase64(data);
375
+ let detectedFormat;
376
+ if (payload instanceof Uint8Array) {
377
+ buffer = payload;
378
+ detectedFormat = "bytes";
192
379
  } else {
193
- buffer = data;
380
+ ({ buffer, detectedFormat } = detectAndConvert(payload));
194
381
  }
382
+ this.log(
383
+ `[decode] format=${detectedFormat} bufLen=${buffer.length} firstBytes=[${Array.from(buffer.slice(0, 8)).join(",")}]`
384
+ );
195
385
  if (buffer.length < 1) {
196
- throw new Error("Data too short to contain version byte");
197
- }
198
- const version = buffer[0];
199
- let pos = 1;
200
- const { reverseMap, sortedIdLengths } = await this.getMapsForVersion(version);
201
- const result = [];
202
- const decoded = this.tryDecode(
203
- pos,
204
- buffer,
205
- reverseMap,
206
- result,
207
- 0,
208
- sortedIdLengths
386
+ return {
387
+ text: "",
388
+ isWordBin: false,
389
+ detectedFormat,
390
+ notice: "Payload is empty nothing to decode."
391
+ };
392
+ }
393
+ const availableVersions = await getAllAvailableDictionaryVersions();
394
+ const versionByte = buffer[0];
395
+ const versionIsHeader = availableVersions.includes(versionByte);
396
+ this.log(
397
+ `[decode] availableVersions=[${availableVersions.join(",")}] versionByte=${versionByte} isKnownHeader=${versionIsHeader}`
209
398
  );
210
- if (decoded === null) {
211
- throw new Error(
212
- "Decode failed — possible data corruption, wrong dictionary version, or unsupported format"
399
+ const tryOrder = versionIsHeader ? [versionByte, ...availableVersions.filter((v) => v !== versionByte)] : [...availableVersions];
400
+ for (const ver of tryOrder) {
401
+ let maps;
402
+ try {
403
+ maps = await this.getMapsForVersion(ver);
404
+ } catch (err) {
405
+ this.log(`[decode] v${ver}: getMapsForVersion threw — ${err}`);
406
+ continue;
407
+ }
408
+ const { reverseMap, sortedIdLengths } = maps;
409
+ const r1 = this.greedyDecode(buffer, 1, reverseMap, sortedIdLengths) ?? this.tryDecode(1, buffer, reverseMap, [], 0, sortedIdLengths);
410
+ this.log(
411
+ `[decode] v${ver} strict(pos=1): ${r1 !== null ? `"${r1}"` : "null"}`
412
+ );
413
+ if (r1 !== null) {
414
+ const notice2 = versionByte === ver ? void 0 : `Byte[0]=${versionByte} is not a recognised version header but decoded successfully with dictionary v${ver}.`;
415
+ return { text: r1, isWordBin: true, detectedFormat, notice: notice2 };
416
+ }
417
+ const r0 = this.greedyDecode(buffer, 0, reverseMap, sortedIdLengths) ?? this.tryDecode(0, buffer, reverseMap, [], 0, sortedIdLengths);
418
+ this.log(
419
+ `[decode] v${ver} strict(pos=0): ${r0 !== null ? `"${r0}"` : "null"}`
213
420
  );
421
+ if (r0 !== null) {
422
+ return {
423
+ text: r0,
424
+ isWordBin: true,
425
+ detectedFormat,
426
+ notice: `Payload had no version header. Decoded using dictionary v${ver}.`
427
+ };
428
+ }
429
+ }
430
+ this.log(`[decode] strict parse failed — falling back to partial scan`);
431
+ if (availableVersions.length > 0) {
432
+ const scanVersion = availableVersions[availableVersions.length - 1];
433
+ try {
434
+ const { reverseMap, sortedIdLengths } = await this.getMapsForVersion(scanVersion);
435
+ const scan1 = this.partialScan(buffer, 1, reverseMap, sortedIdLengths);
436
+ const scan0 = this.partialScan(buffer, 0, reverseMap, sortedIdLengths);
437
+ const best = scan1.wordCount >= scan0.wordCount ? scan1 : scan0;
438
+ this.log(
439
+ `[decode] partial scan(pos=1) words=${scan1.wordCount} raw=${scan1.rawSegments.length} | scan(pos=0) words=${scan0.wordCount} raw=${scan0.rawSegments.length}`
440
+ );
441
+ const notice2 = `This does not appear to be a valid WordBin payload. Partial scan using dictionary v${scanVersion} extracted ${best.wordCount} word(s); ${best.rawSegments.length} byte sequence(s) had no dictionary match and are shown as [0xXX] markers.`;
442
+ return {
443
+ text: best.text,
444
+ isWordBin: false,
445
+ detectedFormat,
446
+ rawSegments: best.rawSegments,
447
+ notice: notice2
448
+ };
449
+ } catch {
450
+ }
214
451
  }
215
- return decoded;
452
+ const notice = `Could not decode with any available dictionary (tried: ${availableVersions.join(", ") || "none"}). Falling back to UTF-8 text decoding.`;
453
+ this.log(`[decode] ${notice}`);
454
+ return {
455
+ text: new TextDecoder("utf-8", { fatal: false }).decode(buffer),
456
+ isWordBin: false,
457
+ detectedFormat,
458
+ notice
459
+ };
216
460
  }
217
- tryDecode(pos, buffer, reverseMap, result, depth, sortedIdLengths) {
218
- if (pos === buffer.length) {
219
- return result.join(" ");
461
+ // ── Private: greedy linear decode ────────────────────────────────────────────
462
+ /**
463
+ * O(n) longest-match-first decode. Returns null if any byte has no match.
464
+ * This is the fast path; tryDecode is used as a backtracking fallback.
465
+ */
466
+ greedyDecode(buffer, startPos, reverseMap, sortedIdLengths) {
467
+ const words = [];
468
+ let pos = startPos;
469
+ while (pos < buffer.length) {
470
+ if (buffer[pos] === LITERAL) {
471
+ const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
472
+ if (byteLen > 1e6 || byteLen < 0) return null;
473
+ const start = pos + 1 + bytesRead;
474
+ const end = start + byteLen;
475
+ if (end > buffer.length) return null;
476
+ words.push(utf8Decode(buffer.subarray(start, end)));
477
+ pos = end;
478
+ continue;
479
+ }
480
+ let matched = false;
481
+ for (const len of sortedIdLengths) {
482
+ if (pos + len > buffer.length) continue;
483
+ const key = toHex(buffer.subarray(pos, pos + len));
484
+ if (reverseMap.has(key)) {
485
+ words.push(reverseMap.get(key));
486
+ pos += len;
487
+ matched = true;
488
+ break;
489
+ }
490
+ }
491
+ if (!matched) return null;
492
+ }
493
+ return words.join(" ");
494
+ }
495
+ // ── Private: partial / best-effort scan ──────────────────────────────────────
496
+ /**
497
+ * Scans through the buffer extracting any recognised dictionary words.
498
+ * Unrecognised bytes are collected as raw segments and rendered as [0xXX].
499
+ * Always consumes the entire buffer — never returns null.
500
+ */
501
+ partialScan(buffer, startPos, reverseMap, sortedIdLengths) {
502
+ const parts = [];
503
+ const rawSegments = [];
504
+ let wordCount = 0;
505
+ let pos = startPos;
506
+ while (pos < buffer.length) {
507
+ if (buffer[pos] === LITERAL && pos + 1 < buffer.length) {
508
+ try {
509
+ const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
510
+ if (byteLen > 0 && byteLen <= 1e6) {
511
+ const start = pos + 1 + bytesRead;
512
+ const end = start + byteLen;
513
+ if (end <= buffer.length) {
514
+ const word = utf8Decode(buffer.subarray(start, end));
515
+ parts.push(word);
516
+ wordCount++;
517
+ pos = end;
518
+ continue;
519
+ }
520
+ }
521
+ } catch {
522
+ }
523
+ }
524
+ let matched = false;
525
+ for (const len of sortedIdLengths) {
526
+ if (pos + len > buffer.length) continue;
527
+ const key = toHex(buffer.subarray(pos, pos + len));
528
+ if (reverseMap.has(key)) {
529
+ parts.push(reverseMap.get(key));
530
+ wordCount++;
531
+ pos += len;
532
+ matched = true;
533
+ break;
534
+ }
535
+ }
536
+ if (!matched) {
537
+ const marker = `[0x${buffer[pos].toString(16).padStart(2, "0")}]`;
538
+ parts.push(marker);
539
+ rawSegments.push(marker);
540
+ this.log(
541
+ `[decode] partial scan: no match at pos=${pos} byte=${buffer[pos]}`
542
+ );
543
+ pos++;
544
+ }
220
545
  }
546
+ return { text: parts.join(" "), wordCount, rawSegments };
547
+ }
548
+ // ── Private: backtracking decode ─────────────────────────────────────────────
549
+ tryDecode(pos, buffer, reverseMap, result, depth, sortedIdLengths) {
550
+ if (pos === buffer.length) return result.join(" ");
221
551
  if (buffer[pos] === LITERAL) {
222
552
  const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
223
- if (byteLen > 1e6 || byteLen < 0) {
224
- return null;
225
- }
553
+ if (byteLen > 1e6 || byteLen < 0) return null;
226
554
  const start = pos + 1 + bytesRead;
227
555
  const end = start + byteLen;
228
- if (end > buffer.length) {
229
- return null;
230
- }
231
- const literalBytes = buffer.subarray(start, end);
232
- const word = utf8Decode(literalBytes);
233
- result.push(word);
556
+ if (end > buffer.length) return null;
557
+ result.push(utf8Decode(buffer.subarray(start, end)));
234
558
  const res = this.tryDecode(
235
559
  end,
236
560
  buffer,
@@ -244,11 +568,9 @@ class WordBin {
244
568
  }
245
569
  for (const len of sortedIdLengths) {
246
570
  if (pos + len > buffer.length) continue;
247
- const slice = buffer.subarray(pos, pos + len);
248
- const key = toHex(slice);
571
+ const key = toHex(buffer.subarray(pos, pos + len));
249
572
  if (reverseMap.has(key)) {
250
- const word = reverseMap.get(key);
251
- result.push(word);
573
+ result.push(reverseMap.get(key));
252
574
  const res = this.tryDecode(
253
575
  pos + len,
254
576
  buffer,