@bigdreamsweb3/wordbin 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { b as buildDictionary, t as toBase64, a as toHex, g as generateWordId, u as utf8Encode, e as encodeVarint, f as fromBase64, d as decodeVarint, c as utf8Decode } from "./dictionary-D3gr2Ala.js";
1
+ import { b as buildDictionary, t as toBase64, u as utf8Encode, e as encodeVarint, d as decodeVarint, a as utf8Decode, c as toHex } from "./builder-vFphFQMU.js";
2
2
  import fs from "fs/promises";
3
3
  import path from "path";
4
4
  import { fileURLToPath } from "url";
@@ -81,225 +81,508 @@ async function loadLatestDictionary() {
81
81
  );
82
82
  return loadDictionaryByVersion(latestVersion);
83
83
  }
84
+ function base(ALPHABET2) {
85
+ if (ALPHABET2.length >= 255) {
86
+ throw new TypeError("Alphabet too long");
87
+ }
88
+ const BASE_MAP = new Uint8Array(256);
89
+ for (let j = 0; j < BASE_MAP.length; j++) {
90
+ BASE_MAP[j] = 255;
91
+ }
92
+ for (let i = 0; i < ALPHABET2.length; i++) {
93
+ const x = ALPHABET2.charAt(i);
94
+ const xc = x.charCodeAt(0);
95
+ if (BASE_MAP[xc] !== 255) {
96
+ throw new TypeError(x + " is ambiguous");
97
+ }
98
+ BASE_MAP[xc] = i;
99
+ }
100
+ const BASE = ALPHABET2.length;
101
+ const LEADER = ALPHABET2.charAt(0);
102
+ const FACTOR = Math.log(BASE) / Math.log(256);
103
+ const iFACTOR = Math.log(256) / Math.log(BASE);
104
+ function encode(source) {
105
+ if (source instanceof Uint8Array) ;
106
+ else if (ArrayBuffer.isView(source)) {
107
+ source = new Uint8Array(source.buffer, source.byteOffset, source.byteLength);
108
+ } else if (Array.isArray(source)) {
109
+ source = Uint8Array.from(source);
110
+ }
111
+ if (!(source instanceof Uint8Array)) {
112
+ throw new TypeError("Expected Uint8Array");
113
+ }
114
+ if (source.length === 0) {
115
+ return "";
116
+ }
117
+ let zeroes = 0;
118
+ let length = 0;
119
+ let pbegin = 0;
120
+ const pend = source.length;
121
+ while (pbegin !== pend && source[pbegin] === 0) {
122
+ pbegin++;
123
+ zeroes++;
124
+ }
125
+ const size = (pend - pbegin) * iFACTOR + 1 >>> 0;
126
+ const b58 = new Uint8Array(size);
127
+ while (pbegin !== pend) {
128
+ let carry = source[pbegin];
129
+ let i = 0;
130
+ for (let it1 = size - 1; (carry !== 0 || i < length) && it1 !== -1; it1--, i++) {
131
+ carry += 256 * b58[it1] >>> 0;
132
+ b58[it1] = carry % BASE >>> 0;
133
+ carry = carry / BASE >>> 0;
134
+ }
135
+ if (carry !== 0) {
136
+ throw new Error("Non-zero carry");
137
+ }
138
+ length = i;
139
+ pbegin++;
140
+ }
141
+ let it2 = size - length;
142
+ while (it2 !== size && b58[it2] === 0) {
143
+ it2++;
144
+ }
145
+ let str = LEADER.repeat(zeroes);
146
+ for (; it2 < size; ++it2) {
147
+ str += ALPHABET2.charAt(b58[it2]);
148
+ }
149
+ return str;
150
+ }
151
+ function decodeUnsafe(source) {
152
+ if (typeof source !== "string") {
153
+ throw new TypeError("Expected String");
154
+ }
155
+ if (source.length === 0) {
156
+ return new Uint8Array();
157
+ }
158
+ let psz = 0;
159
+ let zeroes = 0;
160
+ let length = 0;
161
+ while (source[psz] === LEADER) {
162
+ zeroes++;
163
+ psz++;
164
+ }
165
+ const size = (source.length - psz) * FACTOR + 1 >>> 0;
166
+ const b256 = new Uint8Array(size);
167
+ while (psz < source.length) {
168
+ const charCode = source.charCodeAt(psz);
169
+ if (charCode > 255) {
170
+ return;
171
+ }
172
+ let carry = BASE_MAP[charCode];
173
+ if (carry === 255) {
174
+ return;
175
+ }
176
+ let i = 0;
177
+ for (let it3 = size - 1; (carry !== 0 || i < length) && it3 !== -1; it3--, i++) {
178
+ carry += BASE * b256[it3] >>> 0;
179
+ b256[it3] = carry % 256 >>> 0;
180
+ carry = carry / 256 >>> 0;
181
+ }
182
+ if (carry !== 0) {
183
+ throw new Error("Non-zero carry");
184
+ }
185
+ length = i;
186
+ psz++;
187
+ }
188
+ let it4 = size - length;
189
+ while (it4 !== size && b256[it4] === 0) {
190
+ it4++;
191
+ }
192
+ const vch = new Uint8Array(zeroes + (size - it4));
193
+ let j = zeroes;
194
+ while (it4 !== size) {
195
+ vch[j++] = b256[it4++];
196
+ }
197
+ return vch;
198
+ }
199
+ function decode(string) {
200
+ const buffer = decodeUnsafe(string);
201
+ if (buffer) {
202
+ return buffer;
203
+ }
204
+ throw new Error("Non-base" + BASE + " character");
205
+ }
206
+ return {
207
+ encode,
208
+ decodeUnsafe,
209
+ decode
210
+ };
211
+ }
212
+ var ALPHABET = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
213
+ const bs58 = base(ALPHABET);
214
+ function bytesToHex(bytes) {
215
+ return Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("");
216
+ }
217
+ function detectAndConvert(payload) {
218
+ if (/^[0-9a-fA-F]+$/.test(payload) && payload.length % 2 === 0) {
219
+ const bytes2 = Uint8Array.from(
220
+ payload.match(/.{1,2}/g).map((h) => parseInt(h, 16))
221
+ );
222
+ return { buffer: bytes2, detectedFormat: "hex" };
223
+ }
224
+ const base58Re = /^[123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz]+$/;
225
+ if (base58Re.test(payload)) {
226
+ try {
227
+ return { buffer: bs58.decode(payload), detectedFormat: "base58" };
228
+ } catch {
229
+ }
230
+ }
231
+ const b64Re = /^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{4})$/;
232
+ const b64urlRe = /^(?:[A-Za-z0-9\-_]{4})*(?:[A-Za-z0-9\-_]{2}(?:==)?|[A-Za-z0-9\-_]{3}=?|[A-Za-z0-9\-_]{4})$/;
233
+ const norm = payload.replace(/-/g, "+").replace(/_/g, "/");
234
+ const padded = norm + (norm.length % 4 ? "=".repeat(4 - norm.length % 4) : "");
235
+ if (b64Re.test(payload) || b64urlRe.test(payload)) {
236
+ try {
237
+ const bin = atob(padded);
238
+ return {
239
+ buffer: Uint8Array.from(bin, (c) => c.charCodeAt(0)),
240
+ detectedFormat: "base64"
241
+ };
242
+ } catch {
243
+ }
244
+ }
245
+ const bytes = new Uint8Array(payload.length);
246
+ for (let i = 0; i < payload.length; i++) bytes[i] = payload.charCodeAt(i);
247
+ return { buffer: bytes, detectedFormat: "bin21" };
248
+ }
84
249
  class WordBin {
85
250
  constructor(initialDict, options) {
86
- this.primaryDictVersion = initialDict?.version ?? 2;
251
+ this.primaryDictVersion = initialDict?.version ?? 1;
87
252
  this.log = options?.debug ? (...args) => console.log("[WordBin]", ...args) : () => {
88
253
  };
89
254
  }
90
255
  static async createFromWords(words) {
91
- console.warn("Building dictionary from scratch – consider pre-built files");
92
- const dict = await buildDictionary(words);
93
- return new WordBin(dict);
256
+ console.warn(
257
+ "Building dictionary from scratch – consider using pre-built files"
258
+ );
259
+ return new WordBin(await buildDictionary(words));
94
260
  }
95
261
  static async createFromJson(dictJson) {
96
262
  return new WordBin(dictJson);
97
263
  }
98
264
  static async create(options) {
99
- const latestDict = await loadLatestDictionary();
100
- return new WordBin(latestDict, options);
265
+ return new WordBin(await loadLatestDictionary(), options);
101
266
  }
102
- async getReverseMapForVersion(version) {
267
+ async getMapsForVersion(version) {
103
268
  const dict = await loadDictionaryByVersion(version);
104
269
  const reverseMap = /* @__PURE__ */ new Map();
270
+ const forwardMap = /* @__PURE__ */ new Map();
271
+ const idLengths = /* @__PURE__ */ new Set();
105
272
  for (const [hex, words] of Object.entries(dict.words)) {
106
- if (words.length > 0) reverseMap.set(hex, words[0]);
273
+ if (!words.length) continue;
274
+ if (words.length > 1) {
275
+ throw new Error(
276
+ `Dictionary corruption: ID ${hex} maps to multiple words`
277
+ );
278
+ }
279
+ const word = words[0];
280
+ const bytes = Buffer.from(hex, "hex");
281
+ idLengths.add(bytes.length);
282
+ reverseMap.set(hex, word);
283
+ forwardMap.set(word, bytes);
107
284
  }
108
- return reverseMap;
285
+ return {
286
+ reverseMap,
287
+ forwardMap,
288
+ sortedIdLengths: Array.from(idLengths).sort((a, b) => b - a)
289
+ };
109
290
  }
110
- async encode(text, options = {}) {
291
+ // ── encode ──────────────────────────────────────────────────────────────────
292
+ async encode(text, options) {
111
293
  let textStr;
112
- if (typeof text === "string") textStr = text;
113
- else if (text instanceof Uint8Array) textStr = toBase64(text);
114
- else textStr = text.encodedBase64;
115
- if (!textStr.trim()) {
294
+ if (typeof text === "string") {
295
+ textStr = text;
296
+ } else if (text instanceof Uint8Array) {
297
+ textStr = toBase64(text);
298
+ } else {
299
+ textStr = text.base64Payload;
300
+ }
301
+ const trimmed = textStr.trim();
302
+ if (!trimmed) {
116
303
  return {
117
304
  originalText: "",
118
- dictVersion: 0,
305
+ dictVersion: this.primaryDictVersion,
119
306
  encoded: new Uint8Array(0),
120
307
  payload: "",
121
- encodedBase64: "",
308
+ bin21: "",
309
+ bin21Payload: "",
310
+ base64Payload: "",
311
+ hexPayload: "",
312
+ base58Payload: "",
122
313
  originalBytes: 0,
123
314
  encodedBytes: 0,
124
315
  bytesSaved: 0,
125
316
  ratioPercent: 100
126
317
  };
127
318
  }
128
- const words = textStr.split(/\s+/).filter(Boolean);
129
- this.log(`[encode] Input words (${words.length}):`, words);
130
- const useVersion = options.dictVersion ?? this.primaryDictVersion;
131
- this.log(`[encode] Using dictionary version: ${useVersion}`);
132
- const header = new Uint8Array([useVersion]);
133
- this.log(`[encode] Header bytes: [${[...header].join(", ")}]`);
134
- this.log(`[encode] Header hex: ${toHex(header)}`);
135
- const chunks = [header];
136
- const reverseMap = await this.getReverseMapForVersion(useVersion);
137
- this.log(`[encode] Reverse map loaded — size: ${reverseMap.size} entries`);
138
- this.log("[encode] Word → ID mapping:");
139
- for (const w of words) {
140
- const id = await generateWordId(w);
141
- const key = toHex(id);
142
- this.log(` "${w}" → ID bytes: [${[...id].join(", ")}] | hex: ${key}`);
143
- if (reverseMap.has(key)) {
144
- reverseMap.get(key);
145
- this.log(` → Found in dictionary → using ${id.length}-byte ID`);
319
+ const useVersion = options?.dictVersion ?? this.primaryDictVersion;
320
+ const { forwardMap } = await this.getMapsForVersion(useVersion);
321
+ const chunks = [new Uint8Array([useVersion])];
322
+ for (const w of trimmed.split(/\s+/).filter(Boolean)) {
323
+ const id = forwardMap.get(w);
324
+ if (id) {
146
325
  chunks.push(id);
147
326
  } else {
148
327
  const utf8 = utf8Encode(w);
149
328
  const lenVarint = encodeVarint(utf8.length);
150
- this.log(` → NOT in dictionary → literal mode`);
151
- this.log(
152
- ` Literal length varint bytes: [${[...lenVarint].join(", ")}] (value = ${utf8.length})`
153
- );
154
- this.log(` Word UTF-8 bytes length: ${utf8.length}`);
155
329
  const out = new Uint8Array(1 + lenVarint.length + utf8.length);
156
330
  out[0] = LITERAL;
157
331
  out.set(lenVarint, 1);
158
332
  out.set(utf8, 1 + lenVarint.length);
159
- this.log(` Literal chunk bytes: [${[...out].join(", ")}]`);
160
333
  chunks.push(out);
161
334
  }
162
335
  }
163
- const totalLength = chunks.reduce((n, c) => n + c.length, 0);
336
+ const totalLength = chunks.reduce((sum, c) => sum + c.length, 0);
164
337
  const result = new Uint8Array(totalLength);
165
- this.log(`[encode] Total encoded length: ${totalLength} bytes`);
166
- let off = 0;
167
- chunks.forEach((chunk, i) => {
168
- result.set(chunk, off);
169
- off += chunk.length;
170
- this.log(
171
- ` Chunk ${i}: ${chunk.length} bytes → offset ${off - chunk.length}`
172
- );
173
- });
174
- this.log(
175
- `[encode] Final encoded bytes (first 32): [${[...result.subarray(0, Math.min(32, result.length))].join(", ")}]`
176
- );
338
+ let offset = 0;
339
+ for (const chunk of chunks) {
340
+ result.set(chunk, offset);
341
+ offset += chunk.length;
342
+ }
177
343
  const originalBytes = new TextEncoder().encode(textStr).length;
178
- const base64Result = toBase64(result);
179
- this.log(`[encode] Base64 starts with: ${base64Result.slice(0, 12)}...`);
344
+ const hexPayload = bytesToHex(result);
345
+ const bin21Payload = Array.from(result).map((b) => String.fromCharCode(b)).join("");
346
+ const base64Payload = toBase64(result);
347
+ const base58Payload = bs58.encode(result);
180
348
  return {
181
349
  originalText: textStr,
182
- dictVersion: result[0],
350
+ dictVersion: useVersion,
183
351
  encoded: result,
184
- payload: base64Result,
185
- encodedBase64: base64Result,
352
+ bin21: bin21Payload,
353
+ payload: bin21Payload,
354
+ bin21Payload,
355
+ hexPayload,
356
+ base64Payload,
357
+ base58Payload,
186
358
  originalBytes,
187
- encodedBytes: totalLength,
188
- bytesSaved: originalBytes - totalLength,
189
- ratioPercent: totalLength === 0 ? 100 : Math.round(totalLength / originalBytes * 100)
359
+ encodedBytes: bin21Payload.length,
360
+ bytesSaved: originalBytes - bin21Payload.length,
361
+ ratioPercent: Math.round(bin21Payload.length / originalBytes * 1e4) / 100
190
362
  };
191
363
  }
192
- async decode(data) {
364
+ // ── decode ───────────────────────────────────────────────────────────────────
365
+ /**
366
+ * Decodes any supported payload format back to human-readable text.
367
+ *
368
+ * For valid WordBin payloads: returns the exact original words.
369
+ * For non-WordBin payloads: scans byte-by-byte, extracts dictionary words
370
+ * wherever possible, and preserves unrecognised
371
+ * bytes as "[0xXX]" markers.
372
+ */
373
+ async decode(payload) {
193
374
  let buffer;
194
- if (typeof data === "string") {
195
- this.log(
196
- `[decode] Input is base64 string: "${data.substring(0, 20)}..."`
197
- );
198
- buffer = fromBase64(data);
199
- this.log(`[decode] Decoded to ${buffer.length} bytes`);
375
+ let detectedFormat;
376
+ if (payload instanceof Uint8Array) {
377
+ buffer = payload;
378
+ detectedFormat = "bytes";
200
379
  } else {
201
- buffer = data;
202
- this.log(`[decode] Input is Uint8Array with ${buffer.length} bytes`);
380
+ ({ buffer, detectedFormat } = detectAndConvert(payload));
203
381
  }
204
- this.log(`[decode] Full buffer hex: ${toHex(buffer)}`);
205
382
  this.log(
206
- `[decode] First 16 bytes: [${[...buffer.subarray(0, Math.min(16, buffer.length))].join(", ")}]`
383
+ `[decode] format=${detectedFormat} bufLen=${buffer.length} firstBytes=[${Array.from(buffer.slice(0, 8)).join(",")}]`
207
384
  );
208
385
  if (buffer.length < 1) {
209
- throw new Error("Data too short");
210
- }
211
- const version = buffer[0];
212
- this.log(`[decode] Dictionary version from header: ${version}`);
213
- if (version < 1 || version > 100) {
214
- this.log(`[decode] Warning: unusual dictionary version ${version}`);
386
+ return {
387
+ text: "",
388
+ isWordBin: false,
389
+ detectedFormat,
390
+ notice: "Payload is empty nothing to decode."
391
+ };
215
392
  }
216
- let pos = 1;
217
- this.log(`[decode] Starting decode at position ${pos}`);
218
- const reverseMap = await this.getReverseMapForVersion(version);
393
+ const availableVersions = await getAllAvailableDictionaryVersions();
394
+ const versionByte = buffer[0];
395
+ const versionIsHeader = availableVersions.includes(versionByte);
219
396
  this.log(
220
- `[decode] Reverse map loaded for v${version} size: ${reverseMap.size} entries`
397
+ `[decode] availableVersions=[${availableVersions.join(",")}] versionByte=${versionByte} isKnownHeader=${versionIsHeader}`
221
398
  );
222
- this.log(`[decode] ===== STARTING DECODE LOOP =====`);
223
- const result = [];
224
- const decoded = this.tryDecode(pos, buffer, reverseMap, result, 0);
225
- if (decoded === null) {
226
- throw new Error(
227
- "No valid decode path found — possible corruption or dictionary mismatch"
399
+ const tryOrder = versionIsHeader ? [versionByte, ...availableVersions.filter((v) => v !== versionByte)] : [...availableVersions];
400
+ for (const ver of tryOrder) {
401
+ let maps;
402
+ try {
403
+ maps = await this.getMapsForVersion(ver);
404
+ } catch (err) {
405
+ this.log(`[decode] v${ver}: getMapsForVersion threw — ${err}`);
406
+ continue;
407
+ }
408
+ const { reverseMap, sortedIdLengths } = maps;
409
+ const r1 = this.greedyDecode(buffer, 1, reverseMap, sortedIdLengths) ?? this.tryDecode(1, buffer, reverseMap, [], 0, sortedIdLengths);
410
+ this.log(
411
+ `[decode] v${ver} strict(pos=1): ${r1 !== null ? `"${r1}"` : "null"}`
228
412
  );
413
+ if (r1 !== null) {
414
+ const notice2 = versionByte === ver ? void 0 : `Byte[0]=${versionByte} is not a recognised version header but decoded successfully with dictionary v${ver}.`;
415
+ return { text: r1, isWordBin: true, detectedFormat, notice: notice2 };
416
+ }
417
+ const r0 = this.greedyDecode(buffer, 0, reverseMap, sortedIdLengths) ?? this.tryDecode(0, buffer, reverseMap, [], 0, sortedIdLengths);
418
+ this.log(
419
+ `[decode] v${ver} strict(pos=0): ${r0 !== null ? `"${r0}"` : "null"}`
420
+ );
421
+ if (r0 !== null) {
422
+ return {
423
+ text: r0,
424
+ isWordBin: true,
425
+ detectedFormat,
426
+ notice: `Payload had no version header. Decoded using dictionary v${ver}.`
427
+ };
428
+ }
229
429
  }
230
- this.log(`
231
- [decode] ===== DECODE COMPLETE =====`);
232
- this.log(`[decode] Total words decoded: ${result.length}`);
233
- this.log(`[decode] Final result: "${decoded}"`);
234
- return decoded;
430
+ this.log(`[decode] strict parse failed — falling back to partial scan`);
431
+ if (availableVersions.length > 0) {
432
+ const scanVersion = availableVersions[availableVersions.length - 1];
433
+ try {
434
+ const { reverseMap, sortedIdLengths } = await this.getMapsForVersion(scanVersion);
435
+ const scan1 = this.partialScan(buffer, 1, reverseMap, sortedIdLengths);
436
+ const scan0 = this.partialScan(buffer, 0, reverseMap, sortedIdLengths);
437
+ const best = scan1.wordCount >= scan0.wordCount ? scan1 : scan0;
438
+ this.log(
439
+ `[decode] partial scan(pos=1) words=${scan1.wordCount} raw=${scan1.rawSegments.length} | scan(pos=0) words=${scan0.wordCount} raw=${scan0.rawSegments.length}`
440
+ );
441
+ const notice2 = `This does not appear to be a valid WordBin payload. Partial scan using dictionary v${scanVersion} extracted ${best.wordCount} word(s); ${best.rawSegments.length} byte sequence(s) had no dictionary match and are shown as [0xXX] markers.`;
442
+ return {
443
+ text: best.text,
444
+ isWordBin: false,
445
+ detectedFormat,
446
+ rawSegments: best.rawSegments,
447
+ notice: notice2
448
+ };
449
+ } catch {
450
+ }
451
+ }
452
+ const notice = `Could not decode with any available dictionary (tried: ${availableVersions.join(", ") || "none"}). Falling back to UTF-8 text decoding.`;
453
+ this.log(`[decode] ${notice}`);
454
+ return {
455
+ text: new TextDecoder("utf-8", { fatal: false }).decode(buffer),
456
+ isWordBin: false,
457
+ detectedFormat,
458
+ notice
459
+ };
235
460
  }
236
- tryDecode(pos, buffer, reverseMap, result, depth) {
237
- const indent = " ".repeat(depth);
238
- this.log(`${indent}[tryDecode] At position ${pos} (depth ${depth})`);
239
- if (pos === buffer.length) {
240
- this.log(`${indent}[tryDecode] Reached end successfully`);
241
- return result.join(" ");
242
- }
243
- const previewLen = Math.min(8, buffer.length - pos);
244
- const preview = [...buffer.subarray(pos, pos + previewLen)].map((b) => `0x${b.toString(16).padStart(2, "0")}`).join(" ");
245
- this.log(`${indent}[tryDecode] Next ${previewLen} bytes: ${preview}`);
461
+ // ── Private: greedy linear decode ────────────────────────────────────────────
462
+ /**
463
+ * O(n) longest-match-first decode. Returns null if any byte has no match.
464
+ * This is the fast path; tryDecode is used as a backtracking fallback.
465
+ */
466
+ greedyDecode(buffer, startPos, reverseMap, sortedIdLengths) {
467
+ const words = [];
468
+ let pos = startPos;
469
+ while (pos < buffer.length) {
470
+ if (buffer[pos] === LITERAL) {
471
+ const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
472
+ if (byteLen > 1e6 || byteLen < 0) return null;
473
+ const start = pos + 1 + bytesRead;
474
+ const end = start + byteLen;
475
+ if (end > buffer.length) return null;
476
+ words.push(utf8Decode(buffer.subarray(start, end)));
477
+ pos = end;
478
+ continue;
479
+ }
480
+ let matched = false;
481
+ for (const len of sortedIdLengths) {
482
+ if (pos + len > buffer.length) continue;
483
+ const key = toHex(buffer.subarray(pos, pos + len));
484
+ if (reverseMap.has(key)) {
485
+ words.push(reverseMap.get(key));
486
+ pos += len;
487
+ matched = true;
488
+ break;
489
+ }
490
+ }
491
+ if (!matched) return null;
492
+ }
493
+ return words.join(" ");
494
+ }
495
+ // ── Private: partial / best-effort scan ──────────────────────────────────────
496
+ /**
497
+ * Scans through the buffer extracting any recognised dictionary words.
498
+ * Unrecognised bytes are collected as raw segments and rendered as [0xXX].
499
+ * Always consumes the entire buffer — never returns null.
500
+ */
501
+ partialScan(buffer, startPos, reverseMap, sortedIdLengths) {
502
+ const parts = [];
503
+ const rawSegments = [];
504
+ let wordCount = 0;
505
+ let pos = startPos;
506
+ while (pos < buffer.length) {
507
+ if (buffer[pos] === LITERAL && pos + 1 < buffer.length) {
508
+ try {
509
+ const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
510
+ if (byteLen > 0 && byteLen <= 1e6) {
511
+ const start = pos + 1 + bytesRead;
512
+ const end = start + byteLen;
513
+ if (end <= buffer.length) {
514
+ const word = utf8Decode(buffer.subarray(start, end));
515
+ parts.push(word);
516
+ wordCount++;
517
+ pos = end;
518
+ continue;
519
+ }
520
+ }
521
+ } catch {
522
+ }
523
+ }
524
+ let matched = false;
525
+ for (const len of sortedIdLengths) {
526
+ if (pos + len > buffer.length) continue;
527
+ const key = toHex(buffer.subarray(pos, pos + len));
528
+ if (reverseMap.has(key)) {
529
+ parts.push(reverseMap.get(key));
530
+ wordCount++;
531
+ pos += len;
532
+ matched = true;
533
+ break;
534
+ }
535
+ }
536
+ if (!matched) {
537
+ const marker = `[0x${buffer[pos].toString(16).padStart(2, "0")}]`;
538
+ parts.push(marker);
539
+ rawSegments.push(marker);
540
+ this.log(
541
+ `[decode] partial scan: no match at pos=${pos} byte=${buffer[pos]}`
542
+ );
543
+ pos++;
544
+ }
545
+ }
546
+ return { text: parts.join(" "), wordCount, rawSegments };
547
+ }
548
+ // ── Private: backtracking decode ─────────────────────────────────────────────
549
+ tryDecode(pos, buffer, reverseMap, result, depth, sortedIdLengths) {
550
+ if (pos === buffer.length) return result.join(" ");
246
551
  if (buffer[pos] === LITERAL) {
247
- this.log(
248
- `${indent}[tryDecode] Found LITERAL marker (0x${LITERAL.toString(16)})`
249
- );
250
552
  const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
251
- this.log(
252
- `${indent}[tryDecode] Varint: value=${byteLen}, bytesRead=${bytesRead}`
253
- );
553
+ if (byteLen > 1e6 || byteLen < 0) return null;
254
554
  const start = pos + 1 + bytesRead;
255
555
  const end = start + byteLen;
256
- this.log(
257
- `${indent}[tryDecode] Literal range: [${start}..${end}) (${byteLen} bytes)`
556
+ if (end > buffer.length) return null;
557
+ result.push(utf8Decode(buffer.subarray(start, end)));
558
+ const res = this.tryDecode(
559
+ end,
560
+ buffer,
561
+ reverseMap,
562
+ result,
563
+ depth + 1,
564
+ sortedIdLengths
258
565
  );
259
- if (end > buffer.length) {
260
- this.log(`${indent}[tryDecode] Truncated literal — failing path`);
261
- return null;
262
- }
263
- const literalBytes = buffer.subarray(start, end);
264
- const word = utf8Decode(literalBytes);
265
- this.log(`${indent}[tryDecode] Decoded literal: "${word}"`);
266
- result.push(word);
267
- const res = this.tryDecode(end, buffer, reverseMap, result, depth + 1);
268
566
  if (res !== null) return res;
269
567
  result.pop();
270
- this.log(`${indent}[tryDecode] Backtracking from literal`);
271
- return null;
272
568
  }
273
- for (const len of [4, 3, 2]) {
274
- if (pos + len > buffer.length) {
275
- this.log(`${indent}[tryDecode] Skipping ${len}-byte (would exceed)`);
276
- continue;
277
- }
278
- const slice = buffer.subarray(pos, pos + len);
279
- const key = toHex(slice);
280
- const keyBytes = [...slice].map((b) => `0x${b.toString(16).padStart(2, "0")}`).join(" ");
281
- this.log(
282
- `${indent}[tryDecode] Trying ${len}-byte: [${keyBytes}] hex=${key}`
283
- );
569
+ for (const len of sortedIdLengths) {
570
+ if (pos + len > buffer.length) continue;
571
+ const key = toHex(buffer.subarray(pos, pos + len));
284
572
  if (reverseMap.has(key)) {
285
- const word = reverseMap.get(key);
286
- this.log(`${indent}[tryDecode] Match: "${word}" (ID: ${key})`);
287
- result.push(word);
573
+ result.push(reverseMap.get(key));
288
574
  const res = this.tryDecode(
289
575
  pos + len,
290
576
  buffer,
291
577
  reverseMap,
292
578
  result,
293
- depth + 1
579
+ depth + 1,
580
+ sortedIdLengths
294
581
  );
295
582
  if (res !== null) return res;
296
583
  result.pop();
297
- this.log(`${indent}[tryDecode] Backtracking from "${word}"`);
298
- } else {
299
- this.log(`${indent}[tryDecode] No match for ${key}`);
300
584
  }
301
585
  }
302
- this.log(`${indent}[tryDecode] No valid branches — failing path`);
303
586
  return null;
304
587
  }
305
588
  }