@bigdreamsweb3/wordbin 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +18 -18
- package/README.md +33 -31
- package/dist/builder-vFphFQMU.js.map +1 -1
- package/dist/cli.mjs +3 -3
- package/dist/cli.mjs.map +1 -1
- package/dist/core/format-detection.d.ts +5 -0
- package/dist/core/helpers.d.ts +1 -0
- package/dist/core/index.d.ts +3 -32
- package/dist/data/dict-v1-bip39.json +2054 -0
- package/dist/index.mjs +233 -179
- package/dist/index.mjs.map +1 -1
- package/dist/types.d.ts +1 -3
- package/package.json +2 -1
- package/dist/core/binary-payload.d.ts +0 -6
- package/dist/core/comp/latin1-compressor.d.ts +0 -9
- package/dist/core/comp/onebyte-encoder.d.ts +0 -2
- package/dist/data/wordbin-v1-bip39.json +0 -6150
package/dist/index.mjs
CHANGED
|
@@ -29,7 +29,7 @@ async function getAllAvailableDictionaryVersions() {
|
|
|
29
29
|
try {
|
|
30
30
|
const files = await fs.readdir(dir);
|
|
31
31
|
for (const file of files) {
|
|
32
|
-
const match = file.match(/
|
|
32
|
+
const match = file.match(/dict-v(\d+)/i);
|
|
33
33
|
if (match) {
|
|
34
34
|
versions.add(parseInt(match[1], 10));
|
|
35
35
|
}
|
|
@@ -49,7 +49,7 @@ async function loadDictionaryByVersion(version) {
|
|
|
49
49
|
for (const dir of dirs) {
|
|
50
50
|
const files = await fs.readdir(dir);
|
|
51
51
|
const versionFile = files.find(
|
|
52
|
-
(f) => f.match(new RegExp(`
|
|
52
|
+
(f) => f.match(new RegExp(`dict-v${version}(?:\\.|-)`, "i"))
|
|
53
53
|
);
|
|
54
54
|
if (versionFile) {
|
|
55
55
|
const filePath = path.join(dir, versionFile);
|
|
@@ -211,15 +211,12 @@ function base(ALPHABET2) {
|
|
|
211
211
|
}
|
|
212
212
|
var ALPHABET = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
|
|
213
213
|
const bs58 = base(ALPHABET);
|
|
214
|
-
function bytesToHex(bytes) {
|
|
215
|
-
return Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("");
|
|
216
|
-
}
|
|
217
214
|
function detectAndConvert(payload) {
|
|
218
215
|
if (/^[0-9a-fA-F]+$/.test(payload) && payload.length % 2 === 0) {
|
|
219
|
-
const
|
|
216
|
+
const bytes = Uint8Array.from(
|
|
220
217
|
payload.match(/.{1,2}/g).map((h) => parseInt(h, 16))
|
|
221
218
|
);
|
|
222
|
-
return { buffer:
|
|
219
|
+
return { buffer: bytes, detectedFormat: "hex" };
|
|
223
220
|
}
|
|
224
221
|
const base58Re = /^[123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz]+$/;
|
|
225
222
|
if (base58Re.test(payload)) {
|
|
@@ -234,17 +231,19 @@ function detectAndConvert(payload) {
|
|
|
234
231
|
const padded = norm + (norm.length % 4 ? "=".repeat(4 - norm.length % 4) : "");
|
|
235
232
|
if (b64Re.test(payload) || b64urlRe.test(payload)) {
|
|
236
233
|
try {
|
|
237
|
-
const
|
|
234
|
+
const bin2 = atob(padded);
|
|
238
235
|
return {
|
|
239
|
-
buffer: Uint8Array.from(
|
|
236
|
+
buffer: Uint8Array.from(bin2, (c) => c.charCodeAt(0)),
|
|
240
237
|
detectedFormat: "base64"
|
|
241
238
|
};
|
|
242
239
|
} catch {
|
|
243
240
|
}
|
|
244
241
|
}
|
|
245
|
-
const
|
|
246
|
-
|
|
247
|
-
|
|
242
|
+
const bin = Array.from(payload).map((c) => c.charCodeAt(0));
|
|
243
|
+
return { buffer: Uint8Array.from(bin), detectedFormat: "bytes" };
|
|
244
|
+
}
|
|
245
|
+
function bytesToHex(bytes) {
|
|
246
|
+
return Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("");
|
|
248
247
|
}
|
|
249
248
|
class WordBin {
|
|
250
249
|
constructor(initialDict, options) {
|
|
@@ -288,7 +287,69 @@ class WordBin {
|
|
|
288
287
|
sortedIdLengths: Array.from(idLengths).sort((a, b) => b - a)
|
|
289
288
|
};
|
|
290
289
|
}
|
|
291
|
-
|
|
290
|
+
tryRecoverWordsFromHex(hex, reverseMap, sortedIdLengths) {
|
|
291
|
+
const bytes = Buffer.from(hex, "hex");
|
|
292
|
+
const recovered = this.greedyDecode(bytes, 0, reverseMap, sortedIdLengths);
|
|
293
|
+
if (recovered && recovered.trim().length > 0) {
|
|
294
|
+
return recovered;
|
|
295
|
+
}
|
|
296
|
+
return null;
|
|
297
|
+
}
|
|
298
|
+
validateDecodedWords(text, forwardMap, reverseMap, sortedIdLengths) {
|
|
299
|
+
const parts = [];
|
|
300
|
+
const rawSegments = [];
|
|
301
|
+
const tokens = text.match(/[a-zA-Z]+|[^\w\s]+|\d+|\s+/g) || [];
|
|
302
|
+
for (const token of tokens) {
|
|
303
|
+
if (/^\s+$/.test(token)) {
|
|
304
|
+
parts.push(token);
|
|
305
|
+
continue;
|
|
306
|
+
}
|
|
307
|
+
if (/^[a-zA-Z]+$/.test(token)) {
|
|
308
|
+
const normalized = token.toLowerCase();
|
|
309
|
+
if (forwardMap.has(normalized)) {
|
|
310
|
+
parts.push(normalized);
|
|
311
|
+
continue;
|
|
312
|
+
}
|
|
313
|
+
const hex2 = bytesToHex(new TextEncoder().encode(token));
|
|
314
|
+
const recovered2 = this.tryRecoverWordsFromHex(
|
|
315
|
+
hex2,
|
|
316
|
+
reverseMap,
|
|
317
|
+
sortedIdLengths
|
|
318
|
+
);
|
|
319
|
+
if (recovered2) {
|
|
320
|
+
parts.push(recovered2);
|
|
321
|
+
} else {
|
|
322
|
+
const raw = `[hex:${hex2}]`;
|
|
323
|
+
parts.push(raw);
|
|
324
|
+
rawSegments.push(raw);
|
|
325
|
+
}
|
|
326
|
+
continue;
|
|
327
|
+
}
|
|
328
|
+
if (/^[^\w\s]+$/.test(token)) {
|
|
329
|
+
const raw = `[raw:${token}]`;
|
|
330
|
+
parts.push(raw);
|
|
331
|
+
rawSegments.push(raw);
|
|
332
|
+
continue;
|
|
333
|
+
}
|
|
334
|
+
const hex = bytesToHex(new TextEncoder().encode(token));
|
|
335
|
+
const recovered = this.tryRecoverWordsFromHex(
|
|
336
|
+
hex,
|
|
337
|
+
reverseMap,
|
|
338
|
+
sortedIdLengths
|
|
339
|
+
);
|
|
340
|
+
if (recovered) {
|
|
341
|
+
parts.push(recovered);
|
|
342
|
+
} else {
|
|
343
|
+
const raw = `[hex:${hex}]`;
|
|
344
|
+
parts.push(raw);
|
|
345
|
+
rawSegments.push(raw);
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
return {
|
|
349
|
+
text: parts.join(""),
|
|
350
|
+
rawSegments
|
|
351
|
+
};
|
|
352
|
+
}
|
|
292
353
|
async encode(text, options) {
|
|
293
354
|
let textStr;
|
|
294
355
|
if (typeof text === "string") {
|
|
@@ -305,8 +366,6 @@ class WordBin {
|
|
|
305
366
|
dictVersion: this.primaryDictVersion,
|
|
306
367
|
encoded: new Uint8Array(0),
|
|
307
368
|
payload: "",
|
|
308
|
-
bin21: "",
|
|
309
|
-
bin21Payload: "",
|
|
310
369
|
base64Payload: "",
|
|
311
370
|
hexPayload: "",
|
|
312
371
|
base58Payload: "",
|
|
@@ -342,34 +401,23 @@ class WordBin {
|
|
|
342
401
|
}
|
|
343
402
|
const originalBytes = new TextEncoder().encode(textStr).length;
|
|
344
403
|
const hexPayload = bytesToHex(result);
|
|
345
|
-
const bin21Payload = Array.from(result).map((b) => String.fromCharCode(b)).join("");
|
|
346
404
|
const base64Payload = toBase64(result);
|
|
347
405
|
const base58Payload = bs58.encode(result);
|
|
406
|
+
const encodedBytes = Math.floor(hexPayload.length / 2);
|
|
348
407
|
return {
|
|
349
408
|
originalText: textStr,
|
|
350
409
|
dictVersion: useVersion,
|
|
351
410
|
encoded: result,
|
|
352
|
-
|
|
353
|
-
payload: bin21Payload,
|
|
354
|
-
bin21Payload,
|
|
411
|
+
payload: hexPayload,
|
|
355
412
|
hexPayload,
|
|
356
413
|
base64Payload,
|
|
357
414
|
base58Payload,
|
|
358
415
|
originalBytes,
|
|
359
|
-
encodedBytes
|
|
360
|
-
bytesSaved: originalBytes -
|
|
361
|
-
ratioPercent: Math.round(
|
|
416
|
+
encodedBytes,
|
|
417
|
+
bytesSaved: originalBytes - encodedBytes,
|
|
418
|
+
ratioPercent: Math.round(encodedBytes / originalBytes * 1e4) / 100
|
|
362
419
|
};
|
|
363
420
|
}
|
|
364
|
-
// ── decode ───────────────────────────────────────────────────────────────────
|
|
365
|
-
/**
|
|
366
|
-
* Decodes any supported payload format back to human-readable text.
|
|
367
|
-
*
|
|
368
|
-
* For valid WordBin payloads: returns the exact original words.
|
|
369
|
-
* For non-WordBin payloads: scans byte-by-byte, extracts dictionary words
|
|
370
|
-
* wherever possible, and preserves unrecognised
|
|
371
|
-
* bytes as "[0xXX]" markers.
|
|
372
|
-
*/
|
|
373
421
|
async decode(payload) {
|
|
374
422
|
let buffer;
|
|
375
423
|
let detectedFormat;
|
|
@@ -401,124 +449,73 @@ class WordBin {
|
|
|
401
449
|
let maps;
|
|
402
450
|
try {
|
|
403
451
|
maps = await this.getMapsForVersion(ver);
|
|
404
|
-
} catch
|
|
405
|
-
this.log(`[decode] v${ver}: getMapsForVersion threw — ${err}`);
|
|
452
|
+
} catch {
|
|
406
453
|
continue;
|
|
407
454
|
}
|
|
408
455
|
const { reverseMap, sortedIdLengths } = maps;
|
|
409
|
-
const r1 = this.greedyDecode(buffer, 1, reverseMap, sortedIdLengths)
|
|
410
|
-
this.log(
|
|
411
|
-
`[decode] v${ver} strict(pos=1): ${r1 !== null ? `"${r1}"` : "null"}`
|
|
412
|
-
);
|
|
456
|
+
const r1 = this.greedyDecode(buffer, 1, reverseMap, sortedIdLengths);
|
|
413
457
|
if (r1 !== null) {
|
|
414
|
-
const
|
|
415
|
-
return { text: r1, isWordBin: true, detectedFormat, notice
|
|
458
|
+
const notice = versionByte === ver ? void 0 : `Byte[0]=${versionByte} is not a recognised version header but decoded successfully with dictionary v${ver}.`;
|
|
459
|
+
return { text: r1, isWordBin: true, detectedFormat, notice };
|
|
416
460
|
}
|
|
417
|
-
const r0 = this.greedyDecode(buffer, 0, reverseMap, sortedIdLengths)
|
|
418
|
-
this.log(
|
|
419
|
-
`[decode] v${ver} strict(pos=0): ${r0 !== null ? `"${r0}"` : "null"}`
|
|
420
|
-
);
|
|
461
|
+
const r0 = this.greedyDecode(buffer, 0, reverseMap, sortedIdLengths);
|
|
421
462
|
if (r0 !== null) {
|
|
422
463
|
return {
|
|
423
464
|
text: r0,
|
|
424
|
-
isWordBin:
|
|
465
|
+
isWordBin: false,
|
|
425
466
|
detectedFormat,
|
|
426
467
|
notice: `Payload had no version header. Decoded using dictionary v${ver}.`
|
|
427
468
|
};
|
|
428
469
|
}
|
|
429
470
|
}
|
|
430
|
-
this.log(`[decode] strict parse failed — falling back to
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
471
|
+
this.log(`[decode] strict parse failed — falling back to UTF-8 validation`);
|
|
472
|
+
const utf8Text = new TextDecoder("utf-8", { fatal: false }).decode(buffer);
|
|
473
|
+
try {
|
|
474
|
+
const latest = availableVersions[availableVersions.length - 1];
|
|
475
|
+
const { forwardMap, reverseMap, sortedIdLengths } = await this.getMapsForVersion(latest);
|
|
476
|
+
const validated = this.validateDecodedWords(
|
|
477
|
+
utf8Text,
|
|
478
|
+
forwardMap,
|
|
479
|
+
reverseMap,
|
|
480
|
+
sortedIdLengths
|
|
481
|
+
);
|
|
482
|
+
return {
|
|
483
|
+
text: validated.text,
|
|
484
|
+
isWordBin: false,
|
|
485
|
+
detectedFormat,
|
|
486
|
+
rawSegments: validated.rawSegments,
|
|
487
|
+
notice: "Payload is not WordBin. UTF-8 text was recovered and dictionary validation applied."
|
|
488
|
+
};
|
|
489
|
+
} catch {
|
|
490
|
+
return {
|
|
491
|
+
text: utf8Text,
|
|
492
|
+
isWordBin: false,
|
|
493
|
+
detectedFormat,
|
|
494
|
+
notice: "Payload decoded as plain UTF-8 text."
|
|
495
|
+
};
|
|
451
496
|
}
|
|
452
|
-
const notice = `Could not decode with any available dictionary (tried: ${availableVersions.join(", ") || "none"}). Falling back to UTF-8 text decoding.`;
|
|
453
|
-
this.log(`[decode] ${notice}`);
|
|
454
|
-
return {
|
|
455
|
-
text: new TextDecoder("utf-8", { fatal: false }).decode(buffer),
|
|
456
|
-
isWordBin: false,
|
|
457
|
-
detectedFormat,
|
|
458
|
-
notice
|
|
459
|
-
};
|
|
460
497
|
}
|
|
461
|
-
// ── Private: greedy linear decode ────────────────────────────────────────────
|
|
462
|
-
/**
|
|
463
|
-
* O(n) longest-match-first decode. Returns null if any byte has no match.
|
|
464
|
-
* This is the fast path; tryDecode is used as a backtracking fallback.
|
|
465
|
-
*/
|
|
466
498
|
greedyDecode(buffer, startPos, reverseMap, sortedIdLengths) {
|
|
467
499
|
const words = [];
|
|
468
500
|
let pos = startPos;
|
|
469
501
|
while (pos < buffer.length) {
|
|
470
502
|
if (buffer[pos] === LITERAL) {
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
const start = pos + 1 + bytesRead;
|
|
474
|
-
const end = start + byteLen;
|
|
475
|
-
if (end > buffer.length) return null;
|
|
476
|
-
words.push(utf8Decode(buffer.subarray(start, end)));
|
|
477
|
-
pos = end;
|
|
478
|
-
continue;
|
|
479
|
-
}
|
|
480
|
-
let matched = false;
|
|
481
|
-
for (const len of sortedIdLengths) {
|
|
482
|
-
if (pos + len > buffer.length) continue;
|
|
483
|
-
const key = toHex(buffer.subarray(pos, pos + len));
|
|
484
|
-
if (reverseMap.has(key)) {
|
|
485
|
-
words.push(reverseMap.get(key));
|
|
486
|
-
pos += len;
|
|
487
|
-
matched = true;
|
|
488
|
-
break;
|
|
489
|
-
}
|
|
490
|
-
}
|
|
491
|
-
if (!matched) return null;
|
|
492
|
-
}
|
|
493
|
-
return words.join(" ");
|
|
494
|
-
}
|
|
495
|
-
// ── Private: partial / best-effort scan ──────────────────────────────────────
|
|
496
|
-
/**
|
|
497
|
-
* Scans through the buffer extracting any recognised dictionary words.
|
|
498
|
-
* Unrecognised bytes are collected as raw segments and rendered as [0xXX].
|
|
499
|
-
* Always consumes the entire buffer — never returns null.
|
|
500
|
-
*/
|
|
501
|
-
partialScan(buffer, startPos, reverseMap, sortedIdLengths) {
|
|
502
|
-
const parts = [];
|
|
503
|
-
const rawSegments = [];
|
|
504
|
-
let wordCount = 0;
|
|
505
|
-
let pos = startPos;
|
|
506
|
-
while (pos < buffer.length) {
|
|
507
|
-
if (buffer[pos] === LITERAL && pos + 1 < buffer.length) {
|
|
503
|
+
let byteLen;
|
|
504
|
+
let bytesRead;
|
|
508
505
|
try {
|
|
509
|
-
|
|
510
|
-
if (byteLen > 0 && byteLen <= 1e6) {
|
|
511
|
-
const start = pos + 1 + bytesRead;
|
|
512
|
-
const end = start + byteLen;
|
|
513
|
-
if (end <= buffer.length) {
|
|
514
|
-
const word = utf8Decode(buffer.subarray(start, end));
|
|
515
|
-
parts.push(word);
|
|
516
|
-
wordCount++;
|
|
517
|
-
pos = end;
|
|
518
|
-
continue;
|
|
519
|
-
}
|
|
520
|
-
}
|
|
506
|
+
({ value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1));
|
|
521
507
|
} catch {
|
|
508
|
+
byteLen = -1;
|
|
509
|
+
bytesRead = 0;
|
|
510
|
+
}
|
|
511
|
+
if (byteLen > 0) {
|
|
512
|
+
if (byteLen > 1e6 || byteLen < 0) return null;
|
|
513
|
+
const start = pos + 1 + bytesRead;
|
|
514
|
+
const end = start + byteLen;
|
|
515
|
+
if (end > buffer.length) return null;
|
|
516
|
+
words.push(utf8Decode(buffer.subarray(start, end)));
|
|
517
|
+
pos = end;
|
|
518
|
+
continue;
|
|
522
519
|
}
|
|
523
520
|
}
|
|
524
521
|
let matched = false;
|
|
@@ -526,65 +523,122 @@ class WordBin {
|
|
|
526
523
|
if (pos + len > buffer.length) continue;
|
|
527
524
|
const key = toHex(buffer.subarray(pos, pos + len));
|
|
528
525
|
if (reverseMap.has(key)) {
|
|
529
|
-
|
|
530
|
-
wordCount++;
|
|
526
|
+
words.push(reverseMap.get(key));
|
|
531
527
|
pos += len;
|
|
532
528
|
matched = true;
|
|
533
529
|
break;
|
|
534
530
|
}
|
|
535
531
|
}
|
|
536
|
-
if (!matched)
|
|
537
|
-
const marker = `[0x${buffer[pos].toString(16).padStart(2, "0")}]`;
|
|
538
|
-
parts.push(marker);
|
|
539
|
-
rawSegments.push(marker);
|
|
540
|
-
this.log(
|
|
541
|
-
`[decode] partial scan: no match at pos=${pos} byte=${buffer[pos]}`
|
|
542
|
-
);
|
|
543
|
-
pos++;
|
|
544
|
-
}
|
|
545
|
-
}
|
|
546
|
-
return { text: parts.join(" "), wordCount, rawSegments };
|
|
547
|
-
}
|
|
548
|
-
// ── Private: backtracking decode ─────────────────────────────────────────────
|
|
549
|
-
tryDecode(pos, buffer, reverseMap, result, depth, sortedIdLengths) {
|
|
550
|
-
if (pos === buffer.length) return result.join(" ");
|
|
551
|
-
if (buffer[pos] === LITERAL) {
|
|
552
|
-
const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
|
|
553
|
-
if (byteLen > 1e6 || byteLen < 0) return null;
|
|
554
|
-
const start = pos + 1 + bytesRead;
|
|
555
|
-
const end = start + byteLen;
|
|
556
|
-
if (end > buffer.length) return null;
|
|
557
|
-
result.push(utf8Decode(buffer.subarray(start, end)));
|
|
558
|
-
const res = this.tryDecode(
|
|
559
|
-
end,
|
|
560
|
-
buffer,
|
|
561
|
-
reverseMap,
|
|
562
|
-
result,
|
|
563
|
-
depth + 1,
|
|
564
|
-
sortedIdLengths
|
|
565
|
-
);
|
|
566
|
-
if (res !== null) return res;
|
|
567
|
-
result.pop();
|
|
568
|
-
}
|
|
569
|
-
for (const len of sortedIdLengths) {
|
|
570
|
-
if (pos + len > buffer.length) continue;
|
|
571
|
-
const key = toHex(buffer.subarray(pos, pos + len));
|
|
572
|
-
if (reverseMap.has(key)) {
|
|
573
|
-
result.push(reverseMap.get(key));
|
|
574
|
-
const res = this.tryDecode(
|
|
575
|
-
pos + len,
|
|
576
|
-
buffer,
|
|
577
|
-
reverseMap,
|
|
578
|
-
result,
|
|
579
|
-
depth + 1,
|
|
580
|
-
sortedIdLengths
|
|
581
|
-
);
|
|
582
|
-
if (res !== null) return res;
|
|
583
|
-
result.pop();
|
|
584
|
-
}
|
|
532
|
+
if (!matched) return null;
|
|
585
533
|
}
|
|
586
|
-
return
|
|
534
|
+
return words.join(" ");
|
|
587
535
|
}
|
|
536
|
+
// private partialScan(
|
|
537
|
+
// buffer: Uint8Array,
|
|
538
|
+
// startPos: number,
|
|
539
|
+
// reverseMap: Map<string, string>,
|
|
540
|
+
// sortedIdLengths: number[],
|
|
541
|
+
// ): { text: string; wordCount: number; rawSegments: string[] } {
|
|
542
|
+
// const parts: string[] = [];
|
|
543
|
+
// const rawSegments: string[] = [];
|
|
544
|
+
// let wordCount = 0;
|
|
545
|
+
// let pos = startPos;
|
|
546
|
+
// while (pos < buffer.length) {
|
|
547
|
+
// if (buffer[pos] === LITERAL && pos + 1 < buffer.length) {
|
|
548
|
+
// try {
|
|
549
|
+
// const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
|
|
550
|
+
// if (byteLen > 0 && byteLen <= 1_000_000) {
|
|
551
|
+
// const start = pos + 1 + bytesRead;
|
|
552
|
+
// const end = start + byteLen;
|
|
553
|
+
// if (end <= buffer.length) {
|
|
554
|
+
// const word = utf8Decode(buffer.subarray(start, end));
|
|
555
|
+
// parts.push(word);
|
|
556
|
+
// wordCount++;
|
|
557
|
+
// pos = end;
|
|
558
|
+
// continue;
|
|
559
|
+
// }
|
|
560
|
+
// }
|
|
561
|
+
// } catch {}
|
|
562
|
+
// }
|
|
563
|
+
// let matched = false;
|
|
564
|
+
// for (const len of sortedIdLengths) {
|
|
565
|
+
// if (pos + len > buffer.length) continue;
|
|
566
|
+
// const key = toHex(buffer.subarray(pos, pos + len));
|
|
567
|
+
// if (reverseMap.has(key)) {
|
|
568
|
+
// parts.push(reverseMap.get(key)!);
|
|
569
|
+
// wordCount++;
|
|
570
|
+
// pos += len;
|
|
571
|
+
// matched = true;
|
|
572
|
+
// break;
|
|
573
|
+
// }
|
|
574
|
+
// }
|
|
575
|
+
// if (!matched) {
|
|
576
|
+
// const marker = `[0x${buffer[pos].toString(16).padStart(2, "0")}]`;
|
|
577
|
+
// parts.push(marker);
|
|
578
|
+
// rawSegments.push(marker);
|
|
579
|
+
// this.log(
|
|
580
|
+
// `[decode] partial scan: no match at pos=${pos} byte=${buffer[pos]}`,
|
|
581
|
+
// );
|
|
582
|
+
// pos++;
|
|
583
|
+
// }
|
|
584
|
+
// }
|
|
585
|
+
// return { text: parts.join(" "), wordCount, rawSegments };
|
|
586
|
+
// }
|
|
587
|
+
// private tryDecode(
|
|
588
|
+
// pos: number,
|
|
589
|
+
// buffer: Uint8Array,
|
|
590
|
+
// reverseMap: Map<string, string>,
|
|
591
|
+
// result: string[],
|
|
592
|
+
// depth: number,
|
|
593
|
+
// sortedIdLengths: number[],
|
|
594
|
+
// ): string | null {
|
|
595
|
+
// if (pos === buffer.length) return result.join(" ");
|
|
596
|
+
// if (buffer[pos] === LITERAL) {
|
|
597
|
+
// let byteLen: number;
|
|
598
|
+
// let bytesRead: number;
|
|
599
|
+
// try {
|
|
600
|
+
// ({ value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1));
|
|
601
|
+
// } catch {
|
|
602
|
+
// byteLen = -1;
|
|
603
|
+
// bytesRead = 0;
|
|
604
|
+
// }
|
|
605
|
+
// if (byteLen > 0) {
|
|
606
|
+
// if (byteLen > 1_000_000 || byteLen < 0) return null;
|
|
607
|
+
// const start = pos + 1 + bytesRead;
|
|
608
|
+
// const end = start + byteLen;
|
|
609
|
+
// if (end > buffer.length) return null;
|
|
610
|
+
// result.push(utf8Decode(buffer.subarray(start, end)));
|
|
611
|
+
// const res = this.tryDecode(
|
|
612
|
+
// end,
|
|
613
|
+
// buffer,
|
|
614
|
+
// reverseMap,
|
|
615
|
+
// result,
|
|
616
|
+
// depth + 1,
|
|
617
|
+
// sortedIdLengths,
|
|
618
|
+
// );
|
|
619
|
+
// if (res !== null) return res;
|
|
620
|
+
// result.pop();
|
|
621
|
+
// }
|
|
622
|
+
// }
|
|
623
|
+
// for (const len of sortedIdLengths) {
|
|
624
|
+
// if (pos + len > buffer.length) continue;
|
|
625
|
+
// const key = toHex(buffer.subarray(pos, pos + len));
|
|
626
|
+
// if (reverseMap.has(key)) {
|
|
627
|
+
// result.push(reverseMap.get(key)!);
|
|
628
|
+
// const res = this.tryDecode(
|
|
629
|
+
// pos + len,
|
|
630
|
+
// buffer,
|
|
631
|
+
// reverseMap,
|
|
632
|
+
// result,
|
|
633
|
+
// depth + 1,
|
|
634
|
+
// sortedIdLengths,
|
|
635
|
+
// );
|
|
636
|
+
// if (res !== null) return res;
|
|
637
|
+
// result.pop();
|
|
638
|
+
// }
|
|
639
|
+
// }
|
|
640
|
+
// return null;
|
|
641
|
+
// }
|
|
588
642
|
}
|
|
589
643
|
export {
|
|
590
644
|
MAGIC,
|