@bigdreamsweb3/wordbin 1.0.7 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +156 -67
- package/README.md +364 -149
- package/dist/{builder-e2OwBYJh.js → builder-vFphFQMU.js} +1 -12
- package/dist/{builder-e2OwBYJh.js.map → builder-vFphFQMU.js.map} +1 -1
- package/dist/cli.mjs +1 -1
- package/dist/core/binary-payload.d.ts +6 -0
- package/dist/core/comp/latin1-compressor.d.ts +9 -0
- package/dist/core/comp/onebyte-encoder.d.ts +2 -0
- package/dist/core/index.d.ts +58 -0
- package/dist/data/wordbin-v1-bip39.json +13 -11
- package/dist/index.d.ts +2 -2
- package/dist/index.mjs +379 -57
- package/dist/index.mjs.map +1 -1
- package/dist/types.d.ts +7 -3
- package/package.json +4 -3
- package/dist/core.d.ts +0 -19
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { EncodeResult, WordBinDictionary } from '../types.js';
|
|
2
|
+
/**
 * Wire formats the decoder can report for an input payload.
 *
 * - "bytes":  the caller passed a Uint8Array directly.
 * - "hex", "base58", "base64": text encodings recognised by auto-detection.
 * - "bin21":  fallback for strings — every UTF-16 code unit is treated as one byte.
 */
type PayloadFormat = "bytes" | "base58" | "base64" | "hex" | "bin21";
|
|
3
|
+
/** Outcome of `WordBin.decode`, including how the input was interpreted. */
export interface DecodeResult {
    /** The decoded text — words for WordBin payloads, best-effort for others. */
    text: string;
    /** True only when the payload was a valid, fully-parsed WordBin stream. */
    isWordBin: boolean;
    /** Auto-detected wire format of the input. */
    detectedFormat: PayloadFormat;
    /**
     * Human-readable notice describing anything unusual about the decode.
     * Set when the payload is empty or is not a valid WordBin stream (it then
     * explains which fallback was used), and also on successful decodes whose
     * first byte did not match the dictionary version that decoded them or
     * that carried no version header at all.
     */
    notice?: string;
    /**
     * Present when partial scanning was used (non-WordBin payloads).
     * Lists, in order, one "[0xXX]" marker per byte that had no dictionary
     * match.
     */
    rawSegments?: string[];
}
|
|
21
|
+
/**
 * Encoder/decoder that maps whitespace-separated words to compact dictionary
 * IDs and back.
 */
export declare class WordBin {
    /**
     * Dictionary version used by `encode` when `options.dictVersion` is not
     * supplied. Taken from the initial dictionary's `version`, defaulting to 1.
     */
    private primaryDictVersion;
    /** Debug logger; a no-op unless the instance was created with `debug: true`. */
    private log;
    /**
     * @param initialDict Dictionary whose `version` becomes the primary version.
     * @param options     `debug: true` enables "[WordBin]"-prefixed console logging.
     */
    constructor(initialDict?: WordBinDictionary, options?: {
        debug?: boolean;
    });
    /**
     * Creates an instance from a dictionary built at call time from `words`.
     * NOTE(review): the bundled implementation appears to emit a console
     * warning recommending pre-built dictionary files — confirm against source.
     */
    static createFromWords(words: string[]): Promise<WordBin>;
    /** Creates an instance that wraps an already-built dictionary object. */
    static createFromJson(dictJson: WordBinDictionary): Promise<WordBin>;
    /** Creates an instance backed by the latest available dictionary. */
    static create(options?: {
        debug?: boolean;
    }): Promise<WordBin>;
    /**
     * Loads the dictionary for a version and returns its lookup tables
     * (word → ID bytes, hex ID → word) plus the distinct ID lengths sorted
     * longest first.
     */
    private getMapsForVersion;
    /**
     * Encodes text into a WordBin payload.
     *
     * A Uint8Array input is first converted to Base64 text; an EncodeResult
     * input is re-encoded from its `base64Payload`.
     *
     * @param text    The text (or prior result / raw bytes) to encode.
     * @param options `dictVersion` overrides the instance's primary dictionary version.
     * @returns The encoded bytes together with their string renderings and size statistics.
     */
    encode(text: string | EncodeResult | Uint8Array, options?: {
        dictVersion?: number;
    }): Promise<EncodeResult>;
    /**
     * Decodes any supported payload format back to human-readable text.
     *
     * For valid WordBin payloads: returns the exact original words.
     * For non-WordBin payloads: scans byte-by-byte, extracts dictionary words
     * wherever possible, and preserves unrecognised
     * bytes as "[0xXX]" markers.
     */
    decode(payload: Uint8Array | string): Promise<DecodeResult>;
    /**
     * O(n) longest-match-first decode. Returns null if any byte has no match.
     * This is the fast path; tryDecode is used as a backtracking fallback.
     */
    private greedyDecode;
    /**
     * Scans through the buffer extracting any recognised dictionary words.
     * Unrecognised bytes are collected as raw segments and rendered as [0xXX].
     * Always consumes the entire buffer — never returns null.
     */
    private partialScan;
    /**
     * Recursive backtracking decoder, tried when `greedyDecode` returns null.
     * Yields the space-joined words once the end of the buffer is reached.
     */
    private tryDecode;
}
|
|
58
|
+
export {};
|
|
@@ -446,15 +446,15 @@
|
|
|
446
446
|
"988180": [
|
|
447
447
|
"offer"
|
|
448
448
|
],
|
|
449
|
-
"b0ad": [
|
|
450
|
-
"able"
|
|
451
|
-
],
|
|
452
449
|
"df864c": [
|
|
453
450
|
"abandon"
|
|
454
451
|
],
|
|
455
452
|
"bcffaa": [
|
|
456
453
|
"ability"
|
|
457
454
|
],
|
|
455
|
+
"b0ad": [
|
|
456
|
+
"able"
|
|
457
|
+
],
|
|
458
458
|
"a4262e": [
|
|
459
459
|
"about"
|
|
460
460
|
],
|
|
@@ -1647,8 +1647,7 @@
|
|
|
1647
1647
|
"crush"
|
|
1648
1648
|
],
|
|
1649
1649
|
"58a6": [
|
|
1650
|
-
"cry"
|
|
1651
|
-
"math"
|
|
1650
|
+
"cry"
|
|
1652
1651
|
],
|
|
1653
1652
|
"60a0f7": [
|
|
1654
1653
|
"crystal"
|
|
@@ -2574,24 +2573,24 @@
|
|
|
2574
2573
|
"5a5770": [
|
|
2575
2574
|
"furnace"
|
|
2576
2575
|
],
|
|
2577
|
-
"f44a": [
|
|
2578
|
-
"fury"
|
|
2579
|
-
],
|
|
2580
2576
|
"ebb3de": [
|
|
2581
2577
|
"future"
|
|
2582
2578
|
],
|
|
2579
|
+
"f44a": [
|
|
2580
|
+
"fury"
|
|
2581
|
+
],
|
|
2583
2582
|
"4e5aa6": [
|
|
2584
2583
|
"gadget"
|
|
2585
2584
|
],
|
|
2586
2585
|
"66dd": [
|
|
2587
2586
|
"gain"
|
|
2588
2587
|
],
|
|
2589
|
-
"eba4ae": [
|
|
2590
|
-
"galaxy"
|
|
2591
|
-
],
|
|
2592
2588
|
"ce387d": [
|
|
2593
2589
|
"gallery"
|
|
2594
2590
|
],
|
|
2591
|
+
"eba4ae": [
|
|
2592
|
+
"galaxy"
|
|
2593
|
+
],
|
|
2595
2594
|
"6ca5": [
|
|
2596
2595
|
"game"
|
|
2597
2596
|
],
|
|
@@ -6143,6 +6142,9 @@
|
|
|
6143
6142
|
],
|
|
6144
6143
|
"24fe": [
|
|
6145
6144
|
"zoo"
|
|
6145
|
+
],
|
|
6146
|
+
"ad06fa": [
|
|
6147
|
+
"math"
|
|
6146
6148
|
]
|
|
6147
6149
|
}
|
|
6148
6150
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { MAGIC } from './constants
|
|
1
|
+
export { MAGIC } from './constants';
|
|
2
2
|
export { buildDictionary } from './dict/builder';
|
|
3
|
-
export { WordBin } from './core
|
|
3
|
+
export { WordBin } from './core/index';
|
|
4
4
|
export type { EncodeResult, WordBinDictionary } from './types';
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { b as buildDictionary, t as toBase64, u as utf8Encode, e as encodeVarint,
|
|
1
|
+
import { b as buildDictionary, t as toBase64, u as utf8Encode, e as encodeVarint, d as decodeVarint, a as utf8Decode, c as toHex } from "./builder-vFphFQMU.js";
|
|
2
2
|
import fs from "fs/promises";
|
|
3
3
|
import path from "path";
|
|
4
4
|
import { fileURLToPath } from "url";
|
|
@@ -81,9 +81,174 @@ async function loadLatestDictionary() {
|
|
|
81
81
|
);
|
|
82
82
|
return loadDictionaryByVersion(latestVersion);
|
|
83
83
|
}
|
|
84
|
+
/**
 * Builds a base-N codec for the given alphabet.
 *
 * The alphabet's length is the radix and its first character stands for the
 * digit zero; leading zero bytes are preserved one-for-one as that character.
 *
 * @param ALPHABET2 String of unique characters (char codes <= 255), fewer than 255 of them.
 * @returns `{ encode, decodeUnsafe, decode }`.
 * @throws TypeError when the alphabet is too long or contains a repeated character.
 */
function base(ALPHABET2) {
  if (ALPHABET2.length >= 255) {
    throw new TypeError("Alphabet too long");
  }

  const UNUSED = 255;
  // Char code -> digit value; UNUSED marks characters outside the alphabet.
  const digitOf = new Uint8Array(256).fill(UNUSED);
  for (let index = 0; index < ALPHABET2.length; index += 1) {
    const symbol = ALPHABET2.charAt(index);
    const code = symbol.charCodeAt(0);
    if (digitOf[code] !== UNUSED) {
      throw new TypeError(symbol + " is ambiguous");
    }
    digitOf[code] = index;
  }

  const radix = ALPHABET2.length;
  const zeroSymbol = ALPHABET2.charAt(0);
  // Size ratios used to pre-allocate the working buffers.
  const bytesPerDigit = Math.log(radix) / Math.log(256);
  const digitsPerByte = Math.log(256) / Math.log(radix);

  /** Encodes bytes (Uint8Array, any ArrayBuffer view, or plain array) to text. */
  function encode(source) {
    let bytes = source;
    if (!(bytes instanceof Uint8Array)) {
      if (ArrayBuffer.isView(bytes)) {
        bytes = new Uint8Array(bytes.buffer, bytes.byteOffset, bytes.byteLength);
      } else if (Array.isArray(bytes)) {
        bytes = Uint8Array.from(bytes);
      } else {
        throw new TypeError("Expected Uint8Array");
      }
    }

    const total = bytes.length;
    if (total === 0) {
      return "";
    }

    // Leading zero bytes are emitted verbatim as the zero symbol.
    let leadingZeros = 0;
    while (leadingZeros !== total && bytes[leadingZeros] === 0) {
      leadingZeros += 1;
    }

    const capacity = ((total - leadingZeros) * digitsPerByte + 1) >>> 0;
    const digits = new Uint8Array(capacity);
    let used = 0;

    // Schoolbook base conversion: digits = digits * 256 + nextByte.
    for (let cursor = leadingZeros; cursor !== total; cursor += 1) {
      let carry = bytes[cursor];
      let touched = 0;
      for (
        let slot = capacity - 1;
        (carry !== 0 || touched < used) && slot !== -1;
        slot -= 1, touched += 1
      ) {
        carry += (256 * digits[slot]) >>> 0;
        digits[slot] = (carry % radix) >>> 0;
        carry = (carry / radix) >>> 0;
      }
      if (carry !== 0) {
        throw new Error("Non-zero carry");
      }
      used = touched;
    }

    // Skip the unused high-order zero digits of the working buffer.
    let first = capacity - used;
    while (first !== capacity && digits[first] === 0) {
      first += 1;
    }

    let text = zeroSymbol.repeat(leadingZeros);
    while (first < capacity) {
      text += ALPHABET2.charAt(digits[first]);
      first += 1;
    }
    return text;
  }

  /** Decodes text to bytes; returns undefined when a character is not in the alphabet. */
  function decodeUnsafe(source) {
    if (typeof source !== "string") {
      throw new TypeError("Expected String");
    }
    if (source.length === 0) {
      return new Uint8Array();
    }

    // Each leading zero symbol becomes one leading zero byte.
    let leadingZeros = 0;
    while (source[leadingZeros] === zeroSymbol) {
      leadingZeros += 1;
    }

    const capacity = ((source.length - leadingZeros) * bytesPerDigit + 1) >>> 0;
    const scratch = new Uint8Array(capacity);
    let used = 0;

    for (let cursor = leadingZeros; cursor < source.length; cursor += 1) {
      const code = source.charCodeAt(cursor);
      if (code > 255) {
        return undefined;
      }
      let carry = digitOf[code];
      if (carry === UNUSED) {
        return undefined;
      }
      let touched = 0;
      for (
        let slot = capacity - 1;
        (carry !== 0 || touched < used) && slot !== -1;
        slot -= 1, touched += 1
      ) {
        carry += (radix * scratch[slot]) >>> 0;
        scratch[slot] = (carry % 256) >>> 0;
        carry = (carry / 256) >>> 0;
      }
      if (carry !== 0) {
        throw new Error("Non-zero carry");
      }
      used = touched;
    }

    let first = capacity - used;
    while (first !== capacity && scratch[first] === 0) {
      first += 1;
    }

    const output = new Uint8Array(leadingZeros + (capacity - first));
    output.set(scratch.subarray(first), leadingZeros);
    return output;
  }

  /** Like decodeUnsafe, but throws on characters outside the alphabet. */
  function decode(string) {
    const bytes = decodeUnsafe(string);
    if (!bytes) {
      throw new Error("Non-base" + radix + " character");
    }
    return bytes;
  }

  return { encode, decodeUnsafe, decode };
}
|
|
212
|
+
// Base58 alphabet: digits and ASCII letters with "0", "O", "I" and "l" left out.
var ALPHABET = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
// Base58 codec (encode / decode / decodeUnsafe) built from the alphabet above.
const bs58 = base(ALPHABET);
|
|
214
|
+
/**
 * Renders a byte sequence as lowercase hexadecimal, two digits per byte.
 *
 * @param bytes Iterable or array-like of byte values.
 * @returns The concatenated hex string ("" for empty input).
 */
function bytesToHex(bytes) {
  let hex = "";
  for (const value of Array.from(bytes)) {
    hex += value.toString(16).padStart(2, "0");
  }
  return hex;
}
|
|
217
|
+
/**
 * Guesses the text encoding of a string payload and converts it to bytes.
 *
 * Formats are probed in a fixed order and the first match wins:
 *   1. hex      — only hex digits, even length
 *   2. base58   — only Base58 alphabet characters
 *   3. base64   — standard or URL-safe alphabet, padded or unpadded
 *   4. bin21    — fallback: one byte per UTF-16 code unit (values above 255
 *                 are reduced modulo 256 by the typed array)
 *
 * @param payload The string to inspect.
 * @returns `{ buffer, detectedFormat }`.
 */
function detectAndConvert(payload) {
  // 1. Hexadecimal.
  const hexOnly = /^[0-9a-fA-F]+$/;
  if (hexOnly.test(payload) && payload.length % 2 === 0) {
    const fromHex = Uint8Array.from({ length: payload.length / 2 }, (_, index) =>
      parseInt(payload.slice(index * 2, index * 2 + 2), 16)
    );
    return { buffer: fromHex, detectedFormat: "hex" };
  }

  // 2. Base58 — a decode failure simply moves on to the next candidate.
  const base58Only = /^[123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz]+$/;
  if (base58Only.test(payload)) {
    try {
      return { buffer: bs58.decode(payload), detectedFormat: "base58" };
    } catch {
    }
  }

  // 3. Base64: map the URL-safe alphabet onto the standard one and restore
  //    any missing "=" padding before handing the text to atob.
  const standardB64 = /^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{4})$/;
  const urlSafeB64 = /^(?:[A-Za-z0-9\-_]{4})*(?:[A-Za-z0-9\-_]{2}(?:==)?|[A-Za-z0-9\-_]{3}=?|[A-Za-z0-9\-_]{4})$/;
  const standardised = payload.replace(/-/g, "+").replace(/_/g, "/");
  const remainder = standardised.length % 4;
  const withPadding = remainder ? standardised + "=".repeat(4 - remainder) : standardised;
  if (standardB64.test(payload) || urlSafeB64.test(payload)) {
    try {
      const binary = atob(withPadding);
      const fromB64 = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
      return { buffer: fromB64, detectedFormat: "base64" };
    } catch {
    }
  }

  // 4. Fallback: take each code unit as a raw byte.
  const raw = Uint8Array.from({ length: payload.length }, (_, index) =>
    payload.charCodeAt(index)
  );
  return { buffer: raw, detectedFormat: "bin21" };
}
|
|
84
249
|
class WordBin {
|
|
85
250
|
constructor(initialDict, options) {
|
|
86
|
-
this.primaryDictVersion = initialDict?.version ??
|
|
251
|
+
this.primaryDictVersion = initialDict?.version ?? 1;
|
|
87
252
|
this.log = options?.debug ? (...args) => console.log("[WordBin]", ...args) : () => {
|
|
88
253
|
};
|
|
89
254
|
}
|
|
@@ -91,15 +256,13 @@ class WordBin {
|
|
|
91
256
|
console.warn(
|
|
92
257
|
"Building dictionary from scratch – consider using pre-built files"
|
|
93
258
|
);
|
|
94
|
-
|
|
95
|
-
return new WordBin(dict);
|
|
259
|
+
return new WordBin(await buildDictionary(words));
|
|
96
260
|
}
|
|
97
261
|
/**
 * Creates a WordBin instance around an already-built dictionary object.
 *
 * @param dictJson Dictionary passed straight to the constructor.
 * @returns A promise resolving to the new instance.
 */
static async createFromJson(dictJson) {
  return new WordBin(dictJson);
}
|
|
100
264
|
static async create(options) {
|
|
101
|
-
|
|
102
|
-
return new WordBin(latestDict, options);
|
|
265
|
+
return new WordBin(await loadLatestDictionary(), options);
|
|
103
266
|
}
|
|
104
267
|
async getMapsForVersion(version) {
|
|
105
268
|
const dict = await loadDictionaryByVersion(version);
|
|
@@ -119,9 +282,13 @@ class WordBin {
|
|
|
119
282
|
reverseMap.set(hex, word);
|
|
120
283
|
forwardMap.set(word, bytes);
|
|
121
284
|
}
|
|
122
|
-
|
|
123
|
-
|
|
285
|
+
return {
|
|
286
|
+
reverseMap,
|
|
287
|
+
forwardMap,
|
|
288
|
+
sortedIdLengths: Array.from(idLengths).sort((a, b) => b - a)
|
|
289
|
+
};
|
|
124
290
|
}
|
|
291
|
+
// ── encode ──────────────────────────────────────────────────────────────────
|
|
125
292
|
async encode(text, options) {
|
|
126
293
|
let textStr;
|
|
127
294
|
if (typeof text === "string") {
|
|
@@ -129,7 +296,7 @@ class WordBin {
|
|
|
129
296
|
} else if (text instanceof Uint8Array) {
|
|
130
297
|
textStr = toBase64(text);
|
|
131
298
|
} else {
|
|
132
|
-
textStr = text.
|
|
299
|
+
textStr = text.base64Payload;
|
|
133
300
|
}
|
|
134
301
|
const trimmed = textStr.trim();
|
|
135
302
|
if (!trimmed) {
|
|
@@ -138,19 +305,21 @@ class WordBin {
|
|
|
138
305
|
dictVersion: this.primaryDictVersion,
|
|
139
306
|
encoded: new Uint8Array(0),
|
|
140
307
|
payload: "",
|
|
141
|
-
|
|
308
|
+
bin21: "",
|
|
309
|
+
bin21Payload: "",
|
|
310
|
+
base64Payload: "",
|
|
311
|
+
hexPayload: "",
|
|
312
|
+
base58Payload: "",
|
|
142
313
|
originalBytes: 0,
|
|
143
314
|
encodedBytes: 0,
|
|
144
315
|
bytesSaved: 0,
|
|
145
316
|
ratioPercent: 100
|
|
146
317
|
};
|
|
147
318
|
}
|
|
148
|
-
const words = trimmed.split(/\s+/).filter(Boolean);
|
|
149
319
|
const useVersion = options?.dictVersion ?? this.primaryDictVersion;
|
|
150
|
-
const header = new Uint8Array([useVersion]);
|
|
151
|
-
const chunks = [header];
|
|
152
320
|
const { forwardMap } = await this.getMapsForVersion(useVersion);
|
|
153
|
-
|
|
321
|
+
const chunks = [new Uint8Array([useVersion])];
|
|
322
|
+
for (const w of trimmed.split(/\s+/).filter(Boolean)) {
|
|
154
323
|
const id = forwardMap.get(w);
|
|
155
324
|
if (id) {
|
|
156
325
|
chunks.push(id);
|
|
@@ -172,65 +341,220 @@ class WordBin {
|
|
|
172
341
|
offset += chunk.length;
|
|
173
342
|
}
|
|
174
343
|
const originalBytes = new TextEncoder().encode(textStr).length;
|
|
175
|
-
const
|
|
344
|
+
const hexPayload = bytesToHex(result);
|
|
345
|
+
const bin21Payload = Array.from(result).map((b) => String.fromCharCode(b)).join("");
|
|
346
|
+
const base64Payload = toBase64(result);
|
|
347
|
+
const base58Payload = bs58.encode(result);
|
|
176
348
|
return {
|
|
177
349
|
originalText: textStr,
|
|
178
350
|
dictVersion: useVersion,
|
|
179
351
|
encoded: result,
|
|
180
|
-
|
|
181
|
-
|
|
352
|
+
bin21: bin21Payload,
|
|
353
|
+
payload: bin21Payload,
|
|
354
|
+
bin21Payload,
|
|
355
|
+
hexPayload,
|
|
356
|
+
base64Payload,
|
|
357
|
+
base58Payload,
|
|
182
358
|
originalBytes,
|
|
183
|
-
encodedBytes:
|
|
184
|
-
bytesSaved: originalBytes -
|
|
185
|
-
ratioPercent:
|
|
359
|
+
encodedBytes: bin21Payload.length,
|
|
360
|
+
bytesSaved: originalBytes - bin21Payload.length,
|
|
361
|
+
ratioPercent: Math.round(bin21Payload.length / originalBytes * 1e4) / 100
|
|
186
362
|
};
|
|
187
363
|
}
|
|
188
|
-
|
|
364
|
+
// ── decode ───────────────────────────────────────────────────────────────────
|
|
365
|
+
/**
|
|
366
|
+
* Decodes any supported payload format back to human-readable text.
|
|
367
|
+
*
|
|
368
|
+
* For valid WordBin payloads: returns the exact original words.
|
|
369
|
+
* For non-WordBin payloads: scans byte-by-byte, extracts dictionary words
|
|
370
|
+
* wherever possible, and preserves unrecognised
|
|
371
|
+
* bytes as "[0xXX]" markers.
|
|
372
|
+
*/
|
|
373
|
+
async decode(payload) {
|
|
189
374
|
let buffer;
|
|
190
|
-
|
|
191
|
-
|
|
375
|
+
let detectedFormat;
|
|
376
|
+
if (payload instanceof Uint8Array) {
|
|
377
|
+
buffer = payload;
|
|
378
|
+
detectedFormat = "bytes";
|
|
192
379
|
} else {
|
|
193
|
-
buffer =
|
|
380
|
+
({ buffer, detectedFormat } = detectAndConvert(payload));
|
|
194
381
|
}
|
|
382
|
+
this.log(
|
|
383
|
+
`[decode] format=${detectedFormat} bufLen=${buffer.length} firstBytes=[${Array.from(buffer.slice(0, 8)).join(",")}]`
|
|
384
|
+
);
|
|
195
385
|
if (buffer.length < 1) {
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
sortedIdLengths
|
|
386
|
+
return {
|
|
387
|
+
text: "",
|
|
388
|
+
isWordBin: false,
|
|
389
|
+
detectedFormat,
|
|
390
|
+
notice: "Payload is empty — nothing to decode."
|
|
391
|
+
};
|
|
392
|
+
}
|
|
393
|
+
const availableVersions = await getAllAvailableDictionaryVersions();
|
|
394
|
+
const versionByte = buffer[0];
|
|
395
|
+
const versionIsHeader = availableVersions.includes(versionByte);
|
|
396
|
+
this.log(
|
|
397
|
+
`[decode] availableVersions=[${availableVersions.join(",")}] versionByte=${versionByte} isKnownHeader=${versionIsHeader}`
|
|
209
398
|
);
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
399
|
+
const tryOrder = versionIsHeader ? [versionByte, ...availableVersions.filter((v) => v !== versionByte)] : [...availableVersions];
|
|
400
|
+
for (const ver of tryOrder) {
|
|
401
|
+
let maps;
|
|
402
|
+
try {
|
|
403
|
+
maps = await this.getMapsForVersion(ver);
|
|
404
|
+
} catch (err) {
|
|
405
|
+
this.log(`[decode] v${ver}: getMapsForVersion threw — ${err}`);
|
|
406
|
+
continue;
|
|
407
|
+
}
|
|
408
|
+
const { reverseMap, sortedIdLengths } = maps;
|
|
409
|
+
const r1 = this.greedyDecode(buffer, 1, reverseMap, sortedIdLengths) ?? this.tryDecode(1, buffer, reverseMap, [], 0, sortedIdLengths);
|
|
410
|
+
this.log(
|
|
411
|
+
`[decode] v${ver} strict(pos=1): ${r1 !== null ? `"${r1}"` : "null"}`
|
|
412
|
+
);
|
|
413
|
+
if (r1 !== null) {
|
|
414
|
+
const notice2 = versionByte === ver ? void 0 : `Byte[0]=${versionByte} is not a recognised version header but decoded successfully with dictionary v${ver}.`;
|
|
415
|
+
return { text: r1, isWordBin: true, detectedFormat, notice: notice2 };
|
|
416
|
+
}
|
|
417
|
+
const r0 = this.greedyDecode(buffer, 0, reverseMap, sortedIdLengths) ?? this.tryDecode(0, buffer, reverseMap, [], 0, sortedIdLengths);
|
|
418
|
+
this.log(
|
|
419
|
+
`[decode] v${ver} strict(pos=0): ${r0 !== null ? `"${r0}"` : "null"}`
|
|
213
420
|
);
|
|
421
|
+
if (r0 !== null) {
|
|
422
|
+
return {
|
|
423
|
+
text: r0,
|
|
424
|
+
isWordBin: true,
|
|
425
|
+
detectedFormat,
|
|
426
|
+
notice: `Payload had no version header. Decoded using dictionary v${ver}.`
|
|
427
|
+
};
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
this.log(`[decode] strict parse failed — falling back to partial scan`);
|
|
431
|
+
if (availableVersions.length > 0) {
|
|
432
|
+
const scanVersion = availableVersions[availableVersions.length - 1];
|
|
433
|
+
try {
|
|
434
|
+
const { reverseMap, sortedIdLengths } = await this.getMapsForVersion(scanVersion);
|
|
435
|
+
const scan1 = this.partialScan(buffer, 1, reverseMap, sortedIdLengths);
|
|
436
|
+
const scan0 = this.partialScan(buffer, 0, reverseMap, sortedIdLengths);
|
|
437
|
+
const best = scan1.wordCount >= scan0.wordCount ? scan1 : scan0;
|
|
438
|
+
this.log(
|
|
439
|
+
`[decode] partial scan(pos=1) words=${scan1.wordCount} raw=${scan1.rawSegments.length} | scan(pos=0) words=${scan0.wordCount} raw=${scan0.rawSegments.length}`
|
|
440
|
+
);
|
|
441
|
+
const notice2 = `This does not appear to be a valid WordBin payload. Partial scan using dictionary v${scanVersion} extracted ${best.wordCount} word(s); ${best.rawSegments.length} byte sequence(s) had no dictionary match and are shown as [0xXX] markers.`;
|
|
442
|
+
return {
|
|
443
|
+
text: best.text,
|
|
444
|
+
isWordBin: false,
|
|
445
|
+
detectedFormat,
|
|
446
|
+
rawSegments: best.rawSegments,
|
|
447
|
+
notice: notice2
|
|
448
|
+
};
|
|
449
|
+
} catch {
|
|
450
|
+
}
|
|
214
451
|
}
|
|
215
|
-
|
|
452
|
+
const notice = `Could not decode with any available dictionary (tried: ${availableVersions.join(", ") || "none"}). Falling back to UTF-8 text decoding.`;
|
|
453
|
+
this.log(`[decode] ${notice}`);
|
|
454
|
+
return {
|
|
455
|
+
text: new TextDecoder("utf-8", { fatal: false }).decode(buffer),
|
|
456
|
+
isWordBin: false,
|
|
457
|
+
detectedFormat,
|
|
458
|
+
notice
|
|
459
|
+
};
|
|
216
460
|
}
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
461
|
+
// ── Private: greedy linear decode ────────────────────────────────────────────
|
|
462
|
+
/**
|
|
463
|
+
* O(n) longest-match-first decode. Returns null if any byte has no match.
|
|
464
|
+
* This is the fast path; tryDecode is used as a backtracking fallback.
|
|
465
|
+
*/
|
|
466
|
+
greedyDecode(buffer, startPos, reverseMap, sortedIdLengths) {
|
|
467
|
+
const words = [];
|
|
468
|
+
let pos = startPos;
|
|
469
|
+
while (pos < buffer.length) {
|
|
470
|
+
if (buffer[pos] === LITERAL) {
|
|
471
|
+
const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
|
|
472
|
+
if (byteLen > 1e6 || byteLen < 0) return null;
|
|
473
|
+
const start = pos + 1 + bytesRead;
|
|
474
|
+
const end = start + byteLen;
|
|
475
|
+
if (end > buffer.length) return null;
|
|
476
|
+
words.push(utf8Decode(buffer.subarray(start, end)));
|
|
477
|
+
pos = end;
|
|
478
|
+
continue;
|
|
479
|
+
}
|
|
480
|
+
let matched = false;
|
|
481
|
+
for (const len of sortedIdLengths) {
|
|
482
|
+
if (pos + len > buffer.length) continue;
|
|
483
|
+
const key = toHex(buffer.subarray(pos, pos + len));
|
|
484
|
+
if (reverseMap.has(key)) {
|
|
485
|
+
words.push(reverseMap.get(key));
|
|
486
|
+
pos += len;
|
|
487
|
+
matched = true;
|
|
488
|
+
break;
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
if (!matched) return null;
|
|
492
|
+
}
|
|
493
|
+
return words.join(" ");
|
|
494
|
+
}
|
|
495
|
+
// ── Private: partial / best-effort scan ──────────────────────────────────────
|
|
496
|
+
/**
|
|
497
|
+
* Scans through the buffer extracting any recognised dictionary words.
|
|
498
|
+
* Unrecognised bytes are collected as raw segments and rendered as [0xXX].
|
|
499
|
+
* Always consumes the entire buffer — never returns null.
|
|
500
|
+
*/
|
|
501
|
+
partialScan(buffer, startPos, reverseMap, sortedIdLengths) {
|
|
502
|
+
const parts = [];
|
|
503
|
+
const rawSegments = [];
|
|
504
|
+
let wordCount = 0;
|
|
505
|
+
let pos = startPos;
|
|
506
|
+
while (pos < buffer.length) {
|
|
507
|
+
if (buffer[pos] === LITERAL && pos + 1 < buffer.length) {
|
|
508
|
+
try {
|
|
509
|
+
const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
|
|
510
|
+
if (byteLen > 0 && byteLen <= 1e6) {
|
|
511
|
+
const start = pos + 1 + bytesRead;
|
|
512
|
+
const end = start + byteLen;
|
|
513
|
+
if (end <= buffer.length) {
|
|
514
|
+
const word = utf8Decode(buffer.subarray(start, end));
|
|
515
|
+
parts.push(word);
|
|
516
|
+
wordCount++;
|
|
517
|
+
pos = end;
|
|
518
|
+
continue;
|
|
519
|
+
}
|
|
520
|
+
}
|
|
521
|
+
} catch {
|
|
522
|
+
}
|
|
523
|
+
}
|
|
524
|
+
let matched = false;
|
|
525
|
+
for (const len of sortedIdLengths) {
|
|
526
|
+
if (pos + len > buffer.length) continue;
|
|
527
|
+
const key = toHex(buffer.subarray(pos, pos + len));
|
|
528
|
+
if (reverseMap.has(key)) {
|
|
529
|
+
parts.push(reverseMap.get(key));
|
|
530
|
+
wordCount++;
|
|
531
|
+
pos += len;
|
|
532
|
+
matched = true;
|
|
533
|
+
break;
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
if (!matched) {
|
|
537
|
+
const marker = `[0x${buffer[pos].toString(16).padStart(2, "0")}]`;
|
|
538
|
+
parts.push(marker);
|
|
539
|
+
rawSegments.push(marker);
|
|
540
|
+
this.log(
|
|
541
|
+
`[decode] partial scan: no match at pos=${pos} byte=${buffer[pos]}`
|
|
542
|
+
);
|
|
543
|
+
pos++;
|
|
544
|
+
}
|
|
220
545
|
}
|
|
546
|
+
return { text: parts.join(" "), wordCount, rawSegments };
|
|
547
|
+
}
|
|
548
|
+
// ── Private: backtracking decode ─────────────────────────────────────────────
|
|
549
|
+
tryDecode(pos, buffer, reverseMap, result, depth, sortedIdLengths) {
|
|
550
|
+
if (pos === buffer.length) return result.join(" ");
|
|
221
551
|
if (buffer[pos] === LITERAL) {
|
|
222
552
|
const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
|
|
223
|
-
if (byteLen > 1e6 || byteLen < 0)
|
|
224
|
-
return null;
|
|
225
|
-
}
|
|
553
|
+
if (byteLen > 1e6 || byteLen < 0) return null;
|
|
226
554
|
const start = pos + 1 + bytesRead;
|
|
227
555
|
const end = start + byteLen;
|
|
228
|
-
if (end > buffer.length)
|
|
229
|
-
|
|
230
|
-
}
|
|
231
|
-
const literalBytes = buffer.subarray(start, end);
|
|
232
|
-
const word = utf8Decode(literalBytes);
|
|
233
|
-
result.push(word);
|
|
556
|
+
if (end > buffer.length) return null;
|
|
557
|
+
result.push(utf8Decode(buffer.subarray(start, end)));
|
|
234
558
|
const res = this.tryDecode(
|
|
235
559
|
end,
|
|
236
560
|
buffer,
|
|
@@ -244,11 +568,9 @@ class WordBin {
|
|
|
244
568
|
}
|
|
245
569
|
for (const len of sortedIdLengths) {
|
|
246
570
|
if (pos + len > buffer.length) continue;
|
|
247
|
-
const
|
|
248
|
-
const key = toHex(slice);
|
|
571
|
+
const key = toHex(buffer.subarray(pos, pos + len));
|
|
249
572
|
if (reverseMap.has(key)) {
|
|
250
|
-
|
|
251
|
-
result.push(word);
|
|
573
|
+
result.push(reverseMap.get(key));
|
|
252
574
|
const res = this.tryDecode(
|
|
253
575
|
pos + len,
|
|
254
576
|
buffer,
|