@bigdreamsweb3/wordbin 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +156 -67
- package/README.md +364 -149
- package/dist/{dictionary-D3gr2Ala.js → builder-vFphFQMU.js} +16 -19
- package/dist/builder-vFphFQMU.js.map +1 -0
- package/dist/cli.mjs +1 -1
- package/dist/core/binary-payload.d.ts +6 -0
- package/dist/core/comp/latin1-compressor.d.ts +9 -0
- package/dist/core/comp/onebyte-encoder.d.ts +2 -0
- package/dist/core/index.d.ts +58 -0
- package/dist/data/wordbin-v1-bip39.json +13 -11
- package/dist/{dictionary.d.ts → dict/builder.d.ts} +1 -1
- package/dist/{dictionary-loader.d.ts → dict/dictionary-loader.d.ts} +1 -1
- package/dist/index.d.ts +3 -3
- package/dist/index.mjs +425 -142
- package/dist/index.mjs.map +1 -1
- package/dist/types.d.ts +7 -3
- package/package.json +6 -2
- package/dist/core.d.ts +0 -19
- package/dist/dictionary-D3gr2Ala.js.map +0 -1
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { b as buildDictionary, t as toBase64,
|
|
1
|
+
import { b as buildDictionary, t as toBase64, u as utf8Encode, e as encodeVarint, d as decodeVarint, a as utf8Decode, c as toHex } from "./builder-vFphFQMU.js";
|
|
2
2
|
import fs from "fs/promises";
|
|
3
3
|
import path from "path";
|
|
4
4
|
import { fileURLToPath } from "url";
|
|
@@ -81,225 +81,508 @@ async function loadLatestDictionary() {
|
|
|
81
81
|
);
|
|
82
82
|
return loadDictionaryByVersion(latestVersion);
|
|
83
83
|
}
|
|
84
|
+
/**
 * Builds a base-N codec (encode / decodeUnsafe / decode) for the given alphabet.
 *
 * Leading zero bytes are represented by repeating the alphabet's first
 * character, so byte strings round-trip exactly.
 *
 * @param {string} ALPHABET2 - Digit characters, one per value; each must be a
 *   unique UTF-16 code unit in the range 0-255 and there must be fewer than 255.
 * @returns {{encode: Function, decodeUnsafe: Function, decode: Function}}
 * @throws {TypeError} If the alphabet is too long, contains a character outside
 *   the 0-255 range, or contains a duplicate character.
 */
function base(ALPHABET2) {
  if (ALPHABET2.length >= 255) {
    throw new TypeError("Alphabet too long");
  }
  // BASE_MAP[charCode] -> digit value; 255 is the "not in alphabet" sentinel.
  const BASE_MAP = new Uint8Array(256).fill(255);
  for (let i = 0; i < ALPHABET2.length; i++) {
    const x = ALPHABET2.charAt(i);
    const xc = x.charCodeAt(0);
    // Without this guard BASE_MAP[xc] is undefined for xc > 255, which used to
    // surface as a misleading "is ambiguous" error.
    if (xc > 255) {
      throw new TypeError(x + " is outside the supported character range (code units 0-255)");
    }
    if (BASE_MAP[xc] !== 255) {
      throw new TypeError(x + " is ambiguous");
    }
    BASE_MAP[xc] = i;
  }
  const BASE = ALPHABET2.length;
  const LEADER = ALPHABET2.charAt(0);
  // Size ratios used to over-allocate the working buffers.
  const FACTOR = Math.log(BASE) / Math.log(256);
  const iFACTOR = Math.log(256) / Math.log(BASE);

  /**
   * Encodes bytes as a base-N string.
   * @param {Uint8Array|ArrayBufferView|number[]} source
   * @returns {string}
   * @throws {TypeError} If `source` is not a Uint8Array, another view, or an array.
   */
  function encode(source) {
    let bytes = source;
    if (!(bytes instanceof Uint8Array)) {
      if (ArrayBuffer.isView(bytes)) {
        bytes = new Uint8Array(bytes.buffer, bytes.byteOffset, bytes.byteLength);
      } else if (Array.isArray(bytes)) {
        bytes = Uint8Array.from(bytes);
      } else {
        throw new TypeError("Expected Uint8Array");
      }
    }
    if (bytes.length === 0) {
      return "";
    }
    // Count and skip leading zero bytes; they become LEADER characters.
    let zeroes = 0;
    let length = 0;
    let pbegin = 0;
    const pend = bytes.length;
    while (pbegin !== pend && bytes[pbegin] === 0) {
      pbegin++;
      zeroes++;
    }
    const size = (pend - pbegin) * iFACTOR + 1 >>> 0;
    const b58 = new Uint8Array(size);
    // Long multiplication: digits = digits * 256 + nextByte, in base BASE.
    while (pbegin !== pend) {
      let carry = bytes[pbegin];
      let i = 0;
      for (let it1 = size - 1; (carry !== 0 || i < length) && it1 !== -1; it1--, i++) {
        carry += 256 * b58[it1] >>> 0;
        b58[it1] = carry % BASE >>> 0;
        carry = carry / BASE >>> 0;
      }
      if (carry !== 0) {
        throw new Error("Non-zero carry");
      }
      length = i;
      pbegin++;
    }
    // Skip unused leading zero digits in the over-allocated buffer.
    let it2 = size - length;
    while (it2 !== size && b58[it2] === 0) {
      it2++;
    }
    let str = LEADER.repeat(zeroes);
    for (; it2 < size; ++it2) {
      str += ALPHABET2.charAt(b58[it2]);
    }
    return str;
  }

  /**
   * Decodes a base-N string; returns undefined (instead of throwing) when the
   * string contains a character that is not in the alphabet.
   * @param {string} source
   * @returns {Uint8Array|undefined}
   * @throws {TypeError} If `source` is not a string.
   */
  function decodeUnsafe(source) {
    if (typeof source !== "string") {
      throw new TypeError("Expected String");
    }
    if (source.length === 0) {
      return new Uint8Array();
    }
    let psz = 0;
    let zeroes = 0;
    let length = 0;
    // Leading LEADER characters stand for leading zero bytes.
    while (source[psz] === LEADER) {
      zeroes++;
      psz++;
    }
    const size = (source.length - psz) * FACTOR + 1 >>> 0;
    const b256 = new Uint8Array(size);
    while (psz < source.length) {
      const charCode = source.charCodeAt(psz);
      if (charCode > 255) {
        return;
      }
      let carry = BASE_MAP[charCode];
      if (carry === 255) {
        return;
      }
      let i = 0;
      for (let it3 = size - 1; (carry !== 0 || i < length) && it3 !== -1; it3--, i++) {
        carry += BASE * b256[it3] >>> 0;
        b256[it3] = carry % 256 >>> 0;
        carry = carry / 256 >>> 0;
      }
      if (carry !== 0) {
        throw new Error("Non-zero carry");
      }
      length = i;
      psz++;
    }
    let it4 = size - length;
    while (it4 !== size && b256[it4] === 0) {
      it4++;
    }
    const vch = new Uint8Array(zeroes + (size - it4));
    let j = zeroes;
    while (it4 !== size) {
      vch[j++] = b256[it4++];
    }
    return vch;
  }

  /**
   * Decodes a base-N string, throwing on characters outside the alphabet.
   * @param {string} string
   * @returns {Uint8Array}
   * @throws {Error} If the string contains a non-alphabet character.
   */
  function decode(string) {
    const buffer = decodeUnsafe(string);
    if (buffer) {
      return buffer;
    }
    throw new Error("Non-base" + BASE + " character");
  }

  return {
    encode,
    decodeUnsafe,
    decode
  };
}
|
|
212
|
+
// Base58 alphabet: digits 1-9, then upper- and lower-case letters with
// "I", "O" and "l" left out (and no "0").
const ALPHABET = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
// Shared Base58 codec used for payload detection and for `base58Payload`.
const bs58 = base(ALPHABET);
|
|
214
|
+
/**
 * Renders a byte sequence as a lowercase hex string, two digits per byte.
 *
 * @param {ArrayLike<number>|Iterable<number>} bytes - Byte values to render.
 * @returns {string} Concatenated two-character hex pairs ("" for no bytes).
 */
function bytesToHex(bytes) {
  let hex = "";
  for (const byte of Array.from(bytes)) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
|
|
217
|
+
/**
 * Guesses the text encoding of a payload string and converts it to bytes.
 *
 * Formats are probed in a fixed order and the first match wins:
 *   1. hex     - only hex digits and an even length
 *   2. base58  - only Base58 alphabet characters
 *   3. base64  - standard or URL-safe alphabet, padded or unpadded
 *   4. bin21   - fallback: one byte per UTF-16 code unit of the string
 *
 * @param {string} payload - Encoded payload text.
 * @returns {{buffer: Uint8Array, detectedFormat: string}} Decoded bytes and the
 *   name of the format that matched.
 */
function detectAndConvert(payload) {
  // 1. Hex.
  const looksLikeHex = /^[0-9a-fA-F]+$/.test(payload) && payload.length % 2 === 0;
  if (looksLikeHex) {
    const hexBytes = new Uint8Array(payload.length / 2);
    for (let pair = 0; pair < hexBytes.length; pair++) {
      hexBytes[pair] = parseInt(payload.slice(pair * 2, pair * 2 + 2), 16);
    }
    return { buffer: hexBytes, detectedFormat: "hex" };
  }

  // 2. Base58. A decoder failure just means "not Base58"; keep probing.
  const base58Re = /^[123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz]+$/;
  if (base58Re.test(payload)) {
    let base58Bytes = null;
    try {
      base58Bytes = bs58.decode(payload);
    } catch {
    }
    if (base58Bytes) {
      return { buffer: base58Bytes, detectedFormat: "base58" };
    }
  }

  // 3. Base64: map the URL-safe characters to the standard alphabet and
  //    restore any missing "=" padding before handing the text to atob.
  const b64Re = /^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{4})$/;
  const b64urlRe = /^(?:[A-Za-z0-9\-_]{4})*(?:[A-Za-z0-9\-_]{2}(?:==)?|[A-Za-z0-9\-_]{3}=?|[A-Za-z0-9\-_]{4})$/;
  const standardised = payload.replace(/-/g, "+").replace(/_/g, "/");
  const padded = standardised.padEnd(Math.ceil(standardised.length / 4) * 4, "=");
  if (b64Re.test(payload) || b64urlRe.test(payload)) {
    try {
      const binary = atob(padded);
      const base64Bytes = new Uint8Array(binary.length);
      for (let k = 0; k < binary.length; k++) {
        base64Bytes[k] = binary.charCodeAt(k);
      }
      return {
        buffer: base64Bytes,
        detectedFormat: "base64"
      };
    } catch {
    }
  }

  // 4. Fallback: treat each code unit as one raw byte.
  const rawBytes = new Uint8Array(payload.length);
  let idx = 0;
  while (idx < payload.length) {
    rawBytes[idx] = payload.charCodeAt(idx);
    idx++;
  }
  return { buffer: rawBytes, detectedFormat: "bin21" };
}
|
|
84
249
|
class WordBin {
|
|
85
250
|
constructor(initialDict, options) {
|
|
86
|
-
this.primaryDictVersion = initialDict?.version ??
|
|
251
|
+
this.primaryDictVersion = initialDict?.version ?? 1;
|
|
87
252
|
this.log = options?.debug ? (...args) => console.log("[WordBin]", ...args) : () => {
|
|
88
253
|
};
|
|
89
254
|
}
|
|
90
255
|
static async createFromWords(words) {
|
|
91
|
-
console.warn(
|
|
92
|
-
|
|
93
|
-
|
|
256
|
+
console.warn(
|
|
257
|
+
"Building dictionary from scratch – consider using pre-built files"
|
|
258
|
+
);
|
|
259
|
+
return new WordBin(await buildDictionary(words));
|
|
94
260
|
}
|
|
95
261
|
static async createFromJson(dictJson) {
|
|
96
262
|
return new WordBin(dictJson);
|
|
97
263
|
}
|
|
98
264
|
static async create(options) {
|
|
99
|
-
|
|
100
|
-
return new WordBin(latestDict, options);
|
|
265
|
+
return new WordBin(await loadLatestDictionary(), options);
|
|
101
266
|
}
|
|
102
|
-
async
|
|
267
|
+
async getMapsForVersion(version) {
|
|
103
268
|
const dict = await loadDictionaryByVersion(version);
|
|
104
269
|
const reverseMap = /* @__PURE__ */ new Map();
|
|
270
|
+
const forwardMap = /* @__PURE__ */ new Map();
|
|
271
|
+
const idLengths = /* @__PURE__ */ new Set();
|
|
105
272
|
for (const [hex, words] of Object.entries(dict.words)) {
|
|
106
|
-
if (words.length
|
|
273
|
+
if (!words.length) continue;
|
|
274
|
+
if (words.length > 1) {
|
|
275
|
+
throw new Error(
|
|
276
|
+
`Dictionary corruption: ID ${hex} maps to multiple words`
|
|
277
|
+
);
|
|
278
|
+
}
|
|
279
|
+
const word = words[0];
|
|
280
|
+
const bytes = Buffer.from(hex, "hex");
|
|
281
|
+
idLengths.add(bytes.length);
|
|
282
|
+
reverseMap.set(hex, word);
|
|
283
|
+
forwardMap.set(word, bytes);
|
|
107
284
|
}
|
|
108
|
-
return
|
|
285
|
+
return {
|
|
286
|
+
reverseMap,
|
|
287
|
+
forwardMap,
|
|
288
|
+
sortedIdLengths: Array.from(idLengths).sort((a, b) => b - a)
|
|
289
|
+
};
|
|
109
290
|
}
|
|
110
|
-
|
|
291
|
+
// ── encode ──────────────────────────────────────────────────────────────────
|
|
292
|
+
async encode(text, options) {
|
|
111
293
|
let textStr;
|
|
112
|
-
if (typeof text === "string")
|
|
113
|
-
|
|
114
|
-
else
|
|
115
|
-
|
|
294
|
+
if (typeof text === "string") {
|
|
295
|
+
textStr = text;
|
|
296
|
+
} else if (text instanceof Uint8Array) {
|
|
297
|
+
textStr = toBase64(text);
|
|
298
|
+
} else {
|
|
299
|
+
textStr = text.base64Payload;
|
|
300
|
+
}
|
|
301
|
+
const trimmed = textStr.trim();
|
|
302
|
+
if (!trimmed) {
|
|
116
303
|
return {
|
|
117
304
|
originalText: "",
|
|
118
|
-
dictVersion:
|
|
305
|
+
dictVersion: this.primaryDictVersion,
|
|
119
306
|
encoded: new Uint8Array(0),
|
|
120
307
|
payload: "",
|
|
121
|
-
|
|
308
|
+
bin21: "",
|
|
309
|
+
bin21Payload: "",
|
|
310
|
+
base64Payload: "",
|
|
311
|
+
hexPayload: "",
|
|
312
|
+
base58Payload: "",
|
|
122
313
|
originalBytes: 0,
|
|
123
314
|
encodedBytes: 0,
|
|
124
315
|
bytesSaved: 0,
|
|
125
316
|
ratioPercent: 100
|
|
126
317
|
};
|
|
127
318
|
}
|
|
128
|
-
const
|
|
129
|
-
|
|
130
|
-
const
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
this.log(`[encode] Header hex: ${toHex(header)}`);
|
|
135
|
-
const chunks = [header];
|
|
136
|
-
const reverseMap = await this.getReverseMapForVersion(useVersion);
|
|
137
|
-
this.log(`[encode] Reverse map loaded — size: ${reverseMap.size} entries`);
|
|
138
|
-
this.log("[encode] Word → ID mapping:");
|
|
139
|
-
for (const w of words) {
|
|
140
|
-
const id = await generateWordId(w);
|
|
141
|
-
const key = toHex(id);
|
|
142
|
-
this.log(` "${w}" → ID bytes: [${[...id].join(", ")}] | hex: ${key}`);
|
|
143
|
-
if (reverseMap.has(key)) {
|
|
144
|
-
reverseMap.get(key);
|
|
145
|
-
this.log(` → Found in dictionary → using ${id.length}-byte ID`);
|
|
319
|
+
const useVersion = options?.dictVersion ?? this.primaryDictVersion;
|
|
320
|
+
const { forwardMap } = await this.getMapsForVersion(useVersion);
|
|
321
|
+
const chunks = [new Uint8Array([useVersion])];
|
|
322
|
+
for (const w of trimmed.split(/\s+/).filter(Boolean)) {
|
|
323
|
+
const id = forwardMap.get(w);
|
|
324
|
+
if (id) {
|
|
146
325
|
chunks.push(id);
|
|
147
326
|
} else {
|
|
148
327
|
const utf8 = utf8Encode(w);
|
|
149
328
|
const lenVarint = encodeVarint(utf8.length);
|
|
150
|
-
this.log(` → NOT in dictionary → literal mode`);
|
|
151
|
-
this.log(
|
|
152
|
-
` Literal length varint bytes: [${[...lenVarint].join(", ")}] (value = ${utf8.length})`
|
|
153
|
-
);
|
|
154
|
-
this.log(` Word UTF-8 bytes length: ${utf8.length}`);
|
|
155
329
|
const out = new Uint8Array(1 + lenVarint.length + utf8.length);
|
|
156
330
|
out[0] = LITERAL;
|
|
157
331
|
out.set(lenVarint, 1);
|
|
158
332
|
out.set(utf8, 1 + lenVarint.length);
|
|
159
|
-
this.log(` Literal chunk bytes: [${[...out].join(", ")}]`);
|
|
160
333
|
chunks.push(out);
|
|
161
334
|
}
|
|
162
335
|
}
|
|
163
|
-
const totalLength = chunks.reduce((
|
|
336
|
+
const totalLength = chunks.reduce((sum, c) => sum + c.length, 0);
|
|
164
337
|
const result = new Uint8Array(totalLength);
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
this.log(
|
|
171
|
-
` Chunk ${i}: ${chunk.length} bytes → offset ${off - chunk.length}`
|
|
172
|
-
);
|
|
173
|
-
});
|
|
174
|
-
this.log(
|
|
175
|
-
`[encode] Final encoded bytes (first 32): [${[...result.subarray(0, Math.min(32, result.length))].join(", ")}]`
|
|
176
|
-
);
|
|
338
|
+
let offset = 0;
|
|
339
|
+
for (const chunk of chunks) {
|
|
340
|
+
result.set(chunk, offset);
|
|
341
|
+
offset += chunk.length;
|
|
342
|
+
}
|
|
177
343
|
const originalBytes = new TextEncoder().encode(textStr).length;
|
|
178
|
-
const
|
|
179
|
-
|
|
344
|
+
const hexPayload = bytesToHex(result);
|
|
345
|
+
const bin21Payload = Array.from(result).map((b) => String.fromCharCode(b)).join("");
|
|
346
|
+
const base64Payload = toBase64(result);
|
|
347
|
+
const base58Payload = bs58.encode(result);
|
|
180
348
|
return {
|
|
181
349
|
originalText: textStr,
|
|
182
|
-
dictVersion:
|
|
350
|
+
dictVersion: useVersion,
|
|
183
351
|
encoded: result,
|
|
184
|
-
|
|
185
|
-
|
|
352
|
+
bin21: bin21Payload,
|
|
353
|
+
payload: bin21Payload,
|
|
354
|
+
bin21Payload,
|
|
355
|
+
hexPayload,
|
|
356
|
+
base64Payload,
|
|
357
|
+
base58Payload,
|
|
186
358
|
originalBytes,
|
|
187
|
-
encodedBytes:
|
|
188
|
-
bytesSaved: originalBytes -
|
|
189
|
-
ratioPercent:
|
|
359
|
+
encodedBytes: bin21Payload.length,
|
|
360
|
+
bytesSaved: originalBytes - bin21Payload.length,
|
|
361
|
+
ratioPercent: Math.round(bin21Payload.length / originalBytes * 1e4) / 100
|
|
190
362
|
};
|
|
191
363
|
}
|
|
192
|
-
|
|
364
|
+
// ── decode ───────────────────────────────────────────────────────────────────
|
|
365
|
+
/**
|
|
366
|
+
* Decodes any supported payload format back to human-readable text.
|
|
367
|
+
*
|
|
368
|
+
* For valid WordBin payloads: returns the exact original words.
|
|
369
|
+
* For non-WordBin payloads: scans byte-by-byte, extracts dictionary words
|
|
370
|
+
* wherever possible, and preserves unrecognised
|
|
371
|
+
* bytes as "[0xXX]" markers.
|
|
372
|
+
*/
|
|
373
|
+
async decode(payload) {
|
|
193
374
|
let buffer;
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
buffer = fromBase64(data);
|
|
199
|
-
this.log(`[decode] Decoded to ${buffer.length} bytes`);
|
|
375
|
+
let detectedFormat;
|
|
376
|
+
if (payload instanceof Uint8Array) {
|
|
377
|
+
buffer = payload;
|
|
378
|
+
detectedFormat = "bytes";
|
|
200
379
|
} else {
|
|
201
|
-
buffer =
|
|
202
|
-
this.log(`[decode] Input is Uint8Array with ${buffer.length} bytes`);
|
|
380
|
+
({ buffer, detectedFormat } = detectAndConvert(payload));
|
|
203
381
|
}
|
|
204
|
-
this.log(`[decode] Full buffer hex: ${toHex(buffer)}`);
|
|
205
382
|
this.log(
|
|
206
|
-
`[decode]
|
|
383
|
+
`[decode] format=${detectedFormat} bufLen=${buffer.length} firstBytes=[${Array.from(buffer.slice(0, 8)).join(",")}]`
|
|
207
384
|
);
|
|
208
385
|
if (buffer.length < 1) {
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
386
|
+
return {
|
|
387
|
+
text: "",
|
|
388
|
+
isWordBin: false,
|
|
389
|
+
detectedFormat,
|
|
390
|
+
notice: "Payload is empty — nothing to decode."
|
|
391
|
+
};
|
|
215
392
|
}
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
const
|
|
393
|
+
const availableVersions = await getAllAvailableDictionaryVersions();
|
|
394
|
+
const versionByte = buffer[0];
|
|
395
|
+
const versionIsHeader = availableVersions.includes(versionByte);
|
|
219
396
|
this.log(
|
|
220
|
-
`[decode]
|
|
397
|
+
`[decode] availableVersions=[${availableVersions.join(",")}] versionByte=${versionByte} isKnownHeader=${versionIsHeader}`
|
|
221
398
|
);
|
|
222
|
-
|
|
223
|
-
const
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
399
|
+
const tryOrder = versionIsHeader ? [versionByte, ...availableVersions.filter((v) => v !== versionByte)] : [...availableVersions];
|
|
400
|
+
for (const ver of tryOrder) {
|
|
401
|
+
let maps;
|
|
402
|
+
try {
|
|
403
|
+
maps = await this.getMapsForVersion(ver);
|
|
404
|
+
} catch (err) {
|
|
405
|
+
this.log(`[decode] v${ver}: getMapsForVersion threw — ${err}`);
|
|
406
|
+
continue;
|
|
407
|
+
}
|
|
408
|
+
const { reverseMap, sortedIdLengths } = maps;
|
|
409
|
+
const r1 = this.greedyDecode(buffer, 1, reverseMap, sortedIdLengths) ?? this.tryDecode(1, buffer, reverseMap, [], 0, sortedIdLengths);
|
|
410
|
+
this.log(
|
|
411
|
+
`[decode] v${ver} strict(pos=1): ${r1 !== null ? `"${r1}"` : "null"}`
|
|
228
412
|
);
|
|
413
|
+
if (r1 !== null) {
|
|
414
|
+
const notice2 = versionByte === ver ? void 0 : `Byte[0]=${versionByte} is not a recognised version header but decoded successfully with dictionary v${ver}.`;
|
|
415
|
+
return { text: r1, isWordBin: true, detectedFormat, notice: notice2 };
|
|
416
|
+
}
|
|
417
|
+
const r0 = this.greedyDecode(buffer, 0, reverseMap, sortedIdLengths) ?? this.tryDecode(0, buffer, reverseMap, [], 0, sortedIdLengths);
|
|
418
|
+
this.log(
|
|
419
|
+
`[decode] v${ver} strict(pos=0): ${r0 !== null ? `"${r0}"` : "null"}`
|
|
420
|
+
);
|
|
421
|
+
if (r0 !== null) {
|
|
422
|
+
return {
|
|
423
|
+
text: r0,
|
|
424
|
+
isWordBin: true,
|
|
425
|
+
detectedFormat,
|
|
426
|
+
notice: `Payload had no version header. Decoded using dictionary v${ver}.`
|
|
427
|
+
};
|
|
428
|
+
}
|
|
229
429
|
}
|
|
230
|
-
this.log(`
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
430
|
+
this.log(`[decode] strict parse failed — falling back to partial scan`);
|
|
431
|
+
if (availableVersions.length > 0) {
|
|
432
|
+
const scanVersion = availableVersions[availableVersions.length - 1];
|
|
433
|
+
try {
|
|
434
|
+
const { reverseMap, sortedIdLengths } = await this.getMapsForVersion(scanVersion);
|
|
435
|
+
const scan1 = this.partialScan(buffer, 1, reverseMap, sortedIdLengths);
|
|
436
|
+
const scan0 = this.partialScan(buffer, 0, reverseMap, sortedIdLengths);
|
|
437
|
+
const best = scan1.wordCount >= scan0.wordCount ? scan1 : scan0;
|
|
438
|
+
this.log(
|
|
439
|
+
`[decode] partial scan(pos=1) words=${scan1.wordCount} raw=${scan1.rawSegments.length} | scan(pos=0) words=${scan0.wordCount} raw=${scan0.rawSegments.length}`
|
|
440
|
+
);
|
|
441
|
+
const notice2 = `This does not appear to be a valid WordBin payload. Partial scan using dictionary v${scanVersion} extracted ${best.wordCount} word(s); ${best.rawSegments.length} byte sequence(s) had no dictionary match and are shown as [0xXX] markers.`;
|
|
442
|
+
return {
|
|
443
|
+
text: best.text,
|
|
444
|
+
isWordBin: false,
|
|
445
|
+
detectedFormat,
|
|
446
|
+
rawSegments: best.rawSegments,
|
|
447
|
+
notice: notice2
|
|
448
|
+
};
|
|
449
|
+
} catch {
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
const notice = `Could not decode with any available dictionary (tried: ${availableVersions.join(", ") || "none"}). Falling back to UTF-8 text decoding.`;
|
|
453
|
+
this.log(`[decode] ${notice}`);
|
|
454
|
+
return {
|
|
455
|
+
text: new TextDecoder("utf-8", { fatal: false }).decode(buffer),
|
|
456
|
+
isWordBin: false,
|
|
457
|
+
detectedFormat,
|
|
458
|
+
notice
|
|
459
|
+
};
|
|
235
460
|
}
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
461
|
+
// ── Private: greedy linear decode ────────────────────────────────────────────
|
|
462
|
+
/**
|
|
463
|
+
* O(n) longest-match-first decode. Returns null if any byte has no match.
|
|
464
|
+
* This is the fast path; tryDecode is used as a backtracking fallback.
|
|
465
|
+
*/
|
|
466
|
+
greedyDecode(buffer, startPos, reverseMap, sortedIdLengths) {
|
|
467
|
+
const words = [];
|
|
468
|
+
let pos = startPos;
|
|
469
|
+
while (pos < buffer.length) {
|
|
470
|
+
if (buffer[pos] === LITERAL) {
|
|
471
|
+
const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
|
|
472
|
+
if (byteLen > 1e6 || byteLen < 0) return null;
|
|
473
|
+
const start = pos + 1 + bytesRead;
|
|
474
|
+
const end = start + byteLen;
|
|
475
|
+
if (end > buffer.length) return null;
|
|
476
|
+
words.push(utf8Decode(buffer.subarray(start, end)));
|
|
477
|
+
pos = end;
|
|
478
|
+
continue;
|
|
479
|
+
}
|
|
480
|
+
let matched = false;
|
|
481
|
+
for (const len of sortedIdLengths) {
|
|
482
|
+
if (pos + len > buffer.length) continue;
|
|
483
|
+
const key = toHex(buffer.subarray(pos, pos + len));
|
|
484
|
+
if (reverseMap.has(key)) {
|
|
485
|
+
words.push(reverseMap.get(key));
|
|
486
|
+
pos += len;
|
|
487
|
+
matched = true;
|
|
488
|
+
break;
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
if (!matched) return null;
|
|
492
|
+
}
|
|
493
|
+
return words.join(" ");
|
|
494
|
+
}
|
|
495
|
+
// ── Private: partial / best-effort scan ──────────────────────────────────────
|
|
496
|
+
/**
|
|
497
|
+
* Scans through the buffer extracting any recognised dictionary words.
|
|
498
|
+
* Unrecognised bytes are collected as raw segments and rendered as [0xXX].
|
|
499
|
+
* Always consumes the entire buffer — never returns null.
|
|
500
|
+
*/
|
|
501
|
+
partialScan(buffer, startPos, reverseMap, sortedIdLengths) {
|
|
502
|
+
const parts = [];
|
|
503
|
+
const rawSegments = [];
|
|
504
|
+
let wordCount = 0;
|
|
505
|
+
let pos = startPos;
|
|
506
|
+
while (pos < buffer.length) {
|
|
507
|
+
if (buffer[pos] === LITERAL && pos + 1 < buffer.length) {
|
|
508
|
+
try {
|
|
509
|
+
const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
|
|
510
|
+
if (byteLen > 0 && byteLen <= 1e6) {
|
|
511
|
+
const start = pos + 1 + bytesRead;
|
|
512
|
+
const end = start + byteLen;
|
|
513
|
+
if (end <= buffer.length) {
|
|
514
|
+
const word = utf8Decode(buffer.subarray(start, end));
|
|
515
|
+
parts.push(word);
|
|
516
|
+
wordCount++;
|
|
517
|
+
pos = end;
|
|
518
|
+
continue;
|
|
519
|
+
}
|
|
520
|
+
}
|
|
521
|
+
} catch {
|
|
522
|
+
}
|
|
523
|
+
}
|
|
524
|
+
let matched = false;
|
|
525
|
+
for (const len of sortedIdLengths) {
|
|
526
|
+
if (pos + len > buffer.length) continue;
|
|
527
|
+
const key = toHex(buffer.subarray(pos, pos + len));
|
|
528
|
+
if (reverseMap.has(key)) {
|
|
529
|
+
parts.push(reverseMap.get(key));
|
|
530
|
+
wordCount++;
|
|
531
|
+
pos += len;
|
|
532
|
+
matched = true;
|
|
533
|
+
break;
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
if (!matched) {
|
|
537
|
+
const marker = `[0x${buffer[pos].toString(16).padStart(2, "0")}]`;
|
|
538
|
+
parts.push(marker);
|
|
539
|
+
rawSegments.push(marker);
|
|
540
|
+
this.log(
|
|
541
|
+
`[decode] partial scan: no match at pos=${pos} byte=${buffer[pos]}`
|
|
542
|
+
);
|
|
543
|
+
pos++;
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
return { text: parts.join(" "), wordCount, rawSegments };
|
|
547
|
+
}
|
|
548
|
+
// ── Private: backtracking decode ─────────────────────────────────────────────
|
|
549
|
+
tryDecode(pos, buffer, reverseMap, result, depth, sortedIdLengths) {
|
|
550
|
+
if (pos === buffer.length) return result.join(" ");
|
|
246
551
|
if (buffer[pos] === LITERAL) {
|
|
247
|
-
this.log(
|
|
248
|
-
`${indent}[tryDecode] Found LITERAL marker (0x${LITERAL.toString(16)})`
|
|
249
|
-
);
|
|
250
552
|
const { value: byteLen, bytesRead } = decodeVarint(buffer, pos + 1);
|
|
251
|
-
|
|
252
|
-
`${indent}[tryDecode] Varint: value=${byteLen}, bytesRead=${bytesRead}`
|
|
253
|
-
);
|
|
553
|
+
if (byteLen > 1e6 || byteLen < 0) return null;
|
|
254
554
|
const start = pos + 1 + bytesRead;
|
|
255
555
|
const end = start + byteLen;
|
|
256
|
-
|
|
257
|
-
|
|
556
|
+
if (end > buffer.length) return null;
|
|
557
|
+
result.push(utf8Decode(buffer.subarray(start, end)));
|
|
558
|
+
const res = this.tryDecode(
|
|
559
|
+
end,
|
|
560
|
+
buffer,
|
|
561
|
+
reverseMap,
|
|
562
|
+
result,
|
|
563
|
+
depth + 1,
|
|
564
|
+
sortedIdLengths
|
|
258
565
|
);
|
|
259
|
-
if (end > buffer.length) {
|
|
260
|
-
this.log(`${indent}[tryDecode] Truncated literal — failing path`);
|
|
261
|
-
return null;
|
|
262
|
-
}
|
|
263
|
-
const literalBytes = buffer.subarray(start, end);
|
|
264
|
-
const word = utf8Decode(literalBytes);
|
|
265
|
-
this.log(`${indent}[tryDecode] Decoded literal: "${word}"`);
|
|
266
|
-
result.push(word);
|
|
267
|
-
const res = this.tryDecode(end, buffer, reverseMap, result, depth + 1);
|
|
268
566
|
if (res !== null) return res;
|
|
269
567
|
result.pop();
|
|
270
|
-
this.log(`${indent}[tryDecode] Backtracking from literal`);
|
|
271
|
-
return null;
|
|
272
568
|
}
|
|
273
|
-
for (const len of
|
|
274
|
-
if (pos + len > buffer.length)
|
|
275
|
-
|
|
276
|
-
continue;
|
|
277
|
-
}
|
|
278
|
-
const slice = buffer.subarray(pos, pos + len);
|
|
279
|
-
const key = toHex(slice);
|
|
280
|
-
const keyBytes = [...slice].map((b) => `0x${b.toString(16).padStart(2, "0")}`).join(" ");
|
|
281
|
-
this.log(
|
|
282
|
-
`${indent}[tryDecode] Trying ${len}-byte: [${keyBytes}] hex=${key}`
|
|
283
|
-
);
|
|
569
|
+
for (const len of sortedIdLengths) {
|
|
570
|
+
if (pos + len > buffer.length) continue;
|
|
571
|
+
const key = toHex(buffer.subarray(pos, pos + len));
|
|
284
572
|
if (reverseMap.has(key)) {
|
|
285
|
-
|
|
286
|
-
this.log(`${indent}[tryDecode] Match: "${word}" (ID: ${key})`);
|
|
287
|
-
result.push(word);
|
|
573
|
+
result.push(reverseMap.get(key));
|
|
288
574
|
const res = this.tryDecode(
|
|
289
575
|
pos + len,
|
|
290
576
|
buffer,
|
|
291
577
|
reverseMap,
|
|
292
578
|
result,
|
|
293
|
-
depth + 1
|
|
579
|
+
depth + 1,
|
|
580
|
+
sortedIdLengths
|
|
294
581
|
);
|
|
295
582
|
if (res !== null) return res;
|
|
296
583
|
result.pop();
|
|
297
|
-
this.log(`${indent}[tryDecode] Backtracking from "${word}"`);
|
|
298
|
-
} else {
|
|
299
|
-
this.log(`${indent}[tryDecode] No match for ${key}`);
|
|
300
584
|
}
|
|
301
585
|
}
|
|
302
|
-
this.log(`${indent}[tryDecode] No valid branches — failing path`);
|
|
303
586
|
return null;
|
|
304
587
|
}
|
|
305
588
|
}
|