ruvector 0.2.19 → 0.2.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,379 @@
1
+ /**
2
+ * validator.js - Operational validation for reconstructed code.
3
+ *
4
+ * Verifies that the reconstruction preserves semantics:
5
+ * - Syntax validity (parseable without errors)
6
+ * - String literal preservation (all strings intact)
7
+ * - Class hierarchy preservation (same extends chains)
8
+ * - Export preservation (same exports)
9
+ * - Functional equivalence (same behavior for test inputs)
10
+ */
11
+
12
+ 'use strict';
13
+
14
+ const vm = require('vm');
15
+
16
/**
 * Run the validation passes over a reconstruction and gather their verdicts.
 *
 * Passes run in a fixed order: syntax, string literals, class hierarchy,
 * function/export counts, then a sandboxed comparison of exported values.
 * A syntax failure stops validation immediately and reports every other flag
 * as false. The sandboxed comparison has no option of its own: it runs
 * whenever the syntax pass did not fail (including when it was skipped).
 *
 * @param {string} originalSource - the minified/beautified original
 * @param {string} reconstructedSource - the reconstructed version
 * @param {object} [options]
 * @param {boolean} [options.checkSyntax=true] - run the syntax pass
 * @param {boolean} [options.checkStrings=true] - run the string-literal pass
 * @param {boolean} [options.checkClasses=true] - run the class-hierarchy pass
 * @param {boolean} [options.checkFunctions=true] - run the function/export count pass
 * @param {number} [options.timeoutMs=1000] - sandbox execution timeout
 * @returns {{syntaxValid: boolean, exportsMatch: boolean, stringsPreserved: boolean, classesMatch: boolean, functionallyEquivalent: boolean, issues: string[]}}
 */
function validateReconstruction(originalSource, reconstructedSource, options = {}) {
  // Mirrors destructuring defaults: only `undefined` falls back.
  const orDefault = (value, fallback) => (value === undefined ? fallback : value);
  const wantSyntax = orDefault(options.checkSyntax, true);
  const wantStrings = orDefault(options.checkStrings, true);
  const wantClasses = orDefault(options.checkClasses, true);
  const wantFunctions = orDefault(options.checkFunctions, true);
  const sandboxTimeout = orDefault(options.timeoutMs, 1000);

  const report = {
    syntaxValid: true,
    exportsMatch: true,
    stringsPreserved: true,
    classesMatch: true,
    functionallyEquivalent: true,
    issues: [],
  };

  // Pass 1: syntax. Nothing else is trustworthy if the code does not parse.
  if (wantSyntax) {
    const parsed = checkSyntaxValidity(reconstructedSource);
    report.syntaxValid = parsed.valid;
    if (!report.syntaxValid) {
      report.issues.push(`Syntax error: ${parsed.error}`);
      report.exportsMatch = false;
      report.stringsPreserved = false;
      report.classesMatch = false;
      report.functionallyEquivalent = false;
      return report;
    }
  }

  // Pass 2: string literals.
  if (wantStrings) {
    const strings = checkStringPreservation(originalSource, reconstructedSource);
    report.stringsPreserved = strings.preserved;
    report.issues.push(...strings.missing.map((lost) => `Missing string literal: "${lost}"`));
  }

  // Pass 3: class hierarchy.
  if (wantClasses) {
    const hierarchy = checkClassHierarchy(originalSource, reconstructedSource);
    report.classesMatch = hierarchy.match;
    report.issues.push(...hierarchy.issues);
  }

  // Pass 4: function / arrow / export counts (surfaced as `exportsMatch`).
  if (wantFunctions) {
    const counts = checkFunctionPreservation(originalSource, reconstructedSource);
    report.exportsMatch = counts.match;
    report.issues.push(...counts.issues);
  }

  // Pass 5: best-effort sandboxed comparison of what each version exports.
  if (report.syntaxValid) {
    const behaviour = checkFunctionalEquivalence(
      originalSource,
      reconstructedSource,
      sandboxTimeout,
    );
    report.functionallyEquivalent = behaviour.equivalent;
    report.issues.push(...behaviour.issues);
  }

  return report;
}
112
+
113
/**
 * Report whether `source` parses as the body of a JavaScript function.
 *
 * The source is handed to the Function constructor, which compiles it without
 * running it; any exception raised while doing so is reported via its message.
 *
 * @param {string} source
 * @returns {{valid: boolean, error: string|null}}
 */
function checkSyntaxValidity(source) {
  let verdict;
  try {
    // Compilation only: the resulting function is never called.
    new Function(source);
    verdict = { valid: true, error: null };
  } catch (parseError) {
    verdict = { valid: false, error: parseError.message };
  }
  return verdict;
}
128
+
129
/**
 * Check that all string literals from the original appear in the reconstruction.
 * Identifiers may change, but string values must be preserved.
 *
 * Literals are compared after un-escaping `\'` and `\"`, because which quote
 * needs escaping depends on the delimiter chosen, not on the string's value
 * (`"it's"` and `'it\'s'` are the same string). Each missing literal is
 * reported once, however many times it occurs in the original.
 *
 * @param {string} original
 * @param {string} reconstructed
 * @returns {{preserved: boolean, missing: string[], total: number}}
 *   `preserved` reflects every missing literal; `missing` lists at most the
 *   first 20 of them (raw text as written in the original); `total` is the
 *   number of literals found in the original, including skipped short ones.
 */
function checkStringPreservation(original, reconstructed) {
  // Walk escape pairs left to right so that `\\` is consumed as a unit and a
  // following quote is not mistaken for an escaped one.
  const normalize = (raw) =>
    raw.replace(/\\([\s\S])/g, (pair, ch) => (ch === '"' || ch === "'" ? ch : pair));

  const origStrings = extractStringLiterals(original);
  const reconStrings = new Set(extractStringLiterals(reconstructed).map(normalize));

  const missing = [];
  const alreadyReported = new Set();
  for (const s of origStrings) {
    // Skip very short strings and common noise
    if (s.length < 2) continue;
    const value = normalize(s);
    if (reconStrings.has(value) || alreadyReported.has(value)) continue;
    alreadyReported.add(value);
    missing.push(s);
  }

  return {
    preserved: missing.length === 0,
    missing: missing.slice(0, 20), // Cap at 20 for reporting
    total: origStrings.length,
  };
}
156
+
157
/**
 * Extract the contents of all single- and double-quoted string literals.
 *
 * The source is scanned once, left to right, so a quote character that sits
 * inside a literal of the other kind (the apostrophe in `"don't"`) cannot
 * start a phantom literal. A literal may not span a raw line break; only a
 * backslash line continuation is allowed, as in JavaScript itself.
 *
 * This is a lexical heuristic, not a parser: template literals are ignored,
 * and quotes inside comments or regex literals are still treated as string
 * delimiters.
 *
 * @param {string} source
 * @returns {string[]} raw literal bodies (escape sequences left as written),
 *   all double-quoted literals first, then all single-quoted ones, each group
 *   in source order
 */
function extractStringLiterals(source) {
  const doubleQuoted = [];
  const singleQuoted = [];

  // Group 1 captures a double-quoted body, group 2 a single-quoted body.
  const literal =
    /"((?:[^"\\\r\n]|\\(?:\r\n|[\s\S]))*)"|'((?:[^'\\\r\n]|\\(?:\r\n|[\s\S]))*)'/g;

  let match;
  while ((match = literal.exec(source)) !== null) {
    if (match[1] !== undefined) {
      doubleQuoted.push(match[1]);
    } else {
      singleQuoted.push(match[2]);
    }
  }

  // Preserve the historical ordering: double-quoted results come first.
  return doubleQuoted.concat(singleQuoted);
}
180
+
181
/**
 * Compare the class declarations found in two versions of the source.
 *
 * Two things are checked: every well-known base class (Error, EventEmitter,
 * ...) that the original extends must still be extended somewhere in the
 * reconstruction, and both versions must declare the same number of classes.
 * Other base names are allowed to differ, since they may have been renamed.
 *
 * @param {string} original
 * @param {string} reconstructed
 * @returns {{match: boolean, issues: string[]}}
 */
function checkClassHierarchy(original, reconstructed) {
  // Base names that renaming should never touch.
  const stableBases = new Set([
    'Error',
    'TypeError',
    'RangeError',
    'EventEmitter',
    'Stream',
    'Buffer',
  ]);

  // Distinct, non-empty base names in first-seen order.
  const basesOf = (classList) => {
    const seen = new Set();
    for (const entry of classList) {
      if (entry.base) seen.add(entry.base);
    }
    return seen;
  };

  const before = extractClassHierarchy(original);
  const after = extractClassHierarchy(reconstructed);
  const basesAfter = basesOf(after);
  const issues = [];

  basesOf(before).forEach((base) => {
    if (!basesAfter.has(base) && stableBases.has(base)) {
      issues.push(`Base class "${base}" missing from reconstruction`);
    }
  });

  if (before.length !== after.length) {
    issues.push(
      `Class count mismatch: original has ${before.length}, reconstructed has ${after.length}`,
    );
  }

  return { match: issues.length === 0, issues };
}
218
+
219
/**
 * Extract class declarations/expressions and the name each one extends.
 *
 * Lexical heuristic over the raw text (no parsing):
 * - identifiers may contain `$`, as minified code frequently does;
 * - `class extends Base` (an anonymous class expression) is recorded with the
 *   placeholder name `(anonymous)` instead of being misread as a class called
 *   "extends";
 * - the keyword must stand alone, so `subclass` or `obj.className` never match;
 * - an anonymous class without `extends` is not recorded;
 * - for a dotted base (`extends events.EventEmitter`) only the first
 *   identifier is captured.
 *
 * @param {string} source
 * @returns {Array<{name: string, base: string|null}>}
 */
function extractClassHierarchy(source) {
  const classes = [];
  // Group 1: optional class name (never the keyword `extends`).
  // Group 2: optional base identifier following `extends`.
  const re =
    /(?<![\w$.])class(?![\w$])(?:\s+(?!extends(?![\w$]))([\w$]+))?(?:\s+extends\s+([\w$]+))?/g;
  let match;
  while ((match = re.exec(source)) !== null) {
    const name = match[1];
    const base = match[2];
    // A bare `class` with neither a name nor a base carries no hierarchy info.
    if (name === undefined && base === undefined) continue;
    classes.push({ name: name || '(anonymous)', base: base || null });
  }
  return classes;
}
234
+
235
/**
 * Compare how often function-like and export-like constructs occur in the
 * two sources.
 *
 * Three textual tallies are taken on each side: `function ... (` headers,
 * `=>` tokens, and `module.exports` / `export` occurrences. Every tally that
 * differs yields one issue, in that order.
 *
 * @param {string} original
 * @param {string} reconstructed
 * @returns {{match: boolean, issues: string[]}}
 */
function checkFunctionPreservation(original, reconstructed) {
  const occurrences = (text, pattern) => {
    const hits = text.match(pattern);
    return hits === null ? 0 : hits.length;
  };

  const probes = [
    { label: 'Function', pattern: /function\s*[\w$]*\s*\(/g },
    { label: 'Arrow function', pattern: /=>/g },
    // module.exports / export statements
    { label: 'Export', pattern: /module\.exports|export\s+(default\s+)?/g },
  ];

  const issues = [];
  for (const probe of probes) {
    const before = occurrences(original, probe.pattern);
    const after = occurrences(reconstructed, probe.pattern);
    if (before === after) continue;
    issues.push(
      `${probe.label} count mismatch: original has ${before}, reconstructed has ${after}`,
    );
  }

  return { match: issues.length === 0, issues };
}
275
+
276
/**
 * Best-effort functional equivalence check.
 * Runs both versions in a VM context and compares the shape of what each one
 * leaves in `module.exports`.
 *
 * This is a heuristic — it cannot prove full equivalence, but catches
 * obvious breakages (broken references, exports that changed kind or count).
 * Exported names are allowed to differ; only the type of the export value,
 * the number of keys, and the multiset of key value types are compared.
 *
 * If the original itself fails to run there is no baseline to compare
 * against, so the result is reported as equivalent with no issues, whether
 * or not the reconstruction also fails. The two failures are not compared.
 *
 * @param {string} original
 * @param {string} reconstructed
 * @param {number} timeoutMs
 * @returns {{equivalent: boolean, issues: string[]}}
 */
function checkFunctionalEquivalence(original, reconstructed, timeoutMs) {
  const issues = [];

  const origExports = safeEvalExports(original, timeoutMs);
  const reconExports = safeEvalExports(reconstructed, timeoutMs);

  // A successful run reports `error: null` exactly. Testing truthiness instead
  // would mistake a failure with an empty or undefined message for success.
  const origFailed = origExports.error !== null;
  const reconFailed = reconExports.error !== null;

  if (origFailed) {
    // No baseline available — nothing meaningful to compare.
    return { equivalent: true, issues };
  }

  if (reconFailed) {
    issues.push(`Reconstructed code fails to execute: ${reconExports.error}`);
    return { equivalent: false, issues };
  }

  const origValue = origExports.exports;
  const reconValue = reconExports.exports;

  // `module.exports = function ...` vs `module.exports = {...}` is a breaking
  // change that key comparison alone cannot see (both may have zero keys).
  const kindOf = (value) => (value === null ? 'null' : typeof value);
  if (kindOf(origValue) !== kindOf(reconValue)) {
    issues.push(`Export value type differs: ${kindOf(origValue)} vs ${kindOf(reconValue)}`);
  }

  // Compare export shapes (type and count of exported values)
  const origKeys = Object.keys(origValue || {}).sort();
  const reconKeys = Object.keys(reconValue || {}).sort();

  // Exports may have been renamed, so just compare counts and types
  if (origKeys.length !== reconKeys.length) {
    issues.push(
      `Exported key count differs: ${origKeys.length} vs ${reconKeys.length}`,
    );
  }

  // Sorted type lists are compared position by position over the shared prefix.
  const origTypes = origKeys.map((k) => typeof origValue[k]).sort();
  const reconTypes = reconKeys.map((k) => typeof reconValue[k]).sort();

  for (let i = 0; i < Math.min(origTypes.length, reconTypes.length); i++) {
    if (origTypes[i] !== reconTypes[i]) {
      issues.push(
        `Export type mismatch at position ${i}: ${origTypes[i]} vs ${reconTypes[i]}`,
      );
    }
  }

  return { equivalent: issues.length === 0, issues };
}
335
+
336
/**
 * Execute code in a VM context with stubbed CommonJS globals and return
 * whatever it leaves in `module.exports`.
 *
 * `exports` is the same object as `module.exports`, as in real CommonJS, so
 * both `exports.x = ...` and `module.exports = ...` are observed.
 *
 * Any failure (syntax error, runtime throw, timeout) is caught and reported
 * as a non-empty string; thrown values that are not Error objects, including
 * `null`, are stringified rather than dereferenced.
 *
 * NOTE(review): Node's `vm` module is not a security boundary, and the
 * `Buffer` stub returns host Buffer objects into the context. Do not rely on
 * this function to contain hostile code.
 *
 * @param {string} source
 * @param {number} timeoutMs - passed to `runInContext` as its `timeout`
 * @returns {{exports: object|null, error: string|null}}
 *   `error` is `null` exactly when the script ran to completion.
 */
function safeEvalExports(source, timeoutMs) {
  try {
    const moduleRecord = { exports: {} };
    const sandbox = {
      module: moduleRecord,
      // Alias, not a second object: writes through `exports` must be visible.
      exports: moduleRecord.exports,
      require: () => ({}),
      console: { log() {}, error() {}, warn() {}, info() {} },
      process: { env: {}, argv: [], cwd: () => '/' },
      setTimeout: () => {},
      setInterval: () => {},
      clearTimeout: () => {},
      clearInterval: () => {},
      Buffer: { from: () => Buffer.alloc(0), alloc: () => Buffer.alloc(0) },
      global: {},
      __dirname: '/',
      __filename: '/test.js',
    };

    const context = vm.createContext(sandbox);
    const script = new vm.Script(source, { filename: 'reconstructed.js' });
    script.runInContext(context, { timeout: timeoutMs });

    return { exports: sandbox.module.exports, error: null };
  } catch (err) {
    let message = '';
    try {
      // Errors raised inside the context belong to another realm, so
      // duck-type on `.message` instead of using `instanceof Error`.
      message =
        err !== null && err !== undefined && typeof err.message === 'string'
          ? err.message
          : String(err);
    } catch (describeError) {
      // The thrown value could not even be inspected; use the fallback below.
    }
    return { exports: null, error: message || 'Unknown error' };
  }
}
370
+
371
+ module.exports = {
372
+ validateReconstruction,
373
+ checkSyntaxValidity,
374
+ checkStringPreservation,
375
+ checkClassHierarchy,
376
+ checkFunctionPreservation,
377
+ checkFunctionalEquivalence,
378
+ extractStringLiterals,
379
+ };
@@ -0,0 +1,140 @@
1
+ /**
2
+ * witness.js - SHA-256 witness chain generation and verification.
3
+ *
4
+ * A witness chain is a Merkle-like structure that cryptographically proves
5
+ * the decompiled output derives from a specific input bundle.
6
+ *
7
+ * Chain structure:
8
+ * root = H(source_hash || module_hashes[0] || ... || module_hashes[n])
9
+ *
10
+ * Each entry records:
11
+ * { hash, label, parent }
12
+ * so the chain can be verified without re-running the decompiler.
13
+ */
14
+
15
+ 'use strict';
16
+
17
+ const crypto = require('crypto');
18
+
19
/**
 * Hash the given data with SHA-256.
 * @param {string|Buffer} data - content to digest
 * @returns {string} the digest, hex-encoded
 */
function sha256(data) {
  const hasher = crypto.createHash('sha256');
  hasher.update(data);
  return hasher.digest('hex');
}
27
+
28
/**
 * Produce a witness record tying a set of decompiled modules to the bundle
 * they came from.
 *
 * The chain holds one `source` node (no parent), one `module:<name>` node per
 * module, and a final `root` node; every non-source node names the source
 * hash as its parent. The root is the SHA-256 of the source hash followed by
 * each module hash, in input order, concatenated as hex text.
 *
 * @param {string} source - original bundle source code
 * @param {Array<{name: string, content: string}>} modules - decompiled modules
 * @returns {{
 *   source_hash: string,
 *   module_hashes: Array<{name: string, hash: string}>,
 *   root: string,
 *   chain: Array<{hash: string, label: string, parent: string|null}>,
 *   created: string,
 *   algorithm: string
 * }}
 */
function buildWitnessChain(source, modules) {
  const sourceHash = sha256(source);

  // The chain starts at the source itself.
  const chain = [{ hash: sourceHash, label: 'source', parent: null }];
  const moduleHashes = [];

  // Each module hangs directly off the source node.
  for (const mod of modules) {
    const digest = sha256(mod.content);
    moduleHashes.push({ name: mod.name, hash: digest });
    chain.push({ hash: digest, label: `module:${mod.name}`, parent: sourceHash });
  }

  // root = H(source_hash || mod_hash_0 || ... || mod_hash_n)
  let preimage = sourceHash;
  for (const entry of moduleHashes) {
    preimage += entry.hash;
  }
  const root = sha256(preimage);
  chain.push({ hash: root, label: 'root', parent: sourceHash });

  const created = new Date().toISOString();
  return {
    source_hash: sourceHash,
    module_hashes: moduleHashes,
    root,
    chain,
    created,
    algorithm: 'sha256',
  };
}
85
+
86
/**
 * Verify a witness chain, optionally against the original source.
 *
 * Checks performed:
 * 1. If `sourceContent` is supplied (an empty string counts as supplied), its
 *    SHA-256 must equal `witness.source_hash`.
 * 2. Every chain node must be an object, and any parent it names must be the
 *    hash of some node in the chain.
 * 3. The root must equal the SHA-256 of `source_hash` followed by every
 *    module hash. A witness lacking `source_hash` or `module_hashes` cannot
 *    have its root recomputed and is therefore reported as invalid rather
 *    than silently accepted.
 *
 * Module contents are not re-hashed here; only the recorded hashes are used.
 *
 * @param {object} witness - the witness object (from buildWitnessChain)
 * @param {string} [sourceContent] - original source to verify against (optional)
 * @returns {{valid: boolean, chain_length: number, root: string, errors: string[]}}
 */
function verifyWitnessChain(witness, sourceContent) {
  const errors = [];

  if (!witness || !Array.isArray(witness.chain) || !witness.root) {
    return { valid: false, chain_length: 0, root: '', errors: ['Missing witness data'] };
  }

  // An empty string is real content and must be checked; other falsy values
  // (undefined, null, ...) keep meaning "no source provided".
  const hasSource = typeof sourceContent === 'string' || Boolean(sourceContent);
  if (hasSource) {
    const actualSourceHash = sha256(sourceContent);
    if (actualSourceHash !== witness.source_hash) {
      errors.push(
        `Source hash mismatch: expected ${witness.source_hash}, got ${actualSourceHash}`,
      );
    }
  }

  // Verify chain integrity: each node's parent must exist in the chain
  const isNode = (n) => n !== null && typeof n === 'object';
  const hashSet = new Set(witness.chain.filter(isNode).map((n) => n.hash));
  witness.chain.forEach((node, index) => {
    if (!isNode(node)) {
      errors.push(`Malformed chain node at index ${index}`);
      return;
    }
    if (node.parent && !hashSet.has(node.parent)) {
      errors.push(`Broken chain: node ${node.label} references missing parent ${node.parent}`);
    }
  });

  // Recompute root from module hashes
  if (Array.isArray(witness.module_hashes) && witness.source_hash) {
    const allHashes =
      witness.source_hash + witness.module_hashes.map((m) => (m ? m.hash : '')).join('');
    const expectedRoot = sha256(allHashes);
    if (expectedRoot !== witness.root) {
      errors.push(`Root mismatch: expected ${expectedRoot}, got ${witness.root}`);
    }
  } else {
    // Without these fields the root is an unverifiable claim.
    errors.push('Cannot verify root: witness is missing source_hash or module_hashes');
  }

  return {
    valid: errors.length === 0,
    chain_length: witness.chain.length,
    root: witness.root,
    errors,
  };
}
135
+
136
+ module.exports = {
137
+ sha256,
138
+ buildWitnessChain,
139
+ verifyWitnessChain,
140
+ };
@@ -0,0 +1,27 @@
1
+ {
2
+ "name": "ruvector-decompiler-wasm",
3
+ "collaborators": [
4
+ "Ruvector Team"
5
+ ],
6
+ "description": "WASM bindings for the RuVector JavaScript bundle decompiler (Louvain pipeline)",
7
+ "version": "2.1.0",
8
+ "license": "MIT",
9
+ "repository": {
10
+ "type": "git",
11
+ "url": "https://github.com/ruvnet/ruvector"
12
+ },
13
+ "files": [
14
+ "ruvector_decompiler_wasm_bg.wasm",
15
+ "ruvector_decompiler_wasm.js",
16
+ "ruvector_decompiler_wasm.d.ts"
17
+ ],
18
+ "main": "ruvector_decompiler_wasm.js",
19
+ "types": "ruvector_decompiler_wasm.d.ts",
20
+ "keywords": [
21
+ "decompiler",
22
+ "javascript",
23
+ "wasm",
24
+ "mincut",
25
+ "louvain"
26
+ ]
27
+ }
@@ -0,0 +1,27 @@
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+
4
+ /**
5
+ * Decompile a minified JavaScript bundle using the full Louvain pipeline.
6
+ *
7
+ * @param source - The minified JavaScript source code.
8
+ * @param config_json - JSON string of `DecompileConfig` fields. Pass `"{}"` for
9
+ *   defaults.
10
+ * @returns A JSON string. On success it contains the `DecompileResult`
11
+ *   (modules, witness, inferred names, etc.); on failure it encodes an object
12
+ *   with an `"error"` field instead.
13
+ *
14
+ * NOTE(review): both outcomes share the one `string` return type, so callers
15
+ *   presumably need to parse the result and check for `"error"` themselves.
16
+ */
17
+ export function decompile(source: string, config_json: string): string;
18
+
19
+ /**
20
+ * Initialize the WASM module: installs a panic hook so that failures produce more useful error messages. Takes no arguments and returns nothing.
21
+ */
22
+ export function init(): void;
23
+
24
+ /**
25
+ * Return the version of the decompiler WASM module as a string (the format of the string is not specified by this declaration).
26
+ */
27
+ export function version(): string;