ruvector 0.2.19 → 0.2.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,463 @@
1
+ /**
2
+ * decompiler/index.js - High-level decompiler API.
3
+ *
4
+ * Exports three main entry points:
5
+ * - decompilePackage(name, version, options)
6
+ * - decompileFile(filePath, options)
7
+ * - decompileUrl(url, options)
8
+ *
9
+ * Each returns a standardized DecompileResult:
10
+ * { modules, metrics, witness, source, packageInfo? }
11
+ */
12
+
13
+ 'use strict';
14
+
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+ const {
18
+ fetchPackageInfo,
19
+ fetchPackageFileList,
20
+ fetchFileContent,
21
+ findMainBundle,
22
+ parseTarget,
23
+ } = require('./npm-fetch');
24
+ const { splitModules } = require('./module-splitter');
25
+ const { buildWitnessChain, verifyWitnessChain } = require('./witness');
26
+ const { computeMetrics, computeModuleMetrics } = require('./metrics');
27
+ const { reconstructCode, reconstructRunnable } = require('./reconstructor');
28
+ const { validateReconstruction } = require('./validator');
29
+
30
/**
 * Try the WASM Louvain decompiler (full graph-partitioning pipeline).
 *
 * Best-effort by design: if the WASM module cannot be loaded, `decompile`
 * throws, its output is not valid JSON, or the parsed result carries an
 * `error` field, this returns `null` so the caller can fall back to another
 * engine.
 *
 * @param {string} source - raw JavaScript source
 * @param {object} [options]
 * @param {number} [options.minConfidence=0.3] - forwarded as `min_confidence`;
 *   an explicit 0 is honoured
 * @param {boolean} [options.witness=true] - forwarded as `generate_witness`
 * @returns {{modules: object[], metrics: object, witness: object|null, moduleTree: object|null, beautifiedSource: string}|null}
 */
function tryWasmDecompiler(source, options = {}) {
  try {
    const wasm = require('../../wasm/ruvector_decompiler_wasm');
    const configJson = JSON.stringify({
      target_modules: null,
      // `??` instead of `||`: a caller asking for a threshold of 0 must not
      // be silently bumped back to the 0.3 default.
      min_confidence: options.minConfidence ?? 0.3,
      generate_source_maps: false,
      generate_witness: options.witness !== false,
      output_filename: 'bundle.js',
      model_path: null,
      hierarchical_output: true,
      max_depth: 3,
      min_folder_size: 3,
    });

    const result = JSON.parse(wasm.decompile(source, configJson));
    if (result.error) return null;

    // Convert the WASM result into the module shape used by the Node.js pipeline.
    const wasmModules = result.modules || [];
    return {
      modules: wasmModules.map((m) => {
        const declarationCount = (m.declarations && m.declarations.length) || 0;
        return {
          name: m.name,
          content: m.source || '',
          declarations: declarationCount,
          fragments: declarationCount,
          confidence: 0.8, // fixed value; the WASM result is not consulted for confidence
        };
      }),
      metrics: {
        // Report real UTF-8 bytes, matching computeMetrics(); `source.length`
        // counts UTF-16 code units and under-reports non-ASCII sources.
        source: { sizeBytes: Buffer.byteLength(source, 'utf-8') },
        modules: wasmModules.length,
        engine: 'wasm-louvain',
      },
      witness: result.witness || null,
      moduleTree: result.module_tree || null,
      beautifiedSource: source, // this engine returns the input unformatted
    };
  } catch {
    return null; // WASM not available (or failed); caller falls back
  }
}
78
+
79
/**
 * Pretty-print JavaScript with js-beautify when that optional dependency can
 * be loaded. If loading or formatting throws for any reason, the input is
 * handed back untouched.
 *
 * @param {string} source - JavaScript text to format
 * @returns {string} formatted text, or `source` itself on failure
 */
function beautify(source) {
  const formatOptions = {
    indent_size: 2,
    space_in_empty_paren: false,
    preserve_newlines: true,
    max_preserve_newlines: 2,
    end_with_newline: true,
  };

  let formatted;
  try {
    const lib = require('js-beautify');
    // The package may expose the JS formatter as `.js` or be the formatter itself.
    const format = lib.js || lib;
    formatted = format(source, formatOptions);
  } catch {
    // js-beautify not installed (or it failed): keep the source as-is.
    formatted = source;
  }
  return formatted;
}
101
+
102
/**
 * Try to run the Rust decompiler (Louvain graph partitioning) on a file.
 *
 * Two launch candidates are tried in order: `cargo run ... --example run_on_cli`
 * and the prebuilt `target/release/examples/run_on_cli` binary, both resolved
 * relative to four directories above this file. The first candidate that exits
 * with status 0 wins.
 *
 * The process is started with an argument array (no shell), so `filePath` and
 * `outputDir` are passed verbatim and cannot be interpreted as shell syntax.
 *
 * @param {string} filePath - path to JS file
 * @param {string} outputDir - output directory
 * @returns {{success: boolean, modules: number, outputDir: string}|null}
 *   `null` when no candidate could be run successfully; `modules` is 0 when
 *   the tool's output contains no "Wrote N modules" line
 */
function tryRustDecompiler(filePath, outputDir) {
  try {
    const { spawnSync } = require('child_process');
    const repoRoot = path.join(__dirname, '../../../..');
    const toolArgs = [filePath, '--output-dir', outputDir];
    const candidates = [
      {
        command: 'cargo',
        args: [
          'run', '--release', '-p', 'ruvector-decompiler',
          '--example', 'run_on_cli', '--', ...toolArgs,
        ],
      },
      {
        command: path.join(__dirname, '../../../../target/release/examples/run_on_cli'),
        args: toolArgs,
      },
    ];

    for (const { command, args } of candidates) {
      const run = spawnSync(command, args, {
        timeout: 120000,
        stdio: ['pipe', 'pipe', 'pipe'],
        cwd: repoRoot,
        encoding: 'utf8',
      });
      // spawnSync reports failures instead of throwing: `error` covers a
      // missing binary / timeout, a non-zero `status` covers tool failure.
      if (run.error || run.status !== 0) continue;

      // Search both streams: the summary line may be printed on either one.
      const output = `${run.stdout || ''}\n${run.stderr || ''}`;
      const match = output.match(/Wrote (\d+) modules/);
      const moduleCount = match ? Number.parseInt(match[1], 10) : 0;
      return { success: true, modules: moduleCount, outputDir };
    }
  } catch {
    // child_process unavailable or paths unresolvable: treat as "no Rust decompiler".
  }
  return null;
}
137
+
138
/**
 * Read every `.js` file found directly in `sourceDir` as a module record.
 * Best-effort: a read failure stops the scan and whatever was loaded so far
 * is returned (possibly an empty list).
 *
 * @param {string} sourceDir
 * @returns {Array<{name: string, content: string, fragments: number, confidence: number}>}
 */
function loadRustModules(sourceDir) {
  const loaded = [];
  try {
    for (const file of fs.readdirSync(sourceDir)) {
      if (!file.endsWith('.js')) continue;
      loaded.push({
        // Strip only the trailing extension; `replace('.js', '')` would cut
        // the first occurrence (e.g. "a.js.map.js" -> "a.map.js").
        name: file.slice(0, -'.js'.length),
        content: fs.readFileSync(path.join(sourceDir, file), 'utf8'),
        fragments: 0,
        confidence: 0.8,
      });
    }
  } catch {
    // Directory missing or unreadable: keep what we have.
  }
  return loaded;
}

/**
 * Run the Rust decompiler into a scratch directory and load its output.
 * The scratch directory is always removed afterwards, since everything needed
 * is read into memory first.
 *
 * @param {string} source - original source, used for metrics and returned as-is
 * @param {string} filePath - file handed to the Rust binary
 * @returns {object|null} decompile result, or null if the binary was not
 *   usable or produced no modules
 */
function runRustPipeline(source, filePath) {
  // pid + timestamp keeps concurrent processes from sharing (and deleting)
  // each other's scratch directory.
  const tmpDir = path.join(
    require('os').tmpdir(),
    `ruvector-decompile-${process.pid}-${Date.now()}`,
  );
  try {
    const rustResult = tryRustDecompiler(filePath, tmpDir);
    if (!rustResult || !rustResult.success) return null;

    const rustModules = loadRustModules(path.join(tmpDir, 'source'));
    if (rustModules.length === 0) return null;

    const sourceMetrics = computeMetrics(source);
    let witnessChain = null;
    try {
      witnessChain = JSON.parse(fs.readFileSync(path.join(tmpDir, 'witness.json'), 'utf8'));
    } catch {
      // The witness file is optional; a missing/invalid one leaves witness null.
    }
    return {
      modules: rustModules,
      metrics: { source: sourceMetrics, modules: rustModules.length, engine: 'rust-louvain' },
      witness: witnessChain,
      beautifiedSource: source,
      source,
    };
  } finally {
    try {
      fs.rmSync(tmpDir, { recursive: true, force: true });
    } catch {
      // Cleanup is best-effort; never mask the pipeline outcome.
    }
  }
}

/**
 * Apply readable reconstruction to each module IN PLACE (content, renames and
 * confidence are overwritten) and summarise the outcome.
 *
 * @param {object[]} modules
 * @param {object} settings - { patternPath, addComments, improveStyle, minConfidence, validate }
 * @returns {object} reconstruction summary
 */
function reconstructModules(modules, settings) {
  const { patternPath, addComments, improveStyle, minConfidence, validate } = settings;
  let totalRenames = 0;
  let totalComments = 0;
  let totalConfidence = 0;
  const validationResults = [];

  for (const mod of modules) {
    const originalContent = mod.content;
    const result = reconstructCode(originalContent, {
      patternPath,
      propagateNames: true,
      addComments,
      improveStyle,
      minConfidence,
    });

    mod.content = result.code;
    mod.renames = result.renames;
    mod.confidence = Math.max(mod.confidence, result.confidence);

    totalRenames += result.renames.length;
    totalComments += result.comments;
    totalConfidence += result.confidence;

    if (validate) {
      validationResults.push({
        module: mod.name,
        ...validateReconstruction(originalContent, result.code),
      });
    }
  }

  const summary = {
    totalRenames,
    totalComments,
    averageConfidence: modules.length > 0
      ? parseFloat((totalConfidence / modules.length).toFixed(3))
      : 0,
    modulesProcessed: modules.length,
  };

  if (validate) {
    summary.validation = validationResults;
    summary.allValid = validationResults.every((v) => v.syntaxValid);
    summary.allEquivalent = validationResults.every((v) => v.functionallyEquivalent);
  }
  return summary;
}

/**
 * Core decompilation pipeline.
 *
 * Engines are tried in this order, and the first one that yields a result wins:
 *   1. WASM Louvain   - when `useRust !== false` and the source is longer than 1000 chars.
 *   2. Rust binary    - when `useRust` is truthy, `filePath` is given and the
 *                       source is longer than 100000 chars.
 *   3. Node.js path   - beautify -> split -> metrics -> witness -> (optional) reconstruct.
 *
 * Results from engines 1 and 2 are returned as-is; `reconstruct` / `validate`
 * only apply to the Node.js path.
 *
 * @param {string} source - raw JavaScript source
 * @param {object} [options]
 * @param {number} [options.minConfidence=0.3]
 * @param {boolean} [options.witness=true]
 * @param {boolean} [options.reconstruct=false] - apply readable reconstruction
 * @param {boolean} [options.validate=false] - validate each reconstruction
 * @param {string} [options.patternPath] - path to training patterns JSON
 * @param {boolean} [options.addComments=true] - add JSDoc comments during reconstruction
 * @param {boolean} [options.improveStyle=true] - apply style improvements during reconstruction
 * @param {boolean} [options.useRust=true] - try the WASM/Rust Louvain engines first
 * @param {string} [options.filePath] - original file path (needed for the Rust binary)
 * @returns {{modules: object[], metrics: object, witness: object|null, beautifiedSource: string, reconstruction?: object}}
 */
function decompileSource(source, options = {}) {
  const {
    minConfidence = 0.3,
    witness: generateWitness = true,
    reconstruct = false,
    validate = false,
    patternPath,
    addComments = true,
    improveStyle = true,
    useRust = true,
    filePath,
  } = options;

  // Priority 1: WASM Louvain (no native binary needed).
  if (useRust !== false && source.length > 1000) {
    const wasmResult = tryWasmDecompiler(source, options);
    if (wasmResult) return wasmResult;
  }

  // Priority 2: Rust binary (requires a cargo build).
  if (useRust && filePath && source.length > 100000) {
    const rustResult = runRustPipeline(source, filePath);
    if (rustResult) return rustResult;
  }

  // Fallback: Node.js keyword-based splitting.
  const beautified = beautify(source);
  const { modules, unclassified } = splitModules(beautified, { minConfidence });
  const sourceMetrics = computeMetrics(beautified);
  const moduleMetrics = computeModuleMetrics(modules);
  // NOTE(review): witness and module metrics are computed BEFORE the optional
  // reconstruction rewrites module content below; confirm that is intended.
  const witnessChain = generateWitness ? buildWitnessChain(source, modules) : null;

  const reconstructionSummary = reconstruct
    ? reconstructModules(modules, { patternPath, addComments, improveStyle, minConfidence, validate })
    : null;

  return {
    modules,
    metrics: {
      source: sourceMetrics,
      modules: moduleMetrics,
      unclassifiedStatements: unclassified.length,
    },
    witness: witnessChain,
    beautifiedSource: beautified,
    ...(reconstructionSummary ? { reconstruction: reconstructionSummary } : {}),
  };
}
282
+
283
/**
 * Decompile the main bundle of an npm package.
 *
 * Steps: look up the package, pick the version (`version`, or the package's
 * `latest` when omitted), list its files, locate the main bundle, download it
 * and run it through decompileSource().
 *
 * @param {string} packageName - e.g. 'express', '@anthropic-ai/claude-code'
 * @param {string} [version] - falls back to `info.latest` when falsy
 * @param {object} [options] - forwarded to decompileSource()
 * @param {number} [options.minConfidence=0.3]
 * @param {boolean} [options.witness=true]
 * @returns {Promise<{modules: object[], metrics: object, witness: object|null, packageInfo: object, source: string}>}
 * @throws {Error} when the version is not listed for the package, or no main
 *   bundle can be located among its files
 */
async function decompilePackage(packageName, version, options = {}) {
  const info = await fetchPackageInfo(packageName);
  const resolvedVersion = version || info.latest;

  const versionKnown = info.versions.includes(resolvedVersion);
  if (!versionKnown) {
    const sample = info.versions.slice(0, 10).join(', ');
    throw new Error(
      `Version "${resolvedVersion}" not found for ${packageName}. Available: ${sample}...`,
    );
  }

  const files = await fetchPackageFileList(packageName, resolvedVersion);
  const bundlePath = findMainBundle(files, info.packageJson || {});
  if (!bundlePath) {
    const listing = files.slice(0, 10).map((entry) => entry.name).join(', ');
    throw new Error(
      `Could not find main bundle for ${packageName}@${resolvedVersion}. Files: ${listing}`,
    );
  }

  const source = await fetchFileContent(packageName, resolvedVersion, bundlePath);
  const decompiled = decompileSource(source, options);

  const packageInfo = {
    name: info.name,
    version: resolvedVersion,
    description: info.description,
    bundlePath,
    bundleSize: source.length,
  };
  return { ...decompiled, packageInfo, source };
}
330
+
331
/**
 * Decompile a JavaScript file from disk.
 *
 * The path is made absolute first; that absolute path is both passed to the
 * pipeline (as `options.filePath`) and echoed back in the result.
 *
 * @param {string} filePath - path to a .js file
 * @param {object} [options] - forwarded to decompileSource()
 * @returns {{modules: object[], metrics: object, witness: object|null, filePath: string, source: string}}
 * @throws {Error} "File not found: <absolute path>" when nothing exists at the path
 */
function decompileFile(filePath, options = {}) {
  const absolutePath = path.resolve(filePath);
  const present = fs.existsSync(absolutePath);
  if (present === false) {
    throw new Error(`File not found: ${absolutePath}`);
  }

  const source = fs.readFileSync(absolutePath, 'utf-8');
  const pipelineOptions = { ...options, filePath: absolutePath };
  const decompiled = decompileSource(source, pipelineOptions);

  return { ...decompiled, filePath: absolutePath, source };
}
354
+
355
/**
 * Download JavaScript from a URL (following redirects) and decompile it.
 *
 * @param {string} url
 * @param {object} [options] - forwarded to decompileSource()
 * @returns {Promise<{modules: object[], metrics: object, witness: object|null, url: string, source: string}>}
 * @throws {Error} "Failed to fetch <url> (HTTP <status>)" on a non-OK response;
 *   errors raised by fetch() itself propagate unchanged
 */
async function decompileUrl(url, options = {}) {
  const response = await fetch(url, { redirect: 'follow' });

  if (response.ok) {
    const source = await response.text();
    const decompiled = decompileSource(source, options);
    return { ...decompiled, url, source };
  }

  throw new Error(`Failed to fetch ${url} (HTTP ${response.status})`);
}
377
+
378
/**
 * Decide where a module's file goes, guaranteeing the path stays inside `rootDir`.
 *
 * Names containing '/' map to nested files (`tools/bash` -> `tools/bash.js`);
 * other names map to `module-NNN-<name>.js`. If the resulting path would land
 * outside `rootDir` (e.g. a name containing `..`), the name is flattened into
 * a single safe file name instead.
 *
 * @param {string} rootDir - absolute output directory
 * @param {{name: string}} mod
 * @param {number} index - zero-based position of the module
 * @returns {string} absolute file path inside `rootDir`
 */
function resolveModuleFile(rootDir, mod, index) {
  const ordinal = String(index + 1).padStart(3, '0');
  const relative = mod.name.includes('/')
    ? `${mod.name}.js`
    : `module-${ordinal}-${mod.name}.js`;

  const candidate = path.resolve(path.join(rootDir, relative));
  const fromRoot = path.relative(rootDir, candidate);
  const escapesRoot = fromRoot === ''
    || fromRoot === '..'
    || fromRoot.startsWith(`..${path.sep}`)
    || path.isAbsolute(fromRoot);
  if (!escapesRoot) return candidate;

  // Module names can come from decompiled (untrusted) bundles: neutralise
  // separators and dot-runs rather than writing outside the output directory.
  const flattened = mod.name.replace(/[\\/]+/g, '_').replace(/\.{2,}/g, '_');
  return path.join(rootDir, `module-${ordinal}-${flattened}.js`);
}

/**
 * Write decompilation results to an output directory (created if missing).
 *
 * Formats:
 *  - 'json'    : a single `decompiled.json` (modules, metrics, witness, packageInfo).
 *  - 'single'  : a single `decompiled.js` with all modules concatenated.
 *  - otherwise : one file per module, plus `metrics.json` and, when a witness
 *                is present, `witness.json`.
 *
 * @param {object} result - decompilation result from any of the decompile* functions
 * @param {string} outputDir
 * @param {string} [format='modules'] - 'modules', 'single', 'json'
 */
function writeOutput(result, outputDir, format = 'modules') {
  fs.mkdirSync(outputDir, { recursive: true });

  if (format === 'json') {
    const jsonResult = {
      modules: result.modules.map((m) => ({
        name: m.name,
        fragments: m.fragments,
        confidence: m.confidence,
        content: m.content,
      })),
      metrics: result.metrics,
      witness: result.witness,
      packageInfo: result.packageInfo || null,
    };
    fs.writeFileSync(
      path.join(outputDir, 'decompiled.json'),
      JSON.stringify(jsonResult, null, 2),
    );
    return;
  }

  if (format === 'single') {
    let output = '';
    for (const mod of result.modules) {
      output += `// ─── Module: ${mod.name} (confidence: ${mod.confidence}) ───\n\n`;
      output += mod.content + '\n\n';
    }
    fs.writeFileSync(path.join(outputDir, 'decompiled.js'), output);
    return;
  }

  // Default: 'modules' format — one file per module.
  const rootDir = path.resolve(outputDir);
  result.modules.forEach((mod, i) => {
    const header = `// Module: ${mod.name}\n// Confidence: ${mod.confidence}\n// Fragments: ${mod.fragments}\n\n`;
    const target = resolveModuleFile(rootDir, mod, i);
    // Hierarchical names need their sub-directories; harmless for flat ones.
    fs.mkdirSync(path.dirname(target), { recursive: true });
    fs.writeFileSync(target, header + mod.content);
  });

  // Metrics
  fs.writeFileSync(
    path.join(outputDir, 'metrics.json'),
    JSON.stringify(result.metrics, null, 2),
  );

  // Witness chain
  if (result.witness) {
    fs.writeFileSync(
      path.join(outputDir, 'witness.json'),
      JSON.stringify(result.witness, null, 2),
    );
  }
}
449
+
450
+ module.exports = {
451
+ decompilePackage,
452
+ decompileFile,
453
+ decompileUrl,
454
+ decompileSource,
455
+ writeOutput,
456
+ beautify,
457
+ parseTarget,
458
+ verifyWitnessChain,
459
+ reconstructCode,
460
+ reconstructRunnable,
461
+ validateReconstruction,
462
+ tryWasmDecompiler,
463
+ };
@@ -0,0 +1,86 @@
1
+ /**
2
+ * metrics.js - Code metrics extraction from JavaScript source.
3
+ *
4
+ * Computes structural metrics: function count, class count,
5
+ * declaration count, line count, async patterns, etc.
6
+ */
7
+
8
+ 'use strict';
9
+
10
/**
 * Compute code metrics for a JavaScript source string.
 *
 * Every figure except `lines` and `sizeBytes` is a regular-expression match
 * count over the raw text. Nothing is parsed, so occurrences inside strings
 * and comments are counted too; treat the numbers as estimates.
 *
 * Keyword patterns for `function`, `async function`, `class` and `extends`
 * require that the keyword is not part of a longer identifier (so
 * `myfunction(` or `subclass Foo` are not counted), accept `$` in names, and
 * recognise generator functions (`function*`).
 *
 * @param {string} source - JavaScript source code
 * @returns {{
 *   lines: number,
 *   sizeBytes: number,
 *   functions: number,
 *   asyncFunctions: number,
 *   arrowFunctions: number,
 *   classes: number,
 *   classExtensions: number,
 *   constDeclarations: number,
 *   letDeclarations: number,
 *   varDeclarations: number,
 *   imports: number,
 *   exports: number,
 *   requires: number,
 *   awaitExpressions: number,
 *   promiseUsages: number,
 *   tryBlocks: number,
 *   throwStatements: number,
 *   regexLiterals: number
 * }}
 */
function computeMetrics(source) {
  const count = (pattern) => (source.match(pattern) || []).length;

  return {
    lines: source.split('\n').length,
    sizeBytes: Buffer.byteLength(source, 'utf-8'),
    // (?<![\w$]) / (?![\w$]) act as identifier boundaries that also treat `$`
    // as an identifier character, which plain \b does not.
    functions: count(/(?<![\w$])function(?![\w$])\s*\*?\s*[\w$]*\s*\(/g),
    asyncFunctions: count(/(?<![\w$])async\s+function(?![\w$])/g),
    arrowFunctions: count(/=>/g),
    classes: count(/(?<![\w$])class\s+[\w$]+/g),
    classExtensions: count(/(?<![\w$])extends\s+[\w$]+/g),
    constDeclarations: count(/\bconst\s+/g),
    letDeclarations: count(/\blet\s+/g),
    varDeclarations: count(/\bvar\s+/g),
    imports: count(/\bimport\s+/g),
    exports: count(/\bexport\s+/g),
    requires: count(/\brequire\s*\(/g),
    awaitExpressions: count(/\bawait\s+/g),
    promiseUsages: count(/\bPromise\b/g),
    tryBlocks: count(/\btry\s*\{/g),
    throwStatements: count(/\bthrow\s+/g),
    // Rough heuristic: any /.../ span on one line, so division and paths can match too.
    regexLiterals: count(/\/[^/\n]+\/[gimsuy]*/g),
  };
}
59
+
60
/**
 * Run computeMetrics() over each module and aggregate line and byte totals.
 *
 * @param {Array<{name: string, content: string}>} modules
 * @returns {{moduleCount: number, totalLines: number, totalBytes: number, perModule: object[]}}
 *   `perModule` holds one entry per module: its name followed by its metrics
 */
function computeModuleMetrics(modules) {
  const perModule = modules.map(({ name, content }) => ({
    name,
    ...computeMetrics(content),
  }));

  // Fold both totals in a single pass over the per-module entries.
  const totals = perModule.reduce(
    (acc, entry) => ({
      lines: acc.lines + entry.lines,
      bytes: acc.bytes + entry.sizeBytes,
    }),
    { lines: 0, bytes: 0 },
  );

  return {
    moduleCount: modules.length,
    totalLines: totals.lines,
    totalBytes: totals.bytes,
    perModule,
  };
}
82
+
83
+ module.exports = {
84
+ computeMetrics,
85
+ computeModuleMetrics,
86
+ };