ruvector 0.2.19 → 0.2.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,499 @@
1
+ /**
2
+ * reconstructor.js - Readable reconstruction pipeline.
3
+ *
4
+ * Takes beautified minified JS and produces human-readable code by
5
+ * renaming variables, adding comments, and reconstructing structure
6
+ * using pattern inference and contextual analysis.
7
+ */
8
+
9
+ 'use strict';
10
+
11
+ const {
12
+ findMinifiedIdentifiers,
13
+ extractContext,
14
+ applyAllRenames,
15
+ } = require('./reference-tracker');
16
+ const {
17
+ predictName,
18
+ inferParamName,
19
+ } = require('./name-predictor');
20
+ const {
21
+ improveReadability,
22
+ generateJSDoc,
23
+ } = require('./style-improver');
24
+
25
/**
 * Run the full reconstruction pipeline on a source string.
 *
 * Phases, in order: find minified identifiers, predict a name for each,
 * apply the renames, apply style improvements, add JSDoc comments, and
 * finally upgrade `var` declarations.
 *
 * @param {string} source - beautified JavaScript source
 * @param {object} [options]
 * @param {string} [options.modelPath] - path to ONNX model (accepted for
 *   API compatibility; this function does not read it)
 * @param {string} [options.patternPath] - path to patterns JSON, forwarded to predictName
 * @param {boolean} [options.propagateNames=true] - rename all references
 * @param {boolean} [options.addComments=true] - add JSDoc comments
 * @param {boolean} [options.improveStyle=true] - apply style improvements
 * @param {number} [options.minConfidence=0.3] - minimum confidence for renames
 * @param {number} [options.maxRenames=500] - safety limit on number of renames
 * @returns {{code: string, renames: Array<{original: string, newName: string, confidence: number, source: string, type: (string|null)}>, comments: number, confidence: number}}
 *   `confidence` is the mean rename confidence rounded to 3 decimals
 *   (0 when nothing was renamed).
 */
function reconstructCode(source, options = {}) {
  const {
    patternPath,
    propagateNames = true,
    addComments = true,
    improveStyle = true,
    minConfidence = 0.3,
    maxRenames = 500,
  } = options;

  // Phase 1: Find all minified identifiers
  const minifiedIds = findMinifiedIdentifiers(source);
  // Built once so the collision check below is O(1) per identifier instead
  // of scanning the whole list on every iteration.
  const minifiedIdSet = new Set(minifiedIds);

  // Phase 2: Extract context and predict names for each
  const renames = [];
  const usedNames = new Set();

  for (const id of minifiedIds) {
    if (renames.length >= maxRenames) break;

    const context = extractContext(source, id);
    const declaration = findDeclaration(source, id);

    const prediction = predictName(id, context, {
      declaration,
      patternPath,
      minConfidence,
    });
    if (!prediction) continue;

    // Ensure uniqueness: append a suffix if the name was already handed out
    // or would collide with an identifier that is itself still minified.
    let finalName = prediction.name;
    if (usedNames.has(finalName) || minifiedIdSet.has(finalName)) {
      finalName = deduplicateName(finalName, usedNames);
    }
    usedNames.add(finalName);

    renames.push({
      original: id,
      newName: finalName,
      confidence: prediction.confidence,
      source: prediction.source,
      type: prediction.type || null,
    });
  }

  // Phase 3: Apply renames (propagate through all references)
  let code = source;
  if (propagateNames && renames.length > 0) {
    code = applyAllRenames(
      code,
      renames.map((r) => ({ oldName: r.original, newName: r.newName })),
    );
  }

  // Phase 4: Apply style improvements
  if (improveStyle) {
    code = improveReadability(code);
  }

  // Phase 5: Add JSDoc comments
  let commentsAdded = 0;
  if (addComments) {
    const result = addJSDocComments(code, renames);
    code = result.code;
    commentsAdded = result.count;
  }

  // Phase 6: Convert var declarations intelligently
  code = upgradeVarDeclarations(code);

  // Compute overall confidence (mean over all proposed renames)
  const avgConfidence =
    renames.length > 0
      ? renames.reduce((sum, r) => sum + r.confidence, 0) / renames.length
      : 0;

  return {
    code,
    renames,
    comments: commentsAdded,
    confidence: parseFloat(avgConfidence.toFixed(3)),
  };
}
124
+
125
/**
 * Find the declaration of an identifier and return a snippet around it.
 *
 * Tries, in order: var/let/const (first or comma-separated declarator),
 * function, async function (incl. generators), class, function-expression
 * and arrow-function assignment, function parameter, and for-of binding.
 * The identifier is matched as a whole name: `a` does not match `ab`,
 * `xa`, or `a$`, and `$`-containing names are handled.
 *
 * @param {string} source
 * @param {string} identifier
 * @returns {string} up to 50 characters before the match, the match itself,
 *   and up to 200 characters after it; empty string when nothing matches
 */
function findDeclaration(source, identifier) {
  const escaped = identifier.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // `\b` is wrong for JS identifiers (it treats `$` as a boundary and cannot
  // anchor a name that starts or ends with `$`), so use explicit lookarounds.
  const ident = `(?<![\\w$])${escaped}(?![\\w$])`;
  // Keeps keywords from matching as the tail of a longer name (e.g. `myvar`).
  const kw = '(?<![\\w$])';

  // Try common declaration patterns
  const patterns = [
    new RegExp(`${kw}(?:var|let|const)\\s+${ident}\\s*=([^;]{0,200})`, 'm'),
    // Comma-separated: let X=..., IDENT=value
    new RegExp(`${kw}(?:var|let|const)\\s+[^;]*,\\s*${ident}\\s*=([^;,]{0,200})`, 'm'),
    new RegExp(`${kw}function\\s+${ident}\\s*\\([^)]*\\)`, 'm'),
    // `\s*` on both sides of `*` so `async function *name` is found too;
    // the lookbehind in `ident` still rejects `functionname`.
    new RegExp(`${kw}async\\s+function\\s*\\*?\\s*${ident}`, 'm'),
    new RegExp(`${kw}class\\s+${ident}`, 'm'),
    new RegExp(`${ident}\\s*=\\s*(?:async\\s+)?function\\s*\\*?\\s*\\([^)]*\\)`, 'm'),
    new RegExp(`${ident}\\s*=\\s*(?:async\\s+)?\\([^)]*\\)\\s*=>`, 'm'),
    // Function parameter: function name(IDENT, ...) or function*(IDENT)
    new RegExp(`${kw}function\\s*\\*?\\s*[\\w$]*\\s*\\([^)]*${ident}[^)]*\\)`, 'm'),
    // For-of: for await (let IDENT of expr)
    new RegExp(
      `${kw}for\\s*(?:await)?\\s*\\(\\s*(?:let|const|var)\\s+${ident}\\s+of\\s+([^)]{1,100})\\)`,
      'm',
    ),
  ];

  for (const re of patterns) {
    const match = source.match(re);
    if (match) {
      // Context window: 50 chars before the match through 200 chars after it
      const start = Math.max(0, match.index - 50);
      const end = Math.min(source.length, match.index + match[0].length + 200);
      return source.substring(start, end);
    }
  }

  return '';
}
164
+
165
/**
 * Add JSDoc comments before function and class declarations.
 *
 * Works line by line: a line is treated as a declaration when, after
 * trimming, it starts with a (possibly exported/async) `function` keyword,
 * `class Name`, or `const|let|var name = [async] function`. Declarations
 * whose previous line already looks like part of a doc comment are skipped.
 *
 * @param {string} code
 * @param {Array<{original: string, newName: string}>} renames - forwarded to generateJSDoc
 * @returns {{code: string, count: number}} the annotated code and the
 *   number of comment blocks inserted
 */
function addJSDocComments(code, renames) {
  const lines = code.split('\n');
  const result = [];
  let count = 0;

  for (let i = 0; i < lines.length; i++) {
    const trimmed = lines[i].trim();

    // Check if this line is a function or class declaration.
    // `\b` after `function` keeps identifiers such as `functions` or
    // `functionCall()` from being mistaken for the keyword.
    const isDecl =
      /^(export\s+)?(default\s+)?(async\s+)?function\b/.test(trimmed) ||
      /^class\s+\w+/.test(trimmed) ||
      /^(const|let|var)\s+\w+\s*=\s*(async\s+)?function\b/.test(trimmed);

    if (isDecl) {
      // Check if there is already a JSDoc comment above
      const prevLine = i > 0 ? lines[i - 1].trim() : '';
      const hasPrevDoc =
        prevLine.endsWith('*/') || prevLine.startsWith('/**') || prevLine.startsWith('*');

      if (!hasPrevDoc) {
        // Collect context from surrounding lines (2 before, up to 14 after)
        const contextWindow = lines
          .slice(Math.max(0, i - 2), Math.min(lines.length, i + 15))
          .join('\n');
        const contextStrings = extractContextFromCode(contextWindow);

        const jsdoc = generateJSDoc(trimmed, contextStrings, { renames });
        if (jsdoc) {
          // Re-indent the generated block to match the declaration line
          const indent = lines[i].match(/^(\s*)/)[1];
          const indented = jsdoc
            .split('\n')
            .map((l) => indent + l)
            .join('\n');
          result.push(indented);
          count++;
        }
      }
    }

    result.push(lines[i]);
  }

  return { code: result.join('\n'), count };
}
218
+
219
/**
 * Extract context strings from a code snippet.
 *
 * The result lists, in this order: the contents of quoted string literals
 * (2-60 characters, quotes stripped), property accesses (kept with their
 * leading dot, e.g. `.length`), and any of a fixed set of control-flow
 * keywords that occur in the snippet as whole words.
 *
 * @param {string} code
 * @returns {string[]}
 */
function extractContextFromCode(code) {
  const contexts = [];

  // String literals
  const strings = code.match(/["']([^"']{2,60})["']/g) || [];
  for (const s of strings) {
    contexts.push(s.replace(/^["']|["']$/g, ''));
  }

  // Property accesses
  const props = code.match(/\.([a-zA-Z_]\w{1,30})/g) || [];
  for (const p of props) {
    contexts.push(p);
  }

  // Keywords that give semantic hints. Matched on word boundaries so that
  // identifiers merely containing a keyword (`format`, `notify`) do not count.
  const keywords = ['yield', 'await', 'return', 'throw', 'catch', 'for', 'if', 'switch'];
  for (const kw of keywords) {
    if (new RegExp(`\\b${kw}\\b`).test(code)) contexts.push(kw);
  }

  return contexts;
}
248
+
249
/**
 * Skip a quoted string or template literal.
 *
 * @param {string} code
 * @param {number} open - index of the opening quote character
 * @returns {number} index just past the closing quote (or code.length)
 */
function skipQuotedLiteral(code, open) {
  const quote = code[open];
  let pos = open + 1;
  while (pos < code.length) {
    if (code[pos] === '\\') {
      pos += 2;
    } else if (code[pos] === quote) {
      return pos + 1;
    } else {
      pos += 1;
    }
  }
  return code.length;
}

/**
 * Skip an initializer expression.
 *
 * @param {string} code
 * @param {number} from - index of the first character of the expression
 * @returns {number} index of the first top-level `,` or `;`, of an
 *   unmatched closing bracket, or code.length
 */
function skipInitializer(code, from) {
  let depth = 0;
  let pos = from;
  while (pos < code.length) {
    const ch = code[pos];
    if (ch === '"' || ch === "'" || ch === '`') {
      pos = skipQuotedLiteral(code, pos);
      continue;
    }
    if (ch === '/' && code[pos + 1] === '/') {
      const eol = code.indexOf('\n', pos);
      pos = eol === -1 ? code.length : eol + 1;
      continue;
    }
    if (ch === '/' && code[pos + 1] === '*') {
      const close = code.indexOf('*/', pos + 2);
      pos = close === -1 ? code.length : close + 2;
      continue;
    }
    if (ch === '(' || ch === '[' || ch === '{') {
      depth += 1;
    } else if (ch === ')' || ch === ']' || ch === '}') {
      if (depth === 0) return pos;
      depth -= 1;
    } else if (depth === 0 && (ch === ',' || ch === ';')) {
      return pos;
    }
    pos += 1;
  }
  return code.length;
}

/**
 * Split the declarator list of one `var` statement.
 *
 * @param {string} code
 * @param {number} from - index just past `var` and its trailing whitespace
 * @returns {Array<{name: string, nameIndex: number, hasInit: boolean}>|null}
 *   null when a declarator is not a plain identifier (e.g. destructuring)
 */
function readVarDeclarators(code, from) {
  const identifier = /\s*([A-Za-z_$][\w$]*)\s*/y;
  const declarators = [];
  let pos = from;

  for (;;) {
    identifier.lastIndex = pos;
    const head = identifier.exec(code);
    if (head === null) return null;

    const name = head[1];
    const nameIndex = head.index + head[0].indexOf(name);
    pos = identifier.lastIndex;

    if (code[pos] === ',') {
      declarators.push({ name, nameIndex, hasInit: false });
      pos += 1;
      continue;
    }
    if (code[pos] !== '=') {
      // `var x;`, `var k in obj`, end of input, ...
      declarators.push({ name, nameIndex, hasInit: false });
      return declarators;
    }

    declarators.push({ name, nameIndex, hasInit: true });
    const stop = skipInitializer(code, pos + 1);
    if (code[stop] !== ',') return declarators;
    pos = stop + 1;
  }
}

/**
 * Tell whether `name` is written anywhere other than its own declarator.
 * Matching is textual and scope-blind, so same-named properties, parameters
 * with defaults, and bindings in other scopes also count (errs towards `let`).
 *
 * @param {string} code
 * @param {string} name
 * @param {number} nameIndex - index of the name inside its declaration
 * @returns {boolean}
 */
function isBindingWritten(code, name, nameIndex) {
  const id = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // name = (not ==, ===, =>), name++ / name--, and every compound assignment
  const assignment = new RegExp(
    `(?<![\\w$])${id}\\s*(?:=(?![=>])|\\+\\+|--|(?:\\*\\*|<<|>>>?|&&|\\|\\||\\?\\?|[-+*/%&|^])=)`,
    'g',
  );
  for (const m of code.matchAll(assignment)) {
    if (m.index !== nameIndex) return true;
  }

  // ++name / --name
  const prefixUpdate = new RegExp(`(?:\\+\\+|--)\\s*${id}(?![\\w$])`);
  // for (name in ...) / for (name of ...)
  const loopTarget = new RegExp(`for\\s*(?:await\\s*)?\\(\\s*${id}\\s+(?:in|of)(?![\\w$])`);
  return prefixUpdate.test(code) || loopTarget.test(code);
}

/**
 * Upgrade var declarations to const/let based on usage.
 * - var x = ... with no reassignment -> const x = ...
 * - var x = ... with reassignment -> let x = ...
 *
 * The analysis is regex-based and not scope-aware, so a statement is left
 * as `var` whenever the rewrite cannot be shown safe by text alone:
 * - a declarator has no initializer (`const` would be a syntax error);
 * - a declarator is a destructuring pattern;
 * - a name is declared by more than one `var` statement (duplicate
 *   `let`/`const` is a syntax error);
 * - a name occurs textually before its declaration (hoisting would turn
 *   into a temporal-dead-zone error).
 * For `var a = ..., b = ...` the keyword applies to every declarator, so
 * `let` is used if any of them is written later.
 *
 * Known limits: regex literals are not recognised while scanning
 * initializers, destructuring assignments (`[x] = ...`) are not detected as
 * writes, and text inside string literals or comments is not excluded.
 *
 * @param {string} code
 * @returns {string}
 */
function upgradeVarDeclarations(code) {
  // Find all var statements together with their declarator lists
  const statements = [];
  const varKeyword = /(?<![\w$.])var\s+/g;
  let match;
  while ((match = varKeyword.exec(code)) !== null) {
    const declarators = readVarDeclarators(code, match.index + match[0].length);
    if (declarators !== null) {
      statements.push({ index: match.index, declarators });
    }
  }

  if (statements.length === 0) return code;

  // How many var declarators introduce each name
  const declarationCounts = new Map();
  for (const { declarators } of statements) {
    for (const { name } of declarators) {
      declarationCounts.set(name, (declarationCounts.get(name) || 0) + 1);
    }
  }

  const isReferencedEarlier = (name, nameIndex) => {
    const id = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const first = code.search(new RegExp(`(?<![\\w$])${id}(?![\\w$])`));
    return first !== -1 && first < nameIndex;
  };

  const replacements = [];
  for (const { index, declarators } of statements) {
    const convertible = declarators.every(
      ({ name, nameIndex, hasInit }) =>
        hasInit &&
        declarationCounts.get(name) === 1 &&
        !isReferencedEarlier(name, nameIndex),
    );
    if (!convertible) continue;

    const isReassigned = declarators.some(({ name, nameIndex }) =>
      isBindingWritten(code, name, nameIndex),
    );
    replacements.push({ index, replacement: isReassigned ? 'let' : 'const' });
  }

  // Apply replacements from end to start so earlier indices stay valid
  let result = code;
  for (let i = replacements.length - 1; i >= 0; i--) {
    const { index, replacement } = replacements[i];
    result = result.substring(0, index) + replacement + result.substring(index + 3);
  }

  return result;
}
308
+
309
/**
 * Produce a variant of `name` that is not yet taken by appending the
 * smallest free numeric suffix, starting at 2 (`name2`, `name3`, ...).
 * The set itself is not modified.
 *
 * @param {string} name
 * @param {Set<string>} usedNames
 * @returns {string}
 */
function deduplicateName(name, usedNames) {
  for (let n = 2; ; n += 1) {
    const attempt = `${name}${n}`;
    if (!usedNames.has(attempt)) {
      return attempt;
    }
  }
}
325
+
326
/**
 * Runnable reconstruction — applies renames one at a time, validating
 * each one. Trades completeness for correctness: a rename is kept only if
 * the result still compiles as a script and, when run in a stubbed
 * CommonJS sandbox, exposes the same set of export keys as the original.
 * Post-processing steps (style fixes, var upgrade, JSDoc) are kept only if
 * they pass every check the input itself passed.
 *
 * NOTE(security): validation executes both the original and the rewritten
 * source with Node's `vm` module. `vm` is not a security boundary; do not
 * call this function on code you would not be willing to run.
 *
 * @param {string} source - beautified JavaScript source
 * @param {object} [options]
 * @param {string} [options.patternPath] - path to patterns JSON
 * @param {boolean} [options.addComments=true] - add JSDoc comments
 * @param {number} [options.minConfidence=0.3]
 * @param {number} [options.timeoutMs=1000] - VM timeout for equivalence checks
 * @returns {{code: string, appliedRenames: Array, rejectedRenames: Array, runnable: boolean, comments: number, stats: object}}
 *   `runnable` is always true: every retained transformation passed the
 *   checks above. When the input itself does not run in the sandbox, all
 *   renames are rejected with reason 'breaks behavior'.
 */
function reconstructRunnable(source, options = {}) {
  const {
    patternPath,
    addComments = true,
    minConfidence = 0.3,
    timeoutMs = 1000,
  } = options;

  const vm = require('vm');
  const { applyRename } = require('./reference-tracker');

  // Helper: check syntax validity by compiling (never running) the code the
  // same way the equivalence check will run it, i.e. as a script.
  function isSyntacticallyValid(code) {
    try {
      new vm.Script(code);
      return true;
    } catch {
      return false;
    }
  }

  // Helper: fresh stubbed CommonJS globals. `exports` aliases
  // `module.exports`, as in Node, so `exports.x = ...` is observed too.
  function makeSandbox() {
    const moduleObject = { exports: {} };
    return {
      module: moduleObject,
      exports: moduleObject.exports,
      require: () => ({}),
      console: { log() {}, error() {}, warn() {}, info() {} },
      process: { env: {}, argv: [], cwd: () => '/' },
      setTimeout: () => {},
      setInterval: () => {},
      clearTimeout: () => {},
      clearInterval: () => {},
      Buffer: { from: () => Buffer.alloc(0), alloc: () => Buffer.alloc(0) },
      global: {},
      __dirname: '/',
      __filename: '/test.js',
    };
  }

  // Helper: run code in a fresh sandbox and serialize its sorted export
  // keys; null when the code throws, times out, or fails to compile.
  function exportSignature(code) {
    try {
      const context = vm.createContext(makeSandbox());
      vm.runInContext(code, context, { timeout: timeoutMs });
      return JSON.stringify(Object.keys(context.module.exports || {}).sort());
    } catch {
      return null;
    }
  }

  // The original is evaluated once; every candidate is compared against it.
  const sourceParses = isSyntacticallyValid(source);
  const baselineExports = exportSignature(source);

  // Helper: check functional equivalence via sandboxed VM
  const isFunctionallyEquivalent = (modified) =>
    baselineExports !== null && exportSignature(modified) === baselineExports;

  // Helper for post-processing: require only the checks the input passed,
  // so input that cannot be validated is processed as before.
  const passesAvailableChecks = (code) =>
    (!sourceParses || isSyntacticallyValid(code)) &&
    (baselineExports === null || exportSignature(code) === baselineExports);

  // 1. Collect all candidate renames
  const minifiedIds = findMinifiedIdentifiers(source);
  const minifiedIdSet = new Set(minifiedIds);
  const candidates = [];

  for (const id of minifiedIds) {
    const context = extractContext(source, id);
    const declaration = findDeclaration(source, id);
    const prediction = predictName(id, context, {
      declaration,
      patternPath,
      minConfidence,
    });
    if (prediction) {
      candidates.push({
        original: id,
        inferred: prediction.name,
        confidence: prediction.confidence,
        source: prediction.source,
      });
    }
  }

  // 2. Sort by confidence (highest first)
  candidates.sort((a, b) => b.confidence - a.confidence);

  // 3. Apply renames one at a time, validating each
  let current = source;
  const appliedRenames = [];
  const rejectedRenames = [];
  const usedNames = new Set();

  for (const candidate of candidates) {
    // Avoid names already handed out or still in use as minified ids
    let inferredName = candidate.inferred;
    if (usedNames.has(inferredName) || minifiedIdSet.has(inferredName)) {
      inferredName = deduplicateName(inferredName, usedNames);
    }

    const attempt = applyRename(current, candidate.original, inferredName);

    if (!isSyntacticallyValid(attempt)) {
      rejectedRenames.push({ ...candidate, reason: 'syntax error' });
      continue;
    }
    if (!isFunctionallyEquivalent(attempt)) {
      rejectedRenames.push({ ...candidate, reason: 'breaks behavior' });
      continue;
    }

    current = attempt;
    usedNames.add(inferredName);
    appliedRenames.push({ ...candidate, inferred: inferredName });
  }

  // 4. Apply style fixes, kept only if the result still validates
  const styled = applySafeStyleFixes(current);
  if (passesAvailableChecks(styled)) current = styled;

  // 5. Upgrade var -> const/let, kept only if the result still validates
  const upgraded = upgradeVarDeclarations(current);
  if (passesAvailableChecks(upgraded)) current = upgraded;

  // 6. Add JSDoc comments, kept only if the result still validates
  let commentsAdded = 0;
  if (addComments) {
    // addJSDocComments documents `newName`; applied renames store the final
    // name under `inferred`, so expose it under both keys.
    const result = addJSDocComments(
      current,
      appliedRenames.map((r) => ({ ...r, newName: r.inferred })),
    );
    if (passesAvailableChecks(result.code)) {
      current = result.code;
      commentsAdded = result.count;
    }
  }

  return {
    code: current,
    appliedRenames,
    rejectedRenames,
    runnable: true,
    comments: commentsAdded,
    stats: {
      totalCandidates: candidates.length,
      applied: appliedRenames.length,
      rejected: rejectedRenames.length,
      successRate: candidates.length > 0
        ? parseFloat((appliedRenames.length / candidates.length).toFixed(3))
        : 1,
    },
  };
}
476
+
477
/**
 * Replace minifier shorthands with their readable equivalents:
 * `!0` -> `true`, `!1` -> `false`, `void 0` -> `undefined`.
 *
 * A shorthand is left alone when it is directly preceded by an identifier
 * character or quote, or directly followed by an identifier character or a
 * dot. The dot rule keeps `!0.5` and `void 0.5` intact; rewriting them
 * would yield the invalid `true.5` / `undefined.5`.
 *
 * The match is textual: occurrences inside string literals or comments are
 * not excluded beyond the immediate-quote check, and `undefined` is assumed
 * not to be shadowed in the target code.
 *
 * @param {string} code
 * @returns {string}
 */
function applySafeStyleFixes(code) {
  return code
    .replace(/(?<![a-zA-Z0-9_$"'`])!0(?![a-zA-Z0-9_$.])/g, 'true')
    .replace(/(?<![a-zA-Z0-9_$"'`])!1(?![a-zA-Z0-9_$.])/g, 'false')
    .replace(/\bvoid 0(?![\w$.])/g, 'undefined');
}
490
+
491
// Public API of this module. deduplicateName is not listed here and stays module-private.
+ module.exports = {
492
+ reconstructCode,
493
+ reconstructRunnable,
494
+ findDeclaration,
495
+ addJSDocComments,
496
+ extractContextFromCode,
497
+ upgradeVarDeclarations,
498
+ applySafeStyleFixes,
499
+ };