depyo 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -32,6 +32,9 @@ node depyo.js --out /path/to/file.pyc
32
32
  # Marshal-only blob (no .pyc header)
33
33
  node depyo.js --marshal --py-version 3.11 /path/to/blob.bin
34
34
  node depyo.js --marshal /path/to/blob.bin
35
+
36
+ # Fast marshal scan (no decompile)
37
+ node depyo.js --marshal-scan /path/to/blob.bin
35
38
  ```
36
39
  Without `--py-version`, depyo scans supported versions (oldest → newest) and accepts the first clean output when all clean candidates agree. If outputs diverge (ambiguous), it stops and asks for `--py-version`. Use `--debug` to see scan results.
37
40
 
@@ -45,6 +48,7 @@ Without `--py-version`, depyo scans supported versions (oldest → newest) and a
45
48
  - `--skip-path` flatten output paths (write next to input)
46
49
  - `--out` print source to stdout instead of files
47
50
  - `--marshal` treat input as raw marshalled data (no .pyc header, auto-scan versions)
51
+ - `--marshal-scan` fast scan marshal blobs and print version candidates
48
52
  - `--py-version <x.y>` bytecode version hint (use with `--marshal`)
49
53
  - `--basedir <dir>` override output root (default: alongside input)
50
54
  - `--file-ext <ext>` change emitted extension (default `py`)
package/depyo.js CHANGED
@@ -21,6 +21,8 @@ global.g_cliArgs = {
21
21
  skipPath: false,
22
22
  sendToStdout: false,
23
23
  marshal: false,
24
+ marshalScan: false,
25
+ strict: false,
24
26
  pyVersion: null,
25
27
  silent: false,
26
28
  fileExt: 'py',
@@ -28,11 +30,14 @@ global.g_cliArgs = {
28
30
  filenames: []
29
31
  };
30
32
 
33
+ let g_dirtyFiles = []; // Files where decompiler caught at least one opcode exception.
34
+
31
35
  let g_totalInThroughput = 0;
32
36
  let g_totalOutThroughput = 0;
33
37
  let g_totalExecTime = 0;
34
38
  let g_totalFiles = 0;
35
39
  let g_pyVersionInfo = null;
40
+ let g_marshalScanStats = {ok: 0, ambiguous: 0, failed: 0};
36
41
 
37
42
  function printUsage() {
38
43
  console.log(`Usage: node depyo.js [options] <file.pyc|archive.zip> [...]
@@ -48,6 +53,8 @@ Options:
48
53
  --skip-path Flatten output paths (write files next to inputs)
49
54
  --out Print decompiled source to stdout instead of files
50
55
  --marshal Treat input as raw marshalled data (no .pyc header)
56
+ --marshal-scan Fast scan of marshal blobs (no decompile, prints version)
57
+ --strict Re-throw on first opcode handler exception (default: log + continue + non-zero exit)
51
58
  --py-version <x.y> Python bytecode version hint (auto-scan if omitted)
52
59
  --basedir <path> Output base directory (default: alongside input)
53
60
  --file-ext <ext> Extension for generated source (default: py)
@@ -77,6 +84,11 @@ function parseCLIParams() {
77
84
  g_cliArgs.sendToStdout = true;
78
85
  } else if (cliParam.toLowerCase() == "--marshal") {
79
86
  g_cliArgs.marshal = true;
87
+ } else if (cliParam.toLowerCase() == "--marshal-scan" || cliParam.toLowerCase() == "--marshal-smoke") {
88
+ g_cliArgs.marshalScan = true;
89
+ g_cliArgs.marshal = true;
90
+ } else if (cliParam.toLowerCase() == "--strict") {
91
+ g_cliArgs.strict = true;
80
92
  } else if (cliParam.toLowerCase() == "--py-version") {
81
93
  g_cliArgs.pyVersion = process.argv[++idx];
82
94
  } else if (cliParam.toLowerCase() == "--basedir") {
@@ -103,6 +115,44 @@ function normalizeMarshalOutput(src) {
103
115
  .trim();
104
116
  }
105
117
 
118
+ function scanMarshalBuffer(buffer, filenameLabel) { // Probes one marshal blob, logs a one-line verdict prefixed with filenameLabel, and bumps exactly one g_marshalScanStats counter. Returns nothing.
119
+ if (g_pyVersionInfo) { // A version is already fixed (set outside this view, presumably from --py-version): probe only that version.
120
+ const trial = PycReader.TryParseMarshal(buffer, g_pyVersionInfo);
121
+ if (!trial) { // Falsy result is treated as a failed parse for the forced version.
122
+ g_marshalScanStats.failed++;
123
+ console.log(`${filenameLabel}: no parse with ${g_pyVersionInfo.major}.${g_pyVersionInfo.minor}`);
124
+ return;
125
+ }
126
+ g_marshalScanStats.ok++;
127
+ console.log(`${filenameLabel}: forced ${g_pyVersionInfo.major}.${g_pyVersionInfo.minor} unknown=${trial.unknown}/${trial.total} remaining=${trial.remaining}`);
128
+ return;
129
+ }
130
+ // No fixed version: ask PycReader for every candidate version and judge the result list.
131
+ const results = PycReader.ScanMarshalCandidates(buffer);
132
+ if (!results.length) { // Empty list counts as a failure.
133
+ g_marshalScanStats.failed++;
134
+ console.log(`${filenameLabel}: no candidates`);
135
+ return;
136
+ }
137
+ // NOTE(review): the first entry is taken as the best one; this assumes ScanMarshalCandidates returns results ordered best-first. TODO confirm.
138
+ const best = results[0];
139
+ const ambiguous = results.filter(r => // Every candidate tied with best on all three metrics; best itself always matches.
140
+ r.unknown === best.unknown &&
141
+ r.remaining === best.remaining &&
142
+ r.unknownRatio === best.unknownRatio
143
+ );
144
+ // More than one match means at least one other version ties with best, so no single version is reported.
145
+ if (ambiguous.length > 1) {
146
+ g_marshalScanStats.ambiguous++;
147
+ const versions = ambiguous.map(r => `${r.versionInfo.major}.${r.versionInfo.minor}`).join(', ');
148
+ console.log(`${filenameLabel}: ambiguous candidates (${versions})`);
149
+ return;
150
+ }
151
+ // Unique best candidate: report its version together with its unknown/total and remaining figures.
152
+ g_marshalScanStats.ok++;
153
+ console.log(`${filenameLabel}: best=${best.versionInfo.major}.${best.versionInfo.minor} unknown=${best.unknown}/${best.total} remaining=${best.remaining}`);
154
+ }
155
+
106
156
  function attemptMarshalDecompile(buffer, versionInfo, opts = {}) {
107
157
  const prevSilent = g_cliArgs.silent;
108
158
  const prevDebug = g_cliArgs.debug;
@@ -195,6 +245,11 @@ function decompilePycObject(data) {
195
245
  if (!Buffer.isBuffer(buffer)) {
196
246
  buffer = fs.readFileSync(data);
197
247
  }
248
+ if (g_cliArgs.marshalScan) {
249
+ const label = typeof data === 'string' ? data : '<buffer>';
250
+ scanMarshalBuffer(buffer, label);
251
+ return;
252
+ }
198
253
  let rdr = null;
199
254
  let pySrc = null;
200
255
  let genSecs = 0;
@@ -292,9 +347,33 @@ function decompilePycObject(data) {
292
347
  let genStartTS = process.hrtime.bigint();
293
348
  let decompiler = new PycDecompiler(obj);
294
349
  let ast = decompiler.decompile();
295
- let pycResult = ast.codeFragment();
296
- pySrc = pycResult.toString();
350
+ let renderError = null;
351
+ try {
352
+ let pycResult = ast.codeFragment();
353
+ pySrc = pycResult.toString();
354
+ } catch (ex) {
355
+ if (g_cliArgs.strict) throw ex;
356
+ renderError = ex;
357
+ decompiler.errors.push({
358
+ opcode: 'RENDER',
359
+ codeObject: obj?.Name?.toString?.() || '<root>',
360
+ message: ex.message,
361
+ stack: ex.stack
362
+ });
363
+ decompiler.cleanBuild = false;
364
+ pySrc = `# DECOMPILER ERROR: codeFragment() threw: ${ex.message}\n`;
365
+ if (!g_cliArgs.silent) {
366
+ console.error(`RENDER EXCEPTION in '${obj?.Name}': ${ex.message}`);
367
+ if (g_cliArgs.debug) console.error(ex.stack);
368
+ }
369
+ }
297
370
  genSecs = Number(process.hrtime.bigint() - genStartTS) / 1000000000;
371
+ if (!decompiler.cleanBuild) {
372
+ g_dirtyFiles.push({
373
+ file: typeof data === 'string' ? data : (obj?.FileName || '<buffer>'),
374
+ errors: decompiler.errors.length
375
+ });
376
+ }
298
377
  }
299
378
  if (!pySrc.endsWith("\n")) {
300
379
  pySrc += "\n";
@@ -369,8 +448,30 @@ g_baseDir = Path.resolve(baseInputDir, 'decompiled') + '/';
369
448
 
370
449
  DecompileModule(g_cliArgs.filenames);
371
450
 
451
+ if (g_cliArgs.marshalScan) { // Scan mode: print the counters and terminate here, so nothing below this block runs.
452
+ console.log(`Marshal scan summary: ok=${g_marshalScanStats.ok}, ambiguous=${g_marshalScanStats.ambiguous}, failed=${g_marshalScanStats.failed}`);
453
+ if (g_marshalScanStats.failed > 0) { // Any failed blob wins over ambiguity: exit code 1.
454
+ process.exit(1);
455
+ }
456
+ if (g_marshalScanStats.ambiguous > 0) { // No failures but at least one ambiguous blob: exit code 2.
457
+ process.exit(2);
458
+ }
459
+ process.exit(0); // Every scanned blob was counted as ok.
460
+ }
461
+
372
462
  if (!g_cliArgs.sendToStdout) {
373
463
  const inRate = (g_totalInThroughput / g_totalExecTime).toFixed(2);
374
464
  const outRate = (g_totalOutThroughput / g_totalExecTime).toFixed(2);
375
465
  console.log(`Processed ${g_totalFiles} files in ${g_totalExecTime.toFixed(3)}s. In: ${g_totalInThroughput} bytes (${inRate} B/s). Out: ${g_totalOutThroughput} bytes (${outRate} B/s).`);
376
466
  }
467
+
468
+ if (g_dirtyFiles.length > 0) { // At least one file was recorded in g_dirtyFiles: report on stderr and exit non-zero.
469
+ console.error(`\nDirty decompile: ${g_dirtyFiles.length} file(s) had handler exceptions (output may be partial):`);
470
+ for (const d of g_dirtyFiles.slice(0, 20)) { // List at most the first 20 entries.
471
+ console.error(` - ${d.file} (${d.errors} opcode error${d.errors === 1 ? '' : 's'})`);
472
+ }
473
+ if (g_dirtyFiles.length > 20) { // Summarise the entries that were not listed.
474
+ console.error(` ... and ${g_dirtyFiles.length - 20} more`);
475
+ }
476
+ process.exit(1); // Exit code 1 whenever any dirty file was recorded.
477
+ }
package/lib/OpCodes.js CHANGED
@@ -329,6 +329,9 @@ class OpCodes
329
329
  static LOAD_ZERO_SUPER_ATTR_A = 319; // Python 3.13+ zero-cost super attr
330
330
  static LOAD_ZERO_SUPER_METHOD_A = 320; // Python 3.13+ zero-cost super method
331
331
 
332
+ // Python 3.15 new opcodes
333
+ static TRACE_RECORD_A = 321; // Python 3.15 -> trace recording (ignore)
334
+
332
335
 
333
336
  // enum cmp_op
334
337
  // {
@@ -391,7 +394,7 @@ class OpCodes
391
394
 
392
395
  if (reader.versionCompare(3, 6) >= 0) {
393
396
  while (opCodeID == OpCodes.EXTENDED_ARG_A) {
394
- argument = argument | code[++opOffset] << 8;
397
+ argument = (argument << 8) | code[++opOffset];
395
398
  opCodeID = this.GetOpCodeID(code, ++opOffset);
396
399
 
397
400
  // Break if we hit end of bytecode
@@ -401,9 +404,15 @@ class OpCodes
401
404
  }
402
405
  argument <<= 8;
403
406
  } else {
407
+ // Pre-3.6: EXTENDED_ARG carries a 16-bit operand that becomes
408
+ // the upper 16 bits of the next instruction's argument. After
409
+ // reading both operand bytes we must advance opOffset one more
410
+ // step so the caller lands on the real opcode (not the trailing
411
+ // operand byte of EXTENDED_ARG).
404
412
  if (opCodeID == OpCodes.EXTENDED_ARG_A) {
405
413
  argument = code[++opOffset] | code[++opOffset] << 8;
406
414
  argument <<= 16;
415
+ opOffset++;
407
416
  }
408
417
  }
409
418
 
@@ -522,10 +531,18 @@ class OpCodes
522
531
  opCode.Name = this.CodeObject.Names.Value[opCode.Argument].toString();
523
532
  }
524
533
  } else if (opCode.HasFree) {
525
- if (opCode.Argument < this.CodeObject.CellVars.Value.length) {
526
- opCode.FreeName = this.CodeObject.CellVars.Value[opCode.Argument].toString();
527
- } else if ((opCode.Argument - this.CodeObject.CellVars.Value.length) < this.CodeObject.FreeVars.Value.length) {
528
- opCode.FreeName = this.CodeObject.FreeVars.Value[opCode.Argument - this.CodeObject.CellVars.Value.length].toString();
534
+ // 3.11+ stores cells/frees inside localsplus, so the opcode argument
535
+ // is an index into [locals | cells | frees]. Strip the locals prefix
536
+ // before looking up into the split CellVars/FreeVars tuples.
537
+ const isNewLayout = this.CodeObject.Reader?.versionCompare?.(3, 11) >= 0;
538
+ const localsLen = isNewLayout ? (this.CodeObject.VarNames?.Value?.length ?? 0) : 0;
539
+ const freeIdx = opCode.Argument - localsLen;
540
+ const cellLen = this.CodeObject.CellVars?.Value?.length ?? 0;
541
+ const freeLen = this.CodeObject.FreeVars?.Value?.length ?? 0;
542
+ if (freeIdx >= 0 && freeIdx < cellLen) {
543
+ opCode.FreeName = this.CodeObject.CellVars.Value[freeIdx].toString();
544
+ } else if (freeIdx >= cellLen && (freeIdx - cellLen) < freeLen) {
545
+ opCode.FreeName = this.CodeObject.FreeVars.Value[freeIdx - cellLen].toString();
529
546
  } else {
530
547
  opCode.FreeName = `##FREEVAR_${opCode.Argument}##`;
531
548
  }