js-confuser-vm 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/CHANGELOG.md +125 -28
  2. package/LICENSE +21 -21
  3. package/README.MD +370 -196
  4. package/babel-plugin-inline-runtime.cjs +34 -34
  5. package/babel.config.json +23 -23
  6. package/dist/build-runtime.js +53 -0
  7. package/dist/compiler.js +107 -117
  8. package/dist/runtime.js +78 -84
  9. package/dist/transforms/bytecode/macroOpcodes.js +152 -0
  10. package/dist/transforms/{resolveContants.js → bytecode/resolveContants.js} +16 -6
  11. package/dist/transforms/bytecode/resolveLabels.js +80 -0
  12. package/dist/transforms/{selfModifying.js → bytecode/selfModifying.js} +33 -33
  13. package/dist/transforms/bytecode/specializedOpcodes.js +103 -0
  14. package/dist/transforms/runtime/macroOpcodes.js +88 -0
  15. package/dist/transforms/runtime/minify.js +1 -0
  16. package/dist/transforms/runtime/shuffleOpcodes.js +20 -0
  17. package/dist/transforms/runtime/specializedOpcodes.js +102 -0
  18. package/dist/transforms/utils/op-utils.js +25 -0
  19. package/dist/{random.js → transforms/utils/random-utils.js} +3 -3
  20. package/dist/types.js +4 -2
  21. package/index.ts +34 -22
  22. package/jest-strip-types.js +10 -10
  23. package/jest.config.js +35 -28
  24. package/package.json +49 -48
  25. package/src/build-runtime.ts +57 -0
  26. package/src/compiler.ts +2069 -2066
  27. package/src/index.ts +14 -14
  28. package/src/minify.ts +21 -21
  29. package/src/options.ts +14 -12
  30. package/src/runtime.ts +771 -779
  31. package/src/transforms/bytecode/macroOpcodes.ts +177 -0
  32. package/src/transforms/bytecode/resolveContants.ts +62 -0
  33. package/src/transforms/bytecode/resolveLabels.ts +107 -0
  34. package/src/transforms/{selfModifying.ts → bytecode/selfModifying.ts} +37 -40
  35. package/src/transforms/bytecode/specializedOpcodes.ts +118 -0
  36. package/src/transforms/runtime/macroOpcodes.ts +111 -0
  37. package/src/transforms/runtime/minify.ts +1 -0
  38. package/src/transforms/runtime/shuffleOpcodes.ts +24 -0
  39. package/src/transforms/runtime/specializedOpcodes.ts +146 -0
  40. package/src/transforms/utils/op-utils.ts +26 -0
  41. package/src/{random.ts → transforms/utils/random-utils.ts} +31 -31
  42. package/src/types.ts +33 -24
  43. package/src/utilts.ts +3 -3
  44. package/tsconfig.json +12 -12
  45. package/dist/runtimeObf.js +0 -56
  46. package/dist/transforms/controlFlowFlattening.js +0 -22
  47. package/dist/transforms/resolveLabels.js +0 -59
  48. package/src/runtimeObf.ts +0 -62
  49. package/src/transforms/controlFlowFlattening.ts +0 -30
  50. package/src/transforms/resolveContants.ts +0 -42
  51. package/src/transforms/resolveLabels.ts +0 -83
package/dist/runtime.js CHANGED
@@ -12,8 +12,9 @@ function decodeBytecode(s) {
12
12
  var b = typeof Buffer !== "undefined" ? Buffer.from(s, "base64") : Uint8Array.from(atob(s), function (c) {
13
13
  return c.charCodeAt(0);
14
14
  });
15
- var r = new Int32Array(b.length / 4);
16
- for (var i = 0; i < r.length; i++) r[i] = b[i * 4] | b[i * 4 + 1] << 8 | b[i * 4 + 2] << 16 | b[i * 4 + 3] << 24;
15
+ // Each slot is a u16 stored as 2 little-endian bytes.
16
+ var r = new Uint16Array(b.length / 2);
17
+ for (var i = 0; i < r.length; i++) r[i] = b[i * 2] | b[i * 2 + 1] << 8;
17
18
  return r;
18
19
  }
19
20
 
@@ -85,22 +86,10 @@ VM.prototype.peek = function () {
85
86
  return this._stack[this._stack.length - 1];
86
87
  };
87
88
 
88
- // Read one instruction word from this.bytecode at `pc`, unwrapping the
89
- // encoding so callers always get a plain { op, operand } pair regardless
90
- // of whether ENCODE_BYTECODE is active.
91
- VM.prototype.readWord = function (pc) {
92
- var word = this.bytecode[pc];
93
- if (ENCODE_BYTECODE) {
94
- return {
95
- op: word & 0xff,
96
- operand: word >>> 8
97
- };
98
- } else {
99
- return {
100
- op: word[0],
101
- operand: word[1]
102
- };
103
- }
89
+ // Consume the next slot from the flat bytecode stream and advance the PC.
90
+ // Called by opcode handlers to read each of their operands in order.
91
+ VM.prototype._operand = function () {
92
+ return this.bytecode[this._currentFrame._pc++];
104
93
  };
105
94
  VM.prototype.captureUpvalue = function (frame, slot) {
106
95
  // Reuse existing open upvalue for this frame+slot if one exists.
@@ -128,47 +117,48 @@ VM.prototype.run = function () {
128
117
  var now = () => {
129
118
  return performance.now();
130
119
  };
131
- var t = now();
120
+ var lastTime = now();
132
121
  while (true) {
133
122
  var frame = this._currentFrame;
134
123
  var bc = this.bytecode;
135
124
  if (frame._pc >= bc.length) break;
136
- var op, operand;
137
- var word = this.readWord(frame._pc++);
138
- op = word.op;
139
- operand = word.operand;
125
+ var op = this.bytecode[frame._pc++];
140
126
 
141
- // console.log(frame._pc - 1, op, operand);
127
+ // console.log(frame._pc - 1, op);
142
128
 
143
- // Debugging protection
129
+ // Debugging protection: detects a debugger by checking for pauses longer than 1s between instructions, which can only be caused by a debugger or by an extremely slow synchronous task
144
130
  if (TIMING_CHECKS) {
145
- var t2 = now();
146
- var isTamper = t2 - t > 1000;
147
- t = t2;
131
+ var currentTime = now();
132
+ var isTamper = currentTime - lastTime > 1000;
133
+ lastTime = currentTime;
148
134
  if (isTamper) {
135
+ // Poison the bytecode
136
+ for (var i = 0; i < this.bytecode.length; i++) this.bytecode[i] = 0;
137
+ // Break the current state
149
138
  op = OP.POP;
139
+ this._stack = [];
150
140
  }
151
141
  }
152
142
  try {
153
143
  /* @SWITCH */
154
144
  switch (op) {
155
145
  case OP.LOAD_CONST:
156
- this._push(this.constants[operand]);
146
+ this._push(this.constants[this._operand()]);
157
147
  break;
158
148
  case OP.LOAD_INT:
159
- this._push(operand);
149
+ this._push(this._operand());
160
150
  break;
161
151
  case OP.LOAD_LOCAL:
162
- this._push(frame.locals[operand]);
152
+ this._push(frame.locals[this._operand()]);
163
153
  break;
164
154
  case OP.STORE_LOCAL:
165
- frame.locals[operand] = this._pop();
155
+ frame.locals[this._operand()] = this._pop();
166
156
  break;
167
157
  case OP.LOAD_GLOBAL:
168
- this._push(this.globals[this.constants[operand]]);
158
+ this._push(this.globals[this.constants[this._operand()]]);
169
159
  break;
170
160
  case OP.STORE_GLOBAL:
171
- this.globals[this.constants[operand]] = this._pop();
161
+ this.globals[this.constants[this._operand()]] = this._pop();
172
162
  break;
173
163
  case OP.GET_PROP:
174
164
  {
@@ -352,45 +342,50 @@ VM.prototype.run = function () {
352
342
  break;
353
343
  }
354
344
  case OP.JUMP:
355
- frame._pc = operand;
345
+ frame._pc = this._operand();
356
346
  break;
357
347
  case OP.JUMP_IF_FALSE:
358
- if (!this._pop()) frame._pc = operand;
359
- break;
348
+ {
349
+ var target = this._operand();
350
+ if (!this._pop()) frame._pc = target;
351
+ break;
352
+ }
360
353
  case OP.JUMP_IF_TRUE_OR_POP:
361
- // || semantics: if truthy, we're done - leave value, jump over RHS.
362
- // If falsy, discard it and fall through to evaluate RHS.
363
- if (this.peek()) {
364
- frame._pc = operand;
365
- } else {
366
- this._pop();
354
+ {
355
+ // || semantics: if truthy, we're done - leave value, jump over RHS.
356
+ // If falsy, discard it and fall through to evaluate RHS.
357
+ var target = this._operand();
358
+ if (this.peek()) {
359
+ frame._pc = target;
360
+ } else {
361
+ this._pop();
362
+ }
363
+ break;
367
364
  }
368
- break;
369
365
  case OP.JUMP_IF_FALSE_OR_POP:
370
- // && semantics: if falsy, we're done - leave value, jump over RHS.
371
- // If truthy, discard it and fall through to evaluate RHS.
372
- if (!this.peek()) {
373
- frame._pc = operand;
374
- } else {
375
- this._pop();
366
+ {
367
+ // && semantics: if falsy, we're done - leave value, jump over RHS.
368
+ // If truthy, discard it and fall through to evaluate RHS.
369
+ var target = this._operand();
370
+ if (!this.peek()) {
371
+ frame._pc = target;
372
+ } else {
373
+ this._pop();
374
+ }
375
+ break;
376
376
  }
377
- break;
378
377
  case OP.MAKE_CLOSURE:
379
378
  {
380
- // operand = startPc: absolute index of the function body's first instruction.
381
- // Metadata is read from the value stack (pushed by _emitClosureMetadata).
382
- // Stack layout when we arrive here (top is rightmost):
383
- // [isLocal_0, idx_0, ..., isLocal_N-1, idx_N-1, uvCount, localCount, paramCount]
384
- var startPc = operand;
385
- var paramCount = this._pop();
386
- var localCount = this._pop();
387
- var uvCount = this._pop();
388
-
389
- // Upvalues were pushed in order 0..N-1 so we pop them in reverse.
379
+ // Inline operands: startPc, paramCount, localCount, uvCount,
380
+ // [isLocal_0, idx_0, isLocal_1, idx_1, ...]
381
+ var startPc = this._operand();
382
+ var paramCount = this._operand();
383
+ var localCount = this._operand();
384
+ var uvCount = this._operand();
390
385
  var uvDescs = new Array(uvCount);
391
- for (var i = uvCount - 1; i >= 0; i--) {
392
- var uvIndex = this._pop();
393
- var isLocalRaw = this._pop();
386
+ for (var i = 0; i < uvCount; i++) {
387
+ var isLocalRaw = this._operand();
388
+ var uvIndex = this._operand();
394
389
  uvDescs[i] = {
395
390
  isLocal: isLocalRaw,
396
391
  _index: uvIndex
@@ -434,19 +429,15 @@ VM.prototype.run = function () {
434
429
  this._push(shell);
435
430
  break;
436
431
  }
437
- case OP.DATA:
438
- // Should never appear in compiled output (reserved opcode slot).
439
- throw new Error("DATA opcode executed at pc " + (frame._pc - 1));
440
432
  case OP.LOAD_UPVALUE:
441
- this._push(frame.closure.upvalues[operand]._read());
433
+ this._push(frame.closure.upvalues[this._operand()]._read());
442
434
  break;
443
435
  case OP.STORE_UPVALUE:
444
- frame.closure.upvalues[operand]._write(this._pop());
436
+ frame.closure.upvalues[this._operand()]._write(this._pop());
445
437
  break;
446
438
  case OP.BUILD_ARRAY:
447
439
  {
448
- // Pop \`operand\` values off the stack in reverse, assemble array.
449
- var elems = this._stack.splice(this._stack.length - operand);
440
+ var elems = this._stack.splice(this._stack.length - this._operand());
450
441
  this._push(elems);
451
442
  break;
452
443
  }
@@ -454,7 +445,7 @@ VM.prototype.run = function () {
454
445
  {
455
446
  // Stack has: key0, val0, key1, val1 ... keyN, valN (pushed left->right)
456
447
  // Pop all pairs and build the object.
457
- var pairs = this._stack.splice(this._stack.length - operand * 2);
448
+ var pairs = this._stack.splice(this._stack.length - this._operand() * 2);
458
449
  var o = {};
459
450
  for (var i = 0; i < pairs.length; i += 2) {
460
451
  o[pairs[i]] = pairs[i + 1]; // key at even index, val at odd
@@ -495,7 +486,7 @@ VM.prototype.run = function () {
495
486
  }
496
487
  case OP.CALL:
497
488
  {
498
- var args = this._stack.splice(this._stack.length - operand);
489
+ var args = this._stack.splice(this._stack.length - this._operand());
499
490
  var callee = this._pop();
500
491
  if (callee && callee[CLOSURE_SYM]) {
501
492
  // VM closure - run directly in this VM, no sub-VM overhead
@@ -514,7 +505,7 @@ VM.prototype.run = function () {
514
505
  }
515
506
  case OP.CALL_METHOD:
516
507
  {
517
- var args = this._stack.splice(this._stack.length - operand);
508
+ var args = this._stack.splice(this._stack.length - this._operand());
518
509
  var callee = this._pop();
519
510
  var receiver = this._pop(); // left on stack by GET_PROP
520
511
  if (callee && callee[CLOSURE_SYM]) {
@@ -536,7 +527,7 @@ VM.prototype.run = function () {
536
527
  break;
537
528
  case OP.NEW:
538
529
  {
539
- var args = this._stack.splice(this._stack.length - operand);
530
+ var args = this._stack.splice(this._stack.length - this._operand());
540
531
  var callee = this._pop();
541
532
  if (callee && callee[CLOSURE_SYM]) {
542
533
  // VM closure constructor - prototype is unified via shell.prototype = closure.prototype
@@ -612,11 +603,12 @@ VM.prototype.run = function () {
612
603
  }
613
604
  case OP.FOR_IN_NEXT:
614
605
  {
615
- // operand = jump target for the done case.
616
- // Pop the iterator; if exhausted jump to exit, otherwise push next key.
606
+ // Operand = jump target for the done case. Must be read before the
607
+ // conditional so the PC stays correctly aligned either way.
608
+ var target = this._operand();
617
609
  var iter = this._pop();
618
610
  if (iter.i >= iter._keys.length) {
619
- frame._pc = operand;
611
+ frame._pc = target;
620
612
  } else {
621
613
  this._push(iter._keys[iter.i++]);
622
614
  }
@@ -624,11 +616,13 @@ VM.prototype.run = function () {
624
616
  }
625
617
  case OP.PATCH:
626
618
  {
627
- // Writes at operand the bytecode[arg1:arg2]
628
- var destPc = operand;
629
- var instructions = this.bytecode.slice(this._pop(), this._pop());
630
- for (var i = 0; i < instructions.length; i++) {
631
- this.bytecode[destPc + i] = instructions[i];
619
+ // Inline operands: destPc, sliceStart, sliceEnd
620
+ // Copies bytecode[sliceStart..sliceEnd) flat u16 slots to destPc.
621
+ var destPc = this._operand();
622
+ var sliceStart = this._operand();
623
+ var sliceEnd = this._operand();
624
+ for (var pi = sliceStart; pi < sliceEnd; pi++) {
625
+ this.bytecode[destPc + (pi - sliceStart)] = this.bytecode[pi];
632
626
  }
633
627
  break;
634
628
  }
@@ -638,7 +632,7 @@ VM.prototype.run = function () {
638
632
  // Saves: catch PC (operand), current stack depth, current frame-stack depth.
639
633
  // If an exception is thrown before TRY_END fires, the VM jumps here.
640
634
  frame._handlerStack.push({
641
- handlerPc: operand,
635
+ handlerPc: this._operand(),
642
636
  stackDepth: this._stack.length,
643
637
  frameStackDepth: this._frameStack.length
644
638
  });
@@ -0,0 +1,152 @@
1
+ import { SOURCE_NODE_SYM } from "../../compiler.js";
2
+ import { nextFreeSlot, U16_MAX } from "../utils/op-utils.js";
3
+
4
+ // Opcodes that must not appear inside a macro window.
5
+ // Jump ops: modifying frame._pc mid-execution causes the macro handler to
6
+ // run subsequent sub-bodies even after the jump already fired.
7
+ // Frame-changing ops (CALL, CALL_METHOD, NEW, RETURN, THROW): push/pop call
8
+ // frames mid-macro, leaving the `frame` variable stale for later sub-bodies.
9
+ // Variable-operand ops (MAKE_CLOSURE): the number of _operand() calls depends
10
+ // on uvCount at runtime, so a static handler cannot be generated.
11
+ // Infrastructure ops (DATA, PATCH, TRY_SETUP, TRY_END, DEBUGGER):
12
+ // either illegal here or nonsensical to fold.
13
+
14
+ // Scan bytecode for repeating instruction sequences and fold them into
15
+ // macro opcodes. Runs after selfModifying but before resolveLabels so
16
+ // IR-ref operands (label/constant) are carried through transparently.
17
+ //
18
+ // Algorithm:
19
+ // 1. Count every eligible window of length 2–5 by its op-code signature.
20
+ // 2. Keep sequences that appear >= 2 times; sort by frequency then length.
21
+ // 3. Assign unused opcode values (0–255, not already claimed by compiler.OP)
22
+ // to the most-frequent candidates and store in compiler.MACRO_OPS.
23
+ // 4. Re-scan bytecode, replacing each matched sequence with a single
24
+ // multi-operand instruction:
25
+ // [macroOpCode, operands_of_instr_0..., operands_of_instr_1..., ...]
26
+ // The runtime macro handler inlines each sub-instruction body; those
27
+ // bodies call this._operand() themselves to consume the inline operands.
28
+ export function macroOpcodes(bc, compiler) {
29
+ const originalOpToName = new Map();
30
+ for (const name in compiler.OP) {
31
+ const opVal = compiler.OP[name];
32
+ originalOpToName.set(opVal, name);
33
+ }
34
+ function isEligible(op, compiler) {
35
+ if (op === null) return false;
36
+ const {
37
+ OP,
38
+ JUMP_OPS
39
+ } = compiler;
40
+ if (JUMP_OPS.has(op)) return false;
41
+ const excluded = new Set([OP.RETURN, OP.PATCH, OP.TRY_SETUP, OP.TRY_END, OP.DEBUGGER, OP.CALL, OP.CALL_METHOD, OP.NEW, OP.THROW, OP.MAKE_CLOSURE // variable-length operands — cannot generate a static handler
42
+ ]);
43
+ return !excluded.has(op) && originalOpToName.has(op); // Only original Ops are eligible (specialized disallowed)
44
+ }
45
+
46
+ // Collect every opcode value already in use so we can find free slots.
47
+ const usedOpcodes = new Set(Object.values(compiler.OP).filter(v => v !== undefined));
48
+ if (usedOpcodes.size > U16_MAX) return {
49
+ bytecode: bc
50
+ };
51
+
52
+ // ── Step 1: count window frequencies ──────────────────────────────────────
53
+ const freqMap = new Map();
54
+ for (let i = 0; i < bc.length; i++) {
55
+ for (let len = 2; len <= 5; len++) {
56
+ if (i + len > bc.length) break;
57
+ const ops = [];
58
+ let valid = true;
59
+ for (let j = 0; j < len; j++) {
60
+ const op = bc[i + j][0];
61
+ if (!isEligible(op, compiler)) {
62
+ valid = false;
63
+ break;
64
+ }
65
+ ops.push(op);
66
+ }
67
+ // If position (i+j) is ineligible, longer windows from i are also invalid.
68
+ if (!valid) break;
69
+ const key = ops.join(",");
70
+ const entry = freqMap.get(key);
71
+ if (entry) {
72
+ entry.count++;
73
+ } else {
74
+ freqMap.set(key, {
75
+ ops,
76
+ count: 1
77
+ });
78
+ }
79
+ }
80
+ }
81
+
82
+ // ── Step 2: keep repeated candidates, prioritise by frequency then length ─
83
+ const candidates = Array.from(freqMap.values()).filter(e => e.count >= 2).sort((a, b) => b.count - a.count || b.ops.length - a.ops.length);
84
+ if (candidates.length === 0) return {
85
+ bytecode: bc
86
+ };
87
+
88
+ // ── Step 3: assign free opcode slots to the best candidates ───────────────
89
+ for (let i = 0; i < candidates.length; i++) {
90
+ const macroOp = nextFreeSlot(usedOpcodes);
91
+ if (macroOp === -1) break;
92
+ const ops = candidates[i].ops;
93
+ compiler.MACRO_OPS[macroOp] = ops;
94
+ // Register a combined name so OP_NAME and comment generation both work.
95
+ let combinedName = ops.map(v => compiler.OP_NAME[v] ?? `OP_${v}`).join(",");
96
+ compiler.OP_NAME[macroOp] = combinedName;
97
+ }
98
+
99
+ // ── Step 4: build signature → macro opcode lookup ─────────────────────────
100
+ const sigToMacro = new Map();
101
+ for (const [macroOpStr, ops] of Object.entries(compiler.MACRO_OPS)) {
102
+ sigToMacro.set(ops.join(","), Number(macroOpStr));
103
+ }
104
+
105
+ // ── Step 5: replace sequences with a single multi-operand macro instruction ─
106
+ // Emit [macroOpCode, ...all operands from all constituent instructions].
107
+ // The runtime handler inlines each sub-instruction body; those bodies call
108
+ // this._operand() themselves to consume the operands in order.
109
+ const result = [];
110
+ let i = 0;
111
+ while (i < bc.length) {
112
+ let matched = false;
113
+ for (let len = 5; len >= 2; len--) {
114
+ if (i + len > bc.length) continue;
115
+ const instructions = [];
116
+ let valid = true;
117
+ for (let j = 0; j < len; j++) {
118
+ const instr = bc[i + j];
119
+ const op = instr[0];
120
+ if (!isEligible(op, compiler)) {
121
+ valid = false;
122
+ break;
123
+ }
124
+ instructions.push(instr);
125
+ }
126
+ if (!valid) continue;
127
+ const key = instructions.map(instr => instr[0]).join(",");
128
+ if (!sigToMacro.has(key)) continue;
129
+ const macroOpCode = sigToMacro.get(key);
130
+
131
+ // Collect all operands from every constituent instruction, in order.
132
+ // Each instruction contributes instr.slice(1) — zero or more operands.
133
+ const allOperands = [];
134
+ for (let j = 0; j < len; j++) {
135
+ allOperands.push(...bc[i + j].slice(1));
136
+ }
137
+ const newInstr = [macroOpCode, ...allOperands];
138
+ newInstr[SOURCE_NODE_SYM] = instructions[0][SOURCE_NODE_SYM];
139
+ result.push(newInstr);
140
+ i += len;
141
+ matched = true;
142
+ break;
143
+ }
144
+ if (!matched) {
145
+ result.push(bc[i]);
146
+ i++;
147
+ }
148
+ }
149
+ return {
150
+ bytecode: result
151
+ };
152
+ }
@@ -1,25 +1,35 @@
1
- import { SOURCE_NODE_SYM } from "../compiler.js";
1
+ import { SOURCE_NODE_SYM } from "../../compiler.js";
2
2
 
3
3
  // Resolve all {type:"constant", value} operands to integer indices into the
4
4
  // constants pool. Returns both the resolved bytecode and the constants array
5
5
  // so the Serializer can use it for comment generation and output.
6
+ // Constant refs may appear at any operand position (index 1, 2, 3, …).
6
7
  export function resolveConstants(bc) {
7
8
  const constants = [];
8
9
  const constantsMap = new Map();
9
- function intern(value) {
10
+ function intern(operand) {
11
+ const operandAsObject = typeof operand === "object" && operand ? operand : {};
12
+ const value = operand.value;
10
13
  let idx = constantsMap.get(value);
11
14
  if (typeof idx !== "number") {
12
15
  idx = constants.length;
13
16
  constantsMap.set(value, idx);
14
17
  constants.push(value);
15
18
  }
16
- return idx;
19
+ const newOperand = {
20
+ ...operandAsObject,
21
+ type: "number",
22
+ resolvedValue: idx
23
+ };
24
+ return newOperand;
17
25
  }
18
26
  const resolved = [];
19
27
  for (const instr of bc) {
20
- const [op, operand] = instr;
21
- if (operand !== undefined && operand !== null && typeof operand === "object" && operand.type === "constant") {
22
- const newInstr = [op, intern(operand.value)];
28
+ const [op, ...operands] = instr;
29
+ const hasConstant = operands.some(o => o !== undefined && o !== null && typeof o === "object" && o.type === "constant");
30
+ if (hasConstant) {
31
+ const newOperands = operands.map(operand => operand?.type === "constant" ? intern(operand) : operand);
32
+ const newInstr = [op, ...newOperands];
23
33
  newInstr[SOURCE_NODE_SYM] = instr[SOURCE_NODE_SYM];
24
34
  resolved.push(newInstr);
25
35
  } else {
@@ -0,0 +1,80 @@
1
+ // --- Label IR ---
2
+ // During compilation, jump targets are symbolic labels instead of hard-coded
3
+ // PC numbers. Two IR "pseudo operands" carry the label information:
4
+ //
5
+ // defineLabel operand : [null, {type:"defineLabel", label:"FN_ENTRY_1"}]
6
+ // Marks a position in the bytecode array.
7
+ // resolveLabels() strips these out entirely.
8
+ //
9
+ // label ref operand : [OP.JUMP, {type:"label", label:"FN_ENTRY_1"}]
10
+ // Used as the operand of any jump instruction. resolveLabels() replaces
11
+ // it with the integer PC that the corresponding defineLabel resolves to.
12
+ //
13
+ // The output bytecode is still a nested array of instructions.
14
+ // Flattening (one u16 slot per op, one per operand) happens in the Serializer.
15
+ // PC values computed here reflect the FLAT slot index so that jump targets,
16
+ // startPc, and LOAD_INT label operands are all correct after flattening.
17
+
18
+ import { SOURCE_NODE_SYM } from "../../compiler.js";
19
+
20
+ // Resolve symbolic labels to absolute flat-PC indices within a bytecode array.
21
+ // defineLabel pseudo-instructions are stripped; label-ref operands become ints.
22
+ // Each instruction [op, ...operands] occupies (1 + operands.length) flat slots,
23
+ // so realPc advances by instr.length for every non-pseudo instruction.
24
+ export function resolveLabels(bc, compiler) {
25
+ // Pass 1 – walk the array and record each label's flat PC, counting
26
+ // real instructions by their full flat width (1 op + N operands).
27
+ const labelToPc = new Map();
28
+ let realPc = 0;
29
+ for (const instr of bc) {
30
+ const op = instr[0];
31
+ const operand = instr[1];
32
+ if (op === null && operand !== null && typeof operand === "object" && operand.type === "defineLabel") {
33
+ labelToPc.set(operand.label, realPc);
34
+ } else {
35
+ // Each instruction occupies 1 slot for the opcode + 1 per operand.
36
+ // IMPORTANT: 'placeholder' operands are not counted
37
+ realPc += instr.filter(x => x?.placeholder !== true).length;
38
+ }
39
+ }
40
+
41
+ // Pass 2 – build the resolved instruction list.
42
+ // Label refs may appear at any operand position, so scan all of them.
43
+ const resolved = [];
44
+ for (const instr of bc) {
45
+ const [op, ...operands] = instr;
46
+
47
+ // Strip defineLabel pseudo-ops.
48
+ if (op === null && typeof operands[0] === "object" && operands[0]?.type === "defineLabel") {
49
+ continue;
50
+ }
51
+
52
+ // Replace label-ref operands with their resolved flat PC (any position).
53
+ const newOperands = operands.map(operand => {
54
+ if (operand !== undefined && operand !== null && typeof operand === "object" && operand.type === "label") {
55
+ const pc = labelToPc.get(operand.label);
56
+ if (pc === undefined) throw new Error(`Undefined label: ${operand.label}`);
57
+ var operandAsObject = typeof operand === "object" && operand ? operand : {};
58
+ const newOperand = {
59
+ ...operandAsObject,
60
+ // Preserve original operand properties
61
+ type: "number",
62
+ resolvedValue: pc + (operand.offset ?? 0)
63
+ };
64
+ return newOperand;
65
+ }
66
+ return operand;
67
+ });
68
+ const newInstr = [op, ...newOperands];
69
+ newInstr[SOURCE_NODE_SYM] = instr[SOURCE_NODE_SYM];
70
+ resolved.push(newInstr);
71
+ }
72
+
73
+ // Patch each function descriptor's startPc now that labels are resolved.
74
+ for (const desc of compiler.fnDescriptors) {
75
+ desc.startPc = labelToPc.get(desc.startLabel) ?? labelToPc.get(desc.entryLabel);
76
+ }
77
+ return {
78
+ bytecode: resolved
79
+ };
80
+ }
@@ -1,21 +1,22 @@
1
+ import { choice } from "../utils/random-utils.js";
1
2
  export function selfModifying(bc, compiler) {
2
3
  // Walk the bytecode looking for "defineLabel" pseudo-ops, which start basic
3
4
  // blocks. For each block we collect the body (instructions between the label
4
5
  // and the next label/jump terminator), move it to the end of the bytecode
5
6
  // under a fresh "patch_LXX" label, and replace it in-place with:
6
7
  //
7
- // defineLabel ("originalLabel") ← kept as-is (pseudo-op)
8
- // LOAD_INT { label: patch_LXX, offset: N } ← push slice-end PC
9
- // LOAD_INT { label: patch_LXX } push slice-start PC
10
- // PATCH { label: originalLabel, offset: 3 } ← destPc = L+3
11
- // LOAD_INT 0 × N ← N placeholder instructions
8
+ // defineLabel ("originalLabel") ← kept as-is (pseudo-op)
9
+ // PATCH destPc sliceStart sliceEnd ← 4 flat slots total
10
+ // Garbage Opcodes × bodyFlatSize placeholder slots
12
11
  //
13
- // PATCH pops (start, end) from the stack and copies bytecode[start..end) to
14
- // bytecode[destPc..]. Since destPc = L+3 = first placeholder, the body is
15
- // written exactly over the placeholder region on the first call. Subsequent
16
- // calls are idempotent (same bytes written again). Execution falls through
17
- // from PATCH into the freshly-patched body at L+3, then continues naturally
18
- // to whatever terminator (JUMP/RETURN) follows at L+3+N.
12
+ // PATCH reads three inline operands via _operand():
13
+ // destPc = originalLabel + 4 (first slot after PATCH's own 4 slots)
14
+ // sliceStart = patchLabel (flat PC of appended body)
15
+ // sliceEnd = patchLabel + bodyFlatSize
16
+ //
17
+ // On first execution PATCH copies bytecode[sliceStart..sliceEnd) over the
18
+ // placeholder region starting at destPc. Execution then falls through into
19
+ // the freshly-patched body. Subsequent calls are idempotent.
19
20
 
20
21
  const {
21
22
  OP,
@@ -45,9 +46,8 @@ export function selfModifying(bc, compiler) {
45
46
  break;
46
47
  }
47
48
 
48
- // Jump instructions, RETURN, and DATA (function header words) all
49
- // terminate the body without being included in it.
50
- if (nextOp !== null && (JUMP_OPS.has(nextOp) || nextOp === OP.RETURN || nextOp === OP.DATA)) {
49
+ // Jump instructions and RETURN both terminate the body.
50
+ if (nextOp !== null && (JUMP_OPS.has(nextOp) || nextOp === OP.RETURN)) {
51
51
  break;
52
52
  }
53
53
  j++;
@@ -60,31 +60,31 @@ export function selfModifying(bc, compiler) {
60
60
  }
61
61
  const patchLabel = `patch_${originalLabel}_${patchCount++}`;
62
62
 
63
- // ── Stub (3 real instructions) ──────────────────────────────────────
64
- // LOAD_INT pushes the end-index of the body slice (patchLabel_pc + N).
65
- // LOAD_INT pushes the start-index (patchLabel_pc).
66
- // Stack before PATCH: [end (bottom), start (top)].
67
- // PATCH: slice(pop()=start, pop()=end) copies the body to destPc = L+3.
68
- result.push([OP.LOAD_INT, {
63
+ // Flat size of the body (each instruction occupies instr.length slots).
64
+ const bodyFlatSize = body.reduce((acc, instr) => acc + instr.filter(x => x?.placeholder !== true).length, 0);
65
+
66
+ // ── PATCH instruction (4 flat slots: opcode + 3 operands) ───────────
67
+ // destPc = originalLabel + 4 (slot right after PATCH's 4 slots)
68
+ // sliceStart = patchLabel
69
+ // sliceEnd = patchLabel + bodyFlatSize
70
+ result.push([OP.PATCH, {
69
71
  type: "label",
70
- label: patchLabel,
71
- offset: N
72
- }]);
73
- result.push([OP.LOAD_INT, {
72
+ label: originalLabel,
73
+ offset: 4
74
+ }, {
74
75
  type: "label",
75
76
  label: patchLabel
76
- }]);
77
- result.push([OP.PATCH, {
77
+ }, {
78
78
  type: "label",
79
- label: originalLabel,
80
- offset: 3
79
+ label: patchLabel,
80
+ offset: bodyFlatSize
81
81
  }]);
82
82
 
83
- // ── Placeholders (N instructions) ───────────────────────────────────
84
- // These are overwritten by PATCH on the first execution. They never
85
- // execute as LOAD_INT 0 in a correct run.
86
- for (let p = 0; p < N; p++) {
87
- result.push([OP.LOAD_INT, 0]);
83
+ // ── Placeholders (Garbage Opcodes * bodyFlatSize, each 1 flat slot) ────────────
84
+ // These are overwritten by PATCH on first execution.
85
+ for (let p = 0; p < bodyFlatSize; p++) {
86
+ const randomOpcode = choice(Object.values(compiler.OP));
87
+ result.push([+randomOpcode]);
88
88
  }
89
89
 
90
90
  // ── Append real body at end ─────────────────────────────────────────