js-confuser-vm 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +125 -28
- package/LICENSE +21 -21
- package/README.MD +370 -196
- package/babel-plugin-inline-runtime.cjs +34 -34
- package/babel.config.json +23 -23
- package/dist/build-runtime.js +53 -0
- package/dist/compiler.js +107 -117
- package/dist/runtime.js +78 -84
- package/dist/transforms/bytecode/macroOpcodes.js +152 -0
- package/dist/transforms/{resolveContants.js → bytecode/resolveContants.js} +16 -6
- package/dist/transforms/bytecode/resolveLabels.js +80 -0
- package/dist/transforms/{selfModifying.js → bytecode/selfModifying.js} +33 -33
- package/dist/transforms/bytecode/specializedOpcodes.js +103 -0
- package/dist/transforms/runtime/macroOpcodes.js +88 -0
- package/dist/transforms/runtime/minify.js +1 -0
- package/dist/transforms/runtime/shuffleOpcodes.js +20 -0
- package/dist/transforms/runtime/specializedOpcodes.js +102 -0
- package/dist/transforms/utils/op-utils.js +25 -0
- package/dist/{random.js → transforms/utils/random-utils.js} +3 -3
- package/dist/types.js +4 -2
- package/index.ts +34 -22
- package/jest-strip-types.js +10 -10
- package/jest.config.js +35 -28
- package/package.json +49 -48
- package/src/build-runtime.ts +57 -0
- package/src/compiler.ts +2069 -2066
- package/src/index.ts +14 -14
- package/src/minify.ts +21 -21
- package/src/options.ts +14 -12
- package/src/runtime.ts +771 -779
- package/src/transforms/bytecode/macroOpcodes.ts +177 -0
- package/src/transforms/bytecode/resolveContants.ts +62 -0
- package/src/transforms/bytecode/resolveLabels.ts +107 -0
- package/src/transforms/{selfModifying.ts → bytecode/selfModifying.ts} +37 -40
- package/src/transforms/bytecode/specializedOpcodes.ts +118 -0
- package/src/transforms/runtime/macroOpcodes.ts +111 -0
- package/src/transforms/runtime/minify.ts +1 -0
- package/src/transforms/runtime/shuffleOpcodes.ts +24 -0
- package/src/transforms/runtime/specializedOpcodes.ts +146 -0
- package/src/transforms/utils/op-utils.ts +26 -0
- package/src/{random.ts → transforms/utils/random-utils.ts} +31 -31
- package/src/types.ts +33 -24
- package/src/utilts.ts +3 -3
- package/tsconfig.json +12 -12
- package/dist/runtimeObf.js +0 -56
- package/dist/transforms/controlFlowFlattening.js +0 -22
- package/dist/transforms/resolveLabels.js +0 -59
- package/src/runtimeObf.ts +0 -62
- package/src/transforms/controlFlowFlattening.ts +0 -30
- package/src/transforms/resolveContants.ts +0 -42
- package/src/transforms/resolveLabels.ts +0 -83
package/dist/runtime.js
CHANGED
|
@@ -12,8 +12,9 @@ function decodeBytecode(s) {
|
|
|
12
12
|
var b = typeof Buffer !== "undefined" ? Buffer.from(s, "base64") : Uint8Array.from(atob(s), function (c) {
|
|
13
13
|
return c.charCodeAt(0);
|
|
14
14
|
});
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
// Each slot is a u16 stored as 2 little-endian bytes.
|
|
16
|
+
var r = new Uint16Array(b.length / 2);
|
|
17
|
+
for (var i = 0; i < r.length; i++) r[i] = b[i * 2] | b[i * 2 + 1] << 8;
|
|
17
18
|
return r;
|
|
18
19
|
}
|
|
19
20
|
|
|
@@ -85,22 +86,10 @@ VM.prototype.peek = function () {
|
|
|
85
86
|
return this._stack[this._stack.length - 1];
|
|
86
87
|
};
|
|
87
88
|
|
|
88
|
-
//
|
|
89
|
-
//
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
var word = this.bytecode[pc];
|
|
93
|
-
if (ENCODE_BYTECODE) {
|
|
94
|
-
return {
|
|
95
|
-
op: word & 0xff,
|
|
96
|
-
operand: word >>> 8
|
|
97
|
-
};
|
|
98
|
-
} else {
|
|
99
|
-
return {
|
|
100
|
-
op: word[0],
|
|
101
|
-
operand: word[1]
|
|
102
|
-
};
|
|
103
|
-
}
|
|
89
|
+
// Consume the next slot from the flat bytecode stream and advance the PC.
|
|
90
|
+
// Called by opcode handlers to read each of their operands in order.
|
|
91
|
+
VM.prototype._operand = function () {
|
|
92
|
+
return this.bytecode[this._currentFrame._pc++];
|
|
104
93
|
};
|
|
105
94
|
VM.prototype.captureUpvalue = function (frame, slot) {
|
|
106
95
|
// Reuse existing open upvalue for this frame+slot if one exists.
|
|
@@ -128,47 +117,48 @@ VM.prototype.run = function () {
|
|
|
128
117
|
var now = () => {
|
|
129
118
|
return performance.now();
|
|
130
119
|
};
|
|
131
|
-
var
|
|
120
|
+
var lastTime = now();
|
|
132
121
|
while (true) {
|
|
133
122
|
var frame = this._currentFrame;
|
|
134
123
|
var bc = this.bytecode;
|
|
135
124
|
if (frame._pc >= bc.length) break;
|
|
136
|
-
var op
|
|
137
|
-
var word = this.readWord(frame._pc++);
|
|
138
|
-
op = word.op;
|
|
139
|
-
operand = word.operand;
|
|
125
|
+
var op = this.bytecode[frame._pc++];
|
|
140
126
|
|
|
141
|
-
// console.log(frame._pc - 1, op
|
|
127
|
+
// console.log(frame._pc - 1, op);
|
|
142
128
|
|
|
143
|
-
// Debugging protection
|
|
129
|
+
// Debugging protection: detects a debugger by checking for pauses longer than 1s between instructions, which can only be caused by a debugger or by an extremely slow synchronous task
|
|
144
130
|
if (TIMING_CHECKS) {
|
|
145
|
-
var
|
|
146
|
-
var isTamper =
|
|
147
|
-
|
|
131
|
+
var currentTime = now();
|
|
132
|
+
var isTamper = currentTime - lastTime > 1000;
|
|
133
|
+
lastTime = currentTime;
|
|
148
134
|
if (isTamper) {
|
|
135
|
+
// Poison the bytecode
|
|
136
|
+
for (var i = 0; i < this.bytecode.length; i++) this.bytecode[i] = 0;
|
|
137
|
+
// Break the current state
|
|
149
138
|
op = OP.POP;
|
|
139
|
+
this._stack = [];
|
|
150
140
|
}
|
|
151
141
|
}
|
|
152
142
|
try {
|
|
153
143
|
/* @SWITCH */
|
|
154
144
|
switch (op) {
|
|
155
145
|
case OP.LOAD_CONST:
|
|
156
|
-
this._push(this.constants[
|
|
146
|
+
this._push(this.constants[this._operand()]);
|
|
157
147
|
break;
|
|
158
148
|
case OP.LOAD_INT:
|
|
159
|
-
this._push(
|
|
149
|
+
this._push(this._operand());
|
|
160
150
|
break;
|
|
161
151
|
case OP.LOAD_LOCAL:
|
|
162
|
-
this._push(frame.locals[
|
|
152
|
+
this._push(frame.locals[this._operand()]);
|
|
163
153
|
break;
|
|
164
154
|
case OP.STORE_LOCAL:
|
|
165
|
-
frame.locals[
|
|
155
|
+
frame.locals[this._operand()] = this._pop();
|
|
166
156
|
break;
|
|
167
157
|
case OP.LOAD_GLOBAL:
|
|
168
|
-
this._push(this.globals[this.constants[
|
|
158
|
+
this._push(this.globals[this.constants[this._operand()]]);
|
|
169
159
|
break;
|
|
170
160
|
case OP.STORE_GLOBAL:
|
|
171
|
-
this.globals[this.constants[
|
|
161
|
+
this.globals[this.constants[this._operand()]] = this._pop();
|
|
172
162
|
break;
|
|
173
163
|
case OP.GET_PROP:
|
|
174
164
|
{
|
|
@@ -352,45 +342,50 @@ VM.prototype.run = function () {
|
|
|
352
342
|
break;
|
|
353
343
|
}
|
|
354
344
|
case OP.JUMP:
|
|
355
|
-
frame._pc =
|
|
345
|
+
frame._pc = this._operand();
|
|
356
346
|
break;
|
|
357
347
|
case OP.JUMP_IF_FALSE:
|
|
358
|
-
|
|
359
|
-
|
|
348
|
+
{
|
|
349
|
+
var target = this._operand();
|
|
350
|
+
if (!this._pop()) frame._pc = target;
|
|
351
|
+
break;
|
|
352
|
+
}
|
|
360
353
|
case OP.JUMP_IF_TRUE_OR_POP:
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
354
|
+
{
|
|
355
|
+
// || semantics: if truthy, we're done - leave value, jump over RHS.
|
|
356
|
+
// If falsy, discard it and fall through to evaluate RHS.
|
|
357
|
+
var target = this._operand();
|
|
358
|
+
if (this.peek()) {
|
|
359
|
+
frame._pc = target;
|
|
360
|
+
} else {
|
|
361
|
+
this._pop();
|
|
362
|
+
}
|
|
363
|
+
break;
|
|
367
364
|
}
|
|
368
|
-
break;
|
|
369
365
|
case OP.JUMP_IF_FALSE_OR_POP:
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
366
|
+
{
|
|
367
|
+
// && semantics: if falsy, we're done - leave value, jump over RHS.
|
|
368
|
+
// If truthy, discard it and fall through to evaluate RHS.
|
|
369
|
+
var target = this._operand();
|
|
370
|
+
if (!this.peek()) {
|
|
371
|
+
frame._pc = target;
|
|
372
|
+
} else {
|
|
373
|
+
this._pop();
|
|
374
|
+
}
|
|
375
|
+
break;
|
|
376
376
|
}
|
|
377
|
-
break;
|
|
378
377
|
case OP.MAKE_CLOSURE:
|
|
379
378
|
{
|
|
380
|
-
//
|
|
381
|
-
//
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
var
|
|
385
|
-
var
|
|
386
|
-
var localCount = this._pop();
|
|
387
|
-
var uvCount = this._pop();
|
|
388
|
-
|
|
389
|
-
// Upvalues were pushed in order 0..N-1 so we pop them in reverse.
|
|
379
|
+
// Inline operands: startPc, paramCount, localCount, uvCount,
|
|
380
|
+
// [isLocal_0, idx_0, isLocal_1, idx_1, ...]
|
|
381
|
+
var startPc = this._operand();
|
|
382
|
+
var paramCount = this._operand();
|
|
383
|
+
var localCount = this._operand();
|
|
384
|
+
var uvCount = this._operand();
|
|
390
385
|
var uvDescs = new Array(uvCount);
|
|
391
|
-
for (var i =
|
|
392
|
-
var
|
|
393
|
-
var
|
|
386
|
+
for (var i = 0; i < uvCount; i++) {
|
|
387
|
+
var isLocalRaw = this._operand();
|
|
388
|
+
var uvIndex = this._operand();
|
|
394
389
|
uvDescs[i] = {
|
|
395
390
|
isLocal: isLocalRaw,
|
|
396
391
|
_index: uvIndex
|
|
@@ -434,19 +429,15 @@ VM.prototype.run = function () {
|
|
|
434
429
|
this._push(shell);
|
|
435
430
|
break;
|
|
436
431
|
}
|
|
437
|
-
case OP.DATA:
|
|
438
|
-
// Should never appear in compiled output (reserved opcode slot).
|
|
439
|
-
throw new Error("DATA opcode executed at pc " + (frame._pc - 1));
|
|
440
432
|
case OP.LOAD_UPVALUE:
|
|
441
|
-
this._push(frame.closure.upvalues[
|
|
433
|
+
this._push(frame.closure.upvalues[this._operand()]._read());
|
|
442
434
|
break;
|
|
443
435
|
case OP.STORE_UPVALUE:
|
|
444
|
-
frame.closure.upvalues[
|
|
436
|
+
frame.closure.upvalues[this._operand()]._write(this._pop());
|
|
445
437
|
break;
|
|
446
438
|
case OP.BUILD_ARRAY:
|
|
447
439
|
{
|
|
448
|
-
|
|
449
|
-
var elems = this._stack.splice(this._stack.length - operand);
|
|
440
|
+
var elems = this._stack.splice(this._stack.length - this._operand());
|
|
450
441
|
this._push(elems);
|
|
451
442
|
break;
|
|
452
443
|
}
|
|
@@ -454,7 +445,7 @@ VM.prototype.run = function () {
|
|
|
454
445
|
{
|
|
455
446
|
// Stack has: key0, val0, key1, val1 ... keyN, valN (pushed left->right)
|
|
456
447
|
// Pop all pairs and build the object.
|
|
457
|
-
var pairs = this._stack.splice(this._stack.length -
|
|
448
|
+
var pairs = this._stack.splice(this._stack.length - this._operand() * 2);
|
|
458
449
|
var o = {};
|
|
459
450
|
for (var i = 0; i < pairs.length; i += 2) {
|
|
460
451
|
o[pairs[i]] = pairs[i + 1]; // key at even index, val at odd
|
|
@@ -495,7 +486,7 @@ VM.prototype.run = function () {
|
|
|
495
486
|
}
|
|
496
487
|
case OP.CALL:
|
|
497
488
|
{
|
|
498
|
-
var args = this._stack.splice(this._stack.length -
|
|
489
|
+
var args = this._stack.splice(this._stack.length - this._operand());
|
|
499
490
|
var callee = this._pop();
|
|
500
491
|
if (callee && callee[CLOSURE_SYM]) {
|
|
501
492
|
// VM closure - run directly in this VM, no sub-VM overhead
|
|
@@ -514,7 +505,7 @@ VM.prototype.run = function () {
|
|
|
514
505
|
}
|
|
515
506
|
case OP.CALL_METHOD:
|
|
516
507
|
{
|
|
517
|
-
var args = this._stack.splice(this._stack.length -
|
|
508
|
+
var args = this._stack.splice(this._stack.length - this._operand());
|
|
518
509
|
var callee = this._pop();
|
|
519
510
|
var receiver = this._pop(); // left on stack by GET_PROP
|
|
520
511
|
if (callee && callee[CLOSURE_SYM]) {
|
|
@@ -536,7 +527,7 @@ VM.prototype.run = function () {
|
|
|
536
527
|
break;
|
|
537
528
|
case OP.NEW:
|
|
538
529
|
{
|
|
539
|
-
var args = this._stack.splice(this._stack.length -
|
|
530
|
+
var args = this._stack.splice(this._stack.length - this._operand());
|
|
540
531
|
var callee = this._pop();
|
|
541
532
|
if (callee && callee[CLOSURE_SYM]) {
|
|
542
533
|
// VM closure constructor - prototype is unified via shell.prototype = closure.prototype
|
|
@@ -612,11 +603,12 @@ VM.prototype.run = function () {
|
|
|
612
603
|
}
|
|
613
604
|
case OP.FOR_IN_NEXT:
|
|
614
605
|
{
|
|
615
|
-
//
|
|
616
|
-
//
|
|
606
|
+
// Operand = jump target for the done case. Must be read before the
|
|
607
|
+
// conditional so the PC stays correctly aligned either way.
|
|
608
|
+
var target = this._operand();
|
|
617
609
|
var iter = this._pop();
|
|
618
610
|
if (iter.i >= iter._keys.length) {
|
|
619
|
-
frame._pc =
|
|
611
|
+
frame._pc = target;
|
|
620
612
|
} else {
|
|
621
613
|
this._push(iter._keys[iter.i++]);
|
|
622
614
|
}
|
|
@@ -624,11 +616,13 @@ VM.prototype.run = function () {
|
|
|
624
616
|
}
|
|
625
617
|
case OP.PATCH:
|
|
626
618
|
{
|
|
627
|
-
//
|
|
628
|
-
|
|
629
|
-
var
|
|
630
|
-
|
|
631
|
-
|
|
619
|
+
// Inline operands: destPc, sliceStart, sliceEnd
|
|
620
|
+
// Copies bytecode[sliceStart..sliceEnd) flat u16 slots to destPc.
|
|
621
|
+
var destPc = this._operand();
|
|
622
|
+
var sliceStart = this._operand();
|
|
623
|
+
var sliceEnd = this._operand();
|
|
624
|
+
for (var pi = sliceStart; pi < sliceEnd; pi++) {
|
|
625
|
+
this.bytecode[destPc + (pi - sliceStart)] = this.bytecode[pi];
|
|
632
626
|
}
|
|
633
627
|
break;
|
|
634
628
|
}
|
|
@@ -638,7 +632,7 @@ VM.prototype.run = function () {
|
|
|
638
632
|
// Saves: catch PC (operand), current stack depth, current frame-stack depth.
|
|
639
633
|
// If an exception is thrown before TRY_END fires, the VM jumps here.
|
|
640
634
|
frame._handlerStack.push({
|
|
641
|
-
handlerPc:
|
|
635
|
+
handlerPc: this._operand(),
|
|
642
636
|
stackDepth: this._stack.length,
|
|
643
637
|
frameStackDepth: this._frameStack.length
|
|
644
638
|
});
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import { SOURCE_NODE_SYM } from "../../compiler.js";
|
|
2
|
+
import { nextFreeSlot, U16_MAX } from "../utils/op-utils.js";
|
|
3
|
+
|
|
4
|
+
// Opcodes that must not appear inside a macro window.
|
|
5
|
+
// Jump ops: modifying frame._pc mid-execution causes the macro handler to
|
|
6
|
+
// run subsequent sub-bodies even after the jump already fired.
|
|
7
|
+
// Frame-changing ops (CALL, CALL_METHOD, NEW, RETURN, THROW): push/pop call
|
|
8
|
+
// frames mid-macro, leaving the `frame` variable stale for later sub-bodies.
|
|
9
|
+
// Variable-operand ops (MAKE_CLOSURE): the number of _operand() calls depends
|
|
10
|
+
// on uvCount at runtime, so a static handler cannot be generated.
|
|
11
|
+
// Infrastructure ops (PATCH, TRY_SETUP, TRY_END, DEBUGGER):
|
|
12
|
+
// either illegal here or nonsensical to fold.
|
|
13
|
+
|
|
14
|
+
// Scan bytecode for repeating instruction sequences and fold them into
|
|
15
|
+
// macro opcodes. Runs after selfModifying but before resolveLabels so
|
|
16
|
+
// IR-ref operands (label/constant) are carried through transparently.
|
|
17
|
+
//
|
|
18
|
+
// Algorithm:
|
|
19
|
+
// 1. Count every eligible window of length 2–5 by its op-code signature.
|
|
20
|
+
// 2. Keep sequences that appear >= 2 times; sort by frequency then length.
|
|
21
|
+
// 3. Assign unused opcode values (within the u16 range bounded by U16_MAX, not already claimed by compiler.OP)
|
|
22
|
+
// to the most-frequent candidates and store in compiler.MACRO_OPS.
|
|
23
|
+
// 4. Re-scan bytecode, replacing each matched sequence with a single
|
|
24
|
+
// multi-operand instruction:
|
|
25
|
+
// [macroOpCode, operands_of_instr_0..., operands_of_instr_1..., ...]
|
|
26
|
+
// The runtime macro handler inlines each sub-instruction body; those
|
|
27
|
+
// bodies call this._operand() themselves to consume the inline operands.
|
|
28
|
+
/**
 * Fold repeated runs of simple instructions into single "macro" opcodes.
 *
 * Works on the nested instruction form `[op, ...operands]`. Operands are
 * copied through untouched, so unresolved IR references (labels, constants)
 * survive the fold.
 *
 * Steps:
 *   1. Count every window of 2-5 consecutive eligible instructions, keyed by
 *      its comma-joined opcode signature.
 *   2. Keep signatures seen at least twice, ordered by frequency, then length.
 *   3. Give each kept signature a free opcode value and record it in
 *      `compiler.MACRO_OPS` (plus a combined name in `compiler.OP_NAME`).
 *   4. Re-scan, greedily replacing the longest matching window at each
 *      position with `[macroOp, ...operands of every folded instruction]`.
 *
 * @param {Array<Array>} bc Instruction list. Entries whose opcode is not an
 *   eligible value of `compiler.OP` (including `null` pseudo-ops) are never folded.
 * @param {object} compiler Reads `OP`, `JUMP_OPS` and `OP_NAME`; mutates
 *   `MACRO_OPS` and `OP_NAME`.
 * @returns {{bytecode: Array<Array>}} The folded list, or `bc` itself when
 *   there is nothing to fold (no repeated sequence, or `OP` already holds
 *   more than `U16_MAX` values).
 */
export function macroOpcodes(bc, compiler) {
  const MIN_WINDOW = 2;
  const MAX_WINDOW = 5;
  const { OP, JUMP_OPS } = compiler;

  // Every opcode value already in use; also the pool of "original" opcodes.
  const usedOpcodes = new Set(Object.values(OP).filter(v => v !== undefined));
  if (usedOpcodes.size > U16_MAX) return { bytecode: bc };

  // Opcodes that must never sit inside a macro: frame-changing ops, the
  // variable-length MAKE_CLOSURE, and infrastructure ops. Built once rather
  // than on every eligibility check.
  const excluded = new Set([
    OP.RETURN,
    OP.PATCH,
    OP.TRY_SETUP,
    OP.TRY_END,
    OP.DEBUGGER,
    OP.CALL,
    OP.CALL_METHOD,
    OP.NEW,
    OP.THROW,
    OP.MAKE_CLOSURE,
  ]);

  // Only original opcodes are eligible; jumps and excluded ops are filtered
  // out up front so the hot loops do a single Set lookup.
  const eligibleOps = new Set();
  for (const op of usedOpcodes) {
    if (op !== null && !JUMP_OPS.has(op) && !excluded.has(op)) {
      eligibleOps.add(op);
    }
  }

  // Opcodes of the consecutive eligible instructions starting at `start`,
  // capped at MAX_WINDOW. Any window of length <= run.length is valid.
  const eligibleRun = start => {
    const ops = [];
    for (let j = start; j < bc.length && ops.length < MAX_WINDOW; j++) {
      const op = bc[j][0];
      if (!eligibleOps.has(op)) break;
      ops.push(op);
    }
    return ops;
  };

  // Step 1: count window frequencies.
  const freqMap = new Map();
  for (let i = 0; i < bc.length; i++) {
    const run = eligibleRun(i);
    for (let len = MIN_WINDOW; len <= run.length; len++) {
      const ops = run.slice(0, len);
      const key = ops.join(",");
      const entry = freqMap.get(key);
      if (entry) {
        entry.count++;
      } else {
        freqMap.set(key, { ops, count: 1 });
      }
    }
  }

  // Step 2: keep repeated candidates, prioritised by frequency then length.
  const candidates = Array.from(freqMap.values())
    .filter(e => e.count >= 2)
    .sort((a, b) => b.count - a.count || b.ops.length - a.ops.length);
  if (candidates.length === 0) return { bytecode: bc };

  // Step 3: assign free opcode slots to the best candidates.
  for (const { ops } of candidates) {
    const macroOp = nextFreeSlot(usedOpcodes);
    if (macroOp === -1) break;
    // Record the slot here so two candidates can never share an opcode, even
    // if nextFreeSlot does not mark its result as used (a no-op if it does).
    usedOpcodes.add(macroOp);
    compiler.MACRO_OPS[macroOp] = ops;
    // Register a combined name so OP_NAME and comment generation both work.
    compiler.OP_NAME[macroOp] = ops
      .map(v => compiler.OP_NAME[v] ?? `OP_${v}`)
      .join(",");
  }

  // Step 4: build signature -> macro opcode lookup.
  const sigToMacro = new Map();
  for (const [macroOpStr, ops] of Object.entries(compiler.MACRO_OPS)) {
    sigToMacro.set(ops.join(","), Number(macroOpStr));
  }

  // Step 5: replace sequences with a single multi-operand macro instruction,
  // trying the longest window first at each position.
  const result = [];
  let i = 0;
  while (i < bc.length) {
    const run = eligibleRun(i);
    let folded = false;
    for (let len = run.length; len >= MIN_WINDOW; len--) {
      const key = run.slice(0, len).join(",");
      if (!sigToMacro.has(key)) continue;

      const window = bc.slice(i, i + len);
      // Operands of every folded instruction, in order; the runtime handler
      // consumes them one by one.
      const allOperands = window.flatMap(instr => instr.slice(1));
      const newInstr = [sigToMacro.get(key), ...allOperands];
      newInstr[SOURCE_NODE_SYM] = window[0][SOURCE_NODE_SYM];
      result.push(newInstr);
      i += len;
      folded = true;
      break;
    }
    if (!folded) {
      result.push(bc[i]);
      i++;
    }
  }

  return { bytecode: result };
}
|
|
@@ -1,25 +1,35 @@
|
|
|
1
|
-
import { SOURCE_NODE_SYM } from "
|
|
1
|
+
import { SOURCE_NODE_SYM } from "../../compiler.js";
|
|
2
2
|
|
|
3
3
|
// Resolve all {type:"constant", value} operands to integer indices into the
|
|
4
4
|
// constants pool. Returns both the resolved bytecode and the constants array
|
|
5
5
|
// so the Serializer can use it for comment generation and output.
|
|
6
|
+
// Constant refs may appear at any operand position (index 1, 2, 3, …).
|
|
6
7
|
export function resolveConstants(bc) {
|
|
7
8
|
const constants = [];
|
|
8
9
|
const constantsMap = new Map();
|
|
9
|
-
function intern(
|
|
10
|
+
function intern(operand) {
|
|
11
|
+
const operandAsObject = typeof operand === "object" && operand ? operand : {};
|
|
12
|
+
const value = operand.value;
|
|
10
13
|
let idx = constantsMap.get(value);
|
|
11
14
|
if (typeof idx !== "number") {
|
|
12
15
|
idx = constants.length;
|
|
13
16
|
constantsMap.set(value, idx);
|
|
14
17
|
constants.push(value);
|
|
15
18
|
}
|
|
16
|
-
|
|
19
|
+
const newOperand = {
|
|
20
|
+
...operandAsObject,
|
|
21
|
+
type: "number",
|
|
22
|
+
resolvedValue: idx
|
|
23
|
+
};
|
|
24
|
+
return newOperand;
|
|
17
25
|
}
|
|
18
26
|
const resolved = [];
|
|
19
27
|
for (const instr of bc) {
|
|
20
|
-
const [op,
|
|
21
|
-
|
|
22
|
-
|
|
28
|
+
const [op, ...operands] = instr;
|
|
29
|
+
const hasConstant = operands.some(o => o !== undefined && o !== null && typeof o === "object" && o.type === "constant");
|
|
30
|
+
if (hasConstant) {
|
|
31
|
+
const newOperands = operands.map(operand => operand?.type === "constant" ? intern(operand) : operand);
|
|
32
|
+
const newInstr = [op, ...newOperands];
|
|
23
33
|
newInstr[SOURCE_NODE_SYM] = instr[SOURCE_NODE_SYM];
|
|
24
34
|
resolved.push(newInstr);
|
|
25
35
|
} else {
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
// --- Label IR ---
|
|
2
|
+
// During compilation, jump targets are symbolic labels instead of hard-coded
|
|
3
|
+
// PC numbers. Two IR "pseudo operands" carry the label information:
|
|
4
|
+
//
|
|
5
|
+
// defineLabel operand : [null, {type:"defineLabel", label:"FN_ENTRY_1"}]
|
|
6
|
+
// Marks a position in the bytecode array.
|
|
7
|
+
// resolveLabels() strips these out entirely.
|
|
8
|
+
//
|
|
9
|
+
// label ref operand : [OP.JUMP, {type:"label", label:"FN_ENTRY_1"}]
|
|
10
|
+
// Used as the operand of any jump instruction. resolveLabels() replaces
|
|
11
|
+
// it with the integer PC that the corresponding defineLabel resolves to.
|
|
12
|
+
//
|
|
13
|
+
// The output bytecode is still a nested array of instructions.
|
|
14
|
+
// Flattening (one u16 slot per op, one per operand) happens in the Serializer.
|
|
15
|
+
// PC values computed here reflect the FLAT slot index so that jump targets,
|
|
16
|
+
// startPc, and LOAD_INT label operands are all correct after flattening.
|
|
17
|
+
|
|
18
|
+
import { SOURCE_NODE_SYM } from "../../compiler.js";
|
|
19
|
+
|
|
20
|
+
// Resolve symbolic labels to absolute flat-PC indices within a bytecode array.
|
|
21
|
+
// defineLabel pseudo-instructions are stripped; label-ref operands become ints.
|
|
22
|
+
// Each instruction [op, ...operands] occupies (1 + operands.length) flat slots,
|
|
23
|
+
// so realPc advances by instr.length for every non-pseudo instruction.
|
|
24
|
+
/**
 * Resolve symbolic labels to absolute flat-PC indices.
 *
 * `[null, {type: "defineLabel", label}]` pseudo-instructions are stripped and
 * every `{type: "label", label, offset?}` operand (at any operand position)
 * is replaced by `{...operand, type: "number", resolvedValue}`, where
 * `resolvedValue` is the label's flat PC plus the optional `offset`.
 *
 * A flat PC counts one slot for the opcode and one per operand; operands
 * flagged `placeholder: true` occupy no slot.
 *
 * @param {Array<Array>} bc Instruction list in `[op, ...operands]` form.
 * @param {object} compiler Each entry of `compiler.fnDescriptors` gets its
 *   `startPc` set from `startLabel`, falling back to `entryLabel`.
 * @returns {{bytecode: Array<Array>}} New instruction list without label pseudo-ops.
 * @throws {Error} When a label operand references a label that is never defined.
 */
export function resolveLabels(bc, compiler) {
  // Single definition of "is this a defineLabel pseudo-op", shared by both passes.
  const isDefineLabel = instr =>
    instr[0] === null &&
    instr[1] !== null &&
    typeof instr[1] === "object" &&
    instr[1].type === "defineLabel";

  const isLabelRef = operand =>
    operand !== undefined &&
    operand !== null &&
    typeof operand === "object" &&
    operand.type === "label";

  // Pass 1 - record each label's flat PC.
  const labelToPc = new Map();
  let realPc = 0;
  for (const instr of bc) {
    if (isDefineLabel(instr)) {
      labelToPc.set(instr[1].label, realPc);
    } else {
      // One slot for the opcode plus one per operand; placeholder operands
      // are not emitted and therefore not counted.
      realPc += instr.filter(x => x?.placeholder !== true).length;
    }
  }

  // Pass 2 - drop the pseudo-ops and rewrite label references.
  const resolved = [];
  for (const instr of bc) {
    if (isDefineLabel(instr)) continue;

    const [op, ...operands] = instr;
    const newOperands = operands.map(operand => {
      if (!isLabelRef(operand)) return operand;

      const pc = labelToPc.get(operand.label);
      if (pc === undefined) throw new Error(`Undefined label: ${operand.label}`);

      // Preserve the original operand's properties alongside the resolved value.
      return {
        ...operand,
        type: "number",
        resolvedValue: pc + (operand.offset ?? 0),
      };
    });

    const newInstr = [op, ...newOperands];
    newInstr[SOURCE_NODE_SYM] = instr[SOURCE_NODE_SYM];
    resolved.push(newInstr);
  }

  // Patch each function descriptor's startPc now that labels are resolved.
  // A compiler without descriptors is treated as having none.
  for (const desc of compiler?.fnDescriptors ?? []) {
    desc.startPc = labelToPc.get(desc.startLabel) ?? labelToPc.get(desc.entryLabel);
  }

  return { bytecode: resolved };
}
|
|
@@ -1,21 +1,22 @@
|
|
|
1
|
+
import { choice } from "../utils/random-utils.js";
|
|
1
2
|
export function selfModifying(bc, compiler) {
|
|
2
3
|
// Walk the bytecode looking for "defineLabel" pseudo-ops, which start basic
|
|
3
4
|
// blocks. For each block we collect the body (instructions between the label
|
|
4
5
|
// and the next label/jump terminator), move it to the end of the bytecode
|
|
5
6
|
// under a fresh "patch_LXX" label, and replace it in-place with:
|
|
6
7
|
//
|
|
7
|
-
// defineLabel ("originalLabel")
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
// PATCH { label: originalLabel, offset: 3 } ← destPc = L+3
|
|
11
|
-
// LOAD_INT 0 × N ← N placeholder instructions
|
|
8
|
+
// defineLabel ("originalLabel") ← kept as-is (pseudo-op)
|
|
9
|
+
// PATCH destPc sliceStart sliceEnd ← 4 flat slots total
|
|
10
|
+
// Garbage Opcodes × bodyFlatSize ← placeholder slots
|
|
12
11
|
//
|
|
13
|
-
// PATCH
|
|
14
|
-
//
|
|
15
|
-
//
|
|
16
|
-
//
|
|
17
|
-
//
|
|
18
|
-
//
|
|
12
|
+
// PATCH reads three inline operands via _operand():
|
|
13
|
+
// destPc = originalLabel + 4 (first slot after PATCH's own 4 slots)
|
|
14
|
+
// sliceStart = patchLabel (flat PC of appended body)
|
|
15
|
+
// sliceEnd = patchLabel + bodyFlatSize
|
|
16
|
+
//
|
|
17
|
+
// On first execution PATCH copies bytecode[sliceStart..sliceEnd) over the
|
|
18
|
+
// placeholder region starting at destPc. Execution then falls through into
|
|
19
|
+
// the freshly-patched body. Subsequent calls are idempotent.
|
|
19
20
|
|
|
20
21
|
const {
|
|
21
22
|
OP,
|
|
@@ -45,9 +46,8 @@ export function selfModifying(bc, compiler) {
|
|
|
45
46
|
break;
|
|
46
47
|
}
|
|
47
48
|
|
|
48
|
-
// Jump instructions, RETURN
|
|
49
|
-
|
|
50
|
-
if (nextOp !== null && (JUMP_OPS.has(nextOp) || nextOp === OP.RETURN || nextOp === OP.DATA)) {
|
|
49
|
+
// Jump instructions and RETURN both terminate the body.
|
|
50
|
+
if (nextOp !== null && (JUMP_OPS.has(nextOp) || nextOp === OP.RETURN)) {
|
|
51
51
|
break;
|
|
52
52
|
}
|
|
53
53
|
j++;
|
|
@@ -60,31 +60,31 @@ export function selfModifying(bc, compiler) {
|
|
|
60
60
|
}
|
|
61
61
|
const patchLabel = `patch_${originalLabel}_${patchCount++}`;
|
|
62
62
|
|
|
63
|
-
//
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
//
|
|
67
|
-
//
|
|
68
|
-
|
|
63
|
+
// Flat size of the body (each instruction occupies instr.length slots).
|
|
64
|
+
const bodyFlatSize = body.reduce((acc, instr) => acc + instr.filter(x => x?.placeholder !== true).length, 0);
|
|
65
|
+
|
|
66
|
+
// ── PATCH instruction (4 flat slots: opcode + 3 operands) ───────────
|
|
67
|
+
// destPc = originalLabel + 4 (slot right after PATCH's 4 slots)
|
|
68
|
+
// sliceStart = patchLabel
|
|
69
|
+
// sliceEnd = patchLabel + bodyFlatSize
|
|
70
|
+
result.push([OP.PATCH, {
|
|
69
71
|
type: "label",
|
|
70
|
-
label:
|
|
71
|
-
offset:
|
|
72
|
-
}
|
|
73
|
-
result.push([OP.LOAD_INT, {
|
|
72
|
+
label: originalLabel,
|
|
73
|
+
offset: 4
|
|
74
|
+
}, {
|
|
74
75
|
type: "label",
|
|
75
76
|
label: patchLabel
|
|
76
|
-
}
|
|
77
|
-
result.push([OP.PATCH, {
|
|
77
|
+
}, {
|
|
78
78
|
type: "label",
|
|
79
|
-
label:
|
|
80
|
-
offset:
|
|
79
|
+
label: patchLabel,
|
|
80
|
+
offset: bodyFlatSize
|
|
81
81
|
}]);
|
|
82
82
|
|
|
83
|
-
// ── Placeholders (
|
|
84
|
-
// These are overwritten by PATCH on
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
result.push([
|
|
83
|
+
// ── Placeholders (Garbage Opcodes * bodyFlatSize, each 1 flat slot) ────────────
|
|
84
|
+
// These are overwritten by PATCH on first execution.
|
|
85
|
+
for (let p = 0; p < bodyFlatSize; p++) {
|
|
86
|
+
const randomOpcode = choice(Object.values(compiler.OP));
|
|
87
|
+
result.push([+randomOpcode]);
|
|
88
88
|
}
|
|
89
89
|
|
|
90
90
|
// ── Append real body at end ─────────────────────────────────────────
|