phantomrt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. atlas/__init__.py +3 -0
  2. atlas/agents/__init__.py +8 -0
  3. atlas/agents/command_space.py +227 -0
  4. atlas/analysis/__init__.py +3 -0
  5. atlas/analysis/binary_agent.py +488 -0
  6. atlas/analysis/binary_fuzz.py +389 -0
  7. atlas/analysis/frida_live.py +261 -0
  8. atlas/analysis/graph_annotator.py +147 -0
  9. atlas/analysis/spectrida_bridge.py +84 -0
  10. atlas/analysis/unicorn_harness.py +337 -0
  11. atlas/core/__init__.py +14 -0
  12. atlas/core/decoder.py +65 -0
  13. atlas/core/dynamics.py +217 -0
  14. atlas/core/encoder.py +120 -0
  15. atlas/core/surprise.py +145 -0
  16. atlas/core/world_model.py +334 -0
  17. atlas/environments/__init__.py +5 -0
  18. atlas/environments/base.py +51 -0
  19. atlas/environments/grid_world.py +219 -0
  20. atlas/environments/physics_2d.py +283 -0
  21. atlas/environments/vm_world.py +168 -0
  22. atlas/knowledge/__init__.py +3 -0
  23. atlas/knowledge/instruction_vocab.py +534 -0
  24. atlas/monitor/__init__.py +5 -0
  25. atlas/monitor/execution_monitor.py +518 -0
  26. atlas/optimization/__init__.py +6 -0
  27. atlas/optimization/speed.py +457 -0
  28. atlas/planning/__init__.py +4 -0
  29. atlas/planning/goal.py +100 -0
  30. atlas/planning/mcts.py +228 -0
  31. atlas/training/__init__.py +4 -0
  32. atlas/training/continual.py +392 -0
  33. atlas/training/growth.py +213 -0
  34. atlas/training/loop.py +306 -0
  35. atlas/training/losses.py +101 -0
  36. atlas/training/self_train.py +307 -0
  37. atlas/utils/__init__.py +4 -0
  38. atlas/utils/logging.py +33 -0
  39. atlas/utils/math_helpers.py +30 -0
  40. atlas/utils/viz.py +136 -0
  41. atlas/vm/__init__.py +4 -0
  42. atlas/vm/wsl_vm.py +249 -0
  43. phantomrt-0.1.0.dist-info/METADATA +75 -0
  44. phantomrt-0.1.0.dist-info/RECORD +48 -0
  45. phantomrt-0.1.0.dist-info/WHEEL +5 -0
  46. phantomrt-0.1.0.dist-info/entry_points.txt +3 -0
  47. phantomrt-0.1.0.dist-info/licenses/LICENSE +21 -0
  48. phantomrt-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,389 @@
1
+ """
2
+ BinaryFuzzEnv — point Atlas at ONE binary and let it hunt crashes.
3
+
4
+ This is the target-binary variant of the VM world: the action is an INPUT fed to
5
+ the target, the outcome is (crash? which signal? which functions ran?), and the
6
+ reward is coverage-guided — inputs that reach NEW functions or NEW crashes are
7
+ worth more. The world model learns input -> behavior and curiosity steers toward
8
+ unexplored code + crashes.
9
+
10
+ Coverage is function-level, via gcc `-finstrument-functions` plus a small shim
11
+ that records each function entered and dumps the set on exit OR on a fatal signal
12
+ (so crashing runs still report the path that led to the crash). Function-level
13
+ coverage is deliberate: it is exactly the granularity a spectrIDA-style function
14
+ graph speaks, so this plugs into that later.
15
+
16
+ Works on:
17
+ * a provided C source (compiled here, instrumented), or
18
+ * the built-in vulnerable demo target (default).
19
+ Prebuilt binaries can't be function-instrumented this way — for those you'd fall
20
+ back to black-box (crash-only) coverage; that's a documented follow-on.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import base64
26
+ import math
27
+ from collections import Counter
28
+ from dataclasses import dataclass, field
29
+
30
+ import numpy as np
31
+
32
+ FUZZ_ACTION_DIM = 24
33
+ FUZZ_STATE_DIM = 20
34
+
35
+ # ── coverage shim (compiled WITHOUT instrumentation; dumps on exit or signal) ─
36
+ _COV_SHIM = r"""
37
+ #define _GNU_SOURCE
38
+ #include <stdio.h>
39
+ #include <stdlib.h>
40
+ #include <signal.h>
41
+ #include <unistd.h>
42
+ static void* seen[8192]; static int nseen = 0;
43
+ __attribute__((no_instrument_function)) static void dump(void){
44
+ const char* p = getenv("COV_FILE"); if(!p) p = "/tmp/atlas_fuzz/cov";
45
+ FILE* f = fopen(p, "w"); if(!f) return;
46
+ for(int i=0;i<nseen;i++) fprintf(f, "%p\n", seen[i]);
47
+ fclose(f);
48
+ }
49
+ __attribute__((no_instrument_function)) static void onsig(int s){ dump(); _exit(128+s); }
50
+ __attribute__((no_instrument_function)) __attribute__((constructor))
51
+ static void init(void){
52
+ atexit(dump);
53
+ signal(SIGSEGV,onsig); signal(SIGABRT,onsig);
54
+ signal(SIGBUS,onsig); signal(SIGFPE,onsig);
55
+ }
56
+ __attribute__((no_instrument_function))
57
+ void __cyg_profile_func_enter(void* fn, void* site){
58
+ for(int i=0;i<nseen;i++) if(seen[i]==fn) return;
59
+ if(nseen<8192) seen[nseen++]=fn;
60
+ }
61
+ __attribute__((no_instrument_function))
62
+ void __cyg_profile_func_exit(void* fn, void* site){ (void)fn; (void)site; }
63
+ """
64
+
65
+ # ── built-in vulnerable demo target (compiles on modern glibc; has structure) ─
66
+ DEFAULT_TARGET = r"""
67
+ #include <stdio.h>
68
+ #include <string.h>
69
+ #include <stdlib.h>
70
+ static void handle_a(const char* s){ char b[16]; strcpy(b, s); printf("A:%s\n", b); }
71
+ static void handle_b(const char* s){ printf("B:len=%zu\n", strlen(s)); }
72
+ static void handle_c(const char* s){ if(s[0]) printf(s); putchar('\n'); }
73
+ static void handle_d(const char* s){ int n = atoi(s+1); char* p = malloc(n>0?n:1);
74
+ if(p){ memset(p, 'x', n); free(p); } printf("D:%d\n", n); }
75
+ static int route(const char* s){
76
+ if(!s[0]) return 0;
77
+ switch(s[0]){
78
+ case 'A': handle_a(s+1); return 1; /* stack overflow on long input */
79
+ case 'B': handle_b(s+1); return 2;
80
+ case 'C': handle_c(s+1); return 3; /* format string */
81
+ case 'D': handle_d(s); return 4; /* size-driven alloc */
82
+ default: return -1;
83
+ }
84
+ }
85
+ int main(void){ char in[256];
86
+ if(!fgets(in, sizeof(in), stdin)) return 0;
87
+ in[strcspn(in, "\n")] = 0;
88
+ return route(in) >= 0 ? 0 : 2;
89
+ }
90
+ """
91
+
92
+
93
+ # ── action token: "mode|base64(payload)" ─────────────────────────────────────
94
def make_token(payload: bytes, mode: str = "stdin") -> str:
    """Build an action token of the form ``"<mode>|<base64(payload)>"``.

    Args:
        payload: Raw input bytes to feed to the target.
        mode: Delivery mode label placed before the ``|`` separator.

    Returns:
        The token string; ``parse_token`` performs the inverse split/decode.
    """
    encoded = base64.b64encode(payload).decode()
    return f"{mode}|{encoded}"
96
+
97
+
98
def parse_token(token: str) -> tuple[str, bytes]:
    """Split a ``"mode|base64"`` token into ``(mode, payload_bytes)``.

    Only the first ``|`` is treated as the separator. A token without any
    ``|`` raises ``ValueError`` from the two-name unpacking below.
    """
    pieces = token.split("|", 1)
    mode, encoded = pieces
    payload = base64.b64decode(encoded)
    return mode, payload
101
+
102
+
103
+ _FMT = (b"%s", b"%n", b"%x", b"%p")
104
+
105
+
106
def embed_input(token: str) -> np.ndarray:
    """Embed an input token as a fixed-size float32 feature vector.

    The features are structural (length, byte-class ratios, repetition,
    entropy, format specifiers, ...) rather than the raw bytes, so that the
    model can generalize across inputs instead of memorizing byte strings.

    Returns:
        A vector of length ``FUZZ_ACTION_DIM``; slots 0-16 are filled here and
        the remaining slots stay zero.
    """
    mode, payload = parse_token(token)
    size = len(payload)
    vec = np.zeros(FUZZ_ACTION_DIM, dtype=np.float32)

    # delivery mode (one-hot) and normalized length
    vec[0] = 1.0 if mode == "stdin" else 0.0
    vec[1] = 1.0 if mode == "argv" else 0.0
    vec[2] = min(size / 256.0, 1.0)

    if size:
        data = np.frombuffer(payload, dtype=np.uint8)
        is_upper = (data >= 65) & (data < 91)
        is_lower = (data >= 97) & (data < 123)
        vec[3] = np.mean((data >= 32) & (data < 127))                # printable
        vec[4] = np.mean((data >= 48) & (data < 58))                 # digits
        vec[5] = np.mean(is_upper | is_lower)                        # letters
        vec[6] = np.mean(data == 0)                                  # nulls
        vec[7] = np.mean(data > 127)                                 # high bytes
        vec[8] = np.mean((data == 32) | (data == 9) | (data == 10))  # whitespace

        # longest run of a single byte (repetition signature)
        longest = current = 1
        for previous, byte in zip(data[:-1], data[1:]):
            current = current + 1 if byte == previous else 1
            longest = max(longest, current)
        vec[9] = min(longest / 64.0, 1.0)

        # Shannon entropy of the byte histogram, scaled by the 8-bit maximum
        freq = np.bincount(data, minlength=256) / size
        present = freq[freq > 0]
        vec[10] = float(-np.sum(present * np.log2(present))) / 8.0

    specifier_hits = sum(payload.count(spec) for spec in _FMT)
    vec[11] = min(specifier_hits / 4.0, 1.0)                         # format specifiers
    vec[12] = 1.0 if payload[:1] in (b"A", b"B", b"C", b"D") else 0.0  # routes to a handler
    vec[13] = 1.0 if size == 0 else 0.0
    vec[14] = 1.0 if size in (15, 16, 17, 31, 32, 33, 63, 64, 65, 255, 256) else 0.0  # boundary
    vec[15] = 1.0 if b"\n" in payload else 0.0
    vec[16] = 1.0 if (size and len(set(payload)) == 1) else 0.0      # single repeated byte
    return vec
138
+
139
+
140
def input_family(token: str) -> str:
    """Classify an input token into a coarse family name.

    The checks run in priority order and the first match wins:
    ``"empty"``, ``"format"`` (contains any specifier from ``_FMT``),
    ``"long"`` (64 bytes or more), ``"repeat"`` (one distinct byte value),
    ``"binary"`` (under 70% printable ASCII), otherwise ``"text"``.
    """
    _, payload = parse_token(token)
    size = len(payload)
    if size == 0:
        return "empty"
    for spec in _FMT:
        if spec in payload:
            return "format"
    if size >= 64:
        return "long"
    if len(set(payload)) == 1:
        return "repeat"
    printable_ratio = sum(32 <= byte < 127 for byte in payload) / size
    return "binary" if printable_ratio < 0.7 else "text"
155
+
156
+
157
@dataclass
class FuzzResult:
    """Outcome of running the target once with a single input."""

    exit_code: int          # exit status reported for the run
    stdout: str             # captured (truncated) program output
    cov_ids: frozenset      # coverage identifiers reported for this run
    new_coverage: int       # how many of cov_ids were not in the global set
    timed_out: bool = False
    duration: float = 0.0

    @property
    def crashed(self) -> bool:
        """True when the exit code is in the ``128 + signal`` range."""
        # 134/136/139 are all >= 128, so the single comparison covers them.
        return self.exit_code >= 128

    @property
    def crash_kind(self) -> str:
        """Short crash label: a named signal, ``"signal"``, or ``""`` if no crash."""
        named = {139: "segv", 134: "abort", 136: "fpe", 135: "bus"}
        if self.exit_code in named:
            return named[self.exit_code]
        if self.crashed:
            return "signal"
        return ""
174
+
175
+
176
+ # ── input proposer (coverage-guided corpus, AFL-style but unrestricted) ──────
177
+ class InputProposer:
178
+ def __init__(self, mode: str = "stdin", rng=None):
179
+ import random
180
+ self.mode = mode
181
+ self.rng = rng or random.Random(0)
182
+ # corpus of inputs that earned new coverage — the seeds for mutation
183
+ self.corpus: list[bytes] = [b"", b"A", b"BX", b"C%s", b"D8"]
184
+
185
+ def observe(self, token: str, result: FuzzResult) -> None:
186
+ if result.new_coverage > 0:
187
+ _, p = parse_token(token)
188
+ if p not in self.corpus and len(self.corpus) < 2000:
189
+ self.corpus.append(p)
190
+
191
+ def propose(self, n: int = 16) -> list[str]:
192
+ out = set()
193
+ strat = [self._seeded, self._long, self._format, self._boundary,
194
+ self._binary, self._mutate, self._route_prefix, self._empty]
195
+ guard = 0
196
+ while len(out) < n and guard < n * 6:
197
+ guard += 1
198
+ try:
199
+ out.add(make_token(self.rng.choice(strat)(), self.mode))
200
+ except Exception:
201
+ pass
202
+ return list(out)[:n]
203
+
204
+ def _seeded(self):
205
+ return self.rng.choice(self.corpus)
206
+
207
+ def _long(self):
208
+ c = bytes([self.rng.randint(65, 90)])
209
+ return c * self.rng.randint(20, 300)
210
+
211
+ def _format(self):
212
+ return self.rng.choice([b"C", b""]) + b"".join(
213
+ self.rng.choice(_FMT) for _ in range(self.rng.randint(1, 8)))
214
+
215
+ def _boundary(self):
216
+ pre = self.rng.choice([b"A", b"B", b"C", b"D", b""])
217
+ return pre + b"A" * self.rng.choice([14, 15, 16, 17, 31, 32, 33, 63, 64, 65])
218
+
219
+ def _binary(self):
220
+ return bytes(self.rng.randint(0, 255) for _ in range(self.rng.randint(1, 64)))
221
+
222
+ def _mutate(self):
223
+ base = bytearray(self.rng.choice(self.corpus) or b"A")
224
+ for _ in range(self.rng.randint(1, 4)):
225
+ if not base:
226
+ base.append(self.rng.randint(0, 255)); continue
227
+ op = self.rng.randint(0, 2)
228
+ i = self.rng.randrange(len(base))
229
+ if op == 0:
230
+ base[i] = self.rng.randint(0, 255) # flip
231
+ elif op == 1:
232
+ base.insert(i, self.rng.randint(0, 255)) # insert
233
+ else:
234
+ del base[i] # delete
235
+ return bytes(base)
236
+
237
+ def _route_prefix(self):
238
+ return self.rng.choice([b"A", b"B", b"C", b"D"]) + bytes(
239
+ self.rng.randint(32, 126) for _ in range(self.rng.randint(0, 40)))
240
+
241
+ def _empty(self):
242
+ return b""
243
+
244
+
245
+ # ── the environment ──────────────────────────────────────────────────────────
246
class BinaryFuzzEnv:
    """Atlas's crash-hunting environment for a single target binary.

    The target source is compiled inside the VM with function
    instrumentation at construction time. Each ``step`` feeds one input token
    to the target, then reports exit status, coverage and a reward that
    favours new coverage and crashes.

    ``vm`` must expose ``run(script, timeout=...)`` returning an object with
    ``stdout`` and ``duration`` attributes (the only members used here).
    """

    WORKDIR = "/tmp/atlas_fuzz"

    def __init__(self, vm, source: str | None = None, mode: str = "stdin",
                 timeout: int = 4, log=print):
        """Store configuration, reset statistics and compile the target.

        Args:
            vm: Command runner used for every shell interaction.
            source: C source of the target; the built-in demo when ``None``.
            mode: Default delivery mode label kept on the instance.
            timeout: Per-run time limit in seconds passed to ``timeout``.
            log: Callable receiving human-readable progress messages.

        Raises:
            RuntimeError: If the target fails to compile in the VM.
        """
        self.vm = vm
        self.mode = mode
        self.timeout = timeout
        self.log = log
        self.source = source if source is not None else DEFAULT_TARGET

        self.covered_global: set = set()          # all functions ever reached
        self.crash_inputs: dict[str, bytes] = {}  # crash_kind+path -> input
        self.seen: Counter = Counter()            # behavior signatures (coverage metric)
        self._last = np.zeros(FUZZ_STATE_DIM, dtype=np.float32)
        self.recoveries = 0                       # binaries crash w/o bricking the VM
        self.steps = 0
        self._compile()

    # ── setup ────────────────────────────────────────────────────────────────
    def _compile(self):
        """Write target + coverage shim into the VM and build the instrumented binary.

        The shim is compiled without instrumentation; the target is compiled
        with ``-finstrument-functions`` and linked against it.
        """
        d = self.WORKDIR
        put = (
            f"mkdir -p {d} && "
            f"cat > {d}/target.c <<'ATLAS_EOF'\n{self.source}\nATLAS_EOF\n"
            f"cat > {d}/cov.c <<'ATLAS_EOF'\n{_COV_SHIM}\nATLAS_EOF\n"
            f"gcc -c -no-pie {d}/cov.c -o {d}/cov.o 2>{d}/cc.log && "
            f"gcc -no-pie -fno-pie -finstrument-functions {d}/target.c {d}/cov.o "
            f"-o {d}/target 2>>{d}/cc.log; echo RC:$?"
        )
        r = self.vm.run(put, timeout=60)
        if "RC:0" not in r.stdout:
            log = self.vm.run(f"cat {d}/cc.log").stdout
            raise RuntimeError(f"target compile failed:\n{log}")
        self.log(f"[fuzz] compiled instrumented target in VM ({d}/target)")

    # ── BaseEnvironment-ish API (so SelfTrainer can drive it) ────────────────
    def get_action_dim(self):
        """Length of the action (input embedding) vector."""
        return FUZZ_ACTION_DIM

    def get_observation_dim(self):
        """Length of the observation vector produced by ``featurize``."""
        return FUZZ_STATE_DIM

    def reset(self):
        """Zero the last observation and return a copy of it.

        Coverage and crash statistics are intentionally left untouched.
        """
        self._last = np.zeros(FUZZ_STATE_DIM, dtype=np.float32)
        return self._last.copy()

    def render(self):
        """No visual rendering; always returns ``None``."""
        return None

    # ── run the target with an input, parse coverage + crash ─────────────────
    def _execute(self, token: str, record: bool = True) -> FuzzResult:
        """Run the target once with ``token`` and collect exit code, coverage, output.

        Args:
            token: ``"mode|base64"`` action token.
            record: When true, merge this run's coverage into the global set;
                when false, only measure how much of it would be new.
        """
        mode, payload = parse_token(token)
        # Re-encode the decoded payload rather than reusing the token text, so
        # only canonical base64 characters are interpolated into the shell line.
        b64 = base64.b64encode(payload).decode()
        d = self.WORKDIR
        run = f"{d}/target" if mode == "stdin" else f'{d}/target "$(cat {d}/in)"'
        script = (
            # Remove the previous run's coverage dump first: a run that dies
            # without the shim writing the file (e.g. terminated on timeout)
            # would otherwise be credited with the prior input's coverage.
            f"rm -f {d}/cov; "
            f"printf %s '{b64}' | base64 -d > {d}/in 2>/dev/null; "
            f"COV_FILE={d}/cov timeout {self.timeout} {run} < {d}/in > {d}/out 2>&1; "
            f"rc=$?; echo \"===RC:$rc\"; echo ===COV; sort -u {d}/cov 2>/dev/null; "
            f"echo ===OUT; head -c 300 {d}/out"
        )
        r = self.vm.run(script, timeout=self.timeout + 5)
        rc, cov_ids, out = self._parse(r.stdout)
        new = len(cov_ids - self.covered_global)
        if record:
            self.covered_global |= cov_ids
        # `timeout` exits with 124 when it had to stop the command.
        return FuzzResult(rc, out, frozenset(cov_ids), new, timed_out=(rc == 124),
                          duration=r.duration)

    @staticmethod
    def _parse(s: str):
        """Parse the ``===RC:`` / ``===COV`` / ``===OUT`` sections of a run's output.

        Returns:
            ``(exit_code, coverage_id_set, stripped_output)``. Parsing is
            best-effort: whatever was extracted before a malformed section is
            kept, and the remaining values stay at ``0`` / empty.
        """
        rc, cov, out = 0, set(), ""
        try:
            _, rest = s.split("===RC:", 1)
            rc_str, rest = rest.split("\n", 1)
            rc = int(rc_str.strip())
            cov_block, out = rest.split("===OUT", 1)
            cov_block = cov_block.split("===COV", 1)[-1]
            cov = {ln.strip() for ln in cov_block.splitlines() if ln.strip()}
            out = out.strip()
        except (ValueError, AttributeError):
            # ValueError: a marker is missing or the exit code is not an int.
            # AttributeError: the runner returned no text at all.
            pass
        return rc, cov, out

    def step(self, token: str):
        """Execute one input and return ``(obs, reward, done, info)``.

        Reward is the number of newly covered functions plus 3.0 for a crash.
        ``done`` is always ``False``. The first input for each distinct
        ``crash_kind:coverage_size`` key is stored in ``crash_inputs``.
        """
        self.steps += 1
        res = self._execute(token, record=True)
        obs = self.featurize(token, res)
        sig = self._signature(res)
        self.seen[sig] += 1
        reward = float(res.new_coverage) + (3.0 if res.crashed else 0.0)

        if res.crashed:
            _, payload = parse_token(token)
            key = f"{res.crash_kind}:{len(res.cov_ids)}"
            if key not in self.crash_inputs:
                self.crash_inputs[key] = payload
                self.log(f"[fuzz] CRASH ({res.crash_kind}) on {payload[:40]!r} "
                         f"(len={len(payload)}) — {len(self.crash_inputs)} unique so far")

        self._last = obs
        info = {"command": token, "result": res, "family": input_family(token),
                "recovered": False, "coverage": len(self.covered_global),
                "crashed": res.crashed}
        return obs, reward, False, info

    def run_probe(self, token: str):
        """Execute an input without recording its coverage; return the observation."""
        return self.featurize(token, self._execute(token, record=False))

    # ── featurization ────────────────────────────────────────────────────────
    def featurize(self, token: str, res: FuzzResult) -> np.ndarray:
        """Turn a run result into a float32 observation of length ``FUZZ_STATE_DIM``.

        Slots 0-13 are filled; the remaining slots stay zero. ``token`` is
        accepted for interface symmetry and is not used.
        """
        v = np.zeros(FUZZ_STATE_DIM, dtype=np.float32)
        v[0] = max(-1.0, min(1.0, res.exit_code / 128.0))
        v[1] = 1.0 if res.exit_code == 0 else 0.0
        v[2] = 1.0 if res.crashed else 0.0
        v[3] = 1.0 if res.timed_out else 0.0
        v[4] = 1.0 if res.crash_kind == "segv" else 0.0
        v[5] = 1.0 if res.crash_kind == "abort" else 0.0
        v[6] = min(len(res.cov_ids) / 8.0, 1.0)            # functions reached
        v[7] = min(res.new_coverage / 4.0, 1.0)            # NEW functions
        v[8] = min(len(res.stdout) / 200.0, 1.0)
        v[9] = 1.0 if res.stdout.strip() else 0.0
        v[10] = min(len(self.covered_global) / 8.0, 1.0)   # global progress
        if self.timeout > 0:
            # Guarded so a zero timeout cannot raise ZeroDivisionError;
            # the slot then simply stays 0.
            v[11] = min(res.duration / self.timeout, 1.0)
        v[12] = 1.0 if "%" in res.stdout else 0.0
        v[13] = 1.0                                         # bias
        return v

    def _signature(self, res: FuzzResult):
        """Behaviour signature: (crash kind or ``rc<N>``, number of functions reached)."""
        return (res.crash_kind or f"rc{res.exit_code}", len(res.cov_ids))

    # ── reporting ────────────────────────────────────────────────────────────
    def summary(self) -> dict:
        """Totals for covered functions and unique crashes, with hex-encoded crash inputs."""
        return {"functions_covered": len(self.covered_global),
                "unique_crashes": len(self.crash_inputs),
                "crash_inputs": {k: v.hex() for k, v in self.crash_inputs.items()}}
389
+
@@ -0,0 +1,261 @@
1
+ """
2
+ Atlas Live — instrument the RUNNING process (Frida), not a vacuum.
3
+
4
+ Emulation (unicorn_harness) can't build the live state a real function needs, so
5
+ state-entangled functions come back `needs_state`. Live mode sidesteps that: the
6
+ real process already HAS the globals/heap/objects, so those functions actually run.
7
+
8
+ Two capabilities:
9
+ * trace(rvas) — passively hook functions, capture REAL args/returns/coverage
10
+ as the program runs its own workload.
11
+ * fuzz_function(rva) — call a function in-process with mutated inputs using the
12
+ live process state, and catch crashes.
13
+
14
+ Function identity is by RVA (module.base + offset) — the exact addressing the
15
+ spectrIDA graph stores (graph addr - image_base = rva), so a graph function maps
16
+ straight onto the live module with no guessing.
17
+
18
+ Crash model (learned the hard way): recovering in-process after a crash corrupts
19
+ the target and hangs. So fuzzing is spawn-per-crash — run inputs fast while the
20
+ process survives; when one crashes, record the REPRODUCING INPUT and respawn to
21
+ continue. Robust, and only pays the respawn cost when a crash actually happens.
22
+
23
+ Fault-address honesty: on Windows, reliably capturing the exact fault address from
24
+ inside a hardware exception is unreliable (WER interaction + the JS runtime being
25
+ in a bad state mid-fault). We tried four ways; all reliably DETECT the crash but
26
+ the address often comes back "detached". That's acceptable — the reproducing input
27
+ is the real artifact (re-run it under a debugger for the address), same as AFL.
28
+ When Frida does surface the address it's recorded; otherwise the input still is.
29
+
30
+ `frida` is an optional dependency; importing this module without it is fine, you
31
+ just can't construct FridaLiveTarget.
32
+
33
+ Honest limits: the target must actually run on this machine (so NOT Switch NSO);
34
+ for packed runtimes (Bun) the native functions are the runtime, not the app logic.
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ import time
40
+ from dataclasses import dataclass, field
41
+
42
+ # The Frida JS agent. Generic + rpc-driven so one agent serves trace and fuzz.
43
+ _AGENT = r"""
44
+ // On Windows an unhandled access violation triggers Windows Error Reporting,
45
+ // which HANGS the crashing process (a WerFault child spawns and blocks). That
46
+ // both stalls fuzzing and hides the fault address. Suppress WER, then in the
47
+ // exception handler send the fault address, give the async message a beat to
48
+ // flush, and die cleanly. We do NOT try to recover in-process (redirecting past
49
+ // a fault lands on a corrupt stack → second fault); death + respawn is robust.
50
+ try {
51
+ const k32 = 'kernel32.dll';
52
+ const SetErrorMode = new NativeFunction(
53
+ Module.getExportByName(k32, 'SetErrorMode'), 'uint32', ['uint32']);
54
+ SetErrorMode(0x0001 | 0x0002 | 0x8000); // FAILCRITICALERRORS|NOGPFAULTERRORBOX|NOOPENFILEERRORBOX
55
+ const Sleep = new NativeFunction(
56
+ Module.getExportByName(k32, 'Sleep'), 'void', ['uint32']);
57
+ Process.setExceptionHandler(function (details) {
58
+ send({ t: 'crash', addr: details.address.toString(), kind: details.type });
59
+ Sleep(150); // let the async crash message flush before we die
60
+ return false; // WER suppressed → clean fast death, no hang
61
+ });
62
+ } catch (e) {
63
+ // non-Windows or missing export: fall back to OS-level detach crash reporting
64
+ Process.setExceptionHandler(function (details) {
65
+ send({ t: 'crash', addr: details.address.toString(), kind: details.type });
66
+ return false;
67
+ });
68
+ }
69
+
70
+ function fnAddr(rva) { return Process.mainModule.base.add(ptr(rva)); }
71
+
72
+ rpc.exports = {
73
+ base: function () { return Process.mainModule.base.toString(); },
74
+
75
+ hook: function (rva) {
76
+ Interceptor.attach(fnAddr(rva), {
77
+ onEnter: function (args) {
78
+ this.p = args[0];
79
+ try { this.s = args[0].readUtf8String(80); } catch (e) { this.s = null; }
80
+ },
81
+ onLeave: function (ret) {
82
+ send({ t: 'call', rva: rva, arg: this.s, ret: ret.toInt32() });
83
+ }
84
+ });
85
+ return true;
86
+ },
87
+
88
+ // call fn(char* input) -> int, with live process state present
89
+ callStr: function (rva, s) {
90
+ const fn = new NativeFunction(fnAddr(rva), 'int', ['pointer']);
91
+ return fn(Memory.allocUtf8String(s));
92
+ },
93
+
94
+ // call fn(char* buf, size_t len) -> int
95
+ callBuf: function (rva, hex) {
96
+ const bytes = [];
97
+ for (let i = 0; i < hex.length; i += 2) bytes.push(parseInt(hex.substr(i, 2), 16));
98
+ const buf = Memory.alloc(Math.max(bytes.length, 1));
99
+ buf.writeByteArray(bytes);
100
+ const fn = new NativeFunction(fnAddr(rva), 'int', ['pointer', 'uint']);
101
+ return fn(buf, bytes.length);
102
+ }
103
+ };
104
+ """
105
+
106
+
107
+ @dataclass
108
+ class LiveTrace:
109
+ rva: int
110
+ arg: str | None
111
+ ret: int
112
+
113
+
114
+ @dataclass
115
+ class LiveResult:
116
+ """Outcome of live-fuzzing one function."""
117
+ rva: int
118
+ calls: int = 0
119
+ crashes: list = field(default_factory=list) # [{input, addr, kind}]
120
+ returns: list = field(default_factory=list) # observed non-crash return values
121
+ respawns: int = 0
122
+
123
+ @property
124
+ def crashed(self) -> bool:
125
+ return bool(self.crashes)
126
+
127
+ def summary(self) -> dict:
128
+ return {"rva": hex(self.rva), "calls": self.calls,
129
+ "unique_crashes": len(self.crashes), "respawns": self.respawns,
130
+ "crash_inputs": [c["input"] for c in self.crashes]}
131
+
132
+
133
+ class FridaLiveTarget:
134
+ """Attach to / spawn a process and drive it via the Frida agent."""
135
+
136
+ def __init__(self, program: str | list[str], log=print):
137
+ import frida # optional dep — only needed to actually use live mode
138
+ self._frida = frida
139
+ self.program = program if isinstance(program, list) else [program]
140
+ self.log = log
141
+ self.device = frida.get_local_device()
142
+ self.pid = None
143
+ self.session = None
144
+ self.script = None
145
+ self._events: list = []
146
+ self._dead = False
147
+ self._last_crash = None # set by _on_message the instant a crash arrives
148
+
149
+ # ── lifecycle ────────────────────────────────────────────────────────────
150
+ def _on_message(self, message, data):
151
+ if message.get("type") == "send":
152
+ payload = message["payload"]
153
+ self._events.append(payload)
154
+ if payload.get("t") == "crash":
155
+ self._last_crash = payload # fault address, delivered while alive
156
+ elif message.get("type") == "error":
157
+ self.log(f"[live] agent error: {message.get('description')}")
158
+
159
+ def _on_detached(self, reason=None, crash=None, *a):
160
+ self._dead = True
161
+ # Frida hands us an OS-level Crash object when the process actually faulted
162
+ # (reliable fault address, unlike an in-agent handler racing teardown).
163
+ if crash is not None:
164
+ addr = getattr(crash, "address", None)
165
+ self._last_crash = {
166
+ "addr": hex(addr) if isinstance(addr, int) else str(addr),
167
+ "kind": getattr(crash, "signal_name", None) or reason or "crash",
168
+ }
169
+
170
+ def spawn(self) -> "FridaLiveTarget":
171
+ self._dead = False
172
+ self.pid = self.device.spawn(self.program)
173
+ self.session = self.device.attach(self.pid)
174
+ self.session.on("detached", self._on_detached)
175
+ self.script = self.session.create_script(_AGENT)
176
+ self.script.on("message", self._on_message)
177
+ self.script.load()
178
+ self.device.resume(self.pid)
179
+ return self
180
+
181
+ def close(self):
182
+ try:
183
+ if self.pid is not None:
184
+ self.device.kill(self.pid)
185
+ except Exception:
186
+ pass
187
+ self.pid = self.session = self.script = None
188
+
189
+ # ── passive tracing: what really runs, with real args ────────────────────
190
+ def trace(self, rvas: list[int], seconds: float = 2.0) -> list[LiveTrace]:
191
+ if self.script is None:
192
+ self.spawn()
193
+ for rva in rvas:
194
+ try:
195
+ self.script.exports_sync.hook(hex(rva))
196
+ except Exception as e:
197
+ self.log(f"[live] hook {hex(rva)} failed: {e}")
198
+ time.sleep(seconds)
199
+ out = []
200
+ for e in self._events:
201
+ if e.get("t") == "call":
202
+ out.append(LiveTrace(int(e["rva"], 16), e.get("arg"), e.get("ret")))
203
+ return out
204
+
205
+ # ── in-process fuzzing with spawn-per-crash ──────────────────────────────
206
+ def fuzz_function(self, rva: int, inputs, arg_mode: str = "str",
207
+ call_timeout: float = 3.0) -> LiveResult:
208
+ """Call the function at `rva` with each input, using live process state.
209
+ On a crash the process dies and is respawned to continue. `arg_mode`:
210
+ "str" → fn(char*); "buf" → fn(char*, len)."""
211
+ res = LiveResult(rva=rva)
212
+ if self.script is None:
213
+ self.spawn()
214
+ seen_crash = set()
215
+
216
+ for inp in inputs:
217
+ if self._dead or self.script is None:
218
+ self._respawn(res)
219
+ payload = inp if isinstance(inp, (bytes, bytearray)) else str(inp).encode()
220
+ self._last_crash = None
221
+ ret, detached = None, False
222
+ try:
223
+ if arg_mode == "buf":
224
+ ret = self.script.exports_sync.call_buf(hex(rva), payload.hex())
225
+ else:
226
+ ret = self.script.exports_sync.call_str(hex(rva), payload.decode("latin-1"))
227
+ except Exception:
228
+ detached = True # session dropped mid-call = the process faulted
229
+ time.sleep(0.05) # let the 'detached' crash-report callback land
230
+
231
+ # a crash is an outright detach; its fault address comes from Frida's
232
+ # OS-level Crash object captured in _on_detached.
233
+ crash = self._last_crash
234
+ if crash or detached:
235
+ key = crash["addr"] if crash else "detached"
236
+ if key not in seen_crash:
237
+ seen_crash.add(key)
238
+ res.crashes.append({
239
+ "input": payload.hex(), "addr": key,
240
+ "kind": crash["kind"] if crash else "detached",
241
+ })
242
+ self.log(f"[live] CRASH @ {key} on {payload[:32]!r} "
243
+ f"({len(res.crashes)} unique)")
244
+ self._respawn(res) # post-fault state is corrupt → fresh process
245
+ else:
246
+ res.calls += 1
247
+ if len(res.returns) < 50:
248
+ res.returns.append(ret)
249
+ return res
250
+
251
+ def _respawn(self, res: LiveResult):
252
+ self.close()
253
+ res.respawns += 1
254
+ self._events = []
255
+ self.spawn()
256
+
257
+
258
def rva_from_graph_addr(graph_addr: int, image_base: int) -> int:
    """Convert an absolute graph address into a module-relative offset (RVA).

    The graph stores absolute virtual addresses while a live module may be
    loaded at a different base, so the stable identity is
    ``graph_addr - image_base``.
    """
    rva = graph_addr - image_base
    return rva