phantomrt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atlas/__init__.py +3 -0
- atlas/agents/__init__.py +8 -0
- atlas/agents/command_space.py +227 -0
- atlas/analysis/__init__.py +3 -0
- atlas/analysis/binary_agent.py +488 -0
- atlas/analysis/binary_fuzz.py +389 -0
- atlas/analysis/frida_live.py +261 -0
- atlas/analysis/graph_annotator.py +147 -0
- atlas/analysis/spectrida_bridge.py +84 -0
- atlas/analysis/unicorn_harness.py +337 -0
- atlas/core/__init__.py +14 -0
- atlas/core/decoder.py +65 -0
- atlas/core/dynamics.py +217 -0
- atlas/core/encoder.py +120 -0
- atlas/core/surprise.py +145 -0
- atlas/core/world_model.py +334 -0
- atlas/environments/__init__.py +5 -0
- atlas/environments/base.py +51 -0
- atlas/environments/grid_world.py +219 -0
- atlas/environments/physics_2d.py +283 -0
- atlas/environments/vm_world.py +168 -0
- atlas/knowledge/__init__.py +3 -0
- atlas/knowledge/instruction_vocab.py +534 -0
- atlas/monitor/__init__.py +5 -0
- atlas/monitor/execution_monitor.py +518 -0
- atlas/optimization/__init__.py +6 -0
- atlas/optimization/speed.py +457 -0
- atlas/planning/__init__.py +4 -0
- atlas/planning/goal.py +100 -0
- atlas/planning/mcts.py +228 -0
- atlas/training/__init__.py +4 -0
- atlas/training/continual.py +392 -0
- atlas/training/growth.py +213 -0
- atlas/training/loop.py +306 -0
- atlas/training/losses.py +101 -0
- atlas/training/self_train.py +307 -0
- atlas/utils/__init__.py +4 -0
- atlas/utils/logging.py +33 -0
- atlas/utils/math_helpers.py +30 -0
- atlas/utils/viz.py +136 -0
- atlas/vm/__init__.py +4 -0
- atlas/vm/wsl_vm.py +249 -0
- phantomrt-0.1.0.dist-info/METADATA +75 -0
- phantomrt-0.1.0.dist-info/RECORD +48 -0
- phantomrt-0.1.0.dist-info/WHEEL +5 -0
- phantomrt-0.1.0.dist-info/entry_points.txt +3 -0
- phantomrt-0.1.0.dist-info/licenses/LICENSE +21 -0
- phantomrt-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,518 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Full Execution Monitor — Catches EVERYTHING
|
|
3
|
+
|
|
4
|
+
Hooks into a running binary and records:
|
|
5
|
+
- Every instruction executed
|
|
6
|
+
- Register state before/after each instruction
|
|
7
|
+
- Memory reads and writes (addresses + values)
|
|
8
|
+
- Stack pointer changes
|
|
9
|
+
- Syscalls
|
|
10
|
+
- Branch decisions (taken/not taken)
|
|
11
|
+
- Crash signals (segfault, stack smash, etc.)
|
|
12
|
+
|
|
13
|
+
This is the model's "eyes" — it sees everything
|
|
14
|
+
the binary does, instruction by instruction.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import struct
|
|
18
|
+
import json
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from typing import Optional
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from collections import defaultdict
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class RegisterState:
    """Point-in-time copy of the x86-64 register file.

    Every field defaults to 0, so a bare ``RegisterState()`` can serve as a
    placeholder until real values are captured.
    """

    # General-purpose registers
    rax: int = 0
    rbx: int = 0
    rcx: int = 0
    rdx: int = 0
    rsi: int = 0
    rdi: int = 0
    # Frame pointer, stack pointer, instruction pointer
    rbp: int = 0
    rsp: int = 0
    rip: int = 0
    # Extended general-purpose registers
    r8: int = 0
    r9: int = 0
    r10: int = 0
    r11: int = 0
    r12: int = 0
    r13: int = 0
    r14: int = 0
    r15: int = 0
    rflags: int = 0

    # SIMD/FP registers (important for some exploits)
    xmm0: int = 0
    xmm1: int = 0

    def to_dict(self) -> dict:
        """Return a fresh ``{register name: value}`` mapping of this snapshot."""
        return dict(vars(self))

    def diff(self, other: 'RegisterState') -> list:
        """Return the names of the registers whose value differs in *other*.

        Names are listed in field-declaration order.
        """
        return [
            name
            for name, value in vars(self).items()
            if value != getattr(other, name)
        ]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class MemoryEvent:
    """Record of one memory access attributed to an instruction."""

    address: int                   # location that was accessed
    size: int                      # width of the access, in bytes
    value: Optional[int] = None    # data involved in the access, when known
    event_type: str = "read"       # one of: read, write, execute
    is_stack: bool = False         # flag: the access targets the stack
    is_heap: bool = False          # flag: the access targets the heap
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class InstructionTrace:
    """Everything recorded about the execution of one instruction."""

    # What was executed
    address: int                    # address the instruction was fetched from
    mnemonic: str                   # e.g., "mov", "push", "call"
    operands: str                   # e.g., "eax, ebx"
    raw_bytes: bytes                # encoded instruction bytes

    # Machine state around the instruction
    register_before: RegisterState
    register_after: RegisterState
    memory_events: list             # list of MemoryEvent

    # Classification flags; all default to "not set"
    is_branch: bool = False
    branch_taken: bool = False
    is_syscall: bool = False
    is_call: bool = False
    is_return: bool = False
    is_crash: bool = False
    crash_type: str = ""
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@dataclass
class ExecutionTrace:
    """One complete run: the per-instruction records plus aggregate counters."""

    instructions: list = field(default_factory=list)
    crash: bool = False
    crash_type: str = ""
    crash_address: int = 0
    total_instructions: int = 0
    total_memory_reads: int = 0
    total_memory_writes: int = 0
    stack_depth_max: int = 0
    input_used: bytes = b""

    # Statistics
    unique_mnemonics: dict = field(default_factory=lambda: defaultdict(int))
    branch_count: int = 0
    syscall_count: int = 0

    def summary(self) -> dict:
        """Return the aggregate counters as a plain dict.

        ``unique_mnemonics`` is copied into an ordinary ``dict`` so the
        result does not expose the underlying ``defaultdict``.
        """
        report = {
            name: getattr(self, name)
            for name in (
                "total_instructions",
                "total_memory_reads",
                "total_memory_writes",
                "stack_depth_max",
                "crash",
                "crash_type",
            )
        }
        report["unique_mnemonics"] = dict(self.unique_mnemonics)
        report["branch_count"] = self.branch_count
        report["syscall_count"] = self.syscall_count
        return report

    def to_dict(self) -> dict:
        """Serialize for the world model.

        Only the last 500 instruction records are emitted; the summary and
        the hex-encoded input always cover the whole run.
        """
        serialized = []
        for step in self.instructions[-500:]:  # last 500 instructions
            record = {
                "address": step.address,
                "mnemonic": step.mnemonic,
                "operands": step.operands,
                "is_branch": step.is_branch,
                "branch_taken": step.branch_taken,
                "is_call": step.is_call,
                "is_return": step.is_return,
                "is_syscall": step.is_syscall,
                "is_crash": step.is_crash,
                "registers": step.register_after.to_dict(),
            }
            accesses = []
            for access in step.memory_events:
                accesses.append({
                    "address": access.address,
                    "size": access.size,
                    "type": access.event_type,
                    "is_stack": access.is_stack,
                    "is_heap": access.is_heap,
                })
            record["memory_events"] = accesses
            serialized.append(record)

        return {
            "instructions": serialized,
            "summary": self.summary(),
            "input_used": self.input_used.hex(),
        }
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class ExecutionMonitor:
    """
    Full execution monitor that records an instruction-level trace.

    Can work with:
    1. Unicorn Engine (emulated execution — cross-platform)
    2. Windows Debug API (native execution — Windows only)
    3. ptrace (native execution — Linux only)

    Default: Unicorn Engine for portability. Only the Unicorn backend is
    implemented in this class; when Unicorn or Capstone cannot be imported
    a fixed simulated trace is returned instead.
    """

    # Mnemonics treated as (conditional or unconditional) jumps.
    _BRANCH_MNEMONICS = frozenset((
        'je', 'jne', 'jg', 'jge', 'jl', 'jle', 'ja', 'jae',
        'jb', 'jbe', 'jo', 'jno', 'js', 'jns', 'jp', 'jnp',
        'loop', 'loope', 'loopne', 'jmp', 'jcxz', 'jecxz', 'jrcxz',
    ))
    _CALL_MNEMONICS = frozenset(('call', 'callq'))
    _RETURN_MNEMONICS = frozenset(('ret', 'retq', 'retn'))
    _SYSCALL_MNEMONICS = frozenset(('syscall', 'int', 'sysenter'))

    # RegisterState fields that are read back from the emulator. xmm0/xmm1
    # are not listed, so they keep their default value of 0.
    _REGISTER_NAMES = (
        'rax', 'rbx', 'rcx', 'rdx', 'rsi', 'rdi', 'rbp', 'rsp', 'rip',
        'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15', 'rflags',
    )

    def __init__(self, max_instructions: int = 100000, stack_base: int = 0x7fff0000, stack_size: int = 0x10000):
        """
        Args:
            max_instructions: upper bound on emulated instructions per run.
            stack_base: lowest address of the emulated stack mapping.
            stack_size: size of the emulated stack mapping, in bytes.
        """
        self.max_instructions = max_instructions
        self.stack_base = stack_base
        self.stack_size = stack_size

        # Known memory regions as (start, end) with an exclusive end.
        self.regions = {
            "stack": (stack_base, stack_base + stack_size),
            "heap": (0x60000000, 0x61000000),
            "code": (0x400000, 0x500000),
        }

    def trace_execution(self, binary_path: str, input_data: bytes, arch: str = "x86_64") -> "ExecutionTrace":
        """
        Execute binary with given input and record full trace.

        Uses Unicorn Engine for safe emulation. The file's bytes are copied
        verbatim to the start of the code region (no executable-format
        parsing is done here) and execution starts at the first byte. The
        input is placed on the stack and passed as pointer/length in
        RDI/RSI.

        Args:
            binary_path: path of the file whose bytes are emulated.
            input_data: bytes handed to the emulated code.
            arch: accepted for interface compatibility; the emulator is
                always created in x86-64 mode.

        Returns:
            The populated ExecutionTrace. If Unicorn/Capstone are missing,
            the result of ``_simulate_trace`` is returned instead.
        """
        trace = ExecutionTrace(input_used=input_data)

        # Only the imports are guarded: a missing dependency selects the
        # simulated fallback, any other failure should surface normally.
        try:
            from unicorn import (
                Uc, UC_ARCH_X86, UC_MODE_64,
                UC_HOOK_CODE, UC_HOOK_MEM_READ_UNMAPPED, UC_HOOK_MEM_WRITE_UNMAPPED,
            )
            from unicorn.x86_const import (
                UC_X86_REG_RBP, UC_X86_REG_RDI, UC_X86_REG_RIP,
                UC_X86_REG_RSI, UC_X86_REG_RSP,
            )
            from capstone import Cs, CS_ARCH_X86, CS_MODE_64
        except ImportError:
            print("Unicorn not available. Using simulated trace.")
            return self._simulate_trace(input_data)

        code_start, code_end = self.regions["code"]
        heap_start, heap_end = self.regions["heap"]
        code_size = code_end - code_start

        # Initialize emulator and map code, stack and heap.
        mu = Uc(UC_ARCH_X86, UC_MODE_64)
        mu.mem_map(code_start, code_size)
        mu.mem_map(self.stack_base, self.stack_size)
        mu.mem_map(heap_start, heap_end - heap_start)

        # Load binary into code region, truncated to the mapping size.
        image = Path(binary_path).read_bytes()[:code_size]
        mu.mem_write(code_start, image)

        # Setup stack: leave one page of headroom above the initial RSP.
        stack_top = self.stack_base + self.stack_size - 0x1000
        mu.reg_write(UC_X86_REG_RSP, stack_top)
        mu.reg_write(UC_X86_REG_RBP, stack_top)

        # Setup input on stack (like gets() / read())
        input_addr = self.stack_base + 0x1000
        if input_data:
            mu.mem_write(input_addr, input_data)
        mu.reg_write(UC_X86_REG_RDI, input_addr)  # first arg = input pointer
        mu.reg_write(UC_X86_REG_RSI, len(input_data))  # second arg = length

        # Disassembler
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True

        inst_count = 0
        # Instruction that has been recorded but whose "after" state is
        # still unknown because it has not finished executing yet.
        pending = None

        def complete_pending():
            nonlocal pending
            if pending is not None:
                self._complete_instruction(mu, trace, pending, self._capture_registers(mu))
                pending = None

        def hook_code(uc, address, size, user_data):
            nonlocal inst_count, pending

            # A code hook runs BEFORE its instruction executes, so the
            # registers visible now are the "after" state of the previous
            # instruction.
            complete_pending()

            inst_count += 1
            if inst_count > self.max_instructions:
                uc.emu_stop()
                return

            raw_bytes = bytes(uc.mem_read(address, size))
            for inst in md.disasm(raw_bytes, address, 1):
                pending = InstructionTrace(
                    address=address,
                    mnemonic=inst.mnemonic,
                    operands=inst.op_str,
                    raw_bytes=raw_bytes,
                    register_before=self._capture_registers(uc),
                    register_after=RegisterState(),  # filled by complete_pending
                    memory_events=[],
                    is_branch=self._is_branch(inst.mnemonic),
                    is_call=self._is_call(inst.mnemonic),
                    is_return=self._is_return(inst.mnemonic),
                    is_syscall=inst.mnemonic in self._SYSCALL_MNEMONICS,
                )
                trace.instructions.append(pending)

        def hook_mem_unmapped(uc, access, address, size, value, user_data):
            """Leave unmapped accesses unhandled so emu_start reports them."""
            return False

        # Register hooks
        mu.hook_add(UC_HOOK_CODE, hook_code)
        mu.hook_add(UC_HOOK_MEM_READ_UNMAPPED, hook_mem_unmapped)
        mu.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED, hook_mem_unmapped)

        # Execute
        try:
            mu.emu_start(
                code_start,
                code_start + len(image),
                timeout=0,
                count=self.max_instructions,
            )
        except Exception as e:
            # Best effort: any failure while emulating is recorded as a
            # crash of the target rather than propagated to the caller.
            trace.crash = True
            trace.crash_type = self._classify_crash(str(e))
            trace.crash_address = mu.reg_read(UC_X86_REG_RIP)
            if pending is not None:
                # Last instruction recorded before emulation failed.
                pending.is_crash = True
                pending.crash_type = trace.crash_type

        # The final instruction never sees a following hook invocation.
        complete_pending()

        trace.total_instructions = inst_count
        trace.total_memory_reads = self._count_memory_events(trace.instructions, "read")
        trace.total_memory_writes = self._count_memory_events(trace.instructions, "write")

        # Stack depth tracking: distance of RSP below the end of the stack.
        stack_end = self.stack_base + self.stack_size
        for inst in trace.instructions:
            depth = stack_end - inst.register_after.rsp
            trace.stack_depth_max = max(trace.stack_depth_max, depth)

        return trace

    def _complete_instruction(self, mu, trace, inst, registers) -> None:
        """Fill in the post-execution state of *inst* and update *trace* counters.

        Args:
            mu: emulator handle, forwarded to ``_detect_memory_events``.
            trace: trace whose statistics are updated.
            inst: instruction record to complete.
            registers: register snapshot taken after *inst* executed.
        """
        inst.register_after = registers
        inst.memory_events = self._detect_memory_events(mu, inst)

        trace.unique_mnemonics[inst.mnemonic] += 1
        if inst.is_branch:
            trace.branch_count += 1
            # The outcome belongs to the instruction, not to the trace.
            inst.branch_taken = self._was_branch_taken(inst)
        if inst.is_syscall:
            trace.syscall_count += 1

    @staticmethod
    def _count_memory_events(instructions, event_type: str) -> int:
        """Count the memory events of kind *event_type* across *instructions*."""
        return sum(
            1
            for inst in instructions
            for event in inst.memory_events
            if event.event_type == event_type
        )

    def _capture_registers(self, mu) -> "RegisterState":
        """Capture current register state.

        Best effort: if the registers cannot be read for any reason an
        all-zero RegisterState is returned instead of raising.
        """
        try:
            from unicorn import x86_const
            return RegisterState(**{
                name: mu.reg_read(getattr(x86_const, "UC_X86_REG_" + name.upper()))
                for name in self._REGISTER_NAMES
            })
        except Exception:
            return RegisterState()

    def _snapshot_memory(self, mu, inst) -> dict:
        """Snapshot memory regions that instruction might touch.

        Placeholder: currently always returns an empty dict.
        """
        return {}

    def _detect_memory_events(self, mu, inst: "InstructionTrace") -> list:
        """Infer memory accesses from register changes (heuristic).

        No actual memory is inspected; *mu* is unused. Returns a list of
        MemoryEvent objects.
        """
        events = []

        # Simple heuristic: if RSP changed, stack was accessed. A lower RSP
        # is reported as a write, a higher one as a read.
        rsp_diff = inst.register_after.rsp - inst.register_before.rsp
        if rsp_diff != 0:
            events.append(MemoryEvent(
                address=inst.register_after.rsp,
                size=abs(rsp_diff),
                event_type="write" if rsp_diff < 0 else "read",
                is_stack=True,
            ))

        # If RSI/RDI changed from a non-zero value, report a read at the
        # old value.
        stack_lo, stack_hi = self.regions["stack"]
        heap_lo, heap_hi = self.regions["heap"]
        for reg_name in ('rsi', 'rdi'):
            before = getattr(inst.register_before, reg_name)
            after = getattr(inst.register_after, reg_name)
            if before != after and before != 0:
                events.append(MemoryEvent(
                    address=before,
                    size=8,
                    event_type="read",
                    is_stack=stack_lo <= before < stack_hi,
                    is_heap=heap_lo <= before < heap_hi,
                ))

        return events

    def _is_branch(self, mnemonic: str) -> bool:
        """Return True if *mnemonic* is one of the known jump/loop mnemonics."""
        return mnemonic in self._BRANCH_MNEMONICS

    def _is_call(self, mnemonic: str) -> bool:
        """Return True if *mnemonic* is a call instruction."""
        return mnemonic in self._CALL_MNEMONICS

    def _is_return(self, mnemonic: str) -> bool:
        """Return True if *mnemonic* is a return instruction."""
        return mnemonic in self._RETURN_MNEMONICS

    def _was_branch_taken(self, inst: "InstructionTrace") -> bool:
        """Check if a branch was taken by comparing RIP.

        A branch counts as taken when RIP after execution is not the
        address immediately following the instruction.
        """
        return inst.register_after.rip != inst.address + len(inst.raw_bytes)

    def _classify_crash(self, error_str: str) -> str:
        """Classify crash type from the exception message (keyword match).

        Keywords are checked in a fixed order; the first match wins.
        """
        error_lower = error_str.lower()
        if "read" in error_lower or "write" in error_lower:
            return "segfault"
        if "stack" in error_lower:
            return "stack_smash"
        if "illegal" in error_lower or "invalid" in error_lower:
            return "illegal_instruction"
        if "overflow" in error_lower:
            return "integer_overflow"
        return "unknown_crash"

    def _simulate_trace(self, input_data: bytes) -> "ExecutionTrace":
        """Simulated trace when Unicorn is not available.

        Produces a fixed sequence of records that does not depend on
        *input_data* beyond storing it on the trace. Every record uses a
        one-byte placeholder encoding and identical before/after registers.
        """
        trace = ExecutionTrace(input_used=input_data)

        # Generate simulated instructions (label entries are kept as
        # records, exactly as listed).
        simulated_mnemonics = [
            ("push", "rbp"),
            ("mov", "rbp, rsp"),
            ("sub", "rsp, 0x20"),
            ("mov", "dword [rbp-0x14], edi"),
            ("mov", "dword [rbp-0x8], 0"),
            ("jmp", ".loop_start"),
            (".loop_start:", ""),
            ("cmp", "dword [rbp-0x8], eax"),
            ("jge", ".loop_end"),
            ("mov", "eax, dword [rbp-0x8]"),
            ("add", "dword [rbp-0x4], 1"),
            ("add", "eax, 1"),
            ("jmp", ".loop_start"),
            (".loop_end:", ""),
            ("add", "rsp, 0x20"),
            ("pop", "rbp"),
            ("ret", ""),
        ]

        addr = 0x400000
        for mnemonic, operands in simulated_mnemonics:
            is_branch = self._is_branch(mnemonic)
            trace.instructions.append(InstructionTrace(
                address=addr,
                mnemonic=mnemonic,
                operands=operands,
                raw_bytes=b'\x90',
                register_before=RegisterState(rsp=0x7fff4000, rbp=0x7fff4000),
                register_after=RegisterState(rsp=0x7fff4000, rbp=0x7fff4000),
                memory_events=[],
                # Classify like the emulated path does.
                is_branch=is_branch,
                is_call=self._is_call(mnemonic),
                is_return=self._is_return(mnemonic),
            ))
            trace.unique_mnemonics[mnemonic] += 1
            if is_branch:
                trace.branch_count += 1
            addr += 1

        trace.total_instructions = len(trace.instructions)
        return trace
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
def analyze_trace(trace: ExecutionTrace) -> dict:
|
|
467
|
+
"""Analyze an execution trace for suspicious patterns."""
|
|
468
|
+
analysis = {
|
|
469
|
+
"suspicious_patterns": [],
|
|
470
|
+
"risk_score": 0.0,
|
|
471
|
+
"vulnerability_indicators": [],
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
for inst in trace.instructions:
|
|
475
|
+
# Detect potential buffer overflow
|
|
476
|
+
if inst.mnemonic in ('mov', 'push', 'stosb', 'stosw', 'stosd', 'stosq'):
|
|
477
|
+
if inst.memory_events:
|
|
478
|
+
for event in inst.memory_events:
|
|
479
|
+
if event.is_stack and event.event_type == "write":
|
|
480
|
+
# Writing to stack — check if near stack base
|
|
481
|
+
stack_start = 0x7fff0000
|
|
482
|
+
stack_end = stack_start + 0x10000
|
|
483
|
+
if event.address < stack_start + 0x1000:
|
|
484
|
+
analysis["vulnerability_indicators"].append({
|
|
485
|
+
"type": "potential_stack_overflow",
|
|
486
|
+
"address": event.address,
|
|
487
|
+
"instruction": f"{inst.mnemonic} {inst.operands}",
|
|
488
|
+
})
|
|
489
|
+
analysis["risk_score"] += 0.3
|
|
490
|
+
|
|
491
|
+
# Detect format string patterns
|
|
492
|
+
if inst.mnemonic == 'call' and 'printf' in inst.operands.lower():
|
|
493
|
+
analysis["vulnerability_indicators"].append({
|
|
494
|
+
"type": "potential_format_string",
|
|
495
|
+
"address": inst.address,
|
|
496
|
+
"instruction": f"call {inst.operands}",
|
|
497
|
+
})
|
|
498
|
+
analysis["risk_score"] += 0.2
|
|
499
|
+
|
|
500
|
+
# Detect integer overflow potential
|
|
501
|
+
if inst.mnemonic in ('add', 'imul', 'mul') and 'dword' in inst.operands:
|
|
502
|
+
analysis["suspicious_patterns"].append({
|
|
503
|
+
"type": "arithmetic_operation",
|
|
504
|
+
"instruction": f"{inst.mnemonic} {inst.operands}",
|
|
505
|
+
})
|
|
506
|
+
|
|
507
|
+
# Detect use-after-free patterns
|
|
508
|
+
if inst.mnemonic in ('call', 'jmp') and any(op in inst.operands.lower()
|
|
509
|
+
for op in ['free', 'delete', 'dealloc']):
|
|
510
|
+
analysis["vulnerability_indicators"].append({
|
|
511
|
+
"type": "potential_use_after_free",
|
|
512
|
+
"address": inst.address,
|
|
513
|
+
"instruction": f"{inst.mnemonic} {inst.operands}",
|
|
514
|
+
})
|
|
515
|
+
analysis["risk_score"] += 0.25
|
|
516
|
+
|
|
517
|
+
analysis["risk_score"] = min(analysis["risk_score"], 1.0)
|
|
518
|
+
return analysis
|