smallworld-re 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smallworld/__init__.py +35 -0
- smallworld/analyses/__init__.py +14 -0
- smallworld/analyses/analysis.py +88 -0
- smallworld/analyses/code_coverage.py +31 -0
- smallworld/analyses/colorizer.py +682 -0
- smallworld/analyses/colorizer_summary.py +100 -0
- smallworld/analyses/field_detection/__init__.py +14 -0
- smallworld/analyses/field_detection/field_analysis.py +536 -0
- smallworld/analyses/field_detection/guards.py +26 -0
- smallworld/analyses/field_detection/hints.py +133 -0
- smallworld/analyses/field_detection/malloc.py +211 -0
- smallworld/analyses/forced_exec/__init__.py +3 -0
- smallworld/analyses/forced_exec/forced_exec.py +87 -0
- smallworld/analyses/underlays/__init__.py +4 -0
- smallworld/analyses/underlays/basic.py +13 -0
- smallworld/analyses/underlays/underlay.py +31 -0
- smallworld/analyses/unstable/__init__.py +4 -0
- smallworld/analyses/unstable/angr/__init__.py +0 -0
- smallworld/analyses/unstable/angr/base.py +12 -0
- smallworld/analyses/unstable/angr/divergence.py +274 -0
- smallworld/analyses/unstable/angr/model.py +383 -0
- smallworld/analyses/unstable/angr/nwbt.py +63 -0
- smallworld/analyses/unstable/angr/typedefs.py +170 -0
- smallworld/analyses/unstable/angr/utils.py +25 -0
- smallworld/analyses/unstable/angr/visitor.py +315 -0
- smallworld/analyses/unstable/angr_nwbt.py +106 -0
- smallworld/analyses/unstable/code_coverage.py +54 -0
- smallworld/analyses/unstable/code_reachable.py +44 -0
- smallworld/analyses/unstable/control_flow_tracer.py +71 -0
- smallworld/analyses/unstable/pointer_finder.py +90 -0
- smallworld/arch/__init__.py +0 -0
- smallworld/arch/aarch64_arch.py +286 -0
- smallworld/arch/amd64_arch.py +86 -0
- smallworld/arch/i386_arch.py +44 -0
- smallworld/emulators/__init__.py +14 -0
- smallworld/emulators/angr/__init__.py +7 -0
- smallworld/emulators/angr/angr.py +1652 -0
- smallworld/emulators/angr/default.py +15 -0
- smallworld/emulators/angr/exceptions.py +7 -0
- smallworld/emulators/angr/exploration/__init__.py +9 -0
- smallworld/emulators/angr/exploration/bounds.py +27 -0
- smallworld/emulators/angr/exploration/default.py +17 -0
- smallworld/emulators/angr/exploration/terminate.py +22 -0
- smallworld/emulators/angr/factory.py +55 -0
- smallworld/emulators/angr/machdefs/__init__.py +35 -0
- smallworld/emulators/angr/machdefs/aarch64.py +292 -0
- smallworld/emulators/angr/machdefs/amd64.py +192 -0
- smallworld/emulators/angr/machdefs/arm.py +387 -0
- smallworld/emulators/angr/machdefs/i386.py +221 -0
- smallworld/emulators/angr/machdefs/machdef.py +138 -0
- smallworld/emulators/angr/machdefs/mips.py +184 -0
- smallworld/emulators/angr/machdefs/mips64.py +189 -0
- smallworld/emulators/angr/machdefs/ppc.py +101 -0
- smallworld/emulators/angr/machdefs/riscv.py +261 -0
- smallworld/emulators/angr/machdefs/xtensa.py +255 -0
- smallworld/emulators/angr/memory/__init__.py +7 -0
- smallworld/emulators/angr/memory/default.py +10 -0
- smallworld/emulators/angr/memory/fixups.py +43 -0
- smallworld/emulators/angr/memory/memtrack.py +105 -0
- smallworld/emulators/angr/scratch.py +43 -0
- smallworld/emulators/angr/simos.py +53 -0
- smallworld/emulators/angr/utils.py +70 -0
- smallworld/emulators/emulator.py +1013 -0
- smallworld/emulators/hookable.py +252 -0
- smallworld/emulators/panda/__init__.py +5 -0
- smallworld/emulators/panda/machdefs/__init__.py +28 -0
- smallworld/emulators/panda/machdefs/aarch64.py +93 -0
- smallworld/emulators/panda/machdefs/amd64.py +71 -0
- smallworld/emulators/panda/machdefs/arm.py +89 -0
- smallworld/emulators/panda/machdefs/i386.py +36 -0
- smallworld/emulators/panda/machdefs/machdef.py +86 -0
- smallworld/emulators/panda/machdefs/mips.py +94 -0
- smallworld/emulators/panda/machdefs/mips64.py +91 -0
- smallworld/emulators/panda/machdefs/ppc.py +79 -0
- smallworld/emulators/panda/panda.py +575 -0
- smallworld/emulators/unicorn/__init__.py +13 -0
- smallworld/emulators/unicorn/machdefs/__init__.py +28 -0
- smallworld/emulators/unicorn/machdefs/aarch64.py +310 -0
- smallworld/emulators/unicorn/machdefs/amd64.py +326 -0
- smallworld/emulators/unicorn/machdefs/arm.py +321 -0
- smallworld/emulators/unicorn/machdefs/i386.py +137 -0
- smallworld/emulators/unicorn/machdefs/machdef.py +117 -0
- smallworld/emulators/unicorn/machdefs/mips.py +202 -0
- smallworld/emulators/unicorn/unicorn.py +684 -0
- smallworld/exceptions/__init__.py +5 -0
- smallworld/exceptions/exceptions.py +85 -0
- smallworld/exceptions/unstable/__init__.py +1 -0
- smallworld/exceptions/unstable/exceptions.py +25 -0
- smallworld/extern/__init__.py +4 -0
- smallworld/extern/ctypes.py +94 -0
- smallworld/extern/unstable/__init__.py +1 -0
- smallworld/extern/unstable/ghidra.py +129 -0
- smallworld/helpers.py +107 -0
- smallworld/hinting/__init__.py +8 -0
- smallworld/hinting/hinting.py +214 -0
- smallworld/hinting/hints.py +427 -0
- smallworld/hinting/unstable/__init__.py +2 -0
- smallworld/hinting/utils.py +19 -0
- smallworld/instructions/__init__.py +18 -0
- smallworld/instructions/aarch64.py +20 -0
- smallworld/instructions/arm.py +18 -0
- smallworld/instructions/bsid.py +67 -0
- smallworld/instructions/instructions.py +258 -0
- smallworld/instructions/mips.py +21 -0
- smallworld/instructions/x86.py +100 -0
- smallworld/logging.py +90 -0
- smallworld/platforms.py +95 -0
- smallworld/py.typed +0 -0
- smallworld/state/__init__.py +6 -0
- smallworld/state/cpus/__init__.py +32 -0
- smallworld/state/cpus/aarch64.py +563 -0
- smallworld/state/cpus/amd64.py +676 -0
- smallworld/state/cpus/arm.py +630 -0
- smallworld/state/cpus/cpu.py +71 -0
- smallworld/state/cpus/i386.py +239 -0
- smallworld/state/cpus/mips.py +374 -0
- smallworld/state/cpus/mips64.py +372 -0
- smallworld/state/cpus/powerpc.py +229 -0
- smallworld/state/cpus/riscv.py +357 -0
- smallworld/state/cpus/xtensa.py +80 -0
- smallworld/state/memory/__init__.py +7 -0
- smallworld/state/memory/code.py +70 -0
- smallworld/state/memory/elf/__init__.py +3 -0
- smallworld/state/memory/elf/elf.py +564 -0
- smallworld/state/memory/elf/rela/__init__.py +32 -0
- smallworld/state/memory/elf/rela/aarch64.py +27 -0
- smallworld/state/memory/elf/rela/amd64.py +32 -0
- smallworld/state/memory/elf/rela/arm.py +51 -0
- smallworld/state/memory/elf/rela/i386.py +32 -0
- smallworld/state/memory/elf/rela/mips.py +45 -0
- smallworld/state/memory/elf/rela/ppc.py +45 -0
- smallworld/state/memory/elf/rela/rela.py +63 -0
- smallworld/state/memory/elf/rela/riscv64.py +27 -0
- smallworld/state/memory/elf/rela/xtensa.py +15 -0
- smallworld/state/memory/elf/structs.py +55 -0
- smallworld/state/memory/heap.py +85 -0
- smallworld/state/memory/memory.py +181 -0
- smallworld/state/memory/stack/__init__.py +31 -0
- smallworld/state/memory/stack/aarch64.py +22 -0
- smallworld/state/memory/stack/amd64.py +42 -0
- smallworld/state/memory/stack/arm.py +66 -0
- smallworld/state/memory/stack/i386.py +22 -0
- smallworld/state/memory/stack/mips.py +34 -0
- smallworld/state/memory/stack/mips64.py +34 -0
- smallworld/state/memory/stack/ppc.py +34 -0
- smallworld/state/memory/stack/riscv.py +22 -0
- smallworld/state/memory/stack/stack.py +127 -0
- smallworld/state/memory/stack/xtensa.py +34 -0
- smallworld/state/models/__init__.py +6 -0
- smallworld/state/models/mmio.py +186 -0
- smallworld/state/models/model.py +163 -0
- smallworld/state/models/posix.py +455 -0
- smallworld/state/models/x86/__init__.py +2 -0
- smallworld/state/models/x86/microsoftcdecl.py +35 -0
- smallworld/state/models/x86/systemv.py +240 -0
- smallworld/state/state.py +962 -0
- smallworld/state/unstable/__init__.py +0 -0
- smallworld/state/unstable/elf.py +393 -0
- smallworld/state/x86_registers.py +30 -0
- smallworld/utils.py +935 -0
- smallworld_re-1.0.0.dist-info/LICENSE.txt +21 -0
- smallworld_re-1.0.0.dist-info/METADATA +189 -0
- smallworld_re-1.0.0.dist-info/RECORD +166 -0
- smallworld_re-1.0.0.dist-info/WHEEL +5 -0
- smallworld_re-1.0.0.dist-info/entry_points.txt +2 -0
- smallworld_re-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,684 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import logging
|
4
|
+
import sys
|
5
|
+
import typing
|
6
|
+
from enum import Enum
|
7
|
+
|
8
|
+
import capstone
|
9
|
+
import claripy
|
10
|
+
import unicorn
|
11
|
+
import unicorn.ppc_const # Not properly exposed by the unicorn module
|
12
|
+
|
13
|
+
from ... import exceptions, instructions, platforms, utils
|
14
|
+
from .. import emulator, hookable
|
15
|
+
from .machdefs import UnicornMachineDef
|
16
|
+
|
17
|
+
logger = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
|
20
|
+
class UnicornEmulationError(exceptions.EmulationError):
    """Emulation failure reported by Unicorn, with extra context attached.

    The constructor only stores its arguments as attributes; it does not
    call the base-class initializer.

    Arguments:
        uc_err: The Unicorn exception that triggered this error.
        pc: Program counter value associated with the failure.
        msg: Human-readable description of the failure.
        details: Additional data describing the failure.
    """

    def __init__(self, uc_err: unicorn.UcError, pc: int, msg: str, details: dict):
        self.uc_err, self.pc = uc_err, pc
        self.msg, self.details = msg, details

    def __repr__(self) -> str:
        # Note: ``msg`` is not part of the representation.
        cls_name = self.__class__.__name__
        return f"{cls_name}({self.uc_err}, {hex(self.pc)}, {self.details})"
|
31
|
+
|
32
|
+
|
33
|
+
class UnicornEmulationMemoryReadError(UnicornEmulationError):
    """Unicorn emulation error attributed to a memory read."""
|
35
|
+
|
36
|
+
|
37
|
+
class UnicornEmulationMemoryWriteError(UnicornEmulationError):
    """Unicorn emulation error attributed to a memory write."""
|
39
|
+
|
40
|
+
|
41
|
+
class UnicornEmulationExecutionError(UnicornEmulationError):
    """Unicorn emulation error raised while executing code."""
|
43
|
+
|
44
|
+
|
45
|
+
class EmulatorState(Enum):
    """Execution mode the emulator is currently in.

    The ``START_*`` members mark a step that has been requested but whose
    first callback has not fired yet; the matching callback promotes them
    to ``BLOCK`` / ``STEP``, and the next callback in that state stops the
    engine.
    """

    # Block stepping requested; the first block callback has not fired yet.
    START_BLOCK = 1
    # Instruction stepping requested; the first code callback has not fired yet.
    START_STEP = 2
    # Single-stepping: the next code callback stops the engine.
    STEP = 3
    # Block-stepping: the next block callback stops the engine.
    BLOCK = 4
    # Free-running emulation.
    RUN = 5
    # Initial state, before any emulation has been requested.
    SETUP = 6
|
52
|
+
|
53
|
+
|
54
|
+
class UnicornEmulator(
    emulator.Emulator,
    hookable.QInstructionHookable,
    hookable.QFunctionHookable,
    hookable.QMemoryReadHookable,
    hookable.QMemoryWriteHookable,
    hookable.QInterruptHookable,
):
    """An emulator for the Unicorn emulation engine."""

    # Descriptive metadata for this emulator.
    description = "This is a smallworld class encapsulating the Unicorn emulator."
    name = "smallworld's-unicorn"
    version = "0.0"

    # Granularity, in bytes, to which memory mappings are rounded.
    PAGE_SIZE = 0x1000
|
69
|
+
|
70
|
+
def __init__(self, platform: platforms.Platform):
    """Build the Unicorn engine for ``platform`` and install its hooks.

    Creates the Unicorn engine and a Capstone disassembler from the
    machine definition matching ``platform``, initialises the bookkeeping
    for mapped memory, labels and initialized registers, and registers
    the code, memory-read, memory-write, interrupt and block callbacks
    with the engine.

    Arguments:
        platform: Platform to emulate.
    """
    super().__init__(platform)
    self.platform = platform
    self.machdef = UnicornMachineDef.for_platform(self.platform)
    self.engine = unicorn.Uc(self.machdef.uc_arch, self.machdef.uc_mode)
    self.disassembler = capstone.Cs(self.machdef.cs_arch, self.machdef.cs_mode)
    self.disassembler.detail = True

    self.memory_map: utils.RangeCollection = utils.RangeCollection()
    self.state: EmulatorState = EmulatorState.SETUP

    # Labels are per byte.
    #
    # Registers: one entry per full-width base register (by name), each
    # mapping a byte offset within that register to a string label.
    # For 64-bit x86, self.label["rax"][0] = "input" labels byte 0 of rax.
    # There is no self.label["eax"] -- look in "rax" instead;
    # read_register_label navigates that for you.
    #
    # Memory: a single "mem" entry mapping each labelled byte address to
    # its label, e.g. self.label["mem"][0xdeadbeef] = "came_from_hades".
    self.label: typing.Dict[str, typing.Dict[int, str]] = {}

    def value_bytes(value, size):
        # The Unicorn Python binding may hand `value` over as a signed
        # integer; mask it to `size` bytes so to_bytes() cannot raise
        # OverflowError on values with the top bit set.
        mask = (1 << (8 * size)) - 1
        return (value & mask).to_bytes(size, self.platform.byteorder.value)

    # This runs on *every* instruction.
    def code_callback(uc, address, size, user_data):
        # When single-stepping we want to end on the instruction after
        # the one we started on: the first callback promotes START_STEP
        # to STEP, the second one stops the engine.
        if self.state == EmulatorState.STEP:
            self.engine.emu_stop()
        if self.state == EmulatorState.START_STEP:
            self.state = EmulatorState.STEP

        if not self._bounds.is_empty() and not self._bounds.contains_value(address):
            self.engine.emu_stop()
            raise exceptions.EmulationBounds

        # Check whether we have hit an exit point.
        if address in self._exit_points:
            logger.debug(f"stopping emulation at exit point {address:x}")
            self.engine.emu_stop()
            raise exceptions.EmulationExitpoint

        # Run instruction hooks.
        if self.all_instructions_hook:
            self.all_instructions_hook(self)

        if cb := self.is_instruction_hooked(address):
            logger.debug(f"hit hooking address for instruction at {address:x}")
            cb(self)

        # NOTE(review): an earlier comment here said function hooks are
        # checked *before* bounds because they might be out of bounds,
        # but the bounds check above runs first -- confirm the intent.
        if cb := self.is_function_hooked(address):
            logger.debug(
                f"hit hooking address for function at {address:x} -- {self.function_hooks[address]}"
            )
            # Hooking a function means we stop at function entry and,
            # after running the hook (the model for the function), do
            # not let the function body execute. Instead we return from
            # the function as if it had run.
            cb(self)

            # Mimic a platform-specific "return" instruction.
            architecture = self.platform.architecture
            if architecture == platforms.Architecture.X86_32:
                # i386: pop a 4-byte value off the stack
                sp = self.read_register("esp")
                ret = int.from_bytes(
                    self.read_memory(sp, 4), self.platform.byteorder.value
                )
                self.write_register("esp", sp + 4)
            elif architecture == platforms.Architecture.X86_64:
                # amd64: pop an 8-byte value off the stack
                sp = self.read_register("rsp")
                ret = int.from_bytes(
                    self.read_memory(sp, 8), self.platform.byteorder.value
                )
                self.write_register("rsp", sp + 8)
            elif architecture in (
                platforms.Architecture.AARCH64,
                platforms.Architecture.ARM_V5T,
                platforms.Architecture.ARM_V6M,
                platforms.Architecture.ARM_V6M_THUMB,
                platforms.Architecture.ARM_V7A,
                platforms.Architecture.ARM_V7M,
                platforms.Architecture.ARM_V7R,
                platforms.Architecture.POWERPC32,
                platforms.Architecture.POWERPC64,
            ):
                # aarch64, arm32, powerpc and powerpc64: branch to register 'lr'
                ret = self.read_register("lr")
            elif architecture in (
                platforms.Architecture.MIPS32,
                platforms.Architecture.MIPS64,
            ):
                # mips32 and mips64: branch to register 'ra'
                ret = self.read_register("ra")
            else:
                # Fixed: this message was missing its f-string prefix and
                # printed the placeholder literally.
                raise exceptions.ConfigurationError(
                    f"Don't know how to return for {self.platform.architecture}"
                )

            self.write_register("pc", ret)

    self.engine.hook_add(unicorn.UC_HOOK_CODE, code_callback)

    # Functions to run before memory reads and writes.

    def apply_read_override(uc, address, size, data):
        # Write hook-supplied bytes over the memory being read. The
        # instruction is emulated after the read callback fires, so the
        # new value is the one used for computation.
        if len(data) != size:
            raise exceptions.EmulationError(
                f"Read hook at {hex(address)} returned {len(data)} bytes; need {size} bytes"
            )
        uc.mem_write(address, data)

    def mem_read_callback(uc, type, address, size, value, user_data):
        assert type == unicorn.UC_MEM_READ
        # Fixed: a stray trailing comma used to wrap this in a 1-tuple,
        # so hooks received a tuple instead of bytes.
        orig_data = value_bytes(value, size)
        if self.all_reads_hook:
            data = self.all_reads_hook(self, address, size, orig_data)
            if data:
                apply_read_override(uc, address, size, data)
                # The address-specific hook below sees the overridden data.
                orig_data = data

        if cb := self.is_memory_read_hooked(address):
            data = cb(self, address, size, orig_data)
            if data:
                apply_read_override(uc, address, size, data)

    def mem_write_callback(uc, type, address, size, value, user_data):
        assert type == unicorn.UC_MEM_WRITE
        written = value_bytes(value, size)
        if self.all_writes_hook:
            self.all_writes_hook(self, address, size, written)

        if cb := self.is_memory_write_hooked(address):
            cb(self, address, size, written)

    self.engine.hook_add(unicorn.UC_HOOK_MEM_WRITE, mem_write_callback)
    self.engine.hook_add(unicorn.UC_HOOK_MEM_READ, mem_read_callback)

    # Function to run on *every* interrupt.
    self.interrupts_hook: typing.Optional[
        typing.Callable[[emulator.Emulator, int], None]
    ] = None

    # Function to run on a specific interrupt number.
    self.interrupt_hook: typing.Dict[
        int, typing.Callable[[emulator.Emulator], None]
    ] = {}

    def interrupt_callback(uc, index, user_data):
        # Fixed: both hooks used to be called with no arguments, which
        # does not match the callable types declared just above.
        # NOTE(review): nothing visible in this file ever assigns these
        # two attributes -- confirm how they relate to the hooks provided
        # by hookable.QInterruptHookable.
        if self.interrupts_hook is not None:
            self.interrupts_hook(self, index)
        if index in self.interrupt_hook:
            self.interrupt_hook[index](self)

    self.engine.hook_add(unicorn.UC_HOOK_INTR, interrupt_callback)

    def block_callback(uc, address, block_size, user_data):
        # Same two-phase scheme as instruction stepping: the first block
        # callback promotes START_BLOCK to BLOCK, the next one stops.
        if self.state == EmulatorState.BLOCK:
            self.engine.emu_stop()
        if self.state == EmulatorState.START_BLOCK:
            self.state = EmulatorState.BLOCK

    self.engine.hook_add(unicorn.UC_HOOK_BLOCK, block_callback)

    # Keep track of which register bytes have been initialized, keyed by
    # full-width base register name.
    self.initialized_registers: typing.Dict[str, typing.Set[int]] = {}
|
258
|
+
|
259
|
+
def _check_pc_ok(self, pc):
|
260
|
+
"""Check if this pc is ok to emulate, i.e. in bounds and not an exit
|
261
|
+
point."""
|
262
|
+
|
263
|
+
if not self._bounds.is_empty() and not self._bounds.contains_value(pc):
|
264
|
+
# There are bounds, and we are not in them
|
265
|
+
return False
|
266
|
+
|
267
|
+
# check for if we've hit an exit point
|
268
|
+
if pc in self._exit_points:
|
269
|
+
logger.debug(f"stopping emulation at exit point {pc:x}")
|
270
|
+
return False
|
271
|
+
return True
|
272
|
+
|
273
|
+
def _register(self, name: str) -> typing.Tuple[typing.Any, str, int, int]:
|
274
|
+
# Translate register name into the tuple
|
275
|
+
# (u, b, o, s)
|
276
|
+
# u is the unicorn reg number
|
277
|
+
# b is the name of full-width base register this is or is part of
|
278
|
+
# o is start offset within full-width base register
|
279
|
+
# s is size in bytes
|
280
|
+
name = name.lower()
|
281
|
+
# support some generic register references
|
282
|
+
if name == "pc":
|
283
|
+
name = self.machdef.pc_reg
|
284
|
+
return self.machdef.uc_reg(name)
|
285
|
+
|
286
|
+
def read_register_content(self, name: str) -> int:
    """Read the current value of register ``name`` from the engine.

    Returns:
        The register value, or 0 when the resolved Unicorn register
        number is 0 (the read is skipped in that case).

    Raises:
        exceptions.AnalysisError: If the engine fails to read the register.
    """
    reg, _base, _size, _offset = self._register(name)
    if reg == 0:
        # Register number 0 is not read from the engine at all.
        return 0
    try:
        value = self.engine.reg_read(reg)
    except Exception as e:
        raise exceptions.AnalysisError(f"Failed reading {name} (id: {reg})") from e
    return value
|
295
|
+
|
296
|
+
def read_register_label(self, name: str) -> typing.Optional[str]:
    """Return the labels attached to the bytes of register ``name``.

    Returns:
        None when the register's base register has no label entry at
        all. Otherwise the distinct non-None labels found on the bytes
        covered by this register, joined with ":" (an empty string when
        none of those bytes is labelled).
    """
    _, base_reg, size, offset = self._register(name)
    if base_reg not in self.label:
        return None

    per_byte = self.label[base_reg]
    # Collect the set of labels over every byte offset of this register.
    found = {
        per_byte[index]
        for index in range(offset, offset + size)
        if index in per_byte and per_byte[index] is not None
    }
    return ":".join(list(found))
|
309
|
+
|
310
|
+
def read_register(self, name: str) -> int:
    """Return the value of register ``name``.

    Thin wrapper that delegates to ``read_register_content``.
    """
    value = self.read_register_content(name)
    return value
|
312
|
+
|
313
|
+
def write_register_content(
    self, name: str, content: typing.Union[None, int, claripy.ast.bv.BV]
) -> None:
    """Write a concrete value into register ``name``.

    A ``None`` value is ignored (only a debug message is logged). After a
    successful write, the bytes covered by the register are recorded as
    initialized under its base register.

    Raises:
        exceptions.SymbolicValueError: If ``content`` is a bitvector.
        exceptions.AnalysisError: If the engine rejects the write.
    """
    if content is None:
        logger.debug(f"ignoring register write to {name} - no value")
        return

    if isinstance(content, claripy.ast.bv.BV):
        raise exceptions.SymbolicValueError(
            "This emulator cannot handle bitvector expressions"
        )

    reg, base_reg, size, start_offset = self._register(name)
    try:
        self.engine.reg_write(reg, content)
    except Exception as e:
        raise exceptions.AnalysisError(f"Failed writing {name} (id: {reg})") from e

    # Keep track of which bytes in this register have been initialized.
    written = self.initialized_registers.setdefault(base_reg, set([]))
    written.update(range(start_offset, start_offset + size))
    logger.debug(f"set register {name}={content}")
|
336
|
+
|
337
|
+
def write_register_label(
    self, name: str, label: typing.Optional[str] = None
) -> None:
    """Attach ``label`` to every byte covered by register ``name``.

    Labels are stored under the register's base register, keyed by byte
    offset. A ``None`` label is a no-op.
    """
    if label is None:
        return
    _, base_reg, size, offset = self._register(name)
    per_byte = self.label.setdefault(base_reg, {})
    per_byte.update(dict.fromkeys(range(offset, offset + size), label))
|
347
|
+
|
348
|
+
def write_register(
    self, name: str, content: typing.Union[None, int, claripy.ast.bv.BV]
) -> None:
    """Write ``content`` to register ``name``.

    Thin wrapper that delegates to ``write_register_content``.
    """
    self.write_register_content(name, content)
    return None
|
352
|
+
|
353
|
+
def read_memory_content(self, address: int, size: int) -> bytes:
    """Read ``size`` bytes of emulated memory starting at ``address``.

    Arguments:
        address: Address of the first byte to read.
        size: Number of bytes to read.

    Returns:
        The data returned by the engine's ``mem_read``.

    Raises:
        ValueError: If ``size`` exceeds ``sys.maxsize``.
        UnicornEmulationError: (via ``_error``) if the engine fails the read.
    """
    if size > sys.maxsize:
        raise ValueError(f"{size} is too large (max: {sys.maxsize})")
    try:
        return self.engine.mem_read(address, size)
    except unicorn.UcError as e:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning(f"Unicorn raised an exception on memory read {e}")
        # Translates the Unicorn error into a richer exception; always raises.
        self._error(e, "mem")
        # An explicit raise (rather than ``assert False``) so this stays a
        # hard failure even when assertions are stripped with -O.
        raise AssertionError("unreachable: _error() always raises")
|
362
|
+
|
363
|
+
def read_memory_label(self, address: int, size: int) -> typing.Optional[str]:
    """Return the labels attached to ``size`` bytes of memory at ``address``.

    Returns:
        The distinct labels found on the addressed bytes joined with ":",
        or None when no memory label exists at all or none of the
        addressed bytes is labelled.
    """
    if "mem" not in self.label:
        return None

    mem_labels = self.label["mem"]
    found = {
        mem_labels[addr]
        for addr in range(address, address + size)
        if addr in mem_labels
    }
    if not found:
        return None
    return ":".join(list(found))
|
374
|
+
|
375
|
+
def read_memory(self, address: int, size: int) -> bytes:
    """Read ``size`` bytes starting at ``address``.

    Thin wrapper that delegates to ``read_memory_content``.
    """
    data = self.read_memory_content(address, size)
    return data
|
377
|
+
|
378
|
+
def map_memory(self, address: int, size: int) -> None:
    """Map the pages covering ``[address, address + size)`` in the engine.

    The region is widened to ``PAGE_SIZE`` boundaries, and only the parts
    of it that ``self.memory_map`` reports as missing are recorded and
    mapped.
    """
    page = self.PAGE_SIZE

    # Round the start address down to a page boundary.
    first_page = address - (address % page)

    # Expand the size to account for moving the start address, then round
    # it up to a whole number of pages.
    span = size + (address - first_page)
    page_count = (span + page - 1) // page
    last_page_end = first_page + page_count * page

    # Fill in any gaps in the specified region.
    gaps = self.memory_map.get_missing_ranges((first_page, last_page_end))
    for start, end in gaps:
        self.memory_map.add_range((start, end))
        self.engine.mem_map(start, end - start)
|
397
|
+
|
398
|
+
def get_memory_map(self) -> typing.List[typing.Tuple[int, int]]:
    """Return a new list holding the ranges currently in ``self.memory_map``."""
    return [*self.memory_map.ranges]
|
400
|
+
|
401
|
+
def _is_address_mapped(self, address):
|
402
|
+
(ind, found) = self.memory_map.find_closest_range(address)
|
403
|
+
return found
|
404
|
+
|
405
|
+
def _is_address_range_mapped(self, address_range):
|
406
|
+
(a, b) = address_range
|
407
|
+
for address in range(a, b):
|
408
|
+
if self._is_address_mapped(address) is False:
|
409
|
+
return False
|
410
|
+
return True
|
411
|
+
|
412
|
+
def write_memory_content(
    self, address: int, content: typing.Union[bytes, claripy.ast.bv.BV]
) -> None:
    """Write concrete bytes into emulated memory at ``address``.

    Arguments:
        address: Address of the first byte to write.
        content: The bytes to write; must be concrete and non-empty.

    Raises:
        ValueError: If ``content`` is None, empty, or too large.
        exceptions.SymbolicValueError: If ``content`` is a bitvector.
        UnicornEmulationError: (via ``_error``) if the engine fails the write.
    """
    if content is None:
        raise ValueError(f"{self.__class__.__name__} requires concrete state")

    if isinstance(content, claripy.ast.bv.BV):
        raise exceptions.SymbolicValueError(
            "This emulator cannot handle bitvector expressions"
        )

    if len(content) > sys.maxsize:
        raise ValueError(f"{len(content)} is too large (max: {sys.maxsize})")

    if not len(content):
        raise ValueError("memory write cannot be empty")

    try:
        self.engine.mem_write(address, content)
    except unicorn.UcError as e:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning(f"Unicorn raised an exception on memory write {e}")
        # Translates the Unicorn error into a richer exception; always raises.
        self._error(e, "mem")

    logger.debug(f"wrote {len(content)} bytes to 0x{address:x}")
|
437
|
+
|
438
|
+
def write_memory_label(
    self, address: int, size: int, label: typing.Optional[str] = None
) -> None:
    """Attach ``label`` to each of ``size`` bytes of memory at ``address``.

    Labels are stored per byte address under ``self.label["mem"]``.
    A ``None`` label is a no-op.
    """
    if label is None:
        return
    mem_labels = self.label.setdefault("mem", dict())
    mem_labels.update(dict.fromkeys(range(address, address + size), label))
|
447
|
+
|
448
|
+
def write_memory(
    self, address: int, content: typing.Union[bytes, claripy.ast.bv.BV]
) -> None:
    """Write ``content`` to memory at ``address``.

    Thin wrapper that delegates to ``write_memory_content``.
    """
    self.write_memory_content(address, content)
    return None
|
452
|
+
|
453
|
+
def hook_instruction(
    self, address: int, function: typing.Callable[[emulator.Emulator], None]
) -> None:
    """Register an instruction hook at ``address`` and map its memory.

    The hook is registered through the parent class; afterwards
    ``PAGE_SIZE`` bytes starting at ``address`` are passed to
    ``map_memory``.
    """
    super().hook_instruction(address, function)
    self.map_memory(address, self.PAGE_SIZE)
|
458
|
+
|
459
|
+
def hook_function(
    self, address: int, function: typing.Callable[[emulator.Emulator], None]
) -> None:
    """Register a function hook at ``address`` and map its memory.

    The hook is registered through the parent class; afterwards
    ``PAGE_SIZE`` bytes starting at ``address`` are passed to
    ``map_memory``.
    """
    super().hook_function(address, function)
    self.map_memory(address, self.PAGE_SIZE)
|
464
|
+
|
465
|
+
def _disassemble(
|
466
|
+
self, code: bytes, base: int, count: typing.Optional[int] = None
|
467
|
+
) -> typing.Tuple[typing.List[capstone.CsInsn], str]:
|
468
|
+
instructions = self.disassembler.disasm(code, base)
|
469
|
+
disassembly = []
|
470
|
+
insns = []
|
471
|
+
for i, instruction in enumerate(instructions):
|
472
|
+
if count is not None and i >= count:
|
473
|
+
break
|
474
|
+
insns.append(instruction)
|
475
|
+
disassembly.append(f"{instruction.mnemonic} {instruction.op_str}")
|
476
|
+
return (insns, "\n".join(disassembly))
|
477
|
+
|
478
|
+
def current_instruction(self) -> capstone.CsInsn:
    """Disassemble and return the instruction at the current pc.

    Reads 15 bytes at pc (presumably the longest instruction expected --
    TODO confirm) and returns the first instruction the disassembler
    yields, or None when it yields nothing.

    Raises:
        AssertionError: If the memory read returned None.
    """
    pc = self.read_register("pc")
    code = self.read_memory(pc, 15)
    if code is None:
        raise AssertionError("invalid state")
    return next(iter(self.disassembler.disasm(code, pc)), None)
|
485
|
+
|
486
|
+
def _check(self) -> None:
|
487
|
+
# check if it's ok to begin emulating
|
488
|
+
# 1. pc must be set in order to emulate
|
489
|
+
(_, base_name, size, offset) = self._register("pc")
|
490
|
+
if (
|
491
|
+
base_name in self.initialized_registers
|
492
|
+
and len(self.initialized_registers[base_name]) == size
|
493
|
+
):
|
494
|
+
# pc is fully initialized
|
495
|
+
pass
|
496
|
+
else:
|
497
|
+
raise exceptions.ConfigurationError(
|
498
|
+
"pc not initialized, emulation cannot start"
|
499
|
+
)
|
500
|
+
# 2. an exit point is also required
|
501
|
+
if len(self._exit_points) == 0:
|
502
|
+
raise exceptions.ConfigurationError(
|
503
|
+
"at least one exit point must be set, emulation cannot start"
|
504
|
+
)
|
505
|
+
|
506
|
+
def step_instruction(self) -> None:
    """Execute a single instruction at the current pc.

    Raises:
        exceptions.ConfigurationError: (via ``_check``) if emulation is
            not ready to start.
        exceptions.EmulationBounds: If pc already equals the exit point
            handed to Unicorn.
        UnicornEmulationError: (via ``_error``) if Unicorn reports a
            failure other than fetching from an unmapped, hooked function.
    """
    self._check()
    self.state = EmulatorState.START_STEP

    pc = self.read_register("pc")
    # Unicorn takes a single end address, so the first exit point is used.
    exit_point = list(self._exit_points)[0]
    if pc == exit_point:
        raise exceptions.EmulationBounds

    if pc not in self.function_hooks:
        disas = self.current_instruction()
        logger.info(f"single step at 0x{pc:x}: {disas}")

    try:
        self.engine.emu_start(pc, exit_point)

    except unicorn.UcError as e:
        if (
            e.errno == unicorn.UC_ERR_FETCH_UNMAPPED
            and self.read_register("pc") in self.function_hooks
        ):
            # probably we tried to execute call to code that's not mapped?
            pass
        else:
            # Logger.warn is a deprecated alias; use Logger.warning.
            logger.warning(f"emulation stopped - reason: {e}")
            # translate this unicorn error into something richer
            self._error(e, "exec")
|
533
|
+
|
534
|
+
def step_block(self) -> None:
    """Execute one basic block starting at the current pc.

    The engine is started twice: first in ``START_BLOCK`` state, then
    again from the resulting pc in ``BLOCK`` state. A Unicorn error is
    logged as a warning and is not re-raised.

    Raises:
        exceptions.ConfigurationError: (via ``_check``) if emulation is
            not ready to start.
    """
    self._check()
    pc = self.read_register("pc")
    # Unicorn takes a single end address, so the first exit point is used.
    exit_point = list(self._exit_points)[0]

    disas = self.current_instruction()
    logger.info(f"step block at 0x{pc:x}: {disas}")
    try:
        self.state = EmulatorState.START_BLOCK
        self.engine.emu_start(pc, exit_point)
        pc = self.read_register("pc")

        self.state = EmulatorState.BLOCK
        self.engine.emu_start(pc, exit_point)
    except unicorn.UcError as e:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning(f"emulation stopped - reason: {e}")
        logger.warning("for more details, run emulation in single step mode")
|
551
|
+
|
552
|
+
def run(self) -> None:
    """Emulate from the current pc until emulation stops.

    An ``exceptions.EmulationStop`` raised during emulation ends the run
    quietly; a Unicorn error is logged and re-raised in richer form.

    Raises:
        exceptions.ConfigurationError: (via ``_check``) if emulation is
            not ready to start.
        UnicornEmulationError: (via ``_error``) if Unicorn reports a failure.
    """
    self._check()
    self.state = EmulatorState.RUN

    logger.info(f"starting emulation at 0x{self.read_register('pc'):x}")

    try:
        # unicorn requires one exit point so just use first
        exit_point = list(self._exit_points)[0]
        self.engine.emu_start(self.read_register("pc"), exit_point)
    except exceptions.EmulationStop:
        pass
    except unicorn.UcError as e:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning(f"emulation stopped - reason: {e}")
        logger.warning("for more details, run emulation in single step mode")
        self._error(e, "exec")

    logger.info("emulation complete")
|
572
|
+
|
573
|
+
def _error(self, error: unicorn.UcError, typ: str) -> typing.NoReturn:
    """Raises new exception from unicorn exception with extra details.

    Should only be run while single stepping.

    Arguments:
        error: Unicorn exception.
        typ: Context of the failure: "mem" for a direct memory access,
            "exec" for a failure during execution. Anything else is
            reported as an unexpected Unicorn error.

    Raises:
        UnicornEmulationError with extra details about the error (or a
        plain EmulationError when neither ``typ`` nor the Unicorn error
        code selects a more specific class).
    """

    pc = self.read_register("pc")

    # Best effort: decode the instruction at pc for the error details.
    # The bytes are read straight from the engine rather than through
    # read_memory(), because read_memory() reports failures by calling
    # _error() again, which would recurse when pc itself is unreadable.
    try:
        code = self.engine.mem_read(pc, 16)
        insns, _ = self._disassemble(code, pc, 1)
        i = instructions.Instruction.from_capstone(insns[0])
    except Exception:
        # looks like that code is not available
        i = None

    exc: typing.Type[exceptions.EmulationError] = exceptions.EmulationError

    # Fixed: the second test used to be a plain ``if``, so for "mem" its
    # ``else`` branch overwrote the prefix with the "unexpected" text.
    if typ == "mem":
        prefix = "Failed memory access"
        exc = UnicornEmulationMemoryReadError
    elif typ == "exec":
        prefix = "Quit emulation"
        exc = UnicornEmulationExecutionError
    else:
        prefix = "Unexpected Unicorn error"

    # rws is a list of either reads or writes. Return those memory
    # operands whose address is not actually available, i.e. not mapped.
    def get_unavailable_rw(rws):
        out = []
        for rw in rws:
            if type(rw) is instructions.BSIDMemoryReferenceOperand:
                a = rw.address(self)
                if not (self._is_address_mapped(a)):
                    out.append(rw)
        return out

    # Memory access faults: errno -> (reason, exception class, details
    # key, instruction attribute listing the relevant operands). These
    # override the exception class chosen from ``typ`` above.
    # NOTE(review): the protected/unaligned entries are also filled with
    # *unmapped* operands only, as in the original code.
    access_faults = {
        unicorn.UC_ERR_READ_UNMAPPED: (
            "due to read of unmapped memory",
            UnicornEmulationMemoryReadError,
            "unmapped_reads",
            "reads",
        ),
        unicorn.UC_ERR_READ_PROT: (
            "due to read of mapped but protected memory",
            UnicornEmulationMemoryReadError,
            "protected_reads",
            "reads",
        ),
        unicorn.UC_ERR_READ_UNALIGNED: (
            "due to unaligned read",
            UnicornEmulationMemoryReadError,
            "unaligned_reads",
            "reads",
        ),
        unicorn.UC_ERR_WRITE_UNMAPPED: (
            "due to write to unmapped memory",
            UnicornEmulationMemoryWriteError,
            "unmapped_writes",
            "writes",
        ),
        unicorn.UC_ERR_WRITE_PROT: (
            "due to write to mapped but protected memory",
            UnicornEmulationMemoryWriteError,
            "protected_writes",
            "writes",
        ),
        unicorn.UC_ERR_WRITE_UNALIGNED: (
            "due to unaligned write",
            UnicornEmulationMemoryWriteError,
            "unaligned_writes",
            "writes",
        ),
    }

    # Remaining known faults only contribute a reason to the message.
    other_faults = {
        unicorn.UC_ERR_FETCH_UNMAPPED: "due to fetch of unmapped memory",
        unicorn.UC_ERR_FETCH_PROT: "due to fetch of from mapped but protected memory",
        unicorn.UC_ERR_FETCH_UNALIGNED: "due to unaligned fetch",
        unicorn.UC_ERR_NOMEM: "due Out-Of-Memory",
        unicorn.UC_ERR_INSN_INVALID: "due invalid instruction",
        unicorn.UC_ERR_RESOURCE: "due insufficient resources",
        unicorn.UC_ERR_EXCEPTION: "due cpu exception",
    }

    details: typing.Dict[typing.Union[str, int], typing.Union[str, int, bytes]] = {}

    if error.errno in access_faults:
        reason, exc, key, operand_attr = access_faults[error.errno]
        msg = f"{prefix} {reason}"
        # Fixed: when the instruction could not be decoded (i is None)
        # this used to fail with AttributeError instead of reporting the
        # emulation error; report an empty operand list in that case.
        operands = getattr(i, operand_attr) if i is not None else []
        details[key] = get_unavailable_rw(operands)
    elif error.errno in other_faults:
        msg = f"{prefix} {other_faults[error.errno]}"
        if error.errno == unicorn.UC_ERR_INSN_INVALID:
            details = {"pc": pc, "instr": str(i)}
    else:
        msg = f"{prefix} due to unknown Unicorn error {error.errno}"

    raise exc(error, pc, msg, details) from error
|
674
|
+
|
675
|
+
def __repr__(self) -> str:
|
676
|
+
return f"UnicornEmulator(platform={self.platform})"
|
677
|
+
|
678
|
+
|
679
|
+
# Public names of this module: the emulator itself plus the specific
# error classes it raises. The UnicornEmulationError base class is not
# part of this list.
__all__ = [
    "UnicornEmulator",
    "UnicornEmulationMemoryReadError",
    "UnicornEmulationMemoryWriteError",
    "UnicornEmulationExecutionError",
]
|