smallworld-re 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smallworld/__init__.py +35 -0
- smallworld/analyses/__init__.py +14 -0
- smallworld/analyses/analysis.py +88 -0
- smallworld/analyses/code_coverage.py +31 -0
- smallworld/analyses/colorizer.py +682 -0
- smallworld/analyses/colorizer_summary.py +100 -0
- smallworld/analyses/field_detection/__init__.py +14 -0
- smallworld/analyses/field_detection/field_analysis.py +536 -0
- smallworld/analyses/field_detection/guards.py +26 -0
- smallworld/analyses/field_detection/hints.py +133 -0
- smallworld/analyses/field_detection/malloc.py +211 -0
- smallworld/analyses/forced_exec/__init__.py +3 -0
- smallworld/analyses/forced_exec/forced_exec.py +87 -0
- smallworld/analyses/underlays/__init__.py +4 -0
- smallworld/analyses/underlays/basic.py +13 -0
- smallworld/analyses/underlays/underlay.py +31 -0
- smallworld/analyses/unstable/__init__.py +4 -0
- smallworld/analyses/unstable/angr/__init__.py +0 -0
- smallworld/analyses/unstable/angr/base.py +12 -0
- smallworld/analyses/unstable/angr/divergence.py +274 -0
- smallworld/analyses/unstable/angr/model.py +383 -0
- smallworld/analyses/unstable/angr/nwbt.py +63 -0
- smallworld/analyses/unstable/angr/typedefs.py +170 -0
- smallworld/analyses/unstable/angr/utils.py +25 -0
- smallworld/analyses/unstable/angr/visitor.py +315 -0
- smallworld/analyses/unstable/angr_nwbt.py +106 -0
- smallworld/analyses/unstable/code_coverage.py +54 -0
- smallworld/analyses/unstable/code_reachable.py +44 -0
- smallworld/analyses/unstable/control_flow_tracer.py +71 -0
- smallworld/analyses/unstable/pointer_finder.py +90 -0
- smallworld/arch/__init__.py +0 -0
- smallworld/arch/aarch64_arch.py +286 -0
- smallworld/arch/amd64_arch.py +86 -0
- smallworld/arch/i386_arch.py +44 -0
- smallworld/emulators/__init__.py +14 -0
- smallworld/emulators/angr/__init__.py +7 -0
- smallworld/emulators/angr/angr.py +1652 -0
- smallworld/emulators/angr/default.py +15 -0
- smallworld/emulators/angr/exceptions.py +7 -0
- smallworld/emulators/angr/exploration/__init__.py +9 -0
- smallworld/emulators/angr/exploration/bounds.py +27 -0
- smallworld/emulators/angr/exploration/default.py +17 -0
- smallworld/emulators/angr/exploration/terminate.py +22 -0
- smallworld/emulators/angr/factory.py +55 -0
- smallworld/emulators/angr/machdefs/__init__.py +35 -0
- smallworld/emulators/angr/machdefs/aarch64.py +292 -0
- smallworld/emulators/angr/machdefs/amd64.py +192 -0
- smallworld/emulators/angr/machdefs/arm.py +387 -0
- smallworld/emulators/angr/machdefs/i386.py +221 -0
- smallworld/emulators/angr/machdefs/machdef.py +138 -0
- smallworld/emulators/angr/machdefs/mips.py +184 -0
- smallworld/emulators/angr/machdefs/mips64.py +189 -0
- smallworld/emulators/angr/machdefs/ppc.py +101 -0
- smallworld/emulators/angr/machdefs/riscv.py +261 -0
- smallworld/emulators/angr/machdefs/xtensa.py +255 -0
- smallworld/emulators/angr/memory/__init__.py +7 -0
- smallworld/emulators/angr/memory/default.py +10 -0
- smallworld/emulators/angr/memory/fixups.py +43 -0
- smallworld/emulators/angr/memory/memtrack.py +105 -0
- smallworld/emulators/angr/scratch.py +43 -0
- smallworld/emulators/angr/simos.py +53 -0
- smallworld/emulators/angr/utils.py +70 -0
- smallworld/emulators/emulator.py +1013 -0
- smallworld/emulators/hookable.py +252 -0
- smallworld/emulators/panda/__init__.py +5 -0
- smallworld/emulators/panda/machdefs/__init__.py +28 -0
- smallworld/emulators/panda/machdefs/aarch64.py +93 -0
- smallworld/emulators/panda/machdefs/amd64.py +71 -0
- smallworld/emulators/panda/machdefs/arm.py +89 -0
- smallworld/emulators/panda/machdefs/i386.py +36 -0
- smallworld/emulators/panda/machdefs/machdef.py +86 -0
- smallworld/emulators/panda/machdefs/mips.py +94 -0
- smallworld/emulators/panda/machdefs/mips64.py +91 -0
- smallworld/emulators/panda/machdefs/ppc.py +79 -0
- smallworld/emulators/panda/panda.py +575 -0
- smallworld/emulators/unicorn/__init__.py +13 -0
- smallworld/emulators/unicorn/machdefs/__init__.py +28 -0
- smallworld/emulators/unicorn/machdefs/aarch64.py +310 -0
- smallworld/emulators/unicorn/machdefs/amd64.py +326 -0
- smallworld/emulators/unicorn/machdefs/arm.py +321 -0
- smallworld/emulators/unicorn/machdefs/i386.py +137 -0
- smallworld/emulators/unicorn/machdefs/machdef.py +117 -0
- smallworld/emulators/unicorn/machdefs/mips.py +202 -0
- smallworld/emulators/unicorn/unicorn.py +684 -0
- smallworld/exceptions/__init__.py +5 -0
- smallworld/exceptions/exceptions.py +85 -0
- smallworld/exceptions/unstable/__init__.py +1 -0
- smallworld/exceptions/unstable/exceptions.py +25 -0
- smallworld/extern/__init__.py +4 -0
- smallworld/extern/ctypes.py +94 -0
- smallworld/extern/unstable/__init__.py +1 -0
- smallworld/extern/unstable/ghidra.py +129 -0
- smallworld/helpers.py +107 -0
- smallworld/hinting/__init__.py +8 -0
- smallworld/hinting/hinting.py +214 -0
- smallworld/hinting/hints.py +427 -0
- smallworld/hinting/unstable/__init__.py +2 -0
- smallworld/hinting/utils.py +19 -0
- smallworld/instructions/__init__.py +18 -0
- smallworld/instructions/aarch64.py +20 -0
- smallworld/instructions/arm.py +18 -0
- smallworld/instructions/bsid.py +67 -0
- smallworld/instructions/instructions.py +258 -0
- smallworld/instructions/mips.py +21 -0
- smallworld/instructions/x86.py +100 -0
- smallworld/logging.py +90 -0
- smallworld/platforms.py +95 -0
- smallworld/py.typed +0 -0
- smallworld/state/__init__.py +6 -0
- smallworld/state/cpus/__init__.py +32 -0
- smallworld/state/cpus/aarch64.py +563 -0
- smallworld/state/cpus/amd64.py +676 -0
- smallworld/state/cpus/arm.py +630 -0
- smallworld/state/cpus/cpu.py +71 -0
- smallworld/state/cpus/i386.py +239 -0
- smallworld/state/cpus/mips.py +374 -0
- smallworld/state/cpus/mips64.py +372 -0
- smallworld/state/cpus/powerpc.py +229 -0
- smallworld/state/cpus/riscv.py +357 -0
- smallworld/state/cpus/xtensa.py +80 -0
- smallworld/state/memory/__init__.py +7 -0
- smallworld/state/memory/code.py +70 -0
- smallworld/state/memory/elf/__init__.py +3 -0
- smallworld/state/memory/elf/elf.py +564 -0
- smallworld/state/memory/elf/rela/__init__.py +32 -0
- smallworld/state/memory/elf/rela/aarch64.py +27 -0
- smallworld/state/memory/elf/rela/amd64.py +32 -0
- smallworld/state/memory/elf/rela/arm.py +51 -0
- smallworld/state/memory/elf/rela/i386.py +32 -0
- smallworld/state/memory/elf/rela/mips.py +45 -0
- smallworld/state/memory/elf/rela/ppc.py +45 -0
- smallworld/state/memory/elf/rela/rela.py +63 -0
- smallworld/state/memory/elf/rela/riscv64.py +27 -0
- smallworld/state/memory/elf/rela/xtensa.py +15 -0
- smallworld/state/memory/elf/structs.py +55 -0
- smallworld/state/memory/heap.py +85 -0
- smallworld/state/memory/memory.py +181 -0
- smallworld/state/memory/stack/__init__.py +31 -0
- smallworld/state/memory/stack/aarch64.py +22 -0
- smallworld/state/memory/stack/amd64.py +42 -0
- smallworld/state/memory/stack/arm.py +66 -0
- smallworld/state/memory/stack/i386.py +22 -0
- smallworld/state/memory/stack/mips.py +34 -0
- smallworld/state/memory/stack/mips64.py +34 -0
- smallworld/state/memory/stack/ppc.py +34 -0
- smallworld/state/memory/stack/riscv.py +22 -0
- smallworld/state/memory/stack/stack.py +127 -0
- smallworld/state/memory/stack/xtensa.py +34 -0
- smallworld/state/models/__init__.py +6 -0
- smallworld/state/models/mmio.py +186 -0
- smallworld/state/models/model.py +163 -0
- smallworld/state/models/posix.py +455 -0
- smallworld/state/models/x86/__init__.py +2 -0
- smallworld/state/models/x86/microsoftcdecl.py +35 -0
- smallworld/state/models/x86/systemv.py +240 -0
- smallworld/state/state.py +962 -0
- smallworld/state/unstable/__init__.py +0 -0
- smallworld/state/unstable/elf.py +393 -0
- smallworld/state/x86_registers.py +30 -0
- smallworld/utils.py +935 -0
- smallworld_re-1.0.0.dist-info/LICENSE.txt +21 -0
- smallworld_re-1.0.0.dist-info/METADATA +189 -0
- smallworld_re-1.0.0.dist-info/RECORD +166 -0
- smallworld_re-1.0.0.dist-info/WHEEL +5 -0
- smallworld_re-1.0.0.dist-info/entry_points.txt +2 -0
- smallworld_re-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,682 @@
|
|
1
|
+
import base64
|
2
|
+
import copy
|
3
|
+
import logging
|
4
|
+
import random
|
5
|
+
import typing
|
6
|
+
|
7
|
+
import capstone
|
8
|
+
|
9
|
+
from .. import hinting, state
|
10
|
+
from ..emulators import (
|
11
|
+
UnicornEmulationMemoryReadError,
|
12
|
+
UnicornEmulationMemoryWriteError,
|
13
|
+
UnicornEmulator,
|
14
|
+
)
|
15
|
+
from ..exceptions import AnalysisRunError, EmulationBounds
|
16
|
+
from ..instructions import (
|
17
|
+
BSIDMemoryReferenceOperand,
|
18
|
+
Instruction,
|
19
|
+
Operand,
|
20
|
+
RegisterOperand,
|
21
|
+
)
|
22
|
+
from . import analysis
|
23
|
+
|
24
|
+
# Module-level logger for diagnostic output from this analysis.
logger = logging.getLogger(__name__)
# Hinter through which this analysis emits its structured hints.
hinter = hinting.get_hinter(__name__)

# Concrete values that are numerically below this threshold are mapped to
# BAD_COLOR by Colorizer._concrete_val_to_color and are never tracked.
MIN_ACCEPTABLE_COLOR_INT = 20
# Sentinel color string returned for values that should not be tracked.
BAD_COLOR = "BAD_COLOR"

# Map from a color (the base64 string of a concrete value) to the tuple
# (operand, micro-execution number, instruction number, instruction,
# color number) recorded when that value was first observed.
Colors = typing.Dict[str, typing.Tuple[Operand, int, int, Instruction, int]]
|
31
|
+
|
32
|
+
|
33
|
+
class Colorizer(analysis.Analysis):
|
34
|
+
"""A simple kind of data flow analysis via tracking distinct values (colors)
|
35
|
+
and employing instruction use/def analysis
|
36
|
+
|
37
|
+
We run multiple micro-executions of the code starting from same entry. At
|
38
|
+
the start of each, we randomize register values that have not already been
|
39
|
+
initialized. We maintain a "colors" map from values to when we first
|
40
|
+
observed them. This map is initially empty. Before emulating an instruction,
|
41
|
+
we examine the values (registers and memory) it will read. If any are NOT in
|
42
|
+
the colors map, that is the initial sighting of that value and we emit a
|
43
|
+
hint to that effect. If any color IS already in the map, then that is a flow
|
44
|
+
from the time at which that value was first observed to this
|
45
|
+
instruction. Similarly, after emulating an instruction, we examine every
|
46
|
+
value (register and memory) written. If a value is not in the colors map, it
|
47
|
+
is a new, computed result and we hint about its creation. If it is in the
|
48
|
+
colors map, it is just a copy, which we report with a "write-copy" hint.
|
49
|
+
|
50
|
+
Whilst looking at reads and writes for instructions, we hint if any
|
51
|
+
correspond to unavailable memory.
|
52
|
+
|
53
|
+
Arguments:
|
54
|
+
num_micro_executions: The number of micro-executions to run.
|
55
|
+
num_insns: The number of instructions to micro-execute.
|
56
|
+
seed: Random seed for test stability, or None.
|
57
|
+
|
58
|
+
"""
|
59
|
+
|
60
|
+
name = "colorizer"
|
61
|
+
description = "it's almost taint"
|
62
|
+
version = "0.0.1"
|
63
|
+
|
64
|
+
def __init__(
    self,
    *args,
    num_micro_executions: int = 5,
    num_insns: int = 200,
    seed: typing.Optional[int] = 99,
    **kwargs
):
    """Store the analysis parameters.

    Arguments:
        num_micro_executions: The number of micro-executions to run.
        num_insns: The number of instructions to micro-execute.
        seed: Random seed for test stability, or None.

    Any other positional or keyword arguments are forwarded unchanged to
    the parent class initializer.
    """
    super().__init__(*args, **kwargs)
    self.num_micro_executions = num_micro_executions
    self.num_insns = num_insns
    self.seed = seed
    # A private generator instance, so we avoid contention with other users
    # of the module-level generator.
    self.random = random.Random()
|
79
|
+
|
80
|
+
def _get_instr_at_pc(self, pc: int) -> capstone.CsInsn:
    """Disassemble and return the instruction located at ``pc``.

    Reads 15 bytes from emulator memory starting at ``pc`` and hands them to
    the emulator's disassembler, returning the first decoded instruction.

    Arguments:
        pc: Address of the instruction to decode.

    Returns:
        The first instruction decoded at ``pc``.

    Raises:
        AnalysisRunError: If the bytes at ``pc`` cannot be read, or if the
            disassembler decodes no instruction from them.
    """
    code = self.emu.read_memory(pc, 15)  # longest possible instruction
    if code is None:
        raise AnalysisRunError(
            "Unable to read next instruction out of emulator memory"
        )
    # Only the decoded instruction list is needed; the second element of the
    # returned pair is unused here.
    insns, _ = self.emu._disassemble(code, pc, 2)
    if not insns:
        # Without this guard an empty result surfaces as a bare IndexError
        # that carries no hint about which address failed to decode.
        raise AnalysisRunError(f"Unable to disassemble instruction at pc={pc:x}")
    return insns[0]
|
89
|
+
|
90
|
+
def _operand_size(self, operand: Operand) -> int:
    """Return the size of ``operand`` as reported by the CPU or the operand.

    A memory reference operand carries its own size; a register operand's
    size is looked up on the current CPU state by register name. Any other
    kind of operand yields 0.
    """
    kind = type(operand)
    if kind is BSIDMemoryReferenceOperand:
        # memory operand knows its size
        return operand.size
    if kind is RegisterOperand:
        # size of a register is found via its name on the cpu
        register = getattr(self.cpu, operand.name)
        return register.size
    return 0
|
98
|
+
|
99
|
+
def run(self, machine: state.Machine) -> None:
    """Micro-execute ``machine`` several times and emit probability hints.

    For each of ``self.num_micro_executions`` micro-executions a fresh copy
    of ``machine`` is applied to a new ``UnicornEmulator``, registers are
    randomized, and up to ``self.num_insns`` instructions are single-stepped.
    Before each step the values the instruction reads are checked against the
    color map; after each step the values it wrote are checked. Hints are
    collected per micro-execution.

    Afterwards hints are grouped into equivalence classes (see ``hint_key``)
    and, for each class, one ``*ProbHint`` is emitted whose ``prob`` is the
    fraction of micro-executions in which that class was observed at least
    once.

    Arguments:
        machine: The machine state to analyze. It is deep-copied and not
            modified by this method.
    """
    # note that start pc is in start_cpustate

    # collect hints for each micro-execution, in a list of lists
    hint_list_list: typing.List[typing.List[hinting.Hint]] = []

    self.orig_machine = copy.deepcopy(machine)
    self.orig_cpu = self.orig_machine.get_cpu()
    self.platform = self.orig_cpu.platform

    # Seed once per run rather than once per micro-execution: re-seeding
    # inside the loop would make every micro-execution draw the identical
    # sequence from self.random.
    if self.seed is not None:
        self.random.seed(a=self.seed)

    for i in range(self.num_micro_executions):
        logger.info("-------------------------")
        logger.info(f"micro exec #{i}")

        self.machine = copy.deepcopy(self.orig_machine)
        self.cpu = self.machine.get_cpu()
        self.emu = UnicornEmulator(self.platform)
        self.machine.apply(self.emu)

        # initialize registers with random values
        self._randomize_registers()

        # map from color values to first use / def
        self.colors: Colors = {}

        hint_list: typing.List[hinting.Hint] = []
        for j in range(self.num_insns):
            logger.info(f"instr_count = {j}")
            # obtain instr about to be emulated
            pc = self.emu.read_register("pc")
            if pc in self.emu.get_exit_points():
                break
            cs_insn = self._get_instr_at_pc(pc)
            sw_insn = Instruction.from_capstone(cs_insn)

            logger.debug(sw_insn)

            # pull state back out of the emulator for inspection
            m = copy.deepcopy(self.machine)
            m.extract(self.emu)
            self.cpu = m.get_cpu()

            # ---- values read by the instruction (before it executes) ----
            reads: typing.List[typing.Tuple[Operand, str, int]] = []
            for read_operand in sw_insn.reads:
                logger.debug(f"pc={pc:x} read_operand={read_operand}")

                if (
                    type(read_operand) is RegisterOperand
                    and read_operand.name == "rflags"
                ):
                    continue

                sz = self._operand_size(read_operand)
                if type(read_operand) is BSIDMemoryReferenceOperand:
                    a = read_operand.address(self.emu)
                    ar = (a, a + sz)
                    if not self.emu._is_address_range_mapped(ar):
                        # at least one byte in this range is not mapped
                        # so dont add this read to the list; stepping the
                        # instruction below reports the unmapped read
                        continue
                read_operand_color = self._concrete_val_to_color(
                    read_operand.concretize(self.emu), sz
                )
                # discard bad colors
                if read_operand_color == BAD_COLOR:
                    continue
                reads.append((read_operand, read_operand_color, sz))
            # sort by operand repr so hints come out in a stable order
            reads.sort(key=lambda e: repr(e[0]))
            self._check_colors_instruction_reads(reads, sw_insn, i, j, hint_list)

            # ---- execute the instruction ----
            try:
                self.emu.step()

            except EmulationBounds:
                logger.info(
                    "emulation complete. encountered exit point or went out of bounds"
                )
                break

            except UnicornEmulationMemoryWriteError as e:
                for write_operand, conc_val in e.details["writes"]:
                    if (
                        type(write_operand) is BSIDMemoryReferenceOperand
                        and conc_val is None
                    ):
                        h = self._mem_unavailable_hint(
                            write_operand, e.pc, i, j, False
                        )
                        hint_list.append(h)
                # the step failed, so this micro-execution cannot continue
                break

            except UnicornEmulationMemoryReadError as e:
                for read_operand in e.details["unmapped_reads"]:
                    if type(read_operand) is BSIDMemoryReferenceOperand:
                        h = self._mem_unavailable_hint(
                            read_operand, e.pc, i, j, True
                        )
                        hint_list.append(h)
                # the step failed, so this micro-execution cannot continue
                break

            except Exception as e:
                # Emulating this instruction failed for some other reason.
                # Record it as a hint and stop this micro-execution; never
                # drop into an interactive debugger from library code.
                exhint = hinting.EmulationException(
                    message=f"In analysis, single step raised an exception {e}",
                    pc=pc,
                    instruction_num=j,
                    exception=str(e),
                )
                hint_list.append(exhint)
                hinter.debug(exhint)
                logger.info(e)
                break

            # ---- values written by the instruction (after it executed) ----
            writes: typing.List[typing.Tuple[Operand, str, int]] = []
            for write_operand in sw_insn.writes:
                logger.debug(f"pc={pc:x} write_operand={write_operand}")

                if (
                    type(write_operand) is RegisterOperand
                    and write_operand.name == "rflags"
                ):
                    continue

                sz = self._operand_size(write_operand)
                try:
                    write_operand_color = self._concrete_val_to_color(
                        write_operand.concretize(self.emu), sz
                    )
                except Exception as e:
                    # the written value could not be read back; report the
                    # location as unavailable and move on
                    logger.info(e)
                    h = self._mem_unavailable_hint(write_operand, pc, i, j, False)
                    hint_list.append(h)
                    continue
                # discard bad colors
                if write_operand_color == BAD_COLOR:
                    continue
                writes.append((write_operand, write_operand_color, sz))
            writes.sort(key=lambda e: repr(e[0]))
            self._check_colors_instruction_writes(writes, sw_insn, i, j, hint_list)

        hint_list_list.append(hint_list)

    logger.info("-------------------------")

    # if two hints map to the same key then they are in same equivalence class
    def hint_key(hint):
        if type(hint) is hinting.DynamicRegisterValueHint:
            return (
                "dynamic_register_value",
                hint.pc,
                not hint.use,
                hint.color,
                hint.new,
                hint.message,
                hint.reg_name,
            )
        if type(hint) is hinting.DynamicMemoryValueHint:
            return (
                "dynamic_memory_value",
                hint.pc,
                not hint.use,
                hint.color,
                hint.new,
                hint.message,
                hint.base,
                hint.index,
                hint.scale,
                hint.offset,
            )
        if type(hint) is hinting.MemoryUnavailableHint:
            return (
                "memory_unavailable",
                hint.pc,
                hint.size,
                hint.message,
                hint.base_reg_name,
                hint.index_reg_name,
                hint.offset,
                hint.scale,
            )
        if type(hint) is hinting.EmulationException:
            return (
                "emulation_exception",
                hint.pc,
                hint.instruction_num,
                hint.exception,
            )
        return None

    # given the equivalence classes established by `hint_key`, determine
    # which of those were observed in each micro-execution, keeping the
    # first hint seen for each class as its exemplar
    hk_exemplar = {}
    hk_observed: typing.Dict[int, typing.Set[typing.Any]] = {}
    for me, hint_list in enumerate(hint_list_list):
        observed = set()
        for hint in hint_list:
            hk = hint_key(hint)
            observed.add(hk)
            hk_exemplar.setdefault(hk, hint)
        hk_observed[me] = observed

    hint_keys_sorted = sorted(hk_exemplar)

    for hk in hint_keys_sorted:
        # estimate "probability" of observing a hint in an equiv class as
        # fraction of micro executions in which it was observed at least once
        count = sum(1 for observed in hk_observed.values() if hk in observed)
        prob = float(count) / self.num_micro_executions
        assert prob <= 1.0
        hint = hk_exemplar[hk]

        # Only the three hint kinds below have a probabilistic counterpart;
        # EmulationException exemplars are not re-emitted here.
        if type(hint) is hinting.DynamicRegisterValueHint:
            hinter.info(
                hinting.DynamicRegisterValueProbHint(
                    pc=hint.pc,
                    reg_name=hint.reg_name,
                    color=hint.color,
                    size=hint.size,
                    use=hint.use,
                    new=hint.new,
                    prob=prob,
                    message=hint.message + "-prob",
                )
            )
        if type(hint) is hinting.DynamicMemoryValueHint:
            hinter.info(
                hinting.DynamicMemoryValueProbHint(
                    pc=hint.pc,
                    size=hint.size,
                    base=hint.base,
                    index=hint.index,
                    scale=hint.scale,
                    offset=hint.offset,
                    color=hint.color,
                    use=hint.use,
                    new=hint.new,
                    prob=prob,
                    message=hint.message + "-prob",
                )
            )
        if type(hint) is hinting.MemoryUnavailableHint:
            hinter.info(
                hinting.MemoryUnavailableProbHint(
                    is_read=hint.is_read,
                    size=hint.size,
                    base_reg_name=hint.base_reg_name,
                    index_reg_name=hint.index_reg_name,
                    offset=hint.offset,
                    scale=hint.scale,
                    pc=hint.pc,
                    prob=prob,
                    message=hint.message + "-prob",
                )
            )
|
414
|
+
|
415
|
+
def _concrete_val_to_color(
|
416
|
+
self, concrete_value: typing.Union[int, bytes, bytearray], size: int
|
417
|
+
) -> str:
|
418
|
+
# this concrete value can be an int (if it came from a register)
|
419
|
+
# or bytes (if it came from memory read)
|
420
|
+
# we want these in a common format so that we can see them as colors
|
421
|
+
the_bytes: bytes = b""
|
422
|
+
if type(concrete_value) is int:
|
423
|
+
if concrete_value < MIN_ACCEPTABLE_COLOR_INT:
|
424
|
+
return BAD_COLOR
|
425
|
+
the_bytes = concrete_value.to_bytes(size, byteorder="little")
|
426
|
+
elif (type(concrete_value) is bytes) or (type(concrete_value) is bytearray):
|
427
|
+
# assuming little-endian
|
428
|
+
if (
|
429
|
+
int.from_bytes(concrete_value, byteorder="little")
|
430
|
+
< MIN_ACCEPTABLE_COLOR_INT
|
431
|
+
):
|
432
|
+
return BAD_COLOR
|
433
|
+
the_bytes = concrete_value
|
434
|
+
else:
|
435
|
+
assert 1 == 0
|
436
|
+
return base64.b64encode(the_bytes).decode()
|
437
|
+
|
438
|
+
def _randomize_registers(self) -> None:
    """Fill the uninitialized bytes of general purpose registers with random data.

    Walks the registers of ``self.orig_cpu``; for each plain ``state.Register``
    that is listed as general purpose, a new value is assembled byte by byte,
    most significant byte first. Bytes the emulator reports as initialized
    keep their current value; every other byte is drawn at random. A register
    with at least one randomized byte is written back to both the emulator
    and ``self.cpu``.
    """
    # NOTE(review): the random bytes below come from the module-level
    # `random`, not from `self.random`, so the `seed` constructor argument
    # does not influence them - confirm whether that is intended.
    for reg in self.orig_cpu:
        # only colorize the "regular" registers
        if type(reg) is not state.Register:
            continue
        if reg.name not in self.orig_cpu.get_general_purpose_registers():
            continue

        orig_val = self.emu.read_register(reg.name)
        logger.debug(f"_randomize_registers {reg.name} orig_val={orig_val:x}")

        new_val = 0
        randomized = 0
        for byte_idx in range(reg.size):
            keep = (
                reg.name in self.emu.initialized_registers
                and byte_idx in self.emu.initialized_registers[reg.name]
            )
            if keep:
                # copy the byte sitting at the same position in orig_val
                shift = 8 * (reg.size - byte_idx - 1)
                byte = (orig_val >> shift) & 0xFF
            else:
                byte = random.randint(0, 255)
                randomized += 1
            new_val = (new_val << 8) | byte

        if randomized == 0:
            logger.debug(
                f"Not colorizing register {reg.name} since it is already fully initialized with {orig_val:x}"
            )
            continue

        # make sure to update cpu as well as emu not sure why
        self.emu.write_register(reg.name, new_val)
        setattr(self.cpu, reg.name, new_val)
        logger.debug(
            f"Colorized {randomized} bytes in register {reg.name}, old value was {orig_val:x} new is {new_val:x}"
        )
|
476
|
+
|
477
|
+
# helper for read/write unavailable hint
|
478
|
+
def _mem_unavailable_hint(
    self,
    operand: typing.Optional[BSIDMemoryReferenceOperand],
    pc: int,
    exec_num: int,
    insn_num: int,
    is_read: bool,
) -> hinting.Hint:
    """Build, log and return a hint for an access to unavailable memory.

    Arguments:
        operand: The memory operand that could not be accessed. When falsy,
            the hint carries zeros and the register name "None" instead.
        pc: Program counter of the offending instruction.
        exec_num: Index of the current micro-execution.
        insn_num: Index of the instruction within the micro-execution.
        is_read: True for a read access, False for a write access.

    Returns:
        The ``MemoryUnavailableHint`` that was also sent to the hinter at
        debug level.
    """
    base_name, base_val = "None", 0
    index_name, index_val = "None", 0
    size = scale = offset = address = 0

    if operand:
        size, scale, offset = operand.size, operand.scale, operand.offset
        address = operand.address(self.emu)
        if operand.base is not None:
            base_val = self.emu.read_register(operand.base)
            base_name = operand.base
        if operand.index is not None:
            index_val = self.emu.read_register(operand.index)
            index_name = operand.index

    hint = hinting.MemoryUnavailableHint(
        is_read=is_read,
        size=size,
        base_reg_name=base_name,
        base_reg_val=base_val,
        index_reg_name=index_name,
        index_reg_val=index_val,
        offset=offset,
        scale=scale,
        address=address,
        pc=pc,
        micro_exec_num=exec_num,
        instruction_num=insn_num,
        message="mem_unavailable",
    )
    hinter.debug(hint)
    return hint
|
517
|
+
|
518
|
+
def _get_color_num(self, color: str) -> int:
|
519
|
+
(_, _, _, _, color_num) = self.colors[color]
|
520
|
+
return color_num
|
521
|
+
|
522
|
+
def _add_color(
    self,
    color: str,
    operand: Operand,
    insn: Instruction,
    exec_num: int,
    insn_num: int,
) -> None:
    """Record ``color`` in the color map together with where it was seen.

    The stored color number is one more than the number of entries in the
    map at the time of the call.
    """
    color_num = len(self.colors) + 1
    self.colors[color] = (operand, exec_num, insn_num, insn, color_num)
|
531
|
+
|
532
|
+
def _check_colors_instruction_reads(
    self,
    reads: typing.List[typing.Tuple[Operand, str, int]],
    insn: Instruction,
    exec_num: int,
    insn_num: int,
    hint_list: typing.List[hinting.Hint],
):
    """Emit one hint per value read by ``insn``.

    Each entry of ``reads`` is ``(operand, color, operand_size)``.

    * read-flow: the color is already recorded, so this is a use of a
      previously observed value.
    * read-def: the color has not been recorded yet; it is added to the
      color map as a new color first.

    Every hint is sent to the hinter at debug level and appended to
    ``hint_list``.
    """
    for operand, color, operand_size in reads:
        first_sighting = color not in self.colors
        if first_sighting:
            self._add_color(color, operand, insn, exec_num, insn_num)
        label = "read-def" if first_sighting else "read-flow"
        hint = self._dynamic_value_hint(
            operand,
            operand_size,
            color,
            insn,
            True,
            first_sighting,
            exec_num,
            insn_num,
            label,
        )
        hinter.debug(hint)
        hint_list.append(hint)
|
579
|
+
|
580
|
+
def _check_colors_instruction_writes(
    self,
    writes: typing.List[typing.Tuple[Operand, str, int]],
    insn: Instruction,
    exec_num: int,
    insn_num: int,
    hint_list: typing.List[hinting.Hint],
):
    """Emit one hint per value written by ``insn``.

    NB: This should be called *AFTER* the instruction emulates!

    Each entry of ``writes`` is ``(operand, color, operand_size)``.

    * write-copy: the color is already recorded, i.e. the instruction wrote
      a previously seen value.
    * write-def: the color has not been recorded yet; it is added to the
      color map as a new color first.

    Every hint is sent to the hinter at debug level and appended to
    ``hint_list``.
    """
    for operand, color, operand_size in writes:
        first_sighting = color not in self.colors
        if first_sighting:
            self._add_color(color, operand, insn, exec_num, insn_num)
        label = "write-def" if first_sighting else "write-copy"
        hint = self._dynamic_value_hint(
            operand,
            operand_size,
            color,
            insn,
            False,
            first_sighting,
            exec_num,
            insn_num,
            label,
        )
        hinter.debug(hint)
        hint_list.append(hint)
|
628
|
+
|
629
|
+
def _dynamic_value_hint(
    self,
    operand: Operand,
    size: int,
    color: str,
    insn: Instruction,
    is_use: bool,
    is_new: bool,
    exec_num: int,
    insn_num: int,
    message: str,
):
    """Build the dynamic-value hint describing ``operand`` holding ``color``.

    Arguments:
        operand: The register or memory operand that holds the value.
        size: Size used for a register hint. A memory hint reports the
            operand's own ``size`` attribute instead.
        color: Color string of the value; it must already be present in the
            color map, since its color number is looked up there.
        insn: The instruction involved; its address becomes the hint's pc.
        is_use: True when the value is read, False when it is written.
        is_new: True when this is the first sighting of the color.
        exec_num: Index of the current micro-execution.
        insn_num: Index of the instruction within the micro-execution.
        message: Label stored in the hint.

    Returns:
        A ``DynamicRegisterValueHint`` for a register operand, or a
        ``DynamicMemoryValueHint`` for a memory reference operand.

    Raises:
        KeyError: If ``color`` is not in the color map.
        TypeError: If ``operand`` is neither of the two supported kinds.
    """
    pc = insn.address
    color_num = self._get_color_num(color)

    if type(operand) is RegisterOperand:
        return hinting.DynamicRegisterValueHint(
            reg_name=operand.name,
            size=size,
            color=color_num,
            dynamic_value=color,
            use=is_use,
            new=is_new,
            pc=pc,
            micro_exec_num=exec_num,
            instruction_num=insn_num,
            message=message,
        )

    if type(operand) is BSIDMemoryReferenceOperand:
        # Missing base/index registers are reported as the string "None".
        base_name = operand.base if operand.base is not None else "None"
        index_name = operand.index if operand.index is not None else "None"
        return hinting.DynamicMemoryValueHint(
            address=operand.address(self.emu),
            base=base_name,
            index=index_name,
            scale=operand.scale,
            offset=operand.offset,
            color=color_num,
            dynamic_value=color,
            size=operand.size,
            use=is_use,
            new=is_new,
            pc=pc,
            micro_exec_num=exec_num,
            instruction_num=insn_num,
            message=message,
        )

    # An explicit raise instead of `assert`: asserts are stripped under
    # `python -O`, which would make this method silently return None.
    raise TypeError(
        f"cannot build a dynamic value hint for operand of type {type(operand).__name__}"
    )
|