smallworld-re 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. smallworld/__init__.py +35 -0
  2. smallworld/analyses/__init__.py +14 -0
  3. smallworld/analyses/analysis.py +88 -0
  4. smallworld/analyses/code_coverage.py +31 -0
  5. smallworld/analyses/colorizer.py +682 -0
  6. smallworld/analyses/colorizer_summary.py +100 -0
  7. smallworld/analyses/field_detection/__init__.py +14 -0
  8. smallworld/analyses/field_detection/field_analysis.py +536 -0
  9. smallworld/analyses/field_detection/guards.py +26 -0
  10. smallworld/analyses/field_detection/hints.py +133 -0
  11. smallworld/analyses/field_detection/malloc.py +211 -0
  12. smallworld/analyses/forced_exec/__init__.py +3 -0
  13. smallworld/analyses/forced_exec/forced_exec.py +87 -0
  14. smallworld/analyses/underlays/__init__.py +4 -0
  15. smallworld/analyses/underlays/basic.py +13 -0
  16. smallworld/analyses/underlays/underlay.py +31 -0
  17. smallworld/analyses/unstable/__init__.py +4 -0
  18. smallworld/analyses/unstable/angr/__init__.py +0 -0
  19. smallworld/analyses/unstable/angr/base.py +12 -0
  20. smallworld/analyses/unstable/angr/divergence.py +274 -0
  21. smallworld/analyses/unstable/angr/model.py +383 -0
  22. smallworld/analyses/unstable/angr/nwbt.py +63 -0
  23. smallworld/analyses/unstable/angr/typedefs.py +170 -0
  24. smallworld/analyses/unstable/angr/utils.py +25 -0
  25. smallworld/analyses/unstable/angr/visitor.py +315 -0
  26. smallworld/analyses/unstable/angr_nwbt.py +106 -0
  27. smallworld/analyses/unstable/code_coverage.py +54 -0
  28. smallworld/analyses/unstable/code_reachable.py +44 -0
  29. smallworld/analyses/unstable/control_flow_tracer.py +71 -0
  30. smallworld/analyses/unstable/pointer_finder.py +90 -0
  31. smallworld/arch/__init__.py +0 -0
  32. smallworld/arch/aarch64_arch.py +286 -0
  33. smallworld/arch/amd64_arch.py +86 -0
  34. smallworld/arch/i386_arch.py +44 -0
  35. smallworld/emulators/__init__.py +14 -0
  36. smallworld/emulators/angr/__init__.py +7 -0
  37. smallworld/emulators/angr/angr.py +1652 -0
  38. smallworld/emulators/angr/default.py +15 -0
  39. smallworld/emulators/angr/exceptions.py +7 -0
  40. smallworld/emulators/angr/exploration/__init__.py +9 -0
  41. smallworld/emulators/angr/exploration/bounds.py +27 -0
  42. smallworld/emulators/angr/exploration/default.py +17 -0
  43. smallworld/emulators/angr/exploration/terminate.py +22 -0
  44. smallworld/emulators/angr/factory.py +55 -0
  45. smallworld/emulators/angr/machdefs/__init__.py +35 -0
  46. smallworld/emulators/angr/machdefs/aarch64.py +292 -0
  47. smallworld/emulators/angr/machdefs/amd64.py +192 -0
  48. smallworld/emulators/angr/machdefs/arm.py +387 -0
  49. smallworld/emulators/angr/machdefs/i386.py +221 -0
  50. smallworld/emulators/angr/machdefs/machdef.py +138 -0
  51. smallworld/emulators/angr/machdefs/mips.py +184 -0
  52. smallworld/emulators/angr/machdefs/mips64.py +189 -0
  53. smallworld/emulators/angr/machdefs/ppc.py +101 -0
  54. smallworld/emulators/angr/machdefs/riscv.py +261 -0
  55. smallworld/emulators/angr/machdefs/xtensa.py +255 -0
  56. smallworld/emulators/angr/memory/__init__.py +7 -0
  57. smallworld/emulators/angr/memory/default.py +10 -0
  58. smallworld/emulators/angr/memory/fixups.py +43 -0
  59. smallworld/emulators/angr/memory/memtrack.py +105 -0
  60. smallworld/emulators/angr/scratch.py +43 -0
  61. smallworld/emulators/angr/simos.py +53 -0
  62. smallworld/emulators/angr/utils.py +70 -0
  63. smallworld/emulators/emulator.py +1013 -0
  64. smallworld/emulators/hookable.py +252 -0
  65. smallworld/emulators/panda/__init__.py +5 -0
  66. smallworld/emulators/panda/machdefs/__init__.py +28 -0
  67. smallworld/emulators/panda/machdefs/aarch64.py +93 -0
  68. smallworld/emulators/panda/machdefs/amd64.py +71 -0
  69. smallworld/emulators/panda/machdefs/arm.py +89 -0
  70. smallworld/emulators/panda/machdefs/i386.py +36 -0
  71. smallworld/emulators/panda/machdefs/machdef.py +86 -0
  72. smallworld/emulators/panda/machdefs/mips.py +94 -0
  73. smallworld/emulators/panda/machdefs/mips64.py +91 -0
  74. smallworld/emulators/panda/machdefs/ppc.py +79 -0
  75. smallworld/emulators/panda/panda.py +575 -0
  76. smallworld/emulators/unicorn/__init__.py +13 -0
  77. smallworld/emulators/unicorn/machdefs/__init__.py +28 -0
  78. smallworld/emulators/unicorn/machdefs/aarch64.py +310 -0
  79. smallworld/emulators/unicorn/machdefs/amd64.py +326 -0
  80. smallworld/emulators/unicorn/machdefs/arm.py +321 -0
  81. smallworld/emulators/unicorn/machdefs/i386.py +137 -0
  82. smallworld/emulators/unicorn/machdefs/machdef.py +117 -0
  83. smallworld/emulators/unicorn/machdefs/mips.py +202 -0
  84. smallworld/emulators/unicorn/unicorn.py +684 -0
  85. smallworld/exceptions/__init__.py +5 -0
  86. smallworld/exceptions/exceptions.py +85 -0
  87. smallworld/exceptions/unstable/__init__.py +1 -0
  88. smallworld/exceptions/unstable/exceptions.py +25 -0
  89. smallworld/extern/__init__.py +4 -0
  90. smallworld/extern/ctypes.py +94 -0
  91. smallworld/extern/unstable/__init__.py +1 -0
  92. smallworld/extern/unstable/ghidra.py +129 -0
  93. smallworld/helpers.py +107 -0
  94. smallworld/hinting/__init__.py +8 -0
  95. smallworld/hinting/hinting.py +214 -0
  96. smallworld/hinting/hints.py +427 -0
  97. smallworld/hinting/unstable/__init__.py +2 -0
  98. smallworld/hinting/utils.py +19 -0
  99. smallworld/instructions/__init__.py +18 -0
  100. smallworld/instructions/aarch64.py +20 -0
  101. smallworld/instructions/arm.py +18 -0
  102. smallworld/instructions/bsid.py +67 -0
  103. smallworld/instructions/instructions.py +258 -0
  104. smallworld/instructions/mips.py +21 -0
  105. smallworld/instructions/x86.py +100 -0
  106. smallworld/logging.py +90 -0
  107. smallworld/platforms.py +95 -0
  108. smallworld/py.typed +0 -0
  109. smallworld/state/__init__.py +6 -0
  110. smallworld/state/cpus/__init__.py +32 -0
  111. smallworld/state/cpus/aarch64.py +563 -0
  112. smallworld/state/cpus/amd64.py +676 -0
  113. smallworld/state/cpus/arm.py +630 -0
  114. smallworld/state/cpus/cpu.py +71 -0
  115. smallworld/state/cpus/i386.py +239 -0
  116. smallworld/state/cpus/mips.py +374 -0
  117. smallworld/state/cpus/mips64.py +372 -0
  118. smallworld/state/cpus/powerpc.py +229 -0
  119. smallworld/state/cpus/riscv.py +357 -0
  120. smallworld/state/cpus/xtensa.py +80 -0
  121. smallworld/state/memory/__init__.py +7 -0
  122. smallworld/state/memory/code.py +70 -0
  123. smallworld/state/memory/elf/__init__.py +3 -0
  124. smallworld/state/memory/elf/elf.py +564 -0
  125. smallworld/state/memory/elf/rela/__init__.py +32 -0
  126. smallworld/state/memory/elf/rela/aarch64.py +27 -0
  127. smallworld/state/memory/elf/rela/amd64.py +32 -0
  128. smallworld/state/memory/elf/rela/arm.py +51 -0
  129. smallworld/state/memory/elf/rela/i386.py +32 -0
  130. smallworld/state/memory/elf/rela/mips.py +45 -0
  131. smallworld/state/memory/elf/rela/ppc.py +45 -0
  132. smallworld/state/memory/elf/rela/rela.py +63 -0
  133. smallworld/state/memory/elf/rela/riscv64.py +27 -0
  134. smallworld/state/memory/elf/rela/xtensa.py +15 -0
  135. smallworld/state/memory/elf/structs.py +55 -0
  136. smallworld/state/memory/heap.py +85 -0
  137. smallworld/state/memory/memory.py +181 -0
  138. smallworld/state/memory/stack/__init__.py +31 -0
  139. smallworld/state/memory/stack/aarch64.py +22 -0
  140. smallworld/state/memory/stack/amd64.py +42 -0
  141. smallworld/state/memory/stack/arm.py +66 -0
  142. smallworld/state/memory/stack/i386.py +22 -0
  143. smallworld/state/memory/stack/mips.py +34 -0
  144. smallworld/state/memory/stack/mips64.py +34 -0
  145. smallworld/state/memory/stack/ppc.py +34 -0
  146. smallworld/state/memory/stack/riscv.py +22 -0
  147. smallworld/state/memory/stack/stack.py +127 -0
  148. smallworld/state/memory/stack/xtensa.py +34 -0
  149. smallworld/state/models/__init__.py +6 -0
  150. smallworld/state/models/mmio.py +186 -0
  151. smallworld/state/models/model.py +163 -0
  152. smallworld/state/models/posix.py +455 -0
  153. smallworld/state/models/x86/__init__.py +2 -0
  154. smallworld/state/models/x86/microsoftcdecl.py +35 -0
  155. smallworld/state/models/x86/systemv.py +240 -0
  156. smallworld/state/state.py +962 -0
  157. smallworld/state/unstable/__init__.py +0 -0
  158. smallworld/state/unstable/elf.py +393 -0
  159. smallworld/state/x86_registers.py +30 -0
  160. smallworld/utils.py +935 -0
  161. smallworld_re-1.0.0.dist-info/LICENSE.txt +21 -0
  162. smallworld_re-1.0.0.dist-info/METADATA +189 -0
  163. smallworld_re-1.0.0.dist-info/RECORD +166 -0
  164. smallworld_re-1.0.0.dist-info/WHEEL +5 -0
  165. smallworld_re-1.0.0.dist-info/entry_points.txt +2 -0
  166. smallworld_re-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,684 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import sys
5
+ import typing
6
+ from enum import Enum
7
+
8
+ import capstone
9
+ import claripy
10
+ import unicorn
11
+ import unicorn.ppc_const # Not properly exposed by the unicorn module
12
+
13
+ from ... import exceptions, instructions, platforms, utils
14
+ from .. import emulator, hookable
15
+ from .machdefs import UnicornMachineDef
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class UnicornEmulationError(exceptions.EmulationError):
    """Emulation error carrying the Unicorn error it was built from.

    Attributes are stored as given: the original ``unicorn.UcError``, the
    program counter value, a human-readable message, and a dictionary of
    extra details.
    """

    def __init__(self, uc_err: unicorn.UcError, pc: int, msg: str, details: dict):
        # Plain attribute storage only; the base initializer is not invoked.
        self.uc_err, self.pc = uc_err, pc
        self.msg, self.details = msg, details

    def __repr__(self) -> str:
        """Return ``ClassName(uc_err, hex(pc), details)``."""
        kind = self.__class__.__name__
        return f"{kind}({self.uc_err}, {hex(self.pc)}, {self.details})"
31
+
32
+
33
class UnicornEmulationMemoryReadError(UnicornEmulationError):
    """Unicorn emulation error for a failed memory read."""
35
+
36
+
37
class UnicornEmulationMemoryWriteError(UnicornEmulationError):
    """Unicorn emulation error for a failed memory write."""
39
+
40
+
41
class UnicornEmulationExecutionError(UnicornEmulationError):
    """Unicorn emulation error for a failure while executing code."""
43
+
44
+
45
class EmulatorState(Enum):
    """Execution mode the emulator is in; consulted by the engine hooks."""

    # Block stepping was requested; the block hook moves this to BLOCK
    # the first time it fires.
    START_BLOCK = 1
    # Single stepping was requested; the code hook moves this to STEP
    # the first time it fires.
    START_STEP = 2
    # The code hook stops the engine when it fires in this state.
    STEP = 3
    # The block hook stops the engine when it fires in this state.
    BLOCK = 4
    # Free-running emulation started by ``run``.
    RUN = 5
    # Initial state set by the emulator's constructor.
    SETUP = 6
52
+
53
+
54
+ class UnicornEmulator(
55
+ emulator.Emulator,
56
+ hookable.QInstructionHookable,
57
+ hookable.QFunctionHookable,
58
+ hookable.QMemoryReadHookable,
59
+ hookable.QMemoryWriteHookable,
60
+ hookable.QInterruptHookable,
61
+ ):
62
+ """An emulator for the Unicorn emulation engine."""
63
+
64
+ description = "This is a smallworld class encapsulating the Unicorn emulator."
65
+ name = "smallworld's-unicorn"
66
+ version = "0.0"
67
+
68
+ PAGE_SIZE = 0x1000
69
+
70
def __init__(self, platform: platforms.Platform):
    """Create the Unicorn engine for ``platform`` and install its hooks.

    Builds the Unicorn engine and Capstone disassembler from the machine
    definition for ``platform``, initializes label / memory-map /
    initialized-register bookkeeping, and registers code, memory
    read/write, interrupt and block hooks on the engine.

    Arguments:
        platform: Platform describing architecture and byte order.
    """
    super().__init__(platform)
    self.platform = platform
    self.machdef = UnicornMachineDef.for_platform(self.platform)
    self.engine = unicorn.Uc(self.machdef.uc_arch, self.machdef.uc_mode)
    self.disassembler = capstone.Cs(self.machdef.cs_arch, self.machdef.cs_mode)
    self.disassembler.detail = True

    self.memory_map: utils.RangeCollection = utils.RangeCollection()
    self.state: EmulatorState = EmulatorState.SETUP

    # Labels are per byte.
    #
    # There is one entry in this dictionary per full-width base register
    # (by name), each mapping a byte offset within that register to a
    # string label.  For 64-bit x86 that means
    # self.label["rax"][0] = "input" labels the 0th byte of rax; there is
    # no self.label["eax"] -- look in "rax" instead (read_register_label
    # navigates that for you).
    #
    # Memory labels live under the single key "mem", mapping each byte
    # address to its label, e.g. self.label["mem"][0xdeadbeef].
    self.label: typing.Dict[str, typing.Dict[int, str]] = {}

    byteorder = self.platform.byteorder.value

    def value_bytes(value, size):
        # The hook value can arrive as a negative (signed) integer, which
        # int.to_bytes rejects; reduce it to its unsigned `size`-byte form.
        mask = (1 << (size * 8)) - 1
        return (value & mask).to_bytes(size, byteorder)

    # Architectures whose "return" is a branch to the link register.
    lr_return_archs = (
        platforms.Architecture.AARCH64,
        platforms.Architecture.ARM_V5T,
        platforms.Architecture.ARM_V6M,
        platforms.Architecture.ARM_V6M_THUMB,
        platforms.Architecture.ARM_V7A,
        platforms.Architecture.ARM_V7M,
        platforms.Architecture.ARM_V7R,
        platforms.Architecture.POWERPC32,
        platforms.Architecture.POWERPC64,
    )
    # Architectures whose "return" is a branch to register 'ra'.
    ra_return_archs = (
        platforms.Architecture.MIPS32,
        platforms.Architecture.MIPS64,
    )

    # This runs on *every* instruction.
    def code_callback(uc, address, size, user_data):
        # Single stepping: the first firing only arms STEP, so the engine
        # stops when the hook fires for the *following* instruction.
        # The order of these two checks matters.
        if self.state == EmulatorState.STEP:
            self.engine.emu_stop()
        if self.state == EmulatorState.START_STEP:
            self.state = EmulatorState.STEP

        if not self._bounds.is_empty() and not self._bounds.contains_value(address):
            self.engine.emu_stop()
            raise exceptions.EmulationBounds

        # Check whether we've hit an exit point.
        if address in self._exit_points:
            logger.debug(f"stopping emulation at exit point {address:x}")
            self.engine.emu_stop()
            raise exceptions.EmulationExitpoint

        # Run instruction hooks.
        if self.all_instructions_hook:
            self.all_instructions_hook(self)

        if cb := self.is_instruction_hooked(address):
            logger.debug(f"hit hooking address for instruction at {address:x}")
            cb(self)

        # NOTE(review): the bounds check above runs before this point, so
        # a hooked function located outside non-empty bounds raises
        # EmulationBounds before its hook is considered.
        if cb := self.is_function_hooked(address):
            logger.debug(
                f"hit hooking address for function at {address:x} -- {self.function_hooks[address]}"
            )
            # Hooking a function means we stop at function entry, run the
            # hook (the model of the function), and then return from the
            # function as if it had run instead of executing its body.
            cb(self)

            # Mimic a platform-specific "return" instruction.
            arch = self.platform.architecture
            if arch == platforms.Architecture.X86_32:
                # i386: pop a 4-byte value off the stack
                sp = self.read_register("esp")
                ret = int.from_bytes(self.read_memory(sp, 4), byteorder)
                self.write_register("esp", sp + 4)
            elif arch == platforms.Architecture.X86_64:
                # amd64: pop an 8-byte value off the stack
                sp = self.read_register("rsp")
                ret = int.from_bytes(self.read_memory(sp, 8), byteorder)
                self.write_register("rsp", sp + 8)
            elif arch in lr_return_archs:
                # aarch64, arm32, powerpc and powerpc64: branch to 'lr'
                ret = self.read_register("lr")
            elif arch in ra_return_archs:
                # mips32 and mips64: branch to 'ra'
                ret = self.read_register("ra")
            else:
                # The original message lacked the f-prefix and printed
                # the placeholder text literally.
                raise exceptions.ConfigurationError(
                    f"Don't know how to return for {self.platform.architecture}"
                )

            self.write_register("pc", ret)

    self.engine.hook_add(unicorn.UC_HOOK_CODE, code_callback)

    # Functions to run before memory reads and writes.

    def check_hook_data(address, size, data):
        # A read hook must replace exactly the bytes being read.
        if len(data) != size:
            raise exceptions.EmulationError(
                f"Read hook at {hex(address)} returned {len(data)} bytes; need {size} bytes"
            )

    def mem_read_callback(uc, access, address, size, value, user_data):
        assert access == unicorn.UC_MEM_READ
        # This must be bytes; the original wrapped it in a 1-tuple by
        # way of a stray trailing comma.
        orig_data = value_bytes(value, size)
        if self.all_reads_hook:
            data = self.all_reads_hook(self, address, size, orig_data)
            if data:
                check_hook_data(address, size, data)
                uc.mem_write(address, data)
                orig_data = data

        if cb := self.is_memory_read_hooked(address):
            data = cb(self, address, size, orig_data)
            # Overwrite the memory being read.  The instruction is
            # emulated after this callback fires, so the new value is
            # what gets used for computation.
            if data:
                check_hook_data(address, size, data)
                uc.mem_write(address, data)

    def mem_write_callback(uc, access, address, size, value, user_data):
        assert access == unicorn.UC_MEM_WRITE
        if self.all_writes_hook:
            self.all_writes_hook(self, address, size, value_bytes(value, size))

        if cb := self.is_memory_write_hooked(address):
            cb(self, address, size, value_bytes(value, size))

    self.engine.hook_add(unicorn.UC_HOOK_MEM_WRITE, mem_write_callback)
    self.engine.hook_add(unicorn.UC_HOOK_MEM_READ, mem_read_callback)

    # Function to run on *every* interrupt.
    self.interrupts_hook: typing.Optional[
        typing.Callable[[emulator.Emulator, int], None]
    ] = None

    # Function to run on a specific interrupt number.
    self.interrupt_hook: typing.Dict[
        int, typing.Callable[[emulator.Emulator], None]
    ] = {}

    def interrupt_callback(uc, index, user_data):
        # NOTE(review): both hooks are invoked without arguments although
        # their annotations above declare emulator (and interrupt number)
        # parameters -- confirm which is intended before changing either.
        if self.interrupts_hook is not None:
            self.interrupts_hook()
        if index in self.interrupt_hook:
            self.interrupt_hook[index]()

    self.engine.hook_add(unicorn.UC_HOOK_INTR, interrupt_callback)

    def block_callback(uc, address, block_size, user_data):
        # Same two-phase scheme as single stepping, at block granularity;
        # the order of these two checks matters.
        if self.state == EmulatorState.BLOCK:
            self.engine.emu_stop()
        if self.state == EmulatorState.START_BLOCK:
            self.state = EmulatorState.BLOCK

    self.engine.hook_add(unicorn.UC_HOOK_BLOCK, block_callback)

    # Keep track of which register bytes have been initialized:
    # base register name -> set of initialized byte offsets.
    self.initialized_registers: typing.Dict[str, typing.Set[int]] = {}
258
+
259
+ def _check_pc_ok(self, pc):
260
+ """Check if this pc is ok to emulate, i.e. in bounds and not an exit
261
+ point."""
262
+
263
+ if not self._bounds.is_empty() and not self._bounds.contains_value(pc):
264
+ # There are bounds, and we are not in them
265
+ return False
266
+
267
+ # check for if we've hit an exit point
268
+ if pc in self._exit_points:
269
+ logger.debug(f"stopping emulation at exit point {pc:x}")
270
+ return False
271
+ return True
272
+
273
+ def _register(self, name: str) -> typing.Tuple[typing.Any, str, int, int]:
274
+ # Translate register name into the tuple
275
+ # (u, b, o, s)
276
+ # u is the unicorn reg number
277
+ # b is the name of full-width base register this is or is part of
278
+ # o is start offset within full-width base register
279
+ # s is size in bytes
280
+ name = name.lower()
281
+ # support some generic register references
282
+ if name == "pc":
283
+ name = self.machdef.pc_reg
284
+ return self.machdef.uc_reg(name)
285
+
286
def read_register_content(self, name: str) -> int:
    """Read the current value of register ``name`` from the engine.

    A register whose Unicorn id is 0 is reported as 0 without touching
    the engine.

    Raises:
        AnalysisError: if the engine fails to read the register.
    """
    reg, _base, _second, _third = self._register(name)
    if reg == 0:
        return 0

    try:
        value = self.engine.reg_read(reg)
    except Exception as e:
        raise exceptions.AnalysisError(f"Failed reading {name} (id: {reg})") from e
    return value
295
+
296
def read_register_label(self, name: str) -> typing.Optional[str]:
    """Return the labels on the bytes of register ``name``.

    Labels are stored per byte of the full-width base register; this
    collects the distinct, non-None labels on the bytes covered by
    ``name``.

    Returns:
        The distinct labels joined with ":" in sorted order, or None when
        no covered byte carries a label.  (Previously an empty string was
        returned in that case and the join order was arbitrary; None
        matches what ``read_memory_label`` returns.)
    """
    (_, base_reg, size, offset) = self._register(name)
    byte_labels = self.label.get(base_reg)
    if not byte_labels:
        return None

    labels = {
        byte_labels[i]
        for i in range(offset, offset + size)
        if byte_labels.get(i) is not None
    }
    if not labels:
        return None
    # Sort so the result is deterministic across runs.
    return ":".join(sorted(labels))
309
+
310
def read_register(self, name: str) -> int:
    """Return the value of register ``name`` (see read_register_content)."""
    value = self.read_register_content(name)
    return value
312
+
313
def write_register_content(
    self, name: str, content: typing.Union[None, int, claripy.ast.bv.BV]
) -> None:
    """Write a concrete value to register ``name``.

    A ``None`` value is ignored.  After a successful write, the bytes of
    the base register covered by ``name`` are recorded as initialized.

    Raises:
        SymbolicValueError: if ``content`` is a claripy bitvector.
        AnalysisError: if the engine fails to write the register.
    """
    if content is None:
        logger.debug(f"ignoring register write to {name} - no value")
        return

    if isinstance(content, claripy.ast.bv.BV):
        raise exceptions.SymbolicValueError(
            "This emulator cannot handle bitvector expressions"
        )

    reg, base_reg, size, start_offset = self._register(name)
    try:
        self.engine.reg_write(reg, content)
    except Exception as e:
        raise exceptions.AnalysisError(f"Failed writing {name} (id: {reg})") from e

    # Remember which bytes of the base register now hold a value.
    touched = self.initialized_registers.setdefault(base_reg, set())
    touched.update(range(start_offset, start_offset + size))
    logger.debug(f"set register {name}={content}")
336
+
337
def write_register_label(
    self, name: str, label: typing.Optional[str] = None
) -> None:
    """Attach ``label`` to every byte of register ``name``.

    Labels are stored under the full-width base register, one per byte
    offset.  A ``None`` label is a no-op.
    """
    if label is None:
        return

    (_, base_reg, size, offset) = self._register(name)
    per_byte = self.label.setdefault(base_reg, {})
    per_byte.update(dict.fromkeys(range(offset, offset + size), label))
347
+
348
def write_register(
    self, name: str, content: typing.Union[None, int, claripy.ast.bv.BV]
) -> None:
    """Write ``content`` to register ``name`` (see write_register_content)."""
    return self.write_register_content(name, content)
352
+
353
def read_memory_content(self, address: int, size: int) -> bytes:
    """Read ``size`` bytes of emulated memory starting at ``address``.

    Returns:
        Whatever the engine's ``mem_read`` returns for the range.

    Raises:
        ValueError: if ``size`` exceeds ``sys.maxsize``.
        UnicornEmulationError (or a subclass): when Unicorn reports an
            error; the translation is done by ``_error``.
    """
    if size > sys.maxsize:
        raise ValueError(f"{size} is too large (max: {sys.maxsize})")
    try:
        return self.engine.mem_read(address, size)
    except unicorn.UcError as e:
        # logger.warn is a deprecated alias of logger.warning.
        logger.warning(f"Unicorn raised an exception on memory read {e}")
        self._error(e, "mem")
        # _error always raises; an explicit raise (rather than a bare
        # `assert False`) keeps this guard in place under `python -O`.
        raise AssertionError("unreachable: _error always raises")
362
+
363
def read_memory_label(self, address: int, size: int) -> typing.Optional[str]:
    """Return the labels on ``size`` bytes of memory starting at ``address``.

    Returns:
        The distinct labels found in the range joined with ":", or None
        when no memory labels exist or none fall inside the range.
    """
    if "mem" not in self.label:
        return None

    mem_labels = self.label["mem"]
    found = {
        mem_labels[a] for a in range(address, address + size) if a in mem_labels
    }
    if not found:
        return None
    return ":".join(list(found))
374
+
375
def read_memory(self, address: int, size: int) -> bytes:
    """Read ``size`` bytes at ``address`` (see read_memory_content)."""
    data = self.read_memory_content(address, size)
    return data
377
+
378
def map_memory(self, address: int, size: int) -> None:
    """Map the pages covering ``[address, address + size)`` in the engine.

    The region is widened to page boundaries, and only the parts not
    already recorded in ``self.memory_map`` are mapped.
    """
    page = self.PAGE_SIZE

    # Align the start down to its page boundary.
    base = address - (address % page)

    # Grow the length to account for the moved start, then round it up
    # to a whole number of pages.
    span = size + (address - base)
    span = -(-span // page) * page

    # Fill in any gaps in the resulting region.
    gaps = self.memory_map.get_missing_ranges((base, base + span))
    for start, end in gaps:
        self.memory_map.add_range((start, end))
        self.engine.mem_map(start, end - start)
397
+
398
def get_memory_map(self) -> typing.List[typing.Tuple[int, int]]:
    """Return a new list of the ranges recorded in ``self.memory_map``."""
    return [*self.memory_map.ranges]
400
+
401
+ def _is_address_mapped(self, address):
402
+ (ind, found) = self.memory_map.find_closest_range(address)
403
+ return found
404
+
405
+ def _is_address_range_mapped(self, address_range):
406
+ (a, b) = address_range
407
+ for address in range(a, b):
408
+ if self._is_address_mapped(address) is False:
409
+ return False
410
+ return True
411
+
412
def write_memory_content(
    self, address: int, content: typing.Union[bytes, claripy.ast.bv.BV]
) -> None:
    """Write concrete bytes to emulated memory at ``address``.

    Raises:
        ValueError: if ``content`` is None, empty, or longer than
            ``sys.maxsize``.
        SymbolicValueError: if ``content`` is a claripy bitvector.
        UnicornEmulationError (or a subclass): when Unicorn reports an
            error; the translation is done by ``_error``.
    """
    if content is None:
        raise ValueError(f"{self.__class__.__name__} requires concrete state")

    if isinstance(content, claripy.ast.bv.BV):
        raise exceptions.SymbolicValueError(
            "This emulator cannot handle bitvector expressions"
        )

    if len(content) > sys.maxsize:
        raise ValueError(f"{len(content)} is too large (max: {sys.maxsize})")

    if not len(content):
        raise ValueError("memory write cannot be empty")

    try:
        self.engine.mem_write(address, content)
    except unicorn.UcError as e:
        # logger.warn is a deprecated alias of logger.warning.
        logger.warning(f"Unicorn raised an exception on memory write {e}")
        # _error always raises, so the debug line below is only reached
        # after a successful write.
        self._error(e, "mem")

    logger.debug(f"wrote {len(content)} bytes to 0x{address:x}")
437
+
438
def write_memory_label(
    self, address: int, size: int, label: typing.Optional[str] = None
) -> None:
    """Attach ``label`` to each of ``size`` bytes starting at ``address``.

    Memory labels are kept under the "mem" key of ``self.label``, keyed
    by byte address.  A ``None`` label is a no-op.
    """
    if label is None:
        return

    mem_labels = self.label.setdefault("mem", dict())
    mem_labels.update(dict.fromkeys(range(address, address + size), label))
447
+
448
def write_memory(
    self, address: int, content: typing.Union[bytes, claripy.ast.bv.BV]
) -> None:
    """Write ``content`` at ``address`` (see write_memory_content)."""
    return self.write_memory_content(address, content)
452
+
453
def hook_instruction(
    self, address: int, function: typing.Callable[[emulator.Emulator], None]
) -> None:
    """Register an instruction hook and map one page starting at its address.

    The hook is registered through the parent class; afterwards
    ``PAGE_SIZE`` bytes starting at ``address`` are mapped via
    ``map_memory``.
    """
    super().hook_instruction(address, function)
    self.map_memory(address, self.PAGE_SIZE)
458
+
459
def hook_function(
    self, address: int, function: typing.Callable[[emulator.Emulator], None]
) -> None:
    """Register a function hook and map one page starting at its address.

    The hook is registered through the parent class; afterwards
    ``PAGE_SIZE`` bytes starting at ``address`` are mapped via
    ``map_memory``.
    """
    super().hook_function(address, function)
    self.map_memory(address, self.PAGE_SIZE)
464
+
465
+ def _disassemble(
466
+ self, code: bytes, base: int, count: typing.Optional[int] = None
467
+ ) -> typing.Tuple[typing.List[capstone.CsInsn], str]:
468
+ instructions = self.disassembler.disasm(code, base)
469
+ disassembly = []
470
+ insns = []
471
+ for i, instruction in enumerate(instructions):
472
+ if count is not None and i >= count:
473
+ break
474
+ insns.append(instruction)
475
+ disassembly.append(f"{instruction.mnemonic} {instruction.op_str}")
476
+ return (insns, "\n".join(disassembly))
477
+
478
def current_instruction(self) -> capstone.CsInsn:
    """Disassemble and return the instruction at the current pc.

    Reads 15 bytes at pc and returns the first instruction the
    disassembler yields, or None if it yields nothing.

    Raises:
        AssertionError: if the memory read returns None.
    """
    pc = self.read_register("pc")
    code = self.read_memory(pc, 15)
    if code is None:
        raise AssertionError("invalid state")
    return next(iter(self.disassembler.disasm(code, pc)), None)
485
+
486
+ def _check(self) -> None:
487
+ # check if it's ok to begin emulating
488
+ # 1. pc must be set in order to emulate
489
+ (_, base_name, size, offset) = self._register("pc")
490
+ if (
491
+ base_name in self.initialized_registers
492
+ and len(self.initialized_registers[base_name]) == size
493
+ ):
494
+ # pc is fully initialized
495
+ pass
496
+ else:
497
+ raise exceptions.ConfigurationError(
498
+ "pc not initialized, emulation cannot start"
499
+ )
500
+ # 2. an exit point is also required
501
+ if len(self._exit_points) == 0:
502
+ raise exceptions.ConfigurationError(
503
+ "at least one exit point must be set, emulation cannot start"
504
+ )
505
+
506
def step_instruction(self) -> None:
    """Emulate a single instruction starting at the current pc.

    The state is set to START_STEP so the code hook stops the engine
    when it fires for the following instruction.

    Raises:
        ConfigurationError: from ``_check`` if emulation cannot start.
        EmulationBounds: if pc already equals the exit point used for
            this step.
        UnicornEmulationError (or a subclass): when Unicorn reports an
            error other than an unmapped fetch at a hooked function.
    """
    self._check()
    self.state = EmulatorState.START_STEP

    pc = self.read_register("pc")
    # Unicorn takes one end address, so use the first exit point.
    exit_point = list(self._exit_points)[0]
    if pc == exit_point:
        raise exceptions.EmulationBounds

    # Only disassemble when pc is not a hooked function address.
    if pc not in self.function_hooks:
        disas = self.current_instruction()
        logger.info(f"single step at 0x{pc:x}: {disas}")

    try:
        self.engine.emu_start(pc, exit_point)
    except unicorn.UcError as e:
        hooked_fetch = (
            e.errno == unicorn.UC_ERR_FETCH_UNMAPPED
            and self.read_register("pc") in self.function_hooks
        )
        if not hooked_fetch:
            # logger.warn is a deprecated alias of logger.warning.
            logger.warning(f"emulation stopped - reason: {e}")
            # Translate this unicorn error into something richer.
            self._error(e, "exec")
        # Otherwise: probably a call into hooked code that is not
        # mapped; ignore it.
533
+
534
def step_block(self) -> None:
    """Emulate starting at the current pc using the block-stepping states.

    The engine is started twice: first in START_BLOCK, then again from
    the resulting pc in BLOCK.  The block hook stops the engine when it
    fires in the BLOCK state.

    Unicorn errors are logged and swallowed here; they are not
    translated through ``_error``.

    Raises:
        ConfigurationError: from ``_check`` if emulation cannot start.
    """
    self._check()
    pc = self.read_register("pc")
    # Unicorn takes one end address, so use the first exit point.
    exit_point = list(self._exit_points)[0]

    disas = self.current_instruction()
    logger.info(f"step block at 0x{pc:x}: {disas}")
    try:
        self.state = EmulatorState.START_BLOCK
        self.engine.emu_start(pc, exit_point)
        pc = self.read_register("pc")

        self.state = EmulatorState.BLOCK
        self.engine.emu_start(pc, exit_point)
    except unicorn.UcError as e:
        # logger.warn is a deprecated alias of logger.warning.
        logger.warning(f"emulation stopped - reason: {e}")
        logger.warning("for more details, run emulation in single step mode")
551
+
552
def run(self) -> None:
    """Emulate from the current pc until the engine stops.

    ``EmulationStop`` ends the run quietly.
    NOTE(review): presumably the bounds / exit-point exceptions raised
    by the code hook derive from it -- confirm in the exceptions module.

    Raises:
        ConfigurationError: from ``_check`` if emulation cannot start.
        UnicornEmulationError (or a subclass): when Unicorn reports an
            error; the translation is done by ``_error``.
    """
    self._check()
    self.state = EmulatorState.RUN

    logger.info(f"starting emulation at 0x{self.read_register('pc'):x}")

    try:
        # Unicorn requires one exit point, so just use the first.
        exit_point = list(self._exit_points)[0]
        self.engine.emu_start(self.read_register("pc"), exit_point)
    except exceptions.EmulationStop:
        pass
    except unicorn.UcError as e:
        # logger.warn is a deprecated alias of logger.warning.
        logger.warning(f"emulation stopped - reason: {e}")
        logger.warning("for more details, run emulation in single step mode")
        self._error(e, "exec")

    logger.info("emulation complete")
572
+
573
def _error(
    self, error: unicorn.UcError, typ: str
) -> typing.Dict[typing.Union[str, int], typing.Union[str, int, bytes]]:
    """Raise a richer exception built from a Unicorn exception.

    This never returns normally.  Should only be run while single
    stepping.

    Arguments:
        error: Unicorn exception.
        typ: "mem" for a failed memory access, "exec" for a failure
            during execution; anything else is reported as unexpected.

    Raises:
        UnicornEmulationError (or a subclass) with extra details about
        the error; a plain EmulationError when ``typ`` is unrecognized
        and the error code does not identify a read or write fault.
    """
    pc = self.read_register("pc")

    # Best effort: decode the instruction at pc.  The bytes are fetched
    # straight from the engine because read_memory reports failures by
    # calling back into this method, which would recurse whenever pc
    # itself is unreadable.
    try:
        code = self.engine.mem_read(pc, 16)
        insns, _ = self._disassemble(code, pc, 1)
        i = instructions.Instruction.from_capstone(insns[0])
    except Exception:
        # Looks like that code is not available.
        i = None

    exc: typing.Type[exceptions.EmulationError] = exceptions.EmulationError

    # This must be one if/elif/else chain: with two separate `if`s the
    # "mem" prefix was always overwritten by the final `else`.
    if typ == "mem":
        prefix = "Failed memory access"
        exc = UnicornEmulationMemoryReadError
    elif typ == "exec":
        prefix = "Quit emulation"
        exc = UnicornEmulationExecutionError
    else:
        prefix = "Unexpected Unicorn error"

    def get_unavailable_rw(attr):
        # From the instruction's reads or writes (selected by `attr`),
        # keep the memory operands whose address is not mapped.  Yields
        # nothing when the instruction could not be decoded.
        # NOTE(review): the same "not mapped" filter is applied for the
        # protected and unaligned cases below, as in the original.
        if i is None:
            return []
        return [
            rw
            for rw in getattr(i, attr)
            if type(rw) is instructions.BSIDMemoryReferenceOperand
            and not self._is_address_mapped(rw.address(self))
        ]

    read_err = UnicornEmulationMemoryReadError
    write_err = UnicornEmulationMemoryWriteError

    # errno -> (reason, exception override, details key, operand attr)
    memory_faults = {
        unicorn.UC_ERR_READ_UNMAPPED: (
            "due to read of unmapped memory", read_err, "unmapped_reads", "reads",
        ),
        unicorn.UC_ERR_READ_PROT: (
            "due to read of mapped but protected memory",
            read_err, "protected_reads", "reads",
        ),
        unicorn.UC_ERR_READ_UNALIGNED: (
            "due to unaligned read", read_err, "unaligned_reads", "reads",
        ),
        unicorn.UC_ERR_WRITE_UNMAPPED: (
            "due to write to unmapped memory", write_err, "unmapped_writes", "writes",
        ),
        unicorn.UC_ERR_WRITE_PROT: (
            "due to write to mapped but protected memory",
            write_err, "protected_writes", "writes",
        ),
        unicorn.UC_ERR_WRITE_UNALIGNED: (
            "due to unaligned write", write_err, "unaligned_writes", "writes",
        ),
    }

    # errno -> reason, for errors that keep the exception chosen by `typ`
    other_faults = {
        unicorn.UC_ERR_FETCH_UNMAPPED: "due to fetch of unmapped memory",
        unicorn.UC_ERR_FETCH_PROT: "due to fetch of from mapped but protected memory",
        unicorn.UC_ERR_FETCH_UNALIGNED: "due to unaligned fetch",
        unicorn.UC_ERR_NOMEM: "due Out-Of-Memory",
        unicorn.UC_ERR_INSN_INVALID: "due invalid instruction",
        unicorn.UC_ERR_RESOURCE: "due insufficient resources",
        unicorn.UC_ERR_EXCEPTION: "due cpu exception",
    }

    details: typing.Dict[typing.Union[str, int], typing.Union[str, int, bytes]] = {}

    if error.errno in memory_faults:
        # The error code is authoritative about read vs. write, so it
        # overrides the exception class chosen from `typ`.
        reason, exc, key, attr = memory_faults[error.errno]
        msg = f"{prefix} {reason}"
        details[key] = get_unavailable_rw(attr)
    elif error.errno in other_faults:
        msg = f"{prefix} {other_faults[error.errno]}"
        if error.errno == unicorn.UC_ERR_INSN_INVALID:
            details = {"pc": pc, "instr": str(i)}
    else:
        msg = f"{prefix} due to unknown Unicorn error {error.errno}"

    raise exc(error, pc, msg, details)
674
+
675
+ def __repr__(self) -> str:
676
+ return f"UnicornEmulator(platform={self.platform})"
677
+
678
+
679
+ __all__ = [
680
+ "UnicornEmulator",
681
+ "UnicornEmulationMemoryReadError",
682
+ "UnicornEmulationMemoryWriteError",
683
+ "UnicornEmulationExecutionError",
684
+ ]
@@ -0,0 +1,5 @@
1
+ from .exceptions import * # noqa: F401, F403
2
+ from .exceptions import __all__ as __exceptions__
3
+ from .unstable import * # noqa: F401, F403
4
+
5
+ __all__ = __exceptions__