smallworld-re 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smallworld/__init__.py +35 -0
- smallworld/analyses/__init__.py +14 -0
- smallworld/analyses/analysis.py +88 -0
- smallworld/analyses/code_coverage.py +31 -0
- smallworld/analyses/colorizer.py +682 -0
- smallworld/analyses/colorizer_summary.py +100 -0
- smallworld/analyses/field_detection/__init__.py +14 -0
- smallworld/analyses/field_detection/field_analysis.py +536 -0
- smallworld/analyses/field_detection/guards.py +26 -0
- smallworld/analyses/field_detection/hints.py +133 -0
- smallworld/analyses/field_detection/malloc.py +211 -0
- smallworld/analyses/forced_exec/__init__.py +3 -0
- smallworld/analyses/forced_exec/forced_exec.py +87 -0
- smallworld/analyses/underlays/__init__.py +4 -0
- smallworld/analyses/underlays/basic.py +13 -0
- smallworld/analyses/underlays/underlay.py +31 -0
- smallworld/analyses/unstable/__init__.py +4 -0
- smallworld/analyses/unstable/angr/__init__.py +0 -0
- smallworld/analyses/unstable/angr/base.py +12 -0
- smallworld/analyses/unstable/angr/divergence.py +274 -0
- smallworld/analyses/unstable/angr/model.py +383 -0
- smallworld/analyses/unstable/angr/nwbt.py +63 -0
- smallworld/analyses/unstable/angr/typedefs.py +170 -0
- smallworld/analyses/unstable/angr/utils.py +25 -0
- smallworld/analyses/unstable/angr/visitor.py +315 -0
- smallworld/analyses/unstable/angr_nwbt.py +106 -0
- smallworld/analyses/unstable/code_coverage.py +54 -0
- smallworld/analyses/unstable/code_reachable.py +44 -0
- smallworld/analyses/unstable/control_flow_tracer.py +71 -0
- smallworld/analyses/unstable/pointer_finder.py +90 -0
- smallworld/arch/__init__.py +0 -0
- smallworld/arch/aarch64_arch.py +286 -0
- smallworld/arch/amd64_arch.py +86 -0
- smallworld/arch/i386_arch.py +44 -0
- smallworld/emulators/__init__.py +14 -0
- smallworld/emulators/angr/__init__.py +7 -0
- smallworld/emulators/angr/angr.py +1652 -0
- smallworld/emulators/angr/default.py +15 -0
- smallworld/emulators/angr/exceptions.py +7 -0
- smallworld/emulators/angr/exploration/__init__.py +9 -0
- smallworld/emulators/angr/exploration/bounds.py +27 -0
- smallworld/emulators/angr/exploration/default.py +17 -0
- smallworld/emulators/angr/exploration/terminate.py +22 -0
- smallworld/emulators/angr/factory.py +55 -0
- smallworld/emulators/angr/machdefs/__init__.py +35 -0
- smallworld/emulators/angr/machdefs/aarch64.py +292 -0
- smallworld/emulators/angr/machdefs/amd64.py +192 -0
- smallworld/emulators/angr/machdefs/arm.py +387 -0
- smallworld/emulators/angr/machdefs/i386.py +221 -0
- smallworld/emulators/angr/machdefs/machdef.py +138 -0
- smallworld/emulators/angr/machdefs/mips.py +184 -0
- smallworld/emulators/angr/machdefs/mips64.py +189 -0
- smallworld/emulators/angr/machdefs/ppc.py +101 -0
- smallworld/emulators/angr/machdefs/riscv.py +261 -0
- smallworld/emulators/angr/machdefs/xtensa.py +255 -0
- smallworld/emulators/angr/memory/__init__.py +7 -0
- smallworld/emulators/angr/memory/default.py +10 -0
- smallworld/emulators/angr/memory/fixups.py +43 -0
- smallworld/emulators/angr/memory/memtrack.py +105 -0
- smallworld/emulators/angr/scratch.py +43 -0
- smallworld/emulators/angr/simos.py +53 -0
- smallworld/emulators/angr/utils.py +70 -0
- smallworld/emulators/emulator.py +1013 -0
- smallworld/emulators/hookable.py +252 -0
- smallworld/emulators/panda/__init__.py +5 -0
- smallworld/emulators/panda/machdefs/__init__.py +28 -0
- smallworld/emulators/panda/machdefs/aarch64.py +93 -0
- smallworld/emulators/panda/machdefs/amd64.py +71 -0
- smallworld/emulators/panda/machdefs/arm.py +89 -0
- smallworld/emulators/panda/machdefs/i386.py +36 -0
- smallworld/emulators/panda/machdefs/machdef.py +86 -0
- smallworld/emulators/panda/machdefs/mips.py +94 -0
- smallworld/emulators/panda/machdefs/mips64.py +91 -0
- smallworld/emulators/panda/machdefs/ppc.py +79 -0
- smallworld/emulators/panda/panda.py +575 -0
- smallworld/emulators/unicorn/__init__.py +13 -0
- smallworld/emulators/unicorn/machdefs/__init__.py +28 -0
- smallworld/emulators/unicorn/machdefs/aarch64.py +310 -0
- smallworld/emulators/unicorn/machdefs/amd64.py +326 -0
- smallworld/emulators/unicorn/machdefs/arm.py +321 -0
- smallworld/emulators/unicorn/machdefs/i386.py +137 -0
- smallworld/emulators/unicorn/machdefs/machdef.py +117 -0
- smallworld/emulators/unicorn/machdefs/mips.py +202 -0
- smallworld/emulators/unicorn/unicorn.py +684 -0
- smallworld/exceptions/__init__.py +5 -0
- smallworld/exceptions/exceptions.py +85 -0
- smallworld/exceptions/unstable/__init__.py +1 -0
- smallworld/exceptions/unstable/exceptions.py +25 -0
- smallworld/extern/__init__.py +4 -0
- smallworld/extern/ctypes.py +94 -0
- smallworld/extern/unstable/__init__.py +1 -0
- smallworld/extern/unstable/ghidra.py +129 -0
- smallworld/helpers.py +107 -0
- smallworld/hinting/__init__.py +8 -0
- smallworld/hinting/hinting.py +214 -0
- smallworld/hinting/hints.py +427 -0
- smallworld/hinting/unstable/__init__.py +2 -0
- smallworld/hinting/utils.py +19 -0
- smallworld/instructions/__init__.py +18 -0
- smallworld/instructions/aarch64.py +20 -0
- smallworld/instructions/arm.py +18 -0
- smallworld/instructions/bsid.py +67 -0
- smallworld/instructions/instructions.py +258 -0
- smallworld/instructions/mips.py +21 -0
- smallworld/instructions/x86.py +100 -0
- smallworld/logging.py +90 -0
- smallworld/platforms.py +95 -0
- smallworld/py.typed +0 -0
- smallworld/state/__init__.py +6 -0
- smallworld/state/cpus/__init__.py +32 -0
- smallworld/state/cpus/aarch64.py +563 -0
- smallworld/state/cpus/amd64.py +676 -0
- smallworld/state/cpus/arm.py +630 -0
- smallworld/state/cpus/cpu.py +71 -0
- smallworld/state/cpus/i386.py +239 -0
- smallworld/state/cpus/mips.py +374 -0
- smallworld/state/cpus/mips64.py +372 -0
- smallworld/state/cpus/powerpc.py +229 -0
- smallworld/state/cpus/riscv.py +357 -0
- smallworld/state/cpus/xtensa.py +80 -0
- smallworld/state/memory/__init__.py +7 -0
- smallworld/state/memory/code.py +70 -0
- smallworld/state/memory/elf/__init__.py +3 -0
- smallworld/state/memory/elf/elf.py +564 -0
- smallworld/state/memory/elf/rela/__init__.py +32 -0
- smallworld/state/memory/elf/rela/aarch64.py +27 -0
- smallworld/state/memory/elf/rela/amd64.py +32 -0
- smallworld/state/memory/elf/rela/arm.py +51 -0
- smallworld/state/memory/elf/rela/i386.py +32 -0
- smallworld/state/memory/elf/rela/mips.py +45 -0
- smallworld/state/memory/elf/rela/ppc.py +45 -0
- smallworld/state/memory/elf/rela/rela.py +63 -0
- smallworld/state/memory/elf/rela/riscv64.py +27 -0
- smallworld/state/memory/elf/rela/xtensa.py +15 -0
- smallworld/state/memory/elf/structs.py +55 -0
- smallworld/state/memory/heap.py +85 -0
- smallworld/state/memory/memory.py +181 -0
- smallworld/state/memory/stack/__init__.py +31 -0
- smallworld/state/memory/stack/aarch64.py +22 -0
- smallworld/state/memory/stack/amd64.py +42 -0
- smallworld/state/memory/stack/arm.py +66 -0
- smallworld/state/memory/stack/i386.py +22 -0
- smallworld/state/memory/stack/mips.py +34 -0
- smallworld/state/memory/stack/mips64.py +34 -0
- smallworld/state/memory/stack/ppc.py +34 -0
- smallworld/state/memory/stack/riscv.py +22 -0
- smallworld/state/memory/stack/stack.py +127 -0
- smallworld/state/memory/stack/xtensa.py +34 -0
- smallworld/state/models/__init__.py +6 -0
- smallworld/state/models/mmio.py +186 -0
- smallworld/state/models/model.py +163 -0
- smallworld/state/models/posix.py +455 -0
- smallworld/state/models/x86/__init__.py +2 -0
- smallworld/state/models/x86/microsoftcdecl.py +35 -0
- smallworld/state/models/x86/systemv.py +240 -0
- smallworld/state/state.py +962 -0
- smallworld/state/unstable/__init__.py +0 -0
- smallworld/state/unstable/elf.py +393 -0
- smallworld/state/x86_registers.py +30 -0
- smallworld/utils.py +935 -0
- smallworld_re-1.0.0.dist-info/LICENSE.txt +21 -0
- smallworld_re-1.0.0.dist-info/METADATA +189 -0
- smallworld_re-1.0.0.dist-info/RECORD +166 -0
- smallworld_re-1.0.0.dist-info/WHEEL +5 -0
- smallworld_re-1.0.0.dist-info/entry_points.txt +2 -0
- smallworld_re-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,564 @@
|
|
1
|
+
import logging
|
2
|
+
import typing
|
3
|
+
|
4
|
+
import lief
|
5
|
+
|
6
|
+
from ....exceptions import ConfigurationError
|
7
|
+
from ....hinting import Hint, get_hinter
|
8
|
+
from ....platforms import Architecture, Byteorder, Platform
|
9
|
+
from ....utils import RangeCollection
|
10
|
+
from ...state import BytesValue
|
11
|
+
from ..code import Executable
|
12
|
+
from .rela import ElfRelocator
|
13
|
+
from .structs import ElfRela, ElfSymbol
|
14
|
+
|
15
|
+
log = logging.getLogger(__name__)
|
16
|
+
hinter = get_hinter(__name__)
|
17
|
+
|
18
|
+
# ELF machine values (compared against the file header's machine type)
# See /usr/include/elf.h for the complete list
EM_386 = 3  # Intel 80386
EM_MIPS = 8  # MIPS; all kinds
EM_PPC = 20  # PowerPC 32-bit
EM_PPC64 = 21  # PowerPC 64-bit
EM_ARM = 40  # ARM 32-bit
EM_X86_64 = 62  # AMD/Intel x86-64
EM_XTENSA = 94  # Xtensa
EM_AARCH64 = 183  # ARM v9, or AARCH64
EM_RISCV = 243  # RISC-V

# ARM-specific flag values
# Used together to guess which 32-bit ARM variant a file targets.
EF_ARM_VFP_FLOAT = 0x400
EF_ARM_SOFT_FLOAT = 0x200
EF_ARM_EABI_VER5 = 0x05000000

# Program header types
PT_NULL = 0  # Empty/unused program header
PT_LOAD = 1  # Describes loadable program segment
PT_DYNAMIC = 2  # Points to dynamic linking metadata
PT_INTERP = 3  # Points to program interpreter
PT_NOTE = 4  # Points to auxiliary information
PT_SHLIB = 5  # Reserved value; I think it's unused
PT_PHDR = 6  # Points to program header table
PT_TLS = 7  # Indicates need for thread-local storage
PT_LOOS = 0x60000000  # Start of OS-specific types
PT_GNU_EH_FRAME = 0x6474E550  # GNU-specific: Points to exception handler segment
PT_GNU_STACK = 0x6474E551  # GNU-specific: Describes stack permissions
PT_GNU_RELRO = 0x6474E552  # GNU-specific: Describes read-only after relocation segment
PT_GNU_PROPERTY = 0x6474E553  # GNU-specific: Points to GNU property
PT_HIOS = 0x6FFFFFFF  # End of OS-specific types
PT_LOPROC = 0x70000000  # Start of processor-specific types
PT_HIPROC = 0x7FFFFFFF  # End of processor-specific types

# Program header flags
PF_X = 0x1  # Segment is executable
PF_W = 0x2  # Segment is writable
PF_R = 0x4  # Segment is readable

# Universal dynamic tag values
DT_PLTGOT = 0x3  # Read as the GOT location when building implicit MIPS relas

# MIPS-specific dynamic tag values
DT_MIPS_LOCAL_GOTNO = 0x7000000A  # Number of local GOT entries to skip
DT_MIPS_GOTSYM = 0x70000013  # Index of the first dynamic symbol with a GOT entry

# MIPS-specific relocation type
# Assigned to the implicit GOT relas on 32-bit and 64-bit MIPS respectively.
R_MIPS_32 = 2
R_MIPS_64 = 18
|
68
|
+
|
69
|
+
|
70
|
+
class ElfExecutable(Executable):
    """Executable loaded from an ELF

    This loads a single ELF file into a SmallWorld memory object.
    It performs no relocation or any other initialization,
    just maps the file into memory as the kernel intended.

    Symbols and relocation entries found in the file are recorded
    while loading, so symbol values can be queried or updated later.

    Arguments:
        file: File-like object containing the image
        platform: Optional platform; used for header verification
        ignore_platform: Do not try to ID or verify platform from headers
        user_base: Optional user-specified base address
        page_size: System page size

    Raises:
        ConfigurationError: If the image cannot be parsed or loaded
            as an ELF with the given settings.
    """
|
84
|
+
|
85
|
+
def __init__(
    self,
    file: typing.BinaryIO,
    platform: typing.Optional[Platform] = None,
    ignore_platform: bool = False,
    user_base: typing.Optional[int] = None,
    page_size: int = 0x1000,
):
    """Parse an ELF image and map its loadable segments into this object.

    Arguments:
        file: File-like object containing the image; it is read in full
        platform: Optional platform; checked against the ELF header
        ignore_platform: Do not try to ID or verify platform from headers
        user_base: Optional user-specified base address
        page_size: System page size used when aligning segments

    Raises:
        ConfigurationError: If the image is not a parseable, loadable ELF,
            the header platform conflicts with ``platform``, no usable
            base address can be determined, or the entrypoint falls
            outside the executable segments (PowerPC64 excepted).
    """
    # Initialize with null address and size;
    # we will update these later.
    super().__init__(0, 0)
    self.platform = platform
    self.bounds: RangeCollection = RangeCollection()
    self._page_size = page_size
    self._user_base = user_base
    self._file_base = 0

    # Initialize symbol info
    self._symbols: typing.List[ElfSymbol] = list()
    self._syms_by_name: typing.Dict[str, typing.List[ElfSymbol]] = dict()
    self._relas: typing.List[ElfRela] = list()
    self._relocator: typing.Optional[ElfRelocator] = None

    # Read the entire image out of the file.
    image = file.read()

    # NOTE: For some reason, lief takes list(int), not bytes.
    # Convert once and reuse it for both calls below.
    raw = list(image)

    # Use lief to check if this is an ELF.
    if not lief.is_elf(raw):
        raise ConfigurationError("Image is not an ELF")

    # Use lief to parse the ELF.
    # NOTE: lief objects aren't deep-copyable.
    # I'd love to keep `elf` around for later use, but I can't.
    elf = lief.ELF.parse(raw)
    del raw
    if elf is None:
        raise ConfigurationError("Failed parsing ELF")

    # Extract the file header
    ehdr = elf.header
    if ehdr is None:
        raise ConfigurationError("Failed extracting ELF header")

    # Check machine compatibility
    if not ignore_platform:
        hdr_platform = self._platform_for_ehdr(elf)
        if self.platform is not None:
            if self.platform != hdr_platform:
                raise ConfigurationError(
                    "Platform mismatch: "
                    f"specified {self.platform}, but got {hdr_platform} from header"
                )
        else:
            self.platform = hdr_platform

    if self.platform is not None:
        # If we have a platform, we can relocate
        self._relocator = ElfRelocator.for_platform(self.platform)

    # Figure out if this file is loadable.
    # If there are program headers, it's loadable.
    # This doesn't support .ko files, which are relocatable objects
    # and have no program headers.
    if ehdr.program_header_offset == 0:
        # No program headers; not a loadable ELF.
        raise ConfigurationError("ELF is not loadable")

    if ehdr.program_header_offset >= len(image):
        # Obviously-invalid program headers
        raise ConfigurationError(
            f"Invalid program header offset {hex(ehdr.program_header_offset)}"
        )

    # Determine the file base address.
    self._file_base = elf.imagebase
    self._determine_base()

    for phdr in elf.segments:
        log.debug(f"{phdr}")
        if phdr.type == PT_LOAD:
            # Loadable segment
            # Map its data into memory
            self._map_segment(phdr, image)
        elif phdr.type == PT_DYNAMIC:
            # Dynamic linking metadata.
            # This ELF needs dynamic linking
            hint = Hint(message="Program includes dynamic linking metadata")
            hinter.info(hint)
        elif phdr.type == PT_INTERP:
            # Program interpreter
            # This completely changes how program loading works.
            # Whether you care is a different matter.
            interp = image[phdr.file_offset : phdr.file_offset + phdr.physical_size]
            hint = Hint(message=f"Program specifies interpreter {interp!r}")
            hinter.info(hint)
        elif phdr.type == PT_NOTE:
            # Auxiliary information
            # Possibly useful for comparing machine/OS type.
            pass
        elif phdr.type == PT_PHDR:
            # Program header self-reference
            # Useful for the dynamic linker, but not for us
            pass
        elif phdr.type == PT_TLS:
            # TLS Segment
            # Your analysis is about to get nasty :(
            hint = Hint(message="Program includes thread-local storage")
            hinter.info(hint)
        elif phdr.type == PT_GNU_EH_FRAME:
            # Exception handler frame.
            # GCC puts one of these in everything. Do we care?
            pass
        elif phdr.type == PT_GNU_STACK:
            # Stack executability
            # If this is missing, assume executable stack
            hint = Hint(message="Program specifies stack permissions")
            hinter.info(hint)
        elif phdr.type == PT_GNU_RELRO:
            # Read-only after relocation
            # Only the dynamic linker should write this data.
            hint = Hint(message="Program specifies RELRO data")
            hinter.info(hint)
        elif phdr.type == PT_GNU_PROPERTY:
            # GNU property segment
            # Contains extra metadata which I'm not sure anything uses
            pass
        elif phdr.type >= PT_LOOS and phdr.type <= PT_HIOS:
            # Unknown OS-specific program header
            # Either this is a weird ISA that extends the generic GNU ABI,
            # or this isn't a Linux ELF.
            # Format the numeric value, as the other "unknown" branches do.
            hint = Hint(
                message=f"Unknown OS-specific program header: {phdr.type.value:08x}"
            )
            hinter.warn(hint)
        elif phdr.type >= PT_LOPROC and phdr.type <= PT_HIPROC:
            # Unknown machine-specific program header
            # This is probably a non-Intel ISA.
            # Most of these are harmless, serving to tell the RTLD
            # where to find machine-specific metadata
            hint = Hint(
                message=f"Unknown machine-specific program header: {phdr.type.value:08x}"
            )
            hinter.warn(hint)
        else:
            # Unknown program header outside the allowed custom ranges
            hint = Hint(message=f"Invalid program header: {phdr.type.value:08x}")
            hinter.warn(hint)

    # Compute the final total capacity
    for offset, value in self.items():
        self.size = max(self.size, offset + value.get_size())

    # Determine if the file specifies an entrypoint
    if elf.entrypoint is not None and elf.entrypoint != 0:
        # Check if the entrypoint is valid (falls within the image)
        entrypoint = self._rebase_file(elf.entrypoint)
        if not self.bounds.contains_value(entrypoint):
            if (
                self.platform is not None
                and self.platform.architecture == Architecture.POWERPC64
            ):
                # NOTE: PowerPC64's ABI is trippy.
                # It uses "function descriptor" structs instead of
                # simple function pointers.
                # Thus, the entrypoint points to something in the data section.
                log.warning("Entrypoint for PowerPC64 file is not a code address")
                # The descriptor address is not usable as an entrypoint.
                self.entrypoint = None
            else:
                raise ConfigurationError(
                    f"Invalid entrypoint address {hex(entrypoint)}"
                )
        else:
            self.entrypoint = entrypoint
    else:
        # No entrypoint specified
        self.entrypoint = None

    # Organize symbols for later relocation
    self._extract_symbols(elf)
|
263
|
+
|
264
|
+
def _platform_for_ehdr(self, elf):
    """Work out the Platform (architecture and byte order) an ELF targets.

    Arguments:
        elf: Parsed lief ELF binary whose header is inspected

    Returns:
        A Platform built from the detected architecture and byte order.

    Raises:
        ConfigurationError: If the byte order, word size, machine type
            or ARM flag combination is not one this loader recognizes.
    """
    header = elf.header

    # Byte order comes straight from ei_data: 1 is LSB, 2 is MSB.
    ei_data = header.identity_data.value
    if ei_data == 1:
        byteorder = Byteorder.LITTLE
    elif ei_data == 2:
        byteorder = Byteorder.BIG
    else:
        raise ConfigurationError(f"Unknown value of ei_data: {hex(ei_data)}")

    machine = header.machine_type.value

    # Machine types that map onto exactly one architecture.
    direct = {
        EM_X86_64: Architecture.X86_64,
        EM_AARCH64: Architecture.AARCH64,
        EM_386: Architecture.X86_32,
        EM_PPC: Architecture.POWERPC32,
        EM_PPC64: Architecture.POWERPC64,
        EM_XTENSA: Architecture.XTENSA,
    }

    if machine in direct:
        architecture = direct[machine]
    elif machine == EM_ARM:
        # Some kind of arm32; the header flags narrow it down.
        flags = {flag.value for flag in header.arm_flags_list}
        eabi5 = EF_ARM_EABI_VER5 in flags
        if eabi5 and EF_ARM_SOFT_FLOAT in flags:
            # Either ARMv5T or some kind of ARMv6.
            # v5T is assumed, which isn't always correct.
            architecture = Architecture.ARM_V5T
        elif eabi5 and EF_ARM_VFP_FLOAT in flags:
            # ARMv7a, as built by gcc.
            architecture = Architecture.ARM_V7A
        else:
            raise ConfigurationError(f"Unknown ARM flags: {list(map(hex, flags))}")
    elif machine == EM_MIPS:
        # Some kind of mips; word size picks the variant.
        # TODO: There are more parameters than just word size
        ei_class = header.identity_class.value
        if ei_class == 1:
            architecture = Architecture.MIPS32
        elif ei_class == 2:
            architecture = Architecture.MIPS64
        else:
            raise ConfigurationError(f"Unknown value of ei_class: {hex(ei_class)}")
    elif machine == EM_RISCV:
        # RISC-V; only the 64-bit flavor is handled.
        ei_class = header.identity_class.value
        if ei_class == 1:
            raise ConfigurationError("RISC-V 32-bit isn't supported")
        elif ei_class == 2:
            architecture = Architecture.RISCV64
        else:
            raise ConfigurationError(f"Unknown value of ei_class: {hex(ei_class)}")
    else:
        raise ConfigurationError(f"Unknown value of e_machine: {hex(machine)}")

    return Platform(architecture, byteorder)
|
335
|
+
|
336
|
+
def _determine_base(self):
|
337
|
+
# Determine the base address of this image
|
338
|
+
#
|
339
|
+
# Normally, the loader respects the wishes of the file;
|
340
|
+
# an external base address is only used if the image doesn't define one.
|
341
|
+
#
|
342
|
+
# Here, the user doesn't know the image layout ahead of time,
|
343
|
+
# and may need to adjust their environment to fit,
|
344
|
+
# so a user-provided base is given equal weight.
|
345
|
+
#
|
346
|
+
# If both base addresses are specified,
|
347
|
+
# or neither is specified, it's a configuratione rror.
|
348
|
+
|
349
|
+
if self._user_base is None:
|
350
|
+
# No user base requested
|
351
|
+
if self._file_base == 0:
|
352
|
+
# No file base defined.
|
353
|
+
# Need the user to provide one
|
354
|
+
raise ConfigurationError(
|
355
|
+
"No base address provided for position-independent ELF image"
|
356
|
+
)
|
357
|
+
else:
|
358
|
+
self.address = self._file_base
|
359
|
+
else:
|
360
|
+
# User base requested
|
361
|
+
if self._file_base == 0:
|
362
|
+
# No file base requested; we are okay with this.
|
363
|
+
self.address = self._user_base
|
364
|
+
elif self._user_base == self._file_base:
|
365
|
+
# Everyone requested the same base address, so we're okay.
|
366
|
+
self.address = self._user_base
|
367
|
+
else:
|
368
|
+
# File base is defined.
|
369
|
+
# We (probably) cannot move the image without problems.
|
370
|
+
raise ConfigurationError("Base address defined for fixed-position ELF")
|
371
|
+
|
372
|
+
def _rebase_file(self, val: int):
|
373
|
+
# Rebase an offset from file-relative to image-relative
|
374
|
+
return val - self._file_base + self.address
|
375
|
+
|
376
|
+
def _page_align(self, val: int, up: bool = True):
|
377
|
+
# Align an address to a page boundary
|
378
|
+
# There are a number of cases where ELF files are imprecise;
|
379
|
+
# they rely on the kernel/libc to map things at page-aligned addresses.
|
380
|
+
if up:
|
381
|
+
val += self._page_size - 1
|
382
|
+
return (val // self._page_size) * self._page_size
|
383
|
+
|
384
|
+
def _map_segment(self, phdr, image):
    """Map one loadable segment of the image into this memory object.

    The file range and the memory range are both widened to page
    boundaries. If the file supplies fewer bytes than the in-memory
    size, the remainder is zero-filled. Executable segments are also
    added to ``self.bounds``.

    Arguments:
        phdr: lief segment describing what to map
        image: Raw bytes of the whole ELF file

    Raises:
        ConfigurationError: If the file supplies more bytes than the
            segment's page-aligned in-memory size.
    """
    # Compute segment boundaries
    seg_start = self._page_align(phdr.file_offset, up=False)
    seg_end = self._page_align(phdr.file_offset + phdr.physical_size)
    seg_addr = self._page_align(self._rebase_file(phdr.virtual_address), up=False)
    # The in-memory size must also cover the bytes between the page
    # start and the segment's first byte.
    seg_size = self._page_align(phdr.virtual_size + (phdr.file_offset - seg_start))

    log.debug("Mapping: ")
    log.debug(f"    f: [ {seg_start:012x} -> {seg_end:012x} ]")
    log.debug(f"    m: [ {seg_addr:012x} -> {seg_addr + seg_size:012x} ]")

    # Extract segment data
    seg_data = image[seg_start:seg_end]
    if len(seg_data) < seg_size:
        # Segment is shorter than is available from the file;
        # zero-pad it up to exactly the in-memory size.
        seg_data += b"\0" * (seg_size - len(seg_data))
    elif len(seg_data) != seg_size:
        raise ConfigurationError(
            f"Expected segment of size {seg_size}, but got {len(seg_data)}"
        )
    if (phdr.flags & PF_X) != 0:
        # This is a code segment; add it to program bounds
        self.bounds.add_range((seg_addr, seg_addr + seg_size))

    # Add the segment to the memory map, keyed by offset from the load address
    seg_value = BytesValue(seg_data, None)
    self[seg_addr - self.address] = seg_value
|
412
|
+
|
413
|
+
def _extract_symbols(self, elf):
    """Record the symbols and relocation entries of a parsed ELF.

    Every lief symbol becomes an ElfSymbol stored in ``self._symbols``
    and ``self._syms_by_name``. Every lief relocation becomes an
    ElfRela stored in ``self._relas`` and attached to its symbol.
    MIPS binaries also get one implicit rela per dynamic symbol that
    owns a GOT entry.

    Arguments:
        elf: Parsed lief ELF binary
    """
    # Temporary map from lief symbol objects to our own symbols
    lief_to_elf = dict()

    # Figure out the base address
    # TODO: Currently, this only handles PIC or Non-PIC
    # It will be wrong for .o and .ko
    if self._file_base is not None and self._file_base != 0:
        # This is a non-PIC binary.
        # Dollars to ducats the symbol is absolute.
        baseaddr = 0
    else:
        # This is a PIC binary
        # Relative symbols will be relative to the load address
        baseaddr = self.address

    for s in elf.symbols:
        # Build a symbol
        sym = ElfSymbol(
            name=s.name,
            type=s.type.value,
            bind=s.binding.value,
            visibility=s.visibility.value,
            shndx=s.shndx,
            value=s.value,
            size=s.size,
            baseaddr=baseaddr,
        )
        # Save the sym, and temporarily tie it to its lief partner
        self._symbols.append(sym)
        self._syms_by_name.setdefault(sym.name, list()).append(sym)
        lief_to_elf[s] = sym

    # The platform may be unknown (ignore_platform with none supplied);
    # in that case no architecture-specific handling applies.
    arch = self.platform.architecture if self.platform is not None else None

    if arch == Architecture.MIPS32 or arch == Architecture.MIPS64:
        # All MIPS dynamic symbols have an implicit rela.
        # MIPS dynamic symbols always have a GOT entry;
        # to save space, the ABI just assumes that the rela exists

        # Find the GOT and the number of local entries
        gotoff = None
        gotsym = None
        local_gotno = None
        for dt in elf.dynamic_entries:
            if dt.tag.value == DT_MIPS_GOTSYM:
                gotsym = dt.value
            if dt.tag.value == DT_MIPS_LOCAL_GOTNO:
                local_gotno = dt.value
            if dt.tag.value == DT_PLTGOT:
                gotoff = dt.value
            if (
                local_gotno is not None
                and gotsym is not None
                and gotoff is not None
            ):
                # All three tags found; no need to scan further
                break

        if local_gotno is None or gotoff is None or gotsym is None:
            log.error("MIPS binary missing got information")
        else:
            # We found the GOT info; we're actually a dynamic binary
            # Figure out the GOT entry size based on arch
            if arch == Architecture.MIPS32:
                gotent = 4
                rela_type = R_MIPS_32
            else:
                gotent = 8
                rela_type = R_MIPS_64

            # Rebase the GOT offset relative to the image
            gotoff = self._rebase_file(gotoff)

            # Skip the first local_gotno entries
            gotoff += gotent * local_gotno

            # Dynamic symbols from index gotsym onward each own the
            # next consecutive GOT slot.
            for s in list(elf.dynamic_symbols)[gotsym:]:
                sym = lief_to_elf[s]
                rela = ElfRela(offset=gotoff, type=rela_type, symbol=sym, addend=0)
                sym.relas.append(rela)
                self._relas.append(rela)

                gotoff += gotent

    for r in elf.relocations:
        # Build a rela, and tie it to its symbol
        sym = lief_to_elf[r.symbol]
        rela = ElfRela(
            offset=r.address + baseaddr, type=r.type, symbol=sym, addend=r.addend
        )
        sym.relas.append(rela)
        self._relas.append(rela)
|
505
|
+
|
506
|
+
def _get_symbols(self, name: typing.Union[str, int]) -> typing.List[ElfSymbol]:
|
507
|
+
if isinstance(name, str):
|
508
|
+
# Caller wants to look up a symbol by name
|
509
|
+
if name not in self._syms_by_name:
|
510
|
+
raise ConfigurationError(f"No symbol named {name}")
|
511
|
+
|
512
|
+
syms = self._syms_by_name[name]
|
513
|
+
return list(syms)
|
514
|
+
elif isinstance(name, int):
|
515
|
+
return [self._symbols[name]]
|
516
|
+
else:
|
517
|
+
raise TypeError("Symbols must be specified by str names or int indexes")
|
518
|
+
|
519
|
+
def get_symbol_value(
    self, name: typing.Union[str, int], rebase: bool = True
) -> int:
    """Return the value of a symbol.

    Arguments:
        name: Symbol name, or integer index into the recorded symbols
        rebase: Add the symbol's base address to the stored value

    Returns:
        The symbol's value, with its base address added when
        ``rebase`` is true.

    Raises:
        ConfigurationError: If several symbols share the name but
            disagree on value or base address.
    """
    syms = self._get_symbols(name)
    first = syms[0]

    # Duplicates are fine only if they all describe the same location.
    # A difference in either the value or the base address is a conflict.
    for sym in syms[1:]:
        if sym.value != first.value or sym.baseaddr != first.baseaddr:
            raise ConfigurationError(f"Conflicting syms named {name}")

    val = first.value
    if rebase:
        val += first.baseaddr
    return val
|
532
|
+
|
533
|
+
def get_symbol_size(self, name: typing.Union[str, int]):
    """Return the size of a symbol.

    Arguments:
        name: Symbol name, or integer index into the recorded symbols

    Returns:
        The size stored on the first matching symbol.

    Raises:
        ConfigurationError: If several symbols share the name but
            report different sizes.
    """
    matches = self._get_symbols(name)
    # Same-named symbols must all agree on size.
    if len(matches) > 1 and any(m.size != matches[0].size for m in matches):
        raise ConfigurationError(f"Conflicting syms named {name}")
    return matches[0].size
|
540
|
+
|
541
|
+
def update_symbol_value(
    self, name: typing.Union[str, int], value: int, rebase: bool = True
) -> None:
    """Set a symbol's value and re-apply the relocations that use it.

    Arguments:
        name: Symbol name, or integer index into the recorded symbols
        value: New value for the symbol
        rebase: Treat ``value`` as absolute and subtract the symbol's
            base address before storing it

    Raises:
        ConfigurationError: If more than one symbol has the given name.
    """
    candidates = self._get_symbols(name)
    if len(candidates) > 1:
        raise ConfigurationError(f"Multiple syms named {name}")
    target = candidates[0]

    # An absolute value is stored relative to the symbol's base address.
    target.value = value - target.baseaddr if rebase else value

    relocator = self._relocator
    if relocator is None:
        # The value is updated, but nothing that refers to it can be patched.
        log.error(f"No platform defined; cannot relocate {name}!")
        return

    # Relocate!
    for rela in target.relas:
        relocator.relocate(self, rela)
|
562
|
+
|
563
|
+
|
564
|
+
__all__ = ["ElfExecutable"]
|
@@ -0,0 +1,32 @@
|
|
1
|
+
from .aarch64 import AArch64ElfRelocator
|
2
|
+
from .amd64 import AMD64ElfRelocator
|
3
|
+
from .arm import (
|
4
|
+
Armv5TElfRelocator,
|
5
|
+
Armv6MElfRelocator,
|
6
|
+
Armv7AElfRelocator,
|
7
|
+
Armv7MElfRelocator,
|
8
|
+
Armv7RElfRelocator,
|
9
|
+
)
|
10
|
+
from .i386 import I386ElfRelocator
|
11
|
+
from .mips import MIPSELElfRelocator, MIPSElfRelocator
|
12
|
+
from .ppc import PowerPCElfRelocator
|
13
|
+
from .rela import ElfRelocator
|
14
|
+
from .riscv64 import RISCV64ElfRelocator
|
15
|
+
from .xtensa import XtensaElfRelocator
|
16
|
+
|
17
|
+
__all__ = [
|
18
|
+
"AArch64ElfRelocator",
|
19
|
+
"AMD64ElfRelocator",
|
20
|
+
"Armv5TElfRelocator",
|
21
|
+
"Armv6MElfRelocator",
|
22
|
+
"Armv7AElfRelocator",
|
23
|
+
"Armv7MElfRelocator",
|
24
|
+
"Armv7RElfRelocator",
|
25
|
+
"I386ElfRelocator",
|
26
|
+
"MIPSElfRelocator",
|
27
|
+
"MIPSELElfRelocator",
|
28
|
+
"PowerPCElfRelocator",
|
29
|
+
"RISCV64ElfRelocator",
|
30
|
+
"XtensaElfRelocator",
|
31
|
+
"ElfRelocator",
|
32
|
+
]
|
@@ -0,0 +1,27 @@
|
|
1
|
+
from ..... import platforms
|
2
|
+
from .....exceptions import ConfigurationError
|
3
|
+
from ..structs import ElfRela
|
4
|
+
from .rela import ElfRelocator
|
5
|
+
|
6
|
+
R_AARCH64_GLOB_DAT = 1025  # Create GOT entry
R_AARCH64_JUMP_SLOT = 1026  # Create PLT entry
R_AARCH64_RELATIVE = 1027  # Adjust by program base


class AArch64ElfRelocator(ElfRelocator):
    """ELF relocator for little-endian AArch64."""

    arch = platforms.Architecture.AARCH64
    byteorder = platforms.Byteorder.LITTLE

    def _compute_value(self, rela: ElfRela):
        """Compute the bytes a relocation should contain.

        Arguments:
            rela: Relocation entry to evaluate

        Returns:
            The relocated value as 8 little-endian bytes.

        Raises:
            ConfigurationError: If the relocation type is not handled.
        """
        if (
            rela.type == R_AARCH64_GLOB_DAT
            or rela.type == R_AARCH64_JUMP_SLOT
            or rela.type == R_AARCH64_RELATIVE
        ):
            # Different semantics, all behave the same
            val = rela.symbol.value + rela.symbol.baseaddr + rela.addend
            return val.to_bytes(8, "little")
        else:
            # f-string, so the symbol name and type are actually reported
            raise ConfigurationError(
                f"Unknown relocation type for {rela.symbol.name}: {rela.type}"
            )
|
@@ -0,0 +1,32 @@
|
|
1
|
+
from ..... import platforms
|
2
|
+
from .....exceptions import ConfigurationError
|
3
|
+
from ..structs import ElfRela
|
4
|
+
from .rela import ElfRelocator
|
5
|
+
|
6
|
+
R_X86_64_GLOB_DAT = 6  # Create GOT entry
R_X86_64_JUMP_SLOT = 7  # Create PLT entry
R_X86_64_RELATIVE = 8  # Adjust by program base
R_X86_64_NUM = 43  # This and higher aren't valid


class AMD64ElfRelocator(ElfRelocator):
    """ELF relocator for little-endian x86-64."""

    arch = platforms.Architecture.X86_64
    byteorder = platforms.Byteorder.LITTLE

    def _compute_value(self, rela: ElfRela):
        """Compute the bytes a relocation should contain.

        Arguments:
            rela: Relocation entry to evaluate

        Returns:
            The relocated value as 8 little-endian bytes.

        Raises:
            ConfigurationError: If the relocation type is valid but
                unsupported, or is outside the valid range.
        """
        if (
            rela.type == R_X86_64_GLOB_DAT
            or rela.type == R_X86_64_JUMP_SLOT
            or rela.type == R_X86_64_RELATIVE
        ):
            # Different semantics, all behave the same
            val = rela.symbol.value + rela.symbol.baseaddr + rela.addend
            return val.to_bytes(8, "little")
        elif rela.type >= 0 and rela.type < R_X86_64_NUM:
            # f-string, so the symbol name and type are actually reported
            raise ConfigurationError(
                f"Valid, but unsupported relocation for {rela.symbol.name}: {rela.type}"
            )
        else:
            raise ConfigurationError(
                f"Invalid relocation type for {rela.symbol.name}: {rela.type}"
            )
|