smallworld-re 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. smallworld/__init__.py +35 -0
  2. smallworld/analyses/__init__.py +14 -0
  3. smallworld/analyses/analysis.py +88 -0
  4. smallworld/analyses/code_coverage.py +31 -0
  5. smallworld/analyses/colorizer.py +682 -0
  6. smallworld/analyses/colorizer_summary.py +100 -0
  7. smallworld/analyses/field_detection/__init__.py +14 -0
  8. smallworld/analyses/field_detection/field_analysis.py +536 -0
  9. smallworld/analyses/field_detection/guards.py +26 -0
  10. smallworld/analyses/field_detection/hints.py +133 -0
  11. smallworld/analyses/field_detection/malloc.py +211 -0
  12. smallworld/analyses/forced_exec/__init__.py +3 -0
  13. smallworld/analyses/forced_exec/forced_exec.py +87 -0
  14. smallworld/analyses/underlays/__init__.py +4 -0
  15. smallworld/analyses/underlays/basic.py +13 -0
  16. smallworld/analyses/underlays/underlay.py +31 -0
  17. smallworld/analyses/unstable/__init__.py +4 -0
  18. smallworld/analyses/unstable/angr/__init__.py +0 -0
  19. smallworld/analyses/unstable/angr/base.py +12 -0
  20. smallworld/analyses/unstable/angr/divergence.py +274 -0
  21. smallworld/analyses/unstable/angr/model.py +383 -0
  22. smallworld/analyses/unstable/angr/nwbt.py +63 -0
  23. smallworld/analyses/unstable/angr/typedefs.py +170 -0
  24. smallworld/analyses/unstable/angr/utils.py +25 -0
  25. smallworld/analyses/unstable/angr/visitor.py +315 -0
  26. smallworld/analyses/unstable/angr_nwbt.py +106 -0
  27. smallworld/analyses/unstable/code_coverage.py +54 -0
  28. smallworld/analyses/unstable/code_reachable.py +44 -0
  29. smallworld/analyses/unstable/control_flow_tracer.py +71 -0
  30. smallworld/analyses/unstable/pointer_finder.py +90 -0
  31. smallworld/arch/__init__.py +0 -0
  32. smallworld/arch/aarch64_arch.py +286 -0
  33. smallworld/arch/amd64_arch.py +86 -0
  34. smallworld/arch/i386_arch.py +44 -0
  35. smallworld/emulators/__init__.py +14 -0
  36. smallworld/emulators/angr/__init__.py +7 -0
  37. smallworld/emulators/angr/angr.py +1652 -0
  38. smallworld/emulators/angr/default.py +15 -0
  39. smallworld/emulators/angr/exceptions.py +7 -0
  40. smallworld/emulators/angr/exploration/__init__.py +9 -0
  41. smallworld/emulators/angr/exploration/bounds.py +27 -0
  42. smallworld/emulators/angr/exploration/default.py +17 -0
  43. smallworld/emulators/angr/exploration/terminate.py +22 -0
  44. smallworld/emulators/angr/factory.py +55 -0
  45. smallworld/emulators/angr/machdefs/__init__.py +35 -0
  46. smallworld/emulators/angr/machdefs/aarch64.py +292 -0
  47. smallworld/emulators/angr/machdefs/amd64.py +192 -0
  48. smallworld/emulators/angr/machdefs/arm.py +387 -0
  49. smallworld/emulators/angr/machdefs/i386.py +221 -0
  50. smallworld/emulators/angr/machdefs/machdef.py +138 -0
  51. smallworld/emulators/angr/machdefs/mips.py +184 -0
  52. smallworld/emulators/angr/machdefs/mips64.py +189 -0
  53. smallworld/emulators/angr/machdefs/ppc.py +101 -0
  54. smallworld/emulators/angr/machdefs/riscv.py +261 -0
  55. smallworld/emulators/angr/machdefs/xtensa.py +255 -0
  56. smallworld/emulators/angr/memory/__init__.py +7 -0
  57. smallworld/emulators/angr/memory/default.py +10 -0
  58. smallworld/emulators/angr/memory/fixups.py +43 -0
  59. smallworld/emulators/angr/memory/memtrack.py +105 -0
  60. smallworld/emulators/angr/scratch.py +43 -0
  61. smallworld/emulators/angr/simos.py +53 -0
  62. smallworld/emulators/angr/utils.py +70 -0
  63. smallworld/emulators/emulator.py +1013 -0
  64. smallworld/emulators/hookable.py +252 -0
  65. smallworld/emulators/panda/__init__.py +5 -0
  66. smallworld/emulators/panda/machdefs/__init__.py +28 -0
  67. smallworld/emulators/panda/machdefs/aarch64.py +93 -0
  68. smallworld/emulators/panda/machdefs/amd64.py +71 -0
  69. smallworld/emulators/panda/machdefs/arm.py +89 -0
  70. smallworld/emulators/panda/machdefs/i386.py +36 -0
  71. smallworld/emulators/panda/machdefs/machdef.py +86 -0
  72. smallworld/emulators/panda/machdefs/mips.py +94 -0
  73. smallworld/emulators/panda/machdefs/mips64.py +91 -0
  74. smallworld/emulators/panda/machdefs/ppc.py +79 -0
  75. smallworld/emulators/panda/panda.py +575 -0
  76. smallworld/emulators/unicorn/__init__.py +13 -0
  77. smallworld/emulators/unicorn/machdefs/__init__.py +28 -0
  78. smallworld/emulators/unicorn/machdefs/aarch64.py +310 -0
  79. smallworld/emulators/unicorn/machdefs/amd64.py +326 -0
  80. smallworld/emulators/unicorn/machdefs/arm.py +321 -0
  81. smallworld/emulators/unicorn/machdefs/i386.py +137 -0
  82. smallworld/emulators/unicorn/machdefs/machdef.py +117 -0
  83. smallworld/emulators/unicorn/machdefs/mips.py +202 -0
  84. smallworld/emulators/unicorn/unicorn.py +684 -0
  85. smallworld/exceptions/__init__.py +5 -0
  86. smallworld/exceptions/exceptions.py +85 -0
  87. smallworld/exceptions/unstable/__init__.py +1 -0
  88. smallworld/exceptions/unstable/exceptions.py +25 -0
  89. smallworld/extern/__init__.py +4 -0
  90. smallworld/extern/ctypes.py +94 -0
  91. smallworld/extern/unstable/__init__.py +1 -0
  92. smallworld/extern/unstable/ghidra.py +129 -0
  93. smallworld/helpers.py +107 -0
  94. smallworld/hinting/__init__.py +8 -0
  95. smallworld/hinting/hinting.py +214 -0
  96. smallworld/hinting/hints.py +427 -0
  97. smallworld/hinting/unstable/__init__.py +2 -0
  98. smallworld/hinting/utils.py +19 -0
  99. smallworld/instructions/__init__.py +18 -0
  100. smallworld/instructions/aarch64.py +20 -0
  101. smallworld/instructions/arm.py +18 -0
  102. smallworld/instructions/bsid.py +67 -0
  103. smallworld/instructions/instructions.py +258 -0
  104. smallworld/instructions/mips.py +21 -0
  105. smallworld/instructions/x86.py +100 -0
  106. smallworld/logging.py +90 -0
  107. smallworld/platforms.py +95 -0
  108. smallworld/py.typed +0 -0
  109. smallworld/state/__init__.py +6 -0
  110. smallworld/state/cpus/__init__.py +32 -0
  111. smallworld/state/cpus/aarch64.py +563 -0
  112. smallworld/state/cpus/amd64.py +676 -0
  113. smallworld/state/cpus/arm.py +630 -0
  114. smallworld/state/cpus/cpu.py +71 -0
  115. smallworld/state/cpus/i386.py +239 -0
  116. smallworld/state/cpus/mips.py +374 -0
  117. smallworld/state/cpus/mips64.py +372 -0
  118. smallworld/state/cpus/powerpc.py +229 -0
  119. smallworld/state/cpus/riscv.py +357 -0
  120. smallworld/state/cpus/xtensa.py +80 -0
  121. smallworld/state/memory/__init__.py +7 -0
  122. smallworld/state/memory/code.py +70 -0
  123. smallworld/state/memory/elf/__init__.py +3 -0
  124. smallworld/state/memory/elf/elf.py +564 -0
  125. smallworld/state/memory/elf/rela/__init__.py +32 -0
  126. smallworld/state/memory/elf/rela/aarch64.py +27 -0
  127. smallworld/state/memory/elf/rela/amd64.py +32 -0
  128. smallworld/state/memory/elf/rela/arm.py +51 -0
  129. smallworld/state/memory/elf/rela/i386.py +32 -0
  130. smallworld/state/memory/elf/rela/mips.py +45 -0
  131. smallworld/state/memory/elf/rela/ppc.py +45 -0
  132. smallworld/state/memory/elf/rela/rela.py +63 -0
  133. smallworld/state/memory/elf/rela/riscv64.py +27 -0
  134. smallworld/state/memory/elf/rela/xtensa.py +15 -0
  135. smallworld/state/memory/elf/structs.py +55 -0
  136. smallworld/state/memory/heap.py +85 -0
  137. smallworld/state/memory/memory.py +181 -0
  138. smallworld/state/memory/stack/__init__.py +31 -0
  139. smallworld/state/memory/stack/aarch64.py +22 -0
  140. smallworld/state/memory/stack/amd64.py +42 -0
  141. smallworld/state/memory/stack/arm.py +66 -0
  142. smallworld/state/memory/stack/i386.py +22 -0
  143. smallworld/state/memory/stack/mips.py +34 -0
  144. smallworld/state/memory/stack/mips64.py +34 -0
  145. smallworld/state/memory/stack/ppc.py +34 -0
  146. smallworld/state/memory/stack/riscv.py +22 -0
  147. smallworld/state/memory/stack/stack.py +127 -0
  148. smallworld/state/memory/stack/xtensa.py +34 -0
  149. smallworld/state/models/__init__.py +6 -0
  150. smallworld/state/models/mmio.py +186 -0
  151. smallworld/state/models/model.py +163 -0
  152. smallworld/state/models/posix.py +455 -0
  153. smallworld/state/models/x86/__init__.py +2 -0
  154. smallworld/state/models/x86/microsoftcdecl.py +35 -0
  155. smallworld/state/models/x86/systemv.py +240 -0
  156. smallworld/state/state.py +962 -0
  157. smallworld/state/unstable/__init__.py +0 -0
  158. smallworld/state/unstable/elf.py +393 -0
  159. smallworld/state/x86_registers.py +30 -0
  160. smallworld/utils.py +935 -0
  161. smallworld_re-1.0.0.dist-info/LICENSE.txt +21 -0
  162. smallworld_re-1.0.0.dist-info/METADATA +189 -0
  163. smallworld_re-1.0.0.dist-info/RECORD +166 -0
  164. smallworld_re-1.0.0.dist-info/WHEEL +5 -0
  165. smallworld_re-1.0.0.dist-info/entry_points.txt +2 -0
  166. smallworld_re-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,564 @@
1
+ import logging
2
+ import typing
3
+
4
+ import lief
5
+
6
+ from ....exceptions import ConfigurationError
7
+ from ....hinting import Hint, get_hinter
8
+ from ....platforms import Architecture, Byteorder, Platform
9
+ from ....utils import RangeCollection
10
+ from ...state import BytesValue
11
+ from ..code import Executable
12
+ from .rela import ElfRelocator
13
+ from .structs import ElfRela, ElfSymbol
14
+
15
+ log = logging.getLogger(__name__)
16
+ hinter = get_hinter(__name__)
17
+
18
# ELF machine values (compared against the header's e_machine field)
# See /usr/include/elf.h for the complete list
EM_386 = 3  # Intel 80386
EM_MIPS = 8  # MIPS; all kinds
EM_PPC = 20  # PowerPC 32-bit
EM_PPC64 = 21  # PowerPC 64-bit
EM_ARM = 40  # ARM 32-bit
EM_X86_64 = 62  # AMD/Intel x86-64
EM_XTENSA = 94  # Xtensa
EM_AARCH64 = 183  # ARM v9, or AARCH64
EM_RISCV = 243  # RISC-V

# ARM-specific flag values
# Used below to tell ARMv5T images apart from ARMv7A images.
EF_ARM_VFP_FLOAT = 0x400
EF_ARM_SOFT_FLOAT = 0x200
EF_ARM_EABI_VER5 = 0x05000000

# Program header types
PT_NULL = 0  # Empty/unused program header
PT_LOAD = 1  # Describes loadable program segment
PT_DYNAMIC = 2  # Points to dynamic linking metadata
PT_INTERP = 3  # Points to program interpreter
PT_NOTE = 4  # Points to auxiliary information
PT_SHLIB = 5  # Reserved value; I think it's unused
PT_PHDR = 6  # Points to program header table
PT_TLS = 7  # Indicates need for thread-local storage
PT_LOOS = 0x60000000  # Start of OS-specific types
PT_GNU_EH_FRAME = 0x6474E550  # GNU-specific: Points to exception handler segment
PT_GNU_STACK = 0x6474E551  # GNU-specific: Describes stack permissions
PT_GNU_RELRO = 0x6474E552  # GNU-specific: Describes read-only after relocation segment
PT_GNU_PROPERTY = 0x6474E553  # GNU-specific: Points to GNU property
PT_HIOS = 0x6FFFFFFF  # End of OS-specific types
PT_LOPROC = 0x70000000  # Start of processor-specific types
PT_HIPROC = 0x7FFFFFFF  # End of processor-specific types

# Program header flags (bit masks tested against a segment's flags)
PF_X = 0x1  # Segment is executable
PF_W = 0x2  # Segment is writable
PF_R = 0x4  # Segment is readable

# Universal dynamic tag values
# DT_PLTGOT is read below to locate the GOT of MIPS images.
DT_PLTGOT = 0x3

# MIPS-specific dynamic tag values
# Read below to find which GOT entries belong to dynamic symbols.
DT_MIPS_LOCAL_GOTNO = 0x7000000A
DT_MIPS_GOTSYM = 0x70000013

# MIPS-specific relocation type
# Assigned to the implicit GOT relocations synthesized for MIPS symbols.
R_MIPS_32 = 2
R_MIPS_64 = 18
69
+
70
+ class ElfExecutable(Executable):
71
+ """Executable loaded from an ELF
72
+
73
+ This loads a single ELF file into a SmallWorld memory object.
74
+ It performs no relocation or any other initialization,
75
+ just maps the file into memory as the kernel intended.
76
+
77
+ Arguments:
78
+ file: File-like object containing the image
79
+ platform: Optional platform; used for header verification
80
+ ignore_platform: Do not try to ID or verify platform from headers
81
+ user_base: Optional user-specified base address
82
+ page_size: System page size
83
+ """
84
+
85
def __init__(
    self,
    file: typing.BinaryIO,
    platform: typing.Optional[Platform] = None,
    ignore_platform: bool = False,
    user_base: typing.Optional[int] = None,
    page_size: int = 0x1000,
):
    """Parse an ELF image and map its loadable segments into this object.

    Arguments:
        file: File-like object containing the image
        platform: Optional platform; used for header verification
        ignore_platform: Do not try to ID or verify platform from headers
        user_base: Optional user-specified base address
        page_size: System page size

    Raises:
        ConfigurationError: If the image is not a loadable ELF, the
            platform in the header disagrees with ``platform``, no usable
            base address can be determined, or the entrypoint falls
            outside the executable segments (except on PowerPC64).
    """
    # Initialize with null address and size;
    # we will update these later.
    super().__init__(0, 0)
    self.platform = platform
    self.bounds: RangeCollection = RangeCollection()
    self._page_size = page_size
    self._user_base = user_base
    self._file_base = 0

    # Initialize symbol info
    self._symbols: typing.List[ElfSymbol] = list()
    self._syms_by_name: typing.Dict[str, typing.List[ElfSymbol]] = dict()
    self._relas: typing.List[ElfRela] = list()
    self._relocator: typing.Optional[ElfRelocator] = None

    # Read the entire image out of the file.
    image = file.read()

    # NOTE: For some reason, lief takes list(int), not bytes.
    # Build that list once and share it between the two lief calls;
    # converting a large image twice is needlessly expensive.
    raw = list(image)

    # Use lief to check if this is an ELF.
    if not lief.is_elf(raw):
        raise ConfigurationError("Image is not an ELF")

    # Use lief to parse the ELF.
    # NOTE: lief objects aren't deep-copyable.
    # I'd love to keep `elf` around for later use, but I can't.
    elf = lief.ELF.parse(raw)
    if elf is None:
        raise ConfigurationError("Failed parsing ELF")

    # Extract the file header
    ehdr = elf.header
    if ehdr is None:
        raise ConfigurationError("Failed extracting ELF header")

    # Check machine compatibility
    if not ignore_platform:
        hdr_platform = self._platform_for_ehdr(elf)
        if self.platform is not None:
            if self.platform != hdr_platform:
                raise ConfigurationError(
                    "Platform mismatch: "
                    f"specified {self.platform}, but got {hdr_platform} from header"
                )
        else:
            self.platform = hdr_platform

    if self.platform is not None:
        # If we have a platform, we can relocate
        self._relocator = ElfRelocator.for_platform(self.platform)

    # Figure out if this file is loadable.
    # If there are program headers, it's loadable.
    # This doesn't support .ko files, which are relocatable objects
    # and have no program headers.
    if ehdr.program_header_offset == 0:
        # No program headers; not a loadable ELF.
        raise ConfigurationError("ELF is not loadable")

    if ehdr.program_header_offset >= len(image):
        # Obviously-invalid program headers
        raise ConfigurationError(
            f"Invalid program header offset {hex(ehdr.program_header_offset)}"
        )

    # Determine the file base address.
    self._file_base = elf.imagebase
    self._determine_base()

    for phdr in elf.segments:
        log.debug("%s", phdr)
        if phdr.type == PT_LOAD:
            # Loadable segment
            # Map its data into memory
            self._map_segment(phdr, image)
        elif phdr.type == PT_DYNAMIC:
            # Dynamic linking metadata.
            # This ELF needs dynamic linking
            hint = Hint(message="Program includes dynamic linking metadata")
            hinter.info(hint)
        elif phdr.type == PT_INTERP:
            # Program interpreter
            # This completely changes how program loading works.
            # Whether you care is a different matter.
            interp = image[phdr.file_offset : phdr.file_offset + phdr.physical_size]
            hint = Hint(message=f"Program specifies interpreter {interp!r}")
            hinter.info(hint)
        elif phdr.type == PT_NOTE:
            # Auxiliary information
            # Possibly useful for comparing machine/OS type.
            pass
        elif phdr.type == PT_PHDR:
            # Program header self-reference
            # Useful for the dynamic linker, but not for us
            pass
        elif phdr.type == PT_TLS:
            # TLS Segment
            # Your analysis is about to get nasty :(
            hint = Hint(message="Program includes thread-local storage")
            hinter.info(hint)
        elif phdr.type == PT_GNU_EH_FRAME:
            # Exception handler frame.
            # GCC puts one of these in everything. Do we care?
            pass
        elif phdr.type == PT_GNU_STACK:
            # Stack executability
            # If this is missing, assume executable stack
            hint = Hint(message="Program specifies stack permissions")
            hinter.info(hint)
        elif phdr.type == PT_GNU_RELRO:
            # Read-only after relocation
            # Only the dynamic linker should write this data.
            hint = Hint(message="Program specifies RELRO data")
            hinter.info(hint)
        elif phdr.type == PT_GNU_PROPERTY:
            # GNU property segment
            # Contains extra metadata which I'm not sure anything uses
            pass
        else:
            # Unrecognized type.  Normalize it to a plain number once so
            # all three messages below format it the same way, whether
            # lief hands back an enum member or a bare integer.
            type_value = getattr(phdr.type, "value", phdr.type)
            if phdr.type >= PT_LOOS and phdr.type <= PT_HIOS:
                # Unknown OS-specific program header
                # Either this is a weird ISA that extends the generic GNU ABI,
                # or this isn't a Linux ELF.
                hint = Hint(f"Unknown OS-specific program header: {type_value:08x}")
            elif phdr.type >= PT_LOPROC and phdr.type <= PT_HIPROC:
                # Unknown machine-specific program header
                # This is probably a non-Intel ISA.
                # Most of these are harmless, serving to tell the RTLD
                # where to find machine-specific metadata
                hint = Hint(
                    f"Unknown machine-specific program header: {type_value:08x}"
                )
            else:
                # Unknown program header outside the allowed custom ranges
                hint = Hint(f"Invalid program header: {type_value:08x}")
            hinter.warn(hint)

    # Compute the final total capacity
    for offset, value in self.items():
        self.size = max(self.size, offset + value.get_size())

    # Assume no usable entrypoint until proven otherwise.
    # This also covers the PowerPC64 case below, where the header's
    # entrypoint is deliberately not recorded.
    self.entrypoint = None

    # Determine if the file specifies an entrypoint
    if elf.entrypoint is not None and elf.entrypoint != 0:
        # Check if the entrypoint is valid (falls within the image)
        entrypoint = self._rebase_file(elf.entrypoint)
        if self.bounds.contains_value(entrypoint):
            self.entrypoint = entrypoint
        elif (
            self.platform is not None
            and self.platform.architecture == Architecture.POWERPC64
        ):
            # NOTE: PowerPC64's ABI is trippy.
            # It uses "function descriptor" structs instead of
            # simple function pointers.
            # Thus, the entrypoint points to something in the data section.
            log.warning("Entrypoint for PowerPC64 file is not a code address")
        else:
            raise ConfigurationError(
                f"Invalid entrypoint address {hex(entrypoint)}"
            )

    # Organize symbols for later relocation
    self._extract_symbols(elf)
263
+
264
def _platform_for_ehdr(self, elf):
    """Work out the Platform described by a parsed ELF's file header.

    Arguments:
        elf: Parsed lief ELF object whose header is inspected

    Returns:
        A Platform built from the detected architecture and byte order.

    Raises:
        ConfigurationError: If the byte order, word size, ARM flags or
            machine type is not one this loader recognizes.
    """
    header = elf.header

    # Byte order comes straight out of ei_data.
    ei_data = header.identity_data.value
    if ei_data == 1:
        byteorder = Byteorder.LITTLE  # LSB
    elif ei_data == 2:
        byteorder = Byteorder.BIG  # MSB
    else:
        raise ConfigurationError(f"Unknown value of ei_data: {hex(ei_data)}")

    # Machines that map one-to-one onto an architecture.
    direct = {
        EM_X86_64: Architecture.X86_64,
        EM_AARCH64: Architecture.AARCH64,
        EM_386: Architecture.X86_32,
        EM_PPC: Architecture.POWERPC32,
        EM_PPC64: Architecture.POWERPC64,
        EM_XTENSA: Architecture.XTENSA,
    }

    machine = header.machine_type.value
    if machine in direct:
        architecture = direct[machine]
    elif machine == EM_ARM:
        # Some kind of arm32; the header flags narrow it down.
        flags = {flag.value for flag in header.arm_flags_list}
        eabi5 = EF_ARM_EABI_VER5 in flags

        if eabi5 and EF_ARM_SOFT_FLOAT in flags:
            # This is either ARMv5T or some kind of ARMv6.
            # We're currently assuming v5T, but this isn't always correct.
            architecture = Architecture.ARM_V5T
        elif eabi5 and EF_ARM_VFP_FLOAT in flags:
            # This is ARMv7a, as built by gcc.
            architecture = Architecture.ARM_V7A
        else:
            raise ConfigurationError(f"Unknown ARM flags: {[hex(f) for f in flags]}")
    elif machine == EM_MIPS:
        # Some kind of mips; pick by ELF word size.
        # TODO: There are more parameters than just word size
        ei_class = header.identity_class.value
        if ei_class == 1:
            architecture = Architecture.MIPS32
        elif ei_class == 2:
            architecture = Architecture.MIPS64
        else:
            raise ConfigurationError(f"Unknown value of ei_class: {hex(ei_class)}")
    elif machine == EM_RISCV:
        # RISC-V; only the 64-bit flavor is handled.
        ei_class = header.identity_class.value
        if ei_class == 1:
            raise ConfigurationError("RISC-V 32-bit isn't supported")
        elif ei_class == 2:
            architecture = Architecture.RISCV64
        else:
            raise ConfigurationError(f"Unknown value of ei_class: {hex(ei_class)}")
    else:
        raise ConfigurationError(f"Unknown value of e_machine: {hex(machine)}")

    return Platform(architecture, byteorder)
335
+
336
def _determine_base(self):
    """Choose the load address and store it in ``self.address``.

    Normally, the loader respects the wishes of the file;
    an external base address is only used if the image doesn't define one.

    Here, the user doesn't know the image layout ahead of time,
    and may need to adjust their environment to fit,
    so a user-provided base is given equal weight.

    A base must come from the file or from the user.  If both supply
    one, they have to agree.

    Raises:
        ConfigurationError: If neither side supplies a base address,
            or the two supplied addresses differ.
    """
    requested = self._user_base
    from_file = self._file_base

    if requested is None:
        # No user base requested; the file has to supply one.
        if from_file == 0:
            raise ConfigurationError(
                "No base address provided for position-independent ELF image"
            )
        self.address = from_file
        return

    if from_file != 0 and requested != from_file:
        # File base is defined, and the user wants something else.
        # We (probably) cannot move the image without problems.
        raise ConfigurationError("Base address defined for fixed-position ELF")

    # Either the file expressed no preference, or everyone agrees.
    self.address = requested
371
+
372
def _rebase_file(self, val: int):
    """Translate an address expressed against the file's base to the loaded image.

    Arguments:
        val: Address relative to the base recorded in the file

    Returns:
        The same location shifted by the distance between the chosen
        load address and the file's base.
    """
    shift = self.address - self._file_base
    return val + shift
375
+
376
def _page_align(self, val: int, up: bool = True):
    """Snap an address to a multiple of the configured page size.

    There are a number of cases where ELF files are imprecise;
    they rely on the kernel/libc to map things at page-aligned addresses.

    Arguments:
        val: Address or size to align
        up: Round up to the next boundary if true, down otherwise

    Returns:
        The aligned value; already-aligned input is returned unchanged.
    """
    page = self._page_size
    # Rounding up is rounding down after stepping just short of one page.
    stepped = val + page - 1 if up else val
    return stepped - stepped % page
383
+
384
def _map_segment(self, phdr, image):
    """Copy one loadable segment out of the image into this memory object.

    The segment is widened to page boundaries on both ends, zero-padded
    up to its in-memory size, and stored at its offset from the image
    base.  Executable segments are also recorded in ``self.bounds``.

    Arguments:
        phdr: lief segment (program header) to map
        image: Raw bytes of the whole ELF file

    Raises:
        ConfigurationError: If the file supplies more data than the
            segment's page-aligned in-memory size.
    """
    # Compute segment boundaries
    seg_start = self._page_align(phdr.file_offset, up=False)
    seg_end = self._page_align(phdr.file_offset + phdr.physical_size)
    seg_addr = self._page_align(self._rebase_file(phdr.virtual_address), up=False)
    seg_size = self._page_align(phdr.virtual_size + (phdr.file_offset - seg_start))

    log.debug("Mapping: ")
    log.debug(f"    f: [ {seg_start:012x} -> {seg_end:012x} ]")
    log.debug(f"    m: [ {seg_addr:012x} -> {seg_addr + seg_size:012x} ]")

    # Extract segment data
    # NOTE(review): because seg_end is rounded up to a page boundary, any
    # file bytes between the end of this segment's data and that boundary
    # are copied as-is rather than zeroed -- confirm this is intended.
    seg_data = image[seg_start:seg_end]
    if len(seg_data) < seg_size:
        # Segment is shorter than is available from the file;
        # this will get zero-padded.
        # Pad by exactly the shortfall so the mapping ends up seg_size
        # bytes long, even if the slice was truncated by end-of-file.
        seg_data += b"\0" * (seg_size - len(seg_data))
    elif len(seg_data) != seg_size:
        raise ConfigurationError(
            f"Expected segment of size {seg_size}, but got {len(seg_data)}"
        )
    if (phdr.flags & PF_X) != 0:
        # This is a code segment; add it to program bounds
        self.bounds.add_range((seg_addr, seg_addr + seg_size))

    # Add the segment to the memory map
    seg_value = BytesValue(seg_data, None)
    self[seg_addr - self.address] = seg_value
412
+
413
def _extract_symbols(self, elf):
    """Record the image's symbols and relocations for later relocation.

    Every lief symbol becomes an ElfSymbol, indexed both by position and
    by name.  Every lief relocation becomes an ElfRela attached to its
    symbol.  MIPS images additionally get one synthesized relocation per
    dynamic symbol that owns a GOT entry.

    Arguments:
        elf: Parsed lief ELF object to read symbols and relocations from
    """
    # Temporary map from lief symbols to the ElfSymbols built for them.
    lief_to_elf = dict()

    # Figure out the base address
    # TODO: Currently, this only handles PIC or Non-PIC
    # It will be wrong for .o and .ko
    if self._file_base is not None and self._file_base != 0:
        # This is a non-PIC binary.
        # Dollars to ducats the symbol is absolute.
        baseaddr = 0
    else:
        # This is a PIC binary
        # Relative symbols will be relative to the load address
        baseaddr = self.address

    for s in elf.symbols:
        # Build a symbol
        sym = ElfSymbol(
            name=s.name,
            type=s.type.value,
            bind=s.binding.value,
            visibility=s.visibility.value,
            shndx=s.shndx,
            value=s.value,
            size=s.size,
            baseaddr=baseaddr,
        )
        # Save the sym, and temporarily tie it to its lief partner
        self._symbols.append(sym)
        self._syms_by_name.setdefault(sym.name, list()).append(sym)
        lief_to_elf[s] = sym

    # The platform is unknown when the caller skipped header detection
    # and supplied none; in that case there is nothing MIPS-specific to do.
    is_mips = self.platform is not None and self.platform.architecture in (
        Architecture.MIPS32,
        Architecture.MIPS64,
    )
    if is_mips:
        # All MIPS dynamic symbols have an implicit rela.
        # MIPS dynamic symbols always have a GOT entry;
        # to save space, the ABI just assumes that the rela exists

        # Find the GOT and the number of local entries
        gotoff = None
        gotsym = None
        local_gotno = None
        for dt in elf.dynamic_entries:
            if dt.tag.value == DT_MIPS_GOTSYM:
                gotsym = dt.value
            if dt.tag.value == DT_MIPS_LOCAL_GOTNO:
                local_gotno = dt.value
            if dt.tag.value == DT_PLTGOT:
                gotoff = dt.value
            if (
                local_gotno is not None
                and gotsym is not None
                and gotoff is not None
            ):
                break

        if local_gotno is None or gotoff is None or gotsym is None:
            log.error("MIPS binary missing got information")
        else:
            # We found the GOT info; we're actually a dynamic binary
            # Figure out the GOT entry size based on arch
            if self.platform.architecture == Architecture.MIPS32:
                gotent = 4
                rela_type = R_MIPS_32
            else:
                gotent = 8
                rela_type = R_MIPS_64

            # Rebase the GOT offset relative to the image
            gotoff = self._rebase_file(gotoff)

            # Skip the first local_gotno entries
            gotoff += gotent * local_gotno

            # One GOT slot per dynamic symbol from index gotsym onward.
            for s in list(elf.dynamic_symbols)[gotsym:]:
                sym = lief_to_elf[s]
                rela = ElfRela(offset=gotoff, type=rela_type, symbol=sym, addend=0)
                sym.relas.append(rela)
                self._relas.append(rela)

                gotoff += gotent

    for r in elf.relocations:
        # Build a rela, and tie it to its symbol
        sym = lief_to_elf[r.symbol]
        rela = ElfRela(
            offset=r.address + baseaddr, type=r.type, symbol=sym, addend=r.addend
        )
        sym.relas.append(rela)
        self._relas.append(rela)
505
+
506
+ def _get_symbols(self, name: typing.Union[str, int]) -> typing.List[ElfSymbol]:
507
+ if isinstance(name, str):
508
+ # Caller wants to look up a symbol by name
509
+ if name not in self._syms_by_name:
510
+ raise ConfigurationError(f"No symbol named {name}")
511
+
512
+ syms = self._syms_by_name[name]
513
+ return list(syms)
514
+ elif isinstance(name, int):
515
+ return [self._symbols[name]]
516
+ else:
517
+ raise TypeError("Symbols must be specified by str names or int indexes")
518
+
519
def get_symbol_value(
    self, name: typing.Union[str, int], rebase: bool = True
) -> int:
    """Look up the value of a symbol.

    Arguments:
        name: Symbol name, or integer index into the symbol list
        rebase: If true, add the symbol's base address to its value

    Returns:
        The symbol's value, rebased if requested.

    Raises:
        ConfigurationError: If several symbols share the name but
            disagree on value or base address.
    """
    syms = self._get_symbols(name)
    first = syms[0]

    # Duplicates are tolerated only if they are interchangeable.
    # A difference in EITHER field makes the answer ambiguous,
    # so the two comparisons must be joined with `or`.
    for sym in syms[1:]:
        if sym.value != first.value or sym.baseaddr != first.baseaddr:
            raise ConfigurationError(f"Conflicting syms named {name}")

    val = first.value
    if rebase:
        val += first.baseaddr
    return val
532
+
533
def get_symbol_size(self, name: typing.Union[str, int]):
    """Look up the size of a symbol.

    Arguments:
        name: Symbol name, or integer index into the symbol list

    Returns:
        The size shared by every symbol matching ``name``.

    Raises:
        ConfigurationError: If matching symbols disagree on size.
    """
    candidates = self._get_symbols(name)
    expected = candidates[0].size

    # Every duplicate has to report the same size as the first match.
    if any(other.size != expected for other in candidates[1:]):
        raise ConfigurationError(f"Conflicting syms named {name}")
    return expected
540
+
541
def update_symbol_value(
    self, name: typing.Union[str, int], value: int, rebase: bool = True
) -> None:
    """Assign a new value to a symbol and re-apply its relocations.

    Arguments:
        name: Symbol name, or integer index into the symbol list
        value: New value for the symbol
        rebase: If true, ``value`` is absolute and the symbol's base
            address is subtracted before it is stored

    Raises:
        ConfigurationError: If more than one symbol matches ``name``.
    """
    matches = self._get_symbols(name)
    if len(matches) > 1:
        raise ConfigurationError(f"Multiple syms named {name}")
    target = matches[0]

    # Store the value relative to the symbol's base address.
    target.value = value - target.baseaddr if rebase else value

    relocator = self._relocator
    if relocator is None:
        # Without a platform there is no relocator to apply the change.
        log.error(f"No platform defined; cannot relocate {name}!")
        return

    # Relocate every site that refers to this symbol.
    for rela in target.relas:
        relocator.relocate(self, rela)
562
+
563
+
564
+ __all__ = ["ElfExecutable"]
@@ -0,0 +1,32 @@
1
+ from .aarch64 import AArch64ElfRelocator
2
+ from .amd64 import AMD64ElfRelocator
3
+ from .arm import (
4
+ Armv5TElfRelocator,
5
+ Armv6MElfRelocator,
6
+ Armv7AElfRelocator,
7
+ Armv7MElfRelocator,
8
+ Armv7RElfRelocator,
9
+ )
10
+ from .i386 import I386ElfRelocator
11
+ from .mips import MIPSELElfRelocator, MIPSElfRelocator
12
+ from .ppc import PowerPCElfRelocator
13
+ from .rela import ElfRelocator
14
+ from .riscv64 import RISCV64ElfRelocator
15
+ from .xtensa import XtensaElfRelocator
16
+
17
+ __all__ = [
18
+ "AArch64ElfRelocator",
19
+ "AMD64ElfRelocator",
20
+ "Armv5TElfRelocator",
21
+ "Armv6MElfRelocator",
22
+ "Armv7AElfRelocator",
23
+ "Armv7MElfRelocator",
24
+ "Armv7RElfRelocator",
25
+ "I386ElfRelocator",
26
+ "MIPSElfRelocator",
27
+ "MIPSELElfRelocator",
28
+ "PowerPCElfRelocator",
29
+ "RISCV64ElfRelocator",
30
+ "XtensaElfRelocator",
31
+ "ElfRelocator",
32
+ ]
@@ -0,0 +1,27 @@
1
+ from ..... import platforms
2
+ from .....exceptions import ConfigurationError
3
+ from ..structs import ElfRela
4
+ from .rela import ElfRelocator
5
+
6
+ R_AARCH64_GLOB_DAT = 1025 # Create GOT entry
7
+ R_AARCH64_JUMP_SLOT = 1026 # Create PLT entry
8
+ R_AARCH64_RELATIVE = 1027 # Adjust by program base
9
+
10
+
11
class AArch64ElfRelocator(ElfRelocator):
    """Relocator for little-endian AArch64 ELF images.

    Handles the GLOB_DAT, JUMP_SLOT and RELATIVE relocation types.
    """

    arch = platforms.Architecture.AARCH64
    byteorder = platforms.Byteorder.LITTLE

    def _compute_value(self, rela: ElfRela):
        """Compute the bytes to write for one relocation.

        Arguments:
            rela: Relocation entry to resolve

        Returns:
            The resolved address as 8 little-endian bytes.

        Raises:
            ConfigurationError: If the relocation type is not handled.
        """
        if rela.type in (
            R_AARCH64_GLOB_DAT,
            R_AARCH64_JUMP_SLOT,
            R_AARCH64_RELATIVE,
        ):
            # Different semantics, all behave the same
            val = rela.symbol.value + rela.symbol.baseaddr + rela.addend
            return val.to_bytes(8, "little")

        # The message must be an f-string, or the placeholders are
        # printed literally instead of naming the symbol and type.
        raise ConfigurationError(
            f"Unknown relocation type for {rela.symbol.name}: {rela.type}"
        )
@@ -0,0 +1,32 @@
1
+ from ..... import platforms
2
+ from .....exceptions import ConfigurationError
3
+ from ..structs import ElfRela
4
+ from .rela import ElfRelocator
5
+
6
+ R_X86_64_GLOB_DAT = 6 # Create GOT entry
7
+ R_X86_64_JUMP_SLOT = 7 # Create PLT entry
8
+ R_X86_64_RELATIVE = 8 # Adjust by program base
9
+ R_X86_64_NUM = 43 # This and higher aren't valid
10
+
11
+
12
class AMD64ElfRelocator(ElfRelocator):
    """Relocator for little-endian x86-64 ELF images.

    Handles the GLOB_DAT, JUMP_SLOT and RELATIVE relocation types.
    """

    arch = platforms.Architecture.X86_64
    byteorder = platforms.Byteorder.LITTLE

    def _compute_value(self, rela: ElfRela):
        """Compute the bytes to write for one relocation.

        Arguments:
            rela: Relocation entry to resolve

        Returns:
            The resolved address as 8 little-endian bytes.

        Raises:
            ConfigurationError: If the relocation type is valid but not
                handled, or lies outside the valid range altogether.
        """
        if rela.type in (
            R_X86_64_GLOB_DAT,
            R_X86_64_JUMP_SLOT,
            R_X86_64_RELATIVE,
        ):
            # Different semantics, all behave the same
            val = rela.symbol.value + rela.symbol.baseaddr + rela.addend
            return val.to_bytes(8, "little")

        # Both messages must be f-strings, or the placeholders are
        # printed literally instead of naming the symbol and type.
        if rela.type >= 0 and rela.type < R_X86_64_NUM:
            raise ConfigurationError(
                f"Valid, but unsupported relocation for {rela.symbol.name}: {rela.type}"
            )
        raise ConfigurationError(
            f"Invalid relocation type for {rela.symbol.name}: {rela.type}"
        )