angr 9.2.156__cp310-cp310-macosx_11_0_arm64.whl → 9.2.157__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/cfg/cfg_base.py +87 -71
- angr/analyses/cfg/cfg_fast.py +5 -0
- angr/analyses/decompiler/clinic.py +182 -104
- angr/analyses/decompiler/decompiler.py +11 -0
- angr/analyses/decompiler/dephication/graph_vvar_mapping.py +1 -1
- angr/analyses/decompiler/structured_codegen/c.py +18 -5
- angr/analyses/disassembly.py +5 -11
- angr/analyses/s_propagator.py +2 -4
- angr/analyses/stack_pointer_tracker.py +3 -7
- angr/analyses/typehoon/simple_solver.py +3 -3
- angr/analyses/variable_recovery/engine_base.py +2 -8
- angr/analyses/variable_recovery/variable_recovery.py +4 -3
- angr/calling_conventions.py +3 -3
- angr/engines/hook.py +1 -1
- angr/engines/icicle.py +229 -0
- angr/engines/pcode/behavior.py +1 -4
- angr/engines/pcode/emulate.py +1 -4
- angr/engines/pcode/lifter.py +2 -10
- angr/engines/vex/claripy/irop.py +2 -2
- angr/knowledge_plugins/functions/function.py +18 -10
- angr/knowledge_plugins/functions/function_manager.py +68 -5
- angr/knowledge_plugins/variables/variable_manager.py +15 -3
- angr/lib/angr_native.dylib +0 -0
- angr/rustylib.cpython-310-darwin.so +0 -0
- angr/sim_variable.py +31 -0
- angr/storage/memory_mixins/address_concretization_mixin.py +2 -2
- angr/storage/memory_mixins/convenient_mappings_mixin.py +1 -1
- {angr-9.2.156.dist-info → angr-9.2.157.dist-info}/METADATA +7 -8
- {angr-9.2.156.dist-info → angr-9.2.157.dist-info}/RECORD +34 -34
- {angr-9.2.156.dist-info → angr-9.2.157.dist-info}/WHEEL +1 -1
- angr/rustylib.pyi +0 -165
- {angr-9.2.156.dist-info → angr-9.2.157.dist-info}/entry_points.txt +0 -0
- {angr-9.2.156.dist-info → angr-9.2.157.dist-info}/licenses/LICENSE +0 -0
- {angr-9.2.156.dist-info → angr-9.2.157.dist-info}/top_level.txt +0 -0
|
@@ -676,7 +676,7 @@ class SimpleSolver:
|
|
|
676
676
|
|
|
677
677
|
@staticmethod
|
|
678
678
|
def _get_all_paths(
|
|
679
|
-
graph: networkx.DiGraph,
|
|
679
|
+
graph: networkx.DiGraph[TypeVariable | DerivedTypeVariable],
|
|
680
680
|
sketch: Sketch,
|
|
681
681
|
node: DerivedTypeVariable,
|
|
682
682
|
visited: dict[TypeVariable | DerivedTypeVariable, SketchNode],
|
|
@@ -684,7 +684,7 @@ class SimpleSolver:
|
|
|
684
684
|
if node not in graph:
|
|
685
685
|
return
|
|
686
686
|
curr_node = visited[node]
|
|
687
|
-
for _, succ, data in graph.out_edges(node, data=True):
|
|
687
|
+
for _, succ, data in sorted(graph.out_edges(node, data=True), key=lambda x: str(x[1])):
|
|
688
688
|
label = data["label"]
|
|
689
689
|
if succ not in visited:
|
|
690
690
|
if isinstance(curr_node.typevar, DerivedTypeVariable):
|
|
@@ -1408,7 +1408,7 @@ class SimpleSolver:
|
|
|
1408
1408
|
visited.add(curr_node)
|
|
1409
1409
|
|
|
1410
1410
|
out_edges = sketch.graph.out_edges(curr_node, data=True)
|
|
1411
|
-
for _, succ, data in out_edges:
|
|
1411
|
+
for _, succ, data in sorted(out_edges, key=lambda x: str(x[1])):
|
|
1412
1412
|
if isinstance(succ, RecursiveRefNode):
|
|
1413
1413
|
ref = succ
|
|
1414
1414
|
succ: SketchNode | None = sketch.lookup(succ.target) # type: ignore
|
|
@@ -784,20 +784,14 @@ class SimEngineVRBase(
|
|
|
784
784
|
|
|
785
785
|
all_vars = {(0, variable) for variable in variables}
|
|
786
786
|
|
|
787
|
-
all_vars_list =
|
|
787
|
+
all_vars_list = sorted(all_vars, key=lambda val: (val[0], val[1].key), reverse=True)
|
|
788
788
|
|
|
789
789
|
if len(all_vars_list) > 1:
|
|
790
|
-
# sort by some value so that the outcome here isn't random
|
|
791
|
-
cast(list[tuple[int, SimStackVariable]], all_vars_list).sort(
|
|
792
|
-
reverse=True,
|
|
793
|
-
key=lambda val: (val[0], val[1].offset, val[1].base, val[1].base_addr, val[1].size),
|
|
794
|
-
)
|
|
795
|
-
|
|
796
790
|
l.warning(
|
|
797
791
|
"Reading memory with overlapping variables: %s. Ignoring all but the first one.", all_vars_list
|
|
798
792
|
)
|
|
799
793
|
|
|
800
|
-
var_offset, var =
|
|
794
|
+
var_offset, var = all_vars_list[0] # won't fail
|
|
801
795
|
# calculate variable_offset
|
|
802
796
|
if dynamic_offset is None:
|
|
803
797
|
offset_into_variable = var_offset
|
|
@@ -324,14 +324,15 @@ class VariableRecoveryState(VariableRecoveryStateBase):
|
|
|
324
324
|
except SimMemoryMissingError:
|
|
325
325
|
pass
|
|
326
326
|
|
|
327
|
-
|
|
327
|
+
existing_vars_list = sorted(existing_variables, key=lambda val: (val[0], val[1].key), reverse=True)
|
|
328
|
+
if len(existing_vars_list) > 1:
|
|
328
329
|
# create a phi node for all other variables
|
|
329
330
|
l.warning(
|
|
330
331
|
"Reading memory with overlapping variables: %s. Ignoring all but the first one.", existing_variables
|
|
331
332
|
)
|
|
332
333
|
|
|
333
|
-
if
|
|
334
|
-
offset, variable =
|
|
334
|
+
if existing_vars_list:
|
|
335
|
+
offset, variable = existing_vars_list[0]
|
|
335
336
|
self.variable_manager[self.func_addr].read_from(variable, offset, self._codeloc_from_state(state))
|
|
336
337
|
|
|
337
338
|
def _hook_memory_write(self, state):
|
angr/calling_conventions.py
CHANGED
|
@@ -725,7 +725,7 @@ class SimCC:
|
|
|
725
725
|
"""
|
|
726
726
|
session = self.ArgSession(self)
|
|
727
727
|
if self.return_in_implicit_outparam(ret_ty):
|
|
728
|
-
self.next_arg(session, SimTypePointer(SimTypeBottom()))
|
|
728
|
+
self.next_arg(session, SimTypePointer(SimTypeBottom()).with_arch(self.arch))
|
|
729
729
|
return session
|
|
730
730
|
|
|
731
731
|
def return_in_implicit_outparam(self, ty) -> bool: # pylint:disable=unused-argument
|
|
@@ -762,7 +762,7 @@ class SimCC:
|
|
|
762
762
|
assert self.RETURN_VAL is not None
|
|
763
763
|
ptr_loc = self.RETURN_VAL
|
|
764
764
|
else:
|
|
765
|
-
ptr_loc = self.next_arg(self.ArgSession(self), SimTypePointer(SimTypeBottom()))
|
|
765
|
+
ptr_loc = self.next_arg(self.ArgSession(self), SimTypePointer(SimTypeBottom()).with_arch(self.arch))
|
|
766
766
|
return SimReferenceArgument(
|
|
767
767
|
ptr_loc, SimStackArg(0, ty.size // self.arch.byte_width, is_fp=isinstance(ty, SimTypeFloat))
|
|
768
768
|
)
|
|
@@ -1445,7 +1445,7 @@ class SimCCMicrosoftAMD64(SimCC):
|
|
|
1445
1445
|
size = subty.size
|
|
1446
1446
|
if chosen is None:
|
|
1447
1447
|
# fallback to void*
|
|
1448
|
-
chosen = SimTypePointer(SimTypeBottom())
|
|
1448
|
+
chosen = SimTypePointer(SimTypeBottom()).with_arch(self.arch)
|
|
1449
1449
|
return self.return_val(chosen, perspective_returned=perspective_returned)
|
|
1450
1450
|
|
|
1451
1451
|
if not isinstance(ty, SimStruct):
|
angr/engines/hook.py
CHANGED
|
@@ -49,7 +49,7 @@ class HooksMixin(SuccessorsEngine, ProcedureMixin):
|
|
|
49
49
|
|
|
50
50
|
return None
|
|
51
51
|
|
|
52
|
-
def process_successors(self, successors, procedure=None, **kwargs):
|
|
52
|
+
def process_successors(self, successors, *, procedure=None, **kwargs):
|
|
53
53
|
state = self.state
|
|
54
54
|
if procedure is None:
|
|
55
55
|
procedure = self._lookup_hook(state, procedure)
|
angr/engines/icicle.py
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""icicle.py: An angr engine that uses Icicle to execute code."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
|
|
9
|
+
import claripy
|
|
10
|
+
import pypcode
|
|
11
|
+
from archinfo import Arch, Endness
|
|
12
|
+
|
|
13
|
+
from angr.engines.failure import SimEngineFailure
|
|
14
|
+
from angr.engines.hook import HooksMixin
|
|
15
|
+
from angr.engines.successors import SuccessorsEngine
|
|
16
|
+
from angr.engines.syscall import SimEngineSyscall
|
|
17
|
+
from angr.rustylib.icicle import Icicle, VmExit, ExceptionCode
|
|
18
|
+
from angr.sim_state import SimState
|
|
19
|
+
|
|
20
|
+
log = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
PROCESSORS_DIR = os.path.join(os.path.dirname(pypcode.__file__), "processors")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
|
|
27
|
+
class IcicleStateTranslationData:
|
|
28
|
+
"""
|
|
29
|
+
Represents the saved information needed to convert an Icicle state back
|
|
30
|
+
to an angr state.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
base_state: SimState
|
|
34
|
+
registers: set[str]
|
|
35
|
+
writable_pages: set[int]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class IcicleEngine(SuccessorsEngine):
|
|
39
|
+
"""
|
|
40
|
+
An angr engine that uses Icicle to execute concrete states. The purpose of
|
|
41
|
+
this implementation is to provide a high-performance concrete execution
|
|
42
|
+
engine in angr. While historically, angr has focused on symbolic execution,
|
|
43
|
+
better support for concrete execution enables new use cases such as fuzzing
|
|
44
|
+
in angr. This is ideal for testing bespoke binary targets, such as
|
|
45
|
+
microcontroller firmware, which may be difficult to correctly harness for
|
|
46
|
+
use with traditional fuzzing engines.
|
|
47
|
+
|
|
48
|
+
This class is the base class for the Icicle engine. It implements execution
|
|
49
|
+
by creating an Icicle instance, copying the state from angr to Icicle, and then
|
|
50
|
+
running the Icicle instance. The results are then copied back to the angr
|
|
51
|
+
state. It is likely the case that this can be improved by re-using the Icicle
|
|
52
|
+
instance across multiple runs and only copying the state when necessary.
|
|
53
|
+
|
|
54
|
+
For a more complete implementation, use the UberIcicleEngine class, which
|
|
55
|
+
intends to provide a more complete set of features, such as hooks and syscalls.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
@staticmethod
|
|
59
|
+
def __make_icicle_arch(arch: Arch) -> str | None:
|
|
60
|
+
"""
|
|
61
|
+
Convert an angr architecture to an Icicle architecture. Not particularly
|
|
62
|
+
accurate, just a set of heuristics to get the right architecture. When
|
|
63
|
+
adding a new architecture, this function may need to be updated.
|
|
64
|
+
"""
|
|
65
|
+
if arch.linux_name == "arm":
|
|
66
|
+
return "armv7a" if arch.memory_endness == Endness.LE else "armeb"
|
|
67
|
+
return arch.linux_name
|
|
68
|
+
|
|
69
|
+
@staticmethod
|
|
70
|
+
def __is_arm(icicle_arch: str) -> bool:
|
|
71
|
+
"""
|
|
72
|
+
Check if the architecture is ARM, based on the Icicle architecture name.
|
|
73
|
+
"""
|
|
74
|
+
return icicle_arch.startswith(("arm", "thumb"))
|
|
75
|
+
|
|
76
|
+
@staticmethod
|
|
77
|
+
def __is_thumb(icicle_arch: str, addr: int) -> bool:
|
|
78
|
+
"""
|
|
79
|
+
Check if the architecture is thumb based on the address.
|
|
80
|
+
"""
|
|
81
|
+
return IcicleEngine.__is_arm(icicle_arch) and addr & 1 == 1
|
|
82
|
+
|
|
83
|
+
@staticmethod
|
|
84
|
+
def __get_pages(state: SimState) -> set[int]:
|
|
85
|
+
"""
|
|
86
|
+
Unfortunately, the memory model doesn't have a way to get all pages.
|
|
87
|
+
Instead, we can get all of the backers from the loader, then all of the
|
|
88
|
+
pages from the PagedMemoryMixin and then do some math.
|
|
89
|
+
"""
|
|
90
|
+
pages = set()
|
|
91
|
+
page_size = state.memory.page_size
|
|
92
|
+
|
|
93
|
+
# pages from loader segments
|
|
94
|
+
proj = state.project
|
|
95
|
+
if proj is not None:
|
|
96
|
+
for addr, backer in proj.loader.memory.backers():
|
|
97
|
+
start = addr // page_size
|
|
98
|
+
end = (addr + len(backer) - 1) // page_size
|
|
99
|
+
pages.update(range(start, end + 1))
|
|
100
|
+
|
|
101
|
+
# pages from the memory model
|
|
102
|
+
pages.update(state.memory._pages)
|
|
103
|
+
|
|
104
|
+
return pages
|
|
105
|
+
|
|
106
|
+
@staticmethod
|
|
107
|
+
def __convert_angr_state_to_icicle(state: SimState) -> tuple[Icicle, IcicleStateTranslationData]:
|
|
108
|
+
icicle_arch = IcicleEngine.__make_icicle_arch(state.arch)
|
|
109
|
+
if icicle_arch is None:
|
|
110
|
+
raise ValueError("Unsupported architecture")
|
|
111
|
+
|
|
112
|
+
proj = state.project
|
|
113
|
+
if proj is None:
|
|
114
|
+
raise ValueError("IcicleEngine requires a project to be set")
|
|
115
|
+
|
|
116
|
+
emu = Icicle(icicle_arch, PROCESSORS_DIR)
|
|
117
|
+
|
|
118
|
+
copied_registers = set()
|
|
119
|
+
|
|
120
|
+
# To create a state in Icicle, we need to do the following:
|
|
121
|
+
# 1. Copy the register values
|
|
122
|
+
for register in state.arch.register_list:
|
|
123
|
+
register = register.vex_name.lower() if register.vex_name is not None else register.name
|
|
124
|
+
try:
|
|
125
|
+
emu.reg_write(register, state.solver.eval(state.registers.load(register), cast_to=int))
|
|
126
|
+
copied_registers.add(register)
|
|
127
|
+
except KeyError:
|
|
128
|
+
log.debug("Register %s not found in icicle", register)
|
|
129
|
+
|
|
130
|
+
# Unset the thumb bit if necessary
|
|
131
|
+
if IcicleEngine.__is_thumb(icicle_arch, state.addr):
|
|
132
|
+
emu.pc = state.addr & ~1
|
|
133
|
+
emu.isa_mode = 1
|
|
134
|
+
elif "arm" in icicle_arch: # Hack to work around us calling it r15t
|
|
135
|
+
emu.pc = state.addr
|
|
136
|
+
|
|
137
|
+
# Special case for x86 gs register
|
|
138
|
+
if state.arch.name == "X86":
|
|
139
|
+
emu.reg_write("GS_OFFSET", state.registers.load("gs").concrete_value << 16)
|
|
140
|
+
|
|
141
|
+
# 2. Copy the memory contents
|
|
142
|
+
|
|
143
|
+
mapped_pages = IcicleEngine.__get_pages(state)
|
|
144
|
+
writable_pages = set()
|
|
145
|
+
for page_num in mapped_pages:
|
|
146
|
+
addr = page_num * state.memory.page_size
|
|
147
|
+
size = state.memory.page_size
|
|
148
|
+
perm_bits = state.memory.permissions(addr).concrete_value
|
|
149
|
+
emu.mem_map(addr, size, perm_bits)
|
|
150
|
+
memory = state.memory.concrete_load(addr, size)
|
|
151
|
+
emu.mem_write(addr, memory)
|
|
152
|
+
|
|
153
|
+
if perm_bits & 2:
|
|
154
|
+
writable_pages.add(page_num)
|
|
155
|
+
|
|
156
|
+
# Add breakpoints for simprocedures
|
|
157
|
+
for addr in proj._sim_procedures:
|
|
158
|
+
emu.add_breakpoint(addr)
|
|
159
|
+
|
|
160
|
+
translation_data = IcicleStateTranslationData(
|
|
161
|
+
base_state=state,
|
|
162
|
+
registers=copied_registers,
|
|
163
|
+
writable_pages=writable_pages,
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
return (emu, translation_data)
|
|
167
|
+
|
|
168
|
+
@staticmethod
|
|
169
|
+
def __convert_icicle_state_to_angr(emu: Icicle, translation_data: IcicleStateTranslationData) -> SimState:
|
|
170
|
+
state = translation_data.base_state.copy()
|
|
171
|
+
|
|
172
|
+
# 1. Copy the register values
|
|
173
|
+
for register in translation_data.registers:
|
|
174
|
+
state.registers.store(register, emu.reg_read(register))
|
|
175
|
+
|
|
176
|
+
if IcicleEngine.__is_arm(emu.architecture): # Hack to work around us calling it r15t
|
|
177
|
+
state.registers.store("pc", (emu.pc | 1) if emu.isa_mode == 1 else emu.pc)
|
|
178
|
+
|
|
179
|
+
# 2. Copy the memory contents
|
|
180
|
+
for page_num in translation_data.writable_pages:
|
|
181
|
+
addr = page_num * state.memory.page_size
|
|
182
|
+
state.memory.store(addr, emu.mem_read(addr, state.memory.page_size))
|
|
183
|
+
|
|
184
|
+
return state
|
|
185
|
+
|
|
186
|
+
def process_successors(self, successors, *, num_inst=0, **kwargs):
|
|
187
|
+
if len(kwargs) > 0:
|
|
188
|
+
log.warning("IcicleEngine.process_successors received unknown kwargs: %s", kwargs)
|
|
189
|
+
|
|
190
|
+
emu, translation_data = self.__convert_angr_state_to_icicle(self.state)
|
|
191
|
+
|
|
192
|
+
if num_inst > 0:
|
|
193
|
+
emu.icount_limit = num_inst
|
|
194
|
+
|
|
195
|
+
status = emu.run() # pylint: ignore=assignment-from-no-return (pylint bug)
|
|
196
|
+
exc = emu.exception_code
|
|
197
|
+
|
|
198
|
+
if status == VmExit.UnhandledException:
|
|
199
|
+
if exc in (
|
|
200
|
+
ExceptionCode.ReadUnmapped,
|
|
201
|
+
ExceptionCode.ReadPerm,
|
|
202
|
+
ExceptionCode.WriteUnmapped,
|
|
203
|
+
ExceptionCode.WritePerm,
|
|
204
|
+
ExceptionCode.ExecViolation,
|
|
205
|
+
):
|
|
206
|
+
jumpkind = "Ijk_SigSEGV"
|
|
207
|
+
elif exc == ExceptionCode.Syscall:
|
|
208
|
+
jumpkind = "Ijk_Syscall"
|
|
209
|
+
elif exc == ExceptionCode.Halt:
|
|
210
|
+
jumpkind = "Ijk_Exit"
|
|
211
|
+
elif exc == ExceptionCode.InvalidInstruction:
|
|
212
|
+
jumpkind = "Ijk_NoDecode"
|
|
213
|
+
else:
|
|
214
|
+
jumpkind = "Ijk_EmFail"
|
|
215
|
+
else:
|
|
216
|
+
jumpkind = "Ijk_Boring"
|
|
217
|
+
|
|
218
|
+
successor_state = IcicleEngine.__convert_icicle_state_to_angr(emu, translation_data)
|
|
219
|
+
successors.add_successor(successor_state, successor_state.ip, claripy.true(), jumpkind, add_guard=False)
|
|
220
|
+
|
|
221
|
+
successors.processed = True
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class UberIcicleEngine(SimEngineFailure, SimEngineSyscall, HooksMixin, IcicleEngine):
|
|
225
|
+
"""
|
|
226
|
+
An extension of the IcicleEngine that uses mixins to add support for
|
|
227
|
+
syscalls and hooks. Most users will prefer to use this engine instead of the
|
|
228
|
+
IcicleEngine directly.
|
|
229
|
+
"""
|
angr/engines/pcode/behavior.py
CHANGED
|
@@ -4,13 +4,10 @@ from collections.abc import Callable, Iterable
|
|
|
4
4
|
|
|
5
5
|
import claripy
|
|
6
6
|
from claripy.ast.bv import BV
|
|
7
|
+
from pypcode import OpCode
|
|
7
8
|
|
|
8
9
|
from angr.errors import AngrError
|
|
9
10
|
|
|
10
|
-
try:
|
|
11
|
-
from pypcode import OpCode
|
|
12
|
-
except ImportError:
|
|
13
|
-
OpCode = None
|
|
14
11
|
|
|
15
12
|
# pylint:disable=abstract-method
|
|
16
13
|
|
angr/engines/pcode/emulate.py
CHANGED
|
@@ -3,6 +3,7 @@ import logging
|
|
|
3
3
|
|
|
4
4
|
import claripy
|
|
5
5
|
from claripy.ast.bv import BV
|
|
6
|
+
from pypcode import OpCode, Varnode, PcodeOp
|
|
6
7
|
|
|
7
8
|
from angr.engines.engine import SimEngine
|
|
8
9
|
from angr.utils.constants import DEFAULT_STATEMENT
|
|
@@ -10,10 +11,6 @@ from .lifter import IRSB
|
|
|
10
11
|
from .behavior import OpBehavior
|
|
11
12
|
from angr.errors import AngrError
|
|
12
13
|
from angr.state_plugins.inspect import BP_BEFORE, BP_AFTER
|
|
13
|
-
import contextlib
|
|
14
|
-
|
|
15
|
-
with contextlib.suppress(ImportError):
|
|
16
|
-
from pypcode import OpCode, Varnode, PcodeOp
|
|
17
14
|
|
|
18
15
|
|
|
19
16
|
l = logging.getLogger(__name__)
|
angr/engines/pcode/lifter.py
CHANGED
|
@@ -11,8 +11,9 @@ from typing import Any, TYPE_CHECKING
|
|
|
11
11
|
from collections.abc import Iterable, Sequence
|
|
12
12
|
|
|
13
13
|
import archinfo
|
|
14
|
-
from archinfo import ArchARM, ArchPcode
|
|
15
14
|
import cle
|
|
15
|
+
import pypcode
|
|
16
|
+
from archinfo import ArchARM, ArchPcode
|
|
16
17
|
from cachetools import LRUCache
|
|
17
18
|
|
|
18
19
|
# FIXME: Reusing these errors from pyvex for compatibility. Eventually these
|
|
@@ -28,16 +29,7 @@ from angr.errors import SimEngineError, SimTranslationError, SimError
|
|
|
28
29
|
from angr import sim_options as o
|
|
29
30
|
from angr.block import DisassemblerBlock, DisassemblerInsn
|
|
30
31
|
|
|
31
|
-
|
|
32
|
-
try:
|
|
33
|
-
import pypcode
|
|
34
|
-
except ImportError:
|
|
35
|
-
pypcode = None
|
|
36
|
-
|
|
37
|
-
|
|
38
32
|
if TYPE_CHECKING:
|
|
39
|
-
# this is to make pyright happy; otherwise it believes pypcode is None
|
|
40
|
-
import pypcode
|
|
41
33
|
from pypcode import PcodeOp, Context
|
|
42
34
|
|
|
43
35
|
|
angr/engines/vex/claripy/irop.py
CHANGED
|
@@ -982,10 +982,10 @@ class SimIROp:
|
|
|
982
982
|
return self._fp_vector_comparison(claripy.fpEQ, a0, a1)
|
|
983
983
|
|
|
984
984
|
def _op_fgeneric_CmpLE(self, a0, a1):
|
|
985
|
-
return self._fp_vector_comparison(claripy.
|
|
985
|
+
return self._fp_vector_comparison(claripy.fpLEQ, a0, a1)
|
|
986
986
|
|
|
987
987
|
def _op_fgeneric_CmpLT(self, a0, a1):
|
|
988
|
-
return self._fp_vector_comparison(claripy.
|
|
988
|
+
return self._fp_vector_comparison(claripy.fpLT, a0, a1)
|
|
989
989
|
|
|
990
990
|
def _auto_vectorize(self, f, args, rm=None, rm_passed=False):
|
|
991
991
|
if rm is not None:
|
|
@@ -206,14 +206,17 @@ class Function(Serializable):
|
|
|
206
206
|
if is_plt is not None:
|
|
207
207
|
self.is_plt = is_plt
|
|
208
208
|
else:
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
)
|
|
215
|
-
|
|
216
|
-
|
|
209
|
+
if self._function_manager is not None:
|
|
210
|
+
# use the faster cached version
|
|
211
|
+
self.is_plt = self._function_manager.is_plt_cached(addr)
|
|
212
|
+
else:
|
|
213
|
+
# Whether this function is a PLT entry or not is primarily relying on the PLT detection in CLE; it may
|
|
214
|
+
# also be updated (to True) during CFG recovery.
|
|
215
|
+
if self.project is None:
|
|
216
|
+
raise ValueError(
|
|
217
|
+
"'is_plt' must be specified if you do not specify a function manager for this new function."
|
|
218
|
+
)
|
|
219
|
+
self.is_plt = self.project.loader.find_plt_stub_name(addr) is not None
|
|
217
220
|
|
|
218
221
|
# Determine the name of this function
|
|
219
222
|
if name is None:
|
|
@@ -726,8 +729,13 @@ class Function(Serializable):
|
|
|
726
729
|
if hooker is not None:
|
|
727
730
|
binary_name = hooker.library_name
|
|
728
731
|
|
|
729
|
-
if binary_name is None
|
|
730
|
-
|
|
732
|
+
if binary_name is None:
|
|
733
|
+
if self._function_manager is not None:
|
|
734
|
+
# use the faster cached version
|
|
735
|
+
binary_name = self._function_manager.get_binary_name_cached(self.addr)
|
|
736
|
+
else:
|
|
737
|
+
if self.binary is not None and self.binary.binary:
|
|
738
|
+
binary_name = os.path.basename(self.binary.binary)
|
|
731
739
|
|
|
732
740
|
return binary_name
|
|
733
741
|
|
|
@@ -7,11 +7,14 @@ import logging
|
|
|
7
7
|
import collections.abc
|
|
8
8
|
import re
|
|
9
9
|
import weakref
|
|
10
|
+
import bisect
|
|
11
|
+
import os
|
|
10
12
|
from sortedcontainers import SortedDict
|
|
11
13
|
|
|
12
14
|
import networkx
|
|
13
15
|
|
|
14
16
|
from archinfo.arch_soot import SootMethodDescriptor
|
|
17
|
+
import cle
|
|
15
18
|
|
|
16
19
|
from angr.errors import SimEngineError
|
|
17
20
|
from angr.knowledge_plugins.plugin import KnowledgeBasePlugin
|
|
@@ -49,7 +52,8 @@ class FunctionDict(SortedDict):
|
|
|
49
52
|
t = Function(self._backref, addr)
|
|
50
53
|
with contextlib.suppress(Exception):
|
|
51
54
|
self[addr] = t
|
|
52
|
-
self._backref
|
|
55
|
+
if self._backref is not None:
|
|
56
|
+
self._backref._function_added(t)
|
|
53
57
|
return t
|
|
54
58
|
|
|
55
59
|
def get(self, addr):
|
|
@@ -85,7 +89,7 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
|
|
|
85
89
|
super().__init__(kb=kb)
|
|
86
90
|
self.function_address_types = self._kb._project.arch.function_address_types
|
|
87
91
|
self.address_types = self._kb._project.arch.address_types
|
|
88
|
-
self._function_map:
|
|
92
|
+
self._function_map: FunctionDict[int, Function] = FunctionDict(self, key_types=self.function_address_types)
|
|
89
93
|
self.function_addrs_set: set = set()
|
|
90
94
|
self.callgraph = networkx.MultiDiGraph()
|
|
91
95
|
self.block_map = {}
|
|
@@ -93,6 +97,12 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
|
|
|
93
97
|
# Registers used for passing arguments around
|
|
94
98
|
self._arg_registers = kb._project.arch.argument_registers
|
|
95
99
|
|
|
100
|
+
# local PLT dictionary cache
|
|
101
|
+
self._rplt_cache_ranges: None | list[tuple[int, int]] = None
|
|
102
|
+
self._rplt_cache: None | set[int] = None
|
|
103
|
+
# local binary name cache: min_addr -> (max_addr, binary_name)
|
|
104
|
+
self._binname_cache: None | SortedDict[int, tuple[int, str | None]] = None
|
|
105
|
+
|
|
96
106
|
def __setstate__(self, state):
|
|
97
107
|
self._kb = state["_kb"]
|
|
98
108
|
self.function_address_types = state["function_address_types"]
|
|
@@ -101,7 +111,7 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
|
|
|
101
111
|
self.callgraph = state["callgraph"]
|
|
102
112
|
self.block_map = state["block_map"]
|
|
103
113
|
|
|
104
|
-
self._function_map._backref = self
|
|
114
|
+
self._function_map._backref = weakref.proxy(self)
|
|
105
115
|
for func in self._function_map.values():
|
|
106
116
|
func._function_manager = self
|
|
107
117
|
|
|
@@ -131,18 +141,71 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
|
|
|
131
141
|
self.callgraph = networkx.MultiDiGraph()
|
|
132
142
|
self.block_map.clear()
|
|
133
143
|
self.function_addrs_set = set()
|
|
144
|
+
# cache
|
|
145
|
+
self._rplt_cache = None
|
|
146
|
+
self._rplt_cache_ranges = None
|
|
147
|
+
self._binname_cache = None
|
|
134
148
|
|
|
135
|
-
def
|
|
149
|
+
def _genenate_callmap_sif(self, filepath):
|
|
136
150
|
"""
|
|
137
151
|
Generate a sif file from the call map.
|
|
138
152
|
|
|
139
153
|
:param filepath: Path of the sif file
|
|
140
154
|
:return: None
|
|
141
155
|
"""
|
|
142
|
-
with open(filepath, "
|
|
156
|
+
with open(filepath, "w", encoding="utf-8") as f:
|
|
143
157
|
for src, dst in self.callgraph.edges():
|
|
144
158
|
f.write(f"{src:#x}\tDirectEdge\t{dst:#x}\n")
|
|
145
159
|
|
|
160
|
+
def _addr_in_plt_cached_ranges(self, addr: int) -> bool:
|
|
161
|
+
if self._rplt_cache_ranges is None:
|
|
162
|
+
return False
|
|
163
|
+
pos = bisect.bisect_left(self._rplt_cache_ranges, addr, key=lambda x: x[0])
|
|
164
|
+
return pos > 0 and self._rplt_cache_ranges[pos - 1][0] <= addr < self._rplt_cache_ranges[pos - 1][1]
|
|
165
|
+
|
|
166
|
+
def is_plt_cached(self, addr: int) -> bool:
|
|
167
|
+
# check if the addr is in the cache range
|
|
168
|
+
if not self._addr_in_plt_cached_ranges(addr):
|
|
169
|
+
# find the object containing this addr
|
|
170
|
+
obj = self._kb._project.loader.find_object_containing(addr, membership_check=False)
|
|
171
|
+
if obj is None:
|
|
172
|
+
return False
|
|
173
|
+
if self._rplt_cache_ranges is None:
|
|
174
|
+
self._rplt_cache_ranges = []
|
|
175
|
+
obj_range = obj.min_addr, obj.max_addr
|
|
176
|
+
idx = bisect.bisect_left(self._rplt_cache_ranges, obj_range)
|
|
177
|
+
if not (idx < len(self._rplt_cache_ranges) and self._rplt_cache_ranges[idx] == obj_range):
|
|
178
|
+
self._rplt_cache_ranges.insert(idx, obj_range)
|
|
179
|
+
if isinstance(obj, cle.MetaELF):
|
|
180
|
+
if self._rplt_cache is None:
|
|
181
|
+
self._rplt_cache = set()
|
|
182
|
+
self._rplt_cache |= set(obj.reverse_plt)
|
|
183
|
+
|
|
184
|
+
return addr in self._rplt_cache if self._rplt_cache is not None else False
|
|
185
|
+
|
|
186
|
+
def _binname_cache_get_addr_base(self, addr: int) -> int | None:
|
|
187
|
+
if self._binname_cache is None:
|
|
188
|
+
return None
|
|
189
|
+
try:
|
|
190
|
+
base_addr = next(self._binname_cache.irange(maximum=addr, reverse=True))
|
|
191
|
+
except StopIteration:
|
|
192
|
+
return None
|
|
193
|
+
return base_addr if base_addr <= addr < self._binname_cache[base_addr][0] else None
|
|
194
|
+
|
|
195
|
+
def get_binary_name_cached(self, addr: int) -> str | None:
|
|
196
|
+
base_addr = self._binname_cache_get_addr_base(addr)
|
|
197
|
+
if base_addr is None:
|
|
198
|
+
# not cached; cache it first
|
|
199
|
+
obj = self._kb._project.loader.find_object_containing(addr, membership_check=False)
|
|
200
|
+
if obj is None:
|
|
201
|
+
return None
|
|
202
|
+
if self._binname_cache is None:
|
|
203
|
+
self._binname_cache = SortedDict()
|
|
204
|
+
binary_basename = os.path.basename(obj.binary) if obj.binary else None
|
|
205
|
+
self._binname_cache[obj.min_addr] = obj.max_addr, binary_basename
|
|
206
|
+
base_addr = obj.min_addr
|
|
207
|
+
return self._binname_cache[base_addr][1] if self._binname_cache is not None else None
|
|
208
|
+
|
|
146
209
|
def _add_node(self, function_addr, node, syscall=None, size=None):
|
|
147
210
|
if isinstance(node, self.address_types):
|
|
148
211
|
node = self._kb._project.factory.snippet(node, size=size)
|
|
@@ -582,7 +582,7 @@ class VariableManagerInternal(Serializable):
|
|
|
582
582
|
return phi
|
|
583
583
|
|
|
584
584
|
# allocate a new phi variable
|
|
585
|
-
repre =
|
|
585
|
+
repre = sorted(variables, key=lambda val: val.key)[0]
|
|
586
586
|
repre_type = type(repre)
|
|
587
587
|
repre_size = max(var.size for var in variables)
|
|
588
588
|
if repre_type is SimRegisterVariable:
|
|
@@ -635,7 +635,13 @@ class VariableManagerInternal(Serializable):
|
|
|
635
635
|
return loc in self._variable_to_stmt[variable]
|
|
636
636
|
|
|
637
637
|
def find_variable_by_stmt(self, block_addr, stmt_idx, sort, block_idx: int | None = None):
|
|
638
|
-
|
|
638
|
+
variables = sorted(
|
|
639
|
+
self.find_variables_by_stmt(block_addr, stmt_idx, sort, block_idx=block_idx),
|
|
640
|
+
key=lambda var: (var[1], var[0].key),
|
|
641
|
+
)
|
|
642
|
+
if variables:
|
|
643
|
+
return variables[0]
|
|
644
|
+
return None
|
|
639
645
|
|
|
640
646
|
def find_variables_by_stmt(
|
|
641
647
|
self, block_addr: int, stmt_idx: int, sort: str, block_idx: int | None = None
|
|
@@ -667,7 +673,13 @@ class VariableManagerInternal(Serializable):
|
|
|
667
673
|
return var_and_offsets
|
|
668
674
|
|
|
669
675
|
def find_variable_by_atom(self, block_addr, stmt_idx, atom, block_idx: int | None = None):
|
|
670
|
-
|
|
676
|
+
variables = sorted(
|
|
677
|
+
self.find_variables_by_atom(block_addr, stmt_idx, atom, block_idx=block_idx),
|
|
678
|
+
key=lambda val: (val[1], val[0].key),
|
|
679
|
+
)
|
|
680
|
+
if variables:
|
|
681
|
+
return variables[0]
|
|
682
|
+
return None
|
|
671
683
|
|
|
672
684
|
def find_variables_by_atom(
|
|
673
685
|
self, block_addr, stmt_idx, atom, block_idx: int | None = None
|
angr/lib/angr_native.dylib
CHANGED
|
Binary file
|
|
Binary file
|