angr 9.2.133-py3-none-win_amd64.whl → 9.2.135-py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- angr/__init__.py +1 -1
- angr/analyses/__init__.py +2 -1
- angr/analyses/calling_convention/__init__.py +6 -0
- angr/analyses/{calling_convention.py → calling_convention/calling_convention.py} +28 -61
- angr/analyses/calling_convention/fact_collector.py +503 -0
- angr/analyses/calling_convention/utils.py +57 -0
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +6 -6
- angr/analyses/complete_calling_conventions.py +32 -3
- angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +0 -6
- angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +1 -6
- angr/analyses/decompiler/optimization_passes/switch_default_case_duplicator.py +0 -6
- angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +0 -6
- angr/analyses/decompiler/structured_codegen/c.py +15 -5
- angr/analyses/variable_recovery/engine_vex.py +5 -0
- angr/calling_conventions.py +12 -4
- angr/knowledge_plugins/functions/function.py +4 -4
- angr/knowledge_plugins/functions/function_manager.py +6 -0
- angr/lib/angr_native.dll +0 -0
- angr/storage/memory_mixins/name_resolution_mixin.py +1 -1
- angr/utils/bits.py +13 -0
- {angr-9.2.133.dist-info → angr-9.2.135.dist-info}/METADATA +6 -6
- {angr-9.2.133.dist-info → angr-9.2.135.dist-info}/RECORD +26 -23
- {angr-9.2.133.dist-info → angr-9.2.135.dist-info}/LICENSE +0 -0
- {angr-9.2.133.dist-info → angr-9.2.135.dist-info}/WHEEL +0 -0
- {angr-9.2.133.dist-info → angr-9.2.135.dist-info}/entry_points.txt +0 -0
- {angr-9.2.133.dist-info → angr-9.2.135.dist-info}/top_level.txt +0 -0

angr/analyses/calling_convention/fact_collector.py (new file, +503 lines)

@@ -0,0 +1,503 @@
+from __future__ import annotations
+from typing import Any
+
+import pyvex
+import claripy
+
+from angr.utils.bits import s2u, u2s
+from angr.block import Block
+from angr.analyses.analysis import Analysis
+from angr.analyses import AnalysesHub
+from angr.knowledge_plugins.functions import Function
+from angr.codenode import BlockNode, HookNode
+from angr.engines.light import SimEngineNostmtVEX, SimEngineLight, SpOffset, RegisterOffset
+from angr.calling_conventions import SimRegArg, SimStackArg, default_cc
+from angr.sim_type import SimTypeBottom
+from .utils import is_sane_register_variable
+
+
+class FactCollectorState:
+    """
+    The abstract state for FactCollector.
+    """
+
+    __slots__ = (
+        "bp_value",
+        "callee_stored_regs",
+        "reg_reads",
+        "reg_writes",
+        "simple_stack",
+        "sp_value",
+        "stack_reads",
+        "stack_writes",
+        "tmps",
+    )
+
+    def __init__(self):
+        self.tmps = {}
+        self.simple_stack = {}
+
+        self.callee_stored_regs: dict[int, int] = {}  # reg offset -> stack offset
+        self.reg_reads = {}
+        self.reg_writes: set[int] = set()
+        self.stack_reads = {}
+        self.stack_writes: set[int] = set()
+        self.sp_value = 0
+        self.bp_value = 0
+
+    def register_read(self, offset: int, size_in_bytes: int):
+        if offset in self.reg_writes:
+            return
+        if offset not in self.reg_reads:
+            self.reg_reads[offset] = size_in_bytes
+        else:
+            self.reg_reads[offset] = max(self.reg_reads[offset], size_in_bytes)
+
+    def register_written(self, offset: int, size_in_bytes: int):
+        for o in range(size_in_bytes):
+            self.reg_writes.add(offset + o)
+
+    def stack_read(self, offset: int, size_in_bytes: int):
+        if offset in self.stack_writes:
+            return
+        if offset not in self.stack_reads:
+            self.stack_reads[offset] = size_in_bytes
+        else:
+            self.stack_reads[offset] = max(self.stack_reads[offset], size_in_bytes)
+
+    def stack_written(self, offset: int, size_int_bytes: int):
+        for o in range(size_int_bytes):
+            self.stack_writes.add(offset + o)
+
+    def copy(self, with_tmps: bool = False) -> FactCollectorState:
+        new_state = FactCollectorState()
+        new_state.reg_reads = self.reg_reads.copy()
+        new_state.stack_reads = self.stack_reads.copy()
+        new_state.stack_writes = self.stack_writes.copy()
+        new_state.reg_writes = self.reg_writes.copy()
+        new_state.callee_stored_regs = self.callee_stored_regs.copy()
+        new_state.sp_value = self.sp_value
+        new_state.bp_value = self.bp_value
+        new_state.simple_stack = self.simple_stack.copy()
+        if with_tmps:
+            new_state.tmps = self.tmps.copy()
+        return new_state
+
+
+binop_handler = SimEngineNostmtVEX[FactCollectorState, claripy.ast.BV, FactCollectorState].binop_handler
+
+
+class SimEngineFactCollectorVEX(
+    SimEngineNostmtVEX[FactCollectorState, SpOffset | RegisterOffset | int, None],
+    SimEngineLight[type[FactCollectorState], SpOffset | RegisterOffset | int, Block, None],
+):
+    """
+    The engine for FactCollector.
+    """
+
+    def __init__(self, project, bp_as_gpr: bool):
+        self.bp_as_gpr = bp_as_gpr
+        super().__init__(project)
+
+    def _process_block_end(self, stmt_result: list, whitelist: set[int] | None) -> None:
+        if self.block.vex.jumpkind == "Ijk_Call":
+            self.state.register_written(self.arch.ret_offset, self.arch.bytes)
+
+    def _top(self, bits: int):
+        return None
+
+    def _is_top(self, expr: Any) -> bool:
+        raise NotImplementedError
+
+    def _handle_conversion(self, from_size: int, to_size: int, signed: bool, operand: pyvex.IRExpr) -> Any:
+        return None
+
+    def _handle_stmt_Put(self, stmt):
+        v = self._expr(stmt.data)
+        if stmt.offset == self.arch.sp_offset and isinstance(v, SpOffset):
+            self.state.sp_value = v.offset
+        elif stmt.offset == self.arch.bp_offset and isinstance(v, SpOffset):
+            self.state.bp_value = v.offset
+        else:
+            self.state.register_written(stmt.offset, stmt.data.result_size(self.tyenv) // self.arch.byte_width)
+
+    def _handle_stmt_Store(self, stmt: pyvex.IRStmt.Store):
+        addr = self._expr(stmt.addr)
+        if isinstance(addr, SpOffset):
+            self.state.stack_written(addr.offset, stmt.data.result_size(self.tyenv) // self.arch.byte_width)
+            data = self._expr(stmt.data)
+            if isinstance(data, RegisterOffset) and not isinstance(data, SpOffset):
+                # push reg; we record the stored register as well as the stack slot offset
+                self.state.callee_stored_regs[data.reg] = u2s(addr.offset, self.arch.bits)
+            if isinstance(data, SpOffset):
+                self.state.simple_stack[addr.offset] = data
+
+    def _handle_stmt_WrTmp(self, stmt: pyvex.IRStmt.WrTmp):
+        v = self._expr(stmt.data)
+        if v is not None:
+            self.state.tmps[stmt.tmp] = v
+
+    def _handle_expr_Const(self, expr: pyvex.IRExpr.Const):
+        return expr.con.value
+
+    def _handle_expr_GSPTR(self, expr):
+        return None
+
+    def _handle_expr_Get(self, expr) -> SpOffset | None:
+        if expr.offset == self.arch.sp_offset:
+            return SpOffset(self.arch.bits, self.state.sp_value, is_base=False)
+        if expr.offset == self.arch.bp_offset and not self.bp_as_gpr:
+            return SpOffset(self.arch.bits, self.state.bp_value, is_base=False)
+        bits = expr.result_size(self.tyenv)
+        self.state.register_read(expr.offset, bits // self.arch.byte_width)
+        return RegisterOffset(bits, expr.offset, 0)
+
+    def _handle_expr_GetI(self, expr):
+        return None
+
+    def _handle_expr_ITE(self, expr):
+        return None
+
+    def _handle_expr_Load(self, expr):
+        addr = self._expr(expr.addr)
+        if isinstance(addr, SpOffset):
+            self.state.stack_read(addr.offset, expr.result_size(self.tyenv) // self.arch.byte_width)
+            return self.state.simple_stack.get(addr.offset)
+        return None
+
+    def _handle_expr_RdTmp(self, expr):
+        return self.state.tmps.get(expr.tmp, None)
+
+    def _handle_expr_VECRET(self, expr):
+        return None
+
+    @binop_handler
+    def _handle_binop_Add(self, expr):
+        op0, op1 = self._expr(expr.args[0]), self._expr(expr.args[1])
+        if isinstance(op0, SpOffset) and isinstance(op1, int):
+            return SpOffset(op0.bits, s2u(op0.offset + op1, op0.bits), is_base=op0.is_base)
+        if isinstance(op1, SpOffset) and isinstance(op0, int):
+            return SpOffset(op1.bits, s2u(op1.offset + op0, op1.bits), is_base=op1.is_base)
+        return None
+
+    @binop_handler
+    def _handle_binop_Sub(self, expr):
+        op0, op1 = self._expr(expr.args[0]), self._expr(expr.args[1])
+        if isinstance(op0, SpOffset) and isinstance(op1, int):
+            return SpOffset(op0.bits, s2u(op0.offset - op1, op0.bits), is_base=op0.is_base)
+        if isinstance(op1, SpOffset) and isinstance(op0, int):
+            return SpOffset(op1.bits, s2u(op1.offset - op0, op1.bits), is_base=op1.is_base)
+        return None
+
+    @binop_handler
+    def _handle_binop_And(self, expr):
+        op0, op1 = self._expr(expr.args[0]), self._expr(expr.args[1])
+        if isinstance(op0, SpOffset):
+            return op0
+        if isinstance(op1, SpOffset):
+            return op1
+        return None
+
+
+class FactCollector(Analysis):
+    """
+    An extremely fast analysis that extracts the facts of a function that CallingConventionAnalysis needs to make
+    decisions on the calling convention and prototype of the function.
+    """
+
+    def __init__(self, func: Function, max_depth: int = 5):
+        self.function = func
+        self._max_depth = max_depth
+
+        self.input_args: list[SimRegArg | SimStackArg] | None = None
+        self.retval_size: int | None = None
+
+        self._analyze()
+
+    def _analyze(self):
+        # breadth-first search over the function graph; collect registers and stack variables that are written to
+        # as well as read from, until max_depth is reached
+
+        end_states = self._analyze_startpoint()
+        self._analyze_endpoints_for_retval_size()
+        callee_restored_regs = self._analyze_endpoints_for_restored_regs()
+        self._determine_input_args(end_states, callee_restored_regs)
+
+    def _analyze_startpoint(self):
+        func_graph = self.function.transition_graph
+        startpoint = self.function.startpoint
+        bp_as_gpr = self.function.info.get("bp_as_gpr", False)
+        engine = SimEngineFactCollectorVEX(self.project, bp_as_gpr)
+        init_state = FactCollectorState()
+        if self.project.arch.call_pushes_ret:
+            init_state.sp_value = self.project.arch.bytes
+            init_state.bp_value = init_state.sp_value
+
+        traversed = set()
+        queue: list[tuple[int, FactCollectorState, BlockNode | HookNode | Function, BlockNode | HookNode | None]] = [
+            (0, init_state, startpoint, None)
+        ]
+        end_states: list[FactCollectorState] = []
+        while queue:
+            depth, state, node, retnode = queue.pop(0)
+            traversed.add(node)
+
+            if depth > self._max_depth:
+                end_states.append(state)
+                break
+
+            if isinstance(node, BlockNode) and node.size == 0:
+                continue
+            if isinstance(node, HookNode):
+                # attempt to convert it into a function
+                if self.kb.functions.contains_addr(node.addr):
+                    node = self.kb.functions.get_by_addr(node.addr)
+                else:
+                    continue
+            if isinstance(node, Function):
+                if node.calling_convention is not None and node.prototype is not None:
+                    # consume args and overwrite the return register
+                    self._handle_function(state, node)
+                if node.returning is False or retnode is None:
+                    # the function call does not return
+                    end_states.append(state)
+                else:
+                    # enqueue the retnode, but we don't increment the depth
+                    new_state = state.copy()
+                    if self.project.arch.call_pushes_ret:
+                        new_state.sp_value += self.project.arch.bytes
+                    queue.append((depth, new_state, retnode, None))
+                continue
+
+            block = self.project.factory.block(node.addr, size=node.size)
+            engine.process(state, block=block)
+
+            successor_added = False
+            call_succ, ret_succ = None, None
+            for _, succ, data in func_graph.out_edges(node, data=True):
+                edge_type = data.get("type")
+                if succ not in traversed and depth + 1 <= self._max_depth:
+                    if edge_type == "fake_return":
+                        ret_succ = succ
+                    elif edge_type == "transition":
+                        successor_added = True
+                        queue.append((depth + 1, state.copy(), succ, None))
+                    elif edge_type == "call":
+                        call_succ = succ
+            if call_succ is not None:
+                successor_added = True
+                queue.append((depth + 1, state.copy(), call_succ, ret_succ))
+
+            if not successor_added:
+                end_states.append(state)
+
+        return end_states
+
+    def _handle_function(self, state: FactCollectorState, func: Function) -> None:
+        try:
+            arg_locs = func.calling_convention.arg_locs(func.prototype)
+        except (TypeError, ValueError):
+            func.prototype = None
+            return
+
+        if None in arg_locs:
+            return
+
+        for arg_loc in arg_locs:
+            for loc in arg_loc.get_footprint():
+                if isinstance(loc, SimRegArg):
+                    state.register_read(self.project.arch.registers[loc.reg_name][0] + loc.reg_offset, loc.size)
+                elif isinstance(loc, SimStackArg):
+                    sp_value = state.sp_value
+                    if sp_value is not None:
+                        state.stack_read(sp_value + loc.stack_offset, loc.size)
+
+        # clobber caller-saved regs
+        for reg_name in func.calling_convention.CALLER_SAVED_REGS:
+            offset = self.project.arch.registers[reg_name][0]
+            state.register_written(offset, self.project.arch.registers[reg_name][1])
+
+    def _analyze_endpoints_for_retval_size(self):
+        """
+        Analyze all endpoints to determine the return value size.
+        """
+        func_graph = self.function.transition_graph
+        cc_cls = default_cc(
+            self.project.arch.name, platform=self.project.simos.name if self.project.simos is not None else None
+        )
+        cc = cc_cls(self.project.arch)
+        if isinstance(cc.RETURN_VAL, SimRegArg):
+            retreg_offset = cc.RETURN_VAL.check_offset(self.project.arch)
+        else:
+            return
+
+        retval_sizes = []
+        for endpoint in self.function.endpoints:
+            traversed = set()
+            queue: list[tuple[int, BlockNode | HookNode]] = [(0, endpoint)]
+            while queue:
+                depth, node = queue.pop(0)
+                traversed.add(node)
+
+                if depth > 3:
+                    break
+
+                if isinstance(node, BlockNode) and node.size == 0:
+                    continue
+                if isinstance(node, HookNode):
+                    # attempt to convert it into a function
+                    if self.kb.functions.contains_addr(node.addr):
+                        node = self.kb.functions.get_by_addr(node.addr)
+                    else:
+                        continue
+                if isinstance(node, Function):
+                    if (
+                        node.calling_convention is not None
+                        and node.prototype is not None
+                        and node.prototype.returnty is not None
+                        and not isinstance(node.prototype.returnty, SimTypeBottom)
+                    ):
+                        # assume the function overwrites the return variable
+                        retval_size = (
+                            node.prototype.returnty.with_arch(self.project.arch).size // self.project.arch.byte_width
+                        )
+                        retval_sizes.append(retval_size)
+                    continue
+
+                block = self.project.factory.block(node.addr, size=node.size)
+                # scan the block statements backwards to find writes to the return value register
+                retval_size = None
+                for stmt in reversed(block.vex.statements):
+                    if isinstance(stmt, pyvex.IRStmt.Put):
+                        size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
+                        if stmt.offset == retreg_offset:
+                            retval_size = max(size, 1)
+
+                if retval_size is not None:
+                    retval_sizes.append(retval_size)
+                    continue
+
+                for pred, _, data in func_graph.in_edges(node, data=True):
+                    edge_type = data.get("type")
+                    if pred not in traversed and depth + 1 <= self._max_depth:
+                        if edge_type == "fake_return":
+                            continue
+                        if edge_type in {"transition", "call"}:
+                            queue.append((depth + 1, pred))
+
+        self.retval_size = max(retval_sizes) if retval_sizes else None
+
+    def _analyze_endpoints_for_restored_regs(self):
+        """
+        Analyze all endpoints to determine the restored registers.
+        """
+        func_graph = self.function.transition_graph
+        callee_restored_regs = set()
+
+        for endpoint in self.function.endpoints:
+            traversed = set()
+            queue: list[tuple[int, BlockNode | HookNode]] = [(0, endpoint)]
+            while queue:
+                depth, node = queue.pop(0)
+                traversed.add(node)
+
+                if depth > 3:
+                    break
+
+                if isinstance(node, BlockNode) and node.size == 0:
+                    continue
+                if isinstance(node, (HookNode, Function)):
+                    continue
+
+                block = self.project.factory.block(node.addr, size=node.size)
+                # scan the block statements to find all statements that restore registers from the stack
+                tmps = {}
+                for stmt in block.vex.statements:
+                    if isinstance(stmt, pyvex.IRStmt.WrTmp):
+                        if isinstance(stmt.data, pyvex.IRExpr.Get) and stmt.data.offset in {
+                            self.project.arch.bp_offset,
+                            self.project.arch.sp_offset,
+                        }:
+                            tmps[stmt.tmp] = "sp"
+                        elif (
+                            isinstance(stmt.data, pyvex.IRExpr.Load)
+                            and isinstance(stmt.data.addr, pyvex.IRExpr.RdTmp)
+                            and tmps.get(stmt.data.addr.tmp) == "sp"
+                        ):
+                            tmps[stmt.tmp] = "stack_value"
+                        elif isinstance(stmt.data, pyvex.IRExpr.Const):
+                            tmps[stmt.tmp] = "const"
+                        elif isinstance(stmt.data, pyvex.IRExpr.Binop) and (  # noqa:SIM102
+                            stmt.data.op.startswith("Iop_Add") or stmt.data.op.startswith("Iop_Sub")
+                        ):
+                            if (
+                                isinstance(stmt.data.args[0], pyvex.IRExpr.RdTmp)
+                                and tmps.get(stmt.data.args[0].tmp) == "sp"
+                            ) or (
+                                isinstance(stmt.data.args[1], pyvex.IRExpr.RdTmp)
+                                and tmps.get(stmt.data.args[1].tmp) == "sp"
+                            ):
+                                tmps[stmt.tmp] = "sp"
+                    if isinstance(stmt, pyvex.IRStmt.Put):
+                        size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
+                        # is the data loaded from the stack?
+                        if (
+                            size == self.project.arch.bytes
+                            and isinstance(stmt.data, pyvex.IRExpr.RdTmp)
+                            and tmps.get(stmt.data.tmp) == "stack_value"
+                        ):
+                            callee_restored_regs.add(stmt.offset)
+
+                for pred, _, data in func_graph.in_edges(node, data=True):
+                    edge_type = data.get("type")
+                    if pred not in traversed and depth + 1 <= self._max_depth and edge_type == "transition":
+                        queue.append((depth + 1, pred))
+
+        return callee_restored_regs
+
+    def _determine_input_args(self, end_states: list[FactCollectorState], callee_restored_regs: set[int]) -> None:
+        self.input_args = []
+        reg_offset_created = set()
+        callee_saved_regs = set()
+        callee_saved_reg_stack_offsets = set()
+
+        # determine callee-saved registers
+        for state in end_states:
+            for reg_offset, stack_offset in state.callee_stored_regs.items():
+                if reg_offset in callee_restored_regs:
+                    callee_saved_regs.add(reg_offset)
+                    callee_saved_reg_stack_offsets.add(stack_offset)
+
+        for state in end_states:
+            for offset, size in state.reg_reads.items():
+                if (
+                    offset in reg_offset_created
+                    or offset == self.project.arch.bp_offset
+                    or not is_sane_register_variable(self.project.arch, offset, size)
+                    or offset in callee_saved_regs
+                ):
+                    continue
+                reg_offset_created.add(offset)
+                if self.project.arch.name in {"AMD64", "X86"} and size < self.project.arch.bytes:
+                    # use complete registers on AMD64 and X86
+                    reg_name = self.project.arch.translate_register_name(offset, size=self.project.arch.bytes)
+                    arg = SimRegArg(reg_name, self.project.arch.bytes)
+                else:
+                    reg_name = self.project.arch.translate_register_name(offset, size=size)
+                    arg = SimRegArg(reg_name, size)
+                self.input_args.append(arg)
+
+        stack_offset_created = set()
+        ret_addr_offset = 0 if not self.project.arch.call_pushes_ret else self.project.arch.bytes
+        for state in end_states:
+            for offset, size in state.stack_reads.items():
+                offset = u2s(offset, self.project.arch.bits)
+                if offset - ret_addr_offset > 0:
+                    if offset in stack_offset_created or offset in callee_saved_reg_stack_offsets:
+                        continue
+                    stack_offset_created.add(offset)
+                    arg = SimStackArg(offset - ret_addr_offset, size)
+                    self.input_args.append(arg)
+
+
+AnalysesHub.register_default("FunctionFactCollector", FactCollector)

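The new FactCollector is registered with the analyses hub as "FunctionFactCollector", so it can also be run by hand. A minimal sketch, not part of the diff: the binary path and function name are placeholders, while the registration name and the input_args / retval_size attributes come from the code above:

    import angr

    proj = angr.Project("/path/to/binary", auto_load_libs=False)  # placeholder binary
    proj.analyses.CFGFast(normalize=True)  # populate the knowledge base with functions

    func = proj.kb.functions["main"]  # hypothetical function name
    facts = proj.analyses.FunctionFactCollector(func)  # name registered in this diff

    print(facts.input_args)   # list of SimRegArg / SimStackArg objects, or None
    print(facts.retval_size)  # return value size in bytes, or None
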
angr/analyses/calling_convention/utils.py (new file, +57 lines)

@@ -0,0 +1,57 @@
+from __future__ import annotations
+import logging
+
+import archinfo
+from archinfo.arch_arm import is_arm_arch, ArchARMHF
+
+from angr.calling_conventions import SimCC
+
+l = logging.getLogger(__name__)
+
+
+def is_sane_register_variable(arch: archinfo.Arch, reg_offset: int, reg_size: int, def_cc: SimCC | None = None) -> bool:
+    """
+    Filters out all registers that are surely not members of function arguments.
+    This can be seen as a workaround, since VariableRecoveryFast sometimes gives input variables of cc_ndep (which
+    is a VEX-specific register) :-(
+
+    :param reg_offset:  The register offset.
+    :param reg_size:    The register size.
+    :return:            True if it is an acceptable function argument, False otherwise.
+    :rtype:             bool
+    """
+
+    arch_name = arch.name
+    if ":" in arch_name:
+        # for pcode architectures, we only keep registers that are known to be used as input arguments
+        if def_cc is not None:
+            return arch.translate_register_name(reg_offset, size=reg_size) in def_cc.ARG_REGS
+        return True
+
+    # VEX
+    if arch_name == "AARCH64":
+        return 16 <= reg_offset < 80  # x0-x7
+
+    if arch_name == "AMD64":
+        return 24 <= reg_offset < 40 or 64 <= reg_offset < 104  # rcx, rdx; rsi, rdi, r8, r9, r10
+        # 224 <= reg_offset < 480  # xmm0-xmm7
+
+    if is_arm_arch(arch):
+        if isinstance(arch, ArchARMHF):
+            return 8 <= reg_offset < 24 or 128 <= reg_offset < 160  # r0-r3; s0-s7, or d0-d4
+        return 8 <= reg_offset < 24  # r0-r3
+
+    if arch_name == "MIPS32":
+        return 24 <= reg_offset < 40  # a0-a3
+
+    if arch_name == "MIPS64":
+        return 48 <= reg_offset < 80 or 112 <= reg_offset < 208  # a0-a3 or t4-t7
+
+    if arch_name == "PPC32":
+        return 28 <= reg_offset < 60  # r3-r10
+
+    if arch_name == "X86":
+        return 8 <= reg_offset < 24 or 160 <= reg_offset < 288  # eax, ebx, ecx, edx # xmm0-xmm7
+
+    l.critical("Unsupported architecture %s.", arch.name)
+    return True

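For reference, a small sketch (not part of the diff) of what the helper filters on AMD64; the register offsets come from archinfo's VEX layout, and the assertions reflect the 24..40 and 64..104 windows above:

    import archinfo
    from angr.analyses.calling_convention.utils import is_sane_register_variable

    amd64 = archinfo.ArchAMD64()
    rdi_offset = amd64.registers["rdi"][0]  # 72: inside the rsi/rdi/r8-r10 window
    rbx_offset = amd64.registers["rbx"][0]  # 40: callee-saved, outside both windows

    assert is_sane_register_variable(amd64, rdi_offset, 8) is True
    assert is_sane_register_variable(amd64, rbx_offset, 8) is False
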
angr/analyses/cfg/indirect_jump_resolvers/jumptable.py (+6 -6; the removed lines were truncated in the upstream diff rendering, marked with …)

@@ -1639,7 +1639,7 @@ class JumpTableResolver(IndirectJumpResolver):
         # If we're just reading a constant, don't bother with the rest of this mess!
         if isinstance(load_stmt, pyvex.IRStmt.WrTmp):
             assert isinstance(load_stmt.data, pyvex.IRExpr.Load)
-            if …
+            if isinstance(load_stmt.data.addr, pyvex.IRExpr.Const):
                 # It's directly loading from a constant address
                 # e.g.,
                 #  ldr r0, =main+1
@@ -1656,7 +1656,7 @@ class JumpTableResolver(IndirectJumpResolver):
                 l.info("Resolved constant indirect jump from %#08x to %#08x", addr, jump_target_addr)
                 return jump_target

-        elif isinstance(load_stmt, pyvex.IRStmt.LoadG) and …
+        elif isinstance(load_stmt, pyvex.IRStmt.LoadG) and isinstance(load_stmt.addr, pyvex.IRExpr.Const):
             # It's directly loading from a constant address
             # e.g.,
             #  4352c SUB R1, R11, #0x1000
@@ -2269,9 +2269,9 @@ class JumpTableResolver(IndirectJumpResolver):

         if isinstance(load_stmt, pyvex.IRStmt.WrTmp):
             assert isinstance(load_stmt.data, pyvex.IRExpr.Load)
-            if …
+            if isinstance(load_stmt.data.addr, pyvex.IRExpr.RdTmp):
                 load_addr_tmp = load_stmt.data.addr.tmp
-            elif …
+            elif isinstance(load_stmt.data.addr, pyvex.IRExpr.Const):
                 # It's directly loading from a constant address
                 # e.g.,
                 #  ldr r0, =main+1
@@ -2280,9 +2280,9 @@ class JumpTableResolver(IndirectJumpResolver):
                 jump_target_addr = load_stmt.data.addr.con.value
                 return claripy.BVV(jump_target_addr, state.arch.bits)
         elif isinstance(load_stmt, pyvex.IRStmt.LoadG):
-            if …
+            if isinstance(load_stmt.addr, pyvex.IRExpr.RdTmp):
                 load_addr_tmp = load_stmt.addr.tmp
-            elif …
+            elif isinstance(load_stmt.addr, pyvex.IRExpr.Const):
                 # It's directly loading from a constant address
                 # e.g.,
                 #  4352c SUB R1, R11, #0x1000

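These hunks tighten the guards that decide whether the resolver is loading a jump target from a constant address. A standalone sketch (not from the diff) of the same pyvex pattern, using instruction bytes chosen purely for illustration:

    import pyvex
    import archinfo

    # mov eax, dword [0x1000] -- a load from a constant address
    irsb = pyvex.lift(b"\x8b\x04\x25\x00\x10\x00\x00", 0x400000, archinfo.ArchAMD64())
    for stmt in irsb.statements:
        if isinstance(stmt, pyvex.IRStmt.WrTmp) and isinstance(stmt.data, pyvex.IRExpr.Load):
            if isinstance(stmt.data.addr, pyvex.IRExpr.Const):
                print("constant-address load from", hex(stmt.data.addr.con.value))
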
angr/analyses/complete_calling_conventions.py (+32 -3; one removed line was truncated in the upstream diff rendering, marked with …)

@@ -7,6 +7,7 @@ import threading
 import time
 import logging
 from collections import defaultdict
+from enum import Enum

 import networkx

@@ -16,7 +17,7 @@ from angr.utils.graph import GraphUtils
 from angr.simos import SimWindows
 from angr.utils.mp import mp_context, Initializer
 from angr.knowledge_plugins.cfg import CFGModel
-from . import Analysis, register_analysis, VariableRecoveryFast, CallingConventionAnalysis
+from . import Analysis, register_analysis, VariableRecoveryFast, CallingConventionAnalysis, FactCollector

 if TYPE_CHECKING:
     from angr.calling_conventions import SimCC
@@ -30,6 +31,18 @@ _l = logging.getLogger(name=__name__)
 _mp_context = mp_context()


+class CallingConventionAnalysisMode(Enum):
+    """
+    The mode of calling convention analysis.
+
+    FAST:      Use FactCollector to collect facts, then use those facts for calling convention analysis.
+    VARIABLES: Use variables in VariableManager for calling convention analysis.
+    """
+
+    FAST = "fast"
+    VARIABLES = "variables"
+
+
 class CompleteCallingConventionsAnalysis(Analysis):
     """
     Implements full-binary calling convention analysis. During the initial analysis of a binary, you may set
@@ -39,6 +52,7 @@ class CompleteCallingConventionsAnalysis(Analysis):

     def __init__(
         self,
+        mode: CallingConventionAnalysisMode = CallingConventionAnalysisMode.FAST,
         recover_variables=False,
         low_priority=False,
         force=False,
@@ -71,6 +85,7 @@ class CompleteCallingConventionsAnalysis(Analysis):
         :param workers:             Number of multiprocessing workers.
         """

+        self.mode = mode
         self._recover_variables = recover_variables
         self._low_priority = low_priority
         self._force = force
@@ -88,6 +103,10 @@ class CompleteCallingConventionsAnalysis(Analysis):
         self._func_graphs = func_graphs if func_graphs else {}
         self.prototype_libnames: set[str] = set()

+        # sanity check
+        if self.mode not in {CallingConventionAnalysisMode.FAST, CallingConventionAnalysisMode.VARIABLES}:
+            raise ValueError(f"Invalid calling convention analysis mode {self.mode}.")
+
         self._func_addrs = []  # a list that holds addresses of all functions to be analyzed
         self._results = []
         if workers > 0:
@@ -322,7 +341,11 @@ class CompleteCallingConventionsAnalysis(Analysis):
             self.kb.variables.get_function_manager(func_addr),
         )

-        if …
+        if (
+            self.mode == CallingConventionAnalysisMode.VARIABLES
+            and self._recover_variables
+            and self.function_needs_variable_recovery(func)
+        ):
             # special case: we don't have a PCode-engine variable recovery analysis for PCode architectures!
             if ":" in self.project.arch.name and self._func_graphs and func.addr in self._func_graphs:
                 # this is a pcode architecture
@@ -341,9 +364,15 @@ class CompleteCallingConventionsAnalysis(Analysis):
             )
             return None, None, None, None

+        kwargs = {}
+        if self.mode == CallingConventionAnalysisMode.FAST:
+            facts = self.project.analyses[FactCollector].prep(kb=self.kb)(func)
+            kwargs["input_args"] = facts.input_args
+            kwargs["retval_size"] = facts.retval_size
+
         # determine the calling convention of each function
         cc_analysis = self.project.analyses[CallingConventionAnalysis].prep(kb=self.kb)(
-            func, cfg=self._cfg, analyze_callsites=self._analyze_callsites
+            func, cfg=self._cfg, analyze_callsites=self._analyze_callsites, **kwargs
         )

         if cc_analysis.cc is not None: