angr 9.2.133__py3-none-manylinux2014_aarch64.whl → 9.2.135__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (25) hide show
  1. angr/__init__.py +1 -1
  2. angr/analyses/__init__.py +2 -1
  3. angr/analyses/calling_convention/__init__.py +6 -0
  4. angr/analyses/{calling_convention.py → calling_convention/calling_convention.py} +28 -61
  5. angr/analyses/calling_convention/fact_collector.py +503 -0
  6. angr/analyses/calling_convention/utils.py +57 -0
  7. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +6 -6
  8. angr/analyses/complete_calling_conventions.py +32 -3
  9. angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +0 -6
  10. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +1 -6
  11. angr/analyses/decompiler/optimization_passes/switch_default_case_duplicator.py +0 -6
  12. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +0 -6
  13. angr/analyses/decompiler/structured_codegen/c.py +15 -5
  14. angr/analyses/variable_recovery/engine_vex.py +5 -0
  15. angr/calling_conventions.py +12 -4
  16. angr/knowledge_plugins/functions/function.py +4 -4
  17. angr/knowledge_plugins/functions/function_manager.py +6 -0
  18. angr/storage/memory_mixins/name_resolution_mixin.py +1 -1
  19. angr/utils/bits.py +13 -0
  20. {angr-9.2.133.dist-info → angr-9.2.135.dist-info}/METADATA +6 -6
  21. {angr-9.2.133.dist-info → angr-9.2.135.dist-info}/RECORD +25 -22
  22. {angr-9.2.133.dist-info → angr-9.2.135.dist-info}/LICENSE +0 -0
  23. {angr-9.2.133.dist-info → angr-9.2.135.dist-info}/WHEEL +0 -0
  24. {angr-9.2.133.dist-info → angr-9.2.135.dist-info}/entry_points.txt +0 -0
  25. {angr-9.2.133.dist-info → angr-9.2.135.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,503 @@
1
+ from __future__ import annotations
2
+ from typing import Any
3
+
4
+ import pyvex
5
+ import claripy
6
+
7
+ from angr.utils.bits import s2u, u2s
8
+ from angr.block import Block
9
+ from angr.analyses.analysis import Analysis
10
+ from angr.analyses import AnalysesHub
11
+ from angr.knowledge_plugins.functions import Function
12
+ from angr.codenode import BlockNode, HookNode
13
+ from angr.engines.light import SimEngineNostmtVEX, SimEngineLight, SpOffset, RegisterOffset
14
+ from angr.calling_conventions import SimRegArg, SimStackArg, default_cc
15
+ from angr.sim_type import SimTypeBottom
16
+ from .utils import is_sane_register_variable
17
+
18
+
19
+ class FactCollectorState:
20
+ """
21
+ The abstract state for FactCollector.
22
+ """
23
+
24
+ __slots__ = (
25
+ "bp_value",
26
+ "callee_stored_regs",
27
+ "reg_reads",
28
+ "reg_writes",
29
+ "simple_stack",
30
+ "sp_value",
31
+ "stack_reads",
32
+ "stack_writes",
33
+ "tmps",
34
+ )
35
+
36
+ def __init__(self):
37
+ self.tmps = {}
38
+ self.simple_stack = {}
39
+
40
+ self.callee_stored_regs: dict[int, int] = {} # reg offset -> stack offset
41
+ self.reg_reads = {}
42
+ self.reg_writes: set[int] = set()
43
+ self.stack_reads = {}
44
+ self.stack_writes: set[int] = set()
45
+ self.sp_value = 0
46
+ self.bp_value = 0
47
+
48
+ def register_read(self, offset: int, size_in_bytes: int):
49
+ if offset in self.reg_writes:
50
+ return
51
+ if offset not in self.reg_reads:
52
+ self.reg_reads[offset] = size_in_bytes
53
+ else:
54
+ self.reg_reads[offset] = max(self.reg_reads[offset], size_in_bytes)
55
+
56
+ def register_written(self, offset: int, size_in_bytes: int):
57
+ for o in range(size_in_bytes):
58
+ self.reg_writes.add(offset + o)
59
+
60
+ def stack_read(self, offset: int, size_in_bytes: int):
61
+ if offset in self.stack_writes:
62
+ return
63
+ if offset not in self.stack_reads:
64
+ self.stack_reads[offset] = size_in_bytes
65
+ else:
66
+ self.stack_reads[offset] = max(self.stack_reads[offset], size_in_bytes)
67
+
68
+ def stack_written(self, offset: int, size_int_bytes: int):
69
+ for o in range(size_int_bytes):
70
+ self.stack_writes.add(offset + o)
71
+
72
+ def copy(self, with_tmps: bool = False) -> FactCollectorState:
73
+ new_state = FactCollectorState()
74
+ new_state.reg_reads = self.reg_reads.copy()
75
+ new_state.stack_reads = self.stack_reads.copy()
76
+ new_state.stack_writes = self.stack_writes.copy()
77
+ new_state.reg_writes = self.reg_writes.copy()
78
+ new_state.callee_stored_regs = self.callee_stored_regs.copy()
79
+ new_state.sp_value = self.sp_value
80
+ new_state.bp_value = self.bp_value
81
+ new_state.simple_stack = self.simple_stack.copy()
82
+ if with_tmps:
83
+ new_state.tmps = self.tmps.copy()
84
+ return new_state
85
+
86
+
87
+ binop_handler = SimEngineNostmtVEX[FactCollectorState, claripy.ast.BV, FactCollectorState].binop_handler
88
+
89
+
90
+ class SimEngineFactCollectorVEX(
91
+ SimEngineNostmtVEX[FactCollectorState, SpOffset | RegisterOffset | int, None],
92
+ SimEngineLight[type[FactCollectorState], SpOffset | RegisterOffset | int, Block, None],
93
+ ):
94
+ """
95
+ The engine for FactCollector.
96
+ """
97
+
98
+ def __init__(self, project, bp_as_gpr: bool):
99
+ self.bp_as_gpr = bp_as_gpr
100
+ super().__init__(project)
101
+
102
+ def _process_block_end(self, stmt_result: list, whitelist: set[int] | None) -> None:
103
+ if self.block.vex.jumpkind == "Ijk_Call":
104
+ self.state.register_written(self.arch.ret_offset, self.arch.bytes)
105
+
106
+ def _top(self, bits: int):
107
+ return None
108
+
109
+ def _is_top(self, expr: Any) -> bool:
110
+ raise NotImplementedError
111
+
112
+ def _handle_conversion(self, from_size: int, to_size: int, signed: bool, operand: pyvex.IRExpr) -> Any:
113
+ return None
114
+
115
+ def _handle_stmt_Put(self, stmt):
116
+ v = self._expr(stmt.data)
117
+ if stmt.offset == self.arch.sp_offset and isinstance(v, SpOffset):
118
+ self.state.sp_value = v.offset
119
+ elif stmt.offset == self.arch.bp_offset and isinstance(v, SpOffset):
120
+ self.state.bp_value = v.offset
121
+ else:
122
+ self.state.register_written(stmt.offset, stmt.data.result_size(self.tyenv) // self.arch.byte_width)
123
+
124
+ def _handle_stmt_Store(self, stmt: pyvex.IRStmt.Store):
125
+ addr = self._expr(stmt.addr)
126
+ if isinstance(addr, SpOffset):
127
+ self.state.stack_written(addr.offset, stmt.data.result_size(self.tyenv) // self.arch.byte_width)
128
+ data = self._expr(stmt.data)
129
+ if isinstance(data, RegisterOffset) and not isinstance(data, SpOffset):
130
+ # push reg; we record the stored register as well as the stack slot offset
131
+ self.state.callee_stored_regs[data.reg] = u2s(addr.offset, self.arch.bits)
132
+ if isinstance(data, SpOffset):
133
+ self.state.simple_stack[addr.offset] = data
134
+
135
+ def _handle_stmt_WrTmp(self, stmt: pyvex.IRStmt.WrTmp):
136
+ v = self._expr(stmt.data)
137
+ if v is not None:
138
+ self.state.tmps[stmt.tmp] = v
139
+
140
+ def _handle_expr_Const(self, expr: pyvex.IRExpr.Const):
141
+ return expr.con.value
142
+
143
+ def _handle_expr_GSPTR(self, expr):
144
+ return None
145
+
146
+ def _handle_expr_Get(self, expr) -> SpOffset | None:
147
+ if expr.offset == self.arch.sp_offset:
148
+ return SpOffset(self.arch.bits, self.state.sp_value, is_base=False)
149
+ if expr.offset == self.arch.bp_offset and not self.bp_as_gpr:
150
+ return SpOffset(self.arch.bits, self.state.bp_value, is_base=False)
151
+ bits = expr.result_size(self.tyenv)
152
+ self.state.register_read(expr.offset, bits // self.arch.byte_width)
153
+ return RegisterOffset(bits, expr.offset, 0)
154
+
155
+ def _handle_expr_GetI(self, expr):
156
+ return None
157
+
158
+ def _handle_expr_ITE(self, expr):
159
+ return None
160
+
161
+ def _handle_expr_Load(self, expr):
162
+ addr = self._expr(expr.addr)
163
+ if isinstance(addr, SpOffset):
164
+ self.state.stack_read(addr.offset, expr.result_size(self.tyenv) // self.arch.byte_width)
165
+ return self.state.simple_stack.get(addr.offset)
166
+ return None
167
+
168
+ def _handle_expr_RdTmp(self, expr):
169
+ return self.state.tmps.get(expr.tmp, None)
170
+
171
+ def _handle_expr_VECRET(self, expr):
172
+ return None
173
+
174
+ @binop_handler
175
+ def _handle_binop_Add(self, expr):
176
+ op0, op1 = self._expr(expr.args[0]), self._expr(expr.args[1])
177
+ if isinstance(op0, SpOffset) and isinstance(op1, int):
178
+ return SpOffset(op0.bits, s2u(op0.offset + op1, op0.bits), is_base=op0.is_base)
179
+ if isinstance(op1, SpOffset) and isinstance(op0, int):
180
+ return SpOffset(op1.bits, s2u(op1.offset + op0, op1.bits), is_base=op1.is_base)
181
+ return None
182
+
183
+ @binop_handler
184
+ def _handle_binop_Sub(self, expr):
185
+ op0, op1 = self._expr(expr.args[0]), self._expr(expr.args[1])
186
+ if isinstance(op0, SpOffset) and isinstance(op1, int):
187
+ return SpOffset(op0.bits, s2u(op0.offset - op1, op0.bits), is_base=op0.is_base)
188
+ if isinstance(op1, SpOffset) and isinstance(op0, int):
189
+ return SpOffset(op1.bits, s2u(op1.offset - op0, op1.bits), is_base=op1.is_base)
190
+ return None
191
+
192
+ @binop_handler
193
+ def _handle_binop_And(self, expr):
194
+ op0, op1 = self._expr(expr.args[0]), self._expr(expr.args[1])
195
+ if isinstance(op0, SpOffset):
196
+ return op0
197
+ if isinstance(op1, SpOffset):
198
+ return op1
199
+ return None
200
+
201
+
202
+ class FactCollector(Analysis):
203
+ """
204
+ An extremely fast analysis that extracts necessary facts of a function for CallingConventionAnalysis to make
205
+ a decision on the calling convention and prototype of a function.
206
+ """
207
+
208
+ def __init__(self, func: Function, max_depth: int = 5):
209
+ self.function = func
210
+ self._max_depth = max_depth
211
+
212
+ self.input_args: list[SimRegArg | SimStackArg] | None = None
213
+ self.retval_size: int | None = None
214
+
215
+ self._analyze()
216
+
217
+ def _analyze(self):
218
+ # breadth-first search using function graph, collect registers and stack variables that are written to as well
219
+ # as read from, until max_depth is reached
220
+
221
+ end_states = self._analyze_startpoint()
222
+ self._analyze_endpoints_for_retval_size()
223
+ callee_restored_regs = self._analyze_endpoints_for_restored_regs()
224
+ self._determine_input_args(end_states, callee_restored_regs)
225
+
226
+ def _analyze_startpoint(self):
227
+ func_graph = self.function.transition_graph
228
+ startpoint = self.function.startpoint
229
+ bp_as_gpr = self.function.info.get("bp_as_gpr", False)
230
+ engine = SimEngineFactCollectorVEX(self.project, bp_as_gpr)
231
+ init_state = FactCollectorState()
232
+ if self.project.arch.call_pushes_ret:
233
+ init_state.sp_value = self.project.arch.bytes
234
+ init_state.bp_value = init_state.sp_value
235
+
236
+ traversed = set()
237
+ queue: list[tuple[int, FactCollectorState, BlockNode | HookNode | Function, BlockNode | HookNode | None]] = [
238
+ (0, init_state, startpoint, None)
239
+ ]
240
+ end_states: list[FactCollectorState] = []
241
+ while queue:
242
+ depth, state, node, retnode = queue.pop(0)
243
+ traversed.add(node)
244
+
245
+ if depth > self._max_depth:
246
+ end_states.append(state)
247
+ break
248
+
249
+ if isinstance(node, BlockNode) and node.size == 0:
250
+ continue
251
+ if isinstance(node, HookNode):
252
+ # attempt to convert it into a function
253
+ if self.kb.functions.contains_addr(node.addr):
254
+ node = self.kb.functions.get_by_addr(node.addr)
255
+ else:
256
+ continue
257
+ if isinstance(node, Function):
258
+ if node.calling_convention is not None and node.prototype is not None:
259
+ # consume args and overwrite the return register
260
+ self._handle_function(state, node)
261
+ if node.returning is False or retnode is None:
262
+ # the function call does not return
263
+ end_states.append(state)
264
+ else:
265
+ # enqueue the retnode, but we don't increment the depth
266
+ new_state = state.copy()
267
+ if self.project.arch.call_pushes_ret:
268
+ new_state.sp_value += self.project.arch.bytes
269
+ queue.append((depth, new_state, retnode, None))
270
+ continue
271
+
272
+ block = self.project.factory.block(node.addr, size=node.size)
273
+ engine.process(state, block=block)
274
+
275
+ successor_added = False
276
+ call_succ, ret_succ = None, None
277
+ for _, succ, data in func_graph.out_edges(node, data=True):
278
+ edge_type = data.get("type")
279
+ if succ not in traversed and depth + 1 <= self._max_depth:
280
+ if edge_type == "fake_return":
281
+ ret_succ = succ
282
+ elif edge_type == "transition":
283
+ successor_added = True
284
+ queue.append((depth + 1, state.copy(), succ, None))
285
+ elif edge_type == "call":
286
+ call_succ = succ
287
+ if call_succ is not None:
288
+ successor_added = True
289
+ queue.append((depth + 1, state.copy(), call_succ, ret_succ))
290
+
291
+ if not successor_added:
292
+ end_states.append(state)
293
+
294
+ return end_states
295
+
296
+ def _handle_function(self, state: FactCollectorState, func: Function) -> None:
297
+ try:
298
+ arg_locs = func.calling_convention.arg_locs(func.prototype)
299
+ except (TypeError, ValueError):
300
+ func.prototype = None
301
+ return
302
+
303
+ if None in arg_locs:
304
+ return
305
+
306
+ for arg_loc in arg_locs:
307
+ for loc in arg_loc.get_footprint():
308
+ if isinstance(loc, SimRegArg):
309
+ state.register_read(self.project.arch.registers[loc.reg_name][0] + loc.reg_offset, loc.size)
310
+ elif isinstance(loc, SimStackArg):
311
+ sp_value = state.sp_value
312
+ if sp_value is not None:
313
+ state.stack_read(sp_value + loc.stack_offset, loc.size)
314
+
315
+ # clobber caller-saved regs
316
+ for reg_name in func.calling_convention.CALLER_SAVED_REGS:
317
+ offset = self.project.arch.registers[reg_name][0]
318
+ state.register_written(offset, self.project.arch.registers[reg_name][1])
319
+
320
+ def _analyze_endpoints_for_retval_size(self):
321
+ """
322
+ Analyze all endpoints to determine the return value size.
323
+ """
324
+ func_graph = self.function.transition_graph
325
+ cc_cls = default_cc(
326
+ self.project.arch.name, platform=self.project.simos.name if self.project.simos is not None else None
327
+ )
328
+ cc = cc_cls(self.project.arch)
329
+ if isinstance(cc.RETURN_VAL, SimRegArg):
330
+ retreg_offset = cc.RETURN_VAL.check_offset(self.project.arch)
331
+ else:
332
+ return
333
+
334
+ retval_sizes = []
335
+ for endpoint in self.function.endpoints:
336
+ traversed = set()
337
+ queue: list[tuple[int, BlockNode | HookNode]] = [(0, endpoint)]
338
+ while queue:
339
+ depth, node = queue.pop(0)
340
+ traversed.add(node)
341
+
342
+ if depth > 3:
343
+ break
344
+
345
+ if isinstance(node, BlockNode) and node.size == 0:
346
+ continue
347
+ if isinstance(node, HookNode):
348
+ # attempt to convert it into a function
349
+ if self.kb.functions.contains_addr(node.addr):
350
+ node = self.kb.functions.get_by_addr(node.addr)
351
+ else:
352
+ continue
353
+ if isinstance(node, Function):
354
+ if (
355
+ node.calling_convention is not None
356
+ and node.prototype is not None
357
+ and node.prototype.returnty is not None
358
+ and not isinstance(node.prototype.returnty, SimTypeBottom)
359
+ ):
360
+ # assume the function overwrites the return variable
361
+ retval_size = (
362
+ node.prototype.returnty.with_arch(self.project.arch).size // self.project.arch.byte_width
363
+ )
364
+ retval_sizes.append(retval_size)
365
+ continue
366
+
367
+ block = self.project.factory.block(node.addr, size=node.size)
368
+ # scan the block statements backwards to find writes to the return value register
369
+ retval_size = None
370
+ for stmt in reversed(block.vex.statements):
371
+ if isinstance(stmt, pyvex.IRStmt.Put):
372
+ size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
373
+ if stmt.offset == retreg_offset:
374
+ retval_size = max(size, 1)
375
+
376
+ if retval_size is not None:
377
+ retval_sizes.append(retval_size)
378
+ continue
379
+
380
+ for pred, _, data in func_graph.in_edges(node, data=True):
381
+ edge_type = data.get("type")
382
+ if pred not in traversed and depth + 1 <= self._max_depth:
383
+ if edge_type == "fake_return":
384
+ continue
385
+ if edge_type in {"transition", "call"}:
386
+ queue.append((depth + 1, pred))
387
+
388
+ self.retval_size = max(retval_sizes) if retval_sizes else None
389
+
390
+ def _analyze_endpoints_for_restored_regs(self):
391
+ """
392
+ Analyze all endpoints to determine the restored registers.
393
+ """
394
+ func_graph = self.function.transition_graph
395
+ callee_restored_regs = set()
396
+
397
+ for endpoint in self.function.endpoints:
398
+ traversed = set()
399
+ queue: list[tuple[int, BlockNode | HookNode]] = [(0, endpoint)]
400
+ while queue:
401
+ depth, node = queue.pop(0)
402
+ traversed.add(node)
403
+
404
+ if depth > 3:
405
+ break
406
+
407
+ if isinstance(node, BlockNode) and node.size == 0:
408
+ continue
409
+ if isinstance(node, (HookNode, Function)):
410
+ continue
411
+
412
+ block = self.project.factory.block(node.addr, size=node.size)
413
+ # scan the block statements backwards to find all statements that restore registers from the stack
414
+ tmps = {}
415
+ for stmt in block.vex.statements:
416
+ if isinstance(stmt, pyvex.IRStmt.WrTmp):
417
+ if isinstance(stmt.data, pyvex.IRExpr.Get) and stmt.data.offset in {
418
+ self.project.arch.bp_offset,
419
+ self.project.arch.sp_offset,
420
+ }:
421
+ tmps[stmt.tmp] = "sp"
422
+ elif (
423
+ isinstance(stmt.data, pyvex.IRExpr.Load)
424
+ and isinstance(stmt.data.addr, pyvex.IRExpr.RdTmp)
425
+ and tmps.get(stmt.data.addr.tmp) == "sp"
426
+ ):
427
+ tmps[stmt.tmp] = "stack_value"
428
+ elif isinstance(stmt.data, pyvex.IRExpr.Const):
429
+ tmps[stmt.tmp] = "const"
430
+ elif isinstance(stmt.data, pyvex.IRExpr.Binop) and ( # noqa:SIM102
431
+ stmt.data.op.startswith("Iop_Add") or stmt.data.op.startswith("Iop_Sub")
432
+ ):
433
+ if (
434
+ isinstance(stmt.data.args[0], pyvex.IRExpr.RdTmp)
435
+ and tmps.get(stmt.data.args[0].tmp) == "sp"
436
+ ) or (
437
+ isinstance(stmt.data.args[1], pyvex.IRExpr.RdTmp)
438
+ and tmps.get(stmt.data.args[1].tmp) == "sp"
439
+ ):
440
+ tmps[stmt.tmp] = "sp"
441
+ if isinstance(stmt, pyvex.IRStmt.Put):
442
+ size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
443
+ # is the data loaded from the stack?
444
+ if (
445
+ size == self.project.arch.bytes
446
+ and isinstance(stmt.data, pyvex.IRExpr.RdTmp)
447
+ and tmps.get(stmt.data.tmp) == "stack_value"
448
+ ):
449
+ callee_restored_regs.add(stmt.offset)
450
+
451
+ for pred, _, data in func_graph.in_edges(node, data=True):
452
+ edge_type = data.get("type")
453
+ if pred not in traversed and depth + 1 <= self._max_depth and edge_type == "transition":
454
+ queue.append((depth + 1, pred))
455
+
456
+ return callee_restored_regs
457
+
458
+ def _determine_input_args(self, end_states: list[FactCollectorState], callee_restored_regs: set[int]) -> None:
459
+ self.input_args = []
460
+ reg_offset_created = set()
461
+ callee_saved_regs = set()
462
+ callee_saved_reg_stack_offsets = set()
463
+
464
+ # determine callee-saved registers
465
+ for state in end_states:
466
+ for reg_offset, stack_offset in state.callee_stored_regs.items():
467
+ if reg_offset in callee_restored_regs:
468
+ callee_saved_regs.add(reg_offset)
469
+ callee_saved_reg_stack_offsets.add(stack_offset)
470
+
471
+ for state in end_states:
472
+ for offset, size in state.reg_reads.items():
473
+ if (
474
+ offset in reg_offset_created
475
+ or offset == self.project.arch.bp_offset
476
+ or not is_sane_register_variable(self.project.arch, offset, size)
477
+ or offset in callee_saved_regs
478
+ ):
479
+ continue
480
+ reg_offset_created.add(offset)
481
+ if self.project.arch.name in {"AMD64", "X86"} and size < self.project.arch.bytes:
482
+ # use complete registers on AMD64 and X86
483
+ reg_name = self.project.arch.translate_register_name(offset, size=self.project.arch.bytes)
484
+ arg = SimRegArg(reg_name, self.project.arch.bytes)
485
+ else:
486
+ reg_name = self.project.arch.translate_register_name(offset, size=size)
487
+ arg = SimRegArg(reg_name, size)
488
+ self.input_args.append(arg)
489
+
490
+ stack_offset_created = set()
491
+ ret_addr_offset = 0 if not self.project.arch.call_pushes_ret else self.project.arch.bytes
492
+ for state in end_states:
493
+ for offset, size in state.stack_reads.items():
494
+ offset = u2s(offset, self.project.arch.bits)
495
+ if offset - ret_addr_offset > 0:
496
+ if offset in stack_offset_created or offset in callee_saved_reg_stack_offsets:
497
+ continue
498
+ stack_offset_created.add(offset)
499
+ arg = SimStackArg(offset - ret_addr_offset, size)
500
+ self.input_args.append(arg)
501
+
502
+
503
+ AnalysesHub.register_default("FunctionFactCollector", FactCollector)
@@ -0,0 +1,57 @@
1
+ from __future__ import annotations
2
+ import logging
3
+
4
+ import archinfo
5
+ from archinfo.arch_arm import is_arm_arch, ArchARMHF
6
+
7
+ from angr.calling_conventions import SimCC
8
+
9
+ l = logging.getLogger(__name__)
10
+
11
+
12
+ def is_sane_register_variable(arch: archinfo.Arch, reg_offset: int, reg_size: int, def_cc: SimCC | None = None) -> bool:
13
+ """
14
+ Filters out all registers that are surely not members of function arguments.
15
+ This can be seen as a workaround, since VariableRecoveryFast sometimes gives input variables of cc_ndep (which
16
+ is a VEX-specific register) :-(
17
+
18
+ :param reg_offset: The register offset.
19
+ :param reg_size: The register size.
20
+ :return: True if it is an acceptable function argument, False otherwise.
21
+ :rtype: bool
22
+ """
23
+
24
+ arch_name = arch.name
25
+ if ":" in arch_name:
26
+ # for pcode architectures, we only leave registers that are known to be used as input arguments
27
+ if def_cc is not None:
28
+ return arch.translate_register_name(reg_offset, size=reg_size) in def_cc.ARG_REGS
29
+ return True
30
+
31
+ # VEX
32
+ if arch_name == "AARCH64":
33
+ return 16 <= reg_offset < 80 # x0-x7
34
+
35
+ if arch_name == "AMD64":
36
+ return 24 <= reg_offset < 40 or 64 <= reg_offset < 104 # rcx, rdx # rsi, rdi, r8, r9, r10
37
+ # 224 <= reg_offset < 480) # xmm0-xmm7
38
+
39
+ if is_arm_arch(arch):
40
+ if isinstance(arch, ArchARMHF):
41
+ return 8 <= reg_offset < 24 or 128 <= reg_offset < 160 # r0 - 32 # s0 - s7, or d0 - d4
42
+ return 8 <= reg_offset < 24 # r0-r3
43
+
44
+ if arch_name == "MIPS32":
45
+ return 24 <= reg_offset < 40 # a0-a3
46
+
47
+ if arch_name == "MIPS64":
48
+ return 48 <= reg_offset < 80 or 112 <= reg_offset < 208 # a0-a3 or t4-t7
49
+
50
+ if arch_name == "PPC32":
51
+ return 28 <= reg_offset < 60 # r3-r10
52
+
53
+ if arch_name == "X86":
54
+ return 8 <= reg_offset < 24 or 160 <= reg_offset < 288 # eax, ebx, ecx, edx # xmm0-xmm7
55
+
56
+ l.critical("Unsupported architecture %s.", arch.name)
57
+ return True
@@ -1639,7 +1639,7 @@ class JumpTableResolver(IndirectJumpResolver):
1639
1639
  # If we're just reading a constant, don't bother with the rest of this mess!
1640
1640
  if isinstance(load_stmt, pyvex.IRStmt.WrTmp):
1641
1641
  assert isinstance(load_stmt.data, pyvex.IRExpr.Load)
1642
- if type(load_stmt.data.addr) is pyvex.IRExpr.Const:
1642
+ if isinstance(load_stmt.data.addr, pyvex.IRExpr.Const):
1643
1643
  # It's directly loading from a constant address
1644
1644
  # e.g.,
1645
1645
  # ldr r0, =main+1
@@ -1656,7 +1656,7 @@ class JumpTableResolver(IndirectJumpResolver):
1656
1656
  l.info("Resolved constant indirect jump from %#08x to %#08x", addr, jump_target_addr)
1657
1657
  return jump_target
1658
1658
 
1659
- elif isinstance(load_stmt, pyvex.IRStmt.LoadG) and type(load_stmt.addr) is pyvex.IRExpr.Const:
1659
+ elif isinstance(load_stmt, pyvex.IRStmt.LoadG) and isinstance(load_stmt.addr, pyvex.IRExpr.Const):
1660
1660
  # It's directly loading from a constant address
1661
1661
  # e.g.,
1662
1662
  # 4352c SUB R1, R11, #0x1000
@@ -2269,9 +2269,9 @@ class JumpTableResolver(IndirectJumpResolver):
2269
2269
 
2270
2270
  if isinstance(load_stmt, pyvex.IRStmt.WrTmp):
2271
2271
  assert isinstance(load_stmt.data, pyvex.IRExpr.Load)
2272
- if type(load_stmt.data.addr) is pyvex.IRExpr.RdTmp:
2272
+ if isinstance(load_stmt.data.addr, pyvex.IRExpr.RdTmp):
2273
2273
  load_addr_tmp = load_stmt.data.addr.tmp
2274
- elif type(load_stmt.data.addr) is pyvex.IRExpr.Const:
2274
+ elif isinstance(load_stmt.data.addr, pyvex.IRExpr.Const):
2275
2275
  # It's directly loading from a constant address
2276
2276
  # e.g.,
2277
2277
  # ldr r0, =main+1
@@ -2280,9 +2280,9 @@ class JumpTableResolver(IndirectJumpResolver):
2280
2280
  jump_target_addr = load_stmt.data.addr.con.value
2281
2281
  return claripy.BVV(jump_target_addr, state.arch.bits)
2282
2282
  elif isinstance(load_stmt, pyvex.IRStmt.LoadG):
2283
- if type(load_stmt.addr) is pyvex.IRExpr.RdTmp:
2283
+ if isinstance(load_stmt.addr, pyvex.IRExpr.RdTmp):
2284
2284
  load_addr_tmp = load_stmt.addr.tmp
2285
- elif type(load_stmt.addr) is pyvex.IRExpr.Const:
2285
+ elif isinstance(load_stmt.addr, pyvex.IRExpr.Const):
2286
2286
  # It's directly loading from a constant address
2287
2287
  # e.g.,
2288
2288
  # 4352c SUB R1, R11, #0x1000
@@ -7,6 +7,7 @@ import threading
7
7
  import time
8
8
  import logging
9
9
  from collections import defaultdict
10
+ from enum import Enum
10
11
 
11
12
  import networkx
12
13
 
@@ -16,7 +17,7 @@ from angr.utils.graph import GraphUtils
16
17
  from angr.simos import SimWindows
17
18
  from angr.utils.mp import mp_context, Initializer
18
19
  from angr.knowledge_plugins.cfg import CFGModel
19
- from . import Analysis, register_analysis, VariableRecoveryFast, CallingConventionAnalysis
20
+ from . import Analysis, register_analysis, VariableRecoveryFast, CallingConventionAnalysis, FactCollector
20
21
 
21
22
  if TYPE_CHECKING:
22
23
  from angr.calling_conventions import SimCC
@@ -30,6 +31,18 @@ _l = logging.getLogger(name=__name__)
30
31
  _mp_context = mp_context()
31
32
 
32
33
 
34
+ class CallingConventionAnalysisMode(Enum):
35
+ """
36
+ The mode of calling convention analysis.
37
+
38
+ FAST: Using FactCollector to collect facts, then using the facts for calling convention analysis.
39
+ VARIABLES: Using variables in VariableManager for calling convention analysis.
40
+ """
41
+
42
+ FAST = "fast"
43
+ VARIABLES = "variables"
44
+
45
+
33
46
  class CompleteCallingConventionsAnalysis(Analysis):
34
47
  """
35
48
  Implements full-binary calling convention analysis. During the initial analysis of a binary, you may set
@@ -39,6 +52,7 @@ class CompleteCallingConventionsAnalysis(Analysis):
39
52
 
40
53
  def __init__(
41
54
  self,
55
+ mode: CallingConventionAnalysisMode = CallingConventionAnalysisMode.FAST,
42
56
  recover_variables=False,
43
57
  low_priority=False,
44
58
  force=False,
@@ -71,6 +85,7 @@ class CompleteCallingConventionsAnalysis(Analysis):
71
85
  :param workers: Number of multiprocessing workers.
72
86
  """
73
87
 
88
+ self.mode = mode
74
89
  self._recover_variables = recover_variables
75
90
  self._low_priority = low_priority
76
91
  self._force = force
@@ -88,6 +103,10 @@ class CompleteCallingConventionsAnalysis(Analysis):
88
103
  self._func_graphs = func_graphs if func_graphs else {}
89
104
  self.prototype_libnames: set[str] = set()
90
105
 
106
+ # sanity check
107
+ if self.mode not in {CallingConventionAnalysisMode.FAST, CallingConventionAnalysisMode.VARIABLES}:
108
+ raise ValueError(f"Invalid calling convention analysis mode {self.mode}.")
109
+
91
110
  self._func_addrs = [] # a list that holds addresses of all functions to be analyzed
92
111
  self._results = []
93
112
  if workers > 0:
@@ -322,7 +341,11 @@ class CompleteCallingConventionsAnalysis(Analysis):
322
341
  self.kb.variables.get_function_manager(func_addr),
323
342
  )
324
343
 
325
- if self._recover_variables and self.function_needs_variable_recovery(func):
344
+ if (
345
+ self.mode == CallingConventionAnalysisMode.VARIABLES
346
+ and self._recover_variables
347
+ and self.function_needs_variable_recovery(func)
348
+ ):
326
349
  # special case: we don't have a PCode-engine variable recovery analysis for PCode architectures!
327
350
  if ":" in self.project.arch.name and self._func_graphs and func.addr in self._func_graphs:
328
351
  # this is a pcode architecture
@@ -341,9 +364,15 @@ class CompleteCallingConventionsAnalysis(Analysis):
341
364
  )
342
365
  return None, None, None, None
343
366
 
367
+ kwargs = {}
368
+ if self.mode == CallingConventionAnalysisMode.FAST:
369
+ facts = self.project.analyses[FactCollector].prep(kb=self.kb)(func)
370
+ kwargs["input_args"] = facts.input_args
371
+ kwargs["retval_size"] = facts.retval_size
372
+
344
373
  # determine the calling convention of each function
345
374
  cc_analysis = self.project.analyses[CallingConventionAnalysis].prep(kb=self.kb)(
346
- func, cfg=self._cfg, analyze_callsites=self._analyze_callsites
375
+ func, cfg=self._cfg, analyze_callsites=self._analyze_callsites, **kwargs
347
376
  )
348
377
 
349
378
  if cc_analysis.cc is not None: