angr 9.2.123__py3-none-manylinux2014_aarch64.whl → 9.2.125__py3-none-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/__init__.py +9 -1
- angr/analyses/cfg/indirect_jump_resolvers/mips_elf_fast.py +11 -8
- angr/analyses/cfg/indirect_jump_resolvers/mips_elf_got.py +2 -2
- angr/analyses/codecave.py +77 -0
- angr/analyses/decompiler/ail_simplifier.py +16 -19
- angr/analyses/decompiler/callsite_maker.py +8 -7
- angr/analyses/decompiler/ccall_rewriters/amd64_ccalls.py +24 -2
- angr/analyses/decompiler/clinic.py +58 -2
- angr/analyses/decompiler/condition_processor.py +10 -3
- angr/analyses/decompiler/decompilation_cache.py +2 -0
- angr/analyses/decompiler/decompiler.py +54 -8
- angr/analyses/decompiler/dephication/graph_vvar_mapping.py +10 -2
- angr/analyses/decompiler/dephication/rewriting_engine.py +64 -1
- angr/analyses/decompiler/expression_narrower.py +5 -1
- angr/analyses/decompiler/optimization_passes/__init__.py +3 -0
- angr/analyses/decompiler/optimization_passes/div_simplifier.py +4 -1
- angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +13 -0
- angr/analyses/decompiler/optimization_passes/ite_region_converter.py +23 -4
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +3 -1
- angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +8 -5
- angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +10 -5
- angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +18 -7
- angr/analyses/decompiler/optimization_passes/switch_default_case_duplicator.py +6 -0
- angr/analyses/decompiler/optimization_passes/tag_slicer.py +41 -0
- angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +2 -0
- angr/analyses/decompiler/peephole_optimizations/const_mull_a_shift.py +2 -0
- angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +2 -2
- angr/analyses/decompiler/peephole_optimizations/remove_cascading_conversions.py +8 -2
- angr/analyses/decompiler/region_identifier.py +36 -0
- angr/analyses/decompiler/region_simplifiers/loop.py +2 -8
- angr/analyses/decompiler/region_simplifiers/switch_cluster_simplifier.py +9 -3
- angr/analyses/decompiler/ssailification/rewriting.py +5 -2
- angr/analyses/decompiler/ssailification/rewriting_engine.py +151 -25
- angr/analyses/decompiler/ssailification/rewriting_state.py +1 -0
- angr/analyses/decompiler/ssailification/ssailification.py +17 -9
- angr/analyses/decompiler/ssailification/traversal.py +3 -1
- angr/analyses/decompiler/ssailification/traversal_engine.py +35 -8
- angr/analyses/decompiler/ssailification/traversal_state.py +1 -0
- angr/analyses/decompiler/structured_codegen/c.py +42 -4
- angr/analyses/decompiler/structuring/phoenix.py +3 -0
- angr/analyses/patchfinder.py +137 -0
- angr/analyses/pathfinder.py +282 -0
- angr/analyses/propagator/engine_ail.py +10 -3
- angr/analyses/reaching_definitions/engine_ail.py +10 -15
- angr/analyses/s_propagator.py +16 -9
- angr/analyses/s_reaching_definitions/s_rda_view.py +127 -63
- angr/analyses/smc.py +159 -0
- angr/analyses/variable_recovery/engine_ail.py +14 -0
- angr/analyses/variable_recovery/engine_base.py +11 -0
- angr/angrdb/models.py +1 -2
- angr/engines/light/engine.py +12 -0
- angr/engines/vex/heavy/heavy.py +2 -0
- angr/exploration_techniques/spiller_db.py +1 -2
- angr/knowledge_plugins/__init__.py +2 -0
- angr/knowledge_plugins/decompilation.py +45 -0
- angr/knowledge_plugins/functions/function.py +4 -0
- angr/knowledge_plugins/functions/function_manager.py +18 -9
- angr/knowledge_plugins/functions/function_parser.py +1 -1
- angr/knowledge_plugins/functions/soot_function.py +1 -0
- angr/knowledge_plugins/key_definitions/atoms.py +8 -0
- angr/misc/ux.py +2 -2
- angr/procedures/definitions/parse_win32json.py +2 -1
- angr/project.py +17 -1
- angr/state_plugins/history.py +6 -4
- angr/storage/memory_mixins/actions_mixin.py +7 -7
- angr/storage/memory_mixins/address_concretization_mixin.py +5 -5
- angr/storage/memory_mixins/bvv_conversion_mixin.py +1 -1
- angr/storage/memory_mixins/clouseau_mixin.py +3 -3
- angr/storage/memory_mixins/conditional_store_mixin.py +3 -3
- angr/storage/memory_mixins/default_filler_mixin.py +3 -3
- angr/storage/memory_mixins/memory_mixin.py +45 -34
- angr/storage/memory_mixins/paged_memory/page_backer_mixins.py +15 -14
- angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +27 -16
- angr/storage/memory_mixins/paged_memory/pages/cooperation.py +18 -9
- angr/storage/memory_mixins/paged_memory/pages/ispo_mixin.py +5 -5
- angr/storage/memory_mixins/paged_memory/pages/multi_values.py +89 -55
- angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +16 -25
- angr/storage/memory_mixins/paged_memory/pages/permissions_mixin.py +11 -9
- angr/storage/memory_mixins/paged_memory/pages/ultra_page.py +23 -7
- angr/storage/memory_mixins/paged_memory/privileged_mixin.py +1 -1
- angr/storage/memory_mixins/regioned_memory/region_meta_mixin.py +9 -7
- angr/storage/memory_mixins/regioned_memory/regioned_memory_mixin.py +9 -9
- angr/storage/memory_mixins/regioned_memory/static_find_mixin.py +1 -0
- angr/storage/memory_mixins/simple_interface_mixin.py +2 -2
- angr/storage/memory_mixins/simplification_mixin.py +2 -2
- angr/storage/memory_mixins/size_resolution_mixin.py +1 -1
- angr/storage/memory_mixins/slotted_memory.py +3 -3
- angr/storage/memory_mixins/smart_find_mixin.py +1 -0
- angr/storage/memory_mixins/underconstrained_mixin.py +5 -5
- angr/storage/memory_mixins/unwrapper_mixin.py +4 -4
- angr/storage/memory_object.py +4 -3
- angr/utils/bits.py +4 -0
- angr/utils/constants.py +1 -1
- angr/utils/graph.py +15 -0
- angr/utils/tagged_interval_map.py +112 -0
- angr/vaults.py +2 -2
- {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/METADATA +6 -6
- {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/RECORD +103 -96
- {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/WHEEL +1 -1
- {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/LICENSE +0 -0
- {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/entry_points.txt +0 -0
- {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/top_level.txt +0 -0
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
from collections import OrderedDict
|
|
3
3
|
|
|
4
4
|
from ailment.statement import Assignment, Call, Store, ConditionalJump
|
|
5
|
-
from ailment.expression import Register, BinaryOp, StackBaseOffset, ITE, VEXCCallExpression
|
|
5
|
+
from ailment.expression import Register, BinaryOp, StackBaseOffset, ITE, VEXCCallExpression, Tmp, DirtyExpression
|
|
6
6
|
|
|
7
7
|
from angr.engines.light import SimEngineLight, SimEngineLightAILMixin
|
|
8
8
|
from angr.utils.ssa import get_reg_offset_base
|
|
@@ -21,7 +21,14 @@ class SimEngineSSATraversal(
|
|
|
21
21
|
state: TraversalState
|
|
22
22
|
|
|
23
23
|
def __init__(
|
|
24
|
-
self,
|
|
24
|
+
self,
|
|
25
|
+
arch,
|
|
26
|
+
sp_tracker=None,
|
|
27
|
+
bp_as_gpr: bool = False,
|
|
28
|
+
def_to_loc=None,
|
|
29
|
+
loc_to_defs=None,
|
|
30
|
+
stackvars: bool = False,
|
|
31
|
+
tmps: bool = False,
|
|
25
32
|
):
|
|
26
33
|
super().__init__()
|
|
27
34
|
|
|
@@ -29,14 +36,15 @@ class SimEngineSSATraversal(
|
|
|
29
36
|
self.sp_tracker = sp_tracker
|
|
30
37
|
self.bp_as_gpr = bp_as_gpr
|
|
31
38
|
self.stackvars = stackvars
|
|
39
|
+
self.tmps = tmps
|
|
32
40
|
|
|
33
|
-
self.def_to_loc = def_to_loc if def_to_loc is not None else
|
|
41
|
+
self.def_to_loc = def_to_loc if def_to_loc is not None else []
|
|
34
42
|
self.loc_to_defs = loc_to_defs if loc_to_defs is not None else OrderedDict()
|
|
35
43
|
|
|
36
44
|
def _handle_Assignment(self, stmt: Assignment):
|
|
37
45
|
if isinstance(stmt.dst, Register):
|
|
38
46
|
codeloc = self._codeloc()
|
|
39
|
-
self.def_to_loc
|
|
47
|
+
self.def_to_loc.append((stmt.dst, codeloc))
|
|
40
48
|
if codeloc not in self.loc_to_defs:
|
|
41
49
|
self.loc_to_defs[codeloc] = OrderedSet()
|
|
42
50
|
self.loc_to_defs[codeloc].add(stmt.dst)
|
|
@@ -52,7 +60,7 @@ class SimEngineSSATraversal(
|
|
|
52
60
|
|
|
53
61
|
if self.stackvars and isinstance(stmt.addr, StackBaseOffset) and isinstance(stmt.addr.offset, int):
|
|
54
62
|
codeloc = self._codeloc()
|
|
55
|
-
self.def_to_loc
|
|
63
|
+
self.def_to_loc.append((stmt, codeloc))
|
|
56
64
|
if codeloc not in self.loc_to_defs:
|
|
57
65
|
self.loc_to_defs[codeloc] = OrderedSet()
|
|
58
66
|
self.loc_to_defs[codeloc].add(stmt)
|
|
@@ -69,7 +77,7 @@ class SimEngineSSATraversal(
|
|
|
69
77
|
def _handle_Call(self, stmt: Call):
|
|
70
78
|
if stmt.ret_expr is not None and isinstance(stmt.ret_expr, Register):
|
|
71
79
|
codeloc = self._codeloc()
|
|
72
|
-
self.def_to_loc
|
|
80
|
+
self.def_to_loc.append((stmt.ret_expr, codeloc))
|
|
73
81
|
if codeloc not in self.loc_to_defs:
|
|
74
82
|
self.loc_to_defs[codeloc] = OrderedSet()
|
|
75
83
|
self.loc_to_defs[codeloc].add(stmt.ret_expr)
|
|
@@ -79,18 +87,30 @@ class SimEngineSSATraversal(
|
|
|
79
87
|
|
|
80
88
|
super()._ail_handle_Call(stmt)
|
|
81
89
|
|
|
90
|
+
_handle_CallExpr = _handle_Call
|
|
91
|
+
|
|
82
92
|
def _handle_Register(self, expr: Register):
|
|
83
93
|
base_offset = get_reg_offset_base(expr.reg_offset, self.arch)
|
|
84
94
|
|
|
85
95
|
if base_offset not in self.state.live_registers:
|
|
86
96
|
codeloc = self._codeloc()
|
|
87
|
-
self.def_to_loc
|
|
97
|
+
self.def_to_loc.append((expr, codeloc))
|
|
88
98
|
if codeloc not in self.loc_to_defs:
|
|
89
99
|
self.loc_to_defs[codeloc] = OrderedSet()
|
|
90
100
|
self.loc_to_defs[codeloc].add(expr)
|
|
91
101
|
|
|
92
102
|
self.state.live_registers.add(base_offset)
|
|
93
103
|
|
|
104
|
+
def _handle_Tmp(self, expr: Tmp):
|
|
105
|
+
if self.tmps:
|
|
106
|
+
codeloc = self._codeloc()
|
|
107
|
+
self.def_to_loc.append((expr, codeloc))
|
|
108
|
+
if codeloc not in self.loc_to_defs:
|
|
109
|
+
self.loc_to_defs[codeloc] = OrderedSet()
|
|
110
|
+
self.loc_to_defs[codeloc].add(expr)
|
|
111
|
+
|
|
112
|
+
self.state.live_tmps.add(expr.tmp_idx)
|
|
113
|
+
|
|
94
114
|
def _handle_Cmp(self, expr: BinaryOp):
|
|
95
115
|
self._expr(expr.operands[0])
|
|
96
116
|
self._expr(expr.operands[1])
|
|
@@ -123,9 +143,16 @@ class SimEngineSSATraversal(
|
|
|
123
143
|
for operand in expr.operands:
|
|
124
144
|
self._expr(operand)
|
|
125
145
|
|
|
146
|
+
def _handle_DirtyExpression(self, expr: DirtyExpression):
|
|
147
|
+
for operand in expr.operands:
|
|
148
|
+
self._expr(operand)
|
|
149
|
+
if expr.guard is not None:
|
|
150
|
+
self._expr(expr.guard)
|
|
151
|
+
if expr.maddr is not None:
|
|
152
|
+
self._expr(expr.maddr)
|
|
153
|
+
|
|
126
154
|
def _handle_Dummy(self, expr):
|
|
127
155
|
pass
|
|
128
156
|
|
|
129
157
|
_handle_VirtualVariable = _handle_Dummy
|
|
130
158
|
_handle_Phi = _handle_Dummy
|
|
131
|
-
_handle_DirtyExpression = _handle_Dummy
|
|
@@ -18,6 +18,7 @@ class TraversalState:
|
|
|
18
18
|
|
|
19
19
|
self.live_registers: set[int] = set() if live_registers is None else live_registers
|
|
20
20
|
self.live_stackvars: set[tuple[int, int]] = set() if live_stackvars is None else live_stackvars
|
|
21
|
+
self.live_tmps: set[int] = set() # tmps are internal to a block only and never propagated from another state
|
|
21
22
|
|
|
22
23
|
def copy(self) -> TraversalState:
|
|
23
24
|
return TraversalState(
|
|
@@ -1408,7 +1408,7 @@ class CUnsupportedStatement(CStatement):
|
|
|
1408
1408
|
class CDirtyStatement(CExpression):
|
|
1409
1409
|
__slots__ = ("dirty",)
|
|
1410
1410
|
|
|
1411
|
-
def __init__(self, dirty, **kwargs):
|
|
1411
|
+
def __init__(self, dirty: CDirtyExpression, **kwargs):
|
|
1412
1412
|
super().__init__(**kwargs)
|
|
1413
1413
|
self.dirty = dirty
|
|
1414
1414
|
|
|
@@ -1420,7 +1420,7 @@ class CDirtyStatement(CExpression):
|
|
|
1420
1420
|
indent_str = self.indent_str(indent=indent)
|
|
1421
1421
|
|
|
1422
1422
|
yield indent_str, None
|
|
1423
|
-
yield
|
|
1423
|
+
yield from self.dirty.c_repr_chunks()
|
|
1424
1424
|
yield "\n", None
|
|
1425
1425
|
|
|
1426
1426
|
|
|
@@ -2303,6 +2303,38 @@ class CMultiStatementExpression(CExpression):
|
|
|
2303
2303
|
yield ")", paren
|
|
2304
2304
|
|
|
2305
2305
|
|
|
2306
|
+
class CVEXCCallExpression(CExpression):
|
|
2307
|
+
"""
|
|
2308
|
+
ccall_name(arg0, arg1, ...)
|
|
2309
|
+
"""
|
|
2310
|
+
|
|
2311
|
+
__slots__ = (
|
|
2312
|
+
"callee",
|
|
2313
|
+
"operands",
|
|
2314
|
+
"tags",
|
|
2315
|
+
)
|
|
2316
|
+
|
|
2317
|
+
def __init__(self, callee: str, operands: list[CExpression], tags=None, **kwargs):
|
|
2318
|
+
super().__init__(**kwargs)
|
|
2319
|
+
self.callee = callee
|
|
2320
|
+
self.operands = operands
|
|
2321
|
+
self.tags = tags
|
|
2322
|
+
|
|
2323
|
+
@property
|
|
2324
|
+
def type(self):
|
|
2325
|
+
return SimTypeInt().with_arch(self.codegen.project.arch)
|
|
2326
|
+
|
|
2327
|
+
def c_repr_chunks(self, indent=0, asexpr=False):
|
|
2328
|
+
paren = CClosingObject("(")
|
|
2329
|
+
yield f"{self.callee}", self
|
|
2330
|
+
yield "(", paren
|
|
2331
|
+
for idx, operand in enumerate(self.operands):
|
|
2332
|
+
if idx != 0:
|
|
2333
|
+
yield ", ", None
|
|
2334
|
+
yield from operand.c_repr_chunks()
|
|
2335
|
+
yield ")", paren
|
|
2336
|
+
|
|
2337
|
+
|
|
2306
2338
|
class CDirtyExpression(CExpression):
|
|
2307
2339
|
"""
|
|
2308
2340
|
Ideally all dirty expressions should be handled and converted to proper conversions during conversion from VEX to
|
|
@@ -2424,6 +2456,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2424
2456
|
Expr.BinaryOp: self._handle_Expr_BinaryOp,
|
|
2425
2457
|
Expr.Convert: self._handle_Expr_Convert,
|
|
2426
2458
|
Expr.StackBaseOffset: self._handle_Expr_StackBaseOffset,
|
|
2459
|
+
Expr.VEXCCallExpression: self._handle_Expr_VEXCCallExpression,
|
|
2427
2460
|
Expr.DirtyExpression: self._handle_Expr_Dirty,
|
|
2428
2461
|
Expr.ITE: self._handle_Expr_ITE,
|
|
2429
2462
|
Expr.Reinterpret: self._handle_Reinterpret,
|
|
@@ -3318,7 +3351,8 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3318
3351
|
return clabel
|
|
3319
3352
|
|
|
3320
3353
|
def _handle_Stmt_Dirty(self, stmt: Stmt.DirtyStatement, **kwargs):
|
|
3321
|
-
|
|
3354
|
+
dirty = self._handle(stmt.dirty)
|
|
3355
|
+
return CDirtyStatement(dirty, codegen=self)
|
|
3322
3356
|
|
|
3323
3357
|
#
|
|
3324
3358
|
# AIL expression handlers
|
|
@@ -3519,7 +3553,11 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3519
3553
|
|
|
3520
3554
|
return CTypeCast(None, dst_type.with_arch(self.project.arch), child, tags=expr.tags, codegen=self)
|
|
3521
3555
|
|
|
3522
|
-
def
|
|
3556
|
+
def _handle_Expr_VEXCCallExpression(self, expr: Expr.VEXCCallExpression, **kwargs):
|
|
3557
|
+
operands = [self._handle(arg) for arg in expr.operands]
|
|
3558
|
+
return CVEXCCallExpression(expr.callee, operands, tags=expr.tags, codegen=self)
|
|
3559
|
+
|
|
3560
|
+
def _handle_Expr_Dirty(self, expr: Expr.DirtyExpression, **kwargs):
|
|
3523
3561
|
return CDirtyExpression(expr, codegen=self)
|
|
3524
3562
|
|
|
3525
3563
|
def _handle_Expr_ITE(self, expr: Expr.ITE, **kwargs):
|
|
@@ -566,6 +566,9 @@ class PhoenixStructurer(StructurerBase):
|
|
|
566
566
|
|
|
567
567
|
if next_node is node:
|
|
568
568
|
break
|
|
569
|
+
if next_node is head:
|
|
570
|
+
# we don't want a loop with region head not as the first node of the body!
|
|
571
|
+
return False, None
|
|
569
572
|
if next_node is not node and next_node in seen_nodes:
|
|
570
573
|
return False, None
|
|
571
574
|
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# pylint:disable=missing-class-docstring
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
import logging
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
from sortedcontainers import SortedDict
|
|
9
|
+
|
|
10
|
+
from angr.analyses import Analysis, AnalysesHub
|
|
11
|
+
from angr.utils.bits import ffs
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from angr.knowledge_plugins import Function
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
log = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class OverlappingFunctionsAnalysis(Analysis):
|
|
21
|
+
"""
|
|
22
|
+
Identify functions with interleaved blocks.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
overlapping_functions: dict[int, list[int]]
|
|
26
|
+
|
|
27
|
+
def __init__(self):
|
|
28
|
+
self.overlapping_functions = defaultdict(list)
|
|
29
|
+
addr_to_func_max_addr = SortedDict()
|
|
30
|
+
|
|
31
|
+
for func in self.project.kb.functions.values():
|
|
32
|
+
if func.is_alignment:
|
|
33
|
+
continue
|
|
34
|
+
func_max_addr = max((block.addr + block.size) for block in func.blocks)
|
|
35
|
+
addr_to_func_max_addr[func.addr] = (func, func_max_addr)
|
|
36
|
+
|
|
37
|
+
for idx, (addr, (func, max_addr)) in enumerate(addr_to_func_max_addr.items()):
|
|
38
|
+
for other_addr in addr_to_func_max_addr.islice(idx + 1):
|
|
39
|
+
if other_addr >= max_addr:
|
|
40
|
+
break
|
|
41
|
+
|
|
42
|
+
self.overlapping_functions[addr].append(other_addr)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class FunctionAlignmentAnalysis(Analysis):
|
|
46
|
+
"""
|
|
47
|
+
Determine typical function alignment
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
alignment: int | None
|
|
51
|
+
|
|
52
|
+
def __init__(self):
|
|
53
|
+
self.alignment = None
|
|
54
|
+
|
|
55
|
+
if len(self.project.kb.functions) == 0:
|
|
56
|
+
if self.project.kb.cfgs.get_most_accurate() is None:
|
|
57
|
+
log.warning("Please run CFGFast analysis first, to identify functions")
|
|
58
|
+
return
|
|
59
|
+
|
|
60
|
+
alignment_bins = defaultdict(int)
|
|
61
|
+
count = 0
|
|
62
|
+
for func in self.project.kb.functions.values():
|
|
63
|
+
if not (func.is_alignment or func.is_plt or func.is_simprocedure):
|
|
64
|
+
alignment_bins[ffs(func.addr)] += 1
|
|
65
|
+
count += 1
|
|
66
|
+
|
|
67
|
+
# FIXME: Higher alignment values will be naturally aligned
|
|
68
|
+
|
|
69
|
+
typical_alignment = max(alignment_bins, key=lambda k: alignment_bins[k])
|
|
70
|
+
if count > 10 and alignment_bins[typical_alignment] >= count / 4: # XXX: cutoff
|
|
71
|
+
self.alignment = 1 << max(typical_alignment, 0)
|
|
72
|
+
log.debug("Function alignment appears to be %d bytes", self.alignment)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass
|
|
76
|
+
class AtypicallyAlignedFunction:
|
|
77
|
+
function: Function
|
|
78
|
+
expected_alignment: int
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@dataclass
|
|
82
|
+
class PatchedOutFunctionality:
|
|
83
|
+
patched_function: Function
|
|
84
|
+
patched_out_function: Function
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class PatchFinderAnalysis(Analysis):
|
|
88
|
+
"""
|
|
89
|
+
Looks for binary patches using some basic heuristics:
|
|
90
|
+
- Looking for interleaved functions
|
|
91
|
+
- Looking for unaligned functions
|
|
92
|
+
"""
|
|
93
|
+
|
|
94
|
+
# FIXME: Possible additional heuristics:
|
|
95
|
+
# - Jumps out to end of function, then back
|
|
96
|
+
# - Looking for patch jumps, e.g. push <addr>; ret
|
|
97
|
+
# - Looking for instruction partials broken by a patch (nodecode)
|
|
98
|
+
# - Unusual stack manipulation
|
|
99
|
+
|
|
100
|
+
atypical_alignments: list[Function]
|
|
101
|
+
possibly_patched_out: list[PatchedOutFunctionality]
|
|
102
|
+
|
|
103
|
+
def __init__(self):
|
|
104
|
+
self.atypical_alignments = []
|
|
105
|
+
self.possibly_patched_out = []
|
|
106
|
+
|
|
107
|
+
if len(self.project.kb.functions) == 0:
|
|
108
|
+
if self.project.kb.cfgs.get_most_accurate() is None:
|
|
109
|
+
log.warning("Please run CFGFast analysis first, to identify functions")
|
|
110
|
+
return
|
|
111
|
+
|
|
112
|
+
# In CFGFast with scanning enabled, a function may be created from unreachable blocks within another function.
|
|
113
|
+
# Search for interleaved/overlapping functions to identify possible patches.
|
|
114
|
+
overlapping_functions = self.project.analyses.OverlappingFunctions().overlapping_functions
|
|
115
|
+
for addr, overlapping_func_addrs in overlapping_functions.items():
|
|
116
|
+
func = self.project.kb.functions[addr]
|
|
117
|
+
|
|
118
|
+
# Are the overlapping functions reachable?
|
|
119
|
+
for overlapping_addr in overlapping_func_addrs:
|
|
120
|
+
overlapping_func = self.project.kb.functions[overlapping_addr]
|
|
121
|
+
if self.project.kb.callgraph.in_degree(overlapping_addr) == 0:
|
|
122
|
+
self.possibly_patched_out.append(PatchedOutFunctionality(func, overlapping_func))
|
|
123
|
+
# FIXME: What does the patch do?
|
|
124
|
+
|
|
125
|
+
# Look for unaligned functions
|
|
126
|
+
expected_alignment = self.project.analyses.FunctionAlignment().alignment
|
|
127
|
+
if expected_alignment is not None and expected_alignment > self.project.arch.instruction_alignment:
|
|
128
|
+
for func in self.project.kb.functions.values():
|
|
129
|
+
if not (func.is_alignment or func.is_plt or func.is_simprocedure) and func.addr & (
|
|
130
|
+
expected_alignment - 1
|
|
131
|
+
):
|
|
132
|
+
self.atypical_alignments.append(AtypicallyAlignedFunction(func, expected_alignment))
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
AnalysesHub.register_default("OverlappingFunctions", OverlappingFunctionsAnalysis)
|
|
136
|
+
AnalysesHub.register_default("FunctionAlignment", FunctionAlignmentAnalysis)
|
|
137
|
+
AnalysesHub.register_default("PatchFinder", PatchFinderAnalysis)
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
# pylint:disable=missing-class-docstring
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from enum import Enum, auto
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from weakref import ref
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
|
|
8
|
+
from networkx import DiGraph
|
|
9
|
+
from networkx.algorithms.shortest_paths import single_target_shortest_path_length
|
|
10
|
+
|
|
11
|
+
from angr.sim_state import SimState
|
|
12
|
+
from angr.engines.successors import SimSuccessors
|
|
13
|
+
from angr.knowledge_plugins.cfg import CFGModel, CFGNode
|
|
14
|
+
from .analysis import Analysis, AnalysesHub
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Unreachable(Exception):
|
|
18
|
+
pass
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(eq=False)
|
|
22
|
+
class SimStateMarker:
|
|
23
|
+
addr: int
|
|
24
|
+
parent: SimStateMarker | None = None
|
|
25
|
+
banned: bool = False
|
|
26
|
+
misses: int = 0
|
|
27
|
+
|
|
28
|
+
def __repr__(self):
|
|
29
|
+
inner_repr = "None" if self.parent is None else "..."
|
|
30
|
+
return f"SimStateMarker(addr={self.addr:#x}, parent={inner_repr}, banned={self.banned}, misses={self.misses})"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SuccessorsKind(Enum):
|
|
34
|
+
SAT = auto()
|
|
35
|
+
UNSAT = auto()
|
|
36
|
+
MISSING = auto()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
|
|
40
|
+
class TestPathReport:
|
|
41
|
+
path_markers: dict[int, SimStateMarker]
|
|
42
|
+
termination: SuccessorsKind
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def nilref():
|
|
46
|
+
return None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Pathfinder(Analysis):
|
|
50
|
+
def __init__(self, start_state: SimState, goal_addr: int, cfg: CFGModel, cache_size=10000):
|
|
51
|
+
self.start_state = start_state
|
|
52
|
+
self.goal_addr = goal_addr
|
|
53
|
+
self.goal_state: SimState | None = None
|
|
54
|
+
self.cfg = cfg
|
|
55
|
+
self.cache_size = cache_size
|
|
56
|
+
|
|
57
|
+
# HACK HACK HACK HACK TODO FIXME FISH PLEASE GET RID OF THIS
|
|
58
|
+
extra_edges = []
|
|
59
|
+
for node in self.cfg.graph.nodes:
|
|
60
|
+
if node.is_syscall:
|
|
61
|
+
for pred in self.cfg.graph.pred[node]:
|
|
62
|
+
for succ, data in self.cfg.graph.succ[pred].items():
|
|
63
|
+
if data["jumpkind"] == "Ijk_FakeRet":
|
|
64
|
+
extra_edges.append((node, succ))
|
|
65
|
+
for node, succ in extra_edges:
|
|
66
|
+
self.cfg.graph.add_edge(node, succ, jumpkind="Ijk_Ret")
|
|
67
|
+
|
|
68
|
+
goal_node = self.cfg.get_any_node(goal_addr)
|
|
69
|
+
if goal_node is None:
|
|
70
|
+
raise ValueError(f"Node {goal_addr:#x} is not in graph")
|
|
71
|
+
|
|
72
|
+
self.start_marker = SimStateMarker(start_state.addr)
|
|
73
|
+
self.transition_cache: DiGraph[SimStateMarker] = DiGraph()
|
|
74
|
+
self.transition_cache.add_node(self.start_marker, state=ref(start_state))
|
|
75
|
+
self.base_heuristic: dict[int, int] = {
|
|
76
|
+
node.addr: dist for node, dist in single_target_shortest_path_length(cfg.graph, goal_node)
|
|
77
|
+
}
|
|
78
|
+
self.state_cache = {}
|
|
79
|
+
self.unsat_markers = set()
|
|
80
|
+
self.extra_weight = defaultdict(int)
|
|
81
|
+
|
|
82
|
+
self._search_frontier_marker = self.start_marker
|
|
83
|
+
self._search_path: list[tuple[int, str]] = [(self.start_marker.addr, "Ijk_Boring")]
|
|
84
|
+
self._search_stack = []
|
|
85
|
+
self._search_backtrack_to = {self.start_marker}
|
|
86
|
+
self._search_address_backtrack_points = {self.start_marker.addr: self.start_marker}
|
|
87
|
+
|
|
88
|
+
def cache_state(self, state: SimState):
|
|
89
|
+
self.state_cache[state] = self.state_cache.pop(state, None)
|
|
90
|
+
if len(self.state_cache) > self.cache_size:
|
|
91
|
+
self.state_cache.pop(next(iter(self.state_cache)))
|
|
92
|
+
|
|
93
|
+
def marker_to_state(self, marker: SimStateMarker) -> SimState | None:
|
|
94
|
+
return self.transition_cache.nodes[marker]["state"]()
|
|
95
|
+
|
|
96
|
+
def analyze(self) -> bool:
|
|
97
|
+
while True:
|
|
98
|
+
search_path = self.find_best_hypothesis_path()
|
|
99
|
+
result = self.test_path(search_path)
|
|
100
|
+
if result.termination == SuccessorsKind.SAT:
|
|
101
|
+
self.goal_state = self.marker_to_state(result.path_markers[len(search_path) - 1])
|
|
102
|
+
return True
|
|
103
|
+
marker = result.path_markers[max(result.path_markers)]
|
|
104
|
+
marker.banned = True
|
|
105
|
+
self._search_backtrack_to.add(marker)
|
|
106
|
+
if result.termination == SuccessorsKind.UNSAT:
|
|
107
|
+
self.unsat_markers.add(marker)
|
|
108
|
+
|
|
109
|
+
def _search_backtrack(self):
|
|
110
|
+
if self._search_address_backtrack_points[self._search_frontier_marker.addr] is self._search_frontier_marker:
|
|
111
|
+
self._search_address_backtrack_points.pop(self._search_frontier_marker.addr)
|
|
112
|
+
|
|
113
|
+
self._search_frontier_marker = self._search_frontier_marker.parent
|
|
114
|
+
if self._search_frontier_marker is None:
|
|
115
|
+
raise Unreachable
|
|
116
|
+
|
|
117
|
+
addr, jumpkind = self._search_path.pop()
|
|
118
|
+
if jumpkind == "Ijk_Ret":
|
|
119
|
+
self._search_stack.append(addr)
|
|
120
|
+
elif jumpkind == "Ijk_Call" or jumpkind.startswith("Ijk_Sys"):
|
|
121
|
+
self._search_stack.pop()
|
|
122
|
+
|
|
123
|
+
def find_best_hypothesis_path(self) -> tuple[int, ...]:
|
|
124
|
+
assert self._search_backtrack_to, "Uhh every iteration should set at least one backtrack point"
|
|
125
|
+
if self.start_marker in self._search_backtrack_to:
|
|
126
|
+
self._search_frontier_marker = self.start_marker
|
|
127
|
+
self._search_path: list[tuple[int, str]] = [(self.start_marker.addr, "Ijk_Boring")]
|
|
128
|
+
self._search_stack = []
|
|
129
|
+
self._search_backtrack_to = set()
|
|
130
|
+
else:
|
|
131
|
+
while self._search_backtrack_to:
|
|
132
|
+
self._search_backtrack_to.discard(self._search_frontier_marker)
|
|
133
|
+
try:
|
|
134
|
+
self._search_backtrack()
|
|
135
|
+
except Unreachable as e:
|
|
136
|
+
raise RuntimeError("oops") from e
|
|
137
|
+
|
|
138
|
+
while self._search_path[-1][0] != self.goal_addr:
|
|
139
|
+
banned = {
|
|
140
|
+
marker.addr for marker in self.transition_cache.succ[self._search_frontier_marker] if marker.banned
|
|
141
|
+
}
|
|
142
|
+
current_node = self.cfg.get_any_node(self._search_path[-1][0])
|
|
143
|
+
options = [
|
|
144
|
+
(node, data["jumpkind"], self.base_heuristic[node.addr] + self.extra_weight[node.addr])
|
|
145
|
+
for node, data in self.cfg.graph.succ[current_node].items()
|
|
146
|
+
if data["jumpkind"] != "Ijk_FakeRet"
|
|
147
|
+
and node.addr not in banned
|
|
148
|
+
and node.addr in self.base_heuristic
|
|
149
|
+
and (data["jumpkind"] != "Ijk_Ret" or node.addr == self._search_stack[-1])
|
|
150
|
+
]
|
|
151
|
+
if not options:
|
|
152
|
+
# backtrack
|
|
153
|
+
self._search_frontier_marker.banned = True
|
|
154
|
+
self._search_backtrack()
|
|
155
|
+
continue
|
|
156
|
+
|
|
157
|
+
best_node, best_jumpkind, best_weight = min(
|
|
158
|
+
options,
|
|
159
|
+
default=(None, None),
|
|
160
|
+
key=lambda xyz: xyz[2],
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
assert isinstance(best_jumpkind, str)
|
|
164
|
+
assert isinstance(best_node, CFGNode)
|
|
165
|
+
self.extra_weight[best_node.addr] += 1
|
|
166
|
+
self._search_path.append((best_node.addr, best_jumpkind))
|
|
167
|
+
|
|
168
|
+
if best_jumpkind == "Ijk_Call" or best_jumpkind.startswith("Ijk_Sys"):
|
|
169
|
+
self._search_stack.append(
|
|
170
|
+
next(
|
|
171
|
+
iter(
|
|
172
|
+
node.addr
|
|
173
|
+
for node, data in self.cfg.graph.succ[current_node].items()
|
|
174
|
+
if data["jumpkind"] == "Ijk_FakeRet"
|
|
175
|
+
),
|
|
176
|
+
None,
|
|
177
|
+
)
|
|
178
|
+
)
|
|
179
|
+
elif best_jumpkind == "Ijk_Ret":
|
|
180
|
+
self._search_stack.pop()
|
|
181
|
+
|
|
182
|
+
frontier_marker_nullable = next(
|
|
183
|
+
(
|
|
184
|
+
marker
|
|
185
|
+
for marker in self.transition_cache.succ[self._search_frontier_marker]
|
|
186
|
+
if marker.addr == best_node.addr
|
|
187
|
+
),
|
|
188
|
+
None,
|
|
189
|
+
)
|
|
190
|
+
if frontier_marker_nullable is None:
|
|
191
|
+
new_marker = SimStateMarker(best_node.addr, self._search_frontier_marker)
|
|
192
|
+
self.transition_cache.add_node(new_marker, state=nilref)
|
|
193
|
+
self.transition_cache.add_edge(self._search_frontier_marker, new_marker)
|
|
194
|
+
self._search_frontier_marker = new_marker
|
|
195
|
+
else:
|
|
196
|
+
self._search_frontier_marker = frontier_marker_nullable
|
|
197
|
+
|
|
198
|
+
if self._search_frontier_marker.addr not in self._search_address_backtrack_points:
|
|
199
|
+
self._search_address_backtrack_points[self._search_frontier_marker.addr] = self._search_frontier_marker
|
|
200
|
+
|
|
201
|
+
# TODO does this go above the above stanza?
|
|
202
|
+
if sum(weight == best_weight for _, _, weight in options) != 1:
|
|
203
|
+
self._search_backtrack_to.add(self._search_address_backtrack_points[self._search_frontier_marker.addr])
|
|
204
|
+
|
|
205
|
+
return tuple(addr for addr, _ in self._search_path)
|
|
206
|
+
|
|
207
|
+
def diagnose_unsat(self, state: SimState):
|
|
208
|
+
pass
|
|
209
|
+
|
|
210
|
+
def test_path(self, bbl_addr_trace: tuple[int, ...]) -> TestPathReport:
|
|
211
|
+
assert bbl_addr_trace[0] == self.start_marker.addr, "Paths must begin with the start state"
|
|
212
|
+
|
|
213
|
+
known_markers = [self.start_marker]
|
|
214
|
+
for addr in bbl_addr_trace[1:]:
|
|
215
|
+
for succ in self.transition_cache.succ[known_markers[-1]]:
|
|
216
|
+
if succ.addr == addr:
|
|
217
|
+
break
|
|
218
|
+
else:
|
|
219
|
+
break
|
|
220
|
+
known_markers.append(succ)
|
|
221
|
+
|
|
222
|
+
marker = None
|
|
223
|
+
for ri, marker_ in enumerate(reversed(known_markers)):
|
|
224
|
+
i = len(known_markers) - 1 - ri
|
|
225
|
+
state: SimState = self.transition_cache.nodes[marker_]["state"]()
|
|
226
|
+
marker = marker_
|
|
227
|
+
if state is not None:
|
|
228
|
+
break
|
|
229
|
+
else:
|
|
230
|
+
assert False, "The first item in known_markers should always have a resolvable weakref"
|
|
231
|
+
|
|
232
|
+
while i != len(bbl_addr_trace) - 1:
|
|
233
|
+
assert state.addr == bbl_addr_trace[i]
|
|
234
|
+
|
|
235
|
+
marker.misses += 1
|
|
236
|
+
successors = state.step(strict_block_end=True)
|
|
237
|
+
succ, kind = find_successor(successors, bbl_addr_trace[i + 1])
|
|
238
|
+
|
|
239
|
+
# cache state
|
|
240
|
+
if i + 1 < len(known_markers):
|
|
241
|
+
succ_marker = known_markers[i + 1]
|
|
242
|
+
else:
|
|
243
|
+
succ_marker = SimStateMarker(bbl_addr_trace[i + 1], parent=marker)
|
|
244
|
+
self.transition_cache.add_node(succ_marker)
|
|
245
|
+
self.transition_cache.add_edge(marker, succ_marker)
|
|
246
|
+
self.transition_cache.nodes[succ_marker]["state"] = ref(succ) if succ is not None else nilref
|
|
247
|
+
if succ is not None:
|
|
248
|
+
self.cache_state(succ)
|
|
249
|
+
|
|
250
|
+
if kind == SuccessorsKind.SAT:
|
|
251
|
+
assert succ is not None
|
|
252
|
+
state = succ
|
|
253
|
+
marker = succ_marker
|
|
254
|
+
i += 1
|
|
255
|
+
continue
|
|
256
|
+
if kind == SuccessorsKind.UNSAT:
|
|
257
|
+
assert succ is not None
|
|
258
|
+
return TestPathReport(
|
|
259
|
+
path_markers={i: marker, i + 1: succ_marker},
|
|
260
|
+
termination=SuccessorsKind.UNSAT,
|
|
261
|
+
)
|
|
262
|
+
return TestPathReport(path_markers={i: marker, i + 1: succ_marker}, termination=SuccessorsKind.MISSING)
|
|
263
|
+
|
|
264
|
+
return TestPathReport(path_markers={i: marker}, termination=SuccessorsKind.SAT)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def find_successor(successors: SimSuccessors, target_addr: int) -> tuple[SimState | None, SuccessorsKind]:
|
|
268
|
+
for succ in successors.flat_successors:
|
|
269
|
+
if succ.addr == target_addr:
|
|
270
|
+
return succ, SuccessorsKind.SAT
|
|
271
|
+
for succ in successors.unsat_successors:
|
|
272
|
+
if succ.addr == target_addr:
|
|
273
|
+
return succ, SuccessorsKind.UNSAT
|
|
274
|
+
for succ in successors.unconstrained_successors:
|
|
275
|
+
succ2 = succ.copy()
|
|
276
|
+
succ2.add_constraints(succ2._ip == target_addr)
|
|
277
|
+
if succ2.satisfiable():
|
|
278
|
+
return succ2, SuccessorsKind.SAT
|
|
279
|
+
return None, SuccessorsKind.MISSING
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
AnalysesHub.register_default("Pathfinder", Pathfinder)
|
|
@@ -740,9 +740,16 @@ class SimEnginePropagatorAIL(
|
|
|
740
740
|
return PropValue.from_value_and_details(v, expr.size, expr, self._codeloc())
|
|
741
741
|
|
|
742
742
|
def _ail_handle_DirtyExpression(self, expr: Expr.DirtyExpression) -> PropValue | None: # pylint:disable=no-self-use
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
743
|
+
for operand in expr.operands:
|
|
744
|
+
_ = self._expr(operand)
|
|
745
|
+
|
|
746
|
+
return PropValue.from_value_and_details(self.state.top(expr.bits), expr.size, expr, self._codeloc())
|
|
747
|
+
|
|
748
|
+
def _ail_handle_VEXCCallExpression(
|
|
749
|
+
self, expr: Expr.VEXCCallExpression
|
|
750
|
+
) -> PropValue | None: # pylint:disable=no-self-use
|
|
751
|
+
for operand in expr.operands:
|
|
752
|
+
_ = self._expr(operand)
|
|
746
753
|
|
|
747
754
|
return PropValue.from_value_and_details(self.state.top(expr.bits), expr.size, expr, self._codeloc())
|
|
748
755
|
|