angr 9.2.142__py3-none-manylinux2014_aarch64.whl → 9.2.144__py3-none-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/calling_convention/calling_convention.py +22 -10
- angr/analyses/calling_convention/fact_collector.py +72 -14
- angr/analyses/cfg/cfg_base.py +7 -2
- angr/analyses/cfg/cfg_emulated.py +13 -4
- angr/analyses/cfg/cfg_fast.py +21 -60
- angr/analyses/cfg/indirect_jump_resolvers/__init__.py +2 -0
- angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +12 -1
- angr/analyses/cfg/indirect_jump_resolvers/constant_value_manager.py +107 -0
- angr/analyses/cfg/indirect_jump_resolvers/default_resolvers.py +2 -1
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +6 -102
- angr/analyses/cfg/indirect_jump_resolvers/syscall_resolver.py +92 -0
- angr/analyses/complete_calling_conventions.py +18 -5
- angr/analyses/decompiler/ail_simplifier.py +95 -65
- angr/analyses/decompiler/clinic.py +162 -68
- angr/analyses/decompiler/decompiler.py +4 -4
- angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +1 -1
- angr/analyses/decompiler/optimization_passes/condition_constprop.py +49 -14
- angr/analyses/decompiler/optimization_passes/ite_region_converter.py +8 -0
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -5
- angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +5 -0
- angr/analyses/decompiler/peephole_optimizations/__init__.py +2 -0
- angr/analyses/decompiler/peephole_optimizations/a_sub_a_shr_const_shr_const.py +37 -0
- angr/analyses/decompiler/peephole_optimizations/simplify_pc_relative_loads.py +15 -1
- angr/analyses/decompiler/sequence_walker.py +8 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +2 -0
- angr/analyses/decompiler/ssailification/ssailification.py +10 -2
- angr/analyses/decompiler/ssailification/traversal_engine.py +17 -2
- angr/analyses/decompiler/structured_codegen/c.py +25 -4
- angr/analyses/decompiler/utils.py +13 -0
- angr/analyses/disassembly.py +3 -3
- angr/analyses/fcp/fcp.py +1 -4
- angr/analyses/s_propagator.py +40 -29
- angr/analyses/s_reaching_definitions/s_rda_model.py +45 -36
- angr/analyses/s_reaching_definitions/s_rda_view.py +6 -3
- angr/analyses/s_reaching_definitions/s_reaching_definitions.py +41 -42
- angr/analyses/typehoon/dfa.py +13 -3
- angr/analyses/typehoon/typehoon.py +60 -18
- angr/analyses/typehoon/typevars.py +11 -7
- angr/analyses/variable_recovery/engine_ail.py +19 -23
- angr/analyses/variable_recovery/engine_base.py +26 -30
- angr/analyses/variable_recovery/variable_recovery_fast.py +17 -21
- angr/calling_conventions.py +18 -8
- angr/knowledge_plugins/functions/function.py +29 -15
- angr/knowledge_plugins/key_definitions/constants.py +2 -2
- angr/knowledge_plugins/key_definitions/liveness.py +4 -4
- angr/lib/angr_native.so +0 -0
- angr/procedures/definitions/linux_kernel.py +5 -0
- angr/state_plugins/unicorn_engine.py +24 -8
- angr/storage/memory_mixins/paged_memory/page_backer_mixins.py +1 -2
- angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +2 -2
- angr/utils/doms.py +40 -33
- angr/utils/graph.py +26 -20
- angr/utils/ssa/__init__.py +21 -14
- angr/utils/ssa/vvar_uses_collector.py +2 -2
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/METADATA +11 -8
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/RECORD +61 -58
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/WHEEL +1 -1
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/LICENSE +0 -0
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/entry_points.txt +0 -0
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/top_level.txt +0 -0
|
@@ -12,6 +12,7 @@ import networkx
|
|
|
12
12
|
import capstone
|
|
13
13
|
|
|
14
14
|
import ailment
|
|
15
|
+
from angr import SIM_LIBRARIES, SIM_TYPE_COLLECTIONS
|
|
15
16
|
|
|
16
17
|
from angr.errors import AngrDecompilationError
|
|
17
18
|
from angr.knowledge_base import KnowledgeBase
|
|
@@ -22,6 +23,7 @@ from angr.utils import timethis
|
|
|
22
23
|
from angr.utils.graph import GraphUtils
|
|
23
24
|
from angr.calling_conventions import SimRegArg, SimStackArg, SimFunctionArgument
|
|
24
25
|
from angr.sim_type import (
|
|
26
|
+
dereference_simtype,
|
|
25
27
|
SimTypeChar,
|
|
26
28
|
SimTypeInt,
|
|
27
29
|
SimTypeLongLong,
|
|
@@ -120,6 +122,7 @@ class Clinic(Analysis):
|
|
|
120
122
|
desired_variables: set[str] | None = None,
|
|
121
123
|
force_loop_single_exit: bool = True,
|
|
122
124
|
complete_successors: bool = False,
|
|
125
|
+
max_type_constraints: int = 750,
|
|
123
126
|
):
|
|
124
127
|
if not func.normalized and mode == ClinicMode.DECOMPILE:
|
|
125
128
|
raise ValueError("Decompilation must work on normalized function graphs.")
|
|
@@ -130,7 +133,7 @@ class Clinic(Analysis):
|
|
|
130
133
|
self.cc_graph: networkx.DiGraph | None = None
|
|
131
134
|
self.unoptimized_graph: networkx.DiGraph | None = None
|
|
132
135
|
self.arg_list = None
|
|
133
|
-
self.arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable,
|
|
136
|
+
self.arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimVariable]] | None = None
|
|
134
137
|
self.variable_kb = variable_kb
|
|
135
138
|
self.externs: set[SimMemoryVariable] = set()
|
|
136
139
|
self.data_refs: dict[int, list[DataRefDesc]] = {} # data address to data reference description
|
|
@@ -153,6 +156,7 @@ class Clinic(Analysis):
|
|
|
153
156
|
self.reaching_definitions: ReachingDefinitionsAnalysis | None = None
|
|
154
157
|
self._cache = cache
|
|
155
158
|
self._mode = mode
|
|
159
|
+
self._max_type_constraints = max_type_constraints
|
|
156
160
|
self.vvar_id_start = vvar_id_start
|
|
157
161
|
self.vvar_to_vvar: dict[int, int] | None = None
|
|
158
162
|
# during SSA conversion, we create secondary stack variables because they overlap and are larger than the
|
|
@@ -304,10 +308,10 @@ class Clinic(Analysis):
|
|
|
304
308
|
self._update_progress(29.0, text="Recovering calling conventions (AIL mode)")
|
|
305
309
|
self._recover_calling_conventions(func_graph=ail_graph)
|
|
306
310
|
|
|
307
|
-
return ail_graph
|
|
311
|
+
return self._apply_callsite_prototype_and_calling_convention(ail_graph)
|
|
308
312
|
|
|
309
313
|
def _slice_variables(self, ail_graph):
|
|
310
|
-
assert self.variable_kb is not None
|
|
314
|
+
assert self.variable_kb is not None and self._desired_variables is not None
|
|
311
315
|
|
|
312
316
|
nodes_index = {(n.addr, n.idx): n for n in ail_graph.nodes()}
|
|
313
317
|
|
|
@@ -383,7 +387,7 @@ class Clinic(Analysis):
|
|
|
383
387
|
# replace the return statement with an assignment to the return register
|
|
384
388
|
blk.statements.pop(idx)
|
|
385
389
|
|
|
386
|
-
if stmt.ret_exprs:
|
|
390
|
+
if stmt.ret_exprs and self.project.arch.ret_offset is not None:
|
|
387
391
|
assign_to_retreg = ailment.Stmt.Assignment(
|
|
388
392
|
self._ail_manager.next_atom(),
|
|
389
393
|
ailment.Expr.Register(
|
|
@@ -430,14 +434,17 @@ class Clinic(Analysis):
|
|
|
430
434
|
if callee_clinic.arg_vvars:
|
|
431
435
|
for arg_idx in sorted(callee_clinic.arg_vvars.keys()):
|
|
432
436
|
param_vvar, reg_arg = callee_clinic.arg_vvars[arg_idx]
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
437
|
+
if isinstance(reg_arg, SimRegisterVariable):
|
|
438
|
+
reg_offset = reg_arg.reg
|
|
439
|
+
stmt = ailment.Stmt.Assignment(
|
|
440
|
+
self._ail_manager.next_atom(),
|
|
441
|
+
param_vvar,
|
|
442
|
+
ailment.Expr.Register(self._ail_manager.next_atom(), None, reg_offset, reg_arg.bits),
|
|
443
|
+
ins_addr=caller_block.addr + caller_block.original_size,
|
|
444
|
+
)
|
|
445
|
+
caller_block.statements.append(stmt)
|
|
446
|
+
else:
|
|
447
|
+
raise NotImplementedError("Unsupported parameter type")
|
|
441
448
|
|
|
442
449
|
ail_graph.add_edge(caller_block, callee_start)
|
|
443
450
|
|
|
@@ -669,6 +676,7 @@ class Clinic(Analysis):
|
|
|
669
676
|
self._convert_all()
|
|
670
677
|
|
|
671
678
|
# there must be at least one Load or one Store
|
|
679
|
+
assert self._blocks_by_addr_and_size is not None
|
|
672
680
|
found_load_or_store = False
|
|
673
681
|
for ail_block in self._blocks_by_addr_and_size.values():
|
|
674
682
|
for stmt in ail_block.statements:
|
|
@@ -726,7 +734,7 @@ class Clinic(Analysis):
|
|
|
726
734
|
self.arg_list = None
|
|
727
735
|
self.variable_kb = None
|
|
728
736
|
self.cc_graph = None
|
|
729
|
-
self.externs =
|
|
737
|
+
self.externs = set()
|
|
730
738
|
self.data_refs: dict[int, list[DataRefDesc]] = self._collect_data_refs(ail_graph)
|
|
731
739
|
|
|
732
740
|
@staticmethod
|
|
@@ -754,7 +762,7 @@ class Clinic(Analysis):
|
|
|
754
762
|
return graph_copy
|
|
755
763
|
|
|
756
764
|
def copy_graph(self, graph=None) -> networkx.DiGraph:
|
|
757
|
-
return self._copy_graph(graph or self.graph)
|
|
765
|
+
return self._copy_graph(graph or self.graph) # type:ignore
|
|
758
766
|
|
|
759
767
|
@timethis
|
|
760
768
|
def _set_function_graph(self):
|
|
@@ -765,6 +773,7 @@ class Clinic(Analysis):
|
|
|
765
773
|
"""
|
|
766
774
|
Alignment blocks are basic blocks that only consist of nops. They should not be included in the graph.
|
|
767
775
|
"""
|
|
776
|
+
assert self._func_graph is not None
|
|
768
777
|
for node in list(self._func_graph.nodes()):
|
|
769
778
|
if self._func_graph.in_degree(node) == 0 and CFGBase._is_noop_block(
|
|
770
779
|
self.project.arch, self.project.factory.block(node.addr, node.size)
|
|
@@ -872,7 +881,7 @@ class Clinic(Analysis):
|
|
|
872
881
|
callsite_block_addr=callsite.addr,
|
|
873
882
|
callsite_insn_addr=callsite_ins_addr,
|
|
874
883
|
func_graph=func_graph,
|
|
875
|
-
fail_fast=self._fail_fast,
|
|
884
|
+
fail_fast=self._fail_fast, # type:ignore
|
|
876
885
|
)
|
|
877
886
|
|
|
878
887
|
if cc.cc is not None and cc.prototype is not None:
|
|
@@ -953,6 +962,7 @@ class Clinic(Analysis):
|
|
|
953
962
|
|
|
954
963
|
:return: None
|
|
955
964
|
"""
|
|
965
|
+
assert self._func_graph is not None
|
|
956
966
|
|
|
957
967
|
for block_node in self._func_graph.nodes():
|
|
958
968
|
ail_block = self._convert(block_node)
|
|
@@ -1063,7 +1073,9 @@ class Clinic(Analysis):
|
|
|
1063
1073
|
self.project.hooked_by(successors[0].addr), UnresolvableCallTarget
|
|
1064
1074
|
):
|
|
1065
1075
|
# found a single successor - replace the last statement
|
|
1076
|
+
assert isinstance(last_stmt.target, ailment.Expr.Expression) # not a string
|
|
1066
1077
|
new_last_stmt = last_stmt.copy()
|
|
1078
|
+
assert isinstance(successors[0].addr, int)
|
|
1067
1079
|
new_last_stmt.target = ailment.Expr.Const(None, None, successors[0].addr, last_stmt.target.bits)
|
|
1068
1080
|
block.statements[-1] = new_last_stmt
|
|
1069
1081
|
|
|
@@ -1105,7 +1117,7 @@ class Clinic(Analysis):
|
|
|
1105
1117
|
if self.kb.functions.contains_addr(target_addr):
|
|
1106
1118
|
# replace the statement
|
|
1107
1119
|
target_func = self.kb.functions.get_by_addr(target_addr)
|
|
1108
|
-
if target_func.returning:
|
|
1120
|
+
if target_func.returning and self.project.arch.ret_offset is not None:
|
|
1109
1121
|
ret_reg_offset = self.project.arch.ret_offset
|
|
1110
1122
|
ret_expr = ailment.Expr.Register(
|
|
1111
1123
|
None,
|
|
@@ -1136,6 +1148,74 @@ class Clinic(Analysis):
|
|
|
1136
1148
|
|
|
1137
1149
|
return ail_graph
|
|
1138
1150
|
|
|
1151
|
+
def _apply_callsite_prototype_and_calling_convention(self, ail_graph: networkx.DiGraph) -> networkx.DiGraph:
|
|
1152
|
+
for block in ail_graph.nodes():
|
|
1153
|
+
if not block.statements:
|
|
1154
|
+
continue
|
|
1155
|
+
|
|
1156
|
+
last_stmt = block.statements[-1]
|
|
1157
|
+
if not isinstance(last_stmt, ailment.Stmt.Call):
|
|
1158
|
+
continue
|
|
1159
|
+
|
|
1160
|
+
cc = last_stmt.calling_convention
|
|
1161
|
+
prototype = last_stmt.prototype
|
|
1162
|
+
if cc and prototype:
|
|
1163
|
+
continue
|
|
1164
|
+
|
|
1165
|
+
# manually-specified call-site prototype
|
|
1166
|
+
has_callsite_prototype = self.kb.callsite_prototypes.has_prototype(block.addr)
|
|
1167
|
+
if has_callsite_prototype:
|
|
1168
|
+
manually_specified = self.kb.callsite_prototypes.get_prototype_type(block.addr)
|
|
1169
|
+
if manually_specified:
|
|
1170
|
+
cc = self.kb.callsite_prototypes.get_cc(block.addr)
|
|
1171
|
+
prototype = self.kb.callsite_prototypes.get_prototype(block.addr)
|
|
1172
|
+
|
|
1173
|
+
# function-specific prototype
|
|
1174
|
+
func = None
|
|
1175
|
+
if cc is None or prototype is None:
|
|
1176
|
+
target = None
|
|
1177
|
+
if isinstance(last_stmt.target, ailment.Expr.Const):
|
|
1178
|
+
target = last_stmt.target.value
|
|
1179
|
+
|
|
1180
|
+
if target is not None and target in self.kb.functions:
|
|
1181
|
+
# function-specific logic when the calling target is known
|
|
1182
|
+
func = self.kb.functions[target]
|
|
1183
|
+
if func.prototype is None:
|
|
1184
|
+
func.find_declaration()
|
|
1185
|
+
cc = func.calling_convention
|
|
1186
|
+
prototype = func.prototype
|
|
1187
|
+
|
|
1188
|
+
# automatically recovered call-site prototype
|
|
1189
|
+
if (cc is None or prototype is None) and has_callsite_prototype:
|
|
1190
|
+
cc = self.kb.callsite_prototypes.get_cc(block.addr)
|
|
1191
|
+
prototype = self.kb.callsite_prototypes.get_prototype(block.addr)
|
|
1192
|
+
|
|
1193
|
+
# ensure the prototype has been resolved
|
|
1194
|
+
if prototype is not None and func is not None:
|
|
1195
|
+
# make sure the function prototype is resolved.
|
|
1196
|
+
# TODO: Cache resolved function prototypes globally
|
|
1197
|
+
prototype_libname = func.prototype_libname
|
|
1198
|
+
type_collections = []
|
|
1199
|
+
if prototype_libname is not None:
|
|
1200
|
+
prototype_lib = SIM_LIBRARIES[prototype_libname]
|
|
1201
|
+
if prototype_lib.type_collection_names:
|
|
1202
|
+
for typelib_name in prototype_lib.type_collection_names:
|
|
1203
|
+
type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
|
|
1204
|
+
if type_collections:
|
|
1205
|
+
prototype = dereference_simtype(prototype, type_collections).with_arch( # type: ignore
|
|
1206
|
+
self.project.arch
|
|
1207
|
+
)
|
|
1208
|
+
|
|
1209
|
+
if cc is None:
|
|
1210
|
+
l.warning("Call site %#x (callee %s) has an unknown calling convention.", block.addr, repr(func))
|
|
1211
|
+
|
|
1212
|
+
new_last_stmt = last_stmt.copy()
|
|
1213
|
+
new_last_stmt.calling_convention = cc
|
|
1214
|
+
new_last_stmt.prototype = prototype
|
|
1215
|
+
block.statements[-1] = new_last_stmt
|
|
1216
|
+
|
|
1217
|
+
return ail_graph
|
|
1218
|
+
|
|
1139
1219
|
@timethis
|
|
1140
1220
|
def _make_ailgraph(self) -> networkx.DiGraph:
|
|
1141
1221
|
return self._function_graph_to_ail_graph(self._func_graph)
|
|
@@ -1589,9 +1669,9 @@ class Clinic(Analysis):
|
|
|
1589
1669
|
tmp_kb.functions = self.kb.functions
|
|
1590
1670
|
vr = self.project.analyses.VariableRecoveryFast(
|
|
1591
1671
|
self.function, # pylint:disable=unused-variable
|
|
1592
|
-
fail_fast=self._fail_fast,
|
|
1672
|
+
fail_fast=self._fail_fast, # type:ignore
|
|
1593
1673
|
func_graph=ail_graph,
|
|
1594
|
-
kb=tmp_kb,
|
|
1674
|
+
kb=tmp_kb, # type:ignore
|
|
1595
1675
|
track_sp=False,
|
|
1596
1676
|
func_args=arg_list,
|
|
1597
1677
|
unify_variables=False,
|
|
@@ -1610,9 +1690,13 @@ class Clinic(Analysis):
|
|
|
1610
1690
|
stackvar_max_sizes = var_manager.get_stackvar_max_sizes(self.stack_items)
|
|
1611
1691
|
tv_max_sizes = {}
|
|
1612
1692
|
for v, s in stackvar_max_sizes.items():
|
|
1693
|
+
assert isinstance(v, SimStackVariable)
|
|
1613
1694
|
if v in vr.var_to_typevars:
|
|
1614
1695
|
for tv in vr.var_to_typevars[v]:
|
|
1615
1696
|
tv_max_sizes[tv] = s
|
|
1697
|
+
if v.offset in vr.stack_offset_typevars:
|
|
1698
|
+
tv = vr.stack_offset_typevars[v.offset]
|
|
1699
|
+
tv_max_sizes[tv] = s
|
|
1616
1700
|
# clean up existing types for this function
|
|
1617
1701
|
var_manager.remove_types()
|
|
1618
1702
|
# TODO: Type inference for global variables
|
|
@@ -1624,35 +1708,49 @@ class Clinic(Analysis):
|
|
|
1624
1708
|
must_struct |= typevars
|
|
1625
1709
|
else:
|
|
1626
1710
|
must_struct = None
|
|
1627
|
-
|
|
1628
|
-
|
|
1629
|
-
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
|
|
1633
|
-
var_mapping=vr.var_to_typevars,
|
|
1634
|
-
must_struct=must_struct,
|
|
1635
|
-
ground_truth=groundtruth,
|
|
1636
|
-
stackvar_max_sizes=tv_max_sizes,
|
|
1637
|
-
)
|
|
1638
|
-
# tp.pp_constraints()
|
|
1639
|
-
# tp.pp_solution()
|
|
1640
|
-
tp.update_variable_types(
|
|
1641
|
-
self.function.addr,
|
|
1642
|
-
{v: t for v, t in vr.var_to_typevars.items() if isinstance(v, (SimRegisterVariable, SimStackVariable))},
|
|
1643
|
-
)
|
|
1644
|
-
tp.update_variable_types(
|
|
1645
|
-
"global",
|
|
1646
|
-
{
|
|
1647
|
-
v: t
|
|
1648
|
-
for v, t in vr.var_to_typevars.items()
|
|
1649
|
-
if isinstance(v, SimMemoryVariable) and not isinstance(v, SimStackVariable)
|
|
1650
|
-
},
|
|
1651
|
-
)
|
|
1652
|
-
except Exception: # pylint:disable=broad-except
|
|
1653
|
-
l.warning(
|
|
1654
|
-
"Typehoon analysis failed. Variables will not have types. Please report to GitHub.", exc_info=True
|
|
1711
|
+
total_type_constraints = sum(len(tc) for tc in vr.type_constraints.values()) if vr.type_constraints else 0
|
|
1712
|
+
if total_type_constraints > self._max_type_constraints:
|
|
1713
|
+
l.info(
|
|
1714
|
+
"The number of type constraints (%d) is greater than the threshold (%d). Skipping type inference.",
|
|
1715
|
+
total_type_constraints,
|
|
1716
|
+
self._max_type_constraints,
|
|
1655
1717
|
)
|
|
1718
|
+
else:
|
|
1719
|
+
try:
|
|
1720
|
+
tp = self.project.analyses.Typehoon(
|
|
1721
|
+
vr.type_constraints,
|
|
1722
|
+
vr.func_typevar,
|
|
1723
|
+
kb=tmp_kb,
|
|
1724
|
+
fail_fast=self._fail_fast,
|
|
1725
|
+
var_mapping=vr.var_to_typevars,
|
|
1726
|
+
stack_offset_tvs=vr.stack_offset_typevars,
|
|
1727
|
+
must_struct=must_struct,
|
|
1728
|
+
ground_truth=groundtruth,
|
|
1729
|
+
stackvar_max_sizes=tv_max_sizes,
|
|
1730
|
+
)
|
|
1731
|
+
# tp.pp_constraints()
|
|
1732
|
+
# tp.pp_solution()
|
|
1733
|
+
tp.update_variable_types(
|
|
1734
|
+
self.function.addr,
|
|
1735
|
+
{
|
|
1736
|
+
v: t
|
|
1737
|
+
for v, t in vr.var_to_typevars.items()
|
|
1738
|
+
if isinstance(v, (SimRegisterVariable, SimStackVariable))
|
|
1739
|
+
},
|
|
1740
|
+
vr.stack_offset_typevars,
|
|
1741
|
+
)
|
|
1742
|
+
tp.update_variable_types(
|
|
1743
|
+
"global",
|
|
1744
|
+
{
|
|
1745
|
+
v: t
|
|
1746
|
+
for v, t in vr.var_to_typevars.items()
|
|
1747
|
+
if isinstance(v, SimMemoryVariable) and not isinstance(v, SimStackVariable)
|
|
1748
|
+
},
|
|
1749
|
+
)
|
|
1750
|
+
except Exception: # pylint:disable=broad-except
|
|
1751
|
+
l.warning(
|
|
1752
|
+
"Typehoon analysis failed. Variables will not have types. Please report to GitHub.", exc_info=True
|
|
1753
|
+
)
|
|
1656
1754
|
|
|
1657
1755
|
# for any left-over variables, assign Bottom type (which will get "corrected" into a default type in
|
|
1658
1756
|
# VariableManager)
|
|
@@ -1671,14 +1769,10 @@ class Clinic(Analysis):
|
|
|
1671
1769
|
func_blocks=list(ail_graph),
|
|
1672
1770
|
)
|
|
1673
1771
|
|
|
1674
|
-
# Link variables to
|
|
1772
|
+
# Link variables and struct member information to every statement and expression
|
|
1675
1773
|
for block in ail_graph.nodes():
|
|
1676
1774
|
self._link_variables_on_block(block, tmp_kb)
|
|
1677
1775
|
|
|
1678
|
-
# Link struct member info to Store statements
|
|
1679
|
-
for block in ail_graph.nodes():
|
|
1680
|
-
self._link_struct_member_info_on_block(block, tmp_kb)
|
|
1681
|
-
|
|
1682
1776
|
if self._cache is not None:
|
|
1683
1777
|
self._cache.type_constraints = vr.type_constraints
|
|
1684
1778
|
self._cache.func_typevar = vr.func_typevar
|
|
@@ -1686,22 +1780,6 @@ class Clinic(Analysis):
|
|
|
1686
1780
|
|
|
1687
1781
|
return tmp_kb
|
|
1688
1782
|
|
|
1689
|
-
def _link_struct_member_info_on_block(self, block, kb):
|
|
1690
|
-
variable_manager = kb.variables[self.function.addr]
|
|
1691
|
-
for stmt in block.statements:
|
|
1692
|
-
if isinstance(stmt, ailment.Stmt.Store) and isinstance((var := stmt.variable), SimStackVariable):
|
|
1693
|
-
offset = var.offset
|
|
1694
|
-
if offset in variable_manager.stack_offset_to_struct_member_info:
|
|
1695
|
-
stmt.tags["struct_member_info"] = variable_manager.stack_offset_to_struct_member_info[offset]
|
|
1696
|
-
elif (
|
|
1697
|
-
isinstance(stmt, ailment.Stmt.Assignment)
|
|
1698
|
-
and isinstance(stmt.dst, ailment.Expr.VirtualVariable)
|
|
1699
|
-
and stmt.dst.was_stack
|
|
1700
|
-
):
|
|
1701
|
-
offset = stmt.dst.stack_offset
|
|
1702
|
-
if offset in variable_manager.stack_offset_to_struct_member_info:
|
|
1703
|
-
stmt.dst.tags["struct_member_info"] = variable_manager.stack_offset_to_struct_member_info[offset]
|
|
1704
|
-
|
|
1705
1783
|
def _link_variables_on_block(self, block, kb):
|
|
1706
1784
|
"""
|
|
1707
1785
|
Link atoms (AIL expressions) in the given block to corresponding variables identified previously.
|
|
@@ -1737,6 +1815,12 @@ class Clinic(Analysis):
|
|
|
1737
1815
|
)
|
|
1738
1816
|
self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.data)
|
|
1739
1817
|
|
|
1818
|
+
# link struct member info
|
|
1819
|
+
if isinstance(stmt.variable, SimStackVariable):
|
|
1820
|
+
off = stmt.variable.offset
|
|
1821
|
+
if off in variable_manager.stack_offset_to_struct_member_info:
|
|
1822
|
+
stmt.tags["struct_member_info"] = variable_manager.stack_offset_to_struct_member_info[off]
|
|
1823
|
+
|
|
1740
1824
|
elif stmt_type is ailment.Stmt.Assignment:
|
|
1741
1825
|
self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.dst)
|
|
1742
1826
|
self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.src)
|
|
@@ -1804,6 +1888,11 @@ class Clinic(Analysis):
|
|
|
1804
1888
|
expr.variable = var
|
|
1805
1889
|
expr.variable_offset = offset
|
|
1806
1890
|
|
|
1891
|
+
if isinstance(expr, ailment.Expr.VirtualVariable) and expr.was_stack:
|
|
1892
|
+
off = expr.stack_offset
|
|
1893
|
+
if off in variable_manager.stack_offset_to_struct_member_info:
|
|
1894
|
+
expr.tags["struct_member_info"] = variable_manager.stack_offset_to_struct_member_info[off]
|
|
1895
|
+
|
|
1807
1896
|
elif type(expr) is ailment.Expr.Load:
|
|
1808
1897
|
variables = variable_manager.find_variables_by_atom(block.addr, stmt_idx, expr, block_idx=block.idx)
|
|
1809
1898
|
if len(variables) == 0:
|
|
@@ -1838,6 +1927,11 @@ class Clinic(Analysis):
|
|
|
1838
1927
|
expr.variable = var
|
|
1839
1928
|
expr.variable_offset = offset
|
|
1840
1929
|
|
|
1930
|
+
if isinstance(var, SimStackVariable):
|
|
1931
|
+
off = var.offset
|
|
1932
|
+
if off in variable_manager.stack_offset_to_struct_member_info:
|
|
1933
|
+
expr.tags["struct_member_info"] = variable_manager.stack_offset_to_struct_member_info[off]
|
|
1934
|
+
|
|
1841
1935
|
elif type(expr) is ailment.Expr.BinaryOp:
|
|
1842
1936
|
variables = variable_manager.find_variables_by_atom(block.addr, stmt_idx, expr, block_idx=block.idx)
|
|
1843
1937
|
if len(variables) >= 1:
|
|
@@ -2683,7 +2777,7 @@ class Clinic(Analysis):
|
|
|
2683
2777
|
def _next_atom(self) -> int:
|
|
2684
2778
|
return self._ail_manager.next_atom()
|
|
2685
2779
|
|
|
2686
|
-
def parse_variable_addr(self, addr: ailment.Expr.Expression) -> tuple[Any, Any]
|
|
2780
|
+
def parse_variable_addr(self, addr: ailment.Expr.Expression) -> tuple[Any, Any]:
|
|
2687
2781
|
if isinstance(addr, ailment.Expr.Const):
|
|
2688
2782
|
return addr, 0
|
|
2689
2783
|
if isinstance(addr, ailment.Expr.BinaryOp) and addr.op == "Add":
|
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import logging
|
|
4
4
|
from collections import defaultdict
|
|
5
5
|
from collections.abc import Iterable
|
|
6
|
-
from typing import
|
|
6
|
+
from typing import Any, TYPE_CHECKING
|
|
7
7
|
|
|
8
8
|
import networkx
|
|
9
9
|
from cle import SymbolType
|
|
@@ -35,9 +35,9 @@ if TYPE_CHECKING:
|
|
|
35
35
|
|
|
36
36
|
l = logging.getLogger(name=__name__)
|
|
37
37
|
|
|
38
|
-
_PEEPHOLE_OPTIMIZATIONS_TYPE =
|
|
39
|
-
Iterable[
|
|
40
|
-
|
|
38
|
+
_PEEPHOLE_OPTIMIZATIONS_TYPE = (
|
|
39
|
+
Iterable[type["PeepholeOptimizationStmtBase"] | type["PeepholeOptimizationExprBase"]] | None
|
|
40
|
+
)
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
class Decompiler(Analysis):
|
|
@@ -16,7 +16,7 @@ class BasePointerSaveSimplifier(OptimizationPass):
|
|
|
16
16
|
"""
|
|
17
17
|
|
|
18
18
|
ARCHES = ["X86", "AMD64", "ARMEL", "ARMHF", "ARMCortexM", "MIPS32", "MIPS64"]
|
|
19
|
-
PLATFORMS =
|
|
19
|
+
PLATFORMS = None
|
|
20
20
|
STAGE = OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION
|
|
21
21
|
NAME = "Simplify base pointer saving"
|
|
22
22
|
DESCRIPTION = __doc__.strip()
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
from typing import TYPE_CHECKING
|
|
3
|
+
from collections import defaultdict
|
|
3
4
|
|
|
4
5
|
import networkx
|
|
5
6
|
|
|
6
7
|
from ailment import AILBlockWalker, Block
|
|
7
|
-
from ailment.statement import ConditionalJump, Statement
|
|
8
|
+
from ailment.statement import ConditionalJump, Statement, Assignment
|
|
8
9
|
from ailment.expression import Const, BinaryOp, VirtualVariable
|
|
9
10
|
|
|
10
11
|
from angr.analyses.decompiler.utils import first_nonlabel_nonphi_statement
|
|
@@ -41,6 +42,7 @@ class CCondPropBlockWalker(AILBlockWalker):
|
|
|
41
42
|
self._new_block: Block | None = None # output
|
|
42
43
|
self.vvar_id = vvar_id
|
|
43
44
|
self.const_value = const_value
|
|
45
|
+
self.abort = False
|
|
44
46
|
|
|
45
47
|
def walk(self, block: Block):
|
|
46
48
|
self._new_block = None
|
|
@@ -48,6 +50,17 @@ class CCondPropBlockWalker(AILBlockWalker):
|
|
|
48
50
|
return self._new_block
|
|
49
51
|
|
|
50
52
|
def _handle_stmt(self, stmt_idx: int, stmt: Statement, block: Block): # type: ignore
|
|
53
|
+
if self.abort:
|
|
54
|
+
return
|
|
55
|
+
|
|
56
|
+
if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable) and stmt.dst.varid == self.vvar_id:
|
|
57
|
+
# we see the assignment of this virtual variable; this is the original block that creates this variable
|
|
58
|
+
# and checks if this variable is equal to a constant value. as such, we stop processing this block.
|
|
59
|
+
# an example appears in binary 1de5cda760f9ed80bb6f4a35edcebc86ccec14c49cf4775ddf2ffc3e05ff35f4, function
|
|
60
|
+
# 0x4657C0, blocks 0x465bd6 and 0x465a5c
|
|
61
|
+
self.abort = True
|
|
62
|
+
return
|
|
63
|
+
|
|
51
64
|
r = super()._handle_stmt(stmt_idx, stmt, block)
|
|
52
65
|
if r is not None:
|
|
53
66
|
# replace the original statement
|
|
@@ -58,7 +71,9 @@ class CCondPropBlockWalker(AILBlockWalker):
|
|
|
58
71
|
def _handle_VirtualVariable( # type: ignore
|
|
59
72
|
self, expr_idx: int, expr: VirtualVariable, stmt_idx: int, stmt: Statement, block: Block | None
|
|
60
73
|
) -> Const | None:
|
|
61
|
-
if expr.varid == self.vvar_id
|
|
74
|
+
if expr.varid == self.vvar_id and not (
|
|
75
|
+
isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable) and stmt.dst.varid == self.vvar_id
|
|
76
|
+
):
|
|
62
77
|
return Const(expr.idx, None, self.const_value.value, self.const_value.bits, **expr.tags)
|
|
63
78
|
return None
|
|
64
79
|
|
|
@@ -80,19 +95,9 @@ class ConditionConstantPropagation(OptimizationPass):
|
|
|
80
95
|
|
|
81
96
|
def _check(self):
|
|
82
97
|
cconds = self._find_const_conditions()
|
|
83
|
-
if not cconds:
|
|
84
|
-
return False, None
|
|
85
|
-
return True, {"cconds": cconds}
|
|
86
|
-
|
|
87
|
-
@timethis
|
|
88
|
-
def _analyze(self, cache=None):
|
|
89
|
-
if not cache or cache.get("cconds", None) is None: # noqa: SIM108
|
|
90
|
-
cconds = self._find_const_conditions()
|
|
91
|
-
else:
|
|
92
|
-
cconds = cache["cconds"]
|
|
93
98
|
|
|
94
99
|
if not cconds:
|
|
95
|
-
return
|
|
100
|
+
return False, None
|
|
96
101
|
|
|
97
102
|
# group cconds according to their sources
|
|
98
103
|
cconds_by_src: dict[tuple[int, int | None], list[ConstantCondition]] = {}
|
|
@@ -102,6 +107,36 @@ class ConditionConstantPropagation(OptimizationPass):
|
|
|
102
107
|
cconds_by_src[src] = []
|
|
103
108
|
cconds_by_src[src].append(ccond)
|
|
104
109
|
|
|
110
|
+
# eliminate conflicting conditions
|
|
111
|
+
for src in list(cconds_by_src):
|
|
112
|
+
cconds = cconds_by_src[src]
|
|
113
|
+
vvar_id_to_values = defaultdict(set)
|
|
114
|
+
ccond_dict = {} # keyed by vvar_id; used for deduplication
|
|
115
|
+
for ccond in cconds:
|
|
116
|
+
vvar_id_to_values[ccond.vvar_id].add(ccond.value)
|
|
117
|
+
ccond_dict[ccond.vvar_id] = ccond
|
|
118
|
+
new_cconds = []
|
|
119
|
+
for vid, vvalues in vvar_id_to_values.items():
|
|
120
|
+
if len(vvalues) == 1:
|
|
121
|
+
new_cconds.append(ccond_dict[vid])
|
|
122
|
+
if new_cconds:
|
|
123
|
+
cconds_by_src[src] = new_cconds
|
|
124
|
+
else:
|
|
125
|
+
del cconds_by_src[src]
|
|
126
|
+
|
|
127
|
+
if not cconds_by_src:
|
|
128
|
+
return False, None
|
|
129
|
+
return True, {"cconds_by_src": cconds_by_src}
|
|
130
|
+
|
|
131
|
+
@timethis
|
|
132
|
+
def _analyze(self, cache=None):
|
|
133
|
+
if not cache or cache.get("cconds_by_src", None) is None:
|
|
134
|
+
return
|
|
135
|
+
cconds_by_src = cache["cconds_by_src"]
|
|
136
|
+
|
|
137
|
+
if not cconds_by_src:
|
|
138
|
+
return
|
|
139
|
+
|
|
105
140
|
# calculate a dominance frontier for each block
|
|
106
141
|
entry_node_addr, entry_node_idx = self.entry_node_addr
|
|
107
142
|
entry_node = self._get_block(entry_node_addr, idx=entry_node_idx)
|
|
@@ -114,7 +149,7 @@ class ConditionConstantPropagation(OptimizationPass):
|
|
|
114
149
|
continue
|
|
115
150
|
|
|
116
151
|
for ccond in cconds:
|
|
117
|
-
for _, loc in rda.all_vvar_uses[
|
|
152
|
+
for _, loc in rda.all_vvar_uses[ccond.vvar_id]:
|
|
118
153
|
loc_block = self._get_block(loc.block_addr, idx=loc.block_idx)
|
|
119
154
|
if loc_block is None:
|
|
120
155
|
continue
|
|
@@ -192,6 +192,14 @@ class ITERegionConverter(OptimizationPass):
|
|
|
192
192
|
if region_head not in self._graph or region_tail not in self._graph:
|
|
193
193
|
return False
|
|
194
194
|
|
|
195
|
+
# ensure all phi statements in region_tail have valid source vvars
|
|
196
|
+
for stmt in region_tail.statements:
|
|
197
|
+
if not is_phi_assignment(stmt):
|
|
198
|
+
continue
|
|
199
|
+
for _, vvar in stmt.src.src_and_vvars:
|
|
200
|
+
if vvar is None:
|
|
201
|
+
return False
|
|
202
|
+
|
|
195
203
|
#
|
|
196
204
|
# create a new region_head
|
|
197
205
|
#
|
|
@@ -443,6 +443,11 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
443
443
|
"""
|
|
444
444
|
Wrapper for _analyze() that verifies the graph is structurable before and after the optimization.
|
|
445
445
|
"""
|
|
446
|
+
# replace the normal check in OptimizationPass.analyze()
|
|
447
|
+
ret, cache = self._check()
|
|
448
|
+
if not ret:
|
|
449
|
+
return
|
|
450
|
+
|
|
446
451
|
if not self._graph_is_structurable(self._graph, initial=True):
|
|
447
452
|
return
|
|
448
453
|
|
|
@@ -450,11 +455,6 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
450
455
|
if self._require_gotos and not self._initial_gotos:
|
|
451
456
|
return
|
|
452
457
|
|
|
453
|
-
# replace the normal check in OptimizationPass.analyze()
|
|
454
|
-
ret, cache = self._check()
|
|
455
|
-
if not ret:
|
|
456
|
-
return
|
|
457
|
-
|
|
458
458
|
# setup for the very first analysis
|
|
459
459
|
self.out_graph = networkx.DiGraph(self._graph)
|
|
460
460
|
if self._max_opt_iters > 1:
|
|
@@ -95,6 +95,7 @@ class ReturnDuplicatorBase:
|
|
|
95
95
|
minimize_copies_for_regions: bool = True,
|
|
96
96
|
ri: RegionIdentifier | None = None,
|
|
97
97
|
scratch: dict[str, Any] | None = None,
|
|
98
|
+
max_func_blocks: int = 1500,
|
|
98
99
|
):
|
|
99
100
|
self._max_calls_in_region = max_calls_in_regions
|
|
100
101
|
self._minimize_copies_for_regions = minimize_copies_for_regions
|
|
@@ -105,6 +106,7 @@ class ReturnDuplicatorBase:
|
|
|
105
106
|
self._func = func
|
|
106
107
|
self._ri: RegionIdentifier | None = ri
|
|
107
108
|
self.vvar_id_start = vvar_id_start
|
|
109
|
+
self._max_func_blocks = max_func_blocks
|
|
108
110
|
|
|
109
111
|
def next_node_idx(self) -> int:
|
|
110
112
|
node_idx = self.scratch.get("returndup_node_idx", 0) + 1
|
|
@@ -123,6 +125,9 @@ class ReturnDuplicatorBase:
|
|
|
123
125
|
#
|
|
124
126
|
|
|
125
127
|
def _check(self):
|
|
128
|
+
# is this function too large?
|
|
129
|
+
if len(self._func.block_addrs_set) > self._max_func_blocks:
|
|
130
|
+
return False, None
|
|
126
131
|
# does this function have end points?
|
|
127
132
|
return bool(self._func.endpoints), None
|
|
128
133
|
|
|
@@ -5,6 +5,7 @@ from .a_mul_const_div_shr_const import AMulConstDivShrConst
|
|
|
5
5
|
from .a_shl_const_sub_a import AShlConstSubA
|
|
6
6
|
from .a_sub_a_div import ASubADiv
|
|
7
7
|
from .a_sub_a_div_const_mul_const import ASubADivConstMulConst
|
|
8
|
+
from .a_sub_a_shr_const_shr_const import ASubAShrConstShrConst
|
|
8
9
|
from .arm_cmpf import ARMCmpF
|
|
9
10
|
from .bswap import Bswap
|
|
10
11
|
from .coalesce_same_cascading_ifs import CoalesceSameCascadingIfs
|
|
@@ -57,6 +58,7 @@ ALL_PEEPHOLE_OPTS: list[type[PeepholeOptimizationExprBase]] = [
|
|
|
57
58
|
AMulConstSubA,
|
|
58
59
|
ASubADiv,
|
|
59
60
|
ASubADivConstMulConst,
|
|
61
|
+
ASubAShrConstShrConst,
|
|
60
62
|
ARMCmpF,
|
|
61
63
|
Bswap,
|
|
62
64
|
CoalesceSameCascadingIfs,
|