angr 9.2.160__cp310-abi3-macosx_10_9_x86_64.whl → 9.2.162__cp310-abi3-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +4 -1
- angr/analyses/analysis.py +0 -1
- angr/analyses/cfg/cfg_base.py +5 -1
- angr/analyses/decompiler/ail_simplifier.py +101 -2
- angr/analyses/decompiler/block_simplifier.py +13 -8
- angr/analyses/decompiler/clinic.py +1 -0
- angr/analyses/decompiler/condition_processor.py +24 -0
- angr/analyses/decompiler/counters/call_counter.py +11 -1
- angr/analyses/decompiler/decompiler.py +3 -1
- angr/analyses/decompiler/graph_region.py +11 -2
- angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +1 -1
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -0
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +31 -11
- angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +2 -0
- angr/analyses/decompiler/peephole_optimizations/__init__.py +4 -4
- angr/analyses/decompiler/peephole_optimizations/eager_eval.py +53 -0
- angr/analyses/decompiler/peephole_optimizations/modulo_simplifier.py +89 -0
- angr/analyses/decompiler/peephole_optimizations/{const_mull_a_shift.py → optimized_div_simplifier.py} +139 -25
- angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +18 -9
- angr/analyses/decompiler/region_simplifiers/goto.py +3 -3
- angr/analyses/decompiler/region_simplifiers/if_.py +2 -2
- angr/analyses/decompiler/region_simplifiers/loop.py +2 -2
- angr/analyses/decompiler/structured_codegen/c.py +3 -3
- angr/analyses/decompiler/structuring/dream.py +1 -1
- angr/analyses/decompiler/structuring/phoenix.py +138 -99
- angr/analyses/decompiler/structuring/recursive_structurer.py +3 -2
- angr/analyses/decompiler/structuring/sailr.py +51 -43
- angr/analyses/decompiler/structuring/structurer_base.py +2 -3
- angr/analyses/deobfuscator/string_obf_opt_passes.py +1 -1
- angr/analyses/disassembly.py +1 -1
- angr/analyses/reaching_definitions/function_handler.py +1 -0
- angr/analyses/s_propagator.py +2 -2
- angr/analyses/s_reaching_definitions/s_rda_model.py +1 -0
- angr/analyses/s_reaching_definitions/s_reaching_definitions.py +5 -2
- angr/analyses/variable_recovery/engine_base.py +17 -1
- angr/analyses/variable_recovery/variable_recovery_base.py +30 -2
- angr/analyses/variable_recovery/variable_recovery_fast.py +11 -2
- angr/emulator.py +143 -0
- angr/engines/concrete.py +66 -0
- angr/engines/icicle.py +66 -30
- angr/exploration_techniques/driller_core.py +2 -2
- angr/knowledge_plugins/functions/function.py +1 -1
- angr/knowledge_plugins/functions/function_manager.py +1 -2
- angr/project.py +7 -0
- angr/rustylib.abi3.so +0 -0
- angr/sim_type.py +16 -8
- angr/simos/javavm.py +1 -1
- angr/unicornlib.dylib +0 -0
- angr/utils/graph.py +48 -13
- angr/utils/library.py +13 -12
- angr/utils/ssa/__init__.py +57 -5
- {angr-9.2.160.dist-info → angr-9.2.162.dist-info}/METADATA +5 -5
- {angr-9.2.160.dist-info → angr-9.2.162.dist-info}/RECORD +57 -55
- angr/analyses/decompiler/peephole_optimizations/a_sub_a_div_const_mul_const.py +0 -57
- {angr-9.2.160.dist-info → angr-9.2.162.dist-info}/WHEEL +0 -0
- {angr-9.2.160.dist-info → angr-9.2.162.dist-info}/entry_points.txt +0 -0
- {angr-9.2.160.dist-info → angr-9.2.162.dist-info}/licenses/LICENSE +0 -0
- {angr-9.2.160.dist-info → angr-9.2.162.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
# pylint: disable=wrong-import-position
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
__version__ = "9.2.160"
|
|
5
|
+
__version__ = "9.2.162"
|
|
6
6
|
|
|
7
7
|
if bytes is str:
|
|
8
8
|
raise Exception(
|
|
@@ -192,6 +192,7 @@ from . import concretization_strategies
|
|
|
192
192
|
from .distributed import Server
|
|
193
193
|
from .knowledge_base import KnowledgeBase
|
|
194
194
|
from .procedures.definitions import load_external_definitions
|
|
195
|
+
from .emulator import Emulator, EmulatorStopReason
|
|
195
196
|
|
|
196
197
|
# for compatibility reasons
|
|
197
198
|
from . import sim_manager as manager
|
|
@@ -259,6 +260,8 @@ __all__ = (
|
|
|
259
260
|
"AngrVaultError",
|
|
260
261
|
"Blade",
|
|
261
262
|
"Block",
|
|
263
|
+
"Emulator",
|
|
264
|
+
"EmulatorStopReason",
|
|
262
265
|
"ExplorationTechnique",
|
|
263
266
|
"KnowledgeBase",
|
|
264
267
|
"PTChunk",
|
angr/analyses/analysis.py
CHANGED
angr/analyses/cfg/cfg_base.py
CHANGED
|
@@ -2566,7 +2566,11 @@ class CFGBase(Analysis):
|
|
|
2566
2566
|
"""
|
|
2567
2567
|
|
|
2568
2568
|
if arch.name == "X86" or arch.name == "AMD64":
|
|
2569
|
-
|
|
2569
|
+
block_bytes_set = set(block.bytes)
|
|
2570
|
+
if block_bytes_set == {0x90}:
|
|
2571
|
+
return True
|
|
2572
|
+
if block_bytes_set == {0xCC}:
|
|
2573
|
+
# technically this is not a no-op, but for our purposes we can settle for now
|
|
2570
2574
|
return True
|
|
2571
2575
|
elif arch.name == "MIPS32":
|
|
2572
2576
|
if arch.memory_endness == "Iend_BE":
|
|
@@ -10,7 +10,16 @@ import networkx
|
|
|
10
10
|
|
|
11
11
|
from angr.ailment import AILBlockWalker
|
|
12
12
|
from angr.ailment.block import Block
|
|
13
|
-
from angr.ailment.statement import
|
|
13
|
+
from angr.ailment.statement import (
|
|
14
|
+
Statement,
|
|
15
|
+
Assignment,
|
|
16
|
+
Store,
|
|
17
|
+
Call,
|
|
18
|
+
ConditionalJump,
|
|
19
|
+
DirtyStatement,
|
|
20
|
+
WeakAssignment,
|
|
21
|
+
Return,
|
|
22
|
+
)
|
|
14
23
|
from angr.ailment.expression import (
|
|
15
24
|
Register,
|
|
16
25
|
Convert,
|
|
@@ -226,6 +235,15 @@ class AILSimplifier(Analysis):
|
|
|
226
235
|
# reaching definition analysis results are no longer reliable
|
|
227
236
|
self._clear_cache()
|
|
228
237
|
|
|
238
|
+
_l.debug("Rewriting constant expressions with phi variables")
|
|
239
|
+
phi_const_rewritten = self._rewrite_phi_const_exprs()
|
|
240
|
+
self.simplified |= phi_const_rewritten
|
|
241
|
+
if phi_const_rewritten:
|
|
242
|
+
_l.debug("... constant expressions with phi variables rewritten")
|
|
243
|
+
self._rebuild_func_graph()
|
|
244
|
+
# reaching definition analysis results are no longer reliable
|
|
245
|
+
self._clear_cache()
|
|
246
|
+
|
|
229
247
|
if self._only_consts:
|
|
230
248
|
return
|
|
231
249
|
|
|
@@ -698,6 +716,11 @@ class AILSimplifier(Analysis):
|
|
|
698
716
|
if not replacements_by_block_addrs_and_idx:
|
|
699
717
|
return False
|
|
700
718
|
|
|
719
|
+
return self._replace_exprs_in_blocks(replacements_by_block_addrs_and_idx)
|
|
720
|
+
|
|
721
|
+
def _replace_exprs_in_blocks(
|
|
722
|
+
self, replacements: dict[tuple[int, int | None], dict[CodeLocation, dict[Expression, Expression]]]
|
|
723
|
+
) -> bool:
|
|
701
724
|
blocks_by_addr_and_idx = {(node.addr, node.idx): node for node in self.func_graph.nodes()}
|
|
702
725
|
|
|
703
726
|
if self._stack_arg_offsets:
|
|
@@ -706,7 +729,7 @@ class AILSimplifier(Analysis):
|
|
|
706
729
|
insn_addrs_using_stack_args = None
|
|
707
730
|
|
|
708
731
|
replaced = False
|
|
709
|
-
for (block_addr, block_idx), reps in replacements_by_block_addrs_and_idx.items():
|
|
732
|
+
for (block_addr, block_idx), reps in replacements.items():
|
|
710
733
|
block = blocks_by_addr_and_idx[(block_addr, block_idx)]
|
|
711
734
|
|
|
712
735
|
# only replace loads if there are stack arguments in this block
|
|
@@ -787,6 +810,72 @@ class AILSimplifier(Analysis):
|
|
|
787
810
|
|
|
788
811
|
return changed
|
|
789
812
|
|
|
813
|
+
#
|
|
814
|
+
# Rewriting constant expressions with phi variables
|
|
815
|
+
#
|
|
816
|
+
|
|
817
|
+
def _rewrite_phi_const_exprs(self) -> bool:
|
|
818
|
+
"""
|
|
819
|
+
Rewrite phi variables that are definitely constant expressions to constants.
|
|
820
|
+
"""
|
|
821
|
+
|
|
822
|
+
# gather constant assignments
|
|
823
|
+
|
|
824
|
+
vvar_values: dict[int, tuple[int, int]] = {}
|
|
825
|
+
for block in self.func_graph:
|
|
826
|
+
for stmt in block.statements:
|
|
827
|
+
if (
|
|
828
|
+
isinstance(stmt, Assignment)
|
|
829
|
+
and isinstance(stmt.dst, VirtualVariable)
|
|
830
|
+
and isinstance(stmt.src, Const)
|
|
831
|
+
and isinstance(stmt.src.value, int)
|
|
832
|
+
):
|
|
833
|
+
vvar_values[stmt.dst.varid] = stmt.src.value, stmt.src.bits
|
|
834
|
+
|
|
835
|
+
srda = self._compute_reaching_definitions()
|
|
836
|
+
# compute vvar reachability for phi variables
|
|
837
|
+
# ensure that each phi variable is fully defined, i.e., all its source variables are defined
|
|
838
|
+
g = networkx.Graph()
|
|
839
|
+
for phi_vvar_id, vvar_ids in srda.phivarid_to_varids_with_unknown.items():
|
|
840
|
+
for vvar_id in vvar_ids:
|
|
841
|
+
# we cannot store None to networkx graph, so we use -1 to represent unknown source vvars
|
|
842
|
+
g.add_edge(phi_vvar_id, vvar_id if vvar_id is not None else -1)
|
|
843
|
+
|
|
844
|
+
phi_vvar_ids = srda.phi_vvar_ids
|
|
845
|
+
to_replace = {}
|
|
846
|
+
for cc in networkx.algorithms.connected_components(g):
|
|
847
|
+
if -1 in cc:
|
|
848
|
+
continue
|
|
849
|
+
normal_vvar_ids = cc.difference(phi_vvar_ids)
|
|
850
|
+
# ensure there is at least one phi variable and all remaining vvars are constant non-phi variables
|
|
851
|
+
if len(normal_vvar_ids) < len(cc) and len(normal_vvar_ids.intersection(vvar_values)) == len(
|
|
852
|
+
normal_vvar_ids
|
|
853
|
+
):
|
|
854
|
+
all_values = {vvar_values[vvar_id] for vvar_id in normal_vvar_ids}
|
|
855
|
+
if len(all_values) == 1:
|
|
856
|
+
# found it!
|
|
857
|
+
value, bits = next(iter(all_values))
|
|
858
|
+
for var_id in cc:
|
|
859
|
+
to_replace[var_id] = value, bits
|
|
860
|
+
|
|
861
|
+
# build the replacement dictionary
|
|
862
|
+
blocks_dict = {(node.addr, node.idx): node for node in self.func_graph.nodes()}
|
|
863
|
+
replacements: dict[tuple[int, int | None], dict[CodeLocation, dict[Expression, Expression]]] = defaultdict(dict)
|
|
864
|
+
for vvar_id, (value, bits) in to_replace.items():
|
|
865
|
+
for expr, use_loc in srda.all_vvar_uses[vvar_id]:
|
|
866
|
+
if expr is None:
|
|
867
|
+
continue
|
|
868
|
+
assert use_loc.block_addr is not None
|
|
869
|
+
key = use_loc.block_addr, use_loc.block_idx
|
|
870
|
+
stmt = blocks_dict[key].statements[use_loc.stmt_idx]
|
|
871
|
+
if is_phi_assignment(stmt):
|
|
872
|
+
continue
|
|
873
|
+
if use_loc not in replacements[key]:
|
|
874
|
+
replacements[key][use_loc] = {}
|
|
875
|
+
replacements[key][use_loc][expr] = Const(None, None, value, bits, **expr.tags)
|
|
876
|
+
|
|
877
|
+
return self._replace_exprs_in_blocks(replacements) if replacements else False
|
|
878
|
+
|
|
790
879
|
#
|
|
791
880
|
# Unifying local variables
|
|
792
881
|
#
|
|
@@ -1563,6 +1652,11 @@ class AILSimplifier(Analysis):
|
|
|
1563
1652
|
stackarg_offsets = (
|
|
1564
1653
|
{(tpl[1] & mask) for tpl in self._stack_arg_offsets} if self._stack_arg_offsets is not None else None
|
|
1565
1654
|
)
|
|
1655
|
+
retpoints: set[tuple[int, int]] = {
|
|
1656
|
+
(node.addr, node.idx)
|
|
1657
|
+
for node in self.func_graph
|
|
1658
|
+
if node.statements and isinstance(node.statements[-1], Return) and self.func_graph.out_degree[node] == 0
|
|
1659
|
+
}
|
|
1566
1660
|
|
|
1567
1661
|
while True:
|
|
1568
1662
|
new_dead_vars_found = False
|
|
@@ -1596,6 +1690,11 @@ class AILSimplifier(Analysis):
|
|
|
1596
1690
|
elif vvar_id in self._secondary_stackvars:
|
|
1597
1691
|
# secondary stack variables are potentially removable
|
|
1598
1692
|
pass
|
|
1693
|
+
elif (def_codeloc.block_addr, def_codeloc.block_idx) in retpoints:
|
|
1694
|
+
# slack variable assignments in endpoint blocks are potentially removable.
|
|
1695
|
+
# note that this is a hack! we should rely on more reliable stack variable
|
|
1696
|
+
# eliminatability detection.
|
|
1697
|
+
pass
|
|
1599
1698
|
elif stackarg_offsets is not None:
|
|
1600
1699
|
# we always remove definitions for stack arguments
|
|
1601
1700
|
assert vvar.stack_offset is not None
|
|
@@ -5,15 +5,14 @@ from typing import TYPE_CHECKING
|
|
|
5
5
|
from collections.abc import Iterable, Mapping
|
|
6
6
|
|
|
7
7
|
from angr.ailment.statement import Statement, Assignment, Call, Store, Jump
|
|
8
|
-
from angr.ailment.expression import Tmp, Load, Const, Register, Convert, Expression
|
|
8
|
+
from angr.ailment.expression import Tmp, Load, Const, Register, Convert, Expression, VirtualVariable
|
|
9
9
|
from angr.ailment import AILBlockWalkerBase
|
|
10
|
-
|
|
11
10
|
from angr.code_location import ExternalCodeLocation, CodeLocation
|
|
12
|
-
|
|
13
11
|
from angr.knowledge_plugins.key_definitions import atoms
|
|
14
12
|
from angr.analyses.s_propagator import SPropagatorAnalysis
|
|
15
13
|
from angr.analyses.s_reaching_definitions import SReachingDefinitionsAnalysis, SRDAModel
|
|
16
14
|
from angr.analyses import Analysis, register_analysis
|
|
15
|
+
from angr.utils.ssa import has_reference_to_vvar
|
|
17
16
|
from .peephole_optimizations import (
|
|
18
17
|
MULTI_STMT_OPTS,
|
|
19
18
|
STMT_OPTS,
|
|
@@ -247,6 +246,10 @@ class BlockSimplifier(Analysis):
|
|
|
247
246
|
# don't replace
|
|
248
247
|
r = False
|
|
249
248
|
new_stmt = None
|
|
249
|
+
elif isinstance(old, VirtualVariable) and has_reference_to_vvar(stmt, old.varid):
|
|
250
|
+
# never replace an l-value with an r-value
|
|
251
|
+
r = False
|
|
252
|
+
new_stmt = None
|
|
250
253
|
elif isinstance(stmt, Call) and isinstance(new, Call) and old == stmt.ret_expr:
|
|
251
254
|
# special case: do not replace the ret_expr of a call statement to another call statement
|
|
252
255
|
r = False
|
|
@@ -330,18 +333,20 @@ class BlockSimplifier(Analysis):
|
|
|
330
333
|
for idx, stmt in enumerate(block.statements):
|
|
331
334
|
if type(stmt) is Assignment:
|
|
332
335
|
# tmps can't execute new code
|
|
333
|
-
if type(stmt.dst) is Tmp and stmt.dst.tmp_idx not in used_tmps:
|
|
334
|
-
|
|
336
|
+
if (type(stmt.dst) is Tmp and stmt.dst.tmp_idx not in used_tmps) or idx in dead_defs_stmt_idx:
|
|
337
|
+
# is it assigning to an unused tmp or a dead virgin?
|
|
335
338
|
|
|
336
|
-
# is it a dead virgin?
|
|
337
|
-
if idx in dead_defs_stmt_idx:
|
|
338
339
|
# does .src involve any Call expressions? if so, we cannot remove it
|
|
339
340
|
walker = HasCallExprWalker()
|
|
340
341
|
walker.walk_expression(stmt.src)
|
|
341
342
|
if not walker.has_call_expr:
|
|
342
343
|
continue
|
|
343
344
|
|
|
344
|
-
|
|
345
|
+
if type(stmt.dst) is Tmp and isinstance(stmt.src, Call):
|
|
346
|
+
# eliminate the assignment and replace it with the call
|
|
347
|
+
stmt = stmt.src
|
|
348
|
+
|
|
349
|
+
if isinstance(stmt, Assignment) and stmt.src == stmt.dst:
|
|
345
350
|
continue
|
|
346
351
|
|
|
347
352
|
new_statements.append(stmt)
|
|
@@ -1816,6 +1816,7 @@ class Clinic(Analysis):
|
|
|
1816
1816
|
self.function, # pylint:disable=unused-variable
|
|
1817
1817
|
fail_fast=self._fail_fast, # type:ignore
|
|
1818
1818
|
func_graph=ail_graph,
|
|
1819
|
+
entry_node_addr=self.entry_node_addr,
|
|
1819
1820
|
kb=tmp_kb, # type:ignore
|
|
1820
1821
|
track_sp=False,
|
|
1821
1822
|
func_args=arg_list,
|
|
@@ -241,6 +241,30 @@ class ConditionProcessor:
|
|
|
241
241
|
self.guarding_conditions = {}
|
|
242
242
|
self._ast2annotations = {}
|
|
243
243
|
|
|
244
|
+
def have_opposite_edge_conditions(self, graph: networkx.DiGraph, src, dst0, dst1) -> bool:
|
|
245
|
+
"""
|
|
246
|
+
Check if the edge conditions of two edges (src, dst0) and (src, dst1) are opposite to each other. Try to avoid
|
|
247
|
+
condition translation if possible.
|
|
248
|
+
"""
|
|
249
|
+
|
|
250
|
+
if src in graph and graph.out_degree[src] == 2 and graph.has_edge(src, dst0) and graph.has_edge(src, dst1):
|
|
251
|
+
# sometimes the last statement is the conditional jump. sometimes it's the first statement of the block
|
|
252
|
+
if isinstance(src, ailment.Block) and src.statements and is_head_controlled_loop_block(src):
|
|
253
|
+
last_stmt = next(
|
|
254
|
+
iter(stmt for stmt in src.statements[:-1] if isinstance(stmt, ailment.Stmt.ConditionalJump)), None
|
|
255
|
+
)
|
|
256
|
+
assert last_stmt is not None
|
|
257
|
+
else:
|
|
258
|
+
last_stmt = self.get_last_statement(src)
|
|
259
|
+
|
|
260
|
+
if isinstance(last_stmt, ailment.Stmt.ConditionalJump):
|
|
261
|
+
return True
|
|
262
|
+
|
|
263
|
+
# fallback
|
|
264
|
+
edge_cond_left = self.recover_edge_condition(graph, src, dst0)
|
|
265
|
+
edge_cond_right = self.recover_edge_condition(graph, src, dst1)
|
|
266
|
+
return claripy.is_true(claripy.Not(edge_cond_left) == edge_cond_right) # type: ignore
|
|
267
|
+
|
|
244
268
|
def recover_edge_condition(self, graph: networkx.DiGraph, src, dst):
|
|
245
269
|
edge = src, dst
|
|
246
270
|
edge_data = graph.get_edge_data(*edge)
|
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
from typing import TYPE_CHECKING
|
|
3
3
|
|
|
4
4
|
from angr.ailment import Block
|
|
5
|
-
from angr.ailment.statement import Label
|
|
5
|
+
from angr.ailment.statement import Label, ConditionalJump
|
|
6
6
|
from angr.ailment.block_walker import AILBlockWalkerBase
|
|
7
7
|
|
|
8
8
|
from angr.analyses.decompiler.sequence_walker import SequenceWalker
|
|
@@ -18,6 +18,9 @@ class AILBlockCallCounter(AILBlockWalkerBase):
|
|
|
18
18
|
|
|
19
19
|
calls = 0
|
|
20
20
|
|
|
21
|
+
def _handle_ConditionalJump(self, stmt_idx: int, stmt: ConditionalJump, block: Block | None):
|
|
22
|
+
return
|
|
23
|
+
|
|
21
24
|
def _handle_CallExpr(self, expr_idx: int, expr: Call, stmt_idx: int, stmt, block: Block | None):
|
|
22
25
|
self.calls += 1
|
|
23
26
|
super()._handle_CallExpr(expr_idx, expr, stmt_idx, stmt, block)
|
|
@@ -40,6 +43,13 @@ class AILCallCounter(SequenceWalker):
|
|
|
40
43
|
self.calls = 0
|
|
41
44
|
self.non_label_stmts = 0
|
|
42
45
|
|
|
46
|
+
def _handle_Condition(self, node, **kwargs):
|
|
47
|
+
# do not count calls in conditions
|
|
48
|
+
if node.true_node is not None:
|
|
49
|
+
super()._handle(node.true_node, **kwargs)
|
|
50
|
+
if node.false_node is not None:
|
|
51
|
+
super()._handle(node.false_node, **kwargs)
|
|
52
|
+
|
|
43
53
|
def _handle_Block(self, node: Block, **kwargs): # pylint:disable=unused-argument
|
|
44
54
|
ctr = AILBlockCallCounter()
|
|
45
55
|
ctr.walk(node)
|
|
@@ -618,7 +618,9 @@ class Decompiler(Analysis):
|
|
|
618
618
|
new_type = var_manager.get_variable_type(var)
|
|
619
619
|
if new_type is not None:
|
|
620
620
|
self.func.prototype.args = (
|
|
621
|
-
self.func.prototype.args[:i]
|
|
621
|
+
*self.func.prototype.args[:i],
|
|
622
|
+
new_type,
|
|
623
|
+
*self.func.prototype.args[i + 1 :],
|
|
622
624
|
)
|
|
623
625
|
except Exception: # pylint:disable=broad-except
|
|
624
626
|
if self._fail_fast:
|
|
@@ -324,6 +324,15 @@ class GraphRegion:
|
|
|
324
324
|
out_edges = list(graph.out_edges(node))
|
|
325
325
|
|
|
326
326
|
graph.remove_node(node)
|
|
327
|
+
|
|
328
|
+
# FIXME: this is a giant hack to work around the problem that the graph region might have been restructured
|
|
329
|
+
# but not updated in *all* other regions whose .graph_with_successors references this graph region (we only
|
|
330
|
+
# update the parent_region graph right now).
|
|
331
|
+
existing_graph_regions: dict[int, GraphRegion] = {r.addr: r for r in graph if isinstance(r, GraphRegion)}
|
|
332
|
+
for r in sub_graph:
|
|
333
|
+
if isinstance(r, GraphRegion) and r not in graph and r.addr in existing_graph_regions:
|
|
334
|
+
self._replaced_regions[r] = existing_graph_regions[r.addr]
|
|
335
|
+
|
|
327
336
|
sub_graph_nodes = [self._replaced_regions.get(nn, nn) for nn in sub_graph.nodes]
|
|
328
337
|
sub_graph_edges = [
|
|
329
338
|
(self._replaced_regions.get(src, src), self._replaced_regions.get(dst, dst)) for src, dst in sub_graph.edges
|
|
@@ -376,11 +385,11 @@ class GraphRegion:
|
|
|
376
385
|
else:
|
|
377
386
|
if dst_in_subgraph in sub_graph:
|
|
378
387
|
for src in sub_graph.predecessors(dst_in_subgraph):
|
|
379
|
-
graph.add_edge(src, dst)
|
|
388
|
+
graph.add_edge(self._replaced_regions.get(src, src), dst)
|
|
380
389
|
elif reference_full_graph is not None and dst_in_subgraph in reference_full_graph:
|
|
381
390
|
for src in reference_full_graph.predecessors(dst_in_subgraph):
|
|
382
391
|
if src in graph:
|
|
383
|
-
graph.add_edge(src, dst)
|
|
392
|
+
graph.add_edge(self._replaced_regions.get(src, src), dst)
|
|
384
393
|
else:
|
|
385
394
|
# it may happen that the dst node no longer exists in sub_graph or its successors
|
|
386
395
|
# this is because we have deemed that the dst node is no longer a valid successor for sub_graph
|
|
@@ -313,7 +313,7 @@ class ConstPropOptReverter(OptimizationPass):
|
|
|
313
313
|
|
|
314
314
|
# construct new constant block
|
|
315
315
|
new_const_block = const_block.copy()
|
|
316
|
-
new_const_block.statements = new_const_block.statements[:-1]
|
|
316
|
+
new_const_block.statements = [*new_const_block.statements[:-1], reg_assign, symb_return_stmt.copy()]
|
|
317
317
|
self._update_block(const_block, new_const_block)
|
|
318
318
|
self.resolution = True
|
|
319
319
|
else:
|
|
@@ -159,6 +159,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
159
159
|
def __init__(self, func, min_distinct_cases=2, **kwargs):
|
|
160
160
|
super().__init__(
|
|
161
161
|
func,
|
|
162
|
+
require_structurable_graph=False,
|
|
162
163
|
require_gotos=False,
|
|
163
164
|
prevent_new_gotos=False,
|
|
164
165
|
simplify_ail=False,
|
|
@@ -15,7 +15,7 @@ from angr.analyses.decompiler.ailgraph_walker import AILGraphWalker
|
|
|
15
15
|
from angr.analyses.decompiler.condition_processor import ConditionProcessor
|
|
16
16
|
from angr.analyses.decompiler.goto_manager import Goto, GotoManager
|
|
17
17
|
from angr.analyses.decompiler.structuring import RecursiveStructurer, SAILRStructurer
|
|
18
|
-
from angr.analyses.decompiler.utils import add_labels, remove_edges_in_ailgraph
|
|
18
|
+
from angr.analyses.decompiler.utils import add_labels, remove_edges_in_ailgraph, is_empty_node
|
|
19
19
|
from angr.analyses.decompiler.counters import ControlFlowStructureCounter
|
|
20
20
|
from angr.project import Project
|
|
21
21
|
|
|
@@ -432,12 +432,13 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
432
432
|
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
433
433
|
|
|
434
434
|
_initial_gotos: set[Goto]
|
|
435
|
-
_goto_manager: GotoManager
|
|
435
|
+
_goto_manager: GotoManager | None
|
|
436
436
|
_prev_graph: networkx.DiGraph
|
|
437
437
|
|
|
438
438
|
def __init__(
|
|
439
439
|
self,
|
|
440
440
|
func,
|
|
441
|
+
require_structurable_graph: bool = True,
|
|
441
442
|
prevent_new_gotos: bool = True,
|
|
442
443
|
strictly_less_gotos: bool = False,
|
|
443
444
|
recover_structure_fails: bool = True,
|
|
@@ -450,6 +451,7 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
450
451
|
**kwargs,
|
|
451
452
|
):
|
|
452
453
|
super().__init__(func, **kwargs)
|
|
454
|
+
self._require_structurable_graph = require_structurable_graph
|
|
453
455
|
self._prevent_new_gotos = prevent_new_gotos
|
|
454
456
|
self._strictly_less_gotos = strictly_less_gotos
|
|
455
457
|
self._recover_structure_fails = recover_structure_fails
|
|
@@ -459,6 +461,8 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
459
461
|
self._must_improve_rel_quality = must_improve_rel_quality
|
|
460
462
|
self._readd_labels = readd_labels
|
|
461
463
|
self._edges_to_remove = edges_to_remove or []
|
|
464
|
+
self._goto_manager = None
|
|
465
|
+
self._initial_gotos = set()
|
|
462
466
|
|
|
463
467
|
# relative quality metrics (excludes gotos)
|
|
464
468
|
self._initial_structure_counter = None
|
|
@@ -476,13 +480,20 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
476
480
|
if not ret:
|
|
477
481
|
return
|
|
478
482
|
|
|
479
|
-
|
|
480
|
-
|
|
483
|
+
# only initialize self._goto_manager if this optimization requires a structurable graph or gotos
|
|
484
|
+
initial_structurable: bool | None = None
|
|
485
|
+
if self._require_structurable_graph or self._require_gotos or self._prevent_new_gotos:
|
|
486
|
+
initial_structurable = self._graph_is_structurable(self._graph, initial=True)
|
|
481
487
|
|
|
482
|
-
self.
|
|
483
|
-
if self._require_gotos and not self._initial_gotos:
|
|
488
|
+
if self._require_structurable_graph and initial_structurable is False:
|
|
484
489
|
return
|
|
485
490
|
|
|
491
|
+
if self._require_gotos:
|
|
492
|
+
assert self._goto_manager is not None
|
|
493
|
+
self._initial_gotos = self._goto_manager.gotos.copy()
|
|
494
|
+
if not self._initial_gotos:
|
|
495
|
+
return
|
|
496
|
+
|
|
486
497
|
# setup for the very first analysis
|
|
487
498
|
self.out_graph = networkx.DiGraph(self._graph)
|
|
488
499
|
if self._max_opt_iters > 1:
|
|
@@ -500,7 +511,13 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
500
511
|
if self._readd_labels:
|
|
501
512
|
self.out_graph = add_labels(self.out_graph)
|
|
502
513
|
|
|
503
|
-
if
|
|
514
|
+
if (
|
|
515
|
+
self._require_structurable_graph
|
|
516
|
+
and self._max_opt_iters <= 1
|
|
517
|
+
and not self._graph_is_structurable(self.out_graph, readd_labels=False)
|
|
518
|
+
):
|
|
519
|
+
# fixed-point analysis ensures that the output graph is always structurable, otherwise it clears the output
|
|
520
|
+
# graph. so we only check the structurability of the graph when fixed-point analysis did not run.
|
|
504
521
|
self.out_graph = None
|
|
505
522
|
return
|
|
506
523
|
|
|
@@ -523,13 +540,16 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
523
540
|
return
|
|
524
541
|
|
|
525
542
|
def _get_new_gotos(self):
|
|
543
|
+
assert self._goto_manager is not None
|
|
526
544
|
return self._goto_manager.gotos
|
|
527
545
|
|
|
528
546
|
def _fixed_point_analyze(self, cache=None):
|
|
529
547
|
had_any_changes = False
|
|
530
548
|
for _ in range(self._max_opt_iters):
|
|
531
|
-
if self._require_gotos
|
|
532
|
-
|
|
549
|
+
if self._require_gotos:
|
|
550
|
+
assert self._goto_manager is not None
|
|
551
|
+
if not self._goto_manager.gotos:
|
|
552
|
+
break
|
|
533
553
|
|
|
534
554
|
# backup the graph before the optimization
|
|
535
555
|
if self._recover_structure_fails and self.out_graph is not None:
|
|
@@ -590,7 +610,7 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
590
610
|
_l.warning("Internal structuring failed for OptimizationPass on %s", self._func.name)
|
|
591
611
|
rs = None
|
|
592
612
|
|
|
593
|
-
if not rs or not rs.result or
|
|
613
|
+
if not rs or not rs.result or is_empty_node(rs.result) or rs.result_incomplete:
|
|
594
614
|
return False
|
|
595
615
|
|
|
596
616
|
rs = self.project.analyses.RegionSimplifier(self._func, rs.result, arg_vvars=self._arg_vvars, kb=self.kb)
|
|
@@ -648,7 +668,7 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
648
668
|
# Gotos play an important part in readability and control flow structure. We already count gotos in other parts
|
|
649
669
|
# of the analysis, so we don't need to count them here. However, some gotos are worse than others. Much
|
|
650
670
|
# like loops, trading gotos (keeping the same total, but getting worse types), is bad for decompilation.
|
|
651
|
-
if len(self._initial_gotos) == len(self._goto_manager.gotos) != 0:
|
|
671
|
+
if self._goto_manager is not None and len(self._initial_gotos) == len(self._goto_manager.gotos) != 0:
|
|
652
672
|
prev_labels = self._initial_structure_counter.goto_targets
|
|
653
673
|
curr_labels = self._current_structure_counter.goto_targets
|
|
654
674
|
|
|
@@ -55,6 +55,7 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
|
|
|
55
55
|
region_identifier=None,
|
|
56
56
|
vvar_id_start: int | None = None,
|
|
57
57
|
scratch: dict[str, Any] | None = None,
|
|
58
|
+
max_func_blocks: int = 500,
|
|
58
59
|
**kwargs,
|
|
59
60
|
):
|
|
60
61
|
StructuringOptimizationPass.__init__(
|
|
@@ -76,6 +77,7 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
|
|
|
76
77
|
ri=region_identifier,
|
|
77
78
|
vvar_id_start=vvar_id_start,
|
|
78
79
|
scratch=scratch,
|
|
80
|
+
max_func_blocks=max_func_blocks,
|
|
79
81
|
)
|
|
80
82
|
self.analyze()
|
|
81
83
|
|
|
@@ -4,14 +4,14 @@ from .a_div_const_add_a_mul_n_div_const import ADivConstAddAMulNDivConst
|
|
|
4
4
|
from .a_mul_const_div_shr_const import AMulConstDivShrConst
|
|
5
5
|
from .a_shl_const_sub_a import AShlConstSubA
|
|
6
6
|
from .a_sub_a_div import ASubADiv
|
|
7
|
-
from .
|
|
7
|
+
from .modulo_simplifier import ModuloSimplifier
|
|
8
8
|
from .a_sub_a_shr_const_shr_const import ASubAShrConstShrConst
|
|
9
9
|
from .arm_cmpf import ARMCmpF
|
|
10
10
|
from .bswap import Bswap
|
|
11
11
|
from .cas_intrinsics import CASIntrinsics
|
|
12
12
|
from .coalesce_same_cascading_ifs import CoalesceSameCascadingIfs
|
|
13
13
|
from .constant_derefs import ConstantDereferences
|
|
14
|
-
from .
|
|
14
|
+
from .optimized_div_simplifier import OptimizedDivisionSimplifier
|
|
15
15
|
from .extended_byte_and_mask import ExtendedByteAndMask
|
|
16
16
|
from .remove_empty_if_body import RemoveEmptyIfBody
|
|
17
17
|
from .remove_redundant_ite_branch import RemoveRedundantITEBranches
|
|
@@ -61,14 +61,14 @@ ALL_PEEPHOLE_OPTS: list[type[PeepholeOptimizationExprBase]] = [
|
|
|
61
61
|
AShlConstSubA,
|
|
62
62
|
AMulConstSubA,
|
|
63
63
|
ASubADiv,
|
|
64
|
-
|
|
64
|
+
ModuloSimplifier,
|
|
65
65
|
ASubAShrConstShrConst,
|
|
66
66
|
ARMCmpF,
|
|
67
67
|
Bswap,
|
|
68
68
|
CASIntrinsics,
|
|
69
69
|
CoalesceSameCascadingIfs,
|
|
70
70
|
ConstantDereferences,
|
|
71
|
-
|
|
71
|
+
OptimizedDivisionSimplifier,
|
|
72
72
|
ExtendedByteAndMask,
|
|
73
73
|
RemoveEmptyIfBody,
|
|
74
74
|
RemoveRedundantITEBranches,
|
|
@@ -170,6 +170,10 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
|
|
|
170
170
|
if isinstance(expr.operands[0], Const) and expr.operands[0].value == 0:
|
|
171
171
|
return UnaryOp(expr.idx, "Neg", expr.operands[1], **expr.tags)
|
|
172
172
|
|
|
173
|
+
r = EagerEvaluation._combine_like_terms(expr)
|
|
174
|
+
if r is not None:
|
|
175
|
+
return r
|
|
176
|
+
|
|
173
177
|
if isinstance(expr.operands[0], StackBaseOffset) and isinstance(expr.operands[1], StackBaseOffset):
|
|
174
178
|
assert isinstance(expr.operands[0].offset, int) and isinstance(expr.operands[1].offset, int)
|
|
175
179
|
return Const(expr.idx, None, expr.operands[0].offset - expr.operands[1].offset, expr.bits, **expr.tags)
|
|
@@ -354,6 +358,55 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
|
|
|
354
358
|
|
|
355
359
|
return None
|
|
356
360
|
|
|
361
|
+
@staticmethod
|
|
362
|
+
def _combine_like_terms(expr: BinaryOp) -> BinaryOp | None:
|
|
363
|
+
"""
|
|
364
|
+
Combine like terms for binary operations.
|
|
365
|
+
"""
|
|
366
|
+
|
|
367
|
+
op = expr.op
|
|
368
|
+
assert op in {"Add", "Sub"}
|
|
369
|
+
|
|
370
|
+
expr0, expr1 = expr.operands
|
|
371
|
+
|
|
372
|
+
conv = None
|
|
373
|
+
if isinstance(expr0, Convert) and expr0.from_bits < expr0.to_bits:
|
|
374
|
+
conv = expr0.from_bits, expr0.to_bits, expr0.is_signed
|
|
375
|
+
expr0 = expr0.operand
|
|
376
|
+
|
|
377
|
+
if isinstance(expr0, BinaryOp) and expr0.op == "Mul" and isinstance(expr0.operands[1], Const):
|
|
378
|
+
n = expr0.operands[0]
|
|
379
|
+
|
|
380
|
+
if isinstance(n, Convert) and n.from_bits > n.to_bits:
|
|
381
|
+
if conv is not None and (n.to_bits, n.from_bits, n.is_signed) != conv:
|
|
382
|
+
return None
|
|
383
|
+
n = n.operand
|
|
384
|
+
|
|
385
|
+
if n.likes(expr1):
|
|
386
|
+
# (n * C) - n ==> (C - 1) * n
|
|
387
|
+
coeff_0 = expr0.operands[1]
|
|
388
|
+
coeff = Const(coeff_0.idx, None, coeff_0.value - 1, expr.bits, **coeff_0.tags)
|
|
389
|
+
return BinaryOp(
|
|
390
|
+
expr.idx, "Mul", [n, coeff], expr.signed, variable=expr.variable, bits=expr.bits, **expr.tags
|
|
391
|
+
)
|
|
392
|
+
if isinstance(expr1, BinaryOp) and expr1.op == "Mul" and isinstance(expr.operands[1].operands[1], Const):
|
|
393
|
+
n1 = expr.operands[1].operands[0]
|
|
394
|
+
if n.likes(n1):
|
|
395
|
+
# (n * C) - (n1 * C1) ==> n * (C - C1)
|
|
396
|
+
coeff_0 = expr0.operands[1]
|
|
397
|
+
coeff_1 = expr1.operands[1]
|
|
398
|
+
coeff = Const(coeff_0.idx, None, coeff_0.value - coeff_1.value, expr.bits, **coeff_0.tags)
|
|
399
|
+
return BinaryOp(
|
|
400
|
+
expr.idx,
|
|
401
|
+
"Mul",
|
|
402
|
+
[n, coeff],
|
|
403
|
+
expr.signed,
|
|
404
|
+
variable=expr.variable,
|
|
405
|
+
bits=expr.bits,
|
|
406
|
+
**expr.tags,
|
|
407
|
+
)
|
|
408
|
+
return None
|
|
409
|
+
|
|
357
410
|
@staticmethod
|
|
358
411
|
def _optimize_unaryop(expr: UnaryOp):
|
|
359
412
|
if expr.op == "Neg" and isinstance(expr.operand, Const) and isinstance(expr.operand.value, int):
|