angr 9.2.139__py3-none-manylinux2014_x86_64.whl → 9.2.141__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/calling_convention/calling_convention.py +136 -53
- angr/analyses/calling_convention/fact_collector.py +44 -18
- angr/analyses/calling_convention/utils.py +3 -1
- angr/analyses/cfg/cfg_base.py +13 -0
- angr/analyses/cfg/cfg_fast.py +11 -0
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +9 -8
- angr/analyses/decompiler/ail_simplifier.py +115 -72
- angr/analyses/decompiler/callsite_maker.py +24 -11
- angr/analyses/decompiler/clinic.py +78 -43
- angr/analyses/decompiler/decompiler.py +18 -7
- angr/analyses/decompiler/expression_narrower.py +1 -1
- angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +8 -7
- angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +3 -1
- angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +21 -2
- angr/analyses/decompiler/optimization_passes/ite_region_converter.py +21 -13
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +84 -15
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +92 -11
- angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +53 -9
- angr/analyses/decompiler/peephole_optimizations/eager_eval.py +44 -7
- angr/analyses/decompiler/region_identifier.py +6 -4
- angr/analyses/decompiler/region_simplifiers/expr_folding.py +287 -122
- angr/analyses/decompiler/region_simplifiers/region_simplifier.py +31 -13
- angr/analyses/decompiler/ssailification/rewriting.py +23 -15
- angr/analyses/decompiler/ssailification/rewriting_engine.py +105 -24
- angr/analyses/decompiler/ssailification/ssailification.py +22 -14
- angr/analyses/decompiler/structured_codegen/c.py +73 -137
- angr/analyses/decompiler/structuring/dream.py +22 -18
- angr/analyses/decompiler/structuring/phoenix.py +158 -41
- angr/analyses/decompiler/structuring/recursive_structurer.py +1 -0
- angr/analyses/decompiler/structuring/structurer_base.py +37 -10
- angr/analyses/decompiler/structuring/structurer_nodes.py +4 -1
- angr/analyses/decompiler/utils.py +106 -21
- angr/analyses/deobfuscator/api_obf_finder.py +8 -5
- angr/analyses/deobfuscator/api_obf_type2_finder.py +18 -10
- angr/analyses/deobfuscator/string_obf_finder.py +105 -18
- angr/analyses/forward_analysis/forward_analysis.py +1 -1
- angr/analyses/propagator/top_checker_mixin.py +6 -6
- angr/analyses/reaching_definitions/__init__.py +2 -1
- angr/analyses/reaching_definitions/dep_graph.py +1 -12
- angr/analyses/reaching_definitions/engine_vex.py +36 -31
- angr/analyses/reaching_definitions/function_handler.py +15 -2
- angr/analyses/reaching_definitions/rd_state.py +1 -37
- angr/analyses/reaching_definitions/reaching_definitions.py +13 -24
- angr/analyses/s_propagator.py +6 -41
- angr/analyses/s_reaching_definitions/s_rda_model.py +7 -1
- angr/analyses/s_reaching_definitions/s_rda_view.py +43 -25
- angr/analyses/stack_pointer_tracker.py +36 -22
- angr/analyses/typehoon/simple_solver.py +45 -7
- angr/analyses/typehoon/typeconsts.py +18 -5
- angr/analyses/variable_recovery/engine_ail.py +1 -1
- angr/analyses/variable_recovery/engine_base.py +7 -5
- angr/analyses/variable_recovery/engine_vex.py +20 -4
- angr/block.py +69 -107
- angr/callable.py +14 -7
- angr/calling_conventions.py +30 -11
- angr/distributed/__init__.py +1 -1
- angr/engines/__init__.py +7 -8
- angr/engines/engine.py +1 -120
- angr/engines/failure.py +2 -2
- angr/engines/hook.py +2 -2
- angr/engines/light/engine.py +2 -2
- angr/engines/pcode/engine.py +2 -14
- angr/engines/procedure.py +2 -2
- angr/engines/soot/engine.py +2 -2
- angr/engines/soot/statements/switch.py +1 -1
- angr/engines/successors.py +124 -11
- angr/engines/syscall.py +2 -2
- angr/engines/unicorn.py +3 -3
- angr/engines/vex/heavy/heavy.py +3 -15
- angr/factory.py +12 -22
- angr/knowledge_plugins/key_definitions/atoms.py +8 -4
- angr/knowledge_plugins/key_definitions/live_definitions.py +41 -103
- angr/knowledge_plugins/variables/variable_manager.py +7 -5
- angr/sim_type.py +19 -17
- angr/simos/simos.py +3 -1
- angr/state_plugins/plugin.py +19 -4
- angr/storage/memory_mixins/memory_mixin.py +1 -1
- angr/storage/memory_mixins/paged_memory/pages/multi_values.py +10 -5
- angr/utils/ssa/__init__.py +119 -4
- angr/utils/types.py +48 -0
- {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/METADATA +6 -6
- {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/RECORD +87 -86
- {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/LICENSE +0 -0
- {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/WHEEL +0 -0
- {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/entry_points.txt +0 -0
- {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/top_level.txt +0 -0
|
@@ -7,7 +7,7 @@ import networkx
|
|
|
7
7
|
|
|
8
8
|
from ailment import Block, AILBlockWalkerBase
|
|
9
9
|
from ailment.statement import ConditionalJump, Label, Assignment, Jump
|
|
10
|
-
from ailment.expression import Expression, BinaryOp, Const, Load
|
|
10
|
+
from ailment.expression import VirtualVariable, Expression, BinaryOp, Const, Load
|
|
11
11
|
|
|
12
12
|
from angr.utils.graph import GraphUtils
|
|
13
13
|
from angr.analyses.decompiler.utils import first_nonlabel_nonphi_statement, remove_last_statement
|
|
@@ -257,6 +257,24 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
257
257
|
_l.debug("Skipping switch-case conversion due to too few distinct cases for %s", real_cases[0])
|
|
258
258
|
continue
|
|
259
259
|
|
|
260
|
+
# RULE 4: the default case should not reach other case nodes in the subregion
|
|
261
|
+
default_addr_and_idx = next(
|
|
262
|
+
((case.target, case.target_idx) for case in cases if case.value == "default"), None
|
|
263
|
+
)
|
|
264
|
+
if default_addr_and_idx is None:
|
|
265
|
+
continue
|
|
266
|
+
default_addr, default_idx = default_addr_and_idx
|
|
267
|
+
default_node = self._get_block(default_addr, idx=default_idx)
|
|
268
|
+
default_reachable_from_case = False
|
|
269
|
+
for case in cases:
|
|
270
|
+
if case.value == "default":
|
|
271
|
+
continue
|
|
272
|
+
if self._node_reachable_from_node_in_region(case.original_node, default_node):
|
|
273
|
+
default_reachable_from_case = True
|
|
274
|
+
break
|
|
275
|
+
if default_reachable_from_case:
|
|
276
|
+
continue
|
|
277
|
+
|
|
260
278
|
original_nodes = [case.original_node for case in real_cases]
|
|
261
279
|
original_head: Block = original_nodes[0]
|
|
262
280
|
original_nodes = original_nodes[1:]
|
|
@@ -320,6 +338,10 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
320
338
|
node_to_heads[succ].add(new_head)
|
|
321
339
|
graph_copy.remove_node(onode)
|
|
322
340
|
for onode in redundant_nodes:
|
|
341
|
+
if onode in original_nodes:
|
|
342
|
+
# sometimes they overlap
|
|
343
|
+
# e.g., 0x402cc7 in mv_-O2
|
|
344
|
+
continue
|
|
323
345
|
# ensure all nodes that are only reachable from onode are also removed
|
|
324
346
|
# FIXME: Remove the entire path of nodes instead of only the immediate successors
|
|
325
347
|
successors = list(graph_copy.successors(onode))
|
|
@@ -396,6 +418,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
396
418
|
default_case_candidates = {}
|
|
397
419
|
last_comp = None
|
|
398
420
|
stack = [(head, 0, 0xFFFF_FFFF_FFFF_FFFF)]
|
|
421
|
+
head_varhash = variable_comparisons[head][1]
|
|
399
422
|
|
|
400
423
|
# cursed: there is an infinite loop in the following loop that
|
|
401
424
|
# occurs rarely. we need to keep track of the nodes we've seen
|
|
@@ -418,12 +441,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
418
441
|
next_addr,
|
|
419
442
|
next_addr_idx,
|
|
420
443
|
) = variable_comparisons[comp]
|
|
421
|
-
last_varhash = cases[-1].variable_hash if cases else None
|
|
422
444
|
|
|
423
445
|
if op == "eq":
|
|
424
446
|
# eq always indicates a new case
|
|
425
447
|
|
|
426
|
-
if
|
|
448
|
+
if head_varhash == variable_hash:
|
|
427
449
|
if target == comp.addr and target_idx == comp.idx:
|
|
428
450
|
# invalid
|
|
429
451
|
break
|
|
@@ -443,9 +465,10 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
443
465
|
# new variable!
|
|
444
466
|
if last_comp is not None and comp.addr not in default_case_candidates:
|
|
445
467
|
default_case_candidates[comp.addr] = Case(
|
|
446
|
-
last_comp, None,
|
|
468
|
+
last_comp, None, head_varhash, None, "default", comp.addr, comp.idx, None
|
|
447
469
|
)
|
|
448
|
-
|
|
470
|
+
break
|
|
471
|
+
continue
|
|
449
472
|
|
|
450
473
|
successors = [succ for succ in self._graph.successors(comp) if succ is not comp]
|
|
451
474
|
succ_addrs = {(succ.addr, succ.idx) for succ in successors}
|
|
@@ -505,7 +528,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
505
528
|
# gt always indicates new subtrees
|
|
506
529
|
gt_addr, gt_idx, le_addr, le_idx = target, target_idx, next_addr, next_addr_idx
|
|
507
530
|
# TODO: We don't yet support gt nodes acting as the head of a switch
|
|
508
|
-
if
|
|
531
|
+
if head_varhash == variable_hash:
|
|
509
532
|
successors = [succ for succ in self._graph.successors(comp) if succ is not comp]
|
|
510
533
|
succ_addrs = {(succ.addr, succ.idx) for succ in successors}
|
|
511
534
|
if succ_addrs != {(gt_addr, gt_idx), (le_addr, le_idx)}:
|
|
@@ -526,21 +549,34 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
526
549
|
le_added = True
|
|
527
550
|
if gt_added or le_added:
|
|
528
551
|
if not le_added:
|
|
529
|
-
if
|
|
552
|
+
# if min_ + 1 == value, it means we actually have another case! it's not a default case
|
|
553
|
+
if min_ + 1 == value:
|
|
554
|
+
cases.append(
|
|
555
|
+
Case(comp, comp_type, variable_hash, expr, min_ + 1, le_addr, le_idx, None)
|
|
556
|
+
)
|
|
557
|
+
used_nodes.add(comp)
|
|
558
|
+
elif le_addr not in default_case_candidates:
|
|
530
559
|
default_case_candidates[le_addr] = Case(
|
|
531
560
|
comp, None, variable_hash, expr, "default", le_addr, le_idx, None
|
|
532
561
|
)
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
562
|
+
if not gt_added:
|
|
563
|
+
# likewise, this means we have another non-default case
|
|
564
|
+
if value == max_:
|
|
565
|
+
cases.append(
|
|
566
|
+
Case(comp, comp_type, variable_hash, expr, max_, gt_addr, gt_idx, None)
|
|
567
|
+
)
|
|
568
|
+
used_nodes.add(comp)
|
|
569
|
+
elif gt_addr not in default_case_candidates:
|
|
570
|
+
default_case_candidates[gt_addr] = Case(
|
|
571
|
+
comp, None, variable_hash, expr, "default", gt_addr, gt_idx, None
|
|
572
|
+
)
|
|
537
573
|
extra_cmp_nodes.append(comp)
|
|
538
574
|
used_nodes.add(comp)
|
|
539
575
|
else:
|
|
540
576
|
break
|
|
541
577
|
else:
|
|
542
578
|
# checking on a new variable... it probably was not a switch-case
|
|
543
|
-
|
|
579
|
+
continue
|
|
544
580
|
|
|
545
581
|
if cases and len(default_case_candidates) <= 1:
|
|
546
582
|
if default_case_candidates:
|
|
@@ -606,6 +642,27 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
606
642
|
|
|
607
643
|
return varhash_to_caselists
|
|
608
644
|
|
|
645
|
+
def _node_reachable_from_node_in_region(self, to_node, from_node) -> bool:
    """
    Check whether `to_node` can be reached from `from_node` without leaving the region they share.

    :param to_node:     The block that the path must end at.
    :param from_node:   The block that the path must start from.
    :return:            True if both blocks are listed in the same entry of
                        self._ri.regions_by_block_addrs and a path from from_node to to_node exists in
                        the subgraph of self._graph induced by the blocks of that entry. False otherwise.
    """
    dst_key = (to_node.addr, to_node.idx)
    src_key = (from_node.addr, from_node.idx)

    # scan every region; if a block is listed by more than one region, the last one listed wins
    dst_region = None
    src_region = None
    for candidate in self._ri.regions_by_block_addrs:
        if dst_key in candidate:
            dst_region = candidate
        if src_key in candidate:
            src_region = candidate

    # both blocks must have been found, and they must live in the same region
    if dst_region is None or src_region is None or dst_region != src_region:
        return False

    # restrict the path search to the blocks of that region
    region_blocks = [self._get_block(addr, idx=idx) for addr, idx in dst_region]
    return networkx.has_path(self._graph.subgraph(region_blocks), from_node, to_node)
|
|
665
|
+
|
|
609
666
|
@staticmethod
|
|
610
667
|
def _find_switch_variable_comparison_type_a(
|
|
611
668
|
node,
|
|
@@ -625,7 +682,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
625
682
|
)
|
|
626
683
|
):
|
|
627
684
|
cond = stmt.condition
|
|
628
|
-
if
|
|
685
|
+
if (
|
|
686
|
+
isinstance(cond, BinaryOp)
|
|
687
|
+
and isinstance(cond.operands[0], VirtualVariable)
|
|
688
|
+
and isinstance(cond.operands[1], Const)
|
|
689
|
+
):
|
|
629
690
|
variable_hash = StableVarExprHasher(cond.operands[0]).hash
|
|
630
691
|
value = cond.operands[1].value
|
|
631
692
|
if cond.op == "CmpEQ":
|
|
@@ -672,7 +733,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
672
733
|
)
|
|
673
734
|
):
|
|
674
735
|
cond = stmt.condition
|
|
675
|
-
if
|
|
736
|
+
if (
|
|
737
|
+
isinstance(cond, BinaryOp)
|
|
738
|
+
and isinstance(cond.operands[0], VirtualVariable)
|
|
739
|
+
and isinstance(cond.operands[1], Const)
|
|
740
|
+
):
|
|
676
741
|
variable_hash = StableVarExprHasher(cond.operands[0]).hash
|
|
677
742
|
value = cond.operands[1].value
|
|
678
743
|
if cond.op == "CmpEQ":
|
|
@@ -719,7 +784,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
719
784
|
)
|
|
720
785
|
):
|
|
721
786
|
cond = stmt.condition
|
|
722
|
-
if
|
|
787
|
+
if (
|
|
788
|
+
isinstance(cond, BinaryOp)
|
|
789
|
+
and isinstance(cond.operands[0], VirtualVariable)
|
|
790
|
+
and isinstance(cond.operands[1], Const)
|
|
791
|
+
):
|
|
723
792
|
variable_hash = StableVarExprHasher(cond.operands[0]).hash
|
|
724
793
|
value = cond.operands[1].value
|
|
725
794
|
op = cond.op
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
# pylint:disable=unused-argument
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
import logging
|
|
4
|
-
from
|
|
4
|
+
from collections import namedtuple
|
|
5
5
|
from collections.abc import Generator
|
|
6
|
+
from typing import Any, TYPE_CHECKING
|
|
6
7
|
from enum import Enum
|
|
7
8
|
|
|
8
9
|
import networkx
|
|
@@ -10,10 +11,11 @@ import networkx
|
|
|
10
11
|
import ailment
|
|
11
12
|
|
|
12
13
|
from angr.analyses.decompiler import RegionIdentifier
|
|
14
|
+
from angr.analyses.decompiler.ailgraph_walker import AILGraphWalker
|
|
13
15
|
from angr.analyses.decompiler.condition_processor import ConditionProcessor
|
|
14
16
|
from angr.analyses.decompiler.goto_manager import Goto, GotoManager
|
|
15
17
|
from angr.analyses.decompiler.structuring import RecursiveStructurer, SAILRStructurer
|
|
16
|
-
from angr.analyses.decompiler.utils import add_labels
|
|
18
|
+
from angr.analyses.decompiler.utils import add_labels, remove_edges_in_ailgraph
|
|
17
19
|
from angr.analyses.decompiler.counters import ControlFlowStructureCounter
|
|
18
20
|
from angr.project import Project
|
|
19
21
|
|
|
@@ -24,6 +26,9 @@ if TYPE_CHECKING:
|
|
|
24
26
|
_l = logging.getLogger(__name__)
|
|
25
27
|
|
|
26
28
|
|
|
29
|
+
BlockCache = namedtuple("BlockCache", ("rd", "prop"))
|
|
30
|
+
|
|
31
|
+
|
|
27
32
|
class MultipleBlocksException(Exception):
|
|
28
33
|
"""
|
|
29
34
|
An exception that is raised in _get_block() where multiple blocks satisfy the criteria but only one block was
|
|
@@ -129,6 +134,8 @@ class OptimizationPass(BaseOptimizationPass):
|
|
|
129
134
|
force_loop_single_exit: bool = True,
|
|
130
135
|
complete_successors: bool = False,
|
|
131
136
|
avoid_vvar_ids: set[int] | None = None,
|
|
137
|
+
arg_vvars: set[int] | None = None,
|
|
138
|
+
peephole_optimizations=None,
|
|
132
139
|
**kwargs,
|
|
133
140
|
):
|
|
134
141
|
super().__init__(func)
|
|
@@ -141,6 +148,7 @@ class OptimizationPass(BaseOptimizationPass):
|
|
|
141
148
|
self._rd = reaching_definitions
|
|
142
149
|
self._scratch = scratch if scratch is not None else {}
|
|
143
150
|
self._new_block_addrs = set()
|
|
151
|
+
self._arg_vvars = arg_vvars
|
|
144
152
|
self.vvar_id_start = vvar_id_start
|
|
145
153
|
self.entry_node_addr: tuple[int, int | None] = (
|
|
146
154
|
entry_node_addr if entry_node_addr is not None else (func.addr, None)
|
|
@@ -148,6 +156,7 @@ class OptimizationPass(BaseOptimizationPass):
|
|
|
148
156
|
self._force_loop_single_exit = force_loop_single_exit
|
|
149
157
|
self._complete_successors = complete_successors
|
|
150
158
|
self._avoid_vvar_ids = avoid_vvar_ids or set()
|
|
159
|
+
self._peephole_optimizations = peephole_optimizations
|
|
151
160
|
|
|
152
161
|
# output
|
|
153
162
|
self.out_graph: networkx.DiGraph | None = None
|
|
@@ -265,9 +274,77 @@ class OptimizationPass(BaseOptimizationPass):
|
|
|
265
274
|
def _is_sub(expr):
|
|
266
275
|
return isinstance(expr, ailment.Expr.BinaryOp) and expr.op == "Sub"
|
|
267
276
|
|
|
277
|
+
def _simplify_blocks(
    self,
    ail_graph: networkx.DiGraph,
    cache: dict | None = None,
):
    """
    Simplify every block in the given AIL graph and swap the simplified blocks into the graph.

    :param ail_graph:   The AIL function graph whose nodes are simplified.
    :param cache:       A block-level cache that stores reaching definition analysis results and
                        propagation results. It is handed to _simplify_block() as is.
    :return:            The graph object that was passed in, after the node-replacing walk.
    """

    # simplify each block once and remember the result under its (addr, idx) pair
    simplified_blocks: dict[tuple[int, int | None], ailment.Block] = {
        (block.addr, block.idx): self._simplify_block(block, cache=cache) for block in ail_graph.nodes()
    }

    def _replacement_for(node):
        # hand the walker the simplified block for this node, or None when there is none
        return simplified_blocks.get((node.addr, node.idx))

    AILGraphWalker(ail_graph, _replacement_for, replace_nodes=True).walk()

    return ail_graph
|
|
311
|
+
|
|
312
|
+
def _simplify_block(self, ail_block, cache=None):
    """
    Simplify a single AIL block.

    :param ailment.Block ail_block: The AIL block to simplify.
    :param cache:                   An optional mapping from (block address, block idx) to BlockCache
                                    items. A hit supplies the cached reaching definitions and propagator
                                    to the simplifier; the entry is rewritten after simplification.
    :return:                        A simplified AIL block.
    """

    key = (ail_block.addr, ail_block.idx)

    # an empty (or missing) cache cannot yield a hit, so it is not consulted at all
    hit = cache.get(key, None) if cache else None
    known_rd, known_prop = (hit.rd, hit.prop) if hit else (None, None)

    simplifier = self.project.analyses.AILBlockSimplifier(
        ail_block,
        self._func.addr,
        peephole_optimizations=self._peephole_optimizations,
        cached_reaching_definitions=known_rd,
        cached_propagator=known_prop,
    )

    # store the fresh analysis results; an existing entry is dropped first and then re-inserted
    if cache is not None:
        if hit:
            del cache[key]
        cache[key] = BlockCache(simplifier._reaching_definitions, simplifier._propagator)

    return simplifier.result_block
|
|
343
|
+
|
|
268
344
|
def _simplify_graph(self, graph):
|
|
269
345
|
MAX_SIMP_ITERATION = 8
|
|
270
346
|
for _ in range(MAX_SIMP_ITERATION):
|
|
347
|
+
self._simplify_blocks(graph)
|
|
271
348
|
simp = self.project.analyses.AILSimplifier(
|
|
272
349
|
self._func,
|
|
273
350
|
func_graph=graph,
|
|
@@ -331,14 +408,15 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
331
408
|
def __init__(
|
|
332
409
|
self,
|
|
333
410
|
func,
|
|
334
|
-
prevent_new_gotos=True,
|
|
335
|
-
strictly_less_gotos=False,
|
|
336
|
-
recover_structure_fails=True,
|
|
337
|
-
must_improve_rel_quality=True,
|
|
338
|
-
max_opt_iters=1,
|
|
339
|
-
simplify_ail=True,
|
|
340
|
-
require_gotos=True,
|
|
341
|
-
readd_labels=False,
|
|
411
|
+
prevent_new_gotos: bool = True,
|
|
412
|
+
strictly_less_gotos: bool = False,
|
|
413
|
+
recover_structure_fails: bool = True,
|
|
414
|
+
must_improve_rel_quality: bool = True,
|
|
415
|
+
max_opt_iters: int = 1,
|
|
416
|
+
simplify_ail: bool = True,
|
|
417
|
+
require_gotos: bool = True,
|
|
418
|
+
readd_labels: bool = False,
|
|
419
|
+
edges_to_remove: list[tuple[tuple[int, int | None], tuple[int, int | None]]] | None = None,
|
|
342
420
|
**kwargs,
|
|
343
421
|
):
|
|
344
422
|
super().__init__(func, **kwargs)
|
|
@@ -350,6 +428,7 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
350
428
|
self._require_gotos = require_gotos
|
|
351
429
|
self._must_improve_rel_quality = must_improve_rel_quality
|
|
352
430
|
self._readd_labels = readd_labels
|
|
431
|
+
self._edges_to_remove = edges_to_remove or []
|
|
353
432
|
|
|
354
433
|
# relative quality metrics (excludes gotos)
|
|
355
434
|
self._initial_structure_counter = None
|
|
@@ -452,6 +531,8 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
452
531
|
if readd_labels:
|
|
453
532
|
graph = add_labels(graph)
|
|
454
533
|
|
|
534
|
+
remove_edges_in_ailgraph(graph, self._edges_to_remove)
|
|
535
|
+
|
|
455
536
|
self._ri = self.project.analyses[RegionIdentifier].prep(kb=self.kb)(
|
|
456
537
|
self._func,
|
|
457
538
|
graph=graph,
|
|
@@ -482,7 +563,7 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
482
563
|
if not rs or not rs.result or not rs.result.nodes or rs.result_incomplete:
|
|
483
564
|
return False
|
|
484
565
|
|
|
485
|
-
rs = self.project.analyses.RegionSimplifier(self._func, rs.result,
|
|
566
|
+
rs = self.project.analyses.RegionSimplifier(self._func, rs.result, arg_vvars=self._arg_vvars, kb=self.kb)
|
|
486
567
|
if not rs or rs.goto_manager is None or rs.result is None:
|
|
487
568
|
return False
|
|
488
569
|
|
|
@@ -34,13 +34,23 @@ class FreshVirtualVariableRewriter(AILBlockWalker):
|
|
|
34
34
|
def _handle_Assignment(self, stmt_idx: int, stmt: Assignment, block: Block | None):
|
|
35
35
|
new_stmt = super()._handle_Assignment(stmt_idx, stmt, block)
|
|
36
36
|
dst = new_stmt.dst if new_stmt is not None else stmt.dst
|
|
37
|
+
src = new_stmt.src if new_stmt is not None else stmt.src
|
|
37
38
|
if isinstance(dst, VirtualVariable):
|
|
38
39
|
self.vvar_mapping[dst.varid] = self.vvar_idx
|
|
39
40
|
self.vvar_idx += 1
|
|
40
41
|
|
|
41
|
-
dst = VirtualVariable(
|
|
42
|
+
dst = VirtualVariable(
|
|
43
|
+
dst.idx,
|
|
44
|
+
self.vvar_mapping[dst.varid],
|
|
45
|
+
dst.bits,
|
|
46
|
+
dst.category,
|
|
47
|
+
dst.oident,
|
|
48
|
+
variable=dst.variable,
|
|
49
|
+
variable_offset=dst.variable_offset,
|
|
50
|
+
**dst.tags,
|
|
51
|
+
)
|
|
42
52
|
|
|
43
|
-
return Assignment(stmt.idx, dst,
|
|
53
|
+
return Assignment(stmt.idx, dst, src, **stmt.tags)
|
|
44
54
|
|
|
45
55
|
return new_stmt
|
|
46
56
|
|
|
@@ -133,18 +143,31 @@ class ReturnDuplicatorBase:
|
|
|
133
143
|
self._supergraph = to_ail_supergraph(graph)
|
|
134
144
|
for region_head, (in_edges, region) in endnode_regions.items():
|
|
135
145
|
is_single_const_ret_region = self._is_simple_return_graph(region)
|
|
146
|
+
dup_pred_nodes = []
|
|
147
|
+
# duplicate the entire region if at least (N-2) of the region head's in-edges should be duplicated.
|
|
148
|
+
# otherwise we only duplicate the edges that should be duplicated
|
|
136
149
|
for in_edge in in_edges:
|
|
137
150
|
pred_node = in_edge[0]
|
|
138
151
|
if self._should_duplicate_dst(
|
|
139
152
|
pred_node, region_head, graph, dst_is_const_ret=is_single_const_ret_region
|
|
140
153
|
):
|
|
154
|
+
dup_pred_nodes.append(pred_node)
|
|
155
|
+
|
|
156
|
+
dup_count = len(dup_pred_nodes)
|
|
157
|
+
dup_all = dup_count >= len(in_edges) - 2 > 0
|
|
158
|
+
if dup_all:
|
|
159
|
+
for pred_node in sorted((in_edge[0] for in_edge in in_edges), key=lambda x: x.addr):
|
|
141
160
|
# every eligible pred gets a new region copy
|
|
142
161
|
self._copy_region([pred_node], region_head, region, graph)
|
|
162
|
+
graph_changed = True
|
|
163
|
+
else:
|
|
164
|
+
for pred_node in dup_pred_nodes:
|
|
165
|
+
self._copy_region([pred_node], region_head, region, graph)
|
|
166
|
+
graph_changed = True
|
|
143
167
|
|
|
144
168
|
if region_head in graph and graph.in_degree(region_head) == 0:
|
|
145
169
|
graph.remove_nodes_from(region)
|
|
146
|
-
|
|
147
|
-
graph_changed = True
|
|
170
|
+
graph_changed = True
|
|
148
171
|
|
|
149
172
|
return graph_changed
|
|
150
173
|
|
|
@@ -199,10 +222,10 @@ class ReturnDuplicatorBase:
|
|
|
199
222
|
|
|
200
223
|
return end_node_regions
|
|
201
224
|
|
|
202
|
-
def _copy_region(self, pred_nodes, region_head, region, graph):
|
|
225
|
+
def _copy_region(self, pred_nodes: list[Block], region_head, region, graph):
|
|
203
226
|
# copy the entire return region
|
|
204
227
|
copies: dict[Block, Block] = {}
|
|
205
|
-
queue = [(pred_node, region_head) for pred_node in pred_nodes]
|
|
228
|
+
queue: list[tuple[Block, Block]] = [(pred_node, region_head) for pred_node in pred_nodes]
|
|
206
229
|
vvar_mapping: dict[int, int] = {}
|
|
207
230
|
while queue:
|
|
208
231
|
pred, node = queue.pop(0)
|
|
@@ -224,12 +247,33 @@ class ReturnDuplicatorBase:
|
|
|
224
247
|
last_stmt = ConditionProcessor.get_last_statement(pred)
|
|
225
248
|
if isinstance(last_stmt, Jump):
|
|
226
249
|
if isinstance(last_stmt.target, Const) and last_stmt.target.value == node_copy.addr:
|
|
227
|
-
|
|
250
|
+
updated_last_stmt = Jump(
|
|
251
|
+
last_stmt.idx, last_stmt.target, target_idx=node_copy.idx, **last_stmt.tags
|
|
252
|
+
)
|
|
253
|
+
pred.statements[-1] = updated_last_stmt
|
|
228
254
|
elif isinstance(last_stmt, ConditionalJump):
|
|
229
255
|
if isinstance(last_stmt.true_target, Const) and last_stmt.true_target.value == node_copy.addr:
|
|
230
|
-
|
|
256
|
+
updated_last_stmt = ConditionalJump(
|
|
257
|
+
last_stmt.idx,
|
|
258
|
+
last_stmt.condition,
|
|
259
|
+
last_stmt.true_target,
|
|
260
|
+
last_stmt.false_target,
|
|
261
|
+
true_target_idx=node_copy.idx,
|
|
262
|
+
false_target_idx=last_stmt.false_target_idx,
|
|
263
|
+
**last_stmt.tags,
|
|
264
|
+
)
|
|
265
|
+
pred.statements[-1] = updated_last_stmt
|
|
231
266
|
elif isinstance(last_stmt.false_target, Const) and last_stmt.false_target.value == node_copy.addr:
|
|
232
|
-
|
|
267
|
+
updated_last_stmt = ConditionalJump(
|
|
268
|
+
last_stmt.idx,
|
|
269
|
+
last_stmt.condition,
|
|
270
|
+
last_stmt.true_target,
|
|
271
|
+
last_stmt.false_target,
|
|
272
|
+
true_target_idx=last_stmt.true_target_idx,
|
|
273
|
+
false_target_idx=node_copy.idx,
|
|
274
|
+
**last_stmt.tags,
|
|
275
|
+
)
|
|
276
|
+
pred.statements[-1] = updated_last_stmt
|
|
233
277
|
except EmptyBlockNotice:
|
|
234
278
|
pass
|
|
235
279
|
|
|
@@ -29,7 +29,12 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
|
|
|
29
29
|
@staticmethod
|
|
30
30
|
def _optimize_binaryop(expr: BinaryOp):
|
|
31
31
|
if expr.op == "Add":
|
|
32
|
-
if
|
|
32
|
+
if (
|
|
33
|
+
isinstance(expr.operands[0], Const)
|
|
34
|
+
and isinstance(expr.operands[0].value, int)
|
|
35
|
+
and isinstance(expr.operands[1], Const)
|
|
36
|
+
and isinstance(expr.operands[1].value, int)
|
|
37
|
+
):
|
|
33
38
|
mask = (1 << expr.bits) - 1
|
|
34
39
|
return Const(
|
|
35
40
|
expr.idx, None, (expr.operands[0].value + expr.operands[1].value) & mask, expr.bits, **expr.tags
|
|
@@ -99,13 +104,19 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
|
|
|
99
104
|
new_const = Const(const1.idx, None, const1.value + 1, const1.bits, **const1.tags)
|
|
100
105
|
return BinaryOp(expr.idx, "Mul", [x1, new_const], expr.signed, **expr.tags)
|
|
101
106
|
elif op0_is_mulconst and op1_is_mulconst:
|
|
107
|
+
assert x0 is not None and x1 is not None and const0 is not None and const1 is not None
|
|
102
108
|
if x0.likes(x1):
|
|
103
109
|
# x * A + x * B => (A + B) * x
|
|
104
110
|
new_const = Const(const0.idx, None, const0.value + const1.value, const0.bits, **const0.tags)
|
|
105
111
|
return BinaryOp(expr.idx, "Mul", [x0, new_const], expr.signed, **expr.tags)
|
|
106
112
|
|
|
107
113
|
elif expr.op == "Sub":
|
|
108
|
-
if
|
|
114
|
+
if (
|
|
115
|
+
isinstance(expr.operands[0], Const)
|
|
116
|
+
and isinstance(expr.operands[0].value, int)
|
|
117
|
+
and isinstance(expr.operands[1], Const)
|
|
118
|
+
and isinstance(expr.operands[1].value, int)
|
|
119
|
+
):
|
|
109
120
|
mask = (1 << expr.bits) - 1
|
|
110
121
|
return Const(
|
|
111
122
|
expr.idx, None, (expr.operands[0].value - expr.operands[1].value) & mask, expr.bits, **expr.tags
|
|
@@ -138,12 +149,19 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
|
|
|
138
149
|
return UnaryOp(expr.idx, "Neg", expr.operands[1], **expr.tags)
|
|
139
150
|
|
|
140
151
|
if isinstance(expr.operands[0], StackBaseOffset) and isinstance(expr.operands[1], StackBaseOffset):
|
|
152
|
+
assert isinstance(expr.operands[0].offset, int) and isinstance(expr.operands[1].offset, int)
|
|
141
153
|
return Const(expr.idx, None, expr.operands[0].offset - expr.operands[1].offset, expr.bits, **expr.tags)
|
|
142
154
|
|
|
143
155
|
elif expr.op == "And":
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
156
|
+
op0, op1 = expr.operands
|
|
157
|
+
if (
|
|
158
|
+
isinstance(op0, Const)
|
|
159
|
+
and isinstance(op0.value, int)
|
|
160
|
+
and isinstance(op1, Const)
|
|
161
|
+
and isinstance(op1.value, int)
|
|
162
|
+
):
|
|
163
|
+
return Const(expr.idx, None, (op0.value & op1.value), expr.bits, **expr.tags)
|
|
164
|
+
if isinstance(op1, Const) and op1.value == 0:
|
|
147
165
|
return Const(expr.idx, None, 0, expr.bits, **expr.tags)
|
|
148
166
|
|
|
149
167
|
elif expr.op == "Mul":
|
|
@@ -156,6 +174,7 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
|
|
|
156
174
|
and isinstance(expr.operands[1], Const)
|
|
157
175
|
and expr.operands[1].is_int
|
|
158
176
|
):
|
|
177
|
+
assert isinstance(expr.operands[0].value, int) and isinstance(expr.operands[1].value, int)
|
|
159
178
|
# constant multiplication
|
|
160
179
|
mask = (1 << expr.bits) - 1
|
|
161
180
|
return Const(
|
|
@@ -235,7 +254,13 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
|
|
|
235
254
|
return Const(expr0.idx, None, (const_a << expr1.value) & mask, expr0.bits, **expr0.tags)
|
|
236
255
|
|
|
237
256
|
elif expr.op == "Or":
|
|
238
|
-
|
|
257
|
+
op0, op1 = expr.operands
|
|
258
|
+
if (
|
|
259
|
+
isinstance(op0, Const)
|
|
260
|
+
and isinstance(op0.value, int)
|
|
261
|
+
and isinstance(op1, Const)
|
|
262
|
+
and isinstance(op1.value, int)
|
|
263
|
+
):
|
|
239
264
|
return Const(expr.idx, None, expr.operands[0].value | expr.operands[1].value, expr.bits, **expr.tags)
|
|
240
265
|
if isinstance(expr.operands[0], Const) and expr.operands[0].value == 0:
|
|
241
266
|
return expr.operands[1]
|
|
@@ -248,6 +273,16 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
|
|
|
248
273
|
if expr.operands[0].likes(expr.operands[1]):
|
|
249
274
|
return expr.operands[0]
|
|
250
275
|
|
|
276
|
+
elif expr.op == "Xor":
|
|
277
|
+
op0, op1 = expr.operands
|
|
278
|
+
if (
|
|
279
|
+
isinstance(op0, Const)
|
|
280
|
+
and isinstance(op0.value, int)
|
|
281
|
+
and isinstance(op1, Const)
|
|
282
|
+
and isinstance(op1.value, int)
|
|
283
|
+
):
|
|
284
|
+
return Const(expr.idx, None, expr.operands[0].value ^ expr.operands[1].value, expr.bits, **expr.tags)
|
|
285
|
+
|
|
251
286
|
elif expr.op in {"CmpEQ", "CmpLE", "CmpGE"}:
|
|
252
287
|
if expr.operands[0].likes(expr.operands[1]):
|
|
253
288
|
# x == x => 1
|
|
@@ -288,7 +323,7 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
|
|
|
288
323
|
|
|
289
324
|
@staticmethod
|
|
290
325
|
def _optimize_unaryop(expr: UnaryOp):
|
|
291
|
-
if expr.op == "Neg" and isinstance(expr.operand, Const):
|
|
326
|
+
if expr.op == "Neg" and isinstance(expr.operand, Const) and isinstance(expr.operand.value, int):
|
|
292
327
|
const_a = expr.operand.value
|
|
293
328
|
mask = (2**expr.bits) - 1
|
|
294
329
|
return Const(expr.idx, None, (~const_a) & mask, expr.bits, **expr.tags)
|
|
@@ -304,6 +339,7 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
|
|
|
304
339
|
and expr.to_type == Convert.TYPE_INT
|
|
305
340
|
and expr.from_bits > expr.to_bits
|
|
306
341
|
):
|
|
342
|
+
assert isinstance(expr.operand.value, int)
|
|
307
343
|
# truncation
|
|
308
344
|
mask = (1 << expr.to_bits) - 1
|
|
309
345
|
v = expr.operand.value & mask
|
|
@@ -315,6 +351,7 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
|
|
|
315
351
|
and expr.to_type == Convert.TYPE_INT
|
|
316
352
|
and expr.from_bits <= expr.to_bits
|
|
317
353
|
):
|
|
354
|
+
assert isinstance(expr.operand.value, int)
|
|
318
355
|
if expr.is_signed is False:
|
|
319
356
|
# unsigned extension
|
|
320
357
|
return Const(expr.idx, expr.operand.variable, expr.operand.value, expr.to_bits, **expr.operand.tags)
|
|
@@ -106,7 +106,7 @@ class RegionIdentifier(Analysis):
|
|
|
106
106
|
# make regions into block address lists
|
|
107
107
|
self.regions_by_block_addrs = self._make_regions_by_block_addrs()
|
|
108
108
|
|
|
109
|
-
def _make_regions_by_block_addrs(self) -> list[list[int]]:
|
|
109
|
+
def _make_regions_by_block_addrs(self) -> list[list[tuple[int, int | None]]]:
|
|
110
110
|
"""
|
|
111
111
|
Creates a list of addr lists representing each region without recursion. A single region is defined
|
|
112
112
|
as a set of only blocks, no Graphs containing nested regions. The list contains the address of each
|
|
@@ -124,13 +124,15 @@ class RegionIdentifier(Analysis):
|
|
|
124
124
|
children_blocks = []
|
|
125
125
|
for node in region.graph.nodes:
|
|
126
126
|
if isinstance(node, Block):
|
|
127
|
-
children_blocks.append(node.addr)
|
|
127
|
+
children_blocks.append((node.addr, node.idx))
|
|
128
128
|
elif isinstance(node, MultiNode):
|
|
129
|
-
children_blocks += [n.addr for n in node.nodes]
|
|
129
|
+
children_blocks += [(n.addr, node.idx) for n in node.nodes]
|
|
130
130
|
elif isinstance(node, GraphRegion):
|
|
131
131
|
if node not in seen_regions:
|
|
132
132
|
children_regions.append(node)
|
|
133
|
-
children_blocks.append(
|
|
133
|
+
children_blocks.append(
|
|
134
|
+
(node.head.addr, node.head.idx if hasattr(node.head, "idx") else None)
|
|
135
|
+
)
|
|
134
136
|
seen_regions.add(node)
|
|
135
137
|
else:
|
|
136
138
|
continue
|