angr 9.2.140__py3-none-win_amd64.whl → 9.2.142__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/calling_convention/calling_convention.py +105 -35
- angr/analyses/calling_convention/fact_collector.py +44 -18
- angr/analyses/calling_convention/utils.py +3 -1
- angr/analyses/cfg/cfg_base.py +38 -4
- angr/analyses/cfg/cfg_fast.py +23 -7
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +13 -8
- angr/analyses/class_identifier.py +8 -7
- angr/analyses/complete_calling_conventions.py +1 -1
- angr/analyses/decompiler/ail_simplifier.py +105 -62
- angr/analyses/decompiler/callsite_maker.py +24 -11
- angr/analyses/decompiler/clinic.py +83 -5
- angr/analyses/decompiler/condition_processor.py +7 -7
- angr/analyses/decompiler/decompilation_cache.py +2 -1
- angr/analyses/decompiler/decompiler.py +11 -2
- angr/analyses/decompiler/dephication/graph_vvar_mapping.py +4 -6
- angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +8 -2
- angr/analyses/decompiler/optimization_passes/condition_constprop.py +63 -34
- angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +3 -1
- angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +21 -2
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +85 -16
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +78 -1
- angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +29 -7
- angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +51 -7
- angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +6 -0
- angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +9 -1
- angr/analyses/decompiler/peephole_optimizations/eager_eval.py +44 -7
- angr/analyses/decompiler/region_identifier.py +76 -51
- angr/analyses/decompiler/region_simplifiers/expr_folding.py +32 -18
- angr/analyses/decompiler/region_simplifiers/region_simplifier.py +4 -1
- angr/analyses/decompiler/ssailification/rewriting.py +70 -32
- angr/analyses/decompiler/ssailification/rewriting_engine.py +118 -24
- angr/analyses/decompiler/ssailification/ssailification.py +22 -14
- angr/analyses/decompiler/stack_item.py +36 -0
- angr/analyses/decompiler/structured_codegen/c.py +86 -145
- angr/analyses/decompiler/structuring/dream.py +1 -1
- angr/analyses/decompiler/structuring/phoenix.py +9 -4
- angr/analyses/decompiler/structuring/structurer_base.py +2 -1
- angr/analyses/decompiler/utils.py +46 -20
- angr/analyses/find_objects_static.py +2 -1
- angr/analyses/reaching_definitions/engine_vex.py +13 -0
- angr/analyses/reaching_definitions/function_handler.py +24 -10
- angr/analyses/reaching_definitions/function_handler_library/stdio.py +1 -0
- angr/analyses/reaching_definitions/function_handler_library/stdlib.py +45 -12
- angr/analyses/reaching_definitions/function_handler_library/string.py +77 -21
- angr/analyses/reaching_definitions/function_handler_library/unistd.py +21 -1
- angr/analyses/reaching_definitions/rd_state.py +11 -7
- angr/analyses/s_liveness.py +44 -6
- angr/analyses/s_reaching_definitions/s_rda_model.py +4 -2
- angr/analyses/s_reaching_definitions/s_rda_view.py +43 -25
- angr/analyses/typehoon/simple_solver.py +35 -8
- angr/analyses/typehoon/typehoon.py +3 -1
- angr/analyses/variable_recovery/engine_ail.py +1 -1
- angr/analyses/variable_recovery/engine_vex.py +20 -4
- angr/calling_conventions.py +17 -12
- angr/factory.py +8 -3
- angr/knowledge_plugins/functions/function.py +5 -10
- angr/knowledge_plugins/variables/variable_manager.py +34 -5
- angr/lib/angr_native.dll +0 -0
- angr/procedures/definitions/__init__.py +3 -10
- angr/procedures/definitions/wdk_ntoskrnl.py +2 -0
- angr/procedures/win32_kernel/__fastfail.py +15 -0
- angr/sim_procedure.py +2 -2
- angr/simos/simos.py +17 -11
- angr/simos/windows.py +42 -1
- angr/utils/ail.py +41 -1
- angr/utils/cpp.py +17 -0
- angr/utils/doms.py +142 -0
- angr/utils/library.py +1 -1
- angr/utils/types.py +59 -0
- {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/METADATA +7 -7
- {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/RECORD +76 -71
- {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/LICENSE +0 -0
- {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/WHEEL +0 -0
- {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/entry_points.txt +0 -0
- {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/top_level.txt +0 -0
|
@@ -6,6 +6,7 @@ from .structured_codegen import BaseStructuredCodeGenerator
|
|
|
6
6
|
|
|
7
7
|
if TYPE_CHECKING:
|
|
8
8
|
from angr.analyses.decompiler.optimization_passes.expr_op_swapper import OpDescriptor
|
|
9
|
+
from angr.analyses.typehoon.typevars import TypeVariable, TypeConstraint
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
class DecompilationCache:
|
|
@@ -29,7 +30,7 @@ class DecompilationCache:
|
|
|
29
30
|
def __init__(self, addr):
|
|
30
31
|
self.parameters: dict[str, Any] = {}
|
|
31
32
|
self.addr = addr
|
|
32
|
-
self.type_constraints: set | None = None
|
|
33
|
+
self.type_constraints: dict[TypeVariable, set[TypeConstraint]] | None = None
|
|
33
34
|
self.func_typevar = None
|
|
34
35
|
self.var_to_typevar: dict | None = None
|
|
35
36
|
self.codegen: BaseStructuredCodeGenerator | None = None
|
|
@@ -31,6 +31,7 @@ from .presets import DECOMPILATION_PRESETS, DecompilationPreset
|
|
|
31
31
|
if TYPE_CHECKING:
|
|
32
32
|
from angr.knowledge_plugins.cfg.cfg_model import CFGModel
|
|
33
33
|
from .peephole_optimizations import PeepholeOptimizationExprBase, PeepholeOptimizationStmtBase
|
|
34
|
+
from angr.analyses.typehoon.typevars import TypeVariable, TypeConstraint
|
|
34
35
|
|
|
35
36
|
l = logging.getLogger(name=__name__)
|
|
36
37
|
|
|
@@ -135,6 +136,7 @@ class Decompiler(Analysis):
|
|
|
135
136
|
self.unoptimized_ail_graph: networkx.DiGraph | None = None
|
|
136
137
|
self.ail_graph: networkx.DiGraph | None = None
|
|
137
138
|
self.vvar_id_start = None
|
|
139
|
+
self._copied_var_ids: set[int] = set()
|
|
138
140
|
self._optimization_scratch: dict[str, Any] = {}
|
|
139
141
|
self.expr_collapse_depth = expr_collapse_depth
|
|
140
142
|
|
|
@@ -267,6 +269,7 @@ class Decompiler(Analysis):
|
|
|
267
269
|
self._variable_kb = clinic.variable_kb
|
|
268
270
|
self._update_progress(70.0, text="Identifying regions")
|
|
269
271
|
self.vvar_id_start = clinic.vvar_id_start
|
|
272
|
+
self._copied_var_ids = clinic.copied_var_ids
|
|
270
273
|
|
|
271
274
|
if clinic.graph is None:
|
|
272
275
|
# the function is empty
|
|
@@ -500,6 +503,8 @@ class Decompiler(Analysis):
|
|
|
500
503
|
scratch=self._optimization_scratch,
|
|
501
504
|
force_loop_single_exit=self._force_loop_single_exit,
|
|
502
505
|
complete_successors=self._complete_successors,
|
|
506
|
+
peephole_optimizations=self._peephole_optimizations,
|
|
507
|
+
avoid_vvar_ids=self._copied_var_ids,
|
|
503
508
|
**kwargs,
|
|
504
509
|
)
|
|
505
510
|
|
|
@@ -545,7 +550,9 @@ class Decompiler(Analysis):
|
|
|
545
550
|
SimMemoryVariable(symbol.rebased_addr, 1, name=symbol.name, ident=ident),
|
|
546
551
|
)
|
|
547
552
|
|
|
548
|
-
def reflow_variable_types(
|
|
553
|
+
def reflow_variable_types(
|
|
554
|
+
self, type_constraints: dict[TypeVariable, set[TypeConstraint]], func_typevar, var_to_typevar: dict, codegen
|
|
555
|
+
):
|
|
549
556
|
"""
|
|
550
557
|
Re-run type inference on an existing variable recovery result, then rerun codegen to generate new results.
|
|
551
558
|
|
|
@@ -605,7 +612,9 @@ class Decompiler(Analysis):
|
|
|
605
612
|
var = arg.variable
|
|
606
613
|
new_type = var_manager.get_variable_type(var)
|
|
607
614
|
if new_type is not None:
|
|
608
|
-
self.func.prototype.args
|
|
615
|
+
self.func.prototype.args = (
|
|
616
|
+
self.func.prototype.args[:i] + (new_type,) + self.func.prototype.args[i + 1 :]
|
|
617
|
+
)
|
|
609
618
|
except Exception: # pylint:disable=broad-except
|
|
610
619
|
l.warning(
|
|
611
620
|
"Typehoon analysis failed. Variables will not have types. Please report to GitHub.", exc_info=True
|
|
@@ -283,14 +283,12 @@ class GraphDephicationVVarMapping(Analysis): # pylint:disable=abstract-method
|
|
|
283
283
|
|
|
284
284
|
@staticmethod
|
|
285
285
|
def _prepend_stmt(block, stmt):
|
|
286
|
-
|
|
287
|
-
# TODO: fix the assumption elsewhere in the code base.
|
|
288
|
-
first_nonlabel_idx = len(block.statements)
|
|
286
|
+
first_nonlabel_nonphi_idx = len(block.statements)
|
|
289
287
|
for i, s in enumerate(block.statements):
|
|
290
|
-
if not isinstance(s, Label):
|
|
291
|
-
|
|
288
|
+
if not isinstance(s, Label) and not is_phi_assignment(s):
|
|
289
|
+
first_nonlabel_nonphi_idx = i
|
|
292
290
|
break
|
|
293
|
-
block.statements.insert(
|
|
291
|
+
block.statements.insert(first_nonlabel_nonphi_idx, stmt)
|
|
294
292
|
|
|
295
293
|
@staticmethod
|
|
296
294
|
def _used_in_phi(dst_block, src_block, vvar_id: int) -> bool:
|
|
@@ -4,6 +4,7 @@ import logging
|
|
|
4
4
|
|
|
5
5
|
import ailment
|
|
6
6
|
|
|
7
|
+
from angr.analyses.decompiler.stack_item import StackItem, StackItemType
|
|
7
8
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
8
9
|
|
|
9
10
|
_l = logging.getLogger(name=__name__)
|
|
@@ -62,11 +63,16 @@ class BasePointerSaveSimplifier(OptimizationPass):
|
|
|
62
63
|
return
|
|
63
64
|
|
|
64
65
|
# update the first block
|
|
65
|
-
block, stmt_idx,
|
|
66
|
+
block, stmt_idx, save_dst = save_stmt
|
|
66
67
|
block_copy = block.copy()
|
|
67
68
|
block_copy.statements.pop(stmt_idx)
|
|
68
69
|
self._update_block(block, block_copy)
|
|
69
70
|
|
|
71
|
+
# update stack_items
|
|
72
|
+
self.stack_items[save_dst.stack_offset] = StackItem(
|
|
73
|
+
save_dst.stack_offset, save_dst.size, "saved_bp", StackItemType.SAVED_BP
|
|
74
|
+
)
|
|
75
|
+
|
|
70
76
|
# update all endpoint blocks
|
|
71
77
|
if restore_stmts:
|
|
72
78
|
for block, stmt_idx, _ in restore_stmts:
|
|
@@ -74,7 +80,7 @@ class BasePointerSaveSimplifier(OptimizationPass):
|
|
|
74
80
|
block_copy.statements.pop(stmt_idx)
|
|
75
81
|
self._update_block(block, block_copy)
|
|
76
82
|
|
|
77
|
-
def _find_baseptr_save_stmt(self):
|
|
83
|
+
def _find_baseptr_save_stmt(self) -> tuple[ailment.Block, int, ailment.Expr.VirtualVariable] | None:
|
|
78
84
|
"""
|
|
79
85
|
Find the AIL statement that saves the base pointer to a stack slot.
|
|
80
86
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
2
3
|
|
|
3
4
|
import networkx
|
|
4
5
|
|
|
@@ -6,9 +7,14 @@ from ailment import AILBlockWalker, Block
|
|
|
6
7
|
from ailment.statement import ConditionalJump, Statement
|
|
7
8
|
from ailment.expression import Const, BinaryOp, VirtualVariable
|
|
8
9
|
|
|
9
|
-
from angr.analyses.decompiler.
|
|
10
|
+
from angr.analyses.decompiler.utils import first_nonlabel_nonphi_statement
|
|
11
|
+
from angr.utils.graph import dominates
|
|
12
|
+
from angr.utils.timing import timethis
|
|
10
13
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
11
14
|
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from angr.analyses.s_reaching_definitions import SRDAModel
|
|
17
|
+
|
|
12
18
|
|
|
13
19
|
class ConstantCondition:
|
|
14
20
|
"""
|
|
@@ -78,6 +84,7 @@ class ConditionConstantPropagation(OptimizationPass):
|
|
|
78
84
|
return False, None
|
|
79
85
|
return True, {"cconds": cconds}
|
|
80
86
|
|
|
87
|
+
@timethis
|
|
81
88
|
def _analyze(self, cache=None):
|
|
82
89
|
if not cache or cache.get("cconds", None) is None: # noqa: SIM108
|
|
83
90
|
cconds = self._find_const_conditions()
|
|
@@ -98,23 +105,27 @@ class ConditionConstantPropagation(OptimizationPass):
|
|
|
98
105
|
# calculate a dominance frontier for each block
|
|
99
106
|
entry_node_addr, entry_node_idx = self.entry_node_addr
|
|
100
107
|
entry_node = self._get_block(entry_node_addr, idx=entry_node_idx)
|
|
101
|
-
|
|
108
|
+
idoms = networkx.algorithms.immediate_dominators(self._graph, entry_node)
|
|
109
|
+
rda: SRDAModel = self.project.analyses.SReachingDefinitions(self._func, func_graph=self._graph).model
|
|
102
110
|
|
|
103
111
|
for src, cconds in cconds_by_src.items():
|
|
104
112
|
head_block = self._get_block(src[0], idx=src[1])
|
|
105
113
|
if head_block is None:
|
|
106
114
|
continue
|
|
107
|
-
frontier = df.get(head_block)
|
|
108
|
-
if frontier is None:
|
|
109
|
-
continue
|
|
110
|
-
graph_slice = RegionIdentifier.slice_graph(self._graph, head_block, frontier, include_frontier=False)
|
|
111
|
-
for ccond in cconds:
|
|
112
|
-
walker = CCondPropBlockWalker(ccond.vvar_id, ccond.value)
|
|
113
|
-
for block in graph_slice:
|
|
114
|
-
new_block = walker.walk(block)
|
|
115
|
-
if new_block is not None:
|
|
116
|
-
self._update_block(block, new_block)
|
|
117
115
|
|
|
116
|
+
for ccond in cconds:
|
|
117
|
+
for _, loc in rda.all_vvar_uses[rda.varid_to_vvar[ccond.vvar_id]]:
|
|
118
|
+
loc_block = self._get_block(loc.block_addr, idx=loc.block_idx)
|
|
119
|
+
if loc_block is None:
|
|
120
|
+
continue
|
|
121
|
+
if dominates(idoms, head_block, loc_block):
|
|
122
|
+
# the constant condition dominates the use site
|
|
123
|
+
walker = CCondPropBlockWalker(ccond.vvar_id, ccond.value)
|
|
124
|
+
new_block = walker.walk(loc_block)
|
|
125
|
+
if new_block is not None:
|
|
126
|
+
self._update_block(loc_block, new_block)
|
|
127
|
+
|
|
128
|
+
@timethis
|
|
118
129
|
def _find_const_conditions(self) -> list[ConstantCondition]:
|
|
119
130
|
cconds = []
|
|
120
131
|
|
|
@@ -122,28 +133,46 @@ class ConditionConstantPropagation(OptimizationPass):
|
|
|
122
133
|
if block.statements:
|
|
123
134
|
last_stmt = block.statements[-1]
|
|
124
135
|
if (
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
136
|
+
isinstance(last_stmt, ConditionalJump)
|
|
137
|
+
and isinstance(last_stmt.true_target, Const)
|
|
138
|
+
and isinstance(last_stmt.false_target, Const)
|
|
128
139
|
):
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
)
|
|
147
|
-
cconds.append(ccond)
|
|
140
|
+
self._extract_const_condition_from_stmt(last_stmt, cconds)
|
|
141
|
+
else:
|
|
142
|
+
# also check the first non-phi statement; rep stos may generate blocks whose conditional checks
|
|
143
|
+
# are at the beginning of the block
|
|
144
|
+
|
|
145
|
+
# we could have used is_head_controlled_loop_block, but at this point the block is simplified enough
|
|
146
|
+
# that the first non-label, non-phi statement must be a ConditionalJump that controls the execution
|
|
147
|
+
# of the loop body, so the following logic should work fine.
|
|
148
|
+
|
|
149
|
+
first_stmt = first_nonlabel_nonphi_statement(block)
|
|
150
|
+
if (
|
|
151
|
+
first_stmt is not last_stmt
|
|
152
|
+
and isinstance(first_stmt, ConditionalJump)
|
|
153
|
+
and isinstance(first_stmt.true_target, Const)
|
|
154
|
+
and isinstance(first_stmt.false_target, Const)
|
|
155
|
+
):
|
|
156
|
+
self._extract_const_condition_from_stmt(first_stmt, cconds)
|
|
148
157
|
|
|
149
158
|
return cconds
|
|
159
|
+
|
|
160
|
+
@staticmethod
|
|
161
|
+
def _extract_const_condition_from_stmt(stmt: ConditionalJump, cconds: list[ConstantCondition]) -> None:
|
|
162
|
+
if isinstance(stmt.condition, BinaryOp):
|
|
163
|
+
cond = stmt.condition
|
|
164
|
+
op = cond.op
|
|
165
|
+
op0, op1 = cond.operands
|
|
166
|
+
if isinstance(op0, Const):
|
|
167
|
+
op0, op1 = op1, op0
|
|
168
|
+
if isinstance(op0, VirtualVariable) and isinstance(op1, Const) and op1.is_int:
|
|
169
|
+
if op == "CmpEQ":
|
|
170
|
+
ccond = ConstantCondition(
|
|
171
|
+
op0.varid, op1, stmt.true_target.value, stmt.true_target_idx # type: ignore
|
|
172
|
+
)
|
|
173
|
+
cconds.append(ccond)
|
|
174
|
+
elif op == "CmpNE":
|
|
175
|
+
ccond = ConstantCondition(
|
|
176
|
+
op0.varid, op1, stmt.false_target.value, stmt.false_target_idx # type: ignore
|
|
177
|
+
)
|
|
178
|
+
cconds.append(ccond)
|
|
@@ -950,7 +950,9 @@ class DuplicationReverter(StructuringOptimizationPass):
|
|
|
950
950
|
#
|
|
951
951
|
|
|
952
952
|
def _share_subregion(self, blocks: list[Block]) -> bool:
|
|
953
|
-
return any(
|
|
953
|
+
return any(
|
|
954
|
+
all((block.addr, block.idx) in region for block in blocks) for region in self._ri.regions_by_block_addrs
|
|
955
|
+
)
|
|
954
956
|
|
|
955
957
|
def _is_valid_candidate(self, b0, b1):
|
|
956
958
|
# blocks must have statements
|
|
@@ -6,7 +6,11 @@ import ailment
|
|
|
6
6
|
from ailment.expression import Op
|
|
7
7
|
|
|
8
8
|
from angr.analyses.decompiler.structuring.structurer_nodes import ConditionNode
|
|
9
|
-
from angr.analyses.decompiler.utils import
|
|
9
|
+
from angr.analyses.decompiler.utils import (
|
|
10
|
+
structured_node_is_simple_return,
|
|
11
|
+
sequence_to_statements,
|
|
12
|
+
structured_node_has_multi_predecessors,
|
|
13
|
+
)
|
|
10
14
|
from angr.analyses.decompiler.sequence_walker import SequenceWalker
|
|
11
15
|
from .optimization_pass import SequenceOptimizationPass, OptimizationPassStage
|
|
12
16
|
|
|
@@ -43,7 +47,22 @@ class FlipBooleanWalker(SequenceWalker):
|
|
|
43
47
|
and structured_node_is_simple_return(seq_node.nodes[idx + 1], self._graph)
|
|
44
48
|
and node not in type1_condition_nodes
|
|
45
49
|
):
|
|
46
|
-
|
|
50
|
+
# Type 2: Special Filter:
|
|
51
|
+
# consider code that looks like the following:
|
|
52
|
+
# {if (cond) {LABEL: ... } return;}; goto LABEL;
|
|
53
|
+
#
|
|
54
|
+
# if we were to do the normal flip, this happens:
|
|
55
|
+
# {if (!cond) return; LABEL: ...}; goto LABEL;
|
|
56
|
+
#
|
|
57
|
+
# This is incorrect because we've now created an infinite loop in the event that cond is false,
|
|
58
|
+
# which is not what the original code was. The gist here is that you can't ever flip these cases
|
|
59
|
+
# in the presence of more than one incoming edge to `...` region.
|
|
60
|
+
#
|
|
61
|
+
# To eliminate this illegal case, we simply need to find all the condition nodes of the above structure
|
|
62
|
+
# that have multiple incoming edges to the `...` region.
|
|
63
|
+
illegal_flip = structured_node_has_multi_predecessors(node.true_node, self._graph)
|
|
64
|
+
if not illegal_flip:
|
|
65
|
+
type2_condition_nodes.append((idx, node, seq_node.nodes[idx + 1]))
|
|
47
66
|
|
|
48
67
|
for node in type1_condition_nodes:
|
|
49
68
|
if isinstance(node.condition, Op) and structured_node_is_simple_return(node.false_node, self._graph):
|
|
@@ -7,7 +7,7 @@ import networkx
|
|
|
7
7
|
|
|
8
8
|
from ailment import Block, AILBlockWalkerBase
|
|
9
9
|
from ailment.statement import ConditionalJump, Label, Assignment, Jump
|
|
10
|
-
from ailment.expression import Expression, BinaryOp, Const, Load
|
|
10
|
+
from ailment.expression import VirtualVariable, Expression, BinaryOp, Const, Load
|
|
11
11
|
|
|
12
12
|
from angr.utils.graph import GraphUtils
|
|
13
13
|
from angr.analyses.decompiler.utils import first_nonlabel_nonphi_statement, remove_last_statement
|
|
@@ -216,7 +216,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
216
216
|
def _analyze(self, cache=None):
|
|
217
217
|
variablehash_to_cases = self._find_cascading_switch_variable_comparisons()
|
|
218
218
|
|
|
219
|
-
if not variablehash_to_cases:
|
|
219
|
+
if not variablehash_to_cases or all(not caselists for caselists in variablehash_to_cases.values()):
|
|
220
220
|
return False
|
|
221
221
|
|
|
222
222
|
graph_copy = networkx.DiGraph(self._graph)
|
|
@@ -257,6 +257,24 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
257
257
|
_l.debug("Skipping switch-case conversion due to too few distinct cases for %s", real_cases[0])
|
|
258
258
|
continue
|
|
259
259
|
|
|
260
|
+
# RULE 4: the default case should not reach other case nodes in the subregion
|
|
261
|
+
default_addr_and_idx = next(
|
|
262
|
+
((case.target, case.target_idx) for case in cases if case.value == "default"), None
|
|
263
|
+
)
|
|
264
|
+
if default_addr_and_idx is None:
|
|
265
|
+
continue
|
|
266
|
+
default_addr, default_idx = default_addr_and_idx
|
|
267
|
+
default_node = self._get_block(default_addr, idx=default_idx)
|
|
268
|
+
default_reachable_from_case = False
|
|
269
|
+
for case in cases:
|
|
270
|
+
if case.value == "default":
|
|
271
|
+
continue
|
|
272
|
+
if self._node_reachable_from_node_in_region(case.original_node, default_node):
|
|
273
|
+
default_reachable_from_case = True
|
|
274
|
+
break
|
|
275
|
+
if default_reachable_from_case:
|
|
276
|
+
continue
|
|
277
|
+
|
|
260
278
|
original_nodes = [case.original_node for case in real_cases]
|
|
261
279
|
original_head: Block = original_nodes[0]
|
|
262
280
|
original_nodes = original_nodes[1:]
|
|
@@ -320,6 +338,10 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
320
338
|
node_to_heads[succ].add(new_head)
|
|
321
339
|
graph_copy.remove_node(onode)
|
|
322
340
|
for onode in redundant_nodes:
|
|
341
|
+
if onode in original_nodes:
|
|
342
|
+
# sometimes they overlap
|
|
343
|
+
# e.g., 0x402cc7 in mv_-O2
|
|
344
|
+
continue
|
|
323
345
|
# ensure all nodes that are only reachable from onode are also removed
|
|
324
346
|
# FIXME: Remove the entire path of nodes instead of only the immediate successors
|
|
325
347
|
successors = list(graph_copy.successors(onode))
|
|
@@ -396,6 +418,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
396
418
|
default_case_candidates = {}
|
|
397
419
|
last_comp = None
|
|
398
420
|
stack = [(head, 0, 0xFFFF_FFFF_FFFF_FFFF)]
|
|
421
|
+
head_varhash = variable_comparisons[head][1]
|
|
399
422
|
|
|
400
423
|
# cursed: there is an infinite loop in the following loop that
|
|
401
424
|
# occurs rarely. we need to keep track of the nodes we've seen
|
|
@@ -418,12 +441,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
418
441
|
next_addr,
|
|
419
442
|
next_addr_idx,
|
|
420
443
|
) = variable_comparisons[comp]
|
|
421
|
-
last_varhash = cases[-1].variable_hash if cases else None
|
|
422
444
|
|
|
423
445
|
if op == "eq":
|
|
424
446
|
# eq always indicates a new case
|
|
425
447
|
|
|
426
|
-
if
|
|
448
|
+
if head_varhash == variable_hash:
|
|
427
449
|
if target == comp.addr and target_idx == comp.idx:
|
|
428
450
|
# invalid
|
|
429
451
|
break
|
|
@@ -443,9 +465,10 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
443
465
|
# new variable!
|
|
444
466
|
if last_comp is not None and comp.addr not in default_case_candidates:
|
|
445
467
|
default_case_candidates[comp.addr] = Case(
|
|
446
|
-
last_comp, None,
|
|
468
|
+
last_comp, None, head_varhash, None, "default", comp.addr, comp.idx, None
|
|
447
469
|
)
|
|
448
|
-
|
|
470
|
+
break
|
|
471
|
+
continue
|
|
449
472
|
|
|
450
473
|
successors = [succ for succ in self._graph.successors(comp) if succ is not comp]
|
|
451
474
|
succ_addrs = {(succ.addr, succ.idx) for succ in successors}
|
|
@@ -505,7 +528,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
505
528
|
# gt always indicates new subtrees
|
|
506
529
|
gt_addr, gt_idx, le_addr, le_idx = target, target_idx, next_addr, next_addr_idx
|
|
507
530
|
# TODO: We don't yet support gt nodes acting as the head of a switch
|
|
508
|
-
if
|
|
531
|
+
if head_varhash == variable_hash:
|
|
509
532
|
successors = [succ for succ in self._graph.successors(comp) if succ is not comp]
|
|
510
533
|
succ_addrs = {(succ.addr, succ.idx) for succ in successors}
|
|
511
534
|
if succ_addrs != {(gt_addr, gt_idx), (le_addr, le_idx)}:
|
|
@@ -526,21 +549,34 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
526
549
|
le_added = True
|
|
527
550
|
if gt_added or le_added:
|
|
528
551
|
if not le_added:
|
|
529
|
-
if
|
|
552
|
+
# if min_ + 1 == value, it means we actually have another case! it's not a default case
|
|
553
|
+
if min_ + 1 == value:
|
|
554
|
+
cases.append(
|
|
555
|
+
Case(comp, comp_type, variable_hash, expr, min_ + 1, le_addr, le_idx, None)
|
|
556
|
+
)
|
|
557
|
+
used_nodes.add(comp)
|
|
558
|
+
elif le_addr not in default_case_candidates:
|
|
530
559
|
default_case_candidates[le_addr] = Case(
|
|
531
560
|
comp, None, variable_hash, expr, "default", le_addr, le_idx, None
|
|
532
561
|
)
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
562
|
+
if not gt_added:
|
|
563
|
+
# likewise, this means we have another non-default case
|
|
564
|
+
if value == max_:
|
|
565
|
+
cases.append(
|
|
566
|
+
Case(comp, comp_type, variable_hash, expr, max_, gt_addr, gt_idx, None)
|
|
567
|
+
)
|
|
568
|
+
used_nodes.add(comp)
|
|
569
|
+
elif gt_addr not in default_case_candidates:
|
|
570
|
+
default_case_candidates[gt_addr] = Case(
|
|
571
|
+
comp, None, variable_hash, expr, "default", gt_addr, gt_idx, None
|
|
572
|
+
)
|
|
537
573
|
extra_cmp_nodes.append(comp)
|
|
538
574
|
used_nodes.add(comp)
|
|
539
575
|
else:
|
|
540
576
|
break
|
|
541
577
|
else:
|
|
542
578
|
# checking on a new variable... it probably was not a switch-case
|
|
543
|
-
|
|
579
|
+
continue
|
|
544
580
|
|
|
545
581
|
if cases and len(default_case_candidates) <= 1:
|
|
546
582
|
if default_case_candidates:
|
|
@@ -606,6 +642,27 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
606
642
|
|
|
607
643
|
return varhash_to_caselists
|
|
608
644
|
|
|
645
|
+
def _node_reachable_from_node_in_region(self, to_node, from_node) -> bool:
|
|
646
|
+
# find the region that contains the to_node
|
|
647
|
+
to_node_region = None
|
|
648
|
+
from_node_region = None
|
|
649
|
+
for region in self._ri.regions_by_block_addrs:
|
|
650
|
+
if (to_node.addr, to_node.idx) in region:
|
|
651
|
+
to_node_region = region
|
|
652
|
+
if (from_node.addr, from_node.idx) in region:
|
|
653
|
+
from_node_region = region
|
|
654
|
+
|
|
655
|
+
if to_node_region is None or from_node_region is None:
|
|
656
|
+
return False
|
|
657
|
+
if to_node_region != from_node_region:
|
|
658
|
+
return False
|
|
659
|
+
|
|
660
|
+
# get a subgraph
|
|
661
|
+
all_nodes = [self._get_block(a, idx=idx) for a, idx in to_node_region]
|
|
662
|
+
subgraph = self._graph.subgraph(all_nodes)
|
|
663
|
+
|
|
664
|
+
return networkx.has_path(subgraph, from_node, to_node)
|
|
665
|
+
|
|
609
666
|
@staticmethod
|
|
610
667
|
def _find_switch_variable_comparison_type_a(
|
|
611
668
|
node,
|
|
@@ -625,7 +682,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
625
682
|
)
|
|
626
683
|
):
|
|
627
684
|
cond = stmt.condition
|
|
628
|
-
if
|
|
685
|
+
if (
|
|
686
|
+
isinstance(cond, BinaryOp)
|
|
687
|
+
and isinstance(cond.operands[0], VirtualVariable)
|
|
688
|
+
and isinstance(cond.operands[1], Const)
|
|
689
|
+
):
|
|
629
690
|
variable_hash = StableVarExprHasher(cond.operands[0]).hash
|
|
630
691
|
value = cond.operands[1].value
|
|
631
692
|
if cond.op == "CmpEQ":
|
|
@@ -672,7 +733,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
672
733
|
)
|
|
673
734
|
):
|
|
674
735
|
cond = stmt.condition
|
|
675
|
-
if
|
|
736
|
+
if (
|
|
737
|
+
isinstance(cond, BinaryOp)
|
|
738
|
+
and isinstance(cond.operands[0], VirtualVariable)
|
|
739
|
+
and isinstance(cond.operands[1], Const)
|
|
740
|
+
):
|
|
676
741
|
variable_hash = StableVarExprHasher(cond.operands[0]).hash
|
|
677
742
|
value = cond.operands[1].value
|
|
678
743
|
if cond.op == "CmpEQ":
|
|
@@ -719,7 +784,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
719
784
|
)
|
|
720
785
|
):
|
|
721
786
|
cond = stmt.condition
|
|
722
|
-
if
|
|
787
|
+
if (
|
|
788
|
+
isinstance(cond, BinaryOp)
|
|
789
|
+
and isinstance(cond.operands[0], VirtualVariable)
|
|
790
|
+
and isinstance(cond.operands[1], Const)
|
|
791
|
+
):
|
|
723
792
|
variable_hash = StableVarExprHasher(cond.operands[0]).hash
|
|
724
793
|
value = cond.operands[1].value
|
|
725
794
|
op = cond.op
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
# pylint:disable=unused-argument
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
import logging
|
|
4
|
-
from
|
|
4
|
+
from collections import namedtuple
|
|
5
5
|
from collections.abc import Generator
|
|
6
|
+
from typing import Any, TYPE_CHECKING
|
|
6
7
|
from enum import Enum
|
|
7
8
|
|
|
8
9
|
import networkx
|
|
@@ -10,6 +11,7 @@ import networkx
|
|
|
10
11
|
import ailment
|
|
11
12
|
|
|
12
13
|
from angr.analyses.decompiler import RegionIdentifier
|
|
14
|
+
from angr.analyses.decompiler.ailgraph_walker import AILGraphWalker
|
|
13
15
|
from angr.analyses.decompiler.condition_processor import ConditionProcessor
|
|
14
16
|
from angr.analyses.decompiler.goto_manager import Goto, GotoManager
|
|
15
17
|
from angr.analyses.decompiler.structuring import RecursiveStructurer, SAILRStructurer
|
|
@@ -19,11 +21,15 @@ from angr.project import Project
|
|
|
19
21
|
|
|
20
22
|
if TYPE_CHECKING:
|
|
21
23
|
from angr.knowledge_plugins.functions import Function
|
|
24
|
+
from angr.analyses.decompiler.stack_item import StackItem
|
|
22
25
|
|
|
23
26
|
|
|
24
27
|
_l = logging.getLogger(__name__)
|
|
25
28
|
|
|
26
29
|
|
|
30
|
+
BlockCache = namedtuple("BlockCache", ("rd", "prop"))
|
|
31
|
+
|
|
32
|
+
|
|
27
33
|
class MultipleBlocksException(Exception):
|
|
28
34
|
"""
|
|
29
35
|
An exception that is raised in _get_block() where multiple blocks satisfy the criteria but only one block was
|
|
@@ -130,6 +136,7 @@ class OptimizationPass(BaseOptimizationPass):
|
|
|
130
136
|
complete_successors: bool = False,
|
|
131
137
|
avoid_vvar_ids: set[int] | None = None,
|
|
132
138
|
arg_vvars: set[int] | None = None,
|
|
139
|
+
peephole_optimizations=None,
|
|
133
140
|
**kwargs,
|
|
134
141
|
):
|
|
135
142
|
super().__init__(func)
|
|
@@ -150,9 +157,11 @@ class OptimizationPass(BaseOptimizationPass):
|
|
|
150
157
|
self._force_loop_single_exit = force_loop_single_exit
|
|
151
158
|
self._complete_successors = complete_successors
|
|
152
159
|
self._avoid_vvar_ids = avoid_vvar_ids or set()
|
|
160
|
+
self._peephole_optimizations = peephole_optimizations
|
|
153
161
|
|
|
154
162
|
# output
|
|
155
163
|
self.out_graph: networkx.DiGraph | None = None
|
|
164
|
+
self.stack_items: dict[int, StackItem] = {}
|
|
156
165
|
|
|
157
166
|
@property
|
|
158
167
|
def blocks_by_addr(self) -> dict[int, set[ailment.Block]]:
|
|
@@ -267,9 +276,77 @@ class OptimizationPass(BaseOptimizationPass):
|
|
|
267
276
|
def _is_sub(expr):
|
|
268
277
|
return isinstance(expr, ailment.Expr.BinaryOp) and expr.op == "Sub"
|
|
269
278
|
|
|
279
|
+
def _simplify_blocks(
|
|
280
|
+
self,
|
|
281
|
+
ail_graph: networkx.DiGraph,
|
|
282
|
+
cache: dict | None = None,
|
|
283
|
+
):
|
|
284
|
+
"""
|
|
285
|
+
Simplify all blocks in self._blocks.
|
|
286
|
+
|
|
287
|
+
:param ail_graph: The AIL function graph.
|
|
288
|
+
:param cache: A block-level cache that stores reaching definition analysis results and
|
|
289
|
+
propagation results.
|
|
290
|
+
:return: None
|
|
291
|
+
"""
|
|
292
|
+
|
|
293
|
+
blocks_by_addr_and_idx: dict[tuple[int, int | None], ailment.Block] = {}
|
|
294
|
+
|
|
295
|
+
for ail_block in ail_graph.nodes():
|
|
296
|
+
simplified = self._simplify_block(
|
|
297
|
+
ail_block,
|
|
298
|
+
cache=cache,
|
|
299
|
+
)
|
|
300
|
+
key = ail_block.addr, ail_block.idx
|
|
301
|
+
blocks_by_addr_and_idx[key] = simplified
|
|
302
|
+
|
|
303
|
+
# update blocks_map to allow node_addr to node lookup
|
|
304
|
+
def _replace_node_handler(node):
|
|
305
|
+
key = node.addr, node.idx
|
|
306
|
+
if key in blocks_by_addr_and_idx:
|
|
307
|
+
return blocks_by_addr_and_idx[key]
|
|
308
|
+
return None
|
|
309
|
+
|
|
310
|
+
AILGraphWalker(ail_graph, _replace_node_handler, replace_nodes=True).walk()
|
|
311
|
+
|
|
312
|
+
return ail_graph
|
|
313
|
+
|
|
314
|
+
def _simplify_block(self, ail_block, cache=None):
|
|
315
|
+
"""
|
|
316
|
+
Simplify a single AIL block.
|
|
317
|
+
|
|
318
|
+
:param ailment.Block ail_block: The AIL block to simplify.
|
|
319
|
+
:return: A simplified AIL block.
|
|
320
|
+
"""
|
|
321
|
+
|
|
322
|
+
cached_rd, cached_prop = None, None
|
|
323
|
+
cache_item = None
|
|
324
|
+
cache_key = ail_block.addr, ail_block.idx
|
|
325
|
+
if cache:
|
|
326
|
+
cache_item = cache.get(cache_key, None)
|
|
327
|
+
if cache_item:
|
|
328
|
+
# cache hit
|
|
329
|
+
cached_rd = cache_item.rd
|
|
330
|
+
cached_prop = cache_item.prop
|
|
331
|
+
|
|
332
|
+
simp = self.project.analyses.AILBlockSimplifier(
|
|
333
|
+
ail_block,
|
|
334
|
+
self._func.addr,
|
|
335
|
+
peephole_optimizations=self._peephole_optimizations,
|
|
336
|
+
cached_reaching_definitions=cached_rd,
|
|
337
|
+
cached_propagator=cached_prop,
|
|
338
|
+
)
|
|
339
|
+
# update the cache
|
|
340
|
+
if cache is not None:
|
|
341
|
+
if cache_item:
|
|
342
|
+
del cache[cache_key]
|
|
343
|
+
cache[cache_key] = BlockCache(simp._reaching_definitions, simp._propagator)
|
|
344
|
+
return simp.result_block
|
|
345
|
+
|
|
270
346
|
def _simplify_graph(self, graph):
|
|
271
347
|
MAX_SIMP_ITERATION = 8
|
|
272
348
|
for _ in range(MAX_SIMP_ITERATION):
|
|
349
|
+
self._simplify_blocks(graph)
|
|
273
350
|
simp = self.project.analyses.AILSimplifier(
|
|
274
351
|
self._func,
|
|
275
352
|
func_graph=graph,
|