angr 9.2.95__py3-none-manylinux2014_x86_64.whl → 9.2.97__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/cfg/cfg_fast.py +9 -6
- angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +6 -1
- angr/analyses/complete_calling_conventions.py +27 -11
- angr/analyses/decompiler/ail_simplifier.py +30 -8
- angr/analyses/decompiler/ccall_rewriters/amd64_ccalls.py +20 -7
- angr/analyses/decompiler/clinic.py +21 -5
- angr/analyses/decompiler/condition_processor.py +11 -0
- angr/analyses/decompiler/decompiler.py +58 -46
- angr/analyses/decompiler/optimization_passes/__init__.py +11 -5
- angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +13 -7
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +31 -11
- angr/analyses/decompiler/optimization_passes/{return_duplicator.py → return_duplicator_base.py} +54 -102
- angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +57 -0
- angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +121 -0
- angr/analyses/decompiler/region_identifier.py +13 -0
- angr/analyses/decompiler/seq_to_blocks.py +19 -0
- angr/analyses/decompiler/structured_codegen/c.py +21 -0
- angr/analyses/decompiler/structuring/phoenix.py +28 -4
- angr/analyses/decompiler/structuring/recursive_structurer.py +35 -1
- angr/analyses/decompiler/structuring/structurer_base.py +3 -0
- angr/analyses/decompiler/utils.py +41 -6
- angr/analyses/disassembly.py +4 -1
- angr/analyses/find_objects_static.py +15 -10
- angr/analyses/forward_analysis/forward_analysis.py +15 -1
- angr/analyses/propagator/engine_ail.py +40 -0
- angr/analyses/propagator/propagator.py +6 -3
- angr/analyses/reaching_definitions/engine_ail.py +16 -24
- angr/analyses/reaching_definitions/rd_state.py +14 -1
- angr/analyses/reaching_definitions/reaching_definitions.py +19 -2
- angr/analyses/variable_recovery/engine_ail.py +6 -6
- angr/analyses/variable_recovery/engine_base.py +22 -4
- angr/analyses/variable_recovery/variable_recovery_base.py +4 -1
- angr/engines/light/engine.py +8 -1
- angr/knowledge_plugins/key_definitions/atoms.py +4 -2
- angr/knowledge_plugins/key_definitions/environment.py +11 -0
- angr/knowledge_plugins/key_definitions/live_definitions.py +41 -8
- angr/knowledge_plugins/key_definitions/uses.py +18 -4
- angr/knowledge_plugins/propagations/states.py +22 -3
- angr/knowledge_plugins/types.py +6 -0
- angr/knowledge_plugins/variables/variable_manager.py +54 -5
- angr/simos/simos.py +2 -0
- angr/storage/memory_mixins/__init__.py +3 -0
- angr/storage/memory_mixins/multi_value_merger_mixin.py +22 -11
- angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +20 -2
- angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +81 -44
- angr/utils/cowdict.py +4 -2
- angr/utils/funcid.py +6 -0
- angr/utils/mp.py +1 -1
- {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/METADATA +6 -6
- {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/RECORD +55 -52
- {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/LICENSE +0 -0
- {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/WHEEL +0 -0
- {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/entry_points.txt +0 -0
- {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/top_level.txt +0 -0
|
@@ -5,7 +5,7 @@ import ailment
|
|
|
5
5
|
from ailment.expression import Op
|
|
6
6
|
|
|
7
7
|
from ..structuring.structurer_nodes import ConditionNode
|
|
8
|
-
from ..utils import structured_node_is_simple_return
|
|
8
|
+
from ..utils import structured_node_is_simple_return, sequence_to_statements
|
|
9
9
|
from ..sequence_walker import SequenceWalker
|
|
10
10
|
from .optimization_pass import SequenceOptimizationPass, OptimizationPassStage
|
|
11
11
|
|
|
@@ -13,12 +13,14 @@ from .optimization_pass import SequenceOptimizationPass, OptimizationPassStage
|
|
|
13
13
|
class FlipBooleanWalker(SequenceWalker):
|
|
14
14
|
"""
|
|
15
15
|
Walks a SequenceNode and handles every sequence.
|
|
16
|
+
Uses the flip_size to determine when to flip the condition on large if-statement bodies.
|
|
16
17
|
"""
|
|
17
18
|
|
|
18
|
-
def __init__(self, graph, last_node=None):
|
|
19
|
+
def __init__(self, graph, flip_size=10, last_node=None):
|
|
19
20
|
super().__init__()
|
|
20
21
|
self._graph = graph
|
|
21
22
|
self._last_node = last_node
|
|
23
|
+
self._flip_size = flip_size
|
|
22
24
|
|
|
23
25
|
def _handle_Sequence(self, seq_node, **kwargs):
|
|
24
26
|
# Type 1:
|
|
@@ -48,9 +50,12 @@ class FlipBooleanWalker(SequenceWalker):
|
|
|
48
50
|
node.true_node, node.false_node = node.false_node, node.true_node
|
|
49
51
|
|
|
50
52
|
for idx, cond_node, successor in type2_condition_nodes:
|
|
51
|
-
#
|
|
52
|
-
#
|
|
53
|
-
if successor is not self._last_node:
|
|
53
|
+
# there are two possibilities when you might want to flip the condition and move the return statement:
|
|
54
|
+
# 1. This if-stmt if found somewhere in the middle of the function
|
|
55
|
+
# 2. This if-stmt is pretty large, but still ends in a return outside of the if-stmt
|
|
56
|
+
if (successor is not self._last_node) or (
|
|
57
|
+
len(sequence_to_statements(cond_node.true_node)) >= self._flip_size
|
|
58
|
+
):
|
|
54
59
|
cond_node.condition = ailment.expression.negate(cond_node.condition)
|
|
55
60
|
seq_node.nodes[idx + 1] = cond_node.true_node
|
|
56
61
|
cond_node.true_node = successor
|
|
@@ -71,15 +76,16 @@ class FlipBooleanCmp(SequenceOptimizationPass):
|
|
|
71
76
|
NAME = "Flip small ret booleans"
|
|
72
77
|
DESCRIPTION = "When false node has no successors, flip condition so else scope can be simplified later"
|
|
73
78
|
|
|
74
|
-
def __init__(self, func, **kwargs):
|
|
79
|
+
def __init__(self, func, flip_size=10, **kwargs):
|
|
75
80
|
super().__init__(func, **kwargs)
|
|
76
81
|
self._graph = kwargs.get("graph", None)
|
|
82
|
+
self._flip_size = flip_size
|
|
77
83
|
self.analyze()
|
|
78
84
|
|
|
79
85
|
def _check(self):
|
|
80
86
|
return bool(self.seq.nodes), None
|
|
81
87
|
|
|
82
88
|
def _analyze(self, cache=None):
|
|
83
|
-
walker = FlipBooleanWalker(self._graph, last_node=self.seq.nodes[-1])
|
|
89
|
+
walker = FlipBooleanWalker(self._graph, last_node=self.seq.nodes[-1], flip_size=self._flip_size)
|
|
84
90
|
walker.walk(self.seq)
|
|
85
91
|
self.out_seq = self.seq
|
|
@@ -6,6 +6,7 @@ import networkx # pylint:disable=unused-import
|
|
|
6
6
|
import ailment
|
|
7
7
|
|
|
8
8
|
from angr.analyses.decompiler import RegionIdentifier
|
|
9
|
+
from angr.analyses.decompiler.condition_processor import ConditionProcessor
|
|
9
10
|
from angr.analyses.decompiler.goto_manager import GotoManager
|
|
10
11
|
from angr.analyses.decompiler.structuring import RecursiveStructurer, PhoenixStructurer
|
|
11
12
|
from angr.analyses.decompiler.utils import add_labels
|
|
@@ -93,6 +94,26 @@ class BaseOptimizationPass:
|
|
|
93
94
|
"""
|
|
94
95
|
raise NotImplementedError()
|
|
95
96
|
|
|
97
|
+
def _simplify_graph(self, graph):
|
|
98
|
+
simp = self.project.analyses.AILSimplifier(
|
|
99
|
+
self._func,
|
|
100
|
+
func_graph=graph,
|
|
101
|
+
use_callee_saved_regs_at_return=False,
|
|
102
|
+
gp=self._func.info.get("gp", None) if self.project.arch.name in {"MIPS32", "MIPS64"} else None,
|
|
103
|
+
)
|
|
104
|
+
return simp.func_graph if simp.simplified else graph
|
|
105
|
+
|
|
106
|
+
def _recover_regions(self, graph: networkx.DiGraph, condition_processor=None, update_graph: bool = False):
|
|
107
|
+
return self.project.analyses[RegionIdentifier].prep(kb=self.kb)(
|
|
108
|
+
self._func,
|
|
109
|
+
graph=graph,
|
|
110
|
+
cond_proc=condition_processor or ConditionProcessor(self.project.arch),
|
|
111
|
+
update_graph=update_graph,
|
|
112
|
+
# TODO: find a way to pass Phoenix/DREAM options here (see decompiler.py for correct use)
|
|
113
|
+
force_loop_single_exit=True,
|
|
114
|
+
complete_successors=False,
|
|
115
|
+
)
|
|
116
|
+
|
|
96
117
|
|
|
97
118
|
class OptimizationPass(BaseOptimizationPass):
|
|
98
119
|
"""
|
|
@@ -256,6 +277,7 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
256
277
|
recover_structure_fails=True,
|
|
257
278
|
max_opt_iters=1,
|
|
258
279
|
simplify_ail=True,
|
|
280
|
+
require_gotos=True,
|
|
259
281
|
**kwargs,
|
|
260
282
|
):
|
|
261
283
|
super().__init__(func, **kwargs)
|
|
@@ -264,6 +286,7 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
264
286
|
self._recover_structure_fails = recover_structure_fails
|
|
265
287
|
self._max_opt_iters = max_opt_iters
|
|
266
288
|
self._simplify_ail = simplify_ail
|
|
289
|
+
self._require_gotos = require_gotos
|
|
267
290
|
|
|
268
291
|
self._goto_manager: Optional[GotoManager] = None
|
|
269
292
|
self._prev_graph: Optional[networkx.DiGraph] = None
|
|
@@ -279,6 +302,9 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
279
302
|
return
|
|
280
303
|
|
|
281
304
|
initial_gotos = self._goto_manager.gotos.copy()
|
|
305
|
+
if self._require_gotos and not initial_gotos:
|
|
306
|
+
return
|
|
307
|
+
|
|
282
308
|
# replace the normal check in OptimizationPass.analyze()
|
|
283
309
|
ret, cache = self._check()
|
|
284
310
|
if not ret:
|
|
@@ -304,7 +330,7 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
304
330
|
# simplify the AIL graph
|
|
305
331
|
if self._simplify_ail:
|
|
306
332
|
# this should not (TM) change the structure of the graph but is needed for later optimizations
|
|
307
|
-
self.out_graph = self._simplify_ail_graph(self.out_graph)
|
|
333
|
+
self.out_graph = self._simplify_graph(self.out_graph)
|
|
308
334
|
|
|
309
335
|
if self._prevent_new_gotos:
|
|
310
336
|
prev_gotos = len(initial_gotos)
|
|
@@ -317,6 +343,9 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
317
343
|
|
|
318
344
|
def _fixed_point_analyze(self, cache=None):
|
|
319
345
|
for _ in range(self._max_opt_iters):
|
|
346
|
+
if self._require_gotos and not self._goto_manager.gotos:
|
|
347
|
+
break
|
|
348
|
+
|
|
320
349
|
# backup the graph before the optimization
|
|
321
350
|
if self._recover_structure_fails and self.out_graph is not None:
|
|
322
351
|
self._prev_graph = networkx.DiGraph(self.out_graph)
|
|
@@ -331,15 +360,6 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
331
360
|
self.out_graph = self._prev_graph if self._recover_structure_fails else None
|
|
332
361
|
break
|
|
333
362
|
|
|
334
|
-
def _simplify_ail_graph(self, graph):
|
|
335
|
-
simp = self.project.analyses.AILSimplifier(
|
|
336
|
-
self._func,
|
|
337
|
-
func_graph=graph,
|
|
338
|
-
use_callee_saved_regs_at_return=False,
|
|
339
|
-
gp=self._func.info.get("gp", None) if self.project.arch.name in {"MIPS32", "MIPS64"} else None,
|
|
340
|
-
)
|
|
341
|
-
return simp.func_graph if simp.simplified else graph
|
|
342
|
-
|
|
343
363
|
def _graph_is_structurable(self, graph, readd_labels=False) -> bool:
|
|
344
364
|
"""
|
|
345
365
|
Checks weather the input graph is structurable under the Phoenix schema-matching structuring algorithm.
|
|
@@ -367,7 +387,7 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
367
387
|
func=self._func,
|
|
368
388
|
structurer_cls=PhoenixStructurer,
|
|
369
389
|
)
|
|
370
|
-
if not rs or not rs.result or not rs.result.nodes:
|
|
390
|
+
if not rs or not rs.result or not rs.result.nodes or rs.result_incomplete:
|
|
371
391
|
return False
|
|
372
392
|
|
|
373
393
|
rs = self.project.analyses.RegionSimplifier(self._func, rs.result, kb=self.kb, variable_kb=self._variable_kb)
|
angr/analyses/decompiler/optimization_passes/{return_duplicator.py → return_duplicator_base.py}
RENAMED
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
from typing import Any, Tuple, Dict, List
|
|
1
|
+
from typing import Any, Tuple, Dict, List, Optional
|
|
2
2
|
from itertools import count
|
|
3
3
|
import copy
|
|
4
4
|
import logging
|
|
5
|
-
import inspect
|
|
6
5
|
|
|
7
6
|
import ailment.expression
|
|
8
7
|
import networkx
|
|
@@ -11,138 +10,84 @@ from ailment import Block
|
|
|
11
10
|
from ailment.statement import Jump, ConditionalJump, Assignment, Return, Label
|
|
12
11
|
from ailment.expression import Const
|
|
13
12
|
|
|
14
|
-
from .optimization_pass import StructuringOptimizationPass
|
|
15
13
|
from ..condition_processor import ConditionProcessor, EmptyBlockNotice
|
|
16
14
|
from ..graph_region import GraphRegion
|
|
17
15
|
from ..utils import remove_labels, to_ail_supergraph, calls_in_graph
|
|
18
|
-
from ..structuring.structurer_nodes import MultiNode
|
|
16
|
+
from ..structuring.structurer_nodes import MultiNode, ConditionNode
|
|
17
|
+
from ..region_identifier import RegionIdentifier
|
|
19
18
|
|
|
20
19
|
_l = logging.getLogger(name=__name__)
|
|
21
20
|
|
|
22
21
|
|
|
23
|
-
class ReturnDuplicator(StructuringOptimizationPass):
|
|
22
|
+
class ReturnDuplicatorBase:
|
|
24
23
|
"""
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
Some compilers, including GCC, Clang, and MSVC, apply various optimizations to reduce the number of statements in
|
|
29
|
-
code. These optimizations will take equivalent statements, or a subset of them, and replace them with a single
|
|
30
|
-
copy that is jumped to by gotos -- optimizing for space and sometimes speed.
|
|
31
|
-
|
|
32
|
-
This optimization pass will revert those gotos by re-duplicating the condensed blocks. Since Return statements
|
|
33
|
-
are the most common, we use this optimization pass to revert only gotos to return statements. Additionally, we
|
|
34
|
-
perform some additional readability fixups, like not re-duplicating returns to shared components.
|
|
35
|
-
|
|
36
|
-
Args:
|
|
37
|
-
func: The function to optimize.
|
|
38
|
-
node_idx_start: The index to start at when creating new nodes. This is used by Clinic to ensure that
|
|
39
|
-
node indices are unique across multiple passes.
|
|
40
|
-
max_opt_iters: The maximum number of optimization iterations to perform.
|
|
41
|
-
max_calls_in_regions: The maximum number of calls that can be in a region. This is used to prevent
|
|
42
|
-
duplicating too much code.
|
|
43
|
-
prevent_new_gotos: If True, this optimization pass will prevent new gotos from being created.
|
|
44
|
-
minimize_copies_for_regions: If True, this optimization pass will minimize the number of copies by doing only
|
|
45
|
-
a single copy for connected in_edges that form a region.
|
|
24
|
+
The base class for implementing Return Duplication as described in the SAILR paper.
|
|
25
|
+
This base class describes the general algorithm for duplicating return regions in a graph.
|
|
46
26
|
"""
|
|
47
27
|
|
|
48
|
-
|
|
49
|
-
PLATFORMS = None
|
|
50
|
-
NAME = "Duplicate return blocks to reduce goto statements"
|
|
51
|
-
DESCRIPTION = inspect.cleandoc(__doc__[: __doc__.index("Args:")]) # pylint:disable=unsubscriptable-object
|
|
52
|
-
|
|
28
|
+
# pylint:disable=unused-argument
|
|
53
29
|
def __init__(
|
|
54
30
|
self,
|
|
55
31
|
func,
|
|
56
|
-
# internal parameters that should be used by Clinic
|
|
57
32
|
node_idx_start: int = 0,
|
|
58
|
-
# settings
|
|
59
|
-
max_opt_iters: int = 10,
|
|
60
33
|
max_calls_in_regions: int = 2,
|
|
61
|
-
prevent_new_gotos: bool = True,
|
|
62
34
|
minimize_copies_for_regions: bool = True,
|
|
35
|
+
ri: Optional[RegionIdentifier] = None,
|
|
63
36
|
**kwargs,
|
|
64
37
|
):
|
|
65
|
-
|
|
38
|
+
self.node_idx = count(start=node_idx_start)
|
|
66
39
|
self._max_calls_in_region = max_calls_in_regions
|
|
67
40
|
self._minimize_copies_for_regions = minimize_copies_for_regions
|
|
68
41
|
|
|
69
|
-
|
|
70
|
-
self.analyze()
|
|
42
|
+
# this should also be set by the optimization passes initer
|
|
43
|
+
self._func = func
|
|
44
|
+
self._ri: Optional[RegionIdentifier] = ri
|
|
45
|
+
|
|
46
|
+
#
|
|
47
|
+
# must implement these methods
|
|
48
|
+
#
|
|
49
|
+
|
|
50
|
+
def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False) -> bool:
|
|
51
|
+
raise NotImplementedError()
|
|
52
|
+
|
|
53
|
+
#
|
|
54
|
+
# main analysis
|
|
55
|
+
#
|
|
71
56
|
|
|
72
57
|
def _check(self):
|
|
73
58
|
# does this function have end points?
|
|
74
59
|
return bool(self._func.endpoints), None
|
|
75
60
|
|
|
76
|
-
def _analyze(self, cache=None):
|
|
61
|
+
def _analyze_core(self, graph: networkx.DiGraph) -> bool:
|
|
77
62
|
"""
|
|
78
|
-
This analysis is run in a loop in analyze() for a maximum of max_opt_iters times.
|
|
63
|
+
This function does the core checks and duplications to the graph passed.
|
|
64
|
+
The return value is True if the graph was changed.
|
|
79
65
|
"""
|
|
80
66
|
graph_changed = False
|
|
81
|
-
endnode_regions = self._find_endnode_regions(self.out_graph)
|
|
67
|
+
endnode_regions = self._find_endnode_regions(graph)
|
|
82
68
|
|
|
83
69
|
if self._minimize_copies_for_regions:
|
|
84
70
|
# perform a second pass to minimize the number of copies by doing only a single copy
|
|
85
71
|
# for connected in_edges that form a region
|
|
86
|
-
endnode_regions = self._copy_connected_edge_components(endnode_regions, self.out_graph)
|
|
72
|
+
endnode_regions = self._copy_connected_edge_components(endnode_regions, graph)
|
|
87
73
|
|
|
88
74
|
for region_head, (in_edges, region) in endnode_regions.items():
|
|
89
75
|
is_single_const_ret_region = self._is_simple_return_graph(region)
|
|
90
76
|
for in_edge in in_edges:
|
|
91
77
|
pred_node = in_edge[0]
|
|
92
78
|
if self._should_duplicate_dst(
|
|
93
|
-
pred_node, region_head, self.out_graph, dst_is_const_ret=is_single_const_ret_region
|
|
79
|
+
pred_node, region_head, graph, dst_is_const_ret=is_single_const_ret_region
|
|
94
80
|
):
|
|
95
81
|
# every eligible pred gets a new region copy
|
|
96
|
-
self._copy_region([pred_node], region_head, region, self.out_graph)
|
|
82
|
+
self._copy_region([pred_node], region_head, region, graph)
|
|
97
83
|
|
|
98
|
-
if region_head in self.out_graph and self.out_graph.in_degree(region_head) == 0:
|
|
99
|
-
|
|
84
|
+
if region_head in graph and graph.in_degree(region_head) == 0:
|
|
85
|
+
graph.remove_nodes_from(region)
|
|
100
86
|
|
|
101
87
|
graph_changed = True
|
|
102
88
|
|
|
103
89
|
return graph_changed
|
|
104
90
|
|
|
105
|
-
def _is_goto_edge(
|
|
106
|
-
self,
|
|
107
|
-
src: Block,
|
|
108
|
-
dst: Block,
|
|
109
|
-
graph: networkx.DiGraph = None,
|
|
110
|
-
check_for_ifstmts=True,
|
|
111
|
-
max_level_check=1,
|
|
112
|
-
):
|
|
113
|
-
"""
|
|
114
|
-
TODO: correct how goto edge addressing works
|
|
115
|
-
This function only exists because a long-standing bug that sometimes reports the if-stmt addr
|
|
116
|
-
above a goto edge as the goto src. Because of this, we need to check for predecessors above the goto and
|
|
117
|
-
see if they are a goto. This needs to include Jump to deal with loops.
|
|
118
|
-
"""
|
|
119
|
-
if check_for_ifstmts and graph is not None:
|
|
120
|
-
blocks = [src]
|
|
121
|
-
level_blocks = [src]
|
|
122
|
-
for _ in range(max_level_check):
|
|
123
|
-
new_level_blocks = []
|
|
124
|
-
for lblock in level_blocks:
|
|
125
|
-
new_level_blocks += list(graph.predecessors(lblock))
|
|
126
|
-
|
|
127
|
-
blocks += new_level_blocks
|
|
128
|
-
level_blocks = new_level_blocks
|
|
129
|
-
|
|
130
|
-
src_direct_parents = list(graph.predecessors(src))
|
|
131
|
-
for block in blocks:
|
|
132
|
-
if not block or not block.statements:
|
|
133
|
-
continue
|
|
134
|
-
|
|
135
|
-
# special case if-stmts that are next to each other
|
|
136
|
-
if block in src_direct_parents and isinstance(block.statements[-1], ConditionalJump):
|
|
137
|
-
continue
|
|
138
|
-
|
|
139
|
-
if self._goto_manager.is_goto_edge(block, dst):
|
|
140
|
-
return True
|
|
141
|
-
else:
|
|
142
|
-
return self._goto_manager.is_goto_edge(src, dst)
|
|
143
|
-
|
|
144
|
-
return False
|
|
145
|
-
|
|
146
91
|
def _find_endnode_regions(self, graph) -> Dict[Any, Tuple[List[Tuple[Any, Any]], networkx.DiGraph]]:
|
|
147
92
|
"""
|
|
148
93
|
Find all the regions that contain a node with no successors. These are the "end nodes" of the graph.
|
|
@@ -194,14 +139,6 @@ class ReturnDuplicator(StructuringOptimizationPass):
|
|
|
194
139
|
|
|
195
140
|
return end_node_regions
|
|
196
141
|
|
|
197
|
-
def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False):
|
|
198
|
-
# returns that are only returning a constant should be duplicated always;
|
|
199
|
-
if dst_is_const_ret:
|
|
200
|
-
return True
|
|
201
|
-
|
|
202
|
-
# check above
|
|
203
|
-
return self._is_goto_edge(src, dst, graph=graph, check_for_ifstmts=True)
|
|
204
|
-
|
|
205
142
|
def _copy_region(self, pred_nodes, region_head, region, graph):
|
|
206
143
|
# copy the entire return region
|
|
207
144
|
copies = {}
|
|
@@ -295,8 +232,14 @@ class ReturnDuplicator(StructuringOptimizationPass):
|
|
|
295
232
|
@staticmethod
|
|
296
233
|
def _is_simple_return_graph(graph: networkx.DiGraph, max_assigns=1):
|
|
297
234
|
"""
|
|
298
|
-
Checks if the graph is a
|
|
299
|
-
|
|
235
|
+
Checks if the provided graph is a graph that ONLY contains a "simple" return.
|
|
236
|
+
If there were absolutely no bugs in angr, we could just check that a single return block exists.
|
|
237
|
+
However, due to some propagation bugs, these cases can all happen and are all valid:
|
|
238
|
+
1. [Jmp] -> [Jmp] -> [Ret]
|
|
239
|
+
2. [Jmp] -> [Jmp, x=0] -> [Ret x]
|
|
240
|
+
3. [Jmp] -> [Jmp, x=rdi] -> [Ret x]
|
|
241
|
+
|
|
242
|
+
To deal with this, we need to do the sketchy checks we do below.
|
|
300
243
|
"""
|
|
301
244
|
labeless_graph = to_ail_supergraph(remove_labels(graph))
|
|
302
245
|
nodes = list(labeless_graph.nodes())
|
|
@@ -466,14 +409,20 @@ class ReturnDuplicator(StructuringOptimizationPass):
|
|
|
466
409
|
|
|
467
410
|
@staticmethod
|
|
468
411
|
def _find_block_sets_in_all_regions(top_region: GraphRegion):
|
|
412
|
+
def _unpack_block_type_to_addrs(node):
|
|
413
|
+
if isinstance(node, Block):
|
|
414
|
+
return {node.addr}
|
|
415
|
+
elif isinstance(node, MultiNode):
|
|
416
|
+
return {n.addr for n in node.nodes}
|
|
417
|
+
elif isinstance(node, ConditionNode):
|
|
418
|
+
return _unpack_block_type_to_addrs(node.true_node) | _unpack_block_type_to_addrs(node.false_node)
|
|
419
|
+
return set()
|
|
420
|
+
|
|
469
421
|
def _unpack_region_to_block_addrs(region: GraphRegion):
|
|
470
422
|
region_addrs = set()
|
|
471
423
|
for node in region.graph.nodes:
|
|
472
|
-
if isinstance(node, Block):
|
|
473
|
-
region_addrs.add(node.addr)
|
|
474
|
-
elif isinstance(node, MultiNode):
|
|
475
|
-
for _node in node.nodes:
|
|
476
|
-
region_addrs.add(_node.addr)
|
|
424
|
+
if isinstance(node, (Block, MultiNode, ConditionNode)):
|
|
425
|
+
region_addrs |= _unpack_block_type_to_addrs(node)
|
|
477
426
|
elif isinstance(node, GraphRegion):
|
|
478
427
|
region_addrs |= _unpack_region_to_block_addrs(node)
|
|
479
428
|
|
|
@@ -487,6 +436,9 @@ class ReturnDuplicator(StructuringOptimizationPass):
|
|
|
487
436
|
elif isinstance(node, MultiNode):
|
|
488
437
|
for _node in node.nodes:
|
|
489
438
|
addrs_by_region[region].add(_node.addr)
|
|
439
|
+
elif isinstance(node, ConditionNode):
|
|
440
|
+
addrs_by_region[region] |= _unpack_block_type_to_addrs(node.true_node)
|
|
441
|
+
addrs_by_region[region] |= _unpack_block_type_to_addrs(node.false_node)
|
|
490
442
|
else:
|
|
491
443
|
addrs_by_region[region] |= _unpack_region_to_block_addrs(node)
|
|
492
444
|
_unpack_every_region(node, addrs_by_region)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
import networkx
|
|
4
|
+
|
|
5
|
+
from .return_duplicator_base import ReturnDuplicatorBase
|
|
6
|
+
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
7
|
+
|
|
8
|
+
_l = logging.getLogger(name=__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ReturnDuplicatorHigh(OptimizationPass, ReturnDuplicatorBase):
|
|
12
|
+
"""
|
|
13
|
+
This is a light-level goto-less version of the ReturnDuplicator optimization pass. It will only
|
|
14
|
+
duplicate return-only blocks.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
ARCHES = None
|
|
18
|
+
PLATFORMS = None
|
|
19
|
+
STAGE = OptimizationPassStage.AFTER_VARIABLE_RECOVERY
|
|
20
|
+
NAME = "Duplicate return-only blocks (high)"
|
|
21
|
+
DESCRIPTION = __doc__
|
|
22
|
+
|
|
23
|
+
def __init__(
|
|
24
|
+
self,
|
|
25
|
+
func,
|
|
26
|
+
# internal parameters that should be used by Clinic
|
|
27
|
+
node_idx_start: int = 0,
|
|
28
|
+
# settings
|
|
29
|
+
max_calls_in_regions: int = 2,
|
|
30
|
+
minimize_copies_for_regions: bool = True,
|
|
31
|
+
**kwargs,
|
|
32
|
+
):
|
|
33
|
+
ReturnDuplicatorBase.__init__(
|
|
34
|
+
self,
|
|
35
|
+
func,
|
|
36
|
+
node_idx_start=node_idx_start,
|
|
37
|
+
max_calls_in_regions=max_calls_in_regions,
|
|
38
|
+
minimize_copies_for_regions=minimize_copies_for_regions,
|
|
39
|
+
**kwargs,
|
|
40
|
+
)
|
|
41
|
+
OptimizationPass.__init__(self, func, **kwargs)
|
|
42
|
+
# since we run before the RegionIdentification pass in the decompiler, we need to collect it early here
|
|
43
|
+
self._ri = self._recover_regions(self._graph)
|
|
44
|
+
|
|
45
|
+
self.analyze()
|
|
46
|
+
|
|
47
|
+
def _check(self):
|
|
48
|
+
return ReturnDuplicatorBase._check(self)
|
|
49
|
+
|
|
50
|
+
def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False):
|
|
51
|
+
# TODO: implement a better check
|
|
52
|
+
return dst_is_const_ret
|
|
53
|
+
|
|
54
|
+
def _analyze(self, cache=None):
|
|
55
|
+
copy_graph = networkx.DiGraph(self._graph)
|
|
56
|
+
if self._analyze_core(copy_graph):
|
|
57
|
+
self.out_graph = self._simplify_graph(copy_graph)
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import inspect
|
|
3
|
+
|
|
4
|
+
import networkx
|
|
5
|
+
|
|
6
|
+
from ailment import Block
|
|
7
|
+
from ailment.statement import ConditionalJump
|
|
8
|
+
|
|
9
|
+
from .return_duplicator_base import ReturnDuplicatorBase
|
|
10
|
+
from .optimization_pass import StructuringOptimizationPass
|
|
11
|
+
|
|
12
|
+
_l = logging.getLogger(name=__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
|
|
16
|
+
"""
|
|
17
|
+
An optimization pass that reverts a subset of Irreducible Statement Condensing (ISC) optimizations, as described
|
|
18
|
+
in the USENIX 2024 paper SAILR. This is the heavy/goto version of the ReturnDuplicator optimization pass.
|
|
19
|
+
|
|
20
|
+
Some compilers, including GCC, Clang, and MSVC, apply various optimizations to reduce the number of statements in
|
|
21
|
+
code. These optimizations will take equivalent statements, or a subset of them, and replace them with a single
|
|
22
|
+
copy that is jumped to by gotos -- optimizing for space and sometimes speed.
|
|
23
|
+
|
|
24
|
+
This optimization pass will revert those gotos by re-duplicating the condensed blocks. Since Return statements
|
|
25
|
+
are the most common, we use this optimization pass to revert only gotos to return statements. Additionally, we
|
|
26
|
+
perform some additional readability fixups, like not re-duplicating returns to shared components.
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
func: The function to optimize.
|
|
30
|
+
node_idx_start: The index to start at when creating new nodes. This is used by Clinic to ensure that
|
|
31
|
+
node indices are unique across multiple passes.
|
|
32
|
+
max_opt_iters: The maximum number of optimization iterations to perform.
|
|
33
|
+
max_calls_in_regions: The maximum number of calls that can be in a region. This is used to prevent
|
|
34
|
+
duplicating too much code.
|
|
35
|
+
prevent_new_gotos: If True, this optimization pass will prevent new gotos from being created.
|
|
36
|
+
minimize_copies_for_regions: If True, this optimization pass will minimize the number of copies by doing only
|
|
37
|
+
a single copy for connected in_edges that form a region.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
ARCHES = None
|
|
41
|
+
PLATFORMS = None
|
|
42
|
+
NAME = "Duplicate returns connect with gotos (low)"
|
|
43
|
+
DESCRIPTION = inspect.cleandoc(__doc__[: __doc__.index("Args:")]) # pylint:disable=unsubscriptable-object
|
|
44
|
+
|
|
45
|
+
def __init__(
|
|
46
|
+
self,
|
|
47
|
+
func,
|
|
48
|
+
# internal parameters that should be used by Clinic
|
|
49
|
+
node_idx_start: int = 0,
|
|
50
|
+
# settings
|
|
51
|
+
max_opt_iters: int = 4,
|
|
52
|
+
max_calls_in_regions: int = 2,
|
|
53
|
+
prevent_new_gotos: bool = True,
|
|
54
|
+
minimize_copies_for_regions: bool = True,
|
|
55
|
+
**kwargs,
|
|
56
|
+
):
|
|
57
|
+
ReturnDuplicatorBase.__init__(
|
|
58
|
+
self,
|
|
59
|
+
func,
|
|
60
|
+
node_idx_start=node_idx_start,
|
|
61
|
+
max_calls_in_regions=max_calls_in_regions,
|
|
62
|
+
minimize_copies_for_regions=minimize_copies_for_regions,
|
|
63
|
+
**kwargs,
|
|
64
|
+
)
|
|
65
|
+
StructuringOptimizationPass.__init__(
|
|
66
|
+
self, func, max_opt_iters=max_opt_iters, prevent_new_gotos=prevent_new_gotos, require_gotos=True, **kwargs
|
|
67
|
+
)
|
|
68
|
+
self.analyze()
|
|
69
|
+
|
|
70
|
+
def _check(self):
|
|
71
|
+
return ReturnDuplicatorBase._check(self)
|
|
72
|
+
|
|
73
|
+
def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False):
|
|
74
|
+
return self._is_goto_edge(src, dst, graph=graph, check_for_ifstmts=True)
|
|
75
|
+
|
|
76
|
+
def _is_goto_edge(
|
|
77
|
+
self,
|
|
78
|
+
src: Block,
|
|
79
|
+
dst: Block,
|
|
80
|
+
graph: networkx.DiGraph = None,
|
|
81
|
+
check_for_ifstmts=True,
|
|
82
|
+
max_level_check=1,
|
|
83
|
+
):
|
|
84
|
+
"""
|
|
85
|
+
TODO: correct how goto edge addressing works
|
|
86
|
+
This function only exists because a long-standing bug that sometimes reports the if-stmt addr
|
|
87
|
+
above a goto edge as the goto src. Because of this, we need to check for predecessors above the goto and
|
|
88
|
+
see if they are a goto. This needs to include Jump to deal with loops.
|
|
89
|
+
"""
|
|
90
|
+
if check_for_ifstmts and graph is not None:
|
|
91
|
+
blocks = [src]
|
|
92
|
+
level_blocks = [src]
|
|
93
|
+
for _ in range(max_level_check):
|
|
94
|
+
new_level_blocks = []
|
|
95
|
+
for lblock in level_blocks:
|
|
96
|
+
new_level_blocks += list(graph.predecessors(lblock))
|
|
97
|
+
|
|
98
|
+
blocks += new_level_blocks
|
|
99
|
+
level_blocks = new_level_blocks
|
|
100
|
+
|
|
101
|
+
src_direct_parents = list(graph.predecessors(src))
|
|
102
|
+
for block in blocks:
|
|
103
|
+
if not block or not block.statements:
|
|
104
|
+
continue
|
|
105
|
+
|
|
106
|
+
# special case if-stmts that are next to each other
|
|
107
|
+
if block in src_direct_parents and isinstance(block.statements[-1], ConditionalJump):
|
|
108
|
+
continue
|
|
109
|
+
|
|
110
|
+
if self._goto_manager.is_goto_edge(block, dst):
|
|
111
|
+
return True
|
|
112
|
+
else:
|
|
113
|
+
return self._goto_manager.is_goto_edge(src, dst)
|
|
114
|
+
|
|
115
|
+
return False
|
|
116
|
+
|
|
117
|
+
def _analyze(self, cache=None):
|
|
118
|
+
"""
|
|
119
|
+
This analysis is run in a loop in analyze() for a maximum of max_opt_iters times.
|
|
120
|
+
"""
|
|
121
|
+
return self._analyze_core(self.out_graph)
|
|
@@ -718,6 +718,13 @@ class RegionIdentifier(Analysis):
|
|
|
718
718
|
region.graph_with_successors.add_edge(nn, succ)
|
|
719
719
|
region.successors.add(succ)
|
|
720
720
|
|
|
721
|
+
# add edges between successors
|
|
722
|
+
for succ_0 in region.successors:
|
|
723
|
+
for succ_1 in region.successors:
|
|
724
|
+
if succ_0 is not succ_1:
|
|
725
|
+
if secondary_graph.has_edge(succ_0, succ_1):
|
|
726
|
+
region.graph_with_successors.add_edge(succ_0, succ_1)
|
|
727
|
+
|
|
721
728
|
# l.debug("Walked back %d levels in postdom tree.", levels)
|
|
722
729
|
l.debug("Node %r, frontier %r.", node, frontier)
|
|
723
730
|
# l.debug("Identified an acyclic region %s.", self._dbg_block_list(region.graph.nodes()))
|
|
@@ -929,6 +936,12 @@ class RegionIdentifier(Analysis):
|
|
|
929
936
|
region.successors = []
|
|
930
937
|
region.successors += list(abnormal_exit_nodes)
|
|
931
938
|
|
|
939
|
+
for succ_0 in region.successors:
|
|
940
|
+
for succ_1 in region.successors:
|
|
941
|
+
if succ_0 is not succ_1:
|
|
942
|
+
if graph.has_edge(succ_0, succ_1):
|
|
943
|
+
region.graph_with_successors.add_edge(succ_0, succ_1)
|
|
944
|
+
|
|
932
945
|
for node in loop_nodes:
|
|
933
946
|
graph.remove_node(node)
|
|
934
947
|
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from ailment import Block
|
|
2
|
+
|
|
3
|
+
from .sequence_walker import SequenceWalker
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SequenceToBlocks(SequenceWalker):
|
|
7
|
+
"""
|
|
8
|
+
A helper class to convert a sequence node into a list of blocks.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
def __init__(self):
|
|
12
|
+
handlers = {
|
|
13
|
+
Block: self._handle_Block,
|
|
14
|
+
}
|
|
15
|
+
self.blocks = []
|
|
16
|
+
super().__init__(handlers, force_forward_scan=True, update_seqnode_in_place=False)
|
|
17
|
+
|
|
18
|
+
def _handle_Block(self, node: Block, **kwargs): # pylint:disable=unused-argument
|
|
19
|
+
self.blocks.append(node)
|