angr 9.2.166__cp310-abi3-macosx_10_12_x86_64.whl → 9.2.167__cp310-abi3-macosx_10_12_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +8 -8
- angr/analyses/decompiler/clinic.py +3 -0
- angr/analyses/decompiler/condition_processor.py +44 -1
- angr/analyses/decompiler/decompiler.py +6 -0
- angr/analyses/decompiler/node_replacer.py +42 -0
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +3 -0
- angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +5 -76
- angr/analyses/decompiler/region_identifier.py +12 -3
- angr/analyses/decompiler/sequence_walker.py +11 -7
- angr/analyses/decompiler/structuring/phoenix.py +645 -305
- angr/analyses/decompiler/structuring/structurer_base.py +75 -1
- angr/analyses/decompiler/utils.py +71 -28
- angr/analyses/reaching_definitions/engine_vex.py +3 -2
- angr/procedures/glibc/scanf.py +8 -0
- angr/procedures/glibc/sscanf.py +4 -0
- angr/rustylib.abi3.so +0 -0
- angr/unicornlib.dylib +0 -0
- angr/utils/graph.py +62 -24
- {angr-9.2.166.dist-info → angr-9.2.167.dist-info}/METADATA +5 -5
- {angr-9.2.166.dist-info → angr-9.2.167.dist-info}/RECORD +26 -25
- {angr-9.2.166.dist-info → angr-9.2.167.dist-info}/WHEEL +0 -0
- {angr-9.2.166.dist-info → angr-9.2.167.dist-info}/entry_points.txt +0 -0
- {angr-9.2.166.dist-info → angr-9.2.167.dist-info}/licenses/LICENSE +0 -0
- {angr-9.2.166.dist-info → angr-9.2.167.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
|
@@ -920,32 +920,32 @@ class JumpTableResolver(IndirectJumpResolver):
|
|
|
920
920
|
# more sanity checks
|
|
921
921
|
|
|
922
922
|
# for a typical jump table, the current block has only one predecessor, and the predecessor to the current
|
|
923
|
-
# block has two successors
|
|
923
|
+
# block has two successors
|
|
924
924
|
# for a typical vtable call (or jump if at the end of a function), the block has two predecessors that form a
|
|
925
925
|
# diamond shape
|
|
926
926
|
curr_node = func.get_node(addr)
|
|
927
|
-
if curr_node is None or curr_node not in func.
|
|
927
|
+
if curr_node is None or curr_node not in func.transition_graph:
|
|
928
928
|
l.debug("Could not find the node %#x in the function transition graph", addr)
|
|
929
929
|
return False, None
|
|
930
|
-
preds = list(func.
|
|
930
|
+
preds = list(func.transition_graph.predecessors(curr_node))
|
|
931
931
|
pred_endaddrs = {pred.addr + pred.size for pred in preds} # handle non-normalized CFGs
|
|
932
932
|
if func_graph_complete and not is_arm and not potential_call_table:
|
|
933
933
|
# on ARM you can do a single-block jump table...
|
|
934
934
|
if len(pred_endaddrs) == 1:
|
|
935
|
-
pred_succs = [succ for succ in func.
|
|
935
|
+
pred_succs = [succ for succ in func.transition_graph.successors(preds[0]) if succ.addr != preds[0].addr]
|
|
936
936
|
if len(pred_succs) != 2:
|
|
937
937
|
l.debug("Expect two successors to the single predecessor, found %d.", len(pred_succs))
|
|
938
938
|
return False, None
|
|
939
939
|
elif len(pred_endaddrs) == 2 and len(preds) == 2:
|
|
940
940
|
pred_succs = set(
|
|
941
|
-
[succ for succ in func.
|
|
942
|
-
+ [succ for succ in func.
|
|
941
|
+
[succ for succ in func.transition_graph.successors(preds[0]) if succ.addr != preds[0].addr]
|
|
942
|
+
+ [succ for succ in func.transition_graph.successors(preds[1]) if succ.addr != preds[1].addr]
|
|
943
943
|
)
|
|
944
944
|
is_diamond = False
|
|
945
945
|
if len(pred_succs) == 2:
|
|
946
946
|
non_node_succ = next(iter(pred_succ for pred_succ in pred_succs if pred_succ is not curr_node))
|
|
947
|
-
while func.
|
|
948
|
-
non_node_succ = next(iter(func.
|
|
947
|
+
while func.transition_graph.out_degree[non_node_succ] == 1:
|
|
948
|
+
non_node_succ = next(iter(func.transition_graph.successors(non_node_succ)))
|
|
949
949
|
if non_node_succ == curr_node:
|
|
950
950
|
is_diamond = True
|
|
951
951
|
break
|
|
@@ -142,6 +142,7 @@ class Clinic(Analysis):
|
|
|
142
142
|
optimization_scratch: dict[str, Any] | None = None,
|
|
143
143
|
desired_variables: set[str] | None = None,
|
|
144
144
|
force_loop_single_exit: bool = True,
|
|
145
|
+
refine_loops_with_single_successor: bool = False,
|
|
145
146
|
complete_successors: bool = False,
|
|
146
147
|
max_type_constraints: int = 100_000,
|
|
147
148
|
type_constraint_set_degradation_threshold: int = 150,
|
|
@@ -212,6 +213,7 @@ class Clinic(Analysis):
|
|
|
212
213
|
self._inlining_parents = inlining_parents or ()
|
|
213
214
|
self._desired_variables = desired_variables
|
|
214
215
|
self._force_loop_single_exit = force_loop_single_exit
|
|
216
|
+
self._refine_loops_with_single_successor = refine_loops_with_single_successor
|
|
215
217
|
self._complete_successors = complete_successors
|
|
216
218
|
|
|
217
219
|
self._register_save_areas_removed: bool = False
|
|
@@ -1550,6 +1552,7 @@ class Clinic(Analysis):
|
|
|
1550
1552
|
entry_node_addr=self.entry_node_addr,
|
|
1551
1553
|
scratch=self.optimization_scratch,
|
|
1552
1554
|
force_loop_single_exit=self._force_loop_single_exit,
|
|
1555
|
+
refine_loops_with_single_successor=self._refine_loops_with_single_successor,
|
|
1553
1556
|
complete_successors=self._complete_successors,
|
|
1554
1557
|
stack_pointer_tracker=stack_pointer_tracker,
|
|
1555
1558
|
**kwargs,
|
|
@@ -239,6 +239,24 @@ class ConditionProcessor:
|
|
|
239
239
|
condition translation if possible.
|
|
240
240
|
"""
|
|
241
241
|
|
|
242
|
+
if isinstance(src, SequenceNode) and src.nodes and isinstance(src.nodes[-1], ConditionNode):
|
|
243
|
+
cond_node = src.nodes[-1]
|
|
244
|
+
if (
|
|
245
|
+
isinstance(cond_node.true_node, ailment.Block)
|
|
246
|
+
and isinstance(cond_node.false_node, ailment.Block)
|
|
247
|
+
and cond_node.true_node.statements
|
|
248
|
+
and cond_node.false_node.statements
|
|
249
|
+
):
|
|
250
|
+
last_stmt_true = self.get_last_statement(cond_node.true_node)
|
|
251
|
+
last_stmt_false = self.get_last_statement(cond_node.false_node)
|
|
252
|
+
if (
|
|
253
|
+
isinstance(last_stmt_true, ailment.Stmt.Jump)
|
|
254
|
+
and isinstance(last_stmt_false, ailment.Stmt.Jump)
|
|
255
|
+
and isinstance(last_stmt_true.target, ailment.Expr.Const)
|
|
256
|
+
and isinstance(last_stmt_false.target, ailment.Expr.Const)
|
|
257
|
+
):
|
|
258
|
+
return {last_stmt_true.target.value, last_stmt_false.target.value} == {dst0.addr, dst1.addr}
|
|
259
|
+
|
|
242
260
|
if src in graph and graph.out_degree[src] == 2 and graph.has_edge(src, dst0) and graph.has_edge(src, dst1):
|
|
243
261
|
# sometimes the last statement is the conditional jump. sometimes it's the first statement of the block
|
|
244
262
|
if isinstance(src, ailment.Block) and src.statements and is_head_controlled_loop_block(src):
|
|
@@ -247,7 +265,10 @@ class ConditionProcessor:
|
|
|
247
265
|
)
|
|
248
266
|
assert last_stmt is not None
|
|
249
267
|
else:
|
|
250
|
-
|
|
268
|
+
try:
|
|
269
|
+
last_stmt = self.get_last_statement(src)
|
|
270
|
+
except EmptyBlockNotice:
|
|
271
|
+
last_stmt = None
|
|
251
272
|
|
|
252
273
|
if isinstance(last_stmt, ailment.Stmt.ConditionalJump):
|
|
253
274
|
return True
|
|
@@ -258,6 +279,28 @@ class ConditionProcessor:
|
|
|
258
279
|
return claripy.is_true(claripy.Not(edge_cond_left) == edge_cond_right) # type: ignore
|
|
259
280
|
|
|
260
281
|
def recover_edge_condition(self, graph: networkx.DiGraph, src, dst):
|
|
282
|
+
|
|
283
|
+
def _check_condnode_and_get_condition(cond_node: ConditionNode) -> claripy.ast.Bool | None:
|
|
284
|
+
for cond_block, negate in [(cond_node.true_node, False), (cond_node.false_node, True)]:
|
|
285
|
+
if isinstance(cond_block, ailment.Block) and cond_block.statements:
|
|
286
|
+
last_stmt = self.get_last_statement(cond_block)
|
|
287
|
+
if (
|
|
288
|
+
isinstance(last_stmt, ailment.Stmt.Jump)
|
|
289
|
+
and isinstance(last_stmt.target, ailment.Expr.Const)
|
|
290
|
+
and last_stmt.target.value == dst.addr
|
|
291
|
+
):
|
|
292
|
+
return claripy.Not(cond_node.condition) if negate else cond_node.condition
|
|
293
|
+
return None
|
|
294
|
+
|
|
295
|
+
if isinstance(src, SequenceNode) and src.nodes and isinstance(src.nodes[-1], ConditionNode):
|
|
296
|
+
predicate = _check_condnode_and_get_condition(src.nodes[-1])
|
|
297
|
+
if predicate is not None:
|
|
298
|
+
return predicate
|
|
299
|
+
if isinstance(src, ConditionNode):
|
|
300
|
+
predicate = _check_condnode_and_get_condition(src)
|
|
301
|
+
if predicate is not None:
|
|
302
|
+
return predicate
|
|
303
|
+
|
|
261
304
|
edge = src, dst
|
|
262
305
|
edge_data = graph.get_edge_data(*edge)
|
|
263
306
|
edge_type = edge_data.get("type", "transition") if edge_data is not None else "transition"
|
|
@@ -222,6 +222,7 @@ class Decompiler(Analysis):
|
|
|
222
222
|
# determine a few arguments according to the structuring algorithm
|
|
223
223
|
fold_callexprs_into_conditions = False
|
|
224
224
|
self._force_loop_single_exit = True
|
|
225
|
+
self._refine_loops_with_single_successor = False
|
|
225
226
|
self._complete_successors = False
|
|
226
227
|
self._recursive_structurer_params = self.options_to_params(self.options_by_class["recursive_structurer"])
|
|
227
228
|
if "structurer_cls" not in self._recursive_structurer_params:
|
|
@@ -229,6 +230,7 @@ class Decompiler(Analysis):
|
|
|
229
230
|
# is the algorithm based on Phoenix (a schema-based algorithm)?
|
|
230
231
|
if issubclass(self._recursive_structurer_params["structurer_cls"], PhoenixStructurer):
|
|
231
232
|
self._force_loop_single_exit = False
|
|
233
|
+
# self._refine_loops_with_single_successor = True
|
|
232
234
|
self._complete_successors = True
|
|
233
235
|
fold_callexprs_into_conditions = True
|
|
234
236
|
|
|
@@ -261,6 +263,7 @@ class Decompiler(Analysis):
|
|
|
261
263
|
desired_variables=self._desired_variables,
|
|
262
264
|
optimization_scratch=self._optimization_scratch,
|
|
263
265
|
force_loop_single_exit=self._force_loop_single_exit,
|
|
266
|
+
refine_loops_with_single_successor=self._refine_loops_with_single_successor,
|
|
264
267
|
complete_successors=self._complete_successors,
|
|
265
268
|
ail_graph=self._clinic_graph,
|
|
266
269
|
arg_vvars=self._clinic_arg_vvars,
|
|
@@ -396,6 +399,7 @@ class Decompiler(Analysis):
|
|
|
396
399
|
cond_proc=condition_processor,
|
|
397
400
|
update_graph=update_graph,
|
|
398
401
|
force_loop_single_exit=self._force_loop_single_exit,
|
|
402
|
+
refine_loops_with_single_successor=self._refine_loops_with_single_successor,
|
|
399
403
|
complete_successors=self._complete_successors,
|
|
400
404
|
entry_node_addr=self.clinic.entry_node_addr,
|
|
401
405
|
**self.options_to_params(self.options_by_class["region_identifier"]),
|
|
@@ -444,6 +448,7 @@ class Decompiler(Analysis):
|
|
|
444
448
|
entry_node_addr=self.clinic.entry_node_addr,
|
|
445
449
|
scratch=self._optimization_scratch,
|
|
446
450
|
force_loop_single_exit=self._force_loop_single_exit,
|
|
451
|
+
refine_loops_with_single_successor=self._refine_loops_with_single_successor,
|
|
447
452
|
complete_successors=self._complete_successors,
|
|
448
453
|
**kwargs,
|
|
449
454
|
)
|
|
@@ -507,6 +512,7 @@ class Decompiler(Analysis):
|
|
|
507
512
|
entry_node_addr=self.clinic.entry_node_addr,
|
|
508
513
|
scratch=self._optimization_scratch,
|
|
509
514
|
force_loop_single_exit=self._force_loop_single_exit,
|
|
515
|
+
refine_loops_with_single_successor=self._refine_loops_with_single_successor,
|
|
510
516
|
complete_successors=self._complete_successors,
|
|
511
517
|
peephole_optimizations=self._peephole_optimizations,
|
|
512
518
|
avoid_vvar_ids=self._copied_var_ids,
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from angr.ailment import Block
|
|
4
|
+
from .sequence_walker import SequenceWalker
|
|
5
|
+
from .structuring.structurer_nodes import BaseNode, SequenceNode, MultiNode
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class NodeReplacer(SequenceWalker):
    """
    Walk a structured node tree and substitute every node that appears as a key in a replacement mapping.

    The walk is performed during construction; its outcome is stored in ``result``.
    """

    def __init__(self, root: BaseNode, replacements: dict) -> None:
        # ask the base walker not to update sequence nodes in place
        super().__init__(update_seqnode_in_place=False)

        self.root = root
        self.replacements = replacements
        self.result: BaseNode = self.walk(self.root)  # type:ignore

    def _handle(self, node: BaseNode, **kwargs):
        """
        Return the mapped replacement for `node` if there is one; otherwise defer to the base walker.
        """
        if node in self.replacements:
            return self.replacements[node]
        return super()._handle(node, **kwargs)

    def _handle_MultiNode(self, node: MultiNode, **kwargs):
        """
        Process the children of a MultiNode from last to first on a copy of its node list.

        :return: None if no child was replaced; a SequenceNode if any replacement is not a Block; otherwise a new
                 MultiNode holding the updated children.
        """
        children = list(node.nodes)
        any_replaced = False
        needs_sequence = False

        for pos in range(len(children) - 1, -1, -1):
            replacement = self._handle(children[pos], parent=node, index=pos)
            if replacement is None:
                continue
            any_replaced = True
            children[pos] = replacement
            if not isinstance(replacement, Block):
                # only replaced children are inspected, exactly as the original loop did
                needs_sequence = True

        if not any_replaced:
            return None
        if needs_sequence:
            return SequenceNode(node.addr, nodes=children)
        return MultiNode(children, addr=node.addr, idx=node.idx)
|
|
@@ -135,6 +135,7 @@ class OptimizationPass(BaseOptimizationPass):
|
|
|
135
135
|
entry_node_addr=None,
|
|
136
136
|
scratch: dict[str, Any] | None = None,
|
|
137
137
|
force_loop_single_exit: bool = True,
|
|
138
|
+
refine_loops_with_single_successor: bool = False,
|
|
138
139
|
complete_successors: bool = False,
|
|
139
140
|
avoid_vvar_ids: set[int] | None = None,
|
|
140
141
|
arg_vvars: set[int] | None = None,
|
|
@@ -158,6 +159,7 @@ class OptimizationPass(BaseOptimizationPass):
|
|
|
158
159
|
entry_node_addr if entry_node_addr is not None else (func.addr, None)
|
|
159
160
|
)
|
|
160
161
|
self._force_loop_single_exit = force_loop_single_exit
|
|
162
|
+
self._refine_loops_with_single_successor = refine_loops_with_single_successor
|
|
161
163
|
self._complete_successors = complete_successors
|
|
162
164
|
self._avoid_vvar_ids = avoid_vvar_ids or set()
|
|
163
165
|
self._peephole_optimizations = peephole_optimizations
|
|
@@ -397,6 +399,7 @@ class OptimizationPass(BaseOptimizationPass):
|
|
|
397
399
|
cond_proc=condition_processor or ConditionProcessor(self.project.arch),
|
|
398
400
|
update_graph=update_graph,
|
|
399
401
|
force_loop_single_exit=self._force_loop_single_exit,
|
|
402
|
+
refine_loops_with_single_successor=self._refine_loops_with_single_successor,
|
|
400
403
|
complete_successors=self._complete_successors,
|
|
401
404
|
entry_node_addr=self.entry_node_addr,
|
|
402
405
|
)
|
|
@@ -6,7 +6,7 @@ from typing import Any
|
|
|
6
6
|
import networkx
|
|
7
7
|
|
|
8
8
|
from angr.ailment import Block
|
|
9
|
-
from angr.ailment.statement import ConditionalJump
|
|
9
|
+
from angr.ailment.statement import ConditionalJump
|
|
10
10
|
|
|
11
11
|
from .return_duplicator_base import ReturnDuplicatorBase
|
|
12
12
|
from .optimization_pass import StructuringOptimizationPass
|
|
@@ -53,7 +53,7 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
|
|
|
53
53
|
prevent_new_gotos: bool = True,
|
|
54
54
|
minimize_copies_for_regions: bool = True,
|
|
55
55
|
region_identifier=None,
|
|
56
|
-
vvar_id_start: int
|
|
56
|
+
vvar_id_start: int = 0,
|
|
57
57
|
scratch: dict[str, Any] | None = None,
|
|
58
58
|
max_func_blocks: int = 500,
|
|
59
59
|
**kwargs,
|
|
@@ -91,8 +91,9 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
|
|
|
91
91
|
self,
|
|
92
92
|
src: Block,
|
|
93
93
|
dst: Block,
|
|
94
|
-
graph: networkx.DiGraph = None,
|
|
95
94
|
max_level_check=1,
|
|
95
|
+
*,
|
|
96
|
+
graph: networkx.DiGraph,
|
|
96
97
|
):
|
|
97
98
|
"""
|
|
98
99
|
TODO: Implement a more principled way of checking if an edge is a goto edge with Phoenix's structuring info
|
|
@@ -100,6 +101,7 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
|
|
|
100
101
|
above a goto edge as the goto src.
|
|
101
102
|
"""
|
|
102
103
|
# Do a simple and fast check first
|
|
104
|
+
assert self._goto_manager is not None
|
|
103
105
|
is_simple_goto = self._goto_manager.is_goto_edge(src, dst)
|
|
104
106
|
if is_simple_goto:
|
|
105
107
|
return True
|
|
@@ -155,79 +157,6 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
|
|
|
155
157
|
# keep testing the next edge
|
|
156
158
|
node = succ
|
|
157
159
|
|
|
158
|
-
# Special case 3: In Phoenix, regions full of only if-stmts can be collapsed and moved. This causes
|
|
159
|
-
# the goto manager to report gotos that are at the top of the region instead of ones in the middle of it.
|
|
160
|
-
# Because of this, we need to gather all the nodes above the original src and check if any of them
|
|
161
|
-
# go to the destination. Additionally, we need to do this on the supergraph to get rid of
|
|
162
|
-
# goto edges that are removed by Phoenix.
|
|
163
|
-
# This case is observed in the test case `TestDecompiler.test_tail_tail_bytes_ret_dup`.
|
|
164
|
-
if self._supergraph is None:
|
|
165
|
-
return False
|
|
166
|
-
|
|
167
|
-
super_to_og_nodes = {n: self._supergraph.nodes[n]["original_nodes"] for n in self._supergraph.nodes}
|
|
168
|
-
og_to_super_nodes = {og: super_n for super_n, ogs in super_to_og_nodes.items() for og in ogs}
|
|
169
|
-
super_src = og_to_super_nodes.get(src)
|
|
170
|
-
super_dst = og_to_super_nodes.get(dst)
|
|
171
|
-
if super_src is None or super_dst is None:
|
|
172
|
-
return False
|
|
173
|
-
|
|
174
|
-
# collect all nodes which have only an if-stmt in them that are ancestors of super_src
|
|
175
|
-
check_blks = {super_src}
|
|
176
|
-
level_blocks = {super_src}
|
|
177
|
-
for _ in range(10):
|
|
178
|
-
done = False
|
|
179
|
-
if_blks = set()
|
|
180
|
-
for lblock in level_blocks:
|
|
181
|
-
preds = list(self._supergraph.predecessors(lblock))
|
|
182
|
-
for pred in preds:
|
|
183
|
-
only_cond_jump = all(isinstance(s, (ConditionalJump, Label)) for s in pred.statements)
|
|
184
|
-
if only_cond_jump:
|
|
185
|
-
if_blks.add(pred)
|
|
186
|
-
|
|
187
|
-
done = len(if_blks) == 0
|
|
188
|
-
|
|
189
|
-
if done:
|
|
190
|
-
break
|
|
191
|
-
|
|
192
|
-
check_blks |= if_blks
|
|
193
|
-
level_blocks = if_blks
|
|
194
|
-
|
|
195
|
-
# convert all the found if-only super-blocks back into their original blocks
|
|
196
|
-
og_check_blocks = set()
|
|
197
|
-
for blk in check_blks:
|
|
198
|
-
og_check_blocks |= set(super_to_og_nodes[blk])
|
|
199
|
-
|
|
200
|
-
# check if any of the original blocks are gotos to the destination
|
|
201
|
-
goto_hits = 0
|
|
202
|
-
for block in og_check_blocks:
|
|
203
|
-
if self._goto_manager.is_goto_edge(block, dst):
|
|
204
|
-
goto_hits += 1
|
|
205
|
-
|
|
206
|
-
# Although it is good to find a goto in the if-only block region, having more than a single goto
|
|
207
|
-
# existing that goes to the same dst is a bad sign. This can be seen in the following test:
|
|
208
|
-
# TestDecompiler.test_dd_iread_ret_dup_region
|
|
209
|
-
#
|
|
210
|
-
# It occurs when you have something like:
|
|
211
|
-
# ```
|
|
212
|
-
# if (a || c)
|
|
213
|
-
# goto target;
|
|
214
|
-
# target:
|
|
215
|
-
# return 0;
|
|
216
|
-
# ```
|
|
217
|
-
#
|
|
218
|
-
#
|
|
219
|
-
# This looks like an edge from (a, target) and (c, target) but it is actually a single edge.
|
|
220
|
-
# If you allow both to duplicate you get the following:
|
|
221
|
-
# ```
|
|
222
|
-
# if (a):
|
|
223
|
-
# return
|
|
224
|
-
# if (c):
|
|
225
|
-
# return
|
|
226
|
-
# ```
|
|
227
|
-
# This is not the desired behavior.
|
|
228
|
-
# So we need to check if there is only a single goto that goes to the destination.
|
|
229
|
-
return goto_hits == 1
|
|
230
|
-
|
|
231
160
|
return False
|
|
232
161
|
|
|
233
162
|
def _analyze(self, cache=None):
|
|
@@ -43,6 +43,7 @@ class RegionIdentifier(Analysis):
|
|
|
43
43
|
update_graph=True,
|
|
44
44
|
largest_successor_tree_outside_loop=True,
|
|
45
45
|
force_loop_single_exit=True,
|
|
46
|
+
refine_loops_with_single_successor=False,
|
|
46
47
|
complete_successors=False,
|
|
47
48
|
entry_node_addr: tuple[int, int | None] | None = None,
|
|
48
49
|
):
|
|
@@ -70,6 +71,7 @@ class RegionIdentifier(Analysis):
|
|
|
70
71
|
self.regions_by_block_addrs = []
|
|
71
72
|
self._largest_successor_tree_outside_loop = largest_successor_tree_outside_loop
|
|
72
73
|
self._force_loop_single_exit = force_loop_single_exit
|
|
74
|
+
self._refine_loops_with_single_successor = refine_loops_with_single_successor
|
|
73
75
|
self._complete_successors = complete_successors
|
|
74
76
|
# we keep a dictionary of node and their traversal order in a quasi-topological traversal and update this
|
|
75
77
|
# dictionary as we update the graph
|
|
@@ -265,13 +267,18 @@ class RegionIdentifier(Analysis):
|
|
|
265
267
|
|
|
266
268
|
# special case: any node with more than two non-self successors is probably the head of a switch-case. we
|
|
267
269
|
# should include all successors into the loop subgraph.
|
|
270
|
+
# we must be extra careful here to not include nodes that are reachable from outside the loop subgraph. an
|
|
271
|
+
# example is in binary 064e1d62c8542d658d83f7e231cc3b935a1f18153b8aea809dcccfd446a91c93, loop 0x40d7b0 should
|
|
272
|
+
# not include block 0x40d9d5 because this node has an out-of-loop-body predecessor (block 0x40d795).
|
|
268
273
|
while True:
|
|
269
274
|
updated = False
|
|
270
275
|
for node in list(loop_subgraph):
|
|
271
276
|
nonself_successors = [succ for succ in graph.successors(node) if succ is not node]
|
|
272
277
|
if len(nonself_successors) > 2:
|
|
273
278
|
for succ in nonself_successors:
|
|
274
|
-
if not loop_subgraph.has_edge(node, succ)
|
|
279
|
+
if not loop_subgraph.has_edge(node, succ) and all(
|
|
280
|
+
pred in loop_subgraph for pred in graph.predecessors(succ)
|
|
281
|
+
):
|
|
275
282
|
updated = True
|
|
276
283
|
loop_subgraph.add_edge(node, succ)
|
|
277
284
|
if not updated:
|
|
@@ -280,7 +287,9 @@ class RegionIdentifier(Analysis):
|
|
|
280
287
|
return set(loop_subgraph)
|
|
281
288
|
|
|
282
289
|
def _refine_loop(self, graph: networkx.DiGraph, head, initial_loop_nodes, initial_exit_nodes):
|
|
283
|
-
if len(initial_exit_nodes)
|
|
290
|
+
if (self._refine_loops_with_single_successor and len(initial_exit_nodes) == 0) or (
|
|
291
|
+
not self._refine_loops_with_single_successor and len(initial_exit_nodes) <= 1
|
|
292
|
+
):
|
|
284
293
|
return initial_loop_nodes, initial_exit_nodes
|
|
285
294
|
|
|
286
295
|
refined_loop_nodes = initial_loop_nodes.copy()
|
|
@@ -713,7 +722,7 @@ class RegionIdentifier(Analysis):
|
|
|
713
722
|
|
|
714
723
|
# visit the nodes in post-order
|
|
715
724
|
region_created = False
|
|
716
|
-
for node in list(
|
|
725
|
+
for node in list(GraphUtils.dfs_postorder_nodes_deterministic(graph_copy, head)):
|
|
717
726
|
if node is dummy_endnode:
|
|
718
727
|
# skip the dummy endnode
|
|
719
728
|
continue
|
|
@@ -110,24 +110,28 @@ class SequenceWalker:
|
|
|
110
110
|
|
|
111
111
|
def _handle_MultiNode(self, node, **kwargs):
|
|
112
112
|
changed = False
|
|
113
|
-
|
|
113
|
+
nodes = node.nodes if self._update_seqnode_in_place else list(node.nodes)
|
|
114
114
|
|
|
115
115
|
if self._force_forward_scan:
|
|
116
|
-
for i, node_ in enumerate(
|
|
116
|
+
for i, node_ in enumerate(nodes):
|
|
117
117
|
new_node = self._handle(node_, parent=node, index=i)
|
|
118
118
|
if new_node is not None:
|
|
119
119
|
changed = True
|
|
120
|
-
|
|
120
|
+
nodes[i] = new_node
|
|
121
121
|
else:
|
|
122
|
-
i = len(
|
|
122
|
+
i = len(nodes) - 1
|
|
123
123
|
while i > -1:
|
|
124
|
-
node_ =
|
|
124
|
+
node_ = nodes[i]
|
|
125
125
|
new_node = self._handle(node_, parent=node, index=i)
|
|
126
126
|
if new_node is not None:
|
|
127
127
|
changed = True
|
|
128
|
-
|
|
128
|
+
nodes[i] = new_node
|
|
129
129
|
i -= 1
|
|
130
|
-
|
|
130
|
+
if not changed:
|
|
131
|
+
return None
|
|
132
|
+
if self._update_seqnode_in_place:
|
|
133
|
+
return node
|
|
134
|
+
return MultiNode(nodes, addr=node.addr, idx=node.idx)
|
|
131
135
|
|
|
132
136
|
def _handle_SwitchCase(self, node, **kwargs):
|
|
133
137
|
self._handle(node.switch_expr, parent=node, label="switch_expr")
|