angr 9.2.112__py3-none-win_amd64.whl → 9.2.114__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/cfg/cfg_base.py +3 -0
- angr/analyses/decompiler/condition_processor.py +9 -2
- angr/analyses/decompiler/optimization_passes/__init__.py +3 -1
- angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +364 -0
- angr/analyses/decompiler/optimization_passes/deadblock_remover.py +1 -1
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +99 -12
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +79 -9
- angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +21 -0
- angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +111 -9
- angr/analyses/decompiler/redundant_label_remover.py +17 -0
- angr/analyses/decompiler/region_simplifiers/switch_cluster_simplifier.py +5 -0
- angr/analyses/decompiler/seq_cf_structure_counter.py +37 -0
- angr/analyses/decompiler/structured_codegen/c.py +4 -5
- angr/analyses/decompiler/structuring/phoenix.py +86 -6
- angr/analyses/decompiler/utils.py +6 -1
- angr/analyses/reaching_definitions/rd_state.py +2 -0
- angr/analyses/reaching_definitions/reaching_definitions.py +7 -0
- angr/angrdb/serializers/loader.py +91 -7
- angr/calling_conventions.py +21 -13
- angr/knowledge_plugins/key_definitions/live_definitions.py +5 -0
- angr/knowledge_plugins/propagations/states.py +3 -2
- angr/lib/angr_native.dll +0 -0
- angr/procedures/stubs/ReturnUnconstrained.py +1 -2
- angr/procedures/stubs/syscall_stub.py +1 -2
- angr/sim_type.py +354 -136
- angr/state_plugins/debug_variables.py +2 -2
- angr/storage/memory_mixins/multi_value_merger_mixin.py +12 -2
- {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/METADATA +26 -26
- {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/RECORD +34 -32
- {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/WHEEL +1 -1
- {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/LICENSE +0 -0
- {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/entry_points.txt +0 -0
- {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
# pylint:disable=unused-argument
|
|
2
|
+
import logging
|
|
2
3
|
from typing import TYPE_CHECKING
|
|
3
4
|
from collections.abc import Generator
|
|
4
5
|
from enum import Enum
|
|
@@ -11,10 +12,13 @@ from angr.analyses.decompiler.condition_processor import ConditionProcessor
|
|
|
11
12
|
from angr.analyses.decompiler.goto_manager import GotoManager
|
|
12
13
|
from angr.analyses.decompiler.structuring import RecursiveStructurer, PhoenixStructurer
|
|
13
14
|
from angr.analyses.decompiler.utils import add_labels
|
|
15
|
+
from angr.analyses.decompiler.seq_cf_structure_counter import ControlFlowStructureCounter
|
|
14
16
|
|
|
15
17
|
if TYPE_CHECKING:
|
|
16
18
|
from angr.knowledge_plugins.functions import Function
|
|
17
19
|
|
|
20
|
+
_l = logging.getLogger(__name__)
|
|
21
|
+
|
|
18
22
|
|
|
19
23
|
class MultipleBlocksException(Exception):
|
|
20
24
|
"""
|
|
@@ -274,6 +278,7 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
274
278
|
prevent_new_gotos=True,
|
|
275
279
|
strictly_less_gotos=False,
|
|
276
280
|
recover_structure_fails=True,
|
|
281
|
+
must_improve_rel_quality=True,
|
|
277
282
|
max_opt_iters=1,
|
|
278
283
|
simplify_ail=True,
|
|
279
284
|
require_gotos=True,
|
|
@@ -286,10 +291,15 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
286
291
|
self._max_opt_iters = max_opt_iters
|
|
287
292
|
self._simplify_ail = simplify_ail
|
|
288
293
|
self._require_gotos = require_gotos
|
|
294
|
+
self._must_improve_rel_quality = must_improve_rel_quality
|
|
289
295
|
|
|
290
296
|
self._goto_manager: GotoManager | None = None
|
|
291
297
|
self._prev_graph: networkx.DiGraph | None = None
|
|
292
298
|
|
|
299
|
+
# relative quality metrics (excludes gotos)
|
|
300
|
+
self._initial_structure_counter = None
|
|
301
|
+
self._current_structure_counter = None
|
|
302
|
+
|
|
293
303
|
def _analyze(self, cache=None) -> bool:
|
|
294
304
|
raise NotImplementedError()
|
|
295
305
|
|
|
@@ -297,7 +307,7 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
297
307
|
"""
|
|
298
308
|
Wrapper for _analyze() that verifies the graph is structurable before and after the optimization.
|
|
299
309
|
"""
|
|
300
|
-
if not self._graph_is_structurable(self._graph):
|
|
310
|
+
if not self._graph_is_structurable(self._graph, initial=True):
|
|
301
311
|
return
|
|
302
312
|
|
|
303
313
|
initial_gotos = self._goto_manager.gotos.copy()
|
|
@@ -340,6 +350,10 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
340
350
|
self.out_graph = None
|
|
341
351
|
return
|
|
342
352
|
|
|
353
|
+
if self._must_improve_rel_quality and not self._improves_relative_quality():
|
|
354
|
+
self.out_graph = None
|
|
355
|
+
return
|
|
356
|
+
|
|
343
357
|
def _fixed_point_analyze(self, cache=None):
|
|
344
358
|
for _ in range(self._max_opt_iters):
|
|
345
359
|
if self._require_gotos and not self._goto_manager.gotos:
|
|
@@ -359,7 +373,7 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
359
373
|
self.out_graph = self._prev_graph if self._recover_structure_fails else None
|
|
360
374
|
break
|
|
361
375
|
|
|
362
|
-
def _graph_is_structurable(self, graph, readd_labels=False) -> bool:
|
|
376
|
+
def _graph_is_structurable(self, graph, readd_labels=False, initial=False) -> bool:
|
|
363
377
|
"""
|
|
364
378
|
Checks whether the input graph is structurable under the Phoenix schema-matching structuring algorithm.
|
|
365
379
|
As a side effect, this will also update the region identifier and goto manager of this optimization pass.
|
|
@@ -380,18 +394,74 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
380
394
|
if self._ri is None:
|
|
381
395
|
return False
|
|
382
396
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
397
|
+
# we should try-catch structuring here because we can often pass completely invalid graphs
|
|
398
|
+
# that break the assumptions of the structuring algorithm
|
|
399
|
+
try:
|
|
400
|
+
rs = self.project.analyses[RecursiveStructurer].prep(kb=self.kb)(
|
|
401
|
+
self._ri.region,
|
|
402
|
+
cond_proc=self._ri.cond_proc,
|
|
403
|
+
func=self._func,
|
|
404
|
+
structurer_cls=PhoenixStructurer,
|
|
405
|
+
)
|
|
406
|
+
# pylint:disable=broad-except
|
|
407
|
+
except Exception:
|
|
408
|
+
_l.warning("Internal structuring failed for OptimizationPass on %s", self._func.name)
|
|
409
|
+
rs = None
|
|
410
|
+
|
|
389
411
|
if not rs or not rs.result or not rs.result.nodes or rs.result_incomplete:
|
|
390
412
|
return False
|
|
391
413
|
|
|
392
414
|
rs = self.project.analyses.RegionSimplifier(self._func, rs.result, kb=self.kb, variable_kb=self._variable_kb)
|
|
393
|
-
if not rs or rs.goto_manager is None:
|
|
415
|
+
if not rs or rs.goto_manager is None or rs.result is None:
|
|
394
416
|
return False
|
|
395
417
|
|
|
418
|
+
self._analyze_simplified_region(rs.result, initial=initial)
|
|
396
419
|
self._goto_manager = rs.goto_manager
|
|
397
420
|
return True
|
|
421
|
+
|
|
422
|
+
# pylint:disable=no-self-use
|
|
423
|
+
def _analyze_simplified_region(self, region, initial=False):
|
|
424
|
+
"""
|
|
425
|
+
Analyze the simplified regions after a successful structuring pass.
|
|
426
|
+
This should be overridden by the subclass if it needs to do anything with the simplified regions for making
|
|
427
|
+
optimizations decisions.
|
|
428
|
+
"""
|
|
429
|
+
if region is None:
|
|
430
|
+
return
|
|
431
|
+
|
|
432
|
+
# record quality metrics
|
|
433
|
+
if self._must_improve_rel_quality:
|
|
434
|
+
if initial:
|
|
435
|
+
self._initial_structure_counter = ControlFlowStructureCounter(region)
|
|
436
|
+
else:
|
|
437
|
+
self._current_structure_counter = ControlFlowStructureCounter(region)
|
|
438
|
+
|
|
439
|
+
def _improves_relative_quality(self) -> bool:
|
|
440
|
+
"""
|
|
441
|
+
Checks if the new structured output improves (or maintains) the relative quality of the control flow structures
|
|
442
|
+
present in the function.
|
|
443
|
+
|
|
444
|
+
For now, this only involves loops
|
|
445
|
+
"""
|
|
446
|
+
if self._initial_structure_counter is None or self._current_structure_counter is None:
|
|
447
|
+
_l.warning("Relative quality check failed due to missing structure counters")
|
|
448
|
+
return True
|
|
449
|
+
|
|
450
|
+
prev_wloops = self._initial_structure_counter.while_loops
|
|
451
|
+
curr_wloops = self._current_structure_counter.while_loops
|
|
452
|
+
prev_dloops = self._initial_structure_counter.do_while_loops
|
|
453
|
+
curr_dloops = self._current_structure_counter.do_while_loops
|
|
454
|
+
prev_floops = self._initial_structure_counter.for_loops
|
|
455
|
+
curr_floops = self._current_structure_counter.for_loops
|
|
456
|
+
total_prev_loops = prev_wloops + prev_dloops + prev_floops
|
|
457
|
+
total_curr_loops = curr_wloops + curr_dloops + curr_floops
|
|
458
|
+
|
|
459
|
+
# Sometimes, if we mess up structuring you can easily tell because we traded "good" loops for "bad" loops.
|
|
460
|
+
# Generally, loops are ordered good -> bad as follows: for, while, do-while.
|
|
461
|
+
# Note: this check is only for _trading_, meaning the total number of loops must be the same.
|
|
462
|
+
#
|
|
463
|
+
# 1. We traded to remove a for-loop
|
|
464
|
+
if curr_floops < prev_floops and total_curr_loops == total_prev_loops:
|
|
465
|
+
return False
|
|
466
|
+
|
|
467
|
+
return True
|
|
@@ -38,6 +38,7 @@ class ReturnDuplicatorBase:
|
|
|
38
38
|
self.node_idx = count(start=node_idx_start)
|
|
39
39
|
self._max_calls_in_region = max_calls_in_regions
|
|
40
40
|
self._minimize_copies_for_regions = minimize_copies_for_regions
|
|
41
|
+
self._supergraph = None
|
|
41
42
|
|
|
42
43
|
# this should also be set by the optimization passes initer
|
|
43
44
|
self._func = func
|
|
@@ -71,6 +72,8 @@ class ReturnDuplicatorBase:
|
|
|
71
72
|
# for connected in_edges that form a region
|
|
72
73
|
endnode_regions = self._copy_connected_edge_components(endnode_regions, graph)
|
|
73
74
|
|
|
75
|
+
# refresh the supergraph
|
|
76
|
+
self._supergraph = to_ail_supergraph(graph)
|
|
74
77
|
for region_head, (in_edges, region) in endnode_regions.items():
|
|
75
78
|
is_single_const_ret_region = self._is_simple_return_graph(region)
|
|
76
79
|
for in_edge in in_edges:
|
|
@@ -150,6 +153,7 @@ class ReturnDuplicatorBase:
|
|
|
150
153
|
else:
|
|
151
154
|
node_copy = copy.deepcopy(node)
|
|
152
155
|
node_copy.idx = next(self.node_idx)
|
|
156
|
+
self._fix_copied_node_labels(node_copy)
|
|
153
157
|
copies[node] = node_copy
|
|
154
158
|
|
|
155
159
|
# modify Jump.target_idx and ConditionalJump.{true,false}_target_idx accordingly
|
|
@@ -446,3 +450,20 @@ class ReturnDuplicatorBase:
|
|
|
446
450
|
all_region_block_sets = {}
|
|
447
451
|
_unpack_every_region(top_region, all_region_block_sets)
|
|
448
452
|
return all_region_block_sets
|
|
453
|
+
|
|
454
|
+
@staticmethod
def _fix_copied_node_labels(block: Block):
    """
    Rename every Label statement of a copied block so that its name carries the ID of the copy.

    Each Label in `block.statements` is replaced in place by a new Label whose name ends with `__<block.idx>`
    and whose `block_idx` is `block.idx`. A label without a name first receives the default name
    `Label_<ins_addr in hex>`. If the name already ends with the suffix of the label's previous `block_idx`,
    that stale suffix is dropped before the new one is appended, so suffixes do not pile up.

    :param block: The copied block whose labels should be fixed. It is modified in place.
    """
    for i, stmt in enumerate(block.statements):
        if not isinstance(stmt, Label):
            continue

        # fix the default name by suffixing it with the new block ID
        new_name = stmt.name if stmt.name else f"Label_{stmt.ins_addr:x}"
        if stmt.block_idx is not None:
            stale_suffix = f"__{stmt.block_idx}"
            if new_name.endswith(stale_suffix):
                new_name = new_name[: -len(stale_suffix)]
        # Deliberately no fallback to `stmt.name` here: for an unnamed label that would throw away the default
        # name computed above and make the concatenation below fail (or yield a bare "__<idx>" name).
        new_name += f"__{block.idx}"

        # replacing the element at the current index does not change the list length, so iteration stays valid
        block.statements[i] = Label(stmt.idx, new_name, stmt.ins_addr, block_idx=block.idx, **stmt.tags)
|
|
@@ -4,7 +4,7 @@ import inspect
|
|
|
4
4
|
import networkx
|
|
5
5
|
|
|
6
6
|
from ailment import Block
|
|
7
|
-
from ailment.statement import ConditionalJump
|
|
7
|
+
from ailment.statement import ConditionalJump, Label
|
|
8
8
|
|
|
9
9
|
from .return_duplicator_base import ReturnDuplicatorBase
|
|
10
10
|
from .optimization_pass import StructuringOptimizationPass
|
|
@@ -71,23 +71,29 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
|
|
|
71
71
|
return ReturnDuplicatorBase._check(self)
|
|
72
72
|
|
|
73
73
|
def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False):
|
|
74
|
-
return self._is_goto_edge(src, dst, graph=graph
|
|
74
|
+
return self._is_goto_edge(src, dst, graph=graph)
|
|
75
75
|
|
|
76
76
|
def _is_goto_edge(
|
|
77
77
|
self,
|
|
78
78
|
src: Block,
|
|
79
79
|
dst: Block,
|
|
80
80
|
graph: networkx.DiGraph = None,
|
|
81
|
-
check_for_ifstmts=True,
|
|
82
81
|
max_level_check=1,
|
|
83
82
|
):
|
|
84
83
|
"""
|
|
85
|
-
TODO:
|
|
84
|
+
TODO: Implement a more principled way of checking if an edge is a goto edge with Phoenix's structuring info
|
|
86
85
|
This function only exists because of a long-standing bug that sometimes reports the if-stmt addr
|
|
87
|
-
above a goto edge as the goto src.
|
|
88
|
-
see if they are a goto. This needs to include Jump to deal with loops.
|
|
86
|
+
above a goto edge as the goto src.
|
|
89
87
|
"""
|
|
90
|
-
|
|
88
|
+
# Do a simple and fast check first
|
|
89
|
+
is_simple_goto = self._goto_manager.is_goto_edge(src, dst)
|
|
90
|
+
if is_simple_goto:
|
|
91
|
+
return True
|
|
92
|
+
|
|
93
|
+
if graph is not None:
|
|
94
|
+
# Special case 1:
|
|
95
|
+
# We need to check for predecessors above the goto and see if they are a goto.
|
|
96
|
+
# This needs to include Jump to deal with loops.
|
|
91
97
|
blocks = [src]
|
|
92
98
|
level_blocks = [src]
|
|
93
99
|
for _ in range(max_level_check):
|
|
@@ -109,8 +115,104 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
|
|
|
109
115
|
|
|
110
116
|
if self._goto_manager.is_goto_edge(block, dst):
|
|
111
117
|
return True
|
|
112
|
-
|
|
113
|
-
|
|
118
|
+
|
|
119
|
+
# Special case 2: A "goto edge" that ReturnDuplicator wants to test might be an edge that Phoenix
|
|
120
|
+
# includes in its loop region (during the cyclic refinement). In fact, Phoenix tends to include as many
|
|
121
|
+
# nodes as possible into the loop region, and generate a goto edge (which ends up in the structured code)
|
|
122
|
+
# from `dst` to the loop successor.
|
|
123
|
+
# an example of this is captured by the test case `TestDecompiler.test_stty_recover_mode_ret_dup_region`.
|
|
124
|
+
# until someone (ideally @mahaloz) implements a more principled way of translating "goto statements" that
|
|
125
|
+
# Phoenix generates and "goto edges" that ReturnDuplicator tests, we rely on the following stopgap to
|
|
126
|
+
# handle this case.
|
|
127
|
+
node = dst
|
|
128
|
+
while True:
|
|
129
|
+
succs = list(graph.successors(node))
|
|
130
|
+
if len(succs) != 1:
|
|
131
|
+
break
|
|
132
|
+
succ = succs[0]
|
|
133
|
+
if succ is node:
|
|
134
|
+
# loop!
|
|
135
|
+
break
|
|
136
|
+
succ_preds = list(graph.predecessors(succ))
|
|
137
|
+
if len(succ_preds) != 1:
|
|
138
|
+
break
|
|
139
|
+
if self._goto_manager.is_goto_edge(node, succ):
|
|
140
|
+
return True
|
|
141
|
+
# keep testing the next edge
|
|
142
|
+
node = succ
|
|
143
|
+
|
|
144
|
+
# Special case 3: In Phoenix, regions full of only if-stmts can be collapsed and moved. This causes
|
|
145
|
+
# the goto manager to report gotos that are at the top of the region instead of ones in the middle of it.
|
|
146
|
+
# Because of this, we need to gather all the nodes above the original src and check if any of them
|
|
147
|
+
# go to the destination. Additionally, we need to do this on the supergraph to get rid of
|
|
148
|
+
# goto edges that are removed by Phoenix.
|
|
149
|
+
# This case is observed in the test case `TestDecompiler.test_tail_tail_bytes_ret_dup`.
|
|
150
|
+
if self._supergraph is None:
|
|
151
|
+
return False
|
|
152
|
+
|
|
153
|
+
super_to_og_nodes = {n: self._supergraph.nodes[n]["original_nodes"] for n in self._supergraph.nodes}
|
|
154
|
+
og_to_super_nodes = {og: super_n for super_n, ogs in super_to_og_nodes.items() for og in ogs}
|
|
155
|
+
super_src = og_to_super_nodes.get(src, None)
|
|
156
|
+
super_dst = og_to_super_nodes.get(dst, None)
|
|
157
|
+
if super_src is None or super_dst is None:
|
|
158
|
+
return False
|
|
159
|
+
|
|
160
|
+
# collect all nodes which have only an if-stmt in them that are ancestors of super_src
|
|
161
|
+
check_blks = {super_src}
|
|
162
|
+
level_blocks = {super_src}
|
|
163
|
+
for _ in range(10):
|
|
164
|
+
done = False
|
|
165
|
+
if_blks = set()
|
|
166
|
+
for lblock in level_blocks:
|
|
167
|
+
preds = list(self._supergraph.predecessors(lblock))
|
|
168
|
+
for pred in preds:
|
|
169
|
+
only_cond_jump = all(isinstance(s, (ConditionalJump, Label)) for s in pred.statements)
|
|
170
|
+
if only_cond_jump:
|
|
171
|
+
if_blks.add(pred)
|
|
172
|
+
|
|
173
|
+
done = len(if_blks) == 0
|
|
174
|
+
|
|
175
|
+
if done:
|
|
176
|
+
break
|
|
177
|
+
|
|
178
|
+
check_blks |= if_blks
|
|
179
|
+
level_blocks = if_blks
|
|
180
|
+
|
|
181
|
+
# convert all the found if-only super-blocks back into their original blocks
|
|
182
|
+
og_check_blocks = set()
|
|
183
|
+
for blk in check_blks:
|
|
184
|
+
og_check_blocks |= set(super_to_og_nodes[blk])
|
|
185
|
+
|
|
186
|
+
# check if any of the original blocks are gotos to the destination
|
|
187
|
+
goto_hits = 0
|
|
188
|
+
for block in og_check_blocks:
|
|
189
|
+
if self._goto_manager.is_goto_edge(block, dst):
|
|
190
|
+
goto_hits += 1
|
|
191
|
+
|
|
192
|
+
# Although it is good to find a goto in the if-only block region, having more than a single goto
|
|
193
|
+
# existing that goes to the same dst is a bad sign. This can be seen in the following test:
|
|
194
|
+
# TestDecompiler.test_dd_iread_ret_dup_region
|
|
195
|
+
#
|
|
196
|
+
# It occurs when you have something like:
|
|
197
|
+
# ```
|
|
198
|
+
# if (a || c)
|
|
199
|
+
# goto target;
|
|
200
|
+
# target:
|
|
201
|
+
# return 0;
|
|
202
|
+
# ```
|
|
203
|
+
#
|
|
204
|
+
#
|
|
205
|
+
# This looks like an edge from (a, target) and (c, target) but it is actually a single edge.
|
|
206
|
+
# If you allow both to duplicate you get the following:
|
|
207
|
+
# ```
|
|
208
|
+
# if (a):
|
|
209
|
+
# return
|
|
210
|
+
# if (c):
|
|
211
|
+
# return
|
|
212
|
+
# ```
|
|
213
|
+
# This is not the desired behavior.
|
|
214
|
+
# So we need to check if there is only a single goto that goes to the destination.
|
|
215
|
+
return goto_hits == 1
|
|
114
216
|
|
|
115
217
|
return False
|
|
116
218
|
|
|
@@ -30,6 +30,9 @@ class RedundantLabelRemover:
|
|
|
30
30
|
self._walker0 = SequenceWalker(handlers=handlers0)
|
|
31
31
|
self._walker0.walk(self.root)
|
|
32
32
|
|
|
33
|
+
# update jump targets
|
|
34
|
+
self._update_jump_targets()
|
|
35
|
+
|
|
33
36
|
handlers1 = {
|
|
34
37
|
ailment.Block: self._handle_Block,
|
|
35
38
|
}
|
|
@@ -37,6 +40,20 @@ class RedundantLabelRemover:
|
|
|
37
40
|
self._walker1.walk(self.root)
|
|
38
41
|
self.result = self.root
|
|
39
42
|
|
|
43
|
+
def _update_jump_targets(self) -> None:
|
|
44
|
+
"""
|
|
45
|
+
Update self._jump_targets after the first pass fills in self._new_jump_target.
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
if self._new_jump_target:
|
|
49
|
+
jump_targets = set()
|
|
50
|
+
for jt in self._jump_targets:
|
|
51
|
+
if jt in self._new_jump_target:
|
|
52
|
+
jump_targets.add(self._new_jump_target[jt])
|
|
53
|
+
else:
|
|
54
|
+
jump_targets.add(jt)
|
|
55
|
+
self._jump_targets = jump_targets
|
|
56
|
+
|
|
40
57
|
#
|
|
41
58
|
# Handlers
|
|
42
59
|
#
|
|
@@ -4,6 +4,8 @@ from typing import DefaultDict, Any
|
|
|
4
4
|
from collections import OrderedDict, defaultdict
|
|
5
5
|
|
|
6
6
|
import ailment
|
|
7
|
+
from ailment import UnaryOp
|
|
8
|
+
from ailment.expression import negate
|
|
7
9
|
|
|
8
10
|
from ....utils.constants import SWITCH_MISSING_DEFAULT_NODE_ADDR
|
|
9
11
|
from ..structuring.structurer_nodes import SwitchCaseNode, ConditionNode, SequenceNode, MultiNode, BaseNode, BreakNode
|
|
@@ -520,6 +522,9 @@ def simplify_lowered_switches_core(
|
|
|
520
522
|
|
|
521
523
|
if outermost_node is None:
|
|
522
524
|
return False
|
|
525
|
+
if isinstance(outermost_node.condition, UnaryOp) and outermost_node.condition.op == "Not":
|
|
526
|
+
# attempt to flip any simple negated comparison for normalized operations
|
|
527
|
+
outermost_node.condition = negate(outermost_node.condition.operand)
|
|
523
528
|
|
|
524
529
|
caseno_to_node = {}
|
|
525
530
|
default_node_candidates: list[tuple[BaseNode, BaseNode]] = [] # parent to default node candidate
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from angr.analyses.decompiler.sequence_walker import SequenceWalker
|
|
2
|
+
from angr.analyses.decompiler.structuring.structurer_nodes import SwitchCaseNode, LoopNode
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class ControlFlowStructureCounter(SequenceWalker):
    """
    Tallies the control flow structures, by kind, that occur in a sequence of nodes.

    Intended to run on a sequence that has already been simplified. The walk happens inside the constructor;
    afterwards the results can be read from `while_loops`, `do_while_loops` and `for_loops`.
    """

    def __init__(self, node):
        super().__init__({LoopNode: self._handle_Loop})

        self.for_loops = 0
        self.while_loops = 0
        self.do_while_loops = 0

        # counting is eager: the tallies are final once construction returns
        self.walk(node)

    def _handle_Loop(self, node: LoopNode, **kwargs):
        sort = node.sort
        if sort == "for":
            self.for_loops += 1
        elif sort == "do-while":
            self.do_while_loops += 1
        elif sort == "while":
            self.while_loops += 1

        # hand over to the base walker (presumably it continues into the loop body; confirm in SequenceWalker)
        return super()._handle_Loop(node, **kwargs)

    def _handle_Condition(self, node, parent=None, **kwargs):
        # nothing is counted for conditions yet; plain delegation to the base walker
        return super()._handle_Condition(node, parent=parent, **kwargs)

    def _handle_SwitchCase(self, node: SwitchCaseNode, parent=None, **kwargs):
        # nothing is counted for switch-cases yet; plain delegation to the base walker
        return super()._handle_SwitchCase(node, parent=parent, **kwargs)
|
|
@@ -2769,9 +2769,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2769
2769
|
if offset == 0:
|
|
2770
2770
|
data_type = renegotiate_type(data_type, base_type)
|
|
2771
2771
|
if base_type == data_type or (
|
|
2772
|
-
not
|
|
2773
|
-
and not isinstance(data_type, SimTypeBottom)
|
|
2774
|
-
and base_type.size < data_type.size
|
|
2772
|
+
base_type.size is not None and data_type.size is not None and base_type.size < data_type.size
|
|
2775
2773
|
):
|
|
2776
2774
|
# case 1: we're done because we found it
|
|
2777
2775
|
# case 2: we're done because we can never find it and we might as well stop early
|
|
@@ -2784,7 +2782,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2784
2782
|
return _force_type_cast(base_type, data_type, expr)
|
|
2785
2783
|
return CUnaryOp("Dereference", expr, codegen=self)
|
|
2786
2784
|
|
|
2787
|
-
if
|
|
2785
|
+
if base_type.size is None:
|
|
2788
2786
|
stride = 1
|
|
2789
2787
|
else:
|
|
2790
2788
|
stride = base_type.size // self.project.arch.byte_width or 1
|
|
@@ -2968,7 +2966,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2968
2966
|
kernel_type = unpack_typeref(unpack_pointer(kernel.type))
|
|
2969
2967
|
assert kernel_type
|
|
2970
2968
|
|
|
2971
|
-
if
|
|
2969
|
+
if kernel_type.size is None:
|
|
2972
2970
|
return bail_out()
|
|
2973
2971
|
kernel_stride = kernel_type.size // self.project.arch.byte_width
|
|
2974
2972
|
|
|
@@ -3699,6 +3697,7 @@ class MakeTypecastsImplicit(CStructuredCodeWalker):
|
|
|
3699
3697
|
and isinstance(intermediate_ty, (SimTypeChar, SimTypeInt, SimTypeNum))
|
|
3700
3698
|
and isinstance(start_ty, (SimTypeChar, SimTypeInt, SimTypeNum))
|
|
3701
3699
|
):
|
|
3700
|
+
assert dst_ty.size and start_ty.size and intermediate_ty.size
|
|
3702
3701
|
if dst_ty.size <= start_ty.size and dst_ty.size <= intermediate_ty.size:
|
|
3703
3702
|
# this is a down- or neutral-cast with an intermediate step that doesn't matter
|
|
3704
3703
|
result = child.expr
|
|
@@ -12,7 +12,7 @@ from ailment.block import Block
|
|
|
12
12
|
from ailment.statement import Statement, ConditionalJump, Jump, Label, Return
|
|
13
13
|
from ailment.expression import Const, UnaryOp, MultiStatementExpression
|
|
14
14
|
|
|
15
|
-
from angr.utils.graph import GraphUtils
|
|
15
|
+
from angr.utils.graph import GraphUtils, TemporaryNode, PostDominators
|
|
16
16
|
from ....knowledge_plugins.cfg import IndirectJumpType
|
|
17
17
|
from ....utils.constants import SWITCH_MISSING_DEFAULT_NODE_ADDR
|
|
18
18
|
from ....utils.graph import dominates, to_acyclic_graph, dfs_back_edges
|
|
@@ -24,6 +24,7 @@ from ..utils import (
|
|
|
24
24
|
is_empty_or_label_only_node,
|
|
25
25
|
has_nonlabel_statements,
|
|
26
26
|
first_nonlabel_statement,
|
|
27
|
+
structured_node_is_simple_return,
|
|
27
28
|
)
|
|
28
29
|
from ..call_counter import AILCallCounter
|
|
29
30
|
from .structurer_nodes import (
|
|
@@ -719,7 +720,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
719
720
|
break_stmt = Jump(
|
|
720
721
|
None,
|
|
721
722
|
Const(None, None, successor.addr, self.project.arch.bits),
|
|
722
|
-
None,
|
|
723
|
+
target_idx=successor.idx if isinstance(successor, Block) else None,
|
|
723
724
|
ins_addr=last_src_stmt.ins_addr,
|
|
724
725
|
)
|
|
725
726
|
break_node = Block(last_src_stmt.ins_addr, None, statements=[break_stmt])
|
|
@@ -727,7 +728,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
727
728
|
break_stmt = Jump(
|
|
728
729
|
None,
|
|
729
730
|
Const(None, None, successor.addr, self.project.arch.bits),
|
|
730
|
-
None,
|
|
731
|
+
target_idx=successor.idx if isinstance(successor, Block) else None,
|
|
731
732
|
ins_addr=last_src_stmt.ins_addr,
|
|
732
733
|
)
|
|
733
734
|
break_node_inner = Block(last_src_stmt.ins_addr, None, statements=[break_stmt])
|
|
@@ -744,7 +745,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
744
745
|
break_stmt = Jump(
|
|
745
746
|
None,
|
|
746
747
|
Const(None, None, successor.addr, self.project.arch.bits),
|
|
747
|
-
None,
|
|
748
|
+
target_idx=successor.idx if isinstance(successor, Block) else None,
|
|
748
749
|
ins_addr=last_src_stmt.ins_addr,
|
|
749
750
|
)
|
|
750
751
|
break_node = Block(last_src_stmt.ins_addr, None, statements=[break_stmt])
|
|
@@ -2144,7 +2145,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2144
2145
|
node_seq = {nn: (len(ordered_nodes) - idx) for (idx, nn) in enumerate(ordered_nodes)} # post-order
|
|
2145
2146
|
|
|
2146
2147
|
if all_edges_wo_dominance:
|
|
2147
|
-
all_edges_wo_dominance = self.
|
|
2148
|
+
all_edges_wo_dominance = self._order_virtualizable_edges(full_graph, all_edges_wo_dominance, node_seq)
|
|
2148
2149
|
# virtualize the first edge
|
|
2149
2150
|
src, dst = all_edges_wo_dominance[0]
|
|
2150
2151
|
self._virtualize_edge(graph, full_graph, src, dst)
|
|
@@ -2152,7 +2153,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2152
2153
|
return True
|
|
2153
2154
|
|
|
2154
2155
|
if secondary_edges:
|
|
2155
|
-
secondary_edges = self.
|
|
2156
|
+
secondary_edges = self._order_virtualizable_edges(full_graph, secondary_edges, node_seq)
|
|
2156
2157
|
# virtualize the first edge
|
|
2157
2158
|
src, dst = secondary_edges[0]
|
|
2158
2159
|
self._virtualize_edge(graph, full_graph, src, dst)
|
|
@@ -2501,6 +2502,85 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2501
2502
|
break
|
|
2502
2503
|
return None
|
|
2503
2504
|
|
|
2505
|
+
def _order_virtualizable_edges(self, graph: networkx.DiGraph, edges: list, node_seq: dict[Any, int]) -> list:
    """
    Rank candidate edges so that the edge most worth virtualizing comes first.

    "Best" is decided by the heuristics H1-H3 described inline; they are only applied when the improved Phoenix
    mode is enabled and the graph has a node without predecessors. Note that the returned list may contain only
    the best candidate(s) rather than every input edge.

    :param graph:    The graph the edges belong to.
    :param edges:    The candidate edges, as (src, dst) pairs.
    :param node_seq: Sequence number of each node, forwarded to the fallback ordering.
    :return:         The ordered edges; `edges` itself when it holds at most one edge.
    """
    if len(edges) <= 1:
        return edges

    # TODO: the graph we have here is not an accurate graph and can have no "entry node". We need a better graph.
    entry_node = next((n for n in graph.nodes if graph.in_degree(n) == 0), None)

    best_edges = edges
    if self._phoenix_improved and entry_node is not None:
        # the first heuristics look at each candidate edge in a copy of the graph with that edge removed:
        # how many other edges still enter its destination, and how many post-dominator pairs the copy has
        sibling_counts = {}
        postdom_counts = {}
        for edge in edges:
            _, dst = edge
            trimmed = networkx.DiGraph(graph)
            trimmed.remove_edge(*edge)
            siblings = trimmed.in_degree(dst)
            if siblings == 0:
                continue

            sibling_counts[edge] = siblings
            post_dom_graph = PostDominators(trimmed, entry_node).post_dom
            real_pairs = {
                (postdom_node, dominatee)
                for postdom_node, dominatee in post_dom_graph.edges()
                if not (isinstance(postdom_node, TemporaryNode) or isinstance(dominatee, TemporaryNode))
            }
            postdom_counts[edge] = len(real_pairs)

        # H1: prefer the edge with the fewest sibling edges. This is believed to reduce the virtualization
        # needed in later rounds and to increase the edges that enter a single outer-scope if-stmt.
        if sibling_counts:
            fewest = min(sibling_counts.values())
            best_edges = [edge for edge, cnt in sibling_counts.items() if cnt == fewest]
            if len(best_edges) == 1:
                return best_edges

            # the next heuristic only considers the winners of the previous one
            narrowed = {edge: cnt for edge, cnt in postdom_counts.items() if edge in best_edges}
            if narrowed:
                postdom_counts = narrowed

        # H2: prefer the edge whose removal leaves the graph with the most post-dominators. This is believed to
        # make the code look more linear by reducing the amount of scopes; informally, post-dominators are taken
        # as an inverse indicator of the number of scopes present.
        if postdom_counts:
            most = max(postdom_counts.values())
            best_edges = [edge for edge, cnt in postdom_counts.items() if cnt == most]
            if len(best_edges) == 1:
                return best_edges

        # H3: prefer an edge that goes directly to a return statement. This is believed to be good because a
        # later optimization can correct it by duplicating the return.
        candidate_edges = best_edges
        best_edges = [
            (src, dst)
            for src, dst in candidate_edges
            if graph.has_node(dst) and structured_node_is_simple_return(dst, graph)
        ]
        if len(best_edges) == 1:
            return best_edges
        if not best_edges:
            best_edges = candidate_edges

    # if we have another tie, or we never used improved heuristics, then we do the chick_order.
    return PhoenixStructurer._chick_order_edges(best_edges, node_seq)
|
|
2583
|
+
|
|
2504
2584
|
@staticmethod
|
|
2505
2585
|
def _chick_order_edges(edges: list, node_seq: dict[Any, int]) -> list:
|
|
2506
2586
|
graph = networkx.DiGraph()
|
|
@@ -409,7 +409,9 @@ def update_labels(graph: networkx.DiGraph):
|
|
|
409
409
|
return add_labels(remove_labels(graph))
|
|
410
410
|
|
|
411
411
|
|
|
412
|
-
def structured_node_is_simple_return(
|
|
412
|
+
def structured_node_is_simple_return(
|
|
413
|
+
node: Union["SequenceNode", "MultiNode"], graph: networkx.DiGraph, use_packed_successors=False
|
|
414
|
+
) -> bool:
|
|
413
415
|
"""
|
|
414
416
|
Will check if a "simple return" is contained within the node. A simple return looks like this:
|
|
415
417
|
if (cond) {
|
|
@@ -452,6 +454,9 @@ def structured_node_is_simple_return(node: Union["SequenceNode", "MultiNode"], g
|
|
|
452
454
|
if valid_last_stmt and last_block.statements:
|
|
453
455
|
valid_last_stmt = not isinstance(last_block.statements[-1], (ailment.Stmt.ConditionalJump, ailment.Stmt.Jump))
|
|
454
456
|
|
|
457
|
+
if use_packed_successors:
|
|
458
|
+
last_block = node
|
|
459
|
+
|
|
455
460
|
return valid_last_stmt and last_block in graph and not list(graph.successors(last_block))
|
|
456
461
|
|
|
457
462
|
|
|
@@ -93,6 +93,7 @@ class ReachingDefinitionsState:
|
|
|
93
93
|
all_definitions: set[Definition] | None = None,
|
|
94
94
|
initializer: Optional["RDAStateInitializer"] = None,
|
|
95
95
|
element_limit: int = 5,
|
|
96
|
+
merge_into_tops: bool = True,
|
|
96
97
|
):
|
|
97
98
|
# handy short-hands
|
|
98
99
|
self.codeloc = codeloc
|
|
@@ -130,6 +131,7 @@ class ReachingDefinitionsState:
|
|
|
130
131
|
track_tmps=self._track_tmps,
|
|
131
132
|
canonical_size=canonical_size,
|
|
132
133
|
element_limit=element_limit,
|
|
134
|
+
merge_into_tops=merge_into_tops,
|
|
133
135
|
)
|
|
134
136
|
if self.analysis is not None:
|
|
135
137
|
self.live_definitions.project = self.analysis.project
|