angr 9.2.112__py3-none-manylinux2014_aarch64.whl → 9.2.114__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_base.py +3 -0
  3. angr/analyses/decompiler/condition_processor.py +9 -2
  4. angr/analyses/decompiler/optimization_passes/__init__.py +3 -1
  5. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +364 -0
  6. angr/analyses/decompiler/optimization_passes/deadblock_remover.py +1 -1
  7. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +99 -12
  8. angr/analyses/decompiler/optimization_passes/optimization_pass.py +79 -9
  9. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +21 -0
  10. angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +111 -9
  11. angr/analyses/decompiler/redundant_label_remover.py +17 -0
  12. angr/analyses/decompiler/region_simplifiers/switch_cluster_simplifier.py +5 -0
  13. angr/analyses/decompiler/seq_cf_structure_counter.py +37 -0
  14. angr/analyses/decompiler/structured_codegen/c.py +4 -5
  15. angr/analyses/decompiler/structuring/phoenix.py +86 -6
  16. angr/analyses/decompiler/utils.py +6 -1
  17. angr/analyses/reaching_definitions/rd_state.py +2 -0
  18. angr/analyses/reaching_definitions/reaching_definitions.py +7 -0
  19. angr/angrdb/serializers/loader.py +91 -7
  20. angr/calling_conventions.py +21 -13
  21. angr/knowledge_plugins/key_definitions/live_definitions.py +5 -0
  22. angr/knowledge_plugins/propagations/states.py +3 -2
  23. angr/procedures/stubs/ReturnUnconstrained.py +1 -2
  24. angr/procedures/stubs/syscall_stub.py +1 -2
  25. angr/sim_type.py +354 -136
  26. angr/state_plugins/debug_variables.py +2 -2
  27. angr/storage/memory_mixins/multi_value_merger_mixin.py +12 -2
  28. {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/METADATA +26 -26
  29. {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/RECORD +33 -31
  30. {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/WHEEL +1 -1
  31. {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/LICENSE +0 -0
  32. {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/entry_points.txt +0 -0
  33. {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
1
1
  # pylint:disable=unused-argument
2
+ import logging
2
3
  from typing import TYPE_CHECKING
3
4
  from collections.abc import Generator
4
5
  from enum import Enum
@@ -11,10 +12,13 @@ from angr.analyses.decompiler.condition_processor import ConditionProcessor
11
12
  from angr.analyses.decompiler.goto_manager import GotoManager
12
13
  from angr.analyses.decompiler.structuring import RecursiveStructurer, PhoenixStructurer
13
14
  from angr.analyses.decompiler.utils import add_labels
15
+ from angr.analyses.decompiler.seq_cf_structure_counter import ControlFlowStructureCounter
14
16
 
15
17
  if TYPE_CHECKING:
16
18
  from angr.knowledge_plugins.functions import Function
17
19
 
20
+ _l = logging.getLogger(__name__)
21
+
18
22
 
19
23
  class MultipleBlocksException(Exception):
20
24
  """
@@ -274,6 +278,7 @@ class StructuringOptimizationPass(OptimizationPass):
274
278
  prevent_new_gotos=True,
275
279
  strictly_less_gotos=False,
276
280
  recover_structure_fails=True,
281
+ must_improve_rel_quality=True,
277
282
  max_opt_iters=1,
278
283
  simplify_ail=True,
279
284
  require_gotos=True,
@@ -286,10 +291,15 @@ class StructuringOptimizationPass(OptimizationPass):
286
291
  self._max_opt_iters = max_opt_iters
287
292
  self._simplify_ail = simplify_ail
288
293
  self._require_gotos = require_gotos
294
+ self._must_improve_rel_quality = must_improve_rel_quality
289
295
 
290
296
  self._goto_manager: GotoManager | None = None
291
297
  self._prev_graph: networkx.DiGraph | None = None
292
298
 
299
+ # relative quality metrics (excludes gotos)
300
+ self._initial_structure_counter = None
301
+ self._current_structure_counter = None
302
+
293
303
  def _analyze(self, cache=None) -> bool:
294
304
  raise NotImplementedError()
295
305
 
@@ -297,7 +307,7 @@ class StructuringOptimizationPass(OptimizationPass):
297
307
  """
298
308
  Wrapper for _analyze() that verifies the graph is structurable before and after the optimization.
299
309
  """
300
- if not self._graph_is_structurable(self._graph):
310
+ if not self._graph_is_structurable(self._graph, initial=True):
301
311
  return
302
312
 
303
313
  initial_gotos = self._goto_manager.gotos.copy()
@@ -340,6 +350,10 @@ class StructuringOptimizationPass(OptimizationPass):
340
350
  self.out_graph = None
341
351
  return
342
352
 
353
+ if self._must_improve_rel_quality and not self._improves_relative_quality():
354
+ self.out_graph = None
355
+ return
356
+
343
357
  def _fixed_point_analyze(self, cache=None):
344
358
  for _ in range(self._max_opt_iters):
345
359
  if self._require_gotos and not self._goto_manager.gotos:
@@ -359,7 +373,7 @@ class StructuringOptimizationPass(OptimizationPass):
359
373
  self.out_graph = self._prev_graph if self._recover_structure_fails else None
360
374
  break
361
375
 
362
- def _graph_is_structurable(self, graph, readd_labels=False) -> bool:
376
+ def _graph_is_structurable(self, graph, readd_labels=False, initial=False) -> bool:
363
377
  """
364
378
  Checks whether the input graph is structurable under the Phoenix schema-matching structuring algorithm.
365
379
  As a side effect, this will also update the region identifier and goto manager of this optimization pass.
@@ -380,18 +394,74 @@ class StructuringOptimizationPass(OptimizationPass):
380
394
  if self._ri is None:
381
395
  return False
382
396
 
383
- rs = self.project.analyses[RecursiveStructurer].prep(kb=self.kb)(
384
- self._ri.region,
385
- cond_proc=self._ri.cond_proc,
386
- func=self._func,
387
- structurer_cls=PhoenixStructurer,
388
- )
397
+ # we should try-catch structuring here because we can often pass completely invalid graphs
398
+ # that break the assumptions of the structuring algorithm
399
+ try:
400
+ rs = self.project.analyses[RecursiveStructurer].prep(kb=self.kb)(
401
+ self._ri.region,
402
+ cond_proc=self._ri.cond_proc,
403
+ func=self._func,
404
+ structurer_cls=PhoenixStructurer,
405
+ )
406
+ # pylint:disable=broad-except
407
+ except Exception:
408
+ _l.warning("Internal structuring failed for OptimizationPass on %s", self._func.name)
409
+ rs = None
410
+
389
411
  if not rs or not rs.result or not rs.result.nodes or rs.result_incomplete:
390
412
  return False
391
413
 
392
414
  rs = self.project.analyses.RegionSimplifier(self._func, rs.result, kb=self.kb, variable_kb=self._variable_kb)
393
- if not rs or rs.goto_manager is None:
415
+ if not rs or rs.goto_manager is None or rs.result is None:
394
416
  return False
395
417
 
418
+ self._analyze_simplified_region(rs.result, initial=initial)
396
419
  self._goto_manager = rs.goto_manager
397
420
  return True
421
+
422
+ # pylint:disable=no-self-use
423
+ def _analyze_simplified_region(self, region, initial=False):
424
+ """
425
+ Analyze the simplified regions after a successful structuring pass.
426
+ This should be overridden by the subclass if it needs to do anything with the simplified regions for making
427
+ optimization decisions.
428
+ """
429
+ if region is None:
430
+ return
431
+
432
+ # record quality metrics
433
+ if self._must_improve_rel_quality:
434
+ if initial:
435
+ self._initial_structure_counter = ControlFlowStructureCounter(region)
436
+ else:
437
+ self._current_structure_counter = ControlFlowStructureCounter(region)
438
+
439
+ def _improves_relative_quality(self) -> bool:
440
+ """
441
+ Checks if the new structured output improves (or maintains) the relative quality of the control flow structures
442
+ present in the function.
443
+
444
+ For now, this only involves loops
445
+ """
446
+ if self._initial_structure_counter is None or self._current_structure_counter is None:
447
+ _l.warning("Relative quality check failed due to missing structure counters")
448
+ return True
449
+
450
+ prev_wloops = self._initial_structure_counter.while_loops
451
+ curr_wloops = self._current_structure_counter.while_loops
452
+ prev_dloops = self._initial_structure_counter.do_while_loops
453
+ curr_dloops = self._current_structure_counter.do_while_loops
454
+ prev_floops = self._initial_structure_counter.for_loops
455
+ curr_floops = self._current_structure_counter.for_loops
456
+ total_prev_loops = prev_wloops + prev_dloops + prev_floops
457
+ total_curr_loops = curr_wloops + curr_dloops + curr_floops
458
+
459
+ # Sometimes, if we mess up structuring you can easily tell because we traded "good" loops for "bad" loops.
460
+ # Generally, loops are ordered good -> bad as follows: for, while, do-while.
461
+ # Note: this check is only for _trading_, meaning the total number of loops must be the same.
462
+ #
463
+ # 1. We traded to remove a for-loop
464
+ if curr_floops < prev_floops and total_curr_loops == total_prev_loops:
465
+ return False
466
+
467
+ return True
@@ -38,6 +38,7 @@ class ReturnDuplicatorBase:
38
38
  self.node_idx = count(start=node_idx_start)
39
39
  self._max_calls_in_region = max_calls_in_regions
40
40
  self._minimize_copies_for_regions = minimize_copies_for_regions
41
+ self._supergraph = None
41
42
 
42
43
  # this should also be set by the optimization passes initer
43
44
  self._func = func
@@ -71,6 +72,8 @@ class ReturnDuplicatorBase:
71
72
  # for connected in_edges that form a region
72
73
  endnode_regions = self._copy_connected_edge_components(endnode_regions, graph)
73
74
 
75
+ # refresh the supergraph
76
+ self._supergraph = to_ail_supergraph(graph)
74
77
  for region_head, (in_edges, region) in endnode_regions.items():
75
78
  is_single_const_ret_region = self._is_simple_return_graph(region)
76
79
  for in_edge in in_edges:
@@ -150,6 +153,7 @@ class ReturnDuplicatorBase:
150
153
  else:
151
154
  node_copy = copy.deepcopy(node)
152
155
  node_copy.idx = next(self.node_idx)
156
+ self._fix_copied_node_labels(node_copy)
153
157
  copies[node] = node_copy
154
158
 
155
159
  # modify Jump.target_idx and ConditionalJump.{true,false}_target_idx accordingly
@@ -446,3 +450,20 @@ class ReturnDuplicatorBase:
446
450
  all_region_block_sets = {}
447
451
  _unpack_every_region(top_region, all_region_block_sets)
448
452
  return all_region_block_sets
453
+
454
+ @staticmethod
455
+ def _fix_copied_node_labels(block: Block):
456
+ for i in range(len(block.statements)): # pylint:disable=consider-using-enumerate
457
+ stmt = block.statements[i]
458
+ if isinstance(stmt, Label):
459
+ # fix the default name by suffixing it with the new block ID
460
+ new_name = stmt.name if stmt.name else f"Label_{stmt.ins_addr:x}"
461
+ if stmt.block_idx is not None:
462
+ suffix = f"__{stmt.block_idx}"
463
+ if new_name.endswith(suffix):
464
+ new_name = new_name[: -len(suffix)]
465
+ else:
466
+ new_name = stmt.name
467
+ new_name += f"__{block.idx}"
468
+
469
+ block.statements[i] = Label(stmt.idx, new_name, stmt.ins_addr, block_idx=block.idx, **stmt.tags)
@@ -4,7 +4,7 @@ import inspect
4
4
  import networkx
5
5
 
6
6
  from ailment import Block
7
- from ailment.statement import ConditionalJump
7
+ from ailment.statement import ConditionalJump, Label
8
8
 
9
9
  from .return_duplicator_base import ReturnDuplicatorBase
10
10
  from .optimization_pass import StructuringOptimizationPass
@@ -71,23 +71,29 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
71
71
  return ReturnDuplicatorBase._check(self)
72
72
 
73
73
  def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False):
74
- return self._is_goto_edge(src, dst, graph=graph, check_for_ifstmts=True)
74
+ return self._is_goto_edge(src, dst, graph=graph)
75
75
 
76
76
  def _is_goto_edge(
77
77
  self,
78
78
  src: Block,
79
79
  dst: Block,
80
80
  graph: networkx.DiGraph = None,
81
- check_for_ifstmts=True,
82
81
  max_level_check=1,
83
82
  ):
84
83
  """
85
- TODO: correct how goto edge addressing works
84
+ TODO: Implement a more principled way of checking if an edge is a goto edge with Phoenix's structuring info
86
85
  This function only exists because a long-standing bug that sometimes reports the if-stmt addr
87
- above a goto edge as the goto src. Because of this, we need to check for predecessors above the goto and
88
- see if they are a goto. This needs to include Jump to deal with loops.
86
+ above a goto edge as the goto src.
89
87
  """
90
- if check_for_ifstmts and graph is not None:
88
+ # Do a simple and fast check first
89
+ is_simple_goto = self._goto_manager.is_goto_edge(src, dst)
90
+ if is_simple_goto:
91
+ return True
92
+
93
+ if graph is not None:
94
+ # Special case 1:
95
+ # We need to check for predecessors above the goto and see if they are a goto.
96
+ # This needs to include Jump to deal with loops.
91
97
  blocks = [src]
92
98
  level_blocks = [src]
93
99
  for _ in range(max_level_check):
@@ -109,8 +115,104 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
109
115
 
110
116
  if self._goto_manager.is_goto_edge(block, dst):
111
117
  return True
112
- else:
113
- return self._goto_manager.is_goto_edge(src, dst)
118
+
119
+ # Special case 2: A "goto edge" that ReturnDuplicator wants to test might be an edge that Phoenix
120
+ # includes in its loop region (during the cyclic refinement). In fact, Phoenix tends to include as many
121
+ # nodes as possible into the loop region, and generate a goto edge (which ends up in the structured code)
122
+ # from `dst` to the loop successor.
123
+ # an example of this is captured by the test case `TestDecompiler.test_stty_recover_mode_ret_dup_region`.
124
+ # until someone (ideally @mahaloz) implements a more principled way of translating "goto statements" that
125
+ # Phoenix generates and "goto edges" that ReturnDuplicator tests, we rely on the following stopgap to
126
+ # handle this case.
127
+ node = dst
128
+ while True:
129
+ succs = list(graph.successors(node))
130
+ if len(succs) != 1:
131
+ break
132
+ succ = succs[0]
133
+ if succ is node:
134
+ # loop!
135
+ break
136
+ succ_preds = list(graph.predecessors(succ))
137
+ if len(succ_preds) != 1:
138
+ break
139
+ if self._goto_manager.is_goto_edge(node, succ):
140
+ return True
141
+ # keep testing the next edge
142
+ node = succ
143
+
144
+ # Special case 3: In Phoenix, regions full of only if-stmts can be collapsed and moved. This causes
145
+ # the goto manager to report gotos that are at the top of the region instead of ones in the middle of it.
146
+ # Because of this, we need to gather all the nodes above the original src and check if any of them
147
+ # go to the destination. Additionally, we need to do this on the supergraph to get rid of
148
+ # goto edges that are removed by Phoenix.
149
+ # This case is observed in the test case `TestDecompiler.test_tail_tail_bytes_ret_dup`.
150
+ if self._supergraph is None:
151
+ return False
152
+
153
+ super_to_og_nodes = {n: self._supergraph.nodes[n]["original_nodes"] for n in self._supergraph.nodes}
154
+ og_to_super_nodes = {og: super_n for super_n, ogs in super_to_og_nodes.items() for og in ogs}
155
+ super_src = og_to_super_nodes.get(src, None)
156
+ super_dst = og_to_super_nodes.get(dst, None)
157
+ if super_src is None or super_dst is None:
158
+ return False
159
+
160
+ # collect all nodes which have only an if-stmt in them that are ancestors of super_src
161
+ check_blks = {super_src}
162
+ level_blocks = {super_src}
163
+ for _ in range(10):
164
+ done = False
165
+ if_blks = set()
166
+ for lblock in level_blocks:
167
+ preds = list(self._supergraph.predecessors(lblock))
168
+ for pred in preds:
169
+ only_cond_jump = all(isinstance(s, (ConditionalJump, Label)) for s in pred.statements)
170
+ if only_cond_jump:
171
+ if_blks.add(pred)
172
+
173
+ done = len(if_blks) == 0
174
+
175
+ if done:
176
+ break
177
+
178
+ check_blks |= if_blks
179
+ level_blocks = if_blks
180
+
181
+ # convert all the found if-only super-blocks back into their original blocks
182
+ og_check_blocks = set()
183
+ for blk in check_blks:
184
+ og_check_blocks |= set(super_to_og_nodes[blk])
185
+
186
+ # check if any of the original blocks are gotos to the destination
187
+ goto_hits = 0
188
+ for block in og_check_blocks:
189
+ if self._goto_manager.is_goto_edge(block, dst):
190
+ goto_hits += 1
191
+
192
+ # Although it is good to find a goto in the if-only block region, having more than a single goto
193
+ # existing that goes to the same dst is a bad sign. This can be seen in the following test:
194
+ # TestDecompiler.test_dd_iread_ret_dup_region
195
+ #
196
+ # It occurs when you have something like:
197
+ # ```
198
+ # if (a || c)
199
+ # goto target;
200
+ # target:
201
+ # return 0;
202
+ # ```
203
+ #
204
+ #
205
+ # This looks like an edge from (a, target) and (c, target) but it is actually a single edge.
206
+ # If you allow both to duplicate you get the following:
207
+ # ```
208
+ # if (a):
209
+ # return
210
+ # if (c):
211
+ # return
212
+ # ```
213
+ # This is not the desired behavior.
214
+ # So we need to check if there is only a single goto that goes to the destination.
215
+ return goto_hits == 1
114
216
 
115
217
  return False
116
218
 
@@ -30,6 +30,9 @@ class RedundantLabelRemover:
30
30
  self._walker0 = SequenceWalker(handlers=handlers0)
31
31
  self._walker0.walk(self.root)
32
32
 
33
+ # update jump targets
34
+ self._update_jump_targets()
35
+
33
36
  handlers1 = {
34
37
  ailment.Block: self._handle_Block,
35
38
  }
@@ -37,6 +40,20 @@ class RedundantLabelRemover:
37
40
  self._walker1.walk(self.root)
38
41
  self.result = self.root
39
42
 
43
+ def _update_jump_targets(self) -> None:
44
+ """
45
+ Update self._jump_targets after the first pass fills in self._new_jump_target.
46
+ """
47
+
48
+ if self._new_jump_target:
49
+ jump_targets = set()
50
+ for jt in self._jump_targets:
51
+ if jt in self._new_jump_target:
52
+ jump_targets.add(self._new_jump_target[jt])
53
+ else:
54
+ jump_targets.add(jt)
55
+ self._jump_targets = jump_targets
56
+
40
57
  #
41
58
  # Handlers
42
59
  #
@@ -4,6 +4,8 @@ from typing import DefaultDict, Any
4
4
  from collections import OrderedDict, defaultdict
5
5
 
6
6
  import ailment
7
+ from ailment import UnaryOp
8
+ from ailment.expression import negate
7
9
 
8
10
  from ....utils.constants import SWITCH_MISSING_DEFAULT_NODE_ADDR
9
11
  from ..structuring.structurer_nodes import SwitchCaseNode, ConditionNode, SequenceNode, MultiNode, BaseNode, BreakNode
@@ -520,6 +522,9 @@ def simplify_lowered_switches_core(
520
522
 
521
523
  if outermost_node is None:
522
524
  return False
525
+ if isinstance(outermost_node.condition, UnaryOp) and outermost_node.condition.op == "Not":
526
+ # attempt to flip any simple negated comparison for normalized operations
527
+ outermost_node.condition = negate(outermost_node.condition.operand)
523
528
 
524
529
  caseno_to_node = {}
525
530
  default_node_candidates: list[tuple[BaseNode, BaseNode]] = [] # parent to default node candidate
@@ -0,0 +1,37 @@
1
+ from angr.analyses.decompiler.sequence_walker import SequenceWalker
2
+ from angr.analyses.decompiler.structuring.structurer_nodes import SwitchCaseNode, LoopNode
3
+
4
+
5
+ class ControlFlowStructureCounter(SequenceWalker):
6
+ """
7
+ Counts the number of different types of control flow structures found in a sequence of nodes.
8
+ This should be used after the sequence has been simplified.
9
+ """
10
+
11
+ def __init__(self, node):
12
+ handlers = {
13
+ LoopNode: self._handle_Loop,
14
+ }
15
+ super().__init__(handlers)
16
+
17
+ self.while_loops = 0
18
+ self.do_while_loops = 0
19
+ self.for_loops = 0
20
+
21
+ self.walk(node)
22
+
23
+ def _handle_Loop(self, node: LoopNode, **kwargs):
24
+ if node.sort == "while":
25
+ self.while_loops += 1
26
+ elif node.sort == "do-while":
27
+ self.do_while_loops += 1
28
+ elif node.sort == "for":
29
+ self.for_loops += 1
30
+
31
+ return super()._handle_Loop(node, **kwargs)
32
+
33
+ def _handle_Condition(self, node, parent=None, **kwargs):
34
+ return super()._handle_Condition(node, parent=parent, **kwargs)
35
+
36
+ def _handle_SwitchCase(self, node: SwitchCaseNode, parent=None, **kwargs):
37
+ return super()._handle_SwitchCase(node, parent=parent, **kwargs)
@@ -2769,9 +2769,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2769
2769
  if offset == 0:
2770
2770
  data_type = renegotiate_type(data_type, base_type)
2771
2771
  if base_type == data_type or (
2772
- not isinstance(base_type, SimTypeBottom)
2773
- and not isinstance(data_type, SimTypeBottom)
2774
- and base_type.size < data_type.size
2772
+ base_type.size is not None and data_type.size is not None and base_type.size < data_type.size
2775
2773
  ):
2776
2774
  # case 1: we're done because we found it
2777
2775
  # case 2: we're done because we can never find it and we might as well stop early
@@ -2784,7 +2782,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2784
2782
  return _force_type_cast(base_type, data_type, expr)
2785
2783
  return CUnaryOp("Dereference", expr, codegen=self)
2786
2784
 
2787
- if isinstance(base_type, SimTypeBottom):
2785
+ if base_type.size is None:
2788
2786
  stride = 1
2789
2787
  else:
2790
2788
  stride = base_type.size // self.project.arch.byte_width or 1
@@ -2968,7 +2966,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2968
2966
  kernel_type = unpack_typeref(unpack_pointer(kernel.type))
2969
2967
  assert kernel_type
2970
2968
 
2971
- if isinstance(kernel_type, SimTypeBottom):
2969
+ if kernel_type.size is None:
2972
2970
  return bail_out()
2973
2971
  kernel_stride = kernel_type.size // self.project.arch.byte_width
2974
2972
 
@@ -3699,6 +3697,7 @@ class MakeTypecastsImplicit(CStructuredCodeWalker):
3699
3697
  and isinstance(intermediate_ty, (SimTypeChar, SimTypeInt, SimTypeNum))
3700
3698
  and isinstance(start_ty, (SimTypeChar, SimTypeInt, SimTypeNum))
3701
3699
  ):
3700
+ assert dst_ty.size and start_ty.size and intermediate_ty.size
3702
3701
  if dst_ty.size <= start_ty.size and dst_ty.size <= intermediate_ty.size:
3703
3702
  # this is a down- or neutral-cast with an intermediate step that doesn't matter
3704
3703
  result = child.expr
@@ -12,7 +12,7 @@ from ailment.block import Block
12
12
  from ailment.statement import Statement, ConditionalJump, Jump, Label, Return
13
13
  from ailment.expression import Const, UnaryOp, MultiStatementExpression
14
14
 
15
- from angr.utils.graph import GraphUtils
15
+ from angr.utils.graph import GraphUtils, TemporaryNode, PostDominators
16
16
  from ....knowledge_plugins.cfg import IndirectJumpType
17
17
  from ....utils.constants import SWITCH_MISSING_DEFAULT_NODE_ADDR
18
18
  from ....utils.graph import dominates, to_acyclic_graph, dfs_back_edges
@@ -24,6 +24,7 @@ from ..utils import (
24
24
  is_empty_or_label_only_node,
25
25
  has_nonlabel_statements,
26
26
  first_nonlabel_statement,
27
+ structured_node_is_simple_return,
27
28
  )
28
29
  from ..call_counter import AILCallCounter
29
30
  from .structurer_nodes import (
@@ -719,7 +720,7 @@ class PhoenixStructurer(StructurerBase):
719
720
  break_stmt = Jump(
720
721
  None,
721
722
  Const(None, None, successor.addr, self.project.arch.bits),
722
- None,
723
+ target_idx=successor.idx if isinstance(successor, Block) else None,
723
724
  ins_addr=last_src_stmt.ins_addr,
724
725
  )
725
726
  break_node = Block(last_src_stmt.ins_addr, None, statements=[break_stmt])
@@ -727,7 +728,7 @@ class PhoenixStructurer(StructurerBase):
727
728
  break_stmt = Jump(
728
729
  None,
729
730
  Const(None, None, successor.addr, self.project.arch.bits),
730
- None,
731
+ target_idx=successor.idx if isinstance(successor, Block) else None,
731
732
  ins_addr=last_src_stmt.ins_addr,
732
733
  )
733
734
  break_node_inner = Block(last_src_stmt.ins_addr, None, statements=[break_stmt])
@@ -744,7 +745,7 @@ class PhoenixStructurer(StructurerBase):
744
745
  break_stmt = Jump(
745
746
  None,
746
747
  Const(None, None, successor.addr, self.project.arch.bits),
747
- None,
748
+ target_idx=successor.idx if isinstance(successor, Block) else None,
748
749
  ins_addr=last_src_stmt.ins_addr,
749
750
  )
750
751
  break_node = Block(last_src_stmt.ins_addr, None, statements=[break_stmt])
@@ -2144,7 +2145,7 @@ class PhoenixStructurer(StructurerBase):
2144
2145
  node_seq = {nn: (len(ordered_nodes) - idx) for (idx, nn) in enumerate(ordered_nodes)} # post-order
2145
2146
 
2146
2147
  if all_edges_wo_dominance:
2147
- all_edges_wo_dominance = self._chick_order_edges(all_edges_wo_dominance, node_seq)
2148
+ all_edges_wo_dominance = self._order_virtualizable_edges(full_graph, all_edges_wo_dominance, node_seq)
2148
2149
  # virtualize the first edge
2149
2150
  src, dst = all_edges_wo_dominance[0]
2150
2151
  self._virtualize_edge(graph, full_graph, src, dst)
@@ -2152,7 +2153,7 @@ class PhoenixStructurer(StructurerBase):
2152
2153
  return True
2153
2154
 
2154
2155
  if secondary_edges:
2155
- secondary_edges = self._chick_order_edges(secondary_edges, node_seq)
2156
+ secondary_edges = self._order_virtualizable_edges(full_graph, secondary_edges, node_seq)
2156
2157
  # virtualize the first edge
2157
2158
  src, dst = secondary_edges[0]
2158
2159
  self._virtualize_edge(graph, full_graph, src, dst)
@@ -2501,6 +2502,85 @@ class PhoenixStructurer(StructurerBase):
2501
2502
  break
2502
2503
  return None
2503
2504
 
2505
+ def _order_virtualizable_edges(self, graph: networkx.DiGraph, edges: list, node_seq: dict[Any, int]) -> list:
2506
+ """
2507
+ Returns a list of edges that are ordered by the best edges to virtualize first.
2508
+ The criteria for "best" is defined by a variety of heuristics described below.
2509
+ """
2510
+ if len(edges) <= 1:
2511
+ return edges
2512
+
2513
+ # TODO: the graph we have here is not an accurate graph and can have no "entry node". We need a better graph.
2514
+ try:
2515
+ entry_node = [node for node in graph.nodes if graph.in_degree(node) == 0][0]
2516
+ except IndexError:
2517
+ entry_node = None
2518
+
2519
+ best_edges = edges
2520
+ if self._phoenix_improved and entry_node is not None:
2521
+ # the first few heuristics are based on the post-dominator count of the edge
2522
+ # so we collect them for each candidate edge
2523
+ edge_postdom_count = {}
2524
+ edge_sibling_count = {}
2525
+ for edge in edges:
2526
+ _, dst = edge
2527
+ graph_copy = networkx.DiGraph(graph)
2528
+ graph_copy.remove_edge(*edge)
2529
+ sibling_cnt = graph_copy.in_degree(dst)
2530
+ if sibling_cnt == 0:
2531
+ continue
2532
+
2533
+ edge_sibling_count[edge] = sibling_cnt
2534
+ post_dom_graph = PostDominators(graph_copy, entry_node).post_dom
2535
+ post_doms = set()
2536
+ for postdom_node, dominatee in post_dom_graph.edges():
2537
+ if not isinstance(postdom_node, TemporaryNode) and not isinstance(dominatee, TemporaryNode):
2538
+ post_doms.add((postdom_node, dominatee))
2539
+ edge_postdom_count[edge] = len(post_doms)
2540
+
2541
+ # H1: the edge that has the least amount of sibling edges should be virtualized first
2542
+ # this is believed to reduce the amount of virtualization needed in future rounds and increase
2543
+ # the edges that enter a single outer-scope if-stmt
2544
+ if edge_sibling_count:
2545
+ min_sibling_count = min(edge_sibling_count.values())
2546
+ best_edges = [edge for edge, cnt in edge_sibling_count.items() if cnt == min_sibling_count]
2547
+ if len(best_edges) == 1:
2548
+ return best_edges
2549
+
2550
+ # create the next heuristic based on the best edges from the previous heuristic
2551
+ filtered_edge_postdom_count = edge_postdom_count.copy()
2552
+ for edge in list(edge_postdom_count.keys()):
2553
+ if edge not in best_edges:
2554
+ del filtered_edge_postdom_count[edge]
2555
+ if filtered_edge_postdom_count:
2556
+ edge_postdom_count = filtered_edge_postdom_count
2557
+
2558
+ # H2: the edge, when removed, that causes the most post-dominators of the graph should be virtualized
2559
+ # first. this is believed to make the code more linear looking be reducing the amount of scopes.
2560
+ # informally, we believe post-dominators to be an inverse indicator of the number of scopes present
2561
+ if edge_postdom_count:
2562
+ max_postdom_count = max(edge_postdom_count.values())
2563
+ best_edges = [edge for edge, cnt in edge_postdom_count.items() if cnt == max_postdom_count]
2564
+ if len(best_edges) == 1:
2565
+ return best_edges
2566
+
2567
+ # H3: the edge that goes directly to a return statement should be virtualized first
2568
+ # this is believed to be good because it can be corrected in later optimization by duplicating
2569
+ # the return
2570
+ candidate_edges = best_edges
2571
+ best_edges = []
2572
+ for src, dst in candidate_edges:
2573
+ if graph.has_node(dst) and structured_node_is_simple_return(dst, graph):
2574
+ best_edges.append((src, dst))
2575
+
2576
+ if len(best_edges) == 1:
2577
+ return best_edges
2578
+ elif not best_edges:
2579
+ best_edges = candidate_edges
2580
+
2581
+ # if we have another tie, or we never used improved heuristics, then we do the chick_order.
2582
+ return PhoenixStructurer._chick_order_edges(best_edges, node_seq)
2583
+
2504
2584
  @staticmethod
2505
2585
  def _chick_order_edges(edges: list, node_seq: dict[Any, int]) -> list:
2506
2586
  graph = networkx.DiGraph()
@@ -409,7 +409,9 @@ def update_labels(graph: networkx.DiGraph):
409
409
  return add_labels(remove_labels(graph))
410
410
 
411
411
 
412
- def structured_node_is_simple_return(node: Union["SequenceNode", "MultiNode"], graph: networkx.DiGraph) -> bool:
412
+ def structured_node_is_simple_return(
413
+ node: Union["SequenceNode", "MultiNode"], graph: networkx.DiGraph, use_packed_successors=False
414
+ ) -> bool:
413
415
  """
414
416
  Will check if a "simple return" is contained within the node. A simple return looks like this:
415
417
  if (cond) {
@@ -452,6 +454,9 @@ def structured_node_is_simple_return(node: Union["SequenceNode", "MultiNode"], g
452
454
  if valid_last_stmt and last_block.statements:
453
455
  valid_last_stmt = not isinstance(last_block.statements[-1], (ailment.Stmt.ConditionalJump, ailment.Stmt.Jump))
454
456
 
457
+ if use_packed_successors:
458
+ last_block = node
459
+
455
460
  return valid_last_stmt and last_block in graph and not list(graph.successors(last_block))
456
461
 
457
462
 
@@ -93,6 +93,7 @@ class ReachingDefinitionsState:
93
93
  all_definitions: set[Definition] | None = None,
94
94
  initializer: Optional["RDAStateInitializer"] = None,
95
95
  element_limit: int = 5,
96
+ merge_into_tops: bool = True,
96
97
  ):
97
98
  # handy short-hands
98
99
  self.codeloc = codeloc
@@ -130,6 +131,7 @@ class ReachingDefinitionsState:
130
131
  track_tmps=self._track_tmps,
131
132
  canonical_size=canonical_size,
132
133
  element_limit=element_limit,
134
+ merge_into_tops=merge_into_tops,
133
135
  )
134
136
  if self.analysis is not None:
135
137
  self.live_definitions.project = self.analysis.project