angr 9.2.95__py3-none-manylinux2014_x86_64.whl → 9.2.97__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_fast.py +9 -6
  3. angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +6 -1
  4. angr/analyses/complete_calling_conventions.py +27 -11
  5. angr/analyses/decompiler/ail_simplifier.py +30 -8
  6. angr/analyses/decompiler/ccall_rewriters/amd64_ccalls.py +20 -7
  7. angr/analyses/decompiler/clinic.py +21 -5
  8. angr/analyses/decompiler/condition_processor.py +11 -0
  9. angr/analyses/decompiler/decompiler.py +58 -46
  10. angr/analyses/decompiler/optimization_passes/__init__.py +11 -5
  11. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +13 -7
  12. angr/analyses/decompiler/optimization_passes/optimization_pass.py +31 -11
  13. angr/analyses/decompiler/optimization_passes/{return_duplicator.py → return_duplicator_base.py} +54 -102
  14. angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +57 -0
  15. angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +121 -0
  16. angr/analyses/decompiler/region_identifier.py +13 -0
  17. angr/analyses/decompiler/seq_to_blocks.py +19 -0
  18. angr/analyses/decompiler/structured_codegen/c.py +21 -0
  19. angr/analyses/decompiler/structuring/phoenix.py +28 -4
  20. angr/analyses/decompiler/structuring/recursive_structurer.py +35 -1
  21. angr/analyses/decompiler/structuring/structurer_base.py +3 -0
  22. angr/analyses/decompiler/utils.py +41 -6
  23. angr/analyses/disassembly.py +4 -1
  24. angr/analyses/find_objects_static.py +15 -10
  25. angr/analyses/forward_analysis/forward_analysis.py +15 -1
  26. angr/analyses/propagator/engine_ail.py +40 -0
  27. angr/analyses/propagator/propagator.py +6 -3
  28. angr/analyses/reaching_definitions/engine_ail.py +16 -24
  29. angr/analyses/reaching_definitions/rd_state.py +14 -1
  30. angr/analyses/reaching_definitions/reaching_definitions.py +19 -2
  31. angr/analyses/variable_recovery/engine_ail.py +6 -6
  32. angr/analyses/variable_recovery/engine_base.py +22 -4
  33. angr/analyses/variable_recovery/variable_recovery_base.py +4 -1
  34. angr/engines/light/engine.py +8 -1
  35. angr/knowledge_plugins/key_definitions/atoms.py +4 -2
  36. angr/knowledge_plugins/key_definitions/environment.py +11 -0
  37. angr/knowledge_plugins/key_definitions/live_definitions.py +41 -8
  38. angr/knowledge_plugins/key_definitions/uses.py +18 -4
  39. angr/knowledge_plugins/propagations/states.py +22 -3
  40. angr/knowledge_plugins/types.py +6 -0
  41. angr/knowledge_plugins/variables/variable_manager.py +54 -5
  42. angr/simos/simos.py +2 -0
  43. angr/storage/memory_mixins/__init__.py +3 -0
  44. angr/storage/memory_mixins/multi_value_merger_mixin.py +22 -11
  45. angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +20 -2
  46. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +81 -44
  47. angr/utils/cowdict.py +4 -2
  48. angr/utils/funcid.py +6 -0
  49. angr/utils/mp.py +1 -1
  50. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/METADATA +6 -6
  51. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/RECORD +55 -52
  52. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/LICENSE +0 -0
  53. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/WHEEL +0 -0
  54. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/entry_points.txt +0 -0
  55. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/top_level.txt +0 -0

angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py

@@ -5,7 +5,7 @@ import ailment
 from ailment.expression import Op
 
 from ..structuring.structurer_nodes import ConditionNode
-from ..utils import structured_node_is_simple_return
+from ..utils import structured_node_is_simple_return, sequence_to_statements
 from ..sequence_walker import SequenceWalker
 from .optimization_pass import SequenceOptimizationPass, OptimizationPassStage
 
@@ -13,12 +13,14 @@ from .optimization_pass import SequenceOptimizationPass, OptimizationPassStage
 class FlipBooleanWalker(SequenceWalker):
     """
     Walks a SequenceNode and handles every sequence.
+    Uses the flip_size to determine when to flip the condition on large if-statement bodies.
     """
 
-    def __init__(self, graph, last_node=None):
+    def __init__(self, graph, flip_size=10, last_node=None):
         super().__init__()
         self._graph = graph
         self._last_node = last_node
+        self._flip_size = flip_size
 
     def _handle_Sequence(self, seq_node, **kwargs):
         # Type 1:
@@ -48,9 +50,12 @@ class FlipBooleanWalker(SequenceWalker):
             node.true_node, node.false_node = node.false_node, node.true_node
 
         for idx, cond_node, successor in type2_condition_nodes:
-            # flipping the condition on the last node of the program will cause
-            # the program to look strange, so avoid this case
-            if successor is not self._last_node:
+            # there are two possibilities when you might want to flip the condition and move the return statement:
+            # 1. This if-stmt if found somewhere in the middle of the function
+            # 2. This if-stmt is pretty large, but still ends in a return outside of the if-stmt
+            if (successor is not self._last_node) or (
+                len(sequence_to_statements(cond_node.true_node)) >= self._flip_size
+            ):
                 cond_node.condition = ailment.expression.negate(cond_node.condition)
                 seq_node.nodes[idx + 1] = cond_node.true_node
                 cond_node.true_node = successor
@@ -71,15 +76,16 @@ class FlipBooleanCmp(SequenceOptimizationPass):
     NAME = "Flip small ret booleans"
     DESCRIPTION = "When false node has no successors, flip condition so else scope can be simplified later"
 
-    def __init__(self, func, **kwargs):
+    def __init__(self, func, flip_size=10, **kwargs):
         super().__init__(func, **kwargs)
         self._graph = kwargs.get("graph", None)
+        self._flip_size = flip_size
         self.analyze()
 
     def _check(self):
         return bool(self.seq.nodes), None
 
     def _analyze(self, cache=None):
-        walker = FlipBooleanWalker(self._graph, last_node=self.seq.nodes[-1])
+        walker = FlipBooleanWalker(self._graph, last_node=self.seq.nodes[-1], flip_size=self._flip_size)
         walker.walk(self.seq)
         self.out_seq = self.seq
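
Note on the change above: previously the walker flipped a condition only when the if-statement was not the last node of the function; it now also flips when the true branch has at least flip_size statements. A self-contained sketch of the new decision rule (function and parameter names here are illustrative stand-ins, not angr's API):

# Minimal sketch of the flip rule introduced above; names are illustrative only.
def should_flip(successor_is_last_node: bool, true_branch_stmt_count: int, flip_size: int = 10) -> bool:
    # Flip when the if-statement sits in the middle of the function, OR when its
    # body is already large (>= flip_size statements) yet still falls through to
    # a return placed after the if-statement.
    return (not successor_is_last_node) or (true_branch_stmt_count >= flip_size)

# A 12-statement if-body at the end of the function now gets flipped too:
assert should_flip(successor_is_last_node=True, true_branch_stmt_count=12) is True
assert should_flip(successor_is_last_node=True, true_branch_stmt_count=3) is False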

angr/analyses/decompiler/optimization_passes/optimization_pass.py

@@ -6,6 +6,7 @@ import networkx # pylint:disable=unused-import
 import ailment
 
 from angr.analyses.decompiler import RegionIdentifier
+from angr.analyses.decompiler.condition_processor import ConditionProcessor
 from angr.analyses.decompiler.goto_manager import GotoManager
 from angr.analyses.decompiler.structuring import RecursiveStructurer, PhoenixStructurer
 from angr.analyses.decompiler.utils import add_labels
@@ -93,6 +94,26 @@ class BaseOptimizationPass:
         """
         raise NotImplementedError()
 
+    def _simplify_graph(self, graph):
+        simp = self.project.analyses.AILSimplifier(
+            self._func,
+            func_graph=graph,
+            use_callee_saved_regs_at_return=False,
+            gp=self._func.info.get("gp", None) if self.project.arch.name in {"MIPS32", "MIPS64"} else None,
+        )
+        return simp.func_graph if simp.simplified else graph
+
+    def _recover_regions(self, graph: networkx.DiGraph, condition_processor=None, update_graph: bool = False):
+        return self.project.analyses[RegionIdentifier].prep(kb=self.kb)(
+            self._func,
+            graph=graph,
+            cond_proc=condition_processor or ConditionProcessor(self.project.arch),
+            update_graph=update_graph,
+            # TODO: find a way to pass Phoenix/DREAM options here (see decompiler.py for correct use)
+            force_loop_single_exit=True,
+            complete_successors=False,
+        )
+
 
 class OptimizationPass(BaseOptimizationPass):
     """
@@ -256,6 +277,7 @@ class StructuringOptimizationPass(OptimizationPass):
         recover_structure_fails=True,
         max_opt_iters=1,
         simplify_ail=True,
+        require_gotos=True,
         **kwargs,
     ):
         super().__init__(func, **kwargs)
@@ -264,6 +286,7 @@ class StructuringOptimizationPass(OptimizationPass):
         self._recover_structure_fails = recover_structure_fails
         self._max_opt_iters = max_opt_iters
         self._simplify_ail = simplify_ail
+        self._require_gotos = require_gotos
 
         self._goto_manager: Optional[GotoManager] = None
         self._prev_graph: Optional[networkx.DiGraph] = None
@@ -279,6 +302,9 @@ class StructuringOptimizationPass(OptimizationPass):
             return
 
         initial_gotos = self._goto_manager.gotos.copy()
+        if self._require_gotos and not initial_gotos:
+            return
+
         # replace the normal check in OptimizationPass.analyze()
         ret, cache = self._check()
         if not ret:
@@ -304,7 +330,7 @@ class StructuringOptimizationPass(OptimizationPass):
         # simplify the AIL graph
         if self._simplify_ail:
             # this should not (TM) change the structure of the graph but is needed for later optimizations
-            self.out_graph = self._simplify_ail_graph(self.out_graph)
+            self.out_graph = self._simplify_graph(self.out_graph)
 
         if self._prevent_new_gotos:
             prev_gotos = len(initial_gotos)
@@ -317,6 +343,9 @@ class StructuringOptimizationPass(OptimizationPass):
 
     def _fixed_point_analyze(self, cache=None):
         for _ in range(self._max_opt_iters):
+            if self._require_gotos and not self._goto_manager.gotos:
+                break
+
             # backup the graph before the optimization
             if self._recover_structure_fails and self.out_graph is not None:
                 self._prev_graph = networkx.DiGraph(self.out_graph)
@@ -331,15 +360,6 @@ class StructuringOptimizationPass(OptimizationPass):
                 self.out_graph = self._prev_graph if self._recover_structure_fails else None
                 break
 
-    def _simplify_ail_graph(self, graph):
-        simp = self.project.analyses.AILSimplifier(
-            self._func,
-            func_graph=graph,
-            use_callee_saved_regs_at_return=False,
-            gp=self._func.info.get("gp", None) if self.project.arch.name in {"MIPS32", "MIPS64"} else None,
-        )
-        return simp.func_graph if simp.simplified else graph
-
     def _graph_is_structurable(self, graph, readd_labels=False) -> bool:
         """
         Checks weather the input graph is structurable under the Phoenix schema-matching structuring algorithm.
@@ -367,7 +387,7 @@ class StructuringOptimizationPass(OptimizationPass):
             func=self._func,
             structurer_cls=PhoenixStructurer,
         )
-        if not rs or not rs.result or not rs.result.nodes:
+        if not rs or not rs.result or not rs.result.nodes or rs.result_incomplete:
            return False
 
         rs = self.project.analyses.RegionSimplifier(self._func, rs.result, kb=self.kb, variable_kb=self._variable_kb)
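
Summary of the refactor above: the AIL simplification helper moves from StructuringOptimizationPass up to BaseOptimizationPass (renamed _simplify_ail_graph → _simplify_graph), a _recover_regions wrapper around RegionIdentifier is added next to it, and StructuringOptimizationPass gains a require_gotos flag that aborts the pass early and stops the fixed-point loop once no gotos remain. A hedged sketch of a subclass opting into the new flag, modeled on ReturnDuplicatorLow further down in this diff (the class, its name, and its strings are hypothetical; only the base-class hooks come from the diff):

# Hypothetical subclass; STAGE is inherited from the base class, as with the
# ReturnDuplicator passes in this release.
from angr.analyses.decompiler.optimization_passes.optimization_pass import StructuringOptimizationPass


class ExampleGotoDrivenPass(StructuringOptimizationPass):
    ARCHES = None
    PLATFORMS = None
    NAME = "Example goto-driven pass"
    DESCRIPTION = "Sketch showing the new require_gotos option"

    def __init__(self, func, **kwargs):
        # require_gotos=True: analyze() bails out when the structurer reports no
        # gotos, and _fixed_point_analyze() stops iterating once they are gone.
        super().__init__(func, max_opt_iters=4, prevent_new_gotos=True, require_gotos=True, **kwargs)
        self.analyze()

    def _check(self):
        # cheap guard; returning (False, None) skips the pass entirely
        return bool(self._func.endpoints), None

    def _analyze(self, cache=None):
        # rewrite self.out_graph here and report whether anything changed
        # (ReturnDuplicatorLow returns the result of its core analysis). With
        # simplify_ail left enabled, the base class then runs the shared
        # _simplify_graph() helper on self.out_graph.
        return False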

angr/analyses/decompiler/optimization_passes/{return_duplicator.py → return_duplicator_base.py}

@@ -1,8 +1,7 @@
-from typing import Any, Tuple, Dict, List
+from typing import Any, Tuple, Dict, List, Optional
 from itertools import count
 import copy
 import logging
-import inspect
 
 import ailment.expression
 import networkx
@@ -11,138 +10,84 @@ from ailment import Block
 from ailment.statement import Jump, ConditionalJump, Assignment, Return, Label
 from ailment.expression import Const
 
-from .optimization_pass import StructuringOptimizationPass
 from ..condition_processor import ConditionProcessor, EmptyBlockNotice
 from ..graph_region import GraphRegion
 from ..utils import remove_labels, to_ail_supergraph, calls_in_graph
-from ..structuring.structurer_nodes import MultiNode
+from ..structuring.structurer_nodes import MultiNode, ConditionNode
+from ..region_identifier import RegionIdentifier
 
 _l = logging.getLogger(name=__name__)
 
 
-class ReturnDuplicator(StructuringOptimizationPass):
+class ReturnDuplicatorBase:
     """
-    An optimization pass that reverts a subset of Irreducible Statement Condensing (ISC) optimizations, as described
-    in the USENIX 2024 paper SAILR.
-
-    Some compilers, including GCC, Clang, and MSVC, apply various optimizations to reduce the number of statements in
-    code. These optimizations will take equivalent statements, or a subset of them, and replace them with a single
-    copy that is jumped to by gotos -- optimizing for space and sometimes speed.
-
-    This optimization pass will revert those gotos by re-duplicating the condensed blocks. Since Return statements
-    are the most common, we use this optimization pass to revert only gotos to return statements. Additionally, we
-    perform some additional readability fixups, like not re-duplicating returns to shared components.
-
-    Args:
-        func: The function to optimize.
-        node_idx_start: The index to start at when creating new nodes. This is used by Clinic to ensure that
-            node indices are unique across multiple passes.
-        max_opt_iters: The maximum number of optimization iterations to perform.
-        max_calls_in_regions: The maximum number of calls that can be in a region. This is used to prevent
-            duplicating too much code.
-        prevent_new_gotos: If True, this optimization pass will prevent new gotos from being created.
-        minimize_copies_for_regions: If True, this optimization pass will minimize the number of copies by doing only
-            a single copy for connected in_edges that form a region.
+    The base class for implementing Return Duplication as described in the SAILR paper.
+    This base class describes the general algorithm for duplicating return regions in a graph.
     """
 
-    ARCHES = None
-    PLATFORMS = None
-    NAME = "Duplicate return blocks to reduce goto statements"
-    DESCRIPTION = inspect.cleandoc(__doc__[: __doc__.index("Args:")])  # pylint:disable=unsubscriptable-object
-
+    # pylint:disable=unused-argument
     def __init__(
         self,
         func,
-        # internal parameters that should be used by Clinic
         node_idx_start: int = 0,
-        # settings
-        max_opt_iters: int = 10,
         max_calls_in_regions: int = 2,
-        prevent_new_gotos: bool = True,
         minimize_copies_for_regions: bool = True,
+        ri: Optional[RegionIdentifier] = None,
         **kwargs,
     ):
-        super().__init__(func, max_opt_iters=max_opt_iters, prevent_new_gotos=prevent_new_gotos, **kwargs)
+        self.node_idx = count(start=node_idx_start)
         self._max_calls_in_region = max_calls_in_regions
         self._minimize_copies_for_regions = minimize_copies_for_regions
 
-        self.node_idx = count(start=node_idx_start)
-        self.analyze()
+        # this should also be set by the optimization passes initer
+        self._func = func
+        self._ri: Optional[RegionIdentifier] = ri
+
+    #
+    # must implement these methods
+    #
+
+    def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False) -> bool:
+        raise NotImplementedError()
+
+    #
+    # main analysis
+    #
 
     def _check(self):
         # does this function have end points?
         return bool(self._func.endpoints), None
 
-    def _analyze(self, cache=None):
+    def _analyze_core(self, graph: networkx.DiGraph) -> bool:
         """
-        This analysis is run in a loop in analyze() for a maximum of max_opt_iters times.
+        This function does the core checks and duplications to the graph passed.
+        The return value is True if the graph was changed.
         """
         graph_changed = False
-        endnode_regions = self._find_endnode_regions(self.out_graph)
+        endnode_regions = self._find_endnode_regions(graph)
 
         if self._minimize_copies_for_regions:
             # perform a second pass to minimize the number of copies by doing only a single copy
             # for connected in_edges that form a region
-            endnode_regions = self._copy_connected_edge_components(endnode_regions, self.out_graph)
+            endnode_regions = self._copy_connected_edge_components(endnode_regions, graph)
 
         for region_head, (in_edges, region) in endnode_regions.items():
             is_single_const_ret_region = self._is_simple_return_graph(region)
             for in_edge in in_edges:
                 pred_node = in_edge[0]
                 if self._should_duplicate_dst(
-                    pred_node, region_head, self.out_graph, dst_is_const_ret=is_single_const_ret_region
+                    pred_node, region_head, graph, dst_is_const_ret=is_single_const_ret_region
                 ):
                     # every eligible pred gets a new region copy
-                    self._copy_region([pred_node], region_head, region, self.out_graph)
+                    self._copy_region([pred_node], region_head, region, graph)
 
-            if region_head in self.out_graph and self.out_graph.in_degree(region_head) == 0:
-                self.out_graph.remove_nodes_from(region)
+            if region_head in graph and graph.in_degree(region_head) == 0:
+                graph.remove_nodes_from(region)
 
             graph_changed = True
 
         return graph_changed
 
-    def _is_goto_edge(
-        self,
-        src: Block,
-        dst: Block,
-        graph: networkx.DiGraph = None,
-        check_for_ifstmts=True,
-        max_level_check=1,
-    ):
-        """
-        TODO: correct how goto edge addressing works
-        This function only exists because a long-standing bug that sometimes reports the if-stmt addr
-        above a goto edge as the goto src. Because of this, we need to check for predecessors above the goto and
-        see if they are a goto. This needs to include Jump to deal with loops.
-        """
-        if check_for_ifstmts and graph is not None:
-            blocks = [src]
-            level_blocks = [src]
-            for _ in range(max_level_check):
-                new_level_blocks = []
-                for lblock in level_blocks:
-                    new_level_blocks += list(graph.predecessors(lblock))
-
-                blocks += new_level_blocks
-                level_blocks = new_level_blocks
-
-            src_direct_parents = list(graph.predecessors(src))
-            for block in blocks:
-                if not block or not block.statements:
-                    continue
-
-                # special case if-stmts that are next to each other
-                if block in src_direct_parents and isinstance(block.statements[-1], ConditionalJump):
-                    continue
-
-                if self._goto_manager.is_goto_edge(block, dst):
-                    return True
-        else:
-            return self._goto_manager.is_goto_edge(src, dst)
-
-        return False
-
     def _find_endnode_regions(self, graph) -> Dict[Any, Tuple[List[Tuple[Any, Any]], networkx.DiGraph]]:
         """
         Find all the regions that contain a node with no successors. These are the "end nodes" of the graph.
@@ -194,14 +139,6 @@ class ReturnDuplicator(StructuringOptimizationPass):
 
         return end_node_regions
 
-    def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False):
-        # returns that are only returning a constant should be duplicated always;
-        if dst_is_const_ret:
-            return True
-
-        # check above
-        return self._is_goto_edge(src, dst, graph=graph, check_for_ifstmts=True)
-
     def _copy_region(self, pred_nodes, region_head, region, graph):
         # copy the entire return region
         copies = {}
@@ -295,8 +232,14 @@
     @staticmethod
     def _is_simple_return_graph(graph: networkx.DiGraph, max_assigns=1):
         """
-        Checks if the graph is a single block, or a series of simple assignments, that ends
-        in a return statement. This is used to know when we MUST duplicate the return block.
+        Checks if the provided graph is a graph that ONLY contains a "simple" return.
+        If there were absolutely no bugs in angr, we could just check that a single return block exists.
+        However, due to some propagation bugs, these cases can all happen and are all valid:
+        1. [Jmp] -> [Jmp] -> [Ret]
+        2. [Jmp] -> [Jmp, x=0] -> [Ret x]
+        3. [Jmp] -> [Jmp, x=rdi] -> [Ret x]
+
+        To deal with this, we need to do the sketchy checks we do below.
         """
         labeless_graph = to_ail_supergraph(remove_labels(graph))
         nodes = list(labeless_graph.nodes())
@@ -466,14 +409,20 @@
 
     @staticmethod
     def _find_block_sets_in_all_regions(top_region: GraphRegion):
+        def _unpack_block_type_to_addrs(node):
+            if isinstance(node, Block):
+                return {node.addr}
+            elif isinstance(node, MultiNode):
+                return {n.addr for n in node.nodes}
+            elif isinstance(node, ConditionNode):
+                return _unpack_block_type_to_addrs(node.true_node) | _unpack_block_type_to_addrs(node.false_node)
+            return set()
+
         def _unpack_region_to_block_addrs(region: GraphRegion):
             region_addrs = set()
             for node in region.graph.nodes:
-                if isinstance(node, Block):
-                    region_addrs.add(node.addr)
-                elif isinstance(node, MultiNode):
-                    for _node in node.nodes:
-                        region_addrs.add(_node.addr)
+                if isinstance(node, (Block, MultiNode, ConditionNode)):
+                    region_addrs |= _unpack_block_type_to_addrs(node)
                 elif isinstance(node, GraphRegion):
                     region_addrs |= _unpack_region_to_block_addrs(node)
 
@@ -487,6 +436,9 @@
                 elif isinstance(node, MultiNode):
                     for _node in node.nodes:
                         addrs_by_region[region].add(_node.addr)
+                elif isinstance(node, ConditionNode):
+                    addrs_by_region[region] |= _unpack_block_type_to_addrs(node.true_node)
+                    addrs_by_region[region] |= _unpack_block_type_to_addrs(node.false_node)
                 else:
                     addrs_by_region[region] |= _unpack_region_to_block_addrs(node)
                     _unpack_every_region(node, addrs_by_region)

angr/analyses/decompiler/optimization_passes/return_duplicator_high.py (new file)

@@ -0,0 +1,57 @@
+import logging
+
+import networkx
+
+from .return_duplicator_base import ReturnDuplicatorBase
+from .optimization_pass import OptimizationPass, OptimizationPassStage
+
+_l = logging.getLogger(name=__name__)
+
+
+class ReturnDuplicatorHigh(OptimizationPass, ReturnDuplicatorBase):
+    """
+    This is a light-level goto-less version of the ReturnDuplicator optimization pass. It will only
+    duplicate return-only blocks.
+    """
+
+    ARCHES = None
+    PLATFORMS = None
+    STAGE = OptimizationPassStage.AFTER_VARIABLE_RECOVERY
+    NAME = "Duplicate return-only blocks (high)"
+    DESCRIPTION = __doc__
+
+    def __init__(
+        self,
+        func,
+        # internal parameters that should be used by Clinic
+        node_idx_start: int = 0,
+        # settings
+        max_calls_in_regions: int = 2,
+        minimize_copies_for_regions: bool = True,
+        **kwargs,
+    ):
+        ReturnDuplicatorBase.__init__(
+            self,
+            func,
+            node_idx_start=node_idx_start,
+            max_calls_in_regions=max_calls_in_regions,
+            minimize_copies_for_regions=minimize_copies_for_regions,
+            **kwargs,
+        )
+        OptimizationPass.__init__(self, func, **kwargs)
+        # since we run before the RegionIdentification pass in the decompiler, we need to collect it early here
+        self._ri = self._recover_regions(self._graph)
+
+        self.analyze()
+
+    def _check(self):
+        return ReturnDuplicatorBase._check(self)
+
+    def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False):
+        # TODO: implement a better check
+        return dst_is_const_ret
+
+    def _analyze(self, cache=None):
+        copy_graph = networkx.DiGraph(self._graph)
+        if self._analyze_core(copy_graph):
+            self.out_graph = self._simplify_graph(copy_graph)

angr/analyses/decompiler/optimization_passes/return_duplicator_low.py (new file)

@@ -0,0 +1,121 @@
+import logging
+import inspect
+
+import networkx
+
+from ailment import Block
+from ailment.statement import ConditionalJump
+
+from .return_duplicator_base import ReturnDuplicatorBase
+from .optimization_pass import StructuringOptimizationPass
+
+_l = logging.getLogger(name=__name__)
+
+
+class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
+    """
+    An optimization pass that reverts a subset of Irreducible Statement Condensing (ISC) optimizations, as described
+    in the USENIX 2024 paper SAILR. This is the heavy/goto version of the ReturnDuplicator optimization pass.
+
+    Some compilers, including GCC, Clang, and MSVC, apply various optimizations to reduce the number of statements in
+    code. These optimizations will take equivalent statements, or a subset of them, and replace them with a single
+    copy that is jumped to by gotos -- optimizing for space and sometimes speed.
+
+    This optimization pass will revert those gotos by re-duplicating the condensed blocks. Since Return statements
+    are the most common, we use this optimization pass to revert only gotos to return statements. Additionally, we
+    perform some additional readability fixups, like not re-duplicating returns to shared components.
+
+    Args:
+        func: The function to optimize.
+        node_idx_start: The index to start at when creating new nodes. This is used by Clinic to ensure that
+            node indices are unique across multiple passes.
+        max_opt_iters: The maximum number of optimization iterations to perform.
+        max_calls_in_regions: The maximum number of calls that can be in a region. This is used to prevent
+            duplicating too much code.
+        prevent_new_gotos: If True, this optimization pass will prevent new gotos from being created.
+        minimize_copies_for_regions: If True, this optimization pass will minimize the number of copies by doing only
+            a single copy for connected in_edges that form a region.
+    """
+
+    ARCHES = None
+    PLATFORMS = None
+    NAME = "Duplicate returns connect with gotos (low)"
+    DESCRIPTION = inspect.cleandoc(__doc__[: __doc__.index("Args:")])  # pylint:disable=unsubscriptable-object
+
+    def __init__(
+        self,
+        func,
+        # internal parameters that should be used by Clinic
+        node_idx_start: int = 0,
+        # settings
+        max_opt_iters: int = 4,
+        max_calls_in_regions: int = 2,
+        prevent_new_gotos: bool = True,
+        minimize_copies_for_regions: bool = True,
+        **kwargs,
+    ):
+        ReturnDuplicatorBase.__init__(
+            self,
+            func,
+            node_idx_start=node_idx_start,
+            max_calls_in_regions=max_calls_in_regions,
+            minimize_copies_for_regions=minimize_copies_for_regions,
+            **kwargs,
+        )
+        StructuringOptimizationPass.__init__(
+            self, func, max_opt_iters=max_opt_iters, prevent_new_gotos=prevent_new_gotos, require_gotos=True, **kwargs
+        )
+        self.analyze()
+
+    def _check(self):
+        return ReturnDuplicatorBase._check(self)
+
+    def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False):
+        return self._is_goto_edge(src, dst, graph=graph, check_for_ifstmts=True)
+
+    def _is_goto_edge(
+        self,
+        src: Block,
+        dst: Block,
+        graph: networkx.DiGraph = None,
+        check_for_ifstmts=True,
+        max_level_check=1,
+    ):
+        """
+        TODO: correct how goto edge addressing works
+        This function only exists because a long-standing bug that sometimes reports the if-stmt addr
+        above a goto edge as the goto src. Because of this, we need to check for predecessors above the goto and
+        see if they are a goto. This needs to include Jump to deal with loops.
+        """
+        if check_for_ifstmts and graph is not None:
+            blocks = [src]
+            level_blocks = [src]
+            for _ in range(max_level_check):
+                new_level_blocks = []
+                for lblock in level_blocks:
+                    new_level_blocks += list(graph.predecessors(lblock))
+
+                blocks += new_level_blocks
+                level_blocks = new_level_blocks
+
+            src_direct_parents = list(graph.predecessors(src))
+            for block in blocks:
+                if not block or not block.statements:
+                    continue
+
+                # special case if-stmts that are next to each other
+                if block in src_direct_parents and isinstance(block.statements[-1], ConditionalJump):
+                    continue
+
+                if self._goto_manager.is_goto_edge(block, dst):
+                    return True
+        else:
+            return self._goto_manager.is_goto_edge(src, dst)
+
+        return False
+
+    def _analyze(self, cache=None):
+        """
+        This analysis is run in a loop in analyze() for a maximum of max_opt_iters times.
+        """
+        return self._analyze_core(self.out_graph)

angr/analyses/decompiler/region_identifier.py

@@ -718,6 +718,13 @@ class RegionIdentifier(Analysis):
                         region.graph_with_successors.add_edge(nn, succ)
                     region.successors.add(succ)
 
+            # add edges between successors
+            for succ_0 in region.successors:
+                for succ_1 in region.successors:
+                    if succ_0 is not succ_1:
+                        if secondary_graph.has_edge(succ_0, succ_1):
+                            region.graph_with_successors.add_edge(succ_0, succ_1)
+
         # l.debug("Walked back %d levels in postdom tree.", levels)
         l.debug("Node %r, frontier %r.", node, frontier)
         # l.debug("Identified an acyclic region %s.", self._dbg_block_list(region.graph.nodes()))
@@ -929,6 +936,12 @@ class RegionIdentifier(Analysis):
             region.successors = []
         region.successors += list(abnormal_exit_nodes)
 
+        for succ_0 in region.successors:
+            for succ_1 in region.successors:
+                if succ_0 is not succ_1:
+                    if graph.has_edge(succ_0, succ_1):
+                        region.graph_with_successors.add_edge(succ_0, succ_1)
+
         for node in loop_nodes:
             graph.remove_node(node)
 

angr/analyses/decompiler/seq_to_blocks.py (new file)

@@ -0,0 +1,19 @@
+from ailment import Block
+
+from .sequence_walker import SequenceWalker
+
+
+class SequenceToBlocks(SequenceWalker):
+    """
+    A helper class to convert a sequence node into a list of blocks.
+    """
+
+    def __init__(self):
+        handlers = {
+            Block: self._handle_Block,
+        }
+        self.blocks = []
+        super().__init__(handlers, force_forward_scan=True, update_seqnode_in_place=False)
+
+    def _handle_Block(self, node: Block, **kwargs):  # pylint:disable=unused-argument
+        self.blocks.append(node)
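
The new SequenceToBlocks walker collects every ailment Block found under a structured SequenceNode, and is presumably what the sequence_to_statements helper imported by FlipBooleanCmp (first hunk above) builds on. A hedged usage sketch; seq_node is assumed to be a structured SequenceNode produced elsewhere in the decompiler pipeline:

# Usage sketch; only SequenceToBlocks itself is defined in this diff.
from angr.analyses.decompiler.seq_to_blocks import SequenceToBlocks

walker = SequenceToBlocks()
walker.walk(seq_node)  # SequenceWalker.walk traverses the structured node tree
blocks = walker.blocks  # ailment.Block objects collected during the walk
stmt_count = sum(len(b.statements) for b in blocks)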