angr 9.2.126__py3-none-manylinux2014_x86_64.whl → 9.2.128__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (43):
  1. angr/__init__.py +1 -1
  2. angr/analyses/analysis.py +8 -2
  3. angr/analyses/cfg/cfg_fast.py +12 -1
  4. angr/analyses/decompiler/clinic.py +23 -2
  5. angr/analyses/decompiler/condition_processor.py +5 -7
  6. angr/analyses/decompiler/decompilation_cache.py +4 -0
  7. angr/analyses/decompiler/decompiler.py +36 -7
  8. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +1 -2
  9. angr/analyses/decompiler/graph_region.py +3 -6
  10. angr/analyses/decompiler/label_collector.py +32 -0
  11. angr/analyses/decompiler/optimization_passes/__init__.py +3 -0
  12. angr/analyses/decompiler/optimization_passes/optimization_pass.py +6 -3
  13. angr/analyses/decompiler/optimization_passes/switch_default_case_duplicator.py +41 -3
  14. angr/analyses/decompiler/optimization_passes/switch_reused_entry_rewriter.py +102 -0
  15. angr/analyses/decompiler/presets/basic.py +2 -0
  16. angr/analyses/decompiler/presets/fast.py +2 -0
  17. angr/analyses/decompiler/presets/full.py +2 -0
  18. angr/analyses/decompiler/region_identifier.py +8 -8
  19. angr/analyses/decompiler/ssailification/traversal.py +1 -0
  20. angr/analyses/decompiler/ssailification/traversal_engine.py +15 -0
  21. angr/analyses/decompiler/structured_codegen/c.py +0 -3
  22. angr/analyses/decompiler/structured_codegen/dwarf_import.py +4 -1
  23. angr/analyses/decompiler/structuring/phoenix.py +131 -31
  24. angr/analyses/decompiler/structuring/recursive_structurer.py +3 -1
  25. angr/analyses/decompiler/structuring/structurer_base.py +33 -1
  26. angr/analyses/reaching_definitions/function_handler_library/string.py +2 -2
  27. angr/analyses/s_liveness.py +3 -3
  28. angr/analyses/s_propagator.py +74 -3
  29. angr/angrdb/models.py +2 -1
  30. angr/angrdb/serializers/kb.py +3 -3
  31. angr/angrdb/serializers/structured_code.py +5 -3
  32. angr/calling_conventions.py +1 -1
  33. angr/knowledge_base.py +1 -1
  34. angr/knowledge_plugins/__init__.py +0 -2
  35. angr/knowledge_plugins/structured_code.py +1 -1
  36. angr/utils/ssa/__init__.py +8 -3
  37. {angr-9.2.126.dist-info → angr-9.2.128.dist-info}/METADATA +6 -6
  38. {angr-9.2.126.dist-info → angr-9.2.128.dist-info}/RECORD +42 -41
  39. {angr-9.2.126.dist-info → angr-9.2.128.dist-info}/WHEEL +1 -1
  40. angr/knowledge_plugins/decompilation.py +0 -45
  41. {angr-9.2.126.dist-info → angr-9.2.128.dist-info}/LICENSE +0 -0
  42. {angr-9.2.126.dist-info → angr-9.2.128.dist-info}/entry_points.txt +0 -0
  43. {angr-9.2.126.dist-info → angr-9.2.128.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,102 @@
1
+ # pylint:disable=too-many-boolean-expressions
2
+ from __future__ import annotations
3
+ from itertools import count
4
+ import logging
5
+
6
+ from ailment.block import Block
7
+ from ailment.statement import Jump
8
+ from ailment.expression import Const
9
+
10
+ from angr.knowledge_plugins.cfg import IndirectJumpType
11
+
12
+ from .optimization_pass import OptimizationPass, OptimizationPassStage
13
+
14
+
15
+ _l = logging.getLogger(name=__name__)
16
+
17
+
18
class SwitchReusedEntryRewriter(OptimizationPass):
    """
    For each switch-case construct (identified by jump tables), rewrite the entry into a goto block when we detect
    situations where an entry node is reused by edges in switch-case constructs that are not the current one. This code
    reuse is usually caused by compiler code deduplication.
    """

    ARCHES = None
    PLATFORMS = None
    STAGE = OptimizationPassStage.AFTER_AIL_GRAPH_CREATION
    NAME = "Rewrite switch-case entry nodes with multiple predecessors into goto statements."
    # __doc__ is None when docstrings are stripped (python -OO); fall back to an empty description instead of raising
    DESCRIPTION = (__doc__ or "").strip()

    def __init__(self, func, **kwargs):
        """
        Run the pass on ``func``.

        The counter used to number newly created goto blocks is seeded from ``self._scratch["node_idx"]`` (0 when
        absent); the next unused index is written back to the scratch after the analysis has run.
        """
        super().__init__(func, **kwargs)

        self.node_idx = count(start=self._scratch.get("node_idx", 0))

        self.analyze()

        self._scratch["node_idx"] = next(self.node_idx)

    def _check(self):
        """
        Look for jump-table entry nodes in the current graph that are shared between switch jump blocks.

        :return: ``(True, {"reused_entries": mapping})`` if at least one such entry node exists, where ``mapping``
                 maps the entry node to the set of its predecessors whose address is a switch jump block address;
                 ``(False, None)`` otherwise.
        """
        jumptables = self.kb.cfgs.get_most_accurate().jump_tables
        switch_types = {IndirectJumpType.Jumptable_AddressComputed, IndirectJumpType.Jumptable_AddressLoadedFromMemory}
        switch_jump_block_addrs = {jumptable.addr for jumptable in jumptables.values() if jumptable.type in switch_types}
        if not self._func.block_addrs_set.intersection(switch_jump_block_addrs):
            # this function has no switch jump block at all
            return False, None

        # collect jump table entry nodes that have more than one predecessor, at least one of which is a switch jump
        # block other than the jump table currently being examined
        reused_entries: dict[Block, set[Block]] = {}
        for jumptable in jumptables.values():
            for entry_addr in sorted(set(jumptable.jumptable_entries)):
                for entry_node in self._get_blocks(entry_addr):
                    preds = list(self._graph.predecessors(entry_node))
                    if len(preds) <= 1:
                        continue
                    switch_preds = {pred for pred in preds if pred.addr in switch_jump_block_addrs}
                    if any(pred.addr != jumptable.addr for pred in switch_preds):
                        reused_entries[entry_node] = switch_preds

        if not reused_entries:
            return False, None
        return True, {"reused_entries": reused_entries}

    def _analyze(self, cache=None):
        """
        Redirect all but one switch-head predecessor of every reused entry node to a fresh goto block.

        :param cache: The dict produced by ``_check()``. When omitted, ``_check()`` is run here.

        The graph in ``self._graph`` is edited in place and published as ``self.out_graph`` only if at least one
        edge was rewritten; otherwise ``self.out_graph`` is set to None.
        """
        if cache is None:
            # called without the result of _check(): compute it here rather than failing on a None subscript
            found, cache = self._check()
            if not found:
                self.out_graph = None
                return

        reused_entries: dict[Block, set[Block]] = cache["reused_entries"]
        out_graph = None

        for entry_node, pred_nodes in reused_entries.items():
            # we assign the entry node to the predecessor with the lowest address. idx may be None, which cannot be
            # compared against an int, so None is ordered before any integer idx at the same address
            sorted_pred_nodes = sorted(
                pred_nodes,
                key=lambda x: (x.addr, x.idx is not None, 0 if x.idx is None else x.idx),
            )

            for head_node in sorted_pred_nodes[1:]:
                # create the new goto node
                goto_stmt = Jump(
                    None,
                    Const(None, None, entry_node.addr, self.project.arch.bits, ins_addr=entry_node.addr),
                    target_idx=entry_node.idx,
                    ins_addr=entry_node.addr,
                )
                goto_node = Block(
                    entry_node.addr,
                    0,
                    statements=[goto_stmt],
                    idx=next(self.node_idx),
                )

                if out_graph is None:
                    out_graph = self._graph
                out_graph.remove_edge(head_node, entry_node)
                out_graph.add_edge(head_node, goto_node)
                # we are virtualizing these edges, so we don't need to add the edge from goto_node to the entry_node

        self.out_graph = out_graph
@@ -9,6 +9,7 @@ from angr.analyses.decompiler.optimization_passes import (
9
9
  RetAddrSaveSimplifier,
10
10
  X86GccGetPcSimplifier,
11
11
  CallStatementRewriter,
12
+ SwitchReusedEntryRewriter,
12
13
  )
13
14
 
14
15
 
@@ -23,6 +24,7 @@ preset_basic = DecompilationPreset(
23
24
  RetAddrSaveSimplifier,
24
25
  X86GccGetPcSimplifier,
25
26
  CallStatementRewriter,
27
+ SwitchReusedEntryRewriter,
26
28
  ],
27
29
  )
28
30
 
@@ -21,6 +21,7 @@ from angr.analyses.decompiler.optimization_passes import (
21
21
  CallStatementRewriter,
22
22
  MultiSimplifier,
23
23
  DeadblockRemover,
24
+ SwitchReusedEntryRewriter,
24
25
  )
25
26
 
26
27
 
@@ -41,6 +42,7 @@ preset_fast = DecompilationPreset(
41
42
  ReturnDuplicatorHigh,
42
43
  DeadblockRemover,
43
44
  SwitchDefaultCaseDuplicator,
45
+ SwitchReusedEntryRewriter,
44
46
  LoweredSwitchSimplifier,
45
47
  ReturnDuplicatorLow,
46
48
  ReturnDeduplicator,
@@ -26,6 +26,7 @@ from angr.analyses.decompiler.optimization_passes import (
26
26
  FlipBooleanCmp,
27
27
  InlinedStringTransformationSimplifier,
28
28
  CallStatementRewriter,
29
+ SwitchReusedEntryRewriter,
29
30
  )
30
31
 
31
32
 
@@ -57,6 +58,7 @@ preset_full = DecompilationPreset(
57
58
  FlipBooleanCmp,
58
59
  InlinedStringTransformationSimplifier,
59
60
  CallStatementRewriter,
61
+ SwitchReusedEntryRewriter,
60
62
  ],
61
63
  )
62
64
 
@@ -532,7 +532,7 @@ class RegionIdentifier(Analysis):
532
532
  )
533
533
  if len(region.successors) > 1 and self._force_loop_single_exit:
534
534
  # multi-successor region. refinement is required
535
- self._refine_loop_successors(region, graph)
535
+ self._refine_loop_successors_to_guarded_successors(region, graph)
536
536
 
537
537
  # if the head node is in the graph and it's not the head of the graph, we will need to update the head node
538
538
  # address.
@@ -543,10 +543,10 @@ class RegionIdentifier(Analysis):
543
543
 
544
544
  return region
545
545
 
546
- def _refine_loop_successors(self, region, graph: networkx.DiGraph):
546
+ def _refine_loop_successors_to_guarded_successors(self, region, graph: networkx.DiGraph):
547
547
  """
548
- If there are multiple successors of a loop, convert them into conditional gotos. Eventually there should be
549
- only one loop successor.
548
+ If there are multiple successors of a loop, convert them into guarded successors. Eventually there should be
549
+ only one loop successor. This is used in the DREAM structuring algorithm.
550
550
 
551
551
  :param GraphRegion region: The cyclic region to refine.
552
552
  :param networkx.DiGraph graph: The current graph that is being structured.
@@ -565,11 +565,11 @@ class RegionIdentifier(Analysis):
565
565
  cond = ConditionNode(
566
566
  condnode_addr,
567
567
  None,
568
- self.cond_proc.reaching_conditions[successors[0]],
569
- successors[0],
570
- false_node=None,
568
+ self.cond_proc.reaching_conditions[successors[1]],
569
+ successors[1],
570
+ false_node=successors[0],
571
571
  )
572
- for succ in successors[1:]:
572
+ for succ in successors[2:]:
573
573
  cond = ConditionNode(
574
574
  condnode_addr,
575
575
  None,
@@ -32,6 +32,7 @@ class TraversalAnalysis(ForwardAnalysis[None, NodeType, object, object]):
32
32
  )
33
33
  self._engine_ail = SimEngineSSATraversal(
34
34
  self.project.arch,
35
+ self.project.simos,
35
36
  sp_tracker=sp_tracker,
36
37
  bp_as_gpr=bp_as_gpr,
37
38
  stackvars=self._stackvars,
@@ -7,6 +7,7 @@ from ailment.expression import Register, BinaryOp, StackBaseOffset, ITE, VEXCCal
7
7
  from angr.engines.light import SimEngineLight, SimEngineLightAILMixin
8
8
  from angr.utils.ssa import get_reg_offset_base
9
9
  from angr.utils.orderedset import OrderedSet
10
+ from angr.calling_conventions import default_cc
10
11
  from .traversal_state import TraversalState
11
12
 
12
13
 
@@ -23,6 +24,7 @@ class SimEngineSSATraversal(
23
24
  def __init__(
24
25
  self,
25
26
  arch,
27
+ simos,
26
28
  sp_tracker=None,
27
29
  bp_as_gpr: bool = False,
28
30
  def_to_loc=None,
@@ -33,6 +35,7 @@ class SimEngineSSATraversal(
33
35
  super().__init__()
34
36
 
35
37
  self.arch = arch
38
+ self.simos = simos
36
39
  self.sp_tracker = sp_tracker
37
40
  self.bp_as_gpr = bp_as_gpr
38
41
  self.stackvars = stackvars
@@ -75,6 +78,18 @@ class SimEngineSSATraversal(
75
78
  self._expr(stmt.false_target)
76
79
 
77
80
  def _handle_Call(self, stmt: Call):
81
+
82
+ # kill caller-saved registers
83
+ cc = (
84
+ default_cc(self.arch.name, platform=self.simos.name if self.simos is not None else None)
85
+ if stmt.calling_convention is None
86
+ else stmt.calling_convention
87
+ )
88
+ for reg_name in cc.CALLER_SAVED_REGS:
89
+ reg_offset = self.arch.registers[reg_name][0]
90
+ base_off = get_reg_offset_base(reg_offset, self.arch)
91
+ self.state.live_registers.discard(base_off)
92
+
78
93
  if stmt.ret_expr is not None and isinstance(stmt.ret_expr, Register):
79
94
  codeloc = self._codeloc()
80
95
  self.def_to_loc.append((stmt.ret_expr, codeloc))
@@ -2511,9 +2511,6 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2511
2511
 
2512
2512
  self._analyze()
2513
2513
 
2514
- if flavor is not None:
2515
- self.kb.structured_code[(func.addr, flavor)] = self
2516
-
2517
2514
  def reapply_options(self, options):
2518
2515
  for option, value in options:
2519
2516
  if option.param == "braces_on_own_lines":
@@ -5,6 +5,7 @@ import logging
5
5
  from sortedcontainers import SortedList
6
6
 
7
7
  from angr.analyses import Analysis, register_analysis
8
+ from angr.analyses.decompiler.decompilation_cache import DecompilationCache
8
9
  from .base import BaseStructuredCodeGenerator, InstructionMapping, PositionMapping
9
10
  from angr.knowledge_plugins.functions.function import Function
10
11
 
@@ -30,7 +31,9 @@ class ImportSourceCode(BaseStructuredCodeGenerator, Analysis):
30
31
  self.regenerate_text()
31
32
 
32
33
  if flavor is not None and self.text:
33
- self.kb.structured_code[(function.addr, flavor)] = self
34
+ if (function.addr, flavor) not in self.kb.decompilations:
35
+ self.kb.decompilations[(function.addr, flavor)] = DecompilationCache(function.addr)
36
+ self.kb.decompilations[(function.addr, flavor)].codegen = self
34
37
 
35
38
  def regenerate_text(self):
36
39
  cache = {}
@@ -127,6 +127,8 @@ class PhoenixStructurer(StructurerBase):
127
127
  @staticmethod
128
128
  def _assert_graph_ok(g, msg: str) -> None:
129
129
  if _DEBUG:
130
+ if g is None:
131
+ return
130
132
  assert (
131
133
  len(list(networkx.connected_components(networkx.Graph(g)))) <= 1
132
134
  ), f"{msg}: More than one connected component. Please report this."
@@ -229,7 +231,8 @@ class PhoenixStructurer(StructurerBase):
229
231
  )
230
232
  l.debug("... matching cyclic schemas: %s at %r", matched, node)
231
233
  any_matches |= matched
232
- self._assert_graph_ok(self._region.graph, "Removed incorrect edges")
234
+ if matched:
235
+ self._assert_graph_ok(self._region.graph, "Removed incorrect edges")
233
236
  return any_matches
234
237
 
235
238
  def _match_cyclic_schemas(self, node, head, graph, full_graph) -> bool:
@@ -559,9 +562,12 @@ class PhoenixStructurer(StructurerBase):
559
562
  seq_node = SequenceNode(node.addr, nodes=[node])
560
563
  seen_nodes = set()
561
564
  while True:
562
- succs = list(full_graph.successors(next_node))
565
+ succs = list(graph.successors(next_node))
563
566
  if len(succs) != 1:
564
567
  return False, None
568
+ if full_graph.out_degree[next_node] > 1:
569
+ # all successors in the full graph should have been refined away at this point
570
+ return False, None
565
571
  next_node = succs[0]
566
572
 
567
573
  if next_node is node:
@@ -600,6 +606,8 @@ class PhoenixStructurer(StructurerBase):
600
606
  refined = self._refine_cyclic_core(head)
601
607
  l.debug("... refined: %s", refined)
602
608
  if refined:
609
+ self._assert_graph_ok(self._region.graph, "Refinement went wrong")
610
+ # cyclic refinement may create dangling nodes in the full graph
603
611
  return True
604
612
  return False
605
613
 
@@ -1020,7 +1028,9 @@ class PhoenixStructurer(StructurerBase):
1020
1028
  any_matches |= matched
1021
1029
  if matched:
1022
1030
  break
1023
- self._assert_graph_ok(self._region.graph, "Removed incorrect edges")
1031
+
1032
+ self._assert_graph_ok(self._region.graph, "Removed incorrect edges")
1033
+
1024
1034
  return any_matches
1025
1035
 
1026
1036
  # switch cases
@@ -1094,7 +1104,7 @@ class PhoenixStructurer(StructurerBase):
1094
1104
  to_remove,
1095
1105
  graph,
1096
1106
  full_graph,
1097
- can_bail=True,
1107
+ bail_on_nonhead_outedges=True,
1098
1108
  )
1099
1109
  if not r:
1100
1110
  return False
@@ -1161,18 +1171,18 @@ class PhoenixStructurer(StructurerBase):
1161
1171
  node_pred = next(iter(graph.predecessors(node)))
1162
1172
 
1163
1173
  case_nodes = list(graph.successors(node_a))
1164
- case_node_successors = set()
1165
- for case_node in case_nodes:
1166
- if case_node is node_pred:
1167
- continue
1168
- if case_node.addr in jump_table.jumptable_entries:
1169
- succs = set(graph.successors(case_node))
1170
- case_node_successors |= {succ for succ in succs if succ.addr not in jump_table.jumptable_entries}
1171
- if len(case_node_successors) > 1:
1172
- return False
1173
1174
 
1174
- # we will definitely be able to structure this into a full switch-case. remove node from switch_case_known_heads
1175
- self.switch_case_known_heads.remove(node)
1175
+ # case 1: the common successor happens to be directly reachable from node_a (usually as a result of compiler
1176
+ # optimization)
1177
+ # example: touch_touch_no_switch.o:main
1178
+ r = self.switch_case_entry_node_has_common_successor_case_1(graph, jump_table, case_nodes, node_pred)
1179
+
1180
+ # case 2: the common successor is not directly reachable from node_a. this is a more common case.
1181
+ if not r:
1182
+ r |= self.switch_case_entry_node_has_common_successor_case_2(graph, jump_table, case_nodes, node_pred)
1183
+
1184
+ if not r:
1185
+ return False
1176
1186
 
1177
1187
  # un-structure IncompleteSwitchCaseNode
1178
1188
  if isinstance(node_a, SequenceNode) and node_a.nodes and isinstance(node_a.nodes[0], IncompleteSwitchCaseNode):
@@ -1186,8 +1196,10 @@ class PhoenixStructurer(StructurerBase):
1186
1196
  # update node_a
1187
1197
  node_a = next(iter(nn for nn in graph.nodes if nn.addr == target))
1188
1198
 
1199
+ case_and_entry_addrs = self._find_case_and_entry_addrs(node_a, graph, cmp_lb, jump_table)
1200
+
1189
1201
  cases, node_default, to_remove = self._switch_build_cases(
1190
- {cmp_lb + i: entry_addr for (i, entry_addr) in enumerate(jump_table.jumptable_entries)},
1202
+ case_and_entry_addrs,
1191
1203
  node,
1192
1204
  node_a,
1193
1205
  node_b_addr,
@@ -1203,7 +1215,7 @@ class PhoenixStructurer(StructurerBase):
1203
1215
  to_remove.add(node_default)
1204
1216
 
1205
1217
  to_remove.add(node_a) # add node_a
1206
- self._make_switch_cases_core(
1218
+ r = self._make_switch_cases_core(
1207
1219
  node,
1208
1220
  cmp_expr,
1209
1221
  cases,
@@ -1215,7 +1227,11 @@ class PhoenixStructurer(StructurerBase):
1215
1227
  full_graph,
1216
1228
  node_a=node_a,
1217
1229
  )
1230
+ if not r:
1231
+ return False
1218
1232
 
1233
+ # fully structured into a switch-case. remove node from switch_case_known_heads
1234
+ self.switch_case_known_heads.remove(node)
1219
1235
  self._switch_handle_gotos(cases, node_default, switch_end_addr)
1220
1236
 
1221
1237
  return True
@@ -1253,8 +1269,10 @@ class PhoenixStructurer(StructurerBase):
1253
1269
  else:
1254
1270
  return False
1255
1271
 
1272
+ case_and_entry_addrs = self._find_case_and_entry_addrs(node, graph, cmp_lb, jump_table)
1273
+
1256
1274
  cases, node_default, to_remove = self._switch_build_cases(
1257
- {cmp_lb + i: entry_addr for (i, entry_addr) in enumerate(jump_table.jumptable_entries)},
1275
+ case_and_entry_addrs,
1258
1276
  node,
1259
1277
  node,
1260
1278
  default_addr,
@@ -1265,12 +1283,10 @@ class PhoenixStructurer(StructurerBase):
1265
1283
  # there must be a default case
1266
1284
  return False
1267
1285
 
1268
- self._make_switch_cases_core(
1286
+ return self._make_switch_cases_core(
1269
1287
  node, cmp_expr, cases, default_addr, node_default, node.addr, to_remove, graph, full_graph
1270
1288
  )
1271
1289
 
1272
- return True
1273
-
1274
1290
  def _match_acyclic_incomplete_switch_cases(
1275
1291
  self, node, graph: networkx.DiGraph, full_graph: networkx.DiGraph, jump_tables: dict
1276
1292
  ) -> bool:
@@ -1322,14 +1338,9 @@ class PhoenixStructurer(StructurerBase):
1322
1338
  # and a case node (addr.b). The addr.a node is a successor to the head node while the addr.b node is a
1323
1339
  # successor to node_a
1324
1340
  default_node_candidates = [nn for nn in graph.nodes if nn.addr == node_b_addr]
1325
- if len(default_node_candidates) == 0:
1326
- node_default: BaseNode | None = None
1327
- elif len(default_node_candidates) == 1:
1328
- node_default: BaseNode | None = default_node_candidates[0]
1329
- else:
1330
- node_default: BaseNode | None = next(
1331
- iter(nn for nn in default_node_candidates if graph.has_edge(head_node, nn)), None
1332
- )
1341
+ node_default: BaseNode | None = next(
1342
+ iter(nn for nn in default_node_candidates if graph.has_edge(head_node, nn)), None
1343
+ )
1333
1344
 
1334
1345
  if node_default is not None and not isinstance(node_default, SequenceNode):
1335
1346
  # make the default node a SequenceNode so that we can insert Break and Continue nodes into it later
@@ -1432,7 +1443,7 @@ class PhoenixStructurer(StructurerBase):
1432
1443
  graph: networkx.DiGraph,
1433
1444
  full_graph: networkx.DiGraph,
1434
1445
  node_a=None,
1435
- can_bail=False,
1446
+ bail_on_nonhead_outedges: bool = False,
1436
1447
  ) -> bool:
1437
1448
  scnode = SwitchCaseNode(cmp_expr, cases, node_default, addr=addr)
1438
1449
 
@@ -1454,14 +1465,24 @@ class PhoenixStructurer(StructurerBase):
1454
1465
  if dst not in to_remove:
1455
1466
  out_edges.append((nn, dst))
1456
1467
 
1457
- if can_bail:
1468
+ if bail_on_nonhead_outedges:
1458
1469
  nonhead_out_nodes = {edge[1] for edge in out_edges if edge[1] is not head}
1459
1470
  if len(nonhead_out_nodes) > 1:
1460
1471
  # not ready to be structured yet - do it later
1461
1472
  return False
1462
1473
 
1474
+ # check if structuring will create any dangling nodes
1475
+ for case_node in to_remove:
1476
+ if case_node is not node_default and case_node is not node_a and case_node is not head:
1477
+ for succ in graph.successors(case_node):
1478
+ if succ is not case_node and succ is not head and graph.in_degree[succ] == 1:
1479
+ # succ will be dangling - not ready to be structured yet - do it later
1480
+ return False
1481
+
1463
1482
  if node_default is not None:
1464
1483
  # the head no longer goes to the default case
1484
+ if graph.has_edge(head, node_default):
1485
+ pass
1465
1486
  graph.remove_edge(head, node_default)
1466
1487
  full_graph.remove_edge(head, node_default)
1467
1488
  else:
@@ -1505,6 +1526,13 @@ class PhoenixStructurer(StructurerBase):
1505
1526
  if full_graph.has_edge(head, out_dst):
1506
1527
  full_graph.remove_edge(head, out_dst)
1507
1528
 
1529
+ # fix full_graph if needed: remove successors that are no longer needed
1530
+ for out_src, out_dst in out_edges[1:]:
1531
+ if out_dst in full_graph and out_dst not in graph and full_graph.in_degree[out_dst] == 0:
1532
+ full_graph.remove_node(out_dst)
1533
+ if out_dst in self._region.successors:
1534
+ self._region.successors.remove(out_dst)
1535
+
1508
1536
  # remove the last statement (conditional jump) in the head node
1509
1537
  remove_last_statement(head)
1510
1538
 
@@ -1514,6 +1542,25 @@ class PhoenixStructurer(StructurerBase):
1514
1542
 
1515
1543
  return True
1516
1544
 
1545
@staticmethod
def _find_case_and_entry_addrs(
    jump_head, graph, cmp_lb: int, jump_table
) -> dict[int, int | tuple[int, int | None]]:
    """
    Map every case number of a jump table to its entry target.

    Case numbers start at ``cmp_lb`` and follow the order of ``jump_table.jumptable_entries``. When the first
    successor of ``jump_head`` in ``graph`` located at the entry address is a MultiNode or a Block, the value is
    the pair ``(entry_addr, node.idx)``; otherwise it is the bare entry address.

    :param jump_head:  The node whose successors are the candidate entry nodes.
    :param graph:      The graph that ``jump_head`` lives in.
    :param cmp_lb:     The case number of the first jump table entry.
    :param jump_table: An object exposing ``jumptable_entries``.
    :return:           A dict from case number to entry address or ``(entry address, block idx)``.
    """
    # only the first successor seen at each address is ever consulted
    first_succ_by_addr = {}
    for succ in graph.successors(jump_head):
        first_succ_by_addr.setdefault(succ.addr, succ)

    result = {}
    for case_no, entry_addr in enumerate(jump_table.jumptable_entries, start=cmp_lb):
        if entry_addr in first_succ_by_addr and isinstance(first_succ_by_addr[entry_addr], (MultiNode, Block)):
            result[case_no] = entry_addr, first_succ_by_addr[entry_addr].idx
        else:
            result[case_no] = entry_addr
    return result
1563
+
1517
1564
  # other acyclic schemas
1518
1565
 
1519
1566
  def _match_acyclic_sequence(self, graph, full_graph, start_node) -> bool:
@@ -1982,6 +2029,11 @@ class PhoenixStructurer(StructurerBase):
1982
2029
 
1983
2030
  if full_graph.in_degree[left] > 1 and full_graph.in_degree[right] == 1:
1984
2031
  left, right = right, left
2032
+
2033
+ # ensure left and right nodes are not the head of a switch-case construct
2034
+ if left in self.switch_case_known_heads or right in self.switch_case_known_heads:
2035
+ return None
2036
+
1985
2037
  if (
1986
2038
  self._is_sequential_statement_block(left)
1987
2039
  and full_graph.in_degree[left] == 1
@@ -2024,6 +2076,11 @@ class PhoenixStructurer(StructurerBase):
2024
2076
 
2025
2077
  if full_graph.in_degree[left] == 1 and full_graph.in_degree[right] == 2:
2026
2078
  left, right = right, left
2079
+
2080
+ # ensure left and right nodes are not the head of a switch-case construct
2081
+ if left in self.switch_case_known_heads or right in self.switch_case_known_heads:
2082
+ return None
2083
+
2027
2084
  if (
2028
2085
  self._is_sequential_statement_block(right)
2029
2086
  and full_graph.in_degree[left] == 2
@@ -2060,6 +2117,11 @@ class PhoenixStructurer(StructurerBase):
2060
2117
 
2061
2118
  if full_graph.in_degree[left] > 1 and full_graph.in_degree[successor] == 1:
2062
2119
  left, successor = successor, left
2120
+
2121
+ # ensure left and successor nodes are not the head of a switch-case construct
2122
+ if left in self.switch_case_known_heads or successor in self.switch_case_known_heads:
2123
+ return None
2124
+
2063
2125
  if (
2064
2126
  self._is_sequential_statement_block(left)
2065
2127
  and full_graph.in_degree[left] == 1
@@ -2103,6 +2165,11 @@ class PhoenixStructurer(StructurerBase):
2103
2165
 
2104
2166
  if full_graph.in_degree[left] > 1 and full_graph.in_degree[else_node] == 1:
2105
2167
  left, else_node = else_node, left
2168
+
2169
+ # ensure left and else nodes are not the head of a switch-case construct
2170
+ if left in self.switch_case_known_heads or else_node in self.switch_case_known_heads:
2171
+ return None
2172
+
2106
2173
  if (
2107
2174
  self._is_sequential_statement_block(left)
2108
2175
  and full_graph.in_degree[left] == 1
@@ -2563,3 +2630,36 @@ class PhoenixStructurer(StructurerBase):
2563
2630
  graph_with_str.add_edge(f'"{src!r}"', f'"{dst!r}"')
2564
2631
 
2565
2632
  networkx.drawing.nx_pydot.write_dot(graph_with_str, path)
2633
+
2634
@staticmethod
def switch_case_entry_node_has_common_successor_case_1(graph, jump_table, case_nodes, node_pred) -> bool:
    """
    Check that the switch-case entry nodes leave the switch through at most one node, ignoring entry nodes that
    are themselves reached from another entry node.

    :param graph:      A graph exposing ``successors(node)``.
    :param jump_table: An object exposing ``jumptable_entries`` (the entry addresses).
    :param case_nodes: Candidate case nodes; only those whose ``addr`` is a jump table entry are considered.
    :param node_pred:  A node to skip (compared by identity).
    :return:           True if the considered entry nodes have at most one successor whose address is not a jump
                       table entry, False otherwise.
    """
    # hoisted once: membership is tested for every case node and every successor
    entry_addrs = frozenset(jump_table.jumptable_entries)

    # entry nodes paired with their successors, queried from the graph only once
    entry_cases = [
        (case_node, set(graph.successors(case_node)))
        for case_node in case_nodes
        if case_node is not node_pred and case_node.addr in entry_addrs
    ]

    # every node that some entry node flows into
    reached = set()
    for _, succs in entry_cases:
        reached |= succs

    exits = set()
    for case_node, succs in entry_cases:
        if case_node in reached:
            # this entry is a successor of another entry; its own successors are not counted
            continue
        exits.update(succ for succ in succs if succ.addr not in entry_addrs)

    return len(exits) <= 1
2654
+
2655
@staticmethod
def switch_case_entry_node_has_common_successor_case_2(graph, jump_table, case_nodes, node_pred) -> bool:
    """
    Check that the switch-case entry nodes leave the switch through at most one node.

    :param graph:      A graph exposing ``successors(node)``.
    :param jump_table: An object exposing ``jumptable_entries`` (the entry addresses).
    :param case_nodes: Candidate case nodes; only those whose ``addr`` is a jump table entry are considered.
    :param node_pred:  A node to skip (compared by identity).
    :return:           True if the considered entry nodes have at most one successor whose address is not a jump
                       table entry, False otherwise.
    """
    # hoisted once: membership is tested for every case node and every successor
    entry_addrs = frozenset(jump_table.jumptable_entries)

    exits = set()
    for case_node in case_nodes:
        if case_node is node_pred or case_node.addr not in entry_addrs:
            continue
        # successors that are entries themselves stay inside the switch and are not exits
        exits.update(succ for succ in graph.successors(case_node) if succ.addr not in entry_addrs)

    return len(exits) <= 1
@@ -83,7 +83,9 @@ class RecursiveStructurer(Analysis):
83
83
  # Get the parent region
84
84
  parent_region = parent_map.get(current_region)
85
85
  # structure this region
86
- st: StructurerBase = self.project.analyses[self.structurer_cls].prep()(
86
+ st: StructurerBase = self.project.analyses[self.structurer_cls].prep(
87
+ kb=self.kb, fail_fast=self._fail_fast
88
+ )(
87
89
  current_region.copy(),
88
90
  parent_map=parent_map,
89
91
  condition_processor=self.cond_proc,
@@ -18,6 +18,7 @@ from angr.analyses.decompiler.utils import (
18
18
  remove_last_statement,
19
19
  has_nonlabel_nonphi_statements,
20
20
  )
21
+ from angr.analyses.decompiler.label_collector import LabelCollector
21
22
  from .structurer_nodes import (
22
23
  MultiNode,
23
24
  SequenceNode,
@@ -800,9 +801,17 @@ class StructurerBase(Analysis):
800
801
  starting_case_ids.append(idx)
801
802
  continue
802
803
 
804
+ # we can't just collect addresses and block IDs of switch-case entry nodes because SequenceNode does not keep
805
+ # track of block IDs.
806
+ case_label_addrs = set()
807
+ for case_node in cases.values():
808
+ lc = LabelCollector(case_node)
809
+ for lst in lc.labels.values():
810
+ case_label_addrs |= set(lst)
811
+
803
812
  for idx in starting_case_ids:
804
813
  new_cases[idx] = cases[idx]
805
- self._remove_last_statement_if_jump(new_cases[idx])
814
+ self._remove_last_statement_if_jump_to_addr(new_cases[idx], case_label_addrs)
806
815
  succs = networkx.dfs_successors(graph, idx)
807
816
  idx_ = idx
808
817
  while idx_ in succs:
@@ -813,6 +822,29 @@ class StructurerBase(Analysis):
813
822
 
814
823
  return new_cases
815
824
 
825
@staticmethod
def _remove_last_statement_if_jump_to_addr(
    node: BaseNode | ailment.Block, addr_and_ids: set[tuple[int, int | None]]
) -> ailment.Stmt.Jump | ailment.Stmt.ConditionalJump | None:
    """
    Remove the last statement of ``node`` if it is a jump to one of the given targets.

    :param node:         The node to inspect.
    :param addr_and_ids: ``(address, block idx)`` pairs identifying the jump targets of interest.
    :return:             The result of ``remove_last_statement(node)`` when the single last statement is a Jump
                         or ConditionalJump with a constant target in ``addr_and_ids``; None otherwise,
                         including when the node is empty.
    """
    try:
        last_stmts = ConditionProcessor.get_last_statements(node)
    except EmptyBlockNotice:
        return None

    if len(last_stmts) != 1:
        return None
    stmt = last_stmts[0]

    # gather (target expression, target idx) pairs for the kind of jump at hand
    if isinstance(stmt, ailment.Stmt.Jump):
        candidates = [(stmt.target, stmt.target_idx)]
    elif isinstance(stmt, ailment.Stmt.ConditionalJump):
        candidates = [
            (stmt.true_target, stmt.true_target_idx),
            (stmt.false_target, stmt.false_target_idx),
        ]
    else:
        return None

    for target, target_idx in candidates:
        # only constant targets can be matched against the address set
        if isinstance(target, ailment.Expr.Const) and (target.value, target_idx) in addr_and_ids:
            return remove_last_statement(node)
    return None
847
+
816
848
  @staticmethod
817
849
  def _remove_last_statement_if_jump(
818
850
  node: BaseNode | ailment.Block,
@@ -46,8 +46,8 @@ class LibcStringHandlers(FunctionHandler):
46
46
 
47
47
  @FunctionCallDataUnwrapped.decorate
48
48
  def handle_impl_strncpy(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
49
- n = state.get_concrete_value(data.args_atoms[1])
50
- src_atom = state.deref(data.args_atoms[2], DerefSize.NULL_TERMINATE if n is None else n)
49
+ n = state.get_concrete_value(data.args_atoms[2])
50
+ src_atom = state.deref(data.args_atoms[1], DerefSize.NULL_TERMINATE if n is None else n)
51
51
  src_str = state.get_values(src_atom)
52
52
  if src_str is not None:
53
53
  dst_atom = state.deref(data.args_atoms[0], len(src_str) // 8)