angr 9.2.166__cp310-abi3-manylinux_2_28_aarch64.whl → 9.2.167__cp310-abi3-manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

@@ -13,7 +13,7 @@ from angr.ailment.block import Block
13
13
  from angr.ailment.statement import Statement, ConditionalJump, Jump, Label, Return
14
14
  from angr.ailment.expression import Const, UnaryOp, MultiStatementExpression, BinaryOp
15
15
 
16
- from angr.utils.graph import GraphUtils, Dominators, compute_dominance_frontier
16
+ from angr.utils.graph import GraphUtils
17
17
  from angr.utils.ail import is_phi_assignment, is_head_controlled_loop_block
18
18
  from angr.knowledge_plugins.cfg import IndirectJump, IndirectJumpType
19
19
  from angr.utils.constants import SWITCH_MISSING_DEFAULT_NODE_ADDR
@@ -24,6 +24,7 @@ from angr.analyses.decompiler.utils import (
24
24
  remove_last_statements,
25
25
  extract_jump_targets,
26
26
  switch_extract_cmp_bounds,
27
+ switch_extract_cmp_bounds_from_condition,
27
28
  is_empty_or_label_only_node,
28
29
  has_nonlabel_nonphi_statements,
29
30
  first_nonlabel_nonphi_statement,
@@ -31,6 +32,7 @@ from angr.analyses.decompiler.utils import (
31
32
  switch_extract_switch_expr_from_jump_target,
32
33
  )
33
34
  from angr.analyses.decompiler.counters.call_counter import AILCallCounter
35
+ from angr.analyses.decompiler.node_replacer import NodeReplacer
34
36
  from .structurer_nodes import (
35
37
  ConditionNode,
36
38
  SequenceNode,
@@ -71,6 +73,23 @@ class MultiStmtExprMode(str, Enum):
71
73
  MAX_ONE_CALL = "Only when less than one call"
72
74
 
73
75
 
76
+ class GraphEdgeFilter:
77
+ """
78
+ Filters away edges in a graph that are marked as deleted (outgoing-edges) during cyclic refinement.
79
+ """
80
+
81
+ def __init__(self, graph: networkx.DiGraph):
82
+ self.graph = graph
83
+
84
+ def __call__(self, src, dst) -> bool:
85
+ d = self.graph[src][dst]
86
+ return not d.get("cyclic_refinement_outgoing", False)
87
+
88
+
89
+ def _f(graph: networkx.DiGraph):
90
+ return networkx.subgraph_view(graph, filter_edge=GraphEdgeFilter(graph))
91
+
92
+
74
93
  class PhoenixStructurer(StructurerBase):
75
94
  """
76
95
  Structure a region using a structuring algorithm that is similar to the one in Phoenix decompiler (described in the
@@ -110,7 +129,7 @@ class PhoenixStructurer(StructurerBase):
110
129
  self.whitelist_edges: set[tuple[int, int]] = set()
111
130
  # also whitelist certain nodes that are definitely header for switch-case constructs. they should not be merged
112
131
  # into another node before we successfully structure the entire switch-case.
113
- self.switch_case_known_heads: set[Block] = set()
132
+ self.switch_case_known_heads: set[Block | BaseNode] = set()
114
133
 
115
134
  # whitelist certain nodes that should be treated as a tail node for do-whiles. these nodes should not be
116
135
  # absorbed into other SequenceNodes
@@ -126,6 +145,12 @@ class PhoenixStructurer(StructurerBase):
126
145
  self._improve_algorithm = improve_algorithm
127
146
  self._edge_virtualization_hints = []
128
147
 
148
+ # for each region, we only convert a switch-case head into an IncompleteSwitchCaseNode once. this is to avoid
149
+ # loops of creating and unpacking IncompleteSwitchCaseNode (when the entire switch-case construct is not yet
150
+ # ready to be structured, e.g., default node has a successor A and all case nodes have a successor B).
151
+ # TestDecompiler.test_decompiling_abnormal_switch_case_within_a_loop_with_redundant_jump captures this case.
152
+ self._matched_incomplete_switch_case_addrs: set[int] = set()
153
+
129
154
  # node_order keeps a dictionary of nodes and their order in a quasi-topological sort of the region full graph
130
155
  # (graph_with_successors). _generate_node_order() initializes this dictionary. we then update this dictionary
131
156
  # when new nodes are created. we do not populate this dictionary when working on acyclic graphs because it's
@@ -221,6 +246,7 @@ class PhoenixStructurer(StructurerBase):
221
246
  else:
222
247
  if pre_refinement_region is not None:
223
248
  # we could not make a loop after the last cycle refinement. restore the graph
249
+ l.debug("Could not structure the cyclic graph. Restoring the region to the pre-refinement state.")
224
250
  self._region = pre_refinement_region
225
251
 
226
252
  self.result = None # the actual result is in self._region.graph and self._region.graph_with_successors
@@ -230,7 +256,7 @@ class PhoenixStructurer(StructurerBase):
230
256
 
231
257
  if self._node_order is None:
232
258
  self._generate_node_order()
233
- acyclic_graph = to_acyclic_graph(self._region.graph, node_order=self._node_order)
259
+ acyclic_graph = to_acyclic_graph(_f(self._region.graph), node_order=self._node_order)
234
260
  for node in list(GraphUtils.dfs_postorder_nodes_deterministic(acyclic_graph, self._region.head)):
235
261
  if node not in self._region.graph:
236
262
  continue
@@ -281,10 +307,13 @@ class PhoenixStructurer(StructurerBase):
281
307
  self._rewrite_jumps_to_continues(loop_node.sequence_node)
282
308
  return True
283
309
 
284
- matched, loop_node = self._match_cyclic_natural_loop(node, head, graph, full_graph)
310
+ matched, loop_node, successor_node = self._match_cyclic_natural_loop(node, head, graph, full_graph)
285
311
  if matched:
286
312
  assert loop_node is not None
287
- if self._region.successors is not None and len(self._region.successors) == 1:
313
+ if successor_node is not None:
314
+ # traverse this node and rewrite all conditional jumps that go outside the loop to breaks
315
+ self._rewrite_conditional_jumps_to_breaks(loop_node.sequence_node, [successor_node.addr])
316
+ elif self._region.successors is not None and len(self._region.successors) == 1:
288
317
  # traverse this node and rewrite all conditional jumps that go outside the loop to breaks
289
318
  self._rewrite_conditional_jumps_to_breaks(
290
319
  loop_node.sequence_node, [succ.addr for succ in self._region.successors]
@@ -293,12 +322,16 @@ class PhoenixStructurer(StructurerBase):
293
322
  self._rewrite_jumps_to_continues(loop_node.sequence_node)
294
323
  return matched
295
324
 
296
- def _match_cyclic_while(self, node, head, graph, full_graph) -> tuple[bool, LoopNode | None, BaseNode | None]:
297
- succs = list(full_graph.successors(node))
325
+ def _match_cyclic_while(
326
+ self, node, head, graph_raw, full_graph_raw
327
+ ) -> tuple[bool, LoopNode | None, BaseNode | None]:
328
+ full_graph = _f(full_graph_raw)
329
+
330
+ succs = list(full_graph_raw.successors(node))
298
331
  if len(succs) == 2:
299
332
  left, right = succs
300
333
 
301
- if full_graph.has_edge(right, node) and not full_graph.has_edge(left, node):
334
+ if full_graph_raw.has_edge(right, node) and not full_graph_raw.has_edge(left, node):
302
335
  left, right = right, left
303
336
  if left is node:
304
337
  # self loop
@@ -332,54 +365,70 @@ class PhoenixStructurer(StructurerBase):
332
365
  # it's a while loop if the conditional jump (or the head block) is at the beginning of node
333
366
  loop_type = "while" if head_block_idx == 0 else "do-while"
334
367
  # otherwise it's a do-while loop
335
- if self.cond_proc.have_opposite_edge_conditions(full_graph, head_block, left, right):
368
+ if self.cond_proc.have_opposite_edge_conditions(full_graph_raw, head_block, left, right):
336
369
  # c = !c
337
- edge_cond_left = self.cond_proc.recover_edge_condition(full_graph, head_block, left)
370
+ edge_cond_left = self.cond_proc.recover_edge_condition(full_graph_raw, head_block, left)
338
371
  if head_block_idx == 0:
339
372
  self._remove_first_statement_if_jump(head_block)
340
373
  else:
341
374
  remove_last_statement(head_block)
342
375
  seq_node = SequenceNode(node.addr, nodes=[node]) if not isinstance(node, SequenceNode) else node
343
376
  loop_node = LoopNode(loop_type, edge_cond_left, seq_node, addr=seq_node.addr)
344
- self.replace_nodes(graph, node, loop_node, self_loop=False)
345
- self.replace_nodes(full_graph, node, loop_node, self_loop=False)
377
+ self.replace_nodes(graph_raw, node, loop_node, self_loop=False, drop_refinement_marks=True)
378
+ self.replace_nodes(
379
+ full_graph_raw,
380
+ node,
381
+ loop_node,
382
+ self_loop=False,
383
+ update_node_order=True,
384
+ drop_refinement_marks=True,
385
+ )
346
386
 
347
387
  # ensure the loop has only one successor: the right node
348
- self._remove_edges_except(graph, loop_node, right)
349
- self._remove_edges_except(full_graph, loop_node, right)
388
+ self._remove_edges_except(graph_raw, loop_node, right)
389
+ self._remove_edges_except(full_graph_raw, loop_node, right)
350
390
 
351
391
  return True, loop_node, right
352
392
  elif (
353
- full_graph.has_edge(left, node)
393
+ full_graph_raw.has_edge(left, node)
354
394
  and left is not head
355
- and full_graph.in_degree[left] == 1
395
+ and full_graph_raw.in_degree[left] == 1
356
396
  and full_graph.out_degree[left] == 1
357
- and not full_graph.has_edge(right, node)
397
+ and not full_graph_raw.has_edge(right, node)
358
398
  ):
359
399
  # possible candidate
360
400
  _, _, head_block = self._find_node_going_to_dst(node, left, condjump_only=True)
361
401
  if head_block is not None:
362
- if self.cond_proc.have_opposite_edge_conditions(full_graph, head_block, left, right):
402
+ if self.cond_proc.have_opposite_edge_conditions(full_graph_raw, head_block, left, right):
363
403
  # c = !c
364
404
  if PhoenixStructurer._is_single_statement_block(node):
365
405
  # the single-statement-block check is to ensure we don't execute any code before the
366
406
  # conditional jump. this way the entire node can be dropped.
367
- edge_cond_left = self.cond_proc.recover_edge_condition(full_graph, head_block, left)
407
+ edge_cond_left = self.cond_proc.recover_edge_condition(full_graph_raw, head_block, left)
368
408
  new_node = SequenceNode(node.addr, nodes=[left])
369
409
  loop_node = LoopNode("while", edge_cond_left, new_node, addr=node.addr)
370
410
 
371
411
  # on the original graph
372
- self.replace_nodes(graph, node, loop_node, old_node_1=left, self_loop=False)
412
+ self.replace_nodes(
413
+ graph_raw, node, loop_node, old_node_1=left, self_loop=False, drop_refinement_marks=True
414
+ )
373
415
  # on the graph with successors
374
- self.replace_nodes(full_graph, node, loop_node, old_node_1=left, self_loop=False)
416
+ self.replace_nodes(
417
+ full_graph_raw,
418
+ node,
419
+ loop_node,
420
+ old_node_1=left,
421
+ self_loop=False,
422
+ update_node_order=True,
423
+ )
375
424
 
376
425
  # ensure the loop has only one successor: the right node
377
- self._remove_edges_except(graph, loop_node, right)
378
- self._remove_edges_except(full_graph, loop_node, right)
426
+ self._remove_edges_except(graph_raw, loop_node, right)
427
+ self._remove_edges_except(full_graph_raw, loop_node, right)
379
428
 
380
429
  return True, loop_node, right
381
430
  # we generate a while-true loop instead
382
- edge_cond_right = self.cond_proc.recover_edge_condition(full_graph, head_block, right)
431
+ edge_cond_right = self.cond_proc.recover_edge_condition(full_graph_raw, head_block, right)
383
432
  last_stmt = self._remove_last_statement_if_jump(head_block)
384
433
  assert last_stmt is not None
385
434
  cond_jump = Jump(
@@ -394,13 +443,23 @@ class PhoenixStructurer(StructurerBase):
394
443
  loop_node = LoopNode("while", claripy.true(), new_node, addr=node.addr)
395
444
 
396
445
  # on the original graph
397
- self.replace_nodes(graph, node, loop_node, old_node_1=left, self_loop=False)
446
+ self.replace_nodes(
447
+ graph_raw, node, loop_node, old_node_1=left, self_loop=False, drop_refinement_marks=True
448
+ )
398
449
  # on the graph with successors
399
- self.replace_nodes(full_graph, node, loop_node, old_node_1=left, self_loop=False)
450
+ self.replace_nodes(
451
+ full_graph_raw,
452
+ node,
453
+ loop_node,
454
+ old_node_1=left,
455
+ self_loop=False,
456
+ update_node_order=True,
457
+ drop_refinement_marks=True,
458
+ )
400
459
 
401
460
  # ensure the loop has only one successor: the right node
402
- self._remove_edges_except(graph, loop_node, right)
403
- self._remove_edges_except(full_graph, loop_node, right)
461
+ self._remove_edges_except(graph_raw, loop_node, right)
462
+ self._remove_edges_except(full_graph_raw, loop_node, right)
404
463
 
405
464
  return True, loop_node, right
406
465
 
@@ -417,26 +476,39 @@ class PhoenixStructurer(StructurerBase):
417
476
  loop_node = LoopNode("while", claripy.true(), new_node, addr=node.addr)
418
477
 
419
478
  # on the original graph
420
- self.replace_nodes(graph, node, loop_node, old_node_1=left, self_loop=False)
479
+ self.replace_nodes(
480
+ graph_raw, node, loop_node, old_node_1=left, self_loop=False, drop_refinement_marks=True
481
+ )
421
482
  # on the graph with successors
422
- self.replace_nodes(full_graph, node, loop_node, old_node_1=left, self_loop=False)
483
+ self.replace_nodes(
484
+ full_graph_raw,
485
+ node,
486
+ loop_node,
487
+ old_node_1=left,
488
+ self_loop=False,
489
+ update_node_order=True,
490
+ drop_refinement_marks=True,
491
+ )
423
492
 
424
493
  # ensure the loop has only one successor: the right node
425
- self._remove_edges_except(graph, loop_node, right)
426
- self._remove_edges_except(full_graph, loop_node, right)
494
+ self._remove_edges_except(graph_raw, loop_node, right)
495
+ self._remove_edges_except(full_graph_raw, loop_node, right)
427
496
 
428
497
  return True, loop_node, right
429
498
 
430
499
  return False, None, None
431
500
 
432
501
  def _match_cyclic_while_with_single_successor(
433
- self, node, head, graph, full_graph
502
+ self, node, head, graph_raw, full_graph_raw
434
503
  ) -> tuple[bool, LoopNode | None, BaseNode | None]:
435
504
  if self._region.successors:
436
505
  return False, None, None
437
506
  if node is not head:
438
507
  return False, None, None
439
508
 
509
+ full_graph = full_graph_raw
510
+ graph = graph_raw
511
+
440
512
  if not (node is head or graph.in_degree[node] == 2):
441
513
  return False, None, None
442
514
 
@@ -497,16 +569,18 @@ class PhoenixStructurer(StructurerBase):
497
569
  # on the original graph
498
570
  for node_ in seq_node.nodes:
499
571
  if node_ is not node_copy:
500
- graph.remove_node(node_)
501
- self.replace_nodes(graph, node, loop_node, self_loop=False)
502
- graph.add_edge(loop_node, successor_node)
572
+ graph_raw.remove_node(node_)
573
+ self.replace_nodes(graph_raw, node, loop_node, self_loop=False, drop_refinement_marks=True)
574
+ graph_raw.add_edge(loop_node, successor_node)
503
575
 
504
576
  # on the graph with successors
505
577
  for node_ in seq_node.nodes:
506
578
  if node_ is not node_copy:
507
- full_graph.remove_node(node_)
508
- self.replace_nodes(full_graph, node, loop_node, self_loop=False)
509
- full_graph.add_edge(loop_node, successor_node)
579
+ full_graph_raw.remove_node(node_)
580
+ self.replace_nodes(
581
+ full_graph_raw, node, loop_node, self_loop=False, update_node_order=True, drop_refinement_marks=True
582
+ )
583
+ full_graph_raw.add_edge(loop_node, successor_node)
510
584
 
511
585
  if self._node_order is not None:
512
586
  self._node_order[loop_node] = self._node_order[node]
@@ -523,10 +597,16 @@ class PhoenixStructurer(StructurerBase):
523
597
  return False
524
598
  return all(isinstance(stmt, Return) for stmt in last_stmts)
525
599
 
526
- def _match_cyclic_dowhile(self, node, head, graph, full_graph) -> tuple[bool, LoopNode | None, BaseNode | None]:
527
- preds = list(full_graph.predecessors(node))
600
+ def _match_cyclic_dowhile(
601
+ self, node, head, graph_raw, full_graph_raw
602
+ ) -> tuple[bool, LoopNode | None, BaseNode | None]:
603
+ full_graph = _f(full_graph_raw)
604
+
605
+ preds_raw = list(full_graph_raw.predecessors(node))
606
+ succs_raw = list(full_graph_raw.successors(node))
528
607
  succs = list(full_graph.successors(node))
529
- if ((node is head and len(preds) >= 1) or len(preds) >= 2) and len(succs) == 1:
608
+
609
+ if ((node is head and len(preds_raw) >= 1) or len(preds_raw) >= 2) and len(succs) == 1:
530
610
  succ = succs[0]
531
611
  succ_preds = list(full_graph.predecessors(succ))
532
612
  succ_succs = list(full_graph.successors(succ))
@@ -534,7 +614,9 @@ class PhoenixStructurer(StructurerBase):
534
614
  succ_succs.remove(node)
535
615
  out_node = succ_succs[0]
536
616
 
537
- if full_graph.has_edge(succ, node):
617
+ if (len(succs_raw) == 1 or (len(succs_raw) == 2 and out_node in succs_raw)) and full_graph.has_edge(
618
+ succ, node
619
+ ):
538
620
  # possible candidate
539
621
  _, _, succ_block = self._find_node_going_to_dst(succ, out_node, condjump_only=True)
540
622
  if succ_block is not None:
@@ -569,12 +651,22 @@ class PhoenixStructurer(StructurerBase):
569
651
  loop_node = LoopNode("do-while", edge_cond_succhead, new_node, addr=node.addr)
570
652
 
571
653
  # on the original graph
572
- self.replace_nodes(graph, node, loop_node, old_node_1=succ, self_loop=False)
654
+ self.replace_nodes(
655
+ graph_raw, node, loop_node, old_node_1=succ, self_loop=False, drop_refinement_marks=True
656
+ )
573
657
  # on the graph with successors
574
- self.replace_nodes(full_graph, node, loop_node, old_node_1=succ, self_loop=False)
658
+ self.replace_nodes(
659
+ full_graph_raw,
660
+ node,
661
+ loop_node,
662
+ old_node_1=succ,
663
+ self_loop=False,
664
+ update_node_order=True,
665
+ drop_refinement_marks=True,
666
+ )
575
667
 
576
668
  return True, loop_node, out_node
577
- elif ((node is head and len(preds) >= 1) or len(preds) >= 2) and len(succs) == 2 and node in succs:
669
+ elif ((node is head and len(preds_raw) >= 1) or len(preds_raw) >= 2) and len(succs) == 2 and node in succs:
578
670
  # head forms a self-loop
579
671
  succs.remove(node)
580
672
  succ = succs[0]
@@ -588,60 +680,98 @@ class PhoenixStructurer(StructurerBase):
588
680
  loop_node = LoopNode("do-while", edge_cond_head, seq_node, addr=seq_node.addr)
589
681
 
590
682
  # on the original graph
591
- self.replace_nodes(graph, node, loop_node, self_loop=False)
683
+ self.replace_nodes(graph_raw, node, loop_node, self_loop=False, drop_refinement_marks=True)
592
684
  # on the graph with successors
593
- self.replace_nodes(full_graph, node, loop_node, self_loop=False)
685
+ self.replace_nodes(
686
+ full_graph_raw,
687
+ node,
688
+ loop_node,
689
+ self_loop=False,
690
+ update_node_order=True,
691
+ drop_refinement_marks=True,
692
+ )
594
693
 
595
694
  return True, loop_node, succ
596
695
  return False, None, None
597
696
 
598
- def _match_cyclic_natural_loop(self, node, head, graph, full_graph) -> tuple[bool, LoopNode | None]:
697
+ def _match_cyclic_natural_loop(
698
+ self, node, head, graph_raw, full_graph_raw
699
+ ) -> tuple[bool, LoopNode | None, BaseNode | None]:
700
+
701
+ full_graph = _f(full_graph_raw)
702
+ graph = _f(graph_raw)
703
+
599
704
  if not (node is head or graph.in_degree[node] == 2):
600
- return False, None
705
+ return False, None, None
601
706
 
602
707
  # check if there is a cycle that starts with node and ends with node
603
708
  next_node = node
604
709
  seq_node = SequenceNode(node.addr, nodes=[node])
605
710
  seen_nodes = set()
711
+ loop_successor_candidates = set()
606
712
  while True:
607
- succs = list(graph.successors(next_node))
713
+ succs = list(full_graph.successors(next_node))
608
714
  if len(succs) != 1:
609
- return False, None
715
+ return False, None, None
610
716
  if full_graph.out_degree[next_node] > 1:
611
717
  # all successors in the full graph should have been refined away at this point
612
- return False, None
718
+ return False, None, None
719
+
720
+ if full_graph_raw.out_degree[next_node] > 1:
721
+ for _, raw_succ, edge_data in full_graph_raw.out_edges(next_node, data=True):
722
+ if raw_succ is succs[0]:
723
+ continue
724
+ if edge_data.get("cyclic_refinement_outgoing", False) is True:
725
+ loop_successor_candidates.add(raw_succ)
726
+ else:
727
+ # bad node found
728
+ return False, None, None
729
+
613
730
  next_node = succs[0]
614
731
 
615
732
  if next_node is node:
616
733
  break
617
734
  if next_node is head:
618
735
  # we don't want a loop with region head not as the first node of the body!
619
- return False, None
736
+ return False, None, None
620
737
  if next_node is not node and next_node in seen_nodes:
621
- return False, None
738
+ return False, None, None
622
739
 
623
740
  seen_nodes.add(next_node)
624
741
  seq_node.nodes.append(next_node)
625
742
 
743
+ if len(loop_successor_candidates) > 1:
744
+ return False, None, None
745
+
626
746
  loop_node = LoopNode("while", claripy.true(), seq_node, addr=node.addr)
627
747
 
628
748
  # on the original graph
629
749
  for node_ in seq_node.nodes:
630
750
  if node_ is not node:
631
- graph.remove_node(node_)
632
- self.replace_nodes(graph, node, loop_node, self_loop=False)
751
+ graph_raw.remove_node(node_)
752
+ self.replace_nodes(graph_raw, node, loop_node, self_loop=False, drop_refinement_marks=True)
633
753
 
634
754
  # on the graph with successors
635
755
  for node_ in seq_node.nodes:
636
756
  if node_ is not node:
637
- full_graph.remove_node(node_)
638
- self.replace_nodes(full_graph, node, loop_node, self_loop=False)
757
+ full_graph_raw.remove_node(node_)
758
+ self.replace_nodes(
759
+ full_graph_raw, node, loop_node, self_loop=False, update_node_order=True, drop_refinement_marks=True
760
+ )
761
+
762
+ successor = None if not loop_successor_candidates else next(iter(loop_successor_candidates))
763
+ if successor is not None:
764
+ if successor in graph:
765
+ graph_raw.add_edge(loop_node, successor)
766
+ if successor in full_graph:
767
+ full_graph_raw.add_edge(loop_node, successor)
639
768
 
640
- return True, loop_node
769
+ return True, loop_node, successor
641
770
 
642
771
  def _refine_cyclic(self) -> bool:
643
- loop_heads = {t for _, t in dfs_back_edges(self._region.graph, self._region.head)}
644
- sorted_loop_heads = GraphUtils.quasi_topological_sort_nodes(self._region.graph, nodes=list(loop_heads))
772
+ graph = _f(self._region.graph)
773
+ loop_heads = {t for _, t in dfs_back_edges(graph, self._region.head, visit_all_nodes=True)}
774
+ sorted_loop_heads = GraphUtils.quasi_topological_sort_nodes(graph, nodes=list(loop_heads))
645
775
 
646
776
  for head in sorted_loop_heads:
647
777
  l.debug("... refining cyclic at %r", head)
@@ -654,13 +784,16 @@ class PhoenixStructurer(StructurerBase):
654
784
  return False
655
785
 
656
786
  def _refine_cyclic_core(self, loop_head) -> bool:
657
- graph: networkx.DiGraph = self._region.graph
658
- fullgraph: networkx.DiGraph = (
787
+ graph_raw: networkx.DiGraph = self._region.graph
788
+ fullgraph_raw: networkx.DiGraph = (
659
789
  self._region.graph_with_successors
660
790
  if self._region.graph_with_successors is not None
661
791
  else networkx.DiGraph(self._region.graph)
662
792
  )
663
793
 
794
+ graph = _f(graph_raw)
795
+ fullgraph = _f(fullgraph_raw)
796
+
664
797
  # check if there is an out-going edge from the loop head
665
798
  head_succs = list(fullgraph.successors(loop_head))
666
799
  successor = None # the loop successor
@@ -710,15 +843,26 @@ class PhoenixStructurer(StructurerBase):
710
843
  continue_edges, outgoing_edges, successor = result_natural
711
844
 
712
845
  if outgoing_edges:
713
- # if there is a single successor, we convert all out-going edges into breaks;
846
+ # if there is a single successor, we convert all but the first one out-going edges into breaks;
714
847
  # if there are multiple successors, and if the current region does not have a parent region, then we
715
- # convert all out-going edges into gotos;
848
+ # convert all but the first successor-targeting out-going edges into gotos;
716
849
  # otherwise we give up.
717
850
 
718
851
  if self._parent_region is not None and len({dst for _, dst in outgoing_edges}) > 1:
719
852
  # give up because there is a parent region
720
853
  return False
721
854
 
855
+ # sanity check: if removing outgoing edges would create dangling nodes, then it means we are not ready for
856
+ # cyclic refinement yet.
857
+ outgoing_edges_by_dst = defaultdict(list)
858
+ for src, dst in outgoing_edges:
859
+ outgoing_edges_by_dst[dst].append(src)
860
+ for dst, srcs in outgoing_edges_by_dst.items():
861
+ if dst in graph and graph.in_degree[dst] == len(srcs):
862
+ return False
863
+
864
+ outgoing_edges = sorted(outgoing_edges, key=lambda edge: (edge[0].addr, edge[1].addr))
865
+
722
866
  if successor is None:
723
867
  successor_and_edgecounts = defaultdict(int)
724
868
  for _, dst in outgoing_edges:
@@ -741,7 +885,7 @@ class PhoenixStructurer(StructurerBase):
741
885
  # block in src may not be the actual block that has a direct jump or a conditional jump to dst. as
742
886
  # a result, we should walk all blocks in src to find the jump to dst, then extract the condition
743
887
  # and augment the corresponding block with a ConditionalBreak.
744
- _, src_parent, src_block = self._find_node_going_to_dst(src, dst)
888
+ _, _, src_block = self._find_node_going_to_dst(src, dst)
745
889
  if src_block is None:
746
890
  l.warning(
747
891
  "Cannot find the source block jumping to the destination block at %#x. "
@@ -749,11 +893,15 @@ class PhoenixStructurer(StructurerBase):
749
893
  dst.addr,
750
894
  )
751
895
  # remove the edge anyway
752
- fullgraph.remove_edge(src, dst)
896
+ fullgraph_raw[src][dst]["cyclic_refinement_outgoing"] = True
897
+ if graph.has_edge(src, dst):
898
+ graph_raw[src][dst]["cyclic_refinement_outgoing"] = True
753
899
  elif not isinstance(src_block, (Block, MultiNode)):
754
900
  # it has probably been structured into BreakNode or ConditionalBreakNode
755
901
  # just remove the edge
756
- fullgraph.remove_edge(src, dst)
902
+ fullgraph_raw[src][dst]["cyclic_refinement_outgoing"] = True
903
+ if graph.has_edge(src, dst):
904
+ graph_raw[src][dst]["cyclic_refinement_outgoing"] = True
757
905
  else:
758
906
  has_continue = False
759
907
  # at the same time, examine if there is an edge that goes from src to the continue node. if so,
@@ -788,14 +936,28 @@ class PhoenixStructurer(StructurerBase):
788
936
  ins_addr=last_src_stmt.ins_addr,
789
937
  )
790
938
  break_node_inner = Block(last_src_stmt.ins_addr, None, statements=[break_stmt])
939
+ fallthrough_node = next(iter(succ for succ in fullgraph.successors(src) if succ is not dst))
940
+ fallthrough_stmt = Jump(
941
+ None,
942
+ Const(None, None, fallthrough_node.addr, self.project.arch.bits),
943
+ target_idx=successor.idx if isinstance(successor, Block) else None,
944
+ ins_addr=last_src_stmt.ins_addr,
945
+ )
946
+ break_node_inner_fallthrough = Block(
947
+ last_src_stmt.ins_addr, None, statements=[fallthrough_stmt]
948
+ )
791
949
  break_node = ConditionNode(
792
950
  last_src_stmt.ins_addr,
793
951
  None,
794
952
  break_cond,
795
953
  break_node_inner,
954
+ false_node=break_node_inner_fallthrough,
796
955
  )
797
- new_node = SequenceNode(src_block.addr, nodes=[src_block, break_node])
956
+ new_src_block = self._copy_and_remove_last_statement_if_jump(src_block)
957
+ new_node = SequenceNode(src_block.addr, nodes=[new_src_block, break_node])
798
958
  if has_continue:
959
+ assert continue_node is not None
960
+
799
961
  if continue_node.addr is not None and self.is_a_jump_target(
800
962
  last_src_stmt, continue_node.addr
801
963
  ):
@@ -828,32 +990,30 @@ class PhoenixStructurer(StructurerBase):
828
990
  # we don't handle it here.
829
991
  pass
830
992
 
831
- self._remove_last_statement_if_jump(src_block)
832
- fullgraph.remove_edge(src, dst)
833
- if src_parent is not None:
834
- # replace the node in its parent node
835
- self.replace_node_in_node(src_parent, src_block, new_node)
836
- else:
837
- # directly replace the node in graph
838
- self.replace_nodes(graph, src, new_node)
839
- self.replace_nodes(fullgraph, src, new_node)
840
- if src is loop_head:
841
- loop_head = new_node
842
- if src is continue_node:
843
- continue_node = new_node
844
-
845
- self._replace_node_in_edge_list(outgoing_edges, src_block, new_node)
846
- self._replace_node_in_edge_list(continue_edges, src_block, new_node)
847
-
848
- # remove the last jump or conditional jump in src_block
849
- self._remove_last_statement_if_jump(src_block)
993
+ # we cannot modify the original src_block because loop refinement may fail and we must restore
994
+ # the original graph
995
+ new_src = NodeReplacer(src, {src_block: new_node}).result
996
+ if graph.has_edge(src, dst):
997
+ graph_raw[src][dst]["cyclic_refinement_outgoing"] = True
998
+ self.replace_nodes(graph_raw, src, new_src)
999
+ fullgraph_raw[src][dst]["cyclic_refinement_outgoing"] = True
1000
+ self.replace_nodes(fullgraph_raw, src, new_src, update_node_order=True)
1001
+ if src is loop_head:
1002
+ loop_head = new_src
1003
+ if src is continue_node:
1004
+ continue_node = new_src
1005
+
1006
+ self._replace_node_in_edge_list(outgoing_edges, src, new_src)
1007
+ self._replace_node_in_edge_list(continue_edges, src, new_src)
850
1008
 
851
1009
  else:
852
1010
  self.virtualized_edges.add((src, dst))
853
- fullgraph.remove_edge(src, dst)
1011
+ fullgraph_raw.remove_edge(src, dst)
1012
+ if graph.has_edge(src, dst):
1013
+ graph_raw.remove_edge(src, dst)
854
1014
  if fullgraph.in_degree[dst] == 0:
855
1015
  # drop this node
856
- fullgraph.remove_node(dst)
1016
+ fullgraph_raw.remove_node(dst)
857
1017
  if self._region.successors and dst in self._region.successors:
858
1018
  self._region.successors.remove(dst)
859
1019
 
@@ -868,10 +1028,12 @@ class PhoenixStructurer(StructurerBase):
868
1028
  for src, _ in continue_edges:
869
1029
  if src is src_to_ignore:
870
1030
  # this edge will be handled during loop structuring
1031
+ # mark it regardless
871
1032
  continue
872
1033
 
873
1034
  # due to prior structuring of sub regions, the continue node may already be a Jump statement deep in
874
1035
  # src at this point. we need to find the Jump statement and replace it.
1036
+ assert continue_node is not None
875
1037
  _, _, cont_block = self._find_node_going_to_dst(src, continue_node)
876
1038
  if cont_block is None:
877
1039
  # cont_block is not found. but it's ok. one possibility is that src is a jump table head with one
@@ -884,12 +1046,12 @@ class PhoenixStructurer(StructurerBase):
884
1046
  continue_node,
885
1047
  )
886
1048
  if graph.has_edge(src, continue_node):
887
- graph.remove_edge(src, continue_node)
888
- fullgraph.remove_edge(src, continue_node)
1049
+ graph_raw.remove_edge(src, continue_node)
1050
+ fullgraph_raw.remove_edge(src, continue_node)
889
1051
  else:
890
1052
  # remove the edge.
891
- graph.remove_edge(src, continue_node)
892
- fullgraph.remove_edge(src, continue_node)
1053
+ graph_raw.remove_edge(src, continue_node)
1054
+ fullgraph_raw.remove_edge(src, continue_node)
893
1055
  # replace it with the original node plus the continue node
894
1056
  try:
895
1057
  last_stmt = self.cond_proc.get_last_statement(cont_block)
@@ -917,40 +1079,83 @@ class PhoenixStructurer(StructurerBase):
917
1079
  elif isinstance(last_stmt, Jump):
918
1080
  new_cont_node = ContinueNode(last_stmt.ins_addr, continue_node.addr)
919
1081
 
920
- if new_cont_node is not None:
921
- self._remove_last_statement_if_jump(cont_block)
922
- new_node = SequenceNode(src.addr, nodes=[src, new_cont_node])
923
- self.replace_nodes(graph, src, new_node)
924
- self.replace_nodes(fullgraph, src, new_node)
1082
+ if new_cont_node is not None and isinstance(cont_block, (Block, MultiNode)):
1083
+ new_cont_block = self._copy_and_remove_last_statement_if_jump(cont_block)
1084
+ new_node = NodeReplacer(src, {cont_block: new_cont_block}).result
1085
+ new_src = SequenceNode(new_node.addr, nodes=[new_node, new_cont_node])
1086
+ self.replace_nodes(graph_raw, src, new_src)
1087
+ self.replace_nodes(fullgraph_raw, src, new_src, update_node_order=True)
925
1088
 
926
1089
  if loop_type == "do-while":
927
1090
  self.dowhile_known_tail_nodes.add(continue_node)
928
1091
 
929
1092
  return bool(outgoing_edges or len(continue_edges) > 1)
930
1093
 
1094
+ @staticmethod
1095
+ def _refine_cyclic_determine_loop_body(graph, fullgraph, loop_head, successor=None) -> set[BaseNode]:
1096
+ # determine the loop body: all nodes that have paths going to loop_head
1097
+ loop_body = {loop_head}
1098
+ for node in networkx.descendants(fullgraph, loop_head):
1099
+ if node in graph and networkx.has_path(graph, node, loop_head):
1100
+ loop_body.add(node)
1101
+
1102
+ # extend the loop body if possible
1103
+ while True:
1104
+ loop_body_updated = False
1105
+ for node in list(loop_body):
1106
+ new_nodes = set()
1107
+ succ_not_in_loop_body = False
1108
+ for succ in fullgraph.successors(node):
1109
+ if successor is not None and succ is successor:
1110
+ continue
1111
+ if succ not in loop_body and succ in graph and fullgraph.out_degree[succ] <= 1:
1112
+ if all(pred in loop_body for pred in fullgraph.predecessors(succ)):
1113
+ new_nodes.add(succ)
1114
+ else:
1115
+ # one of the predecessors of this successor is not in the loop body
1116
+ succ_not_in_loop_body = True
1117
+ if new_nodes and not succ_not_in_loop_body:
1118
+ loop_body |= new_nodes
1119
+ loop_body_updated = True
1120
+ if not loop_body_updated:
1121
+ break
1122
+
1123
+ return loop_body
1124
+
1125
+ @staticmethod
1126
+ def _refine_cyclic_is_while_loop_check_loop_head_successors(graph, head_succs) -> tuple[bool, Any]:
1127
+ assert len(head_succs) == 2
1128
+ a, b = head_succs
1129
+ a_in_graph = a in graph
1130
+ b_in_graph = b in graph
1131
+ if a_in_graph ^ b_in_graph:
1132
+ return True, b if a_in_graph else a
1133
+ return False, None
1134
+
931
1135
  def _refine_cyclic_is_while_loop(
932
1136
  self, graph, fullgraph, loop_head, head_succs
933
1137
  ) -> tuple[bool, tuple[list, list, BaseNode, BaseNode] | None]:
934
- if len(head_succs) == 2 and any(head_succ not in graph for head_succ in head_succs):
935
- # make sure the head_pred is not already structured
936
- _, _, head_block_0 = self._find_node_going_to_dst(loop_head, head_succs[0])
937
- _, _, head_block_1 = self._find_node_going_to_dst(loop_head, head_succs[1])
938
- if head_block_0 is head_block_1 and head_block_0 is not None:
939
- # there is an out-going edge from the loop head
940
- # virtualize all other edges
941
- continue_edges: list[tuple[BaseNode, BaseNode]] = []
942
- outgoing_edges = []
943
- successor = next(iter(head_succ for head_succ in head_succs if head_succ not in graph))
944
- for node in networkx.descendants(graph, loop_head):
945
- succs = list(fullgraph.successors(node))
946
- if loop_head in succs:
947
- continue_edges.append((node, loop_head))
948
-
949
- outside_succs = [succ for succ in succs if succ not in graph]
950
- for outside_succ in outside_succs:
951
- outgoing_edges.append((node, outside_succ))
952
-
953
- return True, (continue_edges, outgoing_edges, loop_head, successor)
1138
+ if len(head_succs) == 2:
1139
+ r, successor = self._refine_cyclic_is_while_loop_check_loop_head_successors(graph, head_succs)
1140
+ if r:
1141
+ # make sure the head_pred is not already structured
1142
+ _, _, head_block_0 = self._find_node_going_to_dst(loop_head, head_succs[0])
1143
+ _, _, head_block_1 = self._find_node_going_to_dst(loop_head, head_succs[1])
1144
+ if head_block_0 is head_block_1 and head_block_0 is not None:
1145
+ # there is an out-going edge from the loop head
1146
+ # virtualize all other edges
1147
+ continue_edges: list[tuple[BaseNode, BaseNode]] = []
1148
+ outgoing_edges = []
1149
+ # note that because we have determined that the loop is a while loop, outgoing_edges do not contain
1150
+ # edges that go from the loop head to the successor.
1151
+ for node in list(networkx.descendants(graph, loop_head)):
1152
+ succs = list(fullgraph.successors(node))
1153
+ if loop_head in succs:
1154
+ continue_edges.append((node, loop_head))
1155
+ outside_succs = [succ for succ in succs if succ not in graph]
1156
+ for outside_succ in outside_succs:
1157
+ outgoing_edges.append((node, outside_succ))
1158
+ return True, (continue_edges, outgoing_edges, loop_head, successor)
954
1159
  return False, None
955
1160
 
956
1161
  def _refine_cyclic_is_dowhile_loop(
@@ -961,69 +1166,52 @@ class PhoenixStructurer(StructurerBase):
961
1166
  if len(head_preds) == 1:
962
1167
  head_pred = head_preds[0]
963
1168
  head_pred_succs = list(fullgraph.successors(head_pred))
964
- if len(head_pred_succs) == 2 and any(nn not in graph for nn in head_pred_succs):
1169
+ if len(head_pred_succs) == 2:
1170
+ successor = next(iter(nn for nn in head_pred_succs if nn is not loop_head))
965
1171
  # make sure the head_pred is not already structured
966
- _, _, src_block_0 = self._find_node_going_to_dst(head_pred, head_pred_succs[0])
967
- _, _, src_block_1 = self._find_node_going_to_dst(head_pred, head_pred_succs[1])
1172
+ _, _, src_block_0 = self._find_node_going_to_dst(head_pred, loop_head)
1173
+ _, _, src_block_1 = self._find_node_going_to_dst(head_pred, successor)
968
1174
  if src_block_0 is src_block_1 and src_block_0 is not None:
969
1175
  continue_edges: list[tuple[BaseNode, BaseNode]] = []
970
1176
  outgoing_edges = []
971
1177
  # there is an out-going edge from the loop tail
972
1178
  # virtualize all other edges
973
- successor = next(iter(nn for nn in head_pred_succs if nn not in graph))
974
1179
  continue_node = head_pred
975
- for node in networkx.descendants(graph, loop_head):
1180
+ loop_body = PhoenixStructurer._refine_cyclic_determine_loop_body(
1181
+ graph, fullgraph, loop_head, successor=successor
1182
+ )
1183
+ for node in loop_body:
976
1184
  if node is head_pred:
977
1185
  continue
978
1186
  succs = list(fullgraph.successors(node))
979
1187
  if head_pred in succs:
980
1188
  continue_edges.append((node, head_pred))
981
1189
 
982
- outside_succs = [succ for succ in succs if succ not in graph]
1190
+ outside_succs = [succ for succ in succs if succ not in loop_body]
983
1191
  for outside_succ in outside_succs:
984
1192
  outgoing_edges.append((node, outside_succ))
985
1193
 
986
1194
  return True, (continue_edges, outgoing_edges, continue_node, successor)
987
1195
  return False, None
988
1196
 
989
- def _refine_cyclic_make_natural_loop(
990
- self, graph, fullgraph, loop_head
991
- ) -> tuple[bool, tuple[list, list, Any] | None]:
1197
+ @staticmethod
1198
+ def _refine_cyclic_make_natural_loop(graph, fullgraph, loop_head) -> tuple[bool, tuple[list, list, Any] | None]:
992
1199
  continue_edges = []
993
1200
  outgoing_edges = []
994
1201
 
995
- # find dominance frontier
996
- doms = Dominators(fullgraph, self._region.head)
997
- dom_frontiers = compute_dominance_frontier(fullgraph, doms.dom)
1202
+ loop_body = PhoenixStructurer._refine_cyclic_determine_loop_body(graph, fullgraph, loop_head)
998
1203
 
999
- if loop_head not in dom_frontiers:
1000
- return False, None
1001
- dom_frontier = dom_frontiers[loop_head]
1002
-
1003
- # now this is a little complex
1004
- dom_frontier = {node for node in dom_frontier if node is not loop_head}
1005
- if len(dom_frontier) == 0:
1006
- # the dominance frontier is empty (the loop head dominates all nodes in the full graph). however, this does
1007
- # not mean that the loop head must dominate all the nodes, because we only have a limited view of the full
1008
- # graph (e.g., some predecessors of the successor may not be in this full graph). as such, successors are
1009
- # the ones that are in the fullgraph but not in the graph.
1010
- successor_candidates = set()
1011
- for node in networkx.descendants(graph, loop_head):
1012
- for succ in fullgraph.successors(node):
1013
- if succ not in graph:
1014
- successor_candidates.add(succ)
1015
- if loop_head is succ:
1016
- continue_edges.append((node, succ))
1204
+ # determine successor candidates using the loop body
1205
+ successor_candidates = set()
1206
+ for node in loop_body:
1207
+ for succ in fullgraph.successors(node):
1208
+ if succ not in loop_body:
1209
+ successor_candidates.add(succ)
1017
1210
 
1018
- else:
1019
- # this loop has a single successor
1020
- successor_candidates = dom_frontier
1021
- # traverse the loop body to find all continue edges
1022
- tmp_graph = networkx.DiGraph(graph)
1023
- tmp_graph.remove_nodes_from(successor_candidates)
1024
- for node in networkx.descendants(tmp_graph, loop_head):
1025
- if tmp_graph.has_edge(node, loop_head):
1026
- continue_edges.append((node, loop_head))
1211
+ # traverse the loop body to find all continue edges
1212
+ for node in loop_body:
1213
+ if graph.has_edge(node, loop_head):
1214
+ continue_edges.append((node, loop_head))
1027
1215
 
1028
1216
  if len(successor_candidates) == 0:
1029
1217
  successor = None
@@ -1039,7 +1227,7 @@ class PhoenixStructurer(StructurerBase):
1039
1227
  # mark all edges as outgoing edges so they will be virtualized if they don't lead to the successor
1040
1228
  for node in successor_candidates:
1041
1229
  for pred in fullgraph.predecessors(node):
1042
- if networkx.has_path(doms.dom, loop_head, pred):
1230
+ if pred in graph:
1043
1231
  outgoing_edges.append((pred, node))
1044
1232
 
1045
1233
  return True, (continue_edges, outgoing_edges, successor)
@@ -1092,15 +1280,16 @@ class PhoenixStructurer(StructurerBase):
1092
1280
  acyclic_graph = graph
1093
1281
  else:
1094
1282
  acyclic_graph = networkx.DiGraph(graph)
1095
- acyclic_graph.remove_edges_from(graph.in_edges(head))
1096
-
1097
- self._assert_graph_ok(acyclic_graph, "Removed wrong edges")
1283
+ if len([node for node in acyclic_graph if acyclic_graph.in_degree[node] == 0]) == 0:
1284
+ acyclic_graph.remove_edges_from(graph.in_edges(head))
1285
+ self._assert_graph_ok(acyclic_graph, "Removed wrong edges")
1098
1286
 
1099
1287
  for node in list(GraphUtils.dfs_postorder_nodes_deterministic(acyclic_graph, head)):
1100
1288
  if node not in graph:
1101
1289
  continue
1102
1290
  if graph.has_edge(node, head):
1103
- # it's a back edge. skip
1291
+ # it's a back edge
1292
+ l.debug("... %r -> %r is a back edge", node, head)
1104
1293
  continue
1105
1294
  l.debug("... matching acyclic switch-case constructs at %r", node)
1106
1295
  matched = self._match_acyclic_switch_cases(graph, full_graph, node)
@@ -1159,7 +1348,9 @@ class PhoenixStructurer(StructurerBase):
1159
1348
  return r
1160
1349
  return self._match_acyclic_incomplete_switch_cases(node, graph, full_graph)
1161
1350
 
1162
- def _match_acyclic_switch_cases_incomplete_switch_head(self, node, graph, full_graph) -> bool:
1351
+ def _match_acyclic_switch_cases_incomplete_switch_head(
1352
+ self, node, graph_raw: networkx.DiGraph, full_graph_raw: networkx.DiGraph
1353
+ ) -> bool:
1163
1354
  try:
1164
1355
  last_stmts = self.cond_proc.get_last_statements(node)
1165
1356
  except EmptyBlockNotice:
@@ -1187,8 +1378,8 @@ class PhoenixStructurer(StructurerBase):
1187
1378
  node,
1188
1379
  node,
1189
1380
  node_default_addr,
1190
- graph,
1191
- full_graph,
1381
+ graph_raw,
1382
+ full_graph_raw,
1192
1383
  )
1193
1384
  if node_default_addr is not None and node_default is None:
1194
1385
  # the default node is not found. it's likely the node has been structured and is part of another construct
@@ -1200,8 +1391,8 @@ class PhoenixStructurer(StructurerBase):
1200
1391
  ins_addr=SWITCH_MISSING_DEFAULT_NODE_ADDR,
1201
1392
  )
1202
1393
  node_default = Block(SWITCH_MISSING_DEFAULT_NODE_ADDR, 0, statements=[jmp_to_default_node])
1203
- graph.add_edge(node, node_default)
1204
- full_graph.add_edge(node, node_default)
1394
+ graph_raw.add_edge(node, node_default)
1395
+ full_graph_raw.add_edge(node, node_default)
1205
1396
  if self._node_order is not None:
1206
1397
  self._node_order[node_default] = self._node_order[node]
1207
1398
  r = self._make_switch_cases_core(
@@ -1212,8 +1403,8 @@ class PhoenixStructurer(StructurerBase):
1212
1403
  node_default,
1213
1404
  last_stmt.ins_addr,
1214
1405
  to_remove,
1215
- graph,
1216
- full_graph,
1406
+ graph_raw,
1407
+ full_graph_raw,
1217
1408
  bail_on_nonhead_outedges=True,
1218
1409
  )
1219
1410
  if not r:
@@ -1233,18 +1424,62 @@ class PhoenixStructurer(StructurerBase):
1233
1424
  self._switch_handle_gotos(cases, node_default, switch_end_addr)
1234
1425
  return True
1235
1426
 
1236
- def _match_acyclic_switch_cases_address_loaded_from_memory(self, node, graph, full_graph) -> bool:
1237
- try:
1238
- last_stmt = self.cond_proc.get_last_statement(node)
1239
- except EmptyBlockNotice:
1240
- return False
1427
+ def _match_acyclic_switch_cases_address_loaded_from_memory(self, node, graph_raw, full_graph_raw) -> bool:
1241
1428
 
1242
- if last_stmt is None:
1243
- return False
1429
+ successor_addrs: list[int] = []
1430
+ cmp_expr: int = 0
1431
+ cmp_lb: int = 0
1432
+ switch_head_addr: int = 0
1244
1433
 
1245
- successor_addrs = extract_jump_targets(last_stmt)
1246
- if len(successor_addrs) != 2:
1247
- return False
1434
+ # case 1: the last block is a ConditionNode with two goto statements
1435
+ if isinstance(node, SequenceNode) and node.nodes and isinstance(node.nodes[-1], ConditionNode):
1436
+ cond_node = node.nodes[-1]
1437
+ assert isinstance(cond_node, ConditionNode)
1438
+ if (
1439
+ cond_node.true_node is not None
1440
+ and cond_node.false_node is not None
1441
+ and isinstance(cond_node.true_node, Block)
1442
+ and isinstance(cond_node.false_node, Block)
1443
+ ):
1444
+ successor_addrs = [
1445
+ *extract_jump_targets(cond_node.true_node.statements[-1]),
1446
+ *extract_jump_targets(cond_node.false_node.statements[-1]),
1447
+ ]
1448
+ if len(successor_addrs) != 2 or None in successor_addrs:
1449
+ return False
1450
+
1451
+ # extract the comparison expression, lower-, and upper-bounds from the last statement
1452
+ cmp = switch_extract_cmp_bounds_from_condition(
1453
+ self.cond_proc.convert_claripy_bool_ast(cond_node.condition)
1454
+ )
1455
+ if not cmp:
1456
+ return False
1457
+ cmp_expr, cmp_lb, cmp_ub = cmp # pylint:disable=unused-variable
1458
+
1459
+ assert cond_node.addr is not None
1460
+ switch_head_addr = cond_node.addr
1461
+
1462
+ # case 2: the last statement is a conditional jump
1463
+ if not successor_addrs:
1464
+ try:
1465
+ last_stmt = self.cond_proc.get_last_statement(node)
1466
+ except EmptyBlockNotice:
1467
+ return False
1468
+
1469
+ if last_stmt is None:
1470
+ return False
1471
+
1472
+ successor_addrs = extract_jump_targets(last_stmt)
1473
+ if len(successor_addrs) != 2:
1474
+ return False
1475
+
1476
+ # extract the comparison expression, lower-, and upper-bounds from the last statement
1477
+ cmp = switch_extract_cmp_bounds(last_stmt)
1478
+ if not cmp:
1479
+ return False
1480
+ cmp_expr, cmp_lb, cmp_ub = cmp # pylint:disable=unused-variable
1481
+
1482
+ switch_head_addr = last_stmt.ins_addr
1248
1483
 
1249
1484
  for t in successor_addrs:
1250
1485
  if t in self.jump_tables:
@@ -1258,11 +1493,8 @@ class PhoenixStructurer(StructurerBase):
1258
1493
  if jump_table.type != IndirectJumpType.Jumptable_AddressLoadedFromMemory:
1259
1494
  return False
1260
1495
 
1261
- # extract the comparison expression, lower-, and upper-bounds from the last statement
1262
- cmp = switch_extract_cmp_bounds(last_stmt)
1263
- if not cmp:
1264
- return False
1265
- cmp_expr, cmp_lb, cmp_ub = cmp # pylint:disable=unused-variable
1496
+ graph = _f(graph_raw)
1497
+ full_graph = _f(full_graph_raw)
1266
1498
 
1267
1499
  node_a = next(iter(nn for nn in graph.nodes if nn.addr == target), None)
1268
1500
  if node_a is None:
@@ -1279,6 +1511,8 @@ class PhoenixStructurer(StructurerBase):
1279
1511
 
1280
1512
  # populate whitelist_edges
1281
1513
  assert jump_table.jumptable_entries is not None
1514
+ assert isinstance(node_a.addr, int)
1515
+ assert isinstance(node.addr, int)
1282
1516
  for case_node_addr in jump_table.jumptable_entries:
1283
1517
  self.whitelist_edges.add((node_a.addr, case_node_addr))
1284
1518
  self.whitelist_edges.add((node.addr, node_b_addr))
@@ -1307,22 +1541,22 @@ class PhoenixStructurer(StructurerBase):
1307
1541
  node_default = self._switch_find_default_node(graph, node, node_b_addr)
1308
1542
  if node_default is not None:
1309
1543
  # ensure we have successfully structured node_default
1310
- if full_graph.out_degree(node_default) > 1:
1544
+ if full_graph.out_degree[node_default] > 1:
1311
1545
  return False
1312
1546
 
1313
1547
  # un-structure IncompleteSwitchCaseNode
1314
1548
  if isinstance(node_a, SequenceNode) and node_a.nodes and isinstance(node_a.nodes[0], IncompleteSwitchCaseNode):
1315
- _, new_seq_node = self._unpack_sequencenode_head(graph, node_a)
1549
+ _, new_seq_node = self._unpack_sequencenode_head(graph_raw, node_a)
1316
1550
  if new_seq_node is not None and self._node_order is not None:
1317
1551
  self._node_order[new_seq_node] = self._node_order[node_a]
1318
- self._unpack_sequencenode_head(full_graph, node_a, new_seq=new_seq_node)
1552
+ self._unpack_sequencenode_head(full_graph_raw, node_a, new_seq=new_seq_node)
1319
1553
  # update node_a
1320
1554
  node_a = next(iter(nn for nn in graph.nodes if nn.addr == target))
1321
1555
  if isinstance(node_a, IncompleteSwitchCaseNode):
1322
- r = self._unpack_incompleteswitchcasenode(graph, node_a)
1556
+ r = self._unpack_incompleteswitchcasenode(graph_raw, node_a)
1323
1557
  if not r:
1324
1558
  return False
1325
- self._unpack_incompleteswitchcasenode(full_graph, node_a) # this shall not fail
1559
+ self._unpack_incompleteswitchcasenode(full_graph_raw, node_a) # this shall not fail
1326
1560
  # update node_a
1327
1561
  node_a = next(iter(nn for nn in graph.nodes if nn.addr == target))
1328
1562
  if self._node_order is not None:
@@ -1339,8 +1573,8 @@ class PhoenixStructurer(StructurerBase):
1339
1573
  node,
1340
1574
  node_a,
1341
1575
  node_b_addr,
1342
- graph,
1343
- full_graph,
1576
+ graph_raw,
1577
+ full_graph_raw,
1344
1578
  )
1345
1579
 
1346
1580
  if isinstance(better_node_a, SwitchCaseNode) and better_node_a.default_node is None:
@@ -1352,7 +1586,7 @@ class PhoenixStructurer(StructurerBase):
1352
1586
  # if node_a and default_node have different successors we need to bail
1353
1587
  return False
1354
1588
 
1355
- for pgraph in (graph, full_graph):
1589
+ for pgraph in (graph_raw, full_graph_raw):
1356
1590
  all_preds = set(pgraph.pred[node])
1357
1591
  all_succs = set(pgraph.succ[node_a])
1358
1592
  if node_default is not None:
@@ -1383,10 +1617,10 @@ class PhoenixStructurer(StructurerBase):
1383
1617
  cases,
1384
1618
  node_b_addr,
1385
1619
  node_default,
1386
- last_stmt.ins_addr,
1620
+ switch_head_addr,
1387
1621
  to_remove,
1388
- graph,
1389
- full_graph,
1622
+ graph_raw,
1623
+ full_graph_raw,
1390
1624
  node_a=node_a,
1391
1625
  )
1392
1626
  if not r:
@@ -1399,12 +1633,18 @@ class PhoenixStructurer(StructurerBase):
1399
1633
 
1400
1634
  return True
1401
1635
 
1402
- def _match_acyclic_switch_cases_address_loaded_from_memory_no_default_node(self, node, graph, full_graph) -> bool:
1636
+ def _match_acyclic_switch_cases_address_loaded_from_memory_no_default_node(
1637
+ self, node, graph_raw, full_graph_raw
1638
+ ) -> bool:
1403
1639
  # sanity checks
1404
1640
  if not isinstance(node, IncompleteSwitchCaseNode):
1405
1641
  return False
1406
1642
  if node.addr not in self.jump_tables:
1407
1643
  return False
1644
+
1645
+ graph = _f(graph_raw)
1646
+ full_graph = _f(full_graph_raw)
1647
+
1408
1648
  # ensure _match_acyclic_switch_cases_address_load_from_memory cannot structure its predecessor (and this node)
1409
1649
  preds = list(graph.predecessors(node))
1410
1650
  if len(preds) != 1:
@@ -1449,10 +1689,10 @@ class PhoenixStructurer(StructurerBase):
1449
1689
 
1450
1690
  # un-structure IncompleteSwitchCaseNode
1451
1691
  if isinstance(node, IncompleteSwitchCaseNode):
1452
- r = self._unpack_incompleteswitchcasenode(graph, node)
1692
+ r = self._unpack_incompleteswitchcasenode(graph_raw, node)
1453
1693
  if not r:
1454
1694
  return False
1455
- self._unpack_incompleteswitchcasenode(full_graph, node) # this shall not fail
1695
+ self._unpack_incompleteswitchcasenode(full_graph_raw, node) # this shall not fail
1456
1696
  # update node
1457
1697
  node = next(iter(nn for nn in graph.nodes if nn.addr == jump_table.addr))
1458
1698
 
@@ -1463,8 +1703,8 @@ class PhoenixStructurer(StructurerBase):
1463
1703
  node,
1464
1704
  node,
1465
1705
  None,
1466
- graph,
1467
- full_graph,
1706
+ graph_raw,
1707
+ full_graph_raw,
1468
1708
  )
1469
1709
 
1470
1710
  # we don't know what the end address of this switch-case structure is. let's figure it out
@@ -1477,8 +1717,8 @@ class PhoenixStructurer(StructurerBase):
1477
1717
  None,
1478
1718
  last_stmt.ins_addr,
1479
1719
  to_remove,
1480
- graph,
1481
- full_graph,
1720
+ graph_raw,
1721
+ full_graph_raw,
1482
1722
  node_a=None,
1483
1723
  )
1484
1724
  if not r:
@@ -1490,7 +1730,9 @@ class PhoenixStructurer(StructurerBase):
1490
1730
 
1491
1731
  return True
1492
1732
 
1493
- def _match_acyclic_switch_cases_address_loaded_from_memory_no_ob_check(self, node, graph, full_graph) -> bool:
1733
+ def _match_acyclic_switch_cases_address_loaded_from_memory_no_ob_check(
1734
+ self, node, graph_raw, full_graph_raw
1735
+ ) -> bool:
1494
1736
  if node.addr not in self.jump_tables:
1495
1737
  return False
1496
1738
 
@@ -1522,6 +1764,8 @@ class PhoenixStructurer(StructurerBase):
1522
1764
  self.whitelist_edges.add((node.addr, case_node_addr))
1523
1765
  self.switch_case_known_heads.add(node)
1524
1766
 
1767
+ graph = _f(graph_raw)
1768
+
1525
1769
  # sanity check: case nodes are successors to node. all case nodes must have at most common one successor
1526
1770
  node_pred = None
1527
1771
  if graph.in_degree[node] == 1:
@@ -1548,8 +1792,8 @@ class PhoenixStructurer(StructurerBase):
1548
1792
  node,
1549
1793
  node,
1550
1794
  None,
1551
- graph,
1552
- full_graph,
1795
+ graph_raw,
1796
+ full_graph_raw,
1553
1797
  )
1554
1798
 
1555
1799
  assert node_default is None
@@ -1563,8 +1807,8 @@ class PhoenixStructurer(StructurerBase):
1563
1807
  None,
1564
1808
  last_stmt.ins_addr,
1565
1809
  to_remove,
1566
- graph,
1567
- full_graph,
1810
+ graph_raw,
1811
+ full_graph_raw,
1568
1812
  node_a=None,
1569
1813
  )
1570
1814
  if not r:
@@ -1577,7 +1821,9 @@ class PhoenixStructurer(StructurerBase):
1577
1821
 
1578
1822
  return True
1579
1823
 
1580
- def _match_acyclic_switch_cases_address_computed(self, node, graph, full_graph) -> bool:
1824
+ def _match_acyclic_switch_cases_address_computed(
1825
+ self, node, graph_raw: networkx.DiGraph, full_graph_raw: networkx.DiGraph
1826
+ ) -> bool:
1581
1827
  if node.addr not in self.jump_tables:
1582
1828
  return False
1583
1829
  jump_table = self.jump_tables[node.addr]
@@ -1611,10 +1857,13 @@ class PhoenixStructurer(StructurerBase):
1611
1857
  else:
1612
1858
  return False
1613
1859
 
1860
+ graph = _f(graph_raw)
1861
+ full_graph = _f(full_graph_raw)
1862
+
1614
1863
  node_default = self._switch_find_default_node(graph, node, default_addr)
1615
1864
  if node_default is not None:
1616
1865
  # ensure we have successfully structured node_default
1617
- if full_graph.out_degree(node_default) > 1:
1866
+ if full_graph.out_degree[node_default] > 1:
1618
1867
  return False
1619
1868
 
1620
1869
  case_and_entry_addrs = self._find_case_and_entry_addrs(node, graph, cmp_lb, jump_table)
@@ -1624,19 +1873,19 @@ class PhoenixStructurer(StructurerBase):
1624
1873
  node,
1625
1874
  node,
1626
1875
  default_addr,
1627
- graph,
1628
- full_graph,
1876
+ graph_raw,
1877
+ full_graph_raw,
1629
1878
  )
1630
1879
  if node_default is None:
1631
1880
  # there must be a default case
1632
1881
  return False
1633
1882
 
1634
1883
  return self._make_switch_cases_core(
1635
- node, cmp_expr, cases, default_addr, node_default, node.addr, to_remove, graph, full_graph
1884
+ node, cmp_expr, cases, default_addr, node_default, node.addr, to_remove, graph_raw, full_graph_raw
1636
1885
  )
1637
1886
 
1638
1887
  def _match_acyclic_incomplete_switch_cases(
1639
- self, node, graph: networkx.DiGraph, full_graph: networkx.DiGraph
1888
+ self, node, graph_raw: networkx.DiGraph, full_graph_raw: networkx.DiGraph
1640
1889
  ) -> bool:
1641
1890
  # sanity checks
1642
1891
  if node.addr not in self.jump_tables:
@@ -1646,6 +1895,9 @@ class PhoenixStructurer(StructurerBase):
1646
1895
  if is_empty_or_label_only_node(node):
1647
1896
  return False
1648
1897
 
1898
+ graph = _f(graph_raw)
1899
+ full_graph = _f(full_graph_raw)
1900
+
1649
1901
  successors = list(graph.successors(node))
1650
1902
 
1651
1903
  jump_table = self.jump_tables[node.addr]
@@ -1661,16 +1913,17 @@ class PhoenixStructurer(StructurerBase):
1661
1913
  succ for succ in full_graph.successors(succ) if succ is not node and succ not in successors
1662
1914
  }
1663
1915
  out_nodes = list(out_nodes)
1664
- if len(out_nodes) <= 1:
1916
+ if len(out_nodes) <= 1 and node.addr not in self._matched_incomplete_switch_case_addrs:
1917
+ self._matched_incomplete_switch_case_addrs.add(node.addr)
1665
1918
  new_node = IncompleteSwitchCaseNode(node.addr, node, successors)
1666
- graph.remove_nodes_from(successors)
1667
- self.replace_nodes(graph, node, new_node)
1919
+ graph_raw.remove_nodes_from(successors)
1920
+ self.replace_nodes(graph_raw, node, new_node)
1668
1921
  if out_nodes and out_nodes[0] in graph:
1669
- graph.add_edge(new_node, out_nodes[0])
1670
- full_graph.remove_nodes_from(successors)
1671
- self.replace_nodes(full_graph, node, new_node)
1922
+ graph_raw.add_edge(new_node, out_nodes[0])
1923
+ full_graph_raw.remove_nodes_from(successors)
1924
+ self.replace_nodes(full_graph_raw, node, new_node, update_node_order=True)
1672
1925
  if out_nodes:
1673
- full_graph.add_edge(new_node, out_nodes[0])
1926
+ full_graph_raw.add_edge(new_node, out_nodes[0])
1674
1927
  if self._node_order:
1675
1928
  self._node_order[new_node] = self._node_order[node]
1676
1929
  return True
@@ -1682,12 +1935,14 @@ class PhoenixStructurer(StructurerBase):
1682
1935
  head_node,
1683
1936
  node_a: BaseNode,
1684
1937
  node_b_addr: int | None,
1685
- graph,
1686
- full_graph,
1938
+ graph_raw: networkx.DiGraph,
1939
+ full_graph_raw: networkx.DiGraph,
1687
1940
  ) -> tuple[OrderedDict, Any, set[Any]]:
1688
1941
  cases: OrderedDict[int | tuple[int, ...], SequenceNode] = OrderedDict()
1689
1942
  to_remove = set()
1690
1943
 
1944
+ graph = _f(graph_raw)
1945
+
1691
1946
  default_node_candidates = (
1692
1947
  [nn for nn in graph.nodes if nn.addr == node_b_addr] if node_b_addr is not None else []
1693
1948
  )
@@ -1697,8 +1952,8 @@ class PhoenixStructurer(StructurerBase):
1697
1952
  if node_default is not None and not isinstance(node_default, SequenceNode):
1698
1953
  # make the default node a SequenceNode so that we can insert Break and Continue nodes into it later
1699
1954
  new_node = SequenceNode(node_default.addr, nodes=[node_default])
1700
- self.replace_nodes(graph, node_default, new_node)
1701
- self.replace_nodes(full_graph, node_default, new_node)
1955
+ self.replace_nodes(graph_raw, node_default, new_node)
1956
+ self.replace_nodes(full_graph_raw, node_default, new_node, update_node_order=True)
1702
1957
  node_default = new_node
1703
1958
 
1704
1959
  converted_nodes: dict[tuple[int, int | None], Any] = {}
@@ -1807,7 +2062,7 @@ class PhoenixStructurer(StructurerBase):
1807
2062
  to_remove.add(node_default)
1808
2063
 
1809
2064
  for nn in to_remove:
1810
- if nn is head:
2065
+ if nn is head or (node_a is not None and nn is node_a):
1811
2066
  continue
1812
2067
  for src in graph.predecessors(nn):
1813
2068
  if src not in to_remove:
@@ -1834,6 +2089,17 @@ class PhoenixStructurer(StructurerBase):
1834
2089
  ):
1835
2090
  # succ will be dangling - not ready to be structured yet - do it later
1836
2091
  return False
2092
+ succs = {dst for _, dst in out_edges}
2093
+ dangling_succs = set()
2094
+ if len(succs) > 1:
2095
+ for succ in succs:
2096
+ if succ in graph:
2097
+ non_switch_preds = {pred for pred in graph.predecessors(succ) if pred not in to_remove}
2098
+ if not non_switch_preds:
2099
+ dangling_succs.add(succ)
2100
+ if len(dangling_succs) > 1:
2101
+ # there will definitely be dangling nodes after structuring. it's not ready to be structured yet.
2102
+ return False
1837
2103
 
1838
2104
  if node_default is not None:
1839
2105
  # the head no longer goes to the default case
@@ -1858,45 +2124,59 @@ class PhoenixStructurer(StructurerBase):
1858
2124
  self._node_order[scnode] = self._node_order[head]
1859
2125
 
1860
2126
  if out_edges:
2127
+ # sort out_edges
2128
+ out_edges_to_head = [edge for edge in out_edges if edge[1] is head]
2129
+ other_out_edges = sorted(
2130
+ [edge for edge in out_edges if edge[1] is not head], key=lambda edge: (edge[0].addr, edge[1].addr)
2131
+ )
2132
+
1861
2133
  # for all out edges going to head, we ensure there is a goto at the end of each corresponding case node
1862
- for out_src, out_dst in out_edges:
1863
- if out_dst is head:
1864
- all_case_nodes = list(cases.values())
1865
- if node_default is not None:
1866
- all_case_nodes.append(node_default)
1867
- case_node: SequenceNode = next(nn for nn in all_case_nodes if nn.addr == out_src.addr)
1868
- try:
1869
- case_node_last_stmt = self.cond_proc.get_last_statement(case_node)
1870
- except EmptyBlockNotice:
1871
- case_node_last_stmt = None
1872
- if not isinstance(case_node_last_stmt, Jump):
1873
- jump_stmt = Jump(
1874
- None, Const(None, None, head.addr, self.project.arch.bits), None, ins_addr=out_src.addr
1875
- )
1876
- jump_node = Block(out_src.addr, 0, statements=[jump_stmt])
1877
- case_node.nodes.append(jump_node)
1878
-
1879
- out_edges = [edge for edge in out_edges if edge[1] is not head]
1880
- if out_edges:
1881
- # leave only one out edge and virtualize all other out edges
1882
- out_edge = out_edges[0]
1883
- out_dst = out_edge[1]
1884
- if out_dst in graph:
1885
- graph.add_edge(scnode, out_dst)
1886
- full_graph.add_edge(scnode, out_dst)
1887
- if full_graph.has_edge(head, out_dst):
1888
- full_graph.remove_edge(head, out_dst)
1889
-
1890
- # fix full_graph if needed: remove successors that are no longer needed
1891
- for _out_src, out_dst in out_edges[1:]:
1892
- if out_dst in full_graph and out_dst not in graph and full_graph.in_degree[out_dst] == 0:
1893
- full_graph.remove_node(out_dst)
1894
- assert self._region.successors is not None
1895
- if out_dst in self._region.successors:
1896
- self._region.successors.remove(out_dst)
2134
+ for out_src, out_dst in out_edges_to_head:
2135
+ assert out_dst is head
2136
+ all_case_nodes = list(cases.values())
2137
+ if node_default is not None:
2138
+ all_case_nodes.append(node_default)
2139
+ case_node: SequenceNode = next(nn for nn in all_case_nodes if nn.addr == out_src.addr)
2140
+ try:
2141
+ case_node_last_stmt = self.cond_proc.get_last_statement(case_node)
2142
+ except EmptyBlockNotice:
2143
+ case_node_last_stmt = None
2144
+ if not isinstance(case_node_last_stmt, Jump):
2145
+ jump_stmt = Jump(
2146
+ None, Const(None, None, head.addr, self.project.arch.bits), None, ins_addr=out_src.addr
2147
+ )
2148
+ jump_node = Block(out_src.addr, 0, statements=[jump_stmt])
2149
+ case_node.nodes.append(jump_node)
2150
+
2151
+ if out_edges_to_head: # noqa:SIM108
2152
+ # add an edge from SwitchCaseNode to head so that a loop will be structured later
2153
+ out_dst_succ = head
2154
+ else:
2155
+ # add an edge from SwitchCaseNode to its most immediate successor (if there is one)
2156
+ out_dst_succ = other_out_edges[0][1] if other_out_edges else None
2157
+
2158
+ if out_dst_succ is not None:
2159
+ if out_dst_succ in graph:
2160
+ graph.add_edge(scnode, out_dst_succ)
2161
+ full_graph.add_edge(scnode, out_dst_succ)
2162
+ if full_graph.has_edge(head, out_dst_succ):
2163
+ full_graph.remove_edge(head, out_dst_succ)
2164
+
2165
+ # fix full_graph if needed: remove successors that are no longer needed
2166
+ for _out_src, out_dst in other_out_edges:
2167
+ if (
2168
+ out_dst is not out_dst_succ
2169
+ and out_dst in full_graph
2170
+ and out_dst not in graph
2171
+ and full_graph.in_degree[out_dst] == 0
2172
+ ):
2173
+ full_graph.remove_node(out_dst)
2174
+ assert self._region.successors is not None
2175
+ if out_dst in self._region.successors:
2176
+ self._region.successors.remove(out_dst)
1897
2177
 
1898
2178
  # remove the last statement (conditional jump) in the head node
1899
- remove_last_statement(head)
2179
+ self._remove_last_statement_if_jump_or_schead(head)
1900
2180
 
1901
2181
  if node_a is not None:
1902
2182
  # remove the last statement in node_a
@@ -1943,10 +2223,14 @@ class PhoenixStructurer(StructurerBase):
1943
2223
 
1944
2224
  # other acyclic schemas
1945
2225
 
1946
- def _match_acyclic_sequence(self, graph, full_graph, start_node) -> bool:
2226
+ def _match_acyclic_sequence(self, graph_raw, full_graph_raw, start_node) -> bool:
1947
2227
  """
1948
2228
  Check if there is a sequence of regions, where each region has a single predecessor and a single successor.
1949
2229
  """
2230
+
2231
+ full_graph = _f(full_graph_raw)
2232
+ graph = _f(graph_raw)
2233
+
1950
2234
  succs = list(graph.successors(start_node))
1951
2235
  if len(succs) == 1:
1952
2236
  end_node = succs[0]
@@ -1963,17 +2247,20 @@ class PhoenixStructurer(StructurerBase):
1963
2247
  new_seq = self._merge_nodes(start_node, end_node)
1964
2248
 
1965
2249
  # on the original graph
1966
- self.replace_nodes(graph, start_node, new_seq, old_node_1=end_node if end_node in graph else None)
2250
+ self.replace_nodes(graph_raw, start_node, new_seq, old_node_1=end_node if end_node in graph else None)
1967
2251
  # on the graph with successors
1968
- self.replace_nodes(full_graph, start_node, new_seq, old_node_1=end_node)
2252
+ self.replace_nodes(full_graph_raw, start_node, new_seq, old_node_1=end_node, update_node_order=True)
1969
2253
  return True
1970
2254
  return False
1971
2255
 
1972
- def _match_acyclic_ite(self, graph, full_graph, start_node) -> bool:
2256
+ def _match_acyclic_ite(self, graph_raw, full_graph_raw, start_node) -> bool:
1973
2257
  """
1974
2258
  Check if start_node is the beginning of an If-Then-Else region. Create a Condition node if it is the case.
1975
2259
  """
1976
2260
 
2261
+ full_graph = _f(full_graph_raw)
2262
+ graph = _f(graph_raw)
2263
+
1977
2264
  succs = list(full_graph.successors(start_node))
1978
2265
  if len(succs) == 2:
1979
2266
  left, right = succs
@@ -2021,19 +2308,23 @@ class PhoenixStructurer(StructurerBase):
2021
2308
  if not left_succs:
2022
2309
  # on the original graph
2023
2310
  if left in graph:
2024
- graph.remove_node(left)
2025
- self.replace_nodes(graph, start_node, new_node, old_node_1=right)
2311
+ graph_raw.remove_node(left)
2312
+ self.replace_nodes(graph_raw, start_node, new_node, old_node_1=right)
2026
2313
  # on the graph with successors
2027
- full_graph.remove_node(left)
2028
- self.replace_nodes(full_graph, start_node, new_node, old_node_1=right)
2314
+ full_graph_raw.remove_node(left)
2315
+ self.replace_nodes(
2316
+ full_graph_raw, start_node, new_node, old_node_1=right, update_node_order=True
2317
+ )
2029
2318
  else:
2030
2319
  # on the original graph
2031
2320
  if right in graph:
2032
- graph.remove_node(right)
2033
- self.replace_nodes(graph, start_node, new_node, old_node_1=left)
2321
+ graph_raw.remove_node(right)
2322
+ self.replace_nodes(graph_raw, start_node, new_node, old_node_1=left)
2034
2323
  # on the graph with successors
2035
- full_graph.remove_node(right)
2036
- self.replace_nodes(full_graph, start_node, new_node, old_node_1=left)
2324
+ full_graph_raw.remove_node(right)
2325
+ self.replace_nodes(
2326
+ full_graph_raw, start_node, new_node, old_node_1=left, update_node_order=True
2327
+ )
2037
2328
 
2038
2329
  return True
2039
2330
 
@@ -2060,9 +2351,11 @@ class PhoenixStructurer(StructurerBase):
2060
2351
  new_node = SequenceNode(start_node.addr, nodes=[start_node, new_cond_node])
2061
2352
 
2062
2353
  # on the original graph
2063
- self.replace_nodes(graph, start_node, new_node, old_node_1=left)
2354
+ self.replace_nodes(graph_raw, start_node, new_node, old_node_1=left)
2064
2355
  # on the graph with successors
2065
- self.replace_nodes(full_graph, start_node, new_node, old_node_1=left)
2356
+ self.replace_nodes(
2357
+ full_graph_raw, start_node, new_node, old_node_1=left, update_node_order=True
2358
+ )
2066
2359
 
2067
2360
  return True
2068
2361
 
@@ -2092,9 +2385,9 @@ class PhoenixStructurer(StructurerBase):
2092
2385
  new_node = SequenceNode(start_node.addr, nodes=[start_node, new_cond_node])
2093
2386
 
2094
2387
  # on the original graph
2095
- self.replace_nodes(graph, start_node, new_node, old_node_1=left)
2388
+ self.replace_nodes(graph_raw, start_node, new_node, old_node_1=left)
2096
2389
  # on the graph with successors
2097
- self.replace_nodes(full_graph, start_node, new_node, old_node_1=left)
2390
+ self.replace_nodes(full_graph_raw, start_node, new_node, old_node_1=left, update_node_order=True)
2098
2391
 
2099
2392
  return True
2100
2393
 
@@ -2146,16 +2439,16 @@ class PhoenixStructurer(StructurerBase):
2146
2439
  new_node = SequenceNode(start_node.addr, nodes=new_nodes)
2147
2440
 
2148
2441
  # on the original graph
2149
- self.replace_nodes(graph, start_node, new_node, old_node_1=left)
2442
+ self.replace_nodes(graph_raw, start_node, new_node, old_node_1=left)
2150
2443
  # on the graph with successors
2151
- self.replace_nodes(full_graph, start_node, new_node, old_node_1=left)
2444
+ self.replace_nodes(full_graph_raw, start_node, new_node, old_node_1=left, update_node_order=True)
2152
2445
 
2153
2446
  return True
2154
2447
 
2155
2448
  return False
2156
2449
 
2157
2450
  def _match_acyclic_short_circuit_conditions(
2158
- self, graph: networkx.DiGraph, full_graph: networkx.DiGraph, start_node
2451
+ self, graph_raw: networkx.DiGraph, full_graph_raw: networkx.DiGraph, start_node
2159
2452
  ) -> bool:
2160
2453
  """
2161
2454
  Check if start_node is the beginning of an If-Then-Else region with cascading short-circuit expressions as the
@@ -2236,6 +2529,9 @@ class PhoenixStructurer(StructurerBase):
2236
2529
  #
2237
2530
  # We reduce it into if (cond && next_cond) { body } else { else }
2238
2531
 
2532
+ graph = _f(graph_raw)
2533
+ full_graph = _f(full_graph_raw)
2534
+
2239
2535
  # fast-path check to reject nodes that definitely do not work
2240
2536
  if full_graph.out_degree[start_node] != 2:
2241
2537
  return False
@@ -2283,8 +2579,8 @@ class PhoenixStructurer(StructurerBase):
2283
2579
  self._remove_last_statement_if_jump(start_node)
2284
2580
  new_node = SequenceNode(start_node.addr, nodes=[start_node, new_cond_node])
2285
2581
 
2286
- self.replace_nodes(graph, start_node, new_node, old_node_1=left if left in graph else None)
2287
- self.replace_nodes(full_graph, start_node, new_node, old_node_1=left)
2582
+ self.replace_nodes(graph_raw, start_node, new_node, old_node_1=left if left in graph else None)
2583
+ self.replace_nodes(full_graph_raw, start_node, new_node, old_node_1=left, update_node_order=True)
2288
2584
 
2289
2585
  return True
2290
2586
 
@@ -2317,8 +2613,8 @@ class PhoenixStructurer(StructurerBase):
2317
2613
  self._remove_last_statement_if_jump(start_node)
2318
2614
  new_node = SequenceNode(start_node.addr, nodes=[start_node, new_cond_node])
2319
2615
 
2320
- self.replace_nodes(graph, start_node, new_node, old_node_1=right if right in graph else None)
2321
- self.replace_nodes(full_graph, start_node, new_node, old_node_1=right)
2616
+ self.replace_nodes(graph_raw, start_node, new_node, old_node_1=right if right in graph else None)
2617
+ self.replace_nodes(full_graph_raw, start_node, new_node, old_node_1=right, update_node_order=True)
2322
2618
 
2323
2619
  return True
2324
2620
 
@@ -2352,8 +2648,8 @@ class PhoenixStructurer(StructurerBase):
2352
2648
  self._remove_last_statement_if_jump(start_node)
2353
2649
  new_node = SequenceNode(start_node.addr, nodes=[start_node, new_cond_node])
2354
2650
 
2355
- self.replace_nodes(graph, start_node, new_node, old_node_1=left if left in graph else None)
2356
- self.replace_nodes(full_graph, start_node, new_node, old_node_1=left)
2651
+ self.replace_nodes(graph_raw, start_node, new_node, old_node_1=left if left in graph else None)
2652
+ self.replace_nodes(full_graph_raw, start_node, new_node, old_node_1=left, update_node_order=True)
2357
2653
  return True
2358
2654
 
2359
2655
  r = self._match_acyclic_short_circuit_conditions_type_d(graph, full_graph, start_node)
@@ -2385,8 +2681,8 @@ class PhoenixStructurer(StructurerBase):
2385
2681
  self._remove_last_statement_if_jump(start_node)
2386
2682
  new_node = SequenceNode(start_node.addr, nodes=[start_node, new_cond_node])
2387
2683
 
2388
- self.replace_nodes(graph, start_node, new_node, old_node_1=left if left in graph else None)
2389
- self.replace_nodes(full_graph, start_node, new_node, old_node_1=left)
2684
+ self.replace_nodes(graph_raw, start_node, new_node, old_node_1=left if left in graph else None)
2685
+ self.replace_nodes(full_graph_raw, start_node, new_node, old_node_1=left, update_node_order=True)
2390
2686
  return True
2391
2687
 
2392
2688
  return False
@@ -2568,12 +2864,12 @@ class PhoenixStructurer(StructurerBase):
2568
2864
  return left, edge_cond_left, right, edge_cond_left_right, else_node
2569
2865
  return None
2570
2866
 
2571
- def _last_resort_refinement(self, head, graph: networkx.DiGraph, full_graph: networkx.DiGraph) -> bool:
2867
+ def _last_resort_refinement(self, head, graph_raw: networkx.DiGraph, full_graph_raw: networkx.DiGraph) -> bool:
2572
2868
  if self._improve_algorithm:
2573
2869
  while self._edge_virtualization_hints:
2574
2870
  src, dst = self._edge_virtualization_hints.pop(0)
2575
- if graph.has_edge(src, dst):
2576
- self._virtualize_edge(graph, full_graph, src, dst)
2871
+ if _f(graph_raw).has_edge(src, dst):
2872
+ self._virtualize_edge(graph_raw, full_graph_raw, src, dst)
2577
2873
  l.debug("last_resort: Removed edge %r -> %r (type 3)", src, dst)
2578
2874
  return True
2579
2875
 
@@ -2582,9 +2878,13 @@ class PhoenixStructurer(StructurerBase):
2582
2878
  # (src_addr, dst_addr)
2583
2879
  secondary_edges = [] # likewise, edges in this list are ordered by a tuple of (src_addr, dst_addr)
2584
2880
  other_edges = []
2881
+
2882
+ full_graph = _f(full_graph_raw)
2883
+ graph = _f(graph_raw)
2884
+
2585
2885
  idoms = networkx.immediate_dominators(full_graph, head)
2586
2886
  if networkx.is_directed_acyclic_graph(full_graph):
2587
- acyclic_graph = full_graph
2887
+ acyclic_graph = networkx.DiGraph(full_graph)
2588
2888
  else:
2589
2889
  acyclic_graph = to_acyclic_graph(full_graph, node_order=self._node_order)
2590
2890
  for src, dst in acyclic_graph.edges:
@@ -2592,6 +2892,14 @@ class PhoenixStructurer(StructurerBase):
2592
2892
  continue
2593
2893
  if src not in graph:
2594
2894
  continue
2895
+ if (
2896
+ isinstance(src, Block)
2897
+ and src.statements
2898
+ and isinstance(src.statements[-1], IncompleteSwitchCaseHeadStatement)
2899
+ ):
2900
+ # this is a head of an incomplete switch-case construct (that we will definitely be structuring later),
2901
+ # so we do not want to remove any edges going out of this block
2902
+ continue
2595
2903
  if not dominates(idoms, src, dst) and not dominates(idoms, dst, src):
2596
2904
  if (src.addr, dst.addr) not in self.whitelist_edges:
2597
2905
  all_edges_wo_dominance.append((src, dst))
@@ -2622,7 +2930,7 @@ class PhoenixStructurer(StructurerBase):
2622
2930
  all_edges_wo_dominance = self._order_virtualizable_edges(full_graph, all_edges_wo_dominance, node_seq)
2623
2931
  # virtualize the first edge
2624
2932
  src, dst = all_edges_wo_dominance[0]
2625
- self._virtualize_edge(graph, full_graph, src, dst)
2933
+ self._virtualize_edge(graph_raw, full_graph_raw, src, dst)
2626
2934
  l.debug("last_resort: Removed edge %r -> %r (type 1)", src, dst)
2627
2935
  return True
2628
2936
 
@@ -2630,7 +2938,7 @@ class PhoenixStructurer(StructurerBase):
2630
2938
  secondary_edges = self._order_virtualizable_edges(full_graph, secondary_edges, node_seq)
2631
2939
  # virtualize the first edge
2632
2940
  src, dst = secondary_edges[0]
2633
- self._virtualize_edge(graph, full_graph, src, dst)
2941
+ self._virtualize_edge(graph_raw, full_graph_raw, src, dst)
2634
2942
  l.debug("last_resort: Removed edge %r -> %r (type 2)", src, dst)
2635
2943
  return True
2636
2944
 
@@ -2701,7 +3009,7 @@ class PhoenixStructurer(StructurerBase):
2701
3009
  self.virtualized_edges.add((src, dst))
2702
3010
  full_graph.remove_edge(src, dst)
2703
3011
  if new_src is not None:
2704
- self.replace_nodes(full_graph, src, new_src)
3012
+ self.replace_nodes(full_graph, src, new_src, update_node_order=True)
2705
3013
  if remove_src_last_stmt:
2706
3014
  remove_last_statements(src)
2707
3015
 
@@ -2735,7 +3043,7 @@ class PhoenixStructurer(StructurerBase):
2735
3043
  dst: Block | BaseNode,
2736
3044
  last=True,
2737
3045
  condjump_only=False,
2738
- ) -> tuple[int | None, BaseNode | None, Block | MultiNode | BreakNode | None]:
3046
+ ) -> tuple[int | None, BaseNode | None, Block | MultiNode | BreakNode | SequenceNode | None]:
2739
3047
  """
2740
3048
 
2741
3049
  :param node:
@@ -2752,13 +3060,13 @@ class PhoenixStructurer(StructurerBase):
2752
3060
  Holds parent_and_block and is accessible from within the handlers.
2753
3061
  """
2754
3062
 
2755
- parent_and_block: list[tuple[int, Any, Block | MultiNode | BreakNode]] = []
3063
+ parent_and_block: list[tuple[int, Any, Block | MultiNode | BreakNode | SequenceNode]] = []
2756
3064
  block_id: int = -1
2757
3065
 
2758
- def _check(last_stmt):
3066
+ def _check(last_stmt, force_condjump: bool = False):
2759
3067
  return (
2760
3068
  (
2761
- not condjump_only
3069
+ (force_condjump or not condjump_only)
2762
3070
  and isinstance(last_stmt, Jump)
2763
3071
  and isinstance(last_stmt.target, Const)
2764
3072
  and last_stmt.target.value == dst_addr
@@ -2819,11 +3127,29 @@ class PhoenixStructurer(StructurerBase):
2819
3127
  # FIXME: idx is ignored
2820
3128
  _Holder.parent_and_block.append((_Holder.block_id, parent, break_node))
2821
3129
 
3130
+ def _handle_ConditionNode(cond_node: ConditionNode, parent=None, **kwargs): # pylint:disable=unused-argument
3131
+ _Holder.block_id += 1
3132
+ if (
3133
+ isinstance(parent, SequenceNode)
3134
+ and parent.nodes
3135
+ and parent.nodes[-1] is cond_node
3136
+ and isinstance(cond_node.true_node, Block)
3137
+ and cond_node.true_node.statements
3138
+ and isinstance(cond_node.false_node, Block)
3139
+ and cond_node.false_node.statements
3140
+ ):
3141
+ if _check(cond_node.true_node.statements[-1], force_condjump=True) or _check(
3142
+ cond_node.false_node.statements[-1], force_condjump=True
3143
+ ):
3144
+ # we insert the parent node (the SequenceNode) instead
3145
+ _Holder.parent_and_block.append((_Holder.block_id, None, parent))
3146
+
2822
3147
  walker = SequenceWalker(
2823
3148
  handlers={
2824
3149
  Block: _handle_Block,
2825
3150
  MultiNode: _handle_MultiNode,
2826
3151
  BreakNode: _handle_BreakNode,
3152
+ ConditionNode: _handle_ConditionNode,
2827
3153
  },
2828
3154
  update_seqnode_in_place=False,
2829
3155
  force_forward_scan=True,
@@ -3021,9 +3347,22 @@ class PhoenixStructurer(StructurerBase):
3021
3347
  )
3022
3348
  self._node_order = {n: i for i, n in enumerate(ordered_nodes)}
3023
3349
 
3024
- def replace_nodes(self, graph, old_node_0, new_node, old_node_1=None, self_loop=True):
3350
+ def replace_nodes(
3351
+ self,
3352
+ graph,
3353
+ old_node_0,
3354
+ new_node,
3355
+ old_node_1=None,
3356
+ self_loop=True,
3357
+ update_node_order: bool = False,
3358
+ drop_refinement_marks: bool = False,
3359
+ ):
3025
3360
  super().replace_nodes(graph, old_node_0, new_node, old_node_1=old_node_1, self_loop=self_loop)
3026
- if self._node_order is not None and graph is self._region.graph_with_successors:
3361
+ if drop_refinement_marks:
3362
+ for _, dst in list(graph.out_edges(new_node)):
3363
+ if "cyclic_refinement_outgoing" in graph[new_node][dst]:
3364
+ del graph[new_node][dst]["cyclic_refinement_outgoing"]
3365
+ if self._node_order is not None and update_node_order:
3027
3366
  if old_node_1 is not None:
3028
3367
  self._node_order[new_node] = min(self._node_order[old_node_0], self._node_order[old_node_1])
3029
3368
  else:
@@ -3051,8 +3390,9 @@ class PhoenixStructurer(StructurerBase):
3051
3390
  for node in graph:
3052
3391
  graph_with_str.add_node(f'"{node!r}"')
3053
3392
 
3054
- for src, dst in graph.edges:
3055
- graph_with_str.add_edge(f'"{src!r}"', f'"{dst!r}"')
3393
+ for src, dst, data in graph.edges(data=True):
3394
+ data_dict = {} if data.get("cyclic_refinement_outgoing", False) is False else {"CRO": "True"}
3395
+ graph_with_str.add_edge(f'"{src!r}"', f'"{dst!r}"', **data_dict)
3056
3396
 
3057
3397
  networkx.drawing.nx_pydot.write_dot(graph_with_str, path)
3058
3398