angr-9.2.139-py3-none-manylinux2014_x86_64.whl → angr-9.2.141-py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (87)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +136 -53
  3. angr/analyses/calling_convention/fact_collector.py +44 -18
  4. angr/analyses/calling_convention/utils.py +3 -1
  5. angr/analyses/cfg/cfg_base.py +13 -0
  6. angr/analyses/cfg/cfg_fast.py +11 -0
  7. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +9 -8
  8. angr/analyses/decompiler/ail_simplifier.py +115 -72
  9. angr/analyses/decompiler/callsite_maker.py +24 -11
  10. angr/analyses/decompiler/clinic.py +78 -43
  11. angr/analyses/decompiler/decompiler.py +18 -7
  12. angr/analyses/decompiler/expression_narrower.py +1 -1
  13. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +8 -7
  14. angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +3 -1
  15. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +21 -2
  16. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +21 -13
  17. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +84 -15
  18. angr/analyses/decompiler/optimization_passes/optimization_pass.py +92 -11
  19. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +53 -9
  20. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +44 -7
  21. angr/analyses/decompiler/region_identifier.py +6 -4
  22. angr/analyses/decompiler/region_simplifiers/expr_folding.py +287 -122
  23. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +31 -13
  24. angr/analyses/decompiler/ssailification/rewriting.py +23 -15
  25. angr/analyses/decompiler/ssailification/rewriting_engine.py +105 -24
  26. angr/analyses/decompiler/ssailification/ssailification.py +22 -14
  27. angr/analyses/decompiler/structured_codegen/c.py +73 -137
  28. angr/analyses/decompiler/structuring/dream.py +22 -18
  29. angr/analyses/decompiler/structuring/phoenix.py +158 -41
  30. angr/analyses/decompiler/structuring/recursive_structurer.py +1 -0
  31. angr/analyses/decompiler/structuring/structurer_base.py +37 -10
  32. angr/analyses/decompiler/structuring/structurer_nodes.py +4 -1
  33. angr/analyses/decompiler/utils.py +106 -21
  34. angr/analyses/deobfuscator/api_obf_finder.py +8 -5
  35. angr/analyses/deobfuscator/api_obf_type2_finder.py +18 -10
  36. angr/analyses/deobfuscator/string_obf_finder.py +105 -18
  37. angr/analyses/forward_analysis/forward_analysis.py +1 -1
  38. angr/analyses/propagator/top_checker_mixin.py +6 -6
  39. angr/analyses/reaching_definitions/__init__.py +2 -1
  40. angr/analyses/reaching_definitions/dep_graph.py +1 -12
  41. angr/analyses/reaching_definitions/engine_vex.py +36 -31
  42. angr/analyses/reaching_definitions/function_handler.py +15 -2
  43. angr/analyses/reaching_definitions/rd_state.py +1 -37
  44. angr/analyses/reaching_definitions/reaching_definitions.py +13 -24
  45. angr/analyses/s_propagator.py +6 -41
  46. angr/analyses/s_reaching_definitions/s_rda_model.py +7 -1
  47. angr/analyses/s_reaching_definitions/s_rda_view.py +43 -25
  48. angr/analyses/stack_pointer_tracker.py +36 -22
  49. angr/analyses/typehoon/simple_solver.py +45 -7
  50. angr/analyses/typehoon/typeconsts.py +18 -5
  51. angr/analyses/variable_recovery/engine_ail.py +1 -1
  52. angr/analyses/variable_recovery/engine_base.py +7 -5
  53. angr/analyses/variable_recovery/engine_vex.py +20 -4
  54. angr/block.py +69 -107
  55. angr/callable.py +14 -7
  56. angr/calling_conventions.py +30 -11
  57. angr/distributed/__init__.py +1 -1
  58. angr/engines/__init__.py +7 -8
  59. angr/engines/engine.py +1 -120
  60. angr/engines/failure.py +2 -2
  61. angr/engines/hook.py +2 -2
  62. angr/engines/light/engine.py +2 -2
  63. angr/engines/pcode/engine.py +2 -14
  64. angr/engines/procedure.py +2 -2
  65. angr/engines/soot/engine.py +2 -2
  66. angr/engines/soot/statements/switch.py +1 -1
  67. angr/engines/successors.py +124 -11
  68. angr/engines/syscall.py +2 -2
  69. angr/engines/unicorn.py +3 -3
  70. angr/engines/vex/heavy/heavy.py +3 -15
  71. angr/factory.py +12 -22
  72. angr/knowledge_plugins/key_definitions/atoms.py +8 -4
  73. angr/knowledge_plugins/key_definitions/live_definitions.py +41 -103
  74. angr/knowledge_plugins/variables/variable_manager.py +7 -5
  75. angr/sim_type.py +19 -17
  76. angr/simos/simos.py +3 -1
  77. angr/state_plugins/plugin.py +19 -4
  78. angr/storage/memory_mixins/memory_mixin.py +1 -1
  79. angr/storage/memory_mixins/paged_memory/pages/multi_values.py +10 -5
  80. angr/utils/ssa/__init__.py +119 -4
  81. angr/utils/types.py +48 -0
  82. {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/METADATA +6 -6
  83. {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/RECORD +87 -86
  84. {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/LICENSE +0 -0
  85. {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/WHEEL +0 -0
  86. {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/entry_points.txt +0 -0
  87. {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,7 @@ import networkx
 
 from ailment import Block, AILBlockWalkerBase
 from ailment.statement import ConditionalJump, Label, Assignment, Jump
-from ailment.expression import Expression, BinaryOp, Const, Load
+from ailment.expression import VirtualVariable, Expression, BinaryOp, Const, Load
 
 from angr.utils.graph import GraphUtils
 from angr.analyses.decompiler.utils import first_nonlabel_nonphi_statement, remove_last_statement
@@ -257,6 +257,24 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
                 _l.debug("Skipping switch-case conversion due to too few distinct cases for %s", real_cases[0])
                 continue
 
+            # RULE 4: the default case should not reach other case nodes in the subregion
+            default_addr_and_idx = next(
+                ((case.target, case.target_idx) for case in cases if case.value == "default"), None
+            )
+            if default_addr_and_idx is None:
+                continue
+            default_addr, default_idx = default_addr_and_idx
+            default_node = self._get_block(default_addr, idx=default_idx)
+            default_reachable_from_case = False
+            for case in cases:
+                if case.value == "default":
+                    continue
+                if self._node_reachable_from_node_in_region(case.original_node, default_node):
+                    default_reachable_from_case = True
+                    break
+            if default_reachable_from_case:
+                continue
+
             original_nodes = [case.original_node for case in real_cases]
             original_head: Block = original_nodes[0]
             original_nodes = original_nodes[1:]
@@ -320,6 +338,10 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
                     node_to_heads[succ].add(new_head)
                 graph_copy.remove_node(onode)
             for onode in redundant_nodes:
+                if onode in original_nodes:
+                    # sometimes they overlap
+                    # e.g., 0x402cc7 in mv_-O2
+                    continue
                 # ensure all nodes that are only reachable from onode are also removed
                 # FIXME: Remove the entire path of nodes instead of only the immediate successors
                 successors = list(graph_copy.successors(onode))
@@ -396,6 +418,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
         default_case_candidates = {}
         last_comp = None
         stack = [(head, 0, 0xFFFF_FFFF_FFFF_FFFF)]
+        head_varhash = variable_comparisons[head][1]
 
         # cursed: there is an infinite loop in the following loop that
         # occurs rarely. we need to keep track of the nodes we've seen
@@ -418,12 +441,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
                next_addr,
                next_addr_idx,
            ) = variable_comparisons[comp]
-           last_varhash = cases[-1].variable_hash if cases else None
 
            if op == "eq":
                # eq always indicates a new case
 
-               if last_varhash is None or last_varhash == variable_hash:
+               if head_varhash == variable_hash:
                    if target == comp.addr and target_idx == comp.idx:
                        # invalid
                        break
@@ -443,9 +465,10 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
                    # new variable!
                    if last_comp is not None and comp.addr not in default_case_candidates:
                        default_case_candidates[comp.addr] = Case(
-                           last_comp, None, last_varhash, None, "default", comp.addr, comp.idx, None
+                           last_comp, None, head_varhash, None, "default", comp.addr, comp.idx, None
                        )
-                   break
+                       break
+                   continue
 
                successors = [succ for succ in self._graph.successors(comp) if succ is not comp]
                succ_addrs = {(succ.addr, succ.idx) for succ in successors}
@@ -505,7 +528,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
                # gt always indicates new subtrees
                gt_addr, gt_idx, le_addr, le_idx = target, target_idx, next_addr, next_addr_idx
                # TODO: We don't yet support gt nodes acting as the head of a switch
-               if last_varhash is not None and last_varhash == variable_hash:
+               if head_varhash == variable_hash:
                    successors = [succ for succ in self._graph.successors(comp) if succ is not comp]
                    succ_addrs = {(succ.addr, succ.idx) for succ in successors}
                    if succ_addrs != {(gt_addr, gt_idx), (le_addr, le_idx)}:
@@ -526,21 +549,34 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
                            le_added = True
                    if gt_added or le_added:
                        if not le_added:
-                           if le_addr not in default_case_candidates:
+                           # if min_ + 1 == value, it means we actually have another case! it's not a default case
+                           if min_ + 1 == value:
+                               cases.append(
+                                   Case(comp, comp_type, variable_hash, expr, min_ + 1, le_addr, le_idx, None)
+                               )
+                               used_nodes.add(comp)
+                           elif le_addr not in default_case_candidates:
                                default_case_candidates[le_addr] = Case(
                                    comp, None, variable_hash, expr, "default", le_addr, le_idx, None
                                )
-                       elif not gt_added and gt_addr not in default_case_candidates:
-                           default_case_candidates[gt_addr] = Case(
-                               comp, None, variable_hash, expr, "default", gt_addr, gt_idx, None
-                           )
+                       if not gt_added:
+                           # likewise, this means we have another non-default case
+                           if value == max_:
+                               cases.append(
+                                   Case(comp, comp_type, variable_hash, expr, max_, gt_addr, gt_idx, None)
+                               )
+                               used_nodes.add(comp)
+                           elif gt_addr not in default_case_candidates:
+                               default_case_candidates[gt_addr] = Case(
+                                   comp, None, variable_hash, expr, "default", gt_addr, gt_idx, None
+                               )
                        extra_cmp_nodes.append(comp)
                        used_nodes.add(comp)
                    else:
                        break
                else:
                    # checking on a new variable... it probably was not a switch-case
-                   break
+                   continue
 
        if cases and len(default_case_candidates) <= 1:
            if default_case_candidates:
@@ -606,6 +642,27 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
 
        return varhash_to_caselists
 
+    def _node_reachable_from_node_in_region(self, to_node, from_node) -> bool:
+        # find the region that contains the to_node
+        to_node_region = None
+        from_node_region = None
+        for region in self._ri.regions_by_block_addrs:
+            if (to_node.addr, to_node.idx) in region:
+                to_node_region = region
+            if (from_node.addr, from_node.idx) in region:
+                from_node_region = region
+
+        if to_node_region is None or from_node_region is None:
+            return False
+        if to_node_region != from_node_region:
+            return False
+
+        # get a subgraph
+        all_nodes = [self._get_block(a, idx=idx) for a, idx in to_node_region]
+        subgraph = self._graph.subgraph(all_nodes)
+
+        return networkx.has_path(subgraph, from_node, to_node)
+
    @staticmethod
    def _find_switch_variable_comparison_type_a(
        node,
@@ -625,7 +682,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
               )
           ):
               cond = stmt.condition
-              if isinstance(cond, BinaryOp) and isinstance(cond.operands[1], Const):
+              if (
+                  isinstance(cond, BinaryOp)
+                  and isinstance(cond.operands[0], VirtualVariable)
+                  and isinstance(cond.operands[1], Const)
+              ):
                   variable_hash = StableVarExprHasher(cond.operands[0]).hash
                   value = cond.operands[1].value
                   if cond.op == "CmpEQ":
@@ -672,7 +733,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
               )
           ):
               cond = stmt.condition
-              if isinstance(cond, BinaryOp) and isinstance(cond.operands[1], Const):
+              if (
+                  isinstance(cond, BinaryOp)
+                  and isinstance(cond.operands[0], VirtualVariable)
+                  and isinstance(cond.operands[1], Const)
+              ):
                   variable_hash = StableVarExprHasher(cond.operands[0]).hash
                   value = cond.operands[1].value
                   if cond.op == "CmpEQ":
@@ -719,7 +784,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
               )
           ):
               cond = stmt.condition
-              if isinstance(cond, BinaryOp) and isinstance(cond.operands[1], Const):
+              if (
+                  isinstance(cond, BinaryOp)
+                  and isinstance(cond.operands[0], VirtualVariable)
+                  and isinstance(cond.operands[1], Const)
+              ):
                   variable_hash = StableVarExprHasher(cond.operands[0]).hash
                   value = cond.operands[1].value
                   op = cond.op
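
The RULE 4 check above, together with the _node_reachable_from_node_in_region helper added in the same file, rejects a switch conversion whenever the default node can reach another case node inside the same region. What follows is a minimal standalone sketch of that reachability test, not the library code itself: it assumes region membership is tracked as (addr, idx) pairs, and all block addresses and the helper name are made up for illustration.

import networkx

# Hypothetical region: the (addr, idx) keys of the blocks it contains,
# mirroring what regions_by_block_addrs stores after this release.
region = {(0x401000, None), (0x401010, None), (0x401020, None)}

# A toy control-flow graph keyed by the same (addr, idx) tuples.
graph = networkx.DiGraph()
graph.add_edges_from(
    [
        ((0x401000, None), (0x401010, None)),
        ((0x401010, None), (0x401020, None)),
        ((0x402000, None), (0x402010, None)),  # blocks outside the region
    ]
)


def reachable_within_region(g, region_keys, from_node, to_node) -> bool:
    """Return True if to_node is reachable from from_node using only region blocks."""
    if from_node not in region_keys or to_node not in region_keys:
        return False
    subgraph = g.subgraph(region_keys)  # view restricted to the region's nodes
    return networkx.has_path(subgraph, from_node, to_node)


print(reachable_within_region(graph, region, (0x401000, None), (0x401020, None)))  # True
print(reachable_within_region(graph, region, (0x401020, None), (0x401000, None)))  # False
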
@@ -1,8 +1,9 @@
 # pylint:disable=unused-argument
 from __future__ import annotations
 import logging
-from typing import Any, TYPE_CHECKING
+from collections import namedtuple
 from collections.abc import Generator
+from typing import Any, TYPE_CHECKING
 from enum import Enum
 
 import networkx
@@ -10,10 +11,11 @@ import networkx
 import ailment
 
 from angr.analyses.decompiler import RegionIdentifier
+from angr.analyses.decompiler.ailgraph_walker import AILGraphWalker
 from angr.analyses.decompiler.condition_processor import ConditionProcessor
 from angr.analyses.decompiler.goto_manager import Goto, GotoManager
 from angr.analyses.decompiler.structuring import RecursiveStructurer, SAILRStructurer
-from angr.analyses.decompiler.utils import add_labels
+from angr.analyses.decompiler.utils import add_labels, remove_edges_in_ailgraph
 from angr.analyses.decompiler.counters import ControlFlowStructureCounter
 from angr.project import Project
 
@@ -24,6 +26,9 @@ if TYPE_CHECKING:
 _l = logging.getLogger(__name__)
 
 
+BlockCache = namedtuple("BlockCache", ("rd", "prop"))
+
+
 class MultipleBlocksException(Exception):
     """
     An exception that is raised in _get_block() where multiple blocks satisfy the criteria but only one block was
@@ -129,6 +134,8 @@ class OptimizationPass(BaseOptimizationPass):
         force_loop_single_exit: bool = True,
         complete_successors: bool = False,
         avoid_vvar_ids: set[int] | None = None,
+        arg_vvars: set[int] | None = None,
+        peephole_optimizations=None,
         **kwargs,
     ):
         super().__init__(func)
@@ -141,6 +148,7 @@ class OptimizationPass(BaseOptimizationPass):
         self._rd = reaching_definitions
         self._scratch = scratch if scratch is not None else {}
         self._new_block_addrs = set()
+        self._arg_vvars = arg_vvars
         self.vvar_id_start = vvar_id_start
         self.entry_node_addr: tuple[int, int | None] = (
             entry_node_addr if entry_node_addr is not None else (func.addr, None)
@@ -148,6 +156,7 @@ class OptimizationPass(BaseOptimizationPass):
         self._force_loop_single_exit = force_loop_single_exit
         self._complete_successors = complete_successors
         self._avoid_vvar_ids = avoid_vvar_ids or set()
+        self._peephole_optimizations = peephole_optimizations
 
         # output
         self.out_graph: networkx.DiGraph | None = None
@@ -265,9 +274,77 @@ class OptimizationPass(BaseOptimizationPass):
     def _is_sub(expr):
         return isinstance(expr, ailment.Expr.BinaryOp) and expr.op == "Sub"
 
+    def _simplify_blocks(
+        self,
+        ail_graph: networkx.DiGraph,
+        cache: dict | None = None,
+    ):
+        """
+        Simplify all blocks in self._blocks.
+
+        :param ail_graph:   The AIL function graph.
+        :param cache:       A block-level cache that stores reaching definition analysis results and
+                            propagation results.
+        :return:            None
+        """
+
+        blocks_by_addr_and_idx: dict[tuple[int, int | None], ailment.Block] = {}
+
+        for ail_block in ail_graph.nodes():
+            simplified = self._simplify_block(
+                ail_block,
+                cache=cache,
+            )
+            key = ail_block.addr, ail_block.idx
+            blocks_by_addr_and_idx[key] = simplified
+
+        # update blocks_map to allow node_addr to node lookup
+        def _replace_node_handler(node):
+            key = node.addr, node.idx
+            if key in blocks_by_addr_and_idx:
+                return blocks_by_addr_and_idx[key]
+            return None
+
+        AILGraphWalker(ail_graph, _replace_node_handler, replace_nodes=True).walk()
+
+        return ail_graph
+
+    def _simplify_block(self, ail_block, cache=None):
+        """
+        Simplify a single AIL block.
+
+        :param ailment.Block ail_block: The AIL block to simplify.
+        :return:                        A simplified AIL block.
+        """
+
+        cached_rd, cached_prop = None, None
+        cache_item = None
+        cache_key = ail_block.addr, ail_block.idx
+        if cache:
+            cache_item = cache.get(cache_key, None)
+            if cache_item:
+                # cache hit
+                cached_rd = cache_item.rd
+                cached_prop = cache_item.prop
+
+        simp = self.project.analyses.AILBlockSimplifier(
+            ail_block,
+            self._func.addr,
+            peephole_optimizations=self._peephole_optimizations,
+            cached_reaching_definitions=cached_rd,
+            cached_propagator=cached_prop,
+        )
+        # update the cache
+        if cache is not None:
+            if cache_item:
+                del cache[cache_key]
+            cache[cache_key] = BlockCache(simp._reaching_definitions, simp._propagator)
+        return simp.result_block
+
     def _simplify_graph(self, graph):
         MAX_SIMP_ITERATION = 8
         for _ in range(MAX_SIMP_ITERATION):
+            self._simplify_blocks(graph)
             simp = self.project.analyses.AILSimplifier(
                 self._func,
                 func_graph=graph,
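
The _simplify_blocks/_simplify_block pair added above memoizes per-block analysis results in the new BlockCache namedtuple, keyed by (addr, idx), so repeated simplification rounds can reuse reaching-definitions and propagator results. Below is a stripped-down sketch of the same caching pattern, not angr code: analyze_block and simplify_block are hypothetical stand-ins for the expensive AILBlockSimplifier work.

from collections import namedtuple

# Same shape as the BlockCache added to optimization_pass.py.
BlockCache = namedtuple("BlockCache", ("rd", "prop"))

cache: dict[tuple[int, int | None], BlockCache] = {}


def analyze_block(addr: int, idx: int | None) -> BlockCache:
    # Hypothetical stand-in for the reaching-definitions and propagator analyses.
    return BlockCache(rd=f"rd@{addr:#x}", prop=f"prop@{addr:#x}")


def simplify_block(addr: int, idx: int | None) -> BlockCache:
    key = (addr, idx)
    cached = cache.get(key)
    if cached is not None:
        return cached            # cache hit: reuse earlier analysis results
    result = analyze_block(addr, idx)
    cache[key] = result          # cache miss: store for the next iteration
    return result


simplify_block(0x401000, None)   # computes and caches
simplify_block(0x401000, None)   # served from the cache
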
@@ -331,14 +408,15 @@ class StructuringOptimizationPass(OptimizationPass):
     def __init__(
         self,
         func,
-        prevent_new_gotos=True,
-        strictly_less_gotos=False,
-        recover_structure_fails=True,
-        must_improve_rel_quality=True,
-        max_opt_iters=1,
-        simplify_ail=True,
-        require_gotos=True,
-        readd_labels=False,
+        prevent_new_gotos: bool = True,
+        strictly_less_gotos: bool = False,
+        recover_structure_fails: bool = True,
+        must_improve_rel_quality: bool = True,
+        max_opt_iters: int = 1,
+        simplify_ail: bool = True,
+        require_gotos: bool = True,
+        readd_labels: bool = False,
+        edges_to_remove: list[tuple[tuple[int, int | None], tuple[int, int | None]]] | None = None,
         **kwargs,
     ):
         super().__init__(func, **kwargs)
@@ -350,6 +428,7 @@ class StructuringOptimizationPass(OptimizationPass):
         self._require_gotos = require_gotos
         self._must_improve_rel_quality = must_improve_rel_quality
         self._readd_labels = readd_labels
+        self._edges_to_remove = edges_to_remove or []
 
         # relative quality metrics (excludes gotos)
         self._initial_structure_counter = None
@@ -452,6 +531,8 @@ class StructuringOptimizationPass(OptimizationPass):
         if readd_labels:
             graph = add_labels(graph)
 
+        remove_edges_in_ailgraph(graph, self._edges_to_remove)
+
         self._ri = self.project.analyses[RegionIdentifier].prep(kb=self.kb)(
             self._func,
             graph=graph,
@@ -482,7 +563,7 @@ class StructuringOptimizationPass(OptimizationPass):
         if not rs or not rs.result or not rs.result.nodes or rs.result_incomplete:
             return False
 
-        rs = self.project.analyses.RegionSimplifier(self._func, rs.result, kb=self.kb, variable_kb=self._variable_kb)
+        rs = self.project.analyses.RegionSimplifier(self._func, rs.result, arg_vvars=self._arg_vvars, kb=self.kb)
         if not rs or rs.goto_manager is None or rs.result is None:
             return False
 
@@ -34,13 +34,23 @@ class FreshVirtualVariableRewriter(AILBlockWalker):
     def _handle_Assignment(self, stmt_idx: int, stmt: Assignment, block: Block | None):
         new_stmt = super()._handle_Assignment(stmt_idx, stmt, block)
         dst = new_stmt.dst if new_stmt is not None else stmt.dst
+        src = new_stmt.src if new_stmt is not None else stmt.src
         if isinstance(dst, VirtualVariable):
             self.vvar_mapping[dst.varid] = self.vvar_idx
             self.vvar_idx += 1
 
-            dst = VirtualVariable(dst.idx, self.vvar_mapping[dst.varid], dst.bits, dst.category, dst.oident, **dst.tags)
+            dst = VirtualVariable(
+                dst.idx,
+                self.vvar_mapping[dst.varid],
+                dst.bits,
+                dst.category,
+                dst.oident,
+                variable=dst.variable,
+                variable_offset=dst.variable_offset,
+                **dst.tags,
+            )
 
-            return Assignment(stmt.idx, dst, stmt.src, **stmt.tags)
+            return Assignment(stmt.idx, dst, src, **stmt.tags)
 
         return new_stmt
 
@@ -133,18 +143,31 @@ class ReturnDuplicatorBase:
         self._supergraph = to_ail_supergraph(graph)
         for region_head, (in_edges, region) in endnode_regions.items():
             is_single_const_ret_region = self._is_simple_return_graph(region)
+            dup_pred_nodes = []
+            # duplicate the entire region if at least (N-2) in-edges for the region head is deemed should be duplicated.
+            # otherwise we only duplicate the edges that should be duplicated
             for in_edge in in_edges:
                 pred_node = in_edge[0]
                 if self._should_duplicate_dst(
                     pred_node, region_head, graph, dst_is_const_ret=is_single_const_ret_region
                 ):
+                    dup_pred_nodes.append(pred_node)
+
+            dup_count = len(dup_pred_nodes)
+            dup_all = dup_count >= len(in_edges) - 2 > 0
+            if dup_all:
+                for pred_node in sorted((in_edge[0] for in_edge in in_edges), key=lambda x: x.addr):
                     # every eligible pred gets a new region copy
                     self._copy_region([pred_node], region_head, region, graph)
+                    graph_changed = True
+            else:
+                for pred_node in dup_pred_nodes:
+                    self._copy_region([pred_node], region_head, region, graph)
+                    graph_changed = True
 
             if region_head in graph and graph.in_degree(region_head) == 0:
                 graph.remove_nodes_from(region)
-
-            graph_changed = True
+                graph_changed = True
 
         return graph_changed
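
The new duplication policy above turns on the chained comparison dup_count >= len(in_edges) - 2 > 0, which in Python means both dup_count >= len(in_edges) - 2 and len(in_edges) - 2 > 0: duplicate the whole region only when nearly all predecessors qualify and the region head has more than two in-edges. A small illustration of that decision with made-up counts (the helper name is hypothetical):

def should_duplicate_all(dup_count: int, n_in_edges: int) -> bool:
    # Chained comparison: both halves must hold.
    # 1) at least (N - 2) in-edges were deemed worth duplicating, and
    # 2) N - 2 is positive, i.e. the region head has more than two predecessors.
    return dup_count >= n_in_edges - 2 > 0


print(should_duplicate_all(dup_count=3, n_in_edges=4))  # True:  3 >= 2 and 2 > 0
print(should_duplicate_all(dup_count=1, n_in_edges=4))  # False: 1 < 2
print(should_duplicate_all(dup_count=2, n_in_edges=2))  # False: N - 2 == 0
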
@@ -199,10 +222,10 @@ class ReturnDuplicatorBase:
 
         return end_node_regions
 
-    def _copy_region(self, pred_nodes, region_head, region, graph):
+    def _copy_region(self, pred_nodes: list[Block], region_head, region, graph):
         # copy the entire return region
         copies: dict[Block, Block] = {}
-        queue = [(pred_node, region_head) for pred_node in pred_nodes]
+        queue: list[tuple[Block, Block]] = [(pred_node, region_head) for pred_node in pred_nodes]
         vvar_mapping: dict[int, int] = {}
         while queue:
             pred, node = queue.pop(0)
@@ -224,12 +247,33 @@ class ReturnDuplicatorBase:
                 last_stmt = ConditionProcessor.get_last_statement(pred)
                 if isinstance(last_stmt, Jump):
                     if isinstance(last_stmt.target, Const) and last_stmt.target.value == node_copy.addr:
-                        last_stmt.target_idx = node_copy.idx
+                        updated_last_stmt = Jump(
+                            last_stmt.idx, last_stmt.target, target_idx=node_copy.idx, **last_stmt.tags
+                        )
+                        pred.statements[-1] = updated_last_stmt
                 elif isinstance(last_stmt, ConditionalJump):
                     if isinstance(last_stmt.true_target, Const) and last_stmt.true_target.value == node_copy.addr:
-                        last_stmt.true_target_idx = node_copy.idx
+                        updated_last_stmt = ConditionalJump(
+                            last_stmt.idx,
+                            last_stmt.condition,
+                            last_stmt.true_target,
+                            last_stmt.false_target,
+                            true_target_idx=node_copy.idx,
+                            false_target_idx=last_stmt.false_target_idx,
+                            **last_stmt.tags,
+                        )
+                        pred.statements[-1] = updated_last_stmt
                     elif isinstance(last_stmt.false_target, Const) and last_stmt.false_target.value == node_copy.addr:
-                        last_stmt.false_target_idx = node_copy.idx
+                        updated_last_stmt = ConditionalJump(
+                            last_stmt.idx,
+                            last_stmt.condition,
+                            last_stmt.true_target,
+                            last_stmt.false_target,
+                            true_target_idx=last_stmt.true_target_idx,
+                            false_target_idx=node_copy.idx,
+                            **last_stmt.tags,
+                        )
+                        pred.statements[-1] = updated_last_stmt
             except EmptyBlockNotice:
                 pass
 
@@ -29,7 +29,12 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
     @staticmethod
     def _optimize_binaryop(expr: BinaryOp):
         if expr.op == "Add":
-            if isinstance(expr.operands[0], Const) and isinstance(expr.operands[1], Const):
+            if (
+                isinstance(expr.operands[0], Const)
+                and isinstance(expr.operands[0].value, int)
+                and isinstance(expr.operands[1], Const)
+                and isinstance(expr.operands[1].value, int)
+            ):
                 mask = (1 << expr.bits) - 1
                 return Const(
                     expr.idx, None, (expr.operands[0].value + expr.operands[1].value) & mask, expr.bits, **expr.tags
@@ -99,13 +104,19 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
                     new_const = Const(const1.idx, None, const1.value + 1, const1.bits, **const1.tags)
                     return BinaryOp(expr.idx, "Mul", [x1, new_const], expr.signed, **expr.tags)
             elif op0_is_mulconst and op1_is_mulconst:
+                assert x0 is not None and x1 is not None and const0 is not None and const1 is not None
                 if x0.likes(x1):
                     # x * A + x * B => (A + B) * x
                     new_const = Const(const0.idx, None, const0.value + const1.value, const0.bits, **const0.tags)
                     return BinaryOp(expr.idx, "Mul", [x0, new_const], expr.signed, **expr.tags)
 
         elif expr.op == "Sub":
-            if isinstance(expr.operands[0], Const) and isinstance(expr.operands[1], Const):
+            if (
+                isinstance(expr.operands[0], Const)
+                and isinstance(expr.operands[0].value, int)
+                and isinstance(expr.operands[1], Const)
+                and isinstance(expr.operands[1].value, int)
+            ):
                 mask = (1 << expr.bits) - 1
                 return Const(
                     expr.idx, None, (expr.operands[0].value - expr.operands[1].value) & mask, expr.bits, **expr.tags
@@ -138,12 +149,19 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
                 return UnaryOp(expr.idx, "Neg", expr.operands[1], **expr.tags)
 
             if isinstance(expr.operands[0], StackBaseOffset) and isinstance(expr.operands[1], StackBaseOffset):
+                assert isinstance(expr.operands[0].offset, int) and isinstance(expr.operands[1].offset, int)
                 return Const(expr.idx, None, expr.operands[0].offset - expr.operands[1].offset, expr.bits, **expr.tags)
 
         elif expr.op == "And":
-            if isinstance(expr.operands[0], Const) and isinstance(expr.operands[1], Const):
-                return Const(expr.idx, None, (expr.operands[0].value & expr.operands[1].value), expr.bits, **expr.tags)
-            if isinstance(expr.operands[1], Const) and expr.operands[1].value == 0:
+            op0, op1 = expr.operands
+            if (
+                isinstance(op0, Const)
+                and isinstance(op0.value, int)
+                and isinstance(op1, Const)
+                and isinstance(op1.value, int)
+            ):
+                return Const(expr.idx, None, (op0.value & op1.value), expr.bits, **expr.tags)
+            if isinstance(op1, Const) and op1.value == 0:
                 return Const(expr.idx, None, 0, expr.bits, **expr.tags)
 
         elif expr.op == "Mul":
@@ -156,6 +174,7 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
                 and isinstance(expr.operands[1], Const)
                 and expr.operands[1].is_int
             ):
+                assert isinstance(expr.operands[0].value, int) and isinstance(expr.operands[1].value, int)
                 # constant multiplication
                 mask = (1 << expr.bits) - 1
                 return Const(
@@ -235,7 +254,13 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
                     return Const(expr0.idx, None, (const_a << expr1.value) & mask, expr0.bits, **expr0.tags)
 
         elif expr.op == "Or":
-            if isinstance(expr.operands[0], Const) and isinstance(expr.operands[1], Const):
+            op0, op1 = expr.operands
+            if (
+                isinstance(op0, Const)
+                and isinstance(op0.value, int)
+                and isinstance(op1, Const)
+                and isinstance(op1.value, int)
+            ):
                 return Const(expr.idx, None, expr.operands[0].value | expr.operands[1].value, expr.bits, **expr.tags)
             if isinstance(expr.operands[0], Const) and expr.operands[0].value == 0:
                 return expr.operands[1]
@@ -248,6 +273,16 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
             if expr.operands[0].likes(expr.operands[1]):
                 return expr.operands[0]
 
+        elif expr.op == "Xor":
+            op0, op1 = expr.operands
+            if (
+                isinstance(op0, Const)
+                and isinstance(op0.value, int)
+                and isinstance(op1, Const)
+                and isinstance(op1.value, int)
+            ):
+                return Const(expr.idx, None, expr.operands[0].value ^ expr.operands[1].value, expr.bits, **expr.tags)
+
         elif expr.op in {"CmpEQ", "CmpLE", "CmpGE"}:
             if expr.operands[0].likes(expr.operands[1]):
                 # x == x => 1
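
All of the constant-folding branches touched here, including the new Xor case, follow the same pattern: only fold when both operands are Const with plain integer values, then truncate the result to the expression's bit width. The snippet below is a plain-Python sketch of that masked folding with no AIL types involved; fold_binop is an illustrative helper, not part of angr.

def fold_binop(op: str, a: int, b: int, bits: int) -> int:
    """Fold two integer constants the way the peephole pass does, truncating to `bits`."""
    mask = (1 << bits) - 1
    results = {
        "Add": a + b,
        "Sub": a - b,
        "And": a & b,
        "Or": a | b,
        "Xor": a ^ b,
    }
    return results[op] & mask


print(hex(fold_binop("Add", 0xFFFFFFFF, 1, 32)))   # 0x0: the sum wraps at 32 bits
print(hex(fold_binop("Xor", 0xFF00, 0x0FF0, 32)))  # 0xf0f0
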
@@ -288,7 +323,7 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
 
     @staticmethod
     def _optimize_unaryop(expr: UnaryOp):
-        if expr.op == "Neg" and isinstance(expr.operand, Const):
+        if expr.op == "Neg" and isinstance(expr.operand, Const) and isinstance(expr.operand.value, int):
             const_a = expr.operand.value
             mask = (2**expr.bits) - 1
             return Const(expr.idx, None, (~const_a) & mask, expr.bits, **expr.tags)
@@ -304,6 +339,7 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
             and expr.to_type == Convert.TYPE_INT
             and expr.from_bits > expr.to_bits
         ):
+            assert isinstance(expr.operand.value, int)
             # truncation
             mask = (1 << expr.to_bits) - 1
             v = expr.operand.value & mask
@@ -315,6 +351,7 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
             and expr.to_type == Convert.TYPE_INT
             and expr.from_bits <= expr.to_bits
         ):
+            assert isinstance(expr.operand.value, int)
             if expr.is_signed is False:
                 # unsigned extension
                 return Const(expr.idx, expr.operand.variable, expr.operand.value, expr.to_bits, **expr.operand.tags)
@@ -106,7 +106,7 @@ class RegionIdentifier(Analysis):
         # make regions into block address lists
         self.regions_by_block_addrs = self._make_regions_by_block_addrs()
 
-    def _make_regions_by_block_addrs(self) -> list[list[int]]:
+    def _make_regions_by_block_addrs(self) -> list[list[tuple[int, int | None]]]:
         """
         Creates a list of addr lists representing each region without recursion. A single region is defined
         as a set of only blocks, no Graphs containing nested regions. The list contains the address of each
@@ -124,13 +124,15 @@ class RegionIdentifier(Analysis):
                 children_blocks = []
                 for node in region.graph.nodes:
                     if isinstance(node, Block):
-                        children_blocks.append(node.addr)
+                        children_blocks.append((node.addr, node.idx))
                     elif isinstance(node, MultiNode):
-                        children_blocks += [n.addr for n in node.nodes]
+                        children_blocks += [(n.addr, node.idx) for n in node.nodes]
                     elif isinstance(node, GraphRegion):
                         if node not in seen_regions:
                             children_regions.append(node)
-                            children_blocks.append(node.head.addr)
+                            children_blocks.append(
+                                (node.head.addr, node.head.idx if hasattr(node.head, "idx") else None)
+                            )
                             seen_regions.add(node)
                     else:
                         continue
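
With the change above, regions_by_block_addrs records (addr, idx) pairs instead of bare addresses, so block copies that share an address but carry different idx values (as produced by duplication passes) no longer collapse into one entry. A tiny sketch of the difference, with made-up addresses:

# Before: regions as plain address lists; two copies of the same block are indistinguishable.
region_old = [0x401000, 0x401010, 0x401010]

# After: regions as (addr, idx) pairs; block copies stay distinct.
region_new = [(0x401000, None), (0x401010, None), (0x401010, 1)]

# Membership tests now need both the address and the block index:
print((0x401010, 1) in region_new)  # True
print((0x401010, 2) in region_new)  # False
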