angr 9.2.141__py3-none-macosx_11_0_arm64.whl → 9.2.143__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (72)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +26 -12
  3. angr/analyses/calling_convention/fact_collector.py +31 -9
  4. angr/analyses/cfg/cfg_base.py +38 -4
  5. angr/analyses/cfg/cfg_fast.py +23 -7
  6. angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +12 -1
  7. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +8 -1
  8. angr/analyses/class_identifier.py +8 -7
  9. angr/analyses/complete_calling_conventions.py +19 -6
  10. angr/analyses/decompiler/ail_simplifier.py +138 -98
  11. angr/analyses/decompiler/clinic.py +73 -5
  12. angr/analyses/decompiler/condition_processor.py +7 -7
  13. angr/analyses/decompiler/decompilation_cache.py +2 -1
  14. angr/analyses/decompiler/decompiler.py +10 -2
  15. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +4 -6
  16. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +8 -2
  17. angr/analyses/decompiler/optimization_passes/condition_constprop.py +110 -46
  18. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +8 -0
  19. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
  20. angr/analyses/decompiler/optimization_passes/optimization_pass.py +2 -0
  21. angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +29 -7
  22. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +6 -0
  23. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +9 -1
  24. angr/analyses/decompiler/peephole_optimizations/simplify_pc_relative_loads.py +15 -1
  25. angr/analyses/decompiler/region_identifier.py +70 -47
  26. angr/analyses/decompiler/sequence_walker.py +8 -0
  27. angr/analyses/decompiler/ssailification/rewriting.py +47 -17
  28. angr/analyses/decompiler/ssailification/rewriting_engine.py +13 -0
  29. angr/analyses/decompiler/stack_item.py +36 -0
  30. angr/analyses/decompiler/structured_codegen/c.py +14 -9
  31. angr/analyses/decompiler/structuring/phoenix.py +3 -3
  32. angr/analyses/decompiler/utils.py +13 -0
  33. angr/analyses/find_objects_static.py +2 -1
  34. angr/analyses/reaching_definitions/engine_vex.py +13 -0
  35. angr/analyses/reaching_definitions/function_handler.py +24 -10
  36. angr/analyses/reaching_definitions/function_handler_library/stdio.py +1 -0
  37. angr/analyses/reaching_definitions/function_handler_library/stdlib.py +45 -12
  38. angr/analyses/reaching_definitions/function_handler_library/string.py +77 -21
  39. angr/analyses/reaching_definitions/function_handler_library/unistd.py +21 -1
  40. angr/analyses/reaching_definitions/rd_state.py +11 -7
  41. angr/analyses/s_liveness.py +44 -6
  42. angr/analyses/s_propagator.py +40 -29
  43. angr/analyses/s_reaching_definitions/s_rda_model.py +48 -37
  44. angr/analyses/s_reaching_definitions/s_rda_view.py +6 -3
  45. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +21 -21
  46. angr/analyses/typehoon/simple_solver.py +35 -8
  47. angr/analyses/typehoon/typehoon.py +3 -1
  48. angr/analyses/variable_recovery/engine_ail.py +6 -6
  49. angr/calling_conventions.py +20 -10
  50. angr/knowledge_plugins/functions/function.py +5 -10
  51. angr/knowledge_plugins/variables/variable_manager.py +27 -0
  52. angr/lib/angr_native.dylib +0 -0
  53. angr/procedures/definitions/__init__.py +3 -10
  54. angr/procedures/definitions/linux_kernel.py +5 -0
  55. angr/procedures/definitions/wdk_ntoskrnl.py +2 -0
  56. angr/procedures/win32_kernel/__fastfail.py +15 -0
  57. angr/sim_procedure.py +2 -2
  58. angr/simos/simos.py +14 -10
  59. angr/simos/windows.py +42 -1
  60. angr/utils/ail.py +41 -1
  61. angr/utils/cpp.py +17 -0
  62. angr/utils/doms.py +149 -0
  63. angr/utils/library.py +1 -1
  64. angr/utils/ssa/__init__.py +21 -14
  65. angr/utils/ssa/vvar_uses_collector.py +2 -2
  66. angr/utils/types.py +12 -1
  67. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/METADATA +7 -7
  68. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/RECORD +72 -68
  69. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/LICENSE +0 -0
  70. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/WHEEL +0 -0
  71. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/entry_points.txt +0 -0
  72. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,6 @@ import capstone
13
13
 
14
14
  import ailment
15
15
 
16
- from angr.analyses.decompiler.ssailification.ssailification import Ssailification
17
16
  from angr.errors import AngrDecompilationError
18
17
  from angr.knowledge_base import KnowledgeBase
19
18
  from angr.knowledge_plugins.functions import Function
@@ -39,6 +38,8 @@ from angr.procedures.stubs.UnresolvableJumpTarget import UnresolvableJumpTarget
39
38
  from angr.analyses import Analysis, register_analysis
40
39
  from angr.analyses.cfg.cfg_base import CFGBase
41
40
  from angr.analyses.reaching_definitions import ReachingDefinitionsAnalysis
41
+ from .ssailification.ssailification import Ssailification
42
+ from .stack_item import StackItem, StackItemType
42
43
  from .return_maker import ReturnMaker
43
44
  from .ailgraph_walker import AILGraphWalker, RemoveNodeNotice
44
45
  from .optimization_passes import (
@@ -170,6 +171,7 @@ class Clinic(Analysis):
170
171
 
171
172
  self._register_save_areas_removed: bool = False
172
173
  self.edges_to_remove: list[tuple[tuple[int, int | None], tuple[int, int | None]]] = []
174
+ self.copied_var_ids: set[int] = set()
173
175
 
174
176
  self._new_block_addrs = set()
175
177
 
@@ -182,6 +184,10 @@ class Clinic(Analysis):
182
184
  else:
183
185
  self._optimization_passes = []
184
186
 
187
+ self.stack_items: dict[int, StackItem] = {}
188
+ if self.project.arch.call_pushes_ret:
189
+ self.stack_items[0] = StackItem(0, self.project.arch.bytes, "ret_addr", StackItemType.RET_ADDR)
190
+
185
191
  if self._mode == ClinicMode.DECOMPILE:
186
192
  self._analyze_for_decompiling()
187
193
  elif self._mode == ClinicMode.COLLECT_DATA_REFS:
@@ -502,7 +508,7 @@ class Clinic(Analysis):
502
508
  # Run simplification passes
503
509
  self._update_progress(40.0, text="Running simplifications 1")
504
510
  ail_graph = self._run_simplification_passes(
505
- ail_graph, stage=OptimizationPassStage.AFTER_SINGLE_BLOCK_SIMPLIFICATION
511
+ ail_graph, stack_items=self.stack_items, stage=OptimizationPassStage.AFTER_SINGLE_BLOCK_SIMPLIFICATION
506
512
  )
507
513
 
508
514
  # Simplify the entire function for the first time
@@ -565,7 +571,9 @@ class Clinic(Analysis):
565
571
 
566
572
  # Run simplification passes
567
573
  self._update_progress(65.0, text="Running simplifications 3 ")
568
- ail_graph = self._run_simplification_passes(ail_graph, stage=OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION)
574
+ ail_graph = self._run_simplification_passes(
575
+ ail_graph, stack_items=self.stack_items, stage=OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION
576
+ )
569
577
 
570
578
  # Simplify the entire function for the third time
571
579
  self._update_progress(70.0, text="Simplifying function 3")
@@ -632,6 +640,7 @@ class Clinic(Analysis):
632
640
  self.cc_graph = self.copy_graph(ail_graph)
633
641
  self.externs = self._collect_externs(ail_graph, variable_kb)
634
642
  self.vvar_to_vvar = vvar2vvar
643
+ self.copied_var_ids = copied_vvar_ids
635
644
  return ail_graph
636
645
 
637
646
  def _analyze_for_data_refs(self):
@@ -780,6 +789,8 @@ class Clinic(Analysis):
780
789
  :return: None
781
790
  """
782
791
 
792
+ attempted_funcs: set[int] = set()
793
+
783
794
  for node in self.function.transition_graph:
784
795
  if (
785
796
  isinstance(node, BlockNode)
@@ -791,7 +802,12 @@ class Clinic(Analysis):
791
802
  elif isinstance(node, Function):
792
803
  target_func = node
793
804
  else:
805
+ # TODO: Enable call-site analysis for indirect calls
806
+ continue
807
+
808
+ if target_func.addr in attempted_funcs:
794
809
  continue
810
+ attempted_funcs.add(target_func.addr)
795
811
 
796
812
  # case 0: the calling convention and prototype are available
797
813
  if target_func.calling_convention is not None and target_func.prototype is not None:
@@ -811,6 +827,7 @@ class Clinic(Analysis):
811
827
  if cc.cc is not None and cc.prototype is not None:
812
828
  target_func.calling_convention = cc.cc
813
829
  target_func.prototype = cc.prototype
830
+ target_func.prototype_libname = cc.prototype_libname
814
831
  continue
815
832
 
816
833
  # case 3: the callee is a PLT function
@@ -819,6 +836,7 @@ class Clinic(Analysis):
819
836
  if cc.cc is not None and cc.prototype is not None:
820
837
  target_func.calling_convention = cc.cc
821
838
  target_func.prototype = cc.prototype
839
+ target_func.prototype_libname = cc.prototype_libname
822
840
  continue
823
841
 
824
842
  # case 4: fall back to call site analysis
@@ -970,7 +988,29 @@ class Clinic(Analysis):
970
988
  return ailment.Block(block_node.addr, 0, statements=[])
971
989
 
972
990
  block = self.project.factory.block(block_node.addr, block_node.size, cross_insn_opt=False)
973
- return self._convert_vex(block)
991
+ converted = self._convert_vex(block)
992
+
993
+ # architecture-specific setup
994
+ if block.addr == self.function.addr and self.project.arch.name in {"X86", "AMD64"}:
995
+ # setup dflag; this is a hack for most sane ABIs. we may move this logic elsewhere if there are adversarial
996
+ # binaries that mess with dflags and pass them across functions
997
+ dflag_offset, dflag_size = self.project.arch.registers["d"]
998
+ dflag = ailment.Expr.Register(
999
+ self._ail_manager.next_atom(),
1000
+ None,
1001
+ dflag_offset,
1002
+ dflag_size * self.project.arch.byte_width,
1003
+ ins_addr=block.addr,
1004
+ )
1005
+ forward = ailment.Expr.Const(
1006
+ self._ail_manager.next_atom(), None, 1, dflag_size * self.project.arch.byte_width, ins_addr=block.addr
1007
+ )
1008
+ dflag_assignment = ailment.Stmt.Assignment(
1009
+ self._ail_manager.next_atom(), dflag, forward, ins_addr=block.addr
1010
+ )
1011
+ converted.statements.insert(0, dflag_assignment)
1012
+
1013
+ return converted
974
1014
 
975
1015
  def _convert_vex(self, block):
976
1016
  if block.vex.jumpkind not in {"Ijk_Call", "Ijk_Boring", "Ijk_Ret"} and not block.vex.jumpkind.startswith(
@@ -1012,7 +1052,11 @@ class Clinic(Analysis):
1012
1052
  node = self._cfg.get_any_node(block.addr)
1013
1053
  if node is None:
1014
1054
  continue
1015
- successors = self._cfg.get_successors(node, excluding_fakeret=True, jumpkind="Ijk_Call")
1055
+ successors = [
1056
+ node
1057
+ for node, jk in self._cfg.get_successors_and_jumpkinds(node)
1058
+ if jk == "Ijk_Call" or jk.startswith("Ijk_Sys")
1059
+ ]
1016
1060
  if len(successors) == 1:
1017
1061
  succ_addr = successors[0].addr
1018
1062
  if not self.project.is_hooked(succ_addr) or not isinstance(
@@ -1256,6 +1300,7 @@ class Clinic(Analysis):
1256
1300
  ail_graph,
1257
1301
  stage: OptimizationPassStage = OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION,
1258
1302
  variable_kb=None,
1303
+ stack_items: dict[int, StackItem] | None = None,
1259
1304
  **kwargs,
1260
1305
  ):
1261
1306
  addr_and_idx_to_blocks: dict[tuple[int, int | None], ailment.Block] = {}
@@ -1301,6 +1346,8 @@ class Clinic(Analysis):
1301
1346
  # clear the cached RDA result
1302
1347
  self.reaching_definitions = None
1303
1348
  self.vvar_id_start = a.vvar_id_start
1349
+ if stack_items is not None and a.stack_items:
1350
+ stack_items.update(a.stack_items)
1304
1351
 
1305
1352
  return ail_graph
1306
1353
 
@@ -1559,6 +1606,13 @@ class Clinic(Analysis):
1559
1606
  if vartype is not None:
1560
1607
  for tv in vr.var_to_typevars[variable]:
1561
1608
  groundtruth[tv] = vartype
1609
+ # get maximum sizes of each stack variable, regardless of its original type
1610
+ stackvar_max_sizes = var_manager.get_stackvar_max_sizes(self.stack_items)
1611
+ tv_max_sizes = {}
1612
+ for v, s in stackvar_max_sizes.items():
1613
+ if v in vr.var_to_typevars:
1614
+ for tv in vr.var_to_typevars[v]:
1615
+ tv_max_sizes[tv] = s
1562
1616
  # clean up existing types for this function
1563
1617
  var_manager.remove_types()
1564
1618
  # TODO: Type inference for global variables
@@ -1579,6 +1633,7 @@ class Clinic(Analysis):
1579
1633
  var_mapping=vr.var_to_typevars,
1580
1634
  must_struct=must_struct,
1581
1635
  ground_truth=groundtruth,
1636
+ stackvar_max_sizes=tv_max_sizes,
1582
1637
  )
1583
1638
  # tp.pp_constraints()
1584
1639
  # tp.pp_solution()
@@ -2439,6 +2494,19 @@ class Clinic(Analysis):
2439
2494
  last_stmt.target.value = succs[0].addr
2440
2495
  elif isinstance(last_stmt, ailment.Stmt.ConditionalJump):
2441
2496
  patch_conditional_jump_target(last_stmt, node.addr, succs[0].addr)
2497
+ # if both branches jump to the same location, we replace it with a jump
2498
+ if (
2499
+ isinstance(last_stmt.true_target, ailment.Expr.Const)
2500
+ and isinstance(last_stmt.false_target, ailment.Expr.Const)
2501
+ and last_stmt.true_target.value == last_stmt.false_target.value
2502
+ ):
2503
+ last_stmt = ailment.Stmt.Jump(
2504
+ last_stmt.idx,
2505
+ last_stmt.true_target,
2506
+ target_idx=last_stmt.true_target.idx,
2507
+ ins_addr=last_stmt.ins_addr,
2508
+ )
2509
+ pred.statements[-1] = last_stmt
2442
2510
  first_cond_jump = first_conditional_jump(pred)
2443
2511
  if first_cond_jump is not None and first_cond_jump is not last_stmt:
2444
2512
  patch_conditional_jump_target(first_cond_jump, node.addr, succs[0].addr)
@@ -16,6 +16,7 @@ from angr.utils.graph import GraphUtils
16
16
  from angr.utils.lazy_import import lazy_import
17
17
  from angr.utils import is_pyinstaller
18
18
  from angr.utils.graph import dominates, inverted_idoms
19
+ from angr.utils.ail import is_head_controlled_loop_block
19
20
  from angr.block import Block, BlockNode
20
21
  from angr.errors import AngrRuntimeError
21
22
  from .peephole_optimizations import InvertNegatedLogicalConjunctionsAndDisjunctions, RemoveRedundantNots
@@ -34,7 +35,7 @@ from .structuring.structurer_nodes import (
34
35
  IncompleteSwitchCaseNode,
35
36
  )
36
37
  from .graph_region import GraphRegion
37
- from .utils import first_nonlabel_nonphi_statement, peephole_optimize_expr
38
+ from .utils import peephole_optimize_expr
38
39
 
39
40
  if is_pyinstaller():
40
41
  # PyInstaller is not happy with lazy import
@@ -671,12 +672,11 @@ class ConditionProcessor:
671
672
  return claripy.true()
672
673
 
673
674
  # sometimes the last statement is the conditional jump. sometimes it's the first statement of the block
674
- if (
675
- isinstance(src_block, ailment.Block)
676
- and src_block.statements
677
- and isinstance(first_nonlabel_nonphi_statement(src_block), ailment.Stmt.ConditionalJump)
678
- ):
679
- last_stmt = first_nonlabel_nonphi_statement(src_block)
675
+ if isinstance(src_block, ailment.Block) and src_block.statements and is_head_controlled_loop_block(src_block):
676
+ last_stmt = next(
677
+ iter(stmt for stmt in src_block.statements[:-1] if isinstance(stmt, ailment.Stmt.ConditionalJump)), None
678
+ )
679
+ assert last_stmt is not None
680
680
  else:
681
681
  last_stmt = self.get_last_statement(src_block)
682
682
 
@@ -6,6 +6,7 @@ from .structured_codegen import BaseStructuredCodeGenerator
6
6
 
7
7
  if TYPE_CHECKING:
8
8
  from angr.analyses.decompiler.optimization_passes.expr_op_swapper import OpDescriptor
9
+ from angr.analyses.typehoon.typevars import TypeVariable, TypeConstraint
9
10
 
10
11
 
11
12
  class DecompilationCache:
@@ -29,7 +30,7 @@ class DecompilationCache:
29
30
  def __init__(self, addr):
30
31
  self.parameters: dict[str, Any] = {}
31
32
  self.addr = addr
32
- self.type_constraints: set | None = None
33
+ self.type_constraints: dict[TypeVariable, set[TypeConstraint]] | None = None
33
34
  self.func_typevar = None
34
35
  self.var_to_typevar: dict | None = None
35
36
  self.codegen: BaseStructuredCodeGenerator | None = None
@@ -31,6 +31,7 @@ from .presets import DECOMPILATION_PRESETS, DecompilationPreset
31
31
  if TYPE_CHECKING:
32
32
  from angr.knowledge_plugins.cfg.cfg_model import CFGModel
33
33
  from .peephole_optimizations import PeepholeOptimizationExprBase, PeepholeOptimizationStmtBase
34
+ from angr.analyses.typehoon.typevars import TypeVariable, TypeConstraint
34
35
 
35
36
  l = logging.getLogger(name=__name__)
36
37
 
@@ -135,6 +136,7 @@ class Decompiler(Analysis):
135
136
  self.unoptimized_ail_graph: networkx.DiGraph | None = None
136
137
  self.ail_graph: networkx.DiGraph | None = None
137
138
  self.vvar_id_start = None
139
+ self._copied_var_ids: set[int] = set()
138
140
  self._optimization_scratch: dict[str, Any] = {}
139
141
  self.expr_collapse_depth = expr_collapse_depth
140
142
 
@@ -267,6 +269,7 @@ class Decompiler(Analysis):
267
269
  self._variable_kb = clinic.variable_kb
268
270
  self._update_progress(70.0, text="Identifying regions")
269
271
  self.vvar_id_start = clinic.vvar_id_start
272
+ self._copied_var_ids = clinic.copied_var_ids
270
273
 
271
274
  if clinic.graph is None:
272
275
  # the function is empty
@@ -501,6 +504,7 @@ class Decompiler(Analysis):
501
504
  force_loop_single_exit=self._force_loop_single_exit,
502
505
  complete_successors=self._complete_successors,
503
506
  peephole_optimizations=self._peephole_optimizations,
507
+ avoid_vvar_ids=self._copied_var_ids,
504
508
  **kwargs,
505
509
  )
506
510
 
@@ -546,7 +550,9 @@ class Decompiler(Analysis):
546
550
  SimMemoryVariable(symbol.rebased_addr, 1, name=symbol.name, ident=ident),
547
551
  )
548
552
 
549
- def reflow_variable_types(self, type_constraints: set, func_typevar, var_to_typevar: dict, codegen):
553
+ def reflow_variable_types(
554
+ self, type_constraints: dict[TypeVariable, set[TypeConstraint]], func_typevar, var_to_typevar: dict, codegen
555
+ ):
550
556
  """
551
557
  Re-run type inference on an existing variable recovery result, then rerun codegen to generate new results.
552
558
 
@@ -606,7 +612,9 @@ class Decompiler(Analysis):
606
612
  var = arg.variable
607
613
  new_type = var_manager.get_variable_type(var)
608
614
  if new_type is not None:
609
- self.func.prototype.args[i] = new_type
615
+ self.func.prototype.args = (
616
+ self.func.prototype.args[:i] + (new_type,) + self.func.prototype.args[i + 1 :]
617
+ )
610
618
  except Exception: # pylint:disable=broad-except
611
619
  l.warning(
612
620
  "Typehoon analysis failed. Variables will not have types. Please report to GitHub.", exc_info=True
@@ -283,14 +283,12 @@ class GraphDephicationVVarMapping(Analysis): # pylint:disable=abstract-method
283
283
 
284
284
  @staticmethod
285
285
  def _prepend_stmt(block, stmt):
286
- # TODO: This insertion breaks the assumption that all phi statements appear before any assignments. We must
287
- # TODO: fix the assumption elsewhere in the code base.
288
- first_nonlabel_idx = len(block.statements)
286
+ first_nonlabel_nonphi_idx = len(block.statements)
289
287
  for i, s in enumerate(block.statements):
290
- if not isinstance(s, Label):
291
- first_nonlabel_idx = i
288
+ if not isinstance(s, Label) and not is_phi_assignment(s):
289
+ first_nonlabel_nonphi_idx = i
292
290
  break
293
- block.statements.insert(first_nonlabel_idx, stmt)
291
+ block.statements.insert(first_nonlabel_nonphi_idx, stmt)
294
292
 
295
293
  @staticmethod
296
294
  def _used_in_phi(dst_block, src_block, vvar_id: int) -> bool:
@@ -4,6 +4,7 @@ import logging
4
4
 
5
5
  import ailment
6
6
 
7
+ from angr.analyses.decompiler.stack_item import StackItem, StackItemType
7
8
  from .optimization_pass import OptimizationPass, OptimizationPassStage
8
9
 
9
10
  _l = logging.getLogger(name=__name__)
@@ -62,11 +63,16 @@ class BasePointerSaveSimplifier(OptimizationPass):
62
63
  return
63
64
 
64
65
  # update the first block
65
- block, stmt_idx, _ = save_stmt
66
+ block, stmt_idx, save_dst = save_stmt
66
67
  block_copy = block.copy()
67
68
  block_copy.statements.pop(stmt_idx)
68
69
  self._update_block(block, block_copy)
69
70
 
71
+ # update stack_items
72
+ self.stack_items[save_dst.stack_offset] = StackItem(
73
+ save_dst.stack_offset, save_dst.size, "saved_bp", StackItemType.SAVED_BP
74
+ )
75
+
70
76
  # update all endpoint blocks
71
77
  if restore_stmts:
72
78
  for block, stmt_idx, _ in restore_stmts:
@@ -74,7 +80,7 @@ class BasePointerSaveSimplifier(OptimizationPass):
74
80
  block_copy.statements.pop(stmt_idx)
75
81
  self._update_block(block, block_copy)
76
82
 
77
- def _find_baseptr_save_stmt(self):
83
+ def _find_baseptr_save_stmt(self) -> tuple[ailment.Block, int, ailment.Expr.VirtualVariable] | None:
78
84
  """
79
85
  Find the AIL statement that saves the base pointer to a stack slot.
80
86
 
@@ -1,14 +1,21 @@
1
1
  from __future__ import annotations
2
+ from typing import TYPE_CHECKING
3
+ from collections import defaultdict
2
4
 
3
5
  import networkx
4
6
 
5
7
  from ailment import AILBlockWalker, Block
6
- from ailment.statement import ConditionalJump, Statement
8
+ from ailment.statement import ConditionalJump, Statement, Assignment
7
9
  from ailment.expression import Const, BinaryOp, VirtualVariable
8
10
 
9
- from angr.analyses.decompiler.region_identifier import RegionIdentifier
11
+ from angr.analyses.decompiler.utils import first_nonlabel_nonphi_statement
12
+ from angr.utils.graph import dominates
13
+ from angr.utils.timing import timethis
10
14
  from .optimization_pass import OptimizationPass, OptimizationPassStage
11
15
 
16
+ if TYPE_CHECKING:
17
+ from angr.analyses.s_reaching_definitions import SRDAModel
18
+
12
19
 
13
20
  class ConstantCondition:
14
21
  """
@@ -35,6 +42,7 @@ class CCondPropBlockWalker(AILBlockWalker):
35
42
  self._new_block: Block | None = None # output
36
43
  self.vvar_id = vvar_id
37
44
  self.const_value = const_value
45
+ self.abort = False
38
46
 
39
47
  def walk(self, block: Block):
40
48
  self._new_block = None
@@ -42,6 +50,17 @@ class CCondPropBlockWalker(AILBlockWalker):
42
50
  return self._new_block
43
51
 
44
52
  def _handle_stmt(self, stmt_idx: int, stmt: Statement, block: Block): # type: ignore
53
+ if self.abort:
54
+ return
55
+
56
+ if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable) and stmt.dst.varid == self.vvar_id:
57
+ # we see the assignment of this virtual variable; this is the original block that creates this variable
58
+ # and checks if this variable is equal to a constant value. as such, we stop processing this block.
59
+ # an example appears in binary 1de5cda760f9ed80bb6f4a35edcebc86ccec14c49cf4775ddf2ffc3e05ff35f4, function
60
+ # 0x4657C0, blocks 0x465bd6 and 0x465a5c
61
+ self.abort = True
62
+ return
63
+
45
64
  r = super()._handle_stmt(stmt_idx, stmt, block)
46
65
  if r is not None:
47
66
  # replace the original statement
@@ -52,7 +71,9 @@ class CCondPropBlockWalker(AILBlockWalker):
52
71
  def _handle_VirtualVariable( # type: ignore
53
72
  self, expr_idx: int, expr: VirtualVariable, stmt_idx: int, stmt: Statement, block: Block | None
54
73
  ) -> Const | None:
55
- if expr.varid == self.vvar_id:
74
+ if expr.varid == self.vvar_id and not (
75
+ isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable) and stmt.dst.varid == self.vvar_id
76
+ ):
56
77
  return Const(expr.idx, None, self.const_value.value, self.const_value.bits, **expr.tags)
57
78
  return None
58
79
 
@@ -74,18 +95,9 @@ class ConditionConstantPropagation(OptimizationPass):
74
95
 
75
96
  def _check(self):
76
97
  cconds = self._find_const_conditions()
77
- if not cconds:
78
- return False, None
79
- return True, {"cconds": cconds}
80
-
81
- def _analyze(self, cache=None):
82
- if not cache or cache.get("cconds", None) is None: # noqa: SIM108
83
- cconds = self._find_const_conditions()
84
- else:
85
- cconds = cache["cconds"]
86
98
 
87
99
  if not cconds:
88
- return
100
+ return False, None
89
101
 
90
102
  # group cconds according to their sources
91
103
  cconds_by_src: dict[tuple[int, int | None], list[ConstantCondition]] = {}
@@ -95,26 +107,60 @@ class ConditionConstantPropagation(OptimizationPass):
95
107
  cconds_by_src[src] = []
96
108
  cconds_by_src[src].append(ccond)
97
109
 
110
+ # eliminate conflicting conditions
111
+ for src in list(cconds_by_src):
112
+ cconds = cconds_by_src[src]
113
+ vvar_id_to_values = defaultdict(set)
114
+ ccond_dict = {} # keyed by vvar_id; used for deduplication
115
+ for ccond in cconds:
116
+ vvar_id_to_values[ccond.vvar_id].add(ccond.value)
117
+ ccond_dict[ccond.vvar_id] = ccond
118
+ new_cconds = []
119
+ for vid, vvalues in vvar_id_to_values.items():
120
+ if len(vvalues) == 1:
121
+ new_cconds.append(ccond_dict[vid])
122
+ if new_cconds:
123
+ cconds_by_src[src] = new_cconds
124
+ else:
125
+ del cconds_by_src[src]
126
+
127
+ if not cconds_by_src:
128
+ return False, None
129
+ return True, {"cconds_by_src": cconds_by_src}
130
+
131
+ @timethis
132
+ def _analyze(self, cache=None):
133
+ if not cache or cache.get("cconds_by_src", None) is None:
134
+ return
135
+ cconds_by_src = cache["cconds_by_src"]
136
+
137
+ if not cconds_by_src:
138
+ return
139
+
98
140
  # calculate a dominance frontier for each block
99
141
  entry_node_addr, entry_node_idx = self.entry_node_addr
100
142
  entry_node = self._get_block(entry_node_addr, idx=entry_node_idx)
101
- df = networkx.algorithms.dominance_frontiers(self._graph, entry_node)
143
+ idoms = networkx.algorithms.immediate_dominators(self._graph, entry_node)
144
+ rda: SRDAModel = self.project.analyses.SReachingDefinitions(self._func, func_graph=self._graph).model
102
145
 
103
146
  for src, cconds in cconds_by_src.items():
104
147
  head_block = self._get_block(src[0], idx=src[1])
105
148
  if head_block is None:
106
149
  continue
107
- frontier = df.get(head_block)
108
- if frontier is None:
109
- continue
110
- graph_slice = RegionIdentifier.slice_graph(self._graph, head_block, frontier, include_frontier=False)
111
- for ccond in cconds:
112
- walker = CCondPropBlockWalker(ccond.vvar_id, ccond.value)
113
- for block in graph_slice:
114
- new_block = walker.walk(block)
115
- if new_block is not None:
116
- self._update_block(block, new_block)
117
150
 
151
+ for ccond in cconds:
152
+ for _, loc in rda.all_vvar_uses[ccond.vvar_id]:
153
+ loc_block = self._get_block(loc.block_addr, idx=loc.block_idx)
154
+ if loc_block is None:
155
+ continue
156
+ if dominates(idoms, head_block, loc_block):
157
+ # the constant condition dominates the use site
158
+ walker = CCondPropBlockWalker(ccond.vvar_id, ccond.value)
159
+ new_block = walker.walk(loc_block)
160
+ if new_block is not None:
161
+ self._update_block(loc_block, new_block)
162
+
163
+ @timethis
118
164
  def _find_const_conditions(self) -> list[ConstantCondition]:
119
165
  cconds = []
120
166
 
@@ -122,28 +168,46 @@ class ConditionConstantPropagation(OptimizationPass):
122
168
  if block.statements:
123
169
  last_stmt = block.statements[-1]
124
170
  if (
125
- not isinstance(last_stmt, ConditionalJump)
126
- or not isinstance(last_stmt.true_target, Const)
127
- or not isinstance(last_stmt.false_target, Const)
171
+ isinstance(last_stmt, ConditionalJump)
172
+ and isinstance(last_stmt.true_target, Const)
173
+ and isinstance(last_stmt.false_target, Const)
128
174
  ):
129
- continue
130
-
131
- if isinstance(last_stmt.condition, BinaryOp):
132
- cond = last_stmt.condition
133
- op = cond.op
134
- op0, op1 = cond.operands
135
- if isinstance(op0, Const):
136
- op0, op1 = op1, op0
137
- if isinstance(op0, VirtualVariable) and isinstance(op1, Const) and op1.is_int:
138
- if op == "CmpEQ":
139
- ccond = ConstantCondition(
140
- op0.varid, op1, last_stmt.true_target.value, last_stmt.true_target_idx # type: ignore
141
- )
142
- cconds.append(ccond)
143
- elif op == "CmpNE":
144
- ccond = ConstantCondition(
145
- op0.varid, op1, last_stmt.false_target.value, last_stmt.false_target_idx # type: ignore
146
- )
147
- cconds.append(ccond)
175
+ self._extract_const_condition_from_stmt(last_stmt, cconds)
176
+ else:
177
+ # also check the first non-phi statement; rep stos may generate blocks whose conditional checks
178
+ # are at the beginning of the block
179
+
180
+ # we could have used is_head_controlled_loop_block, but at this point the block is simplified enough
181
+ # that the first non-label, non-phi statement must be a ConditionalJump that controls the execution
182
+ # of the loop body, so the following logic should work fine.
183
+
184
+ first_stmt = first_nonlabel_nonphi_statement(block)
185
+ if (
186
+ first_stmt is not last_stmt
187
+ and isinstance(first_stmt, ConditionalJump)
188
+ and isinstance(first_stmt.true_target, Const)
189
+ and isinstance(first_stmt.false_target, Const)
190
+ ):
191
+ self._extract_const_condition_from_stmt(first_stmt, cconds)
148
192
 
149
193
  return cconds
194
+
195
+ @staticmethod
196
+ def _extract_const_condition_from_stmt(stmt: ConditionalJump, cconds: list[ConstantCondition]) -> None:
197
+ if isinstance(stmt.condition, BinaryOp):
198
+ cond = stmt.condition
199
+ op = cond.op
200
+ op0, op1 = cond.operands
201
+ if isinstance(op0, Const):
202
+ op0, op1 = op1, op0
203
+ if isinstance(op0, VirtualVariable) and isinstance(op1, Const) and op1.is_int:
204
+ if op == "CmpEQ":
205
+ ccond = ConstantCondition(
206
+ op0.varid, op1, stmt.true_target.value, stmt.true_target_idx # type: ignore
207
+ )
208
+ cconds.append(ccond)
209
+ elif op == "CmpNE":
210
+ ccond = ConstantCondition(
211
+ op0.varid, op1, stmt.false_target.value, stmt.false_target_idx # type: ignore
212
+ )
213
+ cconds.append(ccond)
@@ -192,6 +192,14 @@ class ITERegionConverter(OptimizationPass):
192
192
  if region_head not in self._graph or region_tail not in self._graph:
193
193
  return False
194
194
 
195
+ # ensure all phi statements in region_tail have valid source vvars
196
+ for stmt in region_tail.statements:
197
+ if not is_phi_assignment(stmt):
198
+ continue
199
+ for _, vvar in stmt.src.src_and_vvars:
200
+ if vvar is None:
201
+ return False
202
+
195
203
  #
196
204
  # create a new region_head
197
205
  #
@@ -216,7 +216,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
216
216
  def _analyze(self, cache=None):
217
217
  variablehash_to_cases = self._find_cascading_switch_variable_comparisons()
218
218
 
219
- if not variablehash_to_cases:
219
+ if not variablehash_to_cases or all(not caselists for caselists in variablehash_to_cases.values()):
220
220
  return False
221
221
 
222
222
  graph_copy = networkx.DiGraph(self._graph)
@@ -21,6 +21,7 @@ from angr.project import Project
21
21
 
22
22
  if TYPE_CHECKING:
23
23
  from angr.knowledge_plugins.functions import Function
24
+ from angr.analyses.decompiler.stack_item import StackItem
24
25
 
25
26
 
26
27
  _l = logging.getLogger(__name__)
@@ -160,6 +161,7 @@ class OptimizationPass(BaseOptimizationPass):
160
161
 
161
162
  # output
162
163
  self.out_graph: networkx.DiGraph | None = None
164
+ self.stack_items: dict[int, StackItem] = {}
163
165
 
164
166
  @property
165
167
  def blocks_by_addr(self) -> dict[int, set[ailment.Block]]: