angr 9.2.138__py3-none-macosx_11_0_arm64.whl → 9.2.139__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. More details are linked from the original page.

Files changed (59)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/fact_collector.py +59 -12
  3. angr/analyses/calling_convention/utils.py +2 -2
  4. angr/analyses/cfg/cfg_fast.py +12 -4
  5. angr/analyses/decompiler/ail_simplifier.py +14 -3
  6. angr/analyses/decompiler/block_simplifier.py +0 -2
  7. angr/analyses/decompiler/callsite_maker.py +80 -14
  8. angr/analyses/decompiler/clinic.py +31 -37
  9. angr/analyses/decompiler/condition_processor.py +2 -2
  10. angr/analyses/decompiler/decompiler.py +2 -0
  11. angr/analyses/decompiler/dephication/rewriting_engine.py +16 -7
  12. angr/analyses/decompiler/optimization_passes/__init__.py +3 -0
  13. angr/analyses/decompiler/optimization_passes/condition_constprop.py +149 -0
  14. angr/analyses/decompiler/optimization_passes/deadblock_remover.py +12 -3
  15. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +1 -1
  16. angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -2
  17. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +15 -7
  18. angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +7 -10
  19. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +12 -1
  20. angr/analyses/decompiler/peephole_optimizations/remove_redundant_conversions.py +61 -25
  21. angr/analyses/decompiler/peephole_optimizations/remove_redundant_shifts.py +50 -1
  22. angr/analyses/decompiler/presets/fast.py +2 -0
  23. angr/analyses/decompiler/presets/full.py +2 -0
  24. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +4 -0
  25. angr/analyses/decompiler/ssailification/rewriting_engine.py +20 -2
  26. angr/analyses/decompiler/ssailification/traversal_engine.py +4 -3
  27. angr/analyses/decompiler/structured_codegen/c.py +10 -3
  28. angr/analyses/decompiler/structuring/dream.py +7 -2
  29. angr/analyses/decompiler/structuring/phoenix.py +101 -49
  30. angr/analyses/decompiler/structuring/structurer_base.py +85 -36
  31. angr/analyses/decompiler/structuring/structurer_nodes.py +3 -1
  32. angr/analyses/deobfuscator/api_obf_finder.py +6 -1
  33. angr/analyses/deobfuscator/api_obf_type2_finder.py +158 -0
  34. angr/analyses/s_propagator.py +127 -50
  35. angr/analyses/s_reaching_definitions/s_rda_view.py +2 -2
  36. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +3 -1
  37. angr/analyses/variable_recovery/engine_ail.py +1 -1
  38. angr/analyses/variable_recovery/engine_base.py +55 -62
  39. angr/analyses/variable_recovery/engine_vex.py +1 -1
  40. angr/analyses/variable_recovery/irsb_scanner.py +2 -2
  41. angr/calling_conventions.py +66 -9
  42. angr/engines/engine.py +2 -18
  43. angr/engines/light/engine.py +3 -8
  44. angr/engines/pcode/emulate.py +2 -2
  45. angr/engines/pcode/lifter.py +2 -2
  46. angr/engines/successors.py +1 -8
  47. angr/engines/vex/lifter.py +2 -2
  48. angr/engines/vex/light/light.py +2 -2
  49. angr/knowledge_plugins/cfg/cfg_model.py +3 -2
  50. angr/knowledge_plugins/labels.py +2 -2
  51. angr/knowledge_plugins/obfuscations.py +1 -0
  52. angr/knowledge_plugins/xrefs/xref_manager.py +4 -0
  53. angr/lib/angr_native.dylib +0 -0
  54. {angr-9.2.138.dist-info → angr-9.2.139.dist-info}/METADATA +6 -6
  55. {angr-9.2.138.dist-info → angr-9.2.139.dist-info}/RECORD +59 -57
  56. {angr-9.2.138.dist-info → angr-9.2.139.dist-info}/LICENSE +0 -0
  57. {angr-9.2.138.dist-info → angr-9.2.139.dist-info}/WHEEL +0 -0
  58. {angr-9.2.138.dist-info → angr-9.2.139.dist-info}/entry_points.txt +0 -0
  59. {angr-9.2.138.dist-info → angr-9.2.139.dist-info}/top_level.txt +0 -0
File: angr/analyses/decompiler/clinic.py
@@ -109,7 +109,7 @@ class Clinic(Analysis):
109
109
  cache: DecompilationCache | None = None,
110
110
  mode: ClinicMode = ClinicMode.DECOMPILE,
111
111
  sp_shift: int = 0,
112
- inline_functions: set[Function] | None = frozenset(),
112
+ inline_functions: set[Function] | None = None,
113
113
  inlined_counts: dict[int, int] | None = None,
114
114
  inlining_parents: set[int] | None = None,
115
115
  vvar_id_start: int = 0,
@@ -131,7 +131,7 @@ class Clinic(Analysis):
131
131
  self.arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimRegArg]] | None = None
132
132
  self.variable_kb = variable_kb
133
133
  self.externs: set[SimMemoryVariable] = set()
134
- self.data_refs: dict[int, int] = {} # data address to instruction address
134
+ self.data_refs: dict[int, list[DataRefDesc]] = {} # data address to data reference description
135
135
  self.optimization_scratch = optimization_scratch if optimization_scratch is not None else {}
136
136
 
137
137
  self._func_graph: networkx.DiGraph | None = None
@@ -159,7 +159,7 @@ class Clinic(Analysis):
159
159
  # inlining help
160
160
  self._sp_shift = sp_shift
161
161
  self._max_stack_depth = 0
162
- self._inline_functions = inline_functions
162
+ self._inline_functions = inline_functions if inline_functions else set()
163
163
  self._inlined_counts = {} if inlined_counts is None else inlined_counts
164
164
  self._inlining_parents = inlining_parents or ()
165
165
  self._desired_variables = desired_variables
@@ -200,7 +200,7 @@ class Clinic(Analysis):
200
200
  """
201
201
 
202
202
  try:
203
- return self._blocks_by_addr_and_size[(addr, size)]
203
+ return self._blocks_by_addr_and_size[(addr, size)] if self._blocks_by_addr_and_size is not None else None
204
204
  except KeyError:
205
205
  return None
206
206
 
@@ -490,9 +490,7 @@ class Clinic(Analysis):
490
490
  # we never remove dead memory definitions before making callsites. otherwise stack arguments may go missing
491
491
  # before they are recognized as stack arguments.
492
492
  self._update_progress(38.0, text="Simplifying blocks 1")
493
- ail_graph = self._simplify_blocks(
494
- ail_graph, stack_pointer_tracker=spt, remove_dead_memdefs=False, cache=block_simplification_cache
495
- )
493
+ ail_graph = self._simplify_blocks(ail_graph, stack_pointer_tracker=spt, cache=block_simplification_cache)
496
494
  self._rewrite_alloca(ail_graph)
497
495
 
498
496
  # Run simplification passes
@@ -515,9 +513,7 @@ class Clinic(Analysis):
515
513
  # Run simplification passes again. there might be more chances for peephole optimizations after function-level
516
514
  # simplification
517
515
  self._update_progress(48.0, text="Simplifying blocks 2")
518
- ail_graph = self._simplify_blocks(
519
- ail_graph, stack_pointer_tracker=spt, remove_dead_memdefs=False, cache=block_simplification_cache
520
- )
516
+ ail_graph = self._simplify_blocks(ail_graph, stack_pointer_tracker=spt, cache=block_simplification_cache)
521
517
 
522
518
  # rewrite (qualified) stack variables into SSA form
523
519
  ail_graph = self._transform_to_ssa_level1(ail_graph, func_args)
@@ -557,7 +553,6 @@ class Clinic(Analysis):
557
553
  self._update_progress(60.0, text="Simplifying blocks 3")
558
554
  ail_graph = self._simplify_blocks(
559
555
  ail_graph,
560
- remove_dead_memdefs=self._remove_dead_memdefs,
561
556
  stack_pointer_tracker=spt,
562
557
  cache=block_simplification_cache,
563
558
  )
@@ -581,7 +576,6 @@ class Clinic(Analysis):
581
576
  self._update_progress(75.0, text="Simplifying blocks 4")
582
577
  ail_graph = self._simplify_blocks(
583
578
  ail_graph,
584
- remove_dead_memdefs=self._remove_dead_memdefs,
585
579
  stack_pointer_tracker=spt,
586
580
  cache=block_simplification_cache,
587
581
  )
@@ -694,9 +688,7 @@ class Clinic(Analysis):
694
688
  # we never remove dead memory definitions before making callsites. otherwise stack arguments may go missing
695
689
  # before they are recognized as stack arguments.
696
690
  self._update_progress(35.0, text="Simplifying blocks 1")
697
- ail_graph = self._simplify_blocks(
698
- ail_graph, stack_pointer_tracker=spt, remove_dead_memdefs=False, cache=block_simplification_cache
699
- )
691
+ ail_graph = self._simplify_blocks(ail_graph, stack_pointer_tracker=spt, cache=block_simplification_cache)
700
692
 
701
693
  # Simplify the entire function for the first time
702
694
  self._update_progress(45.0, text="Simplifying function 1")
@@ -1101,7 +1093,6 @@ class Clinic(Analysis):
1101
1093
  def _simplify_blocks(
1102
1094
  self,
1103
1095
  ail_graph: networkx.DiGraph,
1104
- remove_dead_memdefs=False,
1105
1096
  stack_pointer_tracker=None,
1106
1097
  cache: dict[ailment.Block, NamedTuple] | None = None,
1107
1098
  ):
@@ -1120,7 +1111,6 @@ class Clinic(Analysis):
1120
1111
  for ail_block in ail_graph.nodes():
1121
1112
  simplified = self._simplify_block(
1122
1113
  ail_block,
1123
- remove_dead_memdefs=remove_dead_memdefs,
1124
1114
  stack_pointer_tracker=stack_pointer_tracker,
1125
1115
  cache=cache,
1126
1116
  )
@@ -1138,7 +1128,7 @@ class Clinic(Analysis):
1138
1128
 
1139
1129
  return ail_graph
1140
1130
 
1141
- def _simplify_block(self, ail_block, remove_dead_memdefs=False, stack_pointer_tracker=None, cache=None):
1131
+ def _simplify_block(self, ail_block, stack_pointer_tracker=None, cache=None):
1142
1132
  """
1143
1133
  Simplify a single AIL block.
1144
1134
 
@@ -1149,8 +1139,9 @@ class Clinic(Analysis):
1149
1139
 
1150
1140
  cached_rd, cached_prop = None, None
1151
1141
  cache_item = None
1142
+ cache_key = ail_block.addr, ail_block.idx
1152
1143
  if cache:
1153
- cache_item = cache.get(ail_block, None)
1144
+ cache_item = cache.get(cache_key, None)
1154
1145
  if cache_item:
1155
1146
  # cache hit
1156
1147
  cached_rd = cache_item.rd
@@ -1160,7 +1151,6 @@ class Clinic(Analysis):
1160
1151
  ail_block,
1161
1152
  self.function.addr,
1162
1153
  fail_fast=self._fail_fast,
1163
- remove_dead_memdefs=remove_dead_memdefs,
1164
1154
  stack_pointer_tracker=stack_pointer_tracker,
1165
1155
  peephole_optimizations=self.peephole_optimizations,
1166
1156
  cached_reaching_definitions=cached_rd,
@@ -1169,8 +1159,8 @@ class Clinic(Analysis):
1169
1159
  # update the cache
1170
1160
  if cache is not None:
1171
1161
  if cache_item:
1172
- del cache[ail_block]
1173
- cache[simp.result_block] = BlockCache(simp._reaching_definitions, simp._propagator)
1162
+ del cache[cache_key]
1163
+ cache[cache_key] = BlockCache(simp._reaching_definitions, simp._propagator)
1174
1164
  return simp.result_block
1175
1165
 
1176
1166
  @timethis
@@ -1807,7 +1797,7 @@ class Clinic(Analysis):
1807
1797
  else:
1808
1798
  self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, expr.operand)
1809
1799
 
1810
- elif type(expr) is ailment.Expr.Convert:
1800
+ elif type(expr) in {ailment.Expr.Convert, ailment.Expr.Reinterpret}:
1811
1801
  self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, expr.operand)
1812
1802
 
1813
1803
  elif type(expr) is ailment.Expr.ITE:
@@ -1828,7 +1818,7 @@ class Clinic(Analysis):
1828
1818
  expr.variable = var
1829
1819
  expr.variable_offset = offset
1830
1820
 
1831
- elif isinstance(expr, ailment.Expr.Const):
1821
+ elif isinstance(expr, ailment.Expr.Const) and expr.is_int:
1832
1822
  # custom string?
1833
1823
  if hasattr(expr, "custom_string") and expr.custom_string is True:
1834
1824
  s = self.kb.custom_strings[expr.value]
@@ -1873,6 +1863,7 @@ class Clinic(Analysis):
1873
1863
  def _function_graph_to_ail_graph(self, func_graph, blocks_by_addr_and_size=None):
1874
1864
  if blocks_by_addr_and_size is None:
1875
1865
  blocks_by_addr_and_size = self._blocks_by_addr_and_size
1866
+ assert blocks_by_addr_and_size is not None
1876
1867
 
1877
1868
  graph = networkx.DiGraph()
1878
1869
 
@@ -1947,8 +1938,9 @@ class Clinic(Analysis):
1947
1938
  break
1948
1939
  if ite_expr_stmt_idx is None:
1949
1940
  return None
1941
+ assert ite_expr_stmt is not None
1950
1942
 
1951
- ite_expr: ailment.Expr.ITE = ite_expr_stmt.src
1943
+ ite_expr: ailment.Expr.ITE = ite_expr_stmt.src # type: ignore
1952
1944
  new_head_ail.statements = new_head_ail.statements[:ite_expr_stmt_idx]
1953
1945
  # build the conditional jump
1954
1946
  true_block_addr = ite_ins_addr + 1
@@ -1976,6 +1968,7 @@ class Clinic(Analysis):
1976
1968
  break
1977
1969
  if ite_expr_stmt_idx is None:
1978
1970
  return None
1971
+ assert ite_expr_stmt is not None
1979
1972
 
1980
1973
  true_block_ail.statements[ite_expr_stmt_idx] = ailment.Stmt.Assignment(
1981
1974
  ite_expr_stmt.idx, ite_expr_stmt.dst, ite_expr_stmt.src.iftrue, **ite_expr_stmt.tags
@@ -1995,6 +1988,7 @@ class Clinic(Analysis):
1995
1988
  break
1996
1989
  if ite_expr_stmt_idx is None:
1997
1990
  return None
1991
+ assert ite_expr_stmt is not None
1998
1992
 
1999
1993
  false_block_ail.statements[ite_expr_stmt_idx] = ailment.Stmt.Assignment(
2000
1994
  ite_expr_stmt.idx, ite_expr_stmt.dst, ite_expr_stmt.src.iffalse, **ite_expr_stmt.tags
@@ -2002,8 +1996,8 @@ class Clinic(Analysis):
2002
1996
 
2003
1997
  original_block = next(iter(b for b in ail_graph if b.addr == block_addr))
2004
1998
 
2005
- original_block_in_edges = list(ail_graph.in_edges(original_block))
2006
- original_block_out_edges = list(ail_graph.out_edges(original_block))
1999
+ original_block_in_edges = list(ail_graph.in_edges(original_block, data=True))
2000
+ original_block_out_edges = list(ail_graph.out_edges(original_block, data=True))
2007
2001
 
2008
2002
  # build the target block if the target block does not exist in the current function
2009
2003
  end_block_addr = ite_ins_addr + ite_insn_size
@@ -2040,19 +2034,19 @@ class Clinic(Analysis):
2040
2034
 
2041
2035
  if end_block_ail not in ail_graph:
2042
2036
  # newly created. add it and the necessary edges into the graph
2043
- for _, dst in original_block_out_edges:
2037
+ for _, dst, data in original_block_out_edges:
2044
2038
  if dst is original_block:
2045
- ail_graph.add_edge(end_block_ail, new_head_ail)
2039
+ ail_graph.add_edge(end_block_ail, new_head_ail, **data)
2046
2040
  else:
2047
- ail_graph.add_edge(end_block_ail, dst)
2041
+ ail_graph.add_edge(end_block_ail, dst, **data)
2048
2042
 
2049
2043
  # in edges
2050
- for src, _ in original_block_in_edges:
2044
+ for src, _, data in original_block_in_edges:
2051
2045
  if src is original_block:
2052
2046
  # loop
2053
- ail_graph.add_edge(end_block_ail, new_head_ail)
2047
+ ail_graph.add_edge(end_block_ail, new_head_ail, **data)
2054
2048
  else:
2055
- ail_graph.add_edge(src, new_head_ail)
2049
+ ail_graph.add_edge(src, new_head_ail, **data)
2056
2050
 
2057
2051
  # triangle
2058
2052
  ail_graph.add_edge(new_head_ail, true_block_ail)
@@ -2466,7 +2460,7 @@ class Clinic(Analysis):
2466
2460
  expr_idx: int,
2467
2461
  expr: ailment.expression.Expression,
2468
2462
  stmt_idx: int,
2469
- stmt: ailment.statement.Statement,
2463
+ stmt: ailment.statement.Statement | None,
2470
2464
  block: ailment.Block | None,
2471
2465
  ):
2472
2466
  if expr is None:
@@ -2500,7 +2494,7 @@ class Clinic(Analysis):
2500
2494
  expr: ailment.expression.Const,
2501
2495
  stmt_idx: int,
2502
2496
  stmt: ailment.statement.Statement,
2503
- block: ailment.Block | None,
2497
+ block: ailment.Block,
2504
2498
  ):
2505
2499
  if isinstance(expr.value, int) and hasattr(expr, "ins_addr"):
2506
2500
  data_refs[block.addr].append(
@@ -2518,7 +2512,7 @@ class Clinic(Analysis):
2518
2512
  expr: ailment.expression.Load,
2519
2513
  stmt_idx: int,
2520
2514
  stmt: ailment.statement.Statement,
2521
- block: ailment.Block | None,
2515
+ block: ailment.Block,
2522
2516
  ):
2523
2517
  if isinstance(expr.addr, ailment.expression.Const):
2524
2518
  addr = expr.addr
@@ -2548,7 +2542,7 @@ class Clinic(Analysis):
2548
2542
 
2549
2543
  return ailment.AILBlockWalker._handle_Load(walker, expr_idx, expr, stmt_idx, stmt, block)
2550
2544
 
2551
- def handle_Store(stmt_idx: int, stmt: ailment.statement.Store, block: ailment.Block | None):
2545
+ def handle_Store(stmt_idx: int, stmt: ailment.statement.Store, block: ailment.Block):
2552
2546
  if isinstance(stmt.addr, ailment.expression.Const):
2553
2547
  addr = stmt.addr
2554
2548
  if isinstance(addr.value, int) and hasattr(addr, "ins_addr"):
File: angr/analyses/decompiler/condition_processor.py
@@ -18,7 +18,7 @@ from angr.utils import is_pyinstaller
18
18
  from angr.utils.graph import dominates, inverted_idoms
19
19
  from angr.block import Block, BlockNode
20
20
  from angr.errors import AngrRuntimeError
21
- from .peephole_optimizations import InvertNegatedLogicalConjunctionsAndDisjunctions
21
+ from .peephole_optimizations import InvertNegatedLogicalConjunctionsAndDisjunctions, RemoveRedundantNots
22
22
  from .structuring.structurer_nodes import (
23
23
  MultiNode,
24
24
  EmptyBlockNotice,
@@ -231,7 +231,7 @@ class ConditionProcessor:
231
231
  self._ast2annotations = {}
232
232
 
233
233
  self._peephole_expr_optimizations = [
234
- cls(None, None, None) for cls in [InvertNegatedLogicalConjunctionsAndDisjunctions]
234
+ cls(None, None, None) for cls in [InvertNegatedLogicalConjunctionsAndDisjunctions, RemoveRedundantNots]
235
235
  ]
236
236
 
237
237
  def clear(self):
File: angr/analyses/decompiler/decompiler.py
@@ -288,6 +288,8 @@ class Decompiler(Analysis):
288
288
  )
289
289
  ri = self._recover_regions(clinic.graph, cond_proc, update_graph=not delay_graph_updates)
290
290
 
291
+ self._update_progress(73.0, text="Running region-simplification passes")
292
+
291
293
  # run optimizations that may require re-RegionIdentification
292
294
  clinic.graph, ri = self._run_region_simplification_passes(
293
295
  clinic.graph,
File: angr/analyses/decompiler/dephication/rewriting_engine.py
@@ -15,6 +15,7 @@ from ailment.expression import (
15
15
  ITE,
16
16
  VEXCCallExpression,
17
17
  DirtyExpression,
18
+ Reinterpret,
18
19
  )
19
20
 
20
21
  from angr.engines.light import SimEngineNostmtAIL
@@ -164,7 +165,7 @@ class SimEngineDephiRewriting(SimEngineNostmtAIL[None, Expression | None, Statem
164
165
  return Load(expr.idx, new_addr, expr.size, expr.endness, guard=expr.guard, alt=expr.alt, **expr.tags)
165
166
  return None
166
167
 
167
- def _handle_expr_Convert(self, expr):
168
+ def _handle_expr_Convert(self, expr: Convert) -> Convert | None:
168
169
  new_operand = self._expr(expr.operand)
169
170
  if new_operand is not None:
170
171
  return Convert(
@@ -180,6 +181,20 @@ class SimEngineDephiRewriting(SimEngineNostmtAIL[None, Expression | None, Statem
180
181
  )
181
182
  return None
182
183
 
184
+ def _handle_expr_Reinterpret(self, expr: Reinterpret) -> Reinterpret | None:
185
+ new_operand = self._expr(expr.operand)
186
+ if new_operand is not None:
187
+ return Reinterpret(
188
+ expr.idx,
189
+ expr.from_bits,
190
+ expr.from_type,
191
+ expr.to_bits,
192
+ expr.to_type,
193
+ new_operand,
194
+ **expr.tags,
195
+ )
196
+ return None
197
+
183
198
  def _handle_expr_Const(self, expr):
184
199
  return None
185
200
 
@@ -346,18 +361,12 @@ class SimEngineDephiRewriting(SimEngineNostmtAIL[None, Expression | None, Statem
346
361
  )
347
362
  return None
348
363
 
349
- def _handle_expr_DirtyExpression(self, expr):
350
- return None
351
-
352
364
  def _handle_expr_MultiStatementExpression(self, expr):
353
365
  return None
354
366
 
355
367
  def _handle_expr_Register(self, expr):
356
368
  return None
357
369
 
358
- def _handle_expr_Reinterpret(self, expr):
359
- return None
360
-
361
370
  def _handle_expr_Tmp(self, expr):
362
371
  return None
363
372
 
File: angr/analyses/decompiler/optimization_passes/__init__.py
@@ -32,6 +32,7 @@ from .const_prop_reverter import ConstPropOptReverter
32
32
  from .call_stmt_rewriter import CallStatementRewriter
33
33
  from .duplication_reverter import DuplicationReverter
34
34
  from .switch_reused_entry_rewriter import SwitchReusedEntryRewriter
35
+ from .condition_constprop import ConditionConstantPropagation
35
36
 
36
37
  if TYPE_CHECKING:
37
38
  from angr.analyses.decompiler.presets import DecompilationPreset
@@ -66,6 +67,7 @@ ALL_OPTIMIZATION_PASSES = [
66
67
  InlinedStringTransformationSimplifier,
67
68
  CallStatementRewriter,
68
69
  TagSlicer,
70
+ ConditionConstantPropagation,
69
71
  ]
70
72
 
71
73
  # these passes may duplicate code to remove gotos or improve the structure of the graph
@@ -113,6 +115,7 @@ __all__ = (
113
115
  "BasePointerSaveSimplifier",
114
116
  "CallStatementRewriter",
115
117
  "CodeMotionOptimization",
118
+ "ConditionConstantPropagation",
116
119
  "ConstPropOptReverter",
117
120
  "ConstantDereferencesSimplifier",
118
121
  "CrossJumpReverter",
File: angr/analyses/decompiler/optimization_passes/condition_constprop.py (new file)
@@ -0,0 +1,149 @@
1
+ from __future__ import annotations
2
+
3
+ import networkx
4
+
5
+ from ailment import AILBlockWalker, Block
6
+ from ailment.statement import ConditionalJump, Statement
7
+ from ailment.expression import Const, BinaryOp, VirtualVariable
8
+
9
+ from angr.analyses.decompiler.region_identifier import RegionIdentifier
10
+ from .optimization_pass import OptimizationPass, OptimizationPassStage
11
+
12
+
13
+ class ConstantCondition:
14
+ """
15
+ Describes an opportunity for replacing a vvar with a constant value.
16
+ """
17
+
18
+ def __init__(self, vvar_id: int, value: Const, block_addr: int, block_idx: int | None):
19
+ self.vvar_id = vvar_id
20
+ self.value = value
21
+ self.block_addr = block_addr
22
+ self.block_idx = block_idx
23
+
24
+ def __repr__(self):
25
+ return f"<ConstCond vvar_{self.vvar_id} == {self.value} since {self.block_addr:#x}-{self.block_idx}>"
26
+
27
+
28
+ class CCondPropBlockWalker(AILBlockWalker):
29
+ """
30
+ Block walker for ConditionConstantPropagation to replace vvars with constant values.
31
+ """
32
+
33
+ def __init__(self, vvar_id: int, const_value: Const):
34
+ super().__init__()
35
+ self._new_block: Block | None = None # output
36
+ self.vvar_id = vvar_id
37
+ self.const_value = const_value
38
+
39
+ def walk(self, block: Block):
40
+ self._new_block = None
41
+ super().walk(block)
42
+ return self._new_block
43
+
44
+ def _handle_stmt(self, stmt_idx: int, stmt: Statement, block: Block): # type: ignore
45
+ r = super()._handle_stmt(stmt_idx, stmt, block)
46
+ if r is not None:
47
+ # replace the original statement
48
+ if self._new_block is None:
49
+ self._new_block = block.copy()
50
+ self._new_block.statements[stmt_idx] = r
51
+
52
+ def _handle_VirtualVariable( # type: ignore
53
+ self, expr_idx: int, expr: VirtualVariable, stmt_idx: int, stmt: Statement, block: Block | None
54
+ ) -> Const | None:
55
+ if expr.varid == self.vvar_id:
56
+ return Const(expr.idx, None, self.const_value.value, self.const_value.bits, **expr.tags)
57
+ return None
58
+
59
+
60
+ class ConditionConstantPropagation(OptimizationPass):
61
+ """
62
+ Reason about constant propagation opportunities from conditionals and propagate constants in the graph accordingly.
63
+ """
64
+
65
+ ARCHES = None
66
+ PLATFORMS = None
67
+ STAGE = OptimizationPassStage.AFTER_SINGLE_BLOCK_SIMPLIFICATION
68
+ NAME = "Propagate constants using information deduced from conditionals."
69
+ DESCRIPTION = __doc__.strip() # type: ignore
70
+
71
+ def __init__(self, func, **kwargs):
72
+ super().__init__(func, **kwargs)
73
+ self.analyze()
74
+
75
+ def _check(self):
76
+ cconds = self._find_const_conditions()
77
+ if not cconds:
78
+ return False, None
79
+ return True, {"cconds": cconds}
80
+
81
+ def _analyze(self, cache=None):
82
+ if not cache or cache.get("cconds", None) is None: # noqa: SIM108
83
+ cconds = self._find_const_conditions()
84
+ else:
85
+ cconds = cache["cconds"]
86
+
87
+ if not cconds:
88
+ return
89
+
90
+ # group cconds according to their sources
91
+ cconds_by_src: dict[tuple[int, int | None], list[ConstantCondition]] = {}
92
+ for ccond in cconds:
93
+ src = ccond.block_addr, ccond.block_idx
94
+ if src not in cconds_by_src:
95
+ cconds_by_src[src] = []
96
+ cconds_by_src[src].append(ccond)
97
+
98
+ # calculate a dominance frontier for each block
99
+ entry_node_addr, entry_node_idx = self.entry_node_addr
100
+ entry_node = self._get_block(entry_node_addr, idx=entry_node_idx)
101
+ df = networkx.algorithms.dominance_frontiers(self._graph, entry_node)
102
+
103
+ for src, cconds in cconds_by_src.items():
104
+ head_block = self._get_block(src[0], idx=src[1])
105
+ if head_block is None:
106
+ continue
107
+ frontier = df.get(head_block)
108
+ if frontier is None:
109
+ continue
110
+ graph_slice = RegionIdentifier.slice_graph(self._graph, head_block, frontier, include_frontier=False)
111
+ for ccond in cconds:
112
+ walker = CCondPropBlockWalker(ccond.vvar_id, ccond.value)
113
+ for block in graph_slice:
114
+ new_block = walker.walk(block)
115
+ if new_block is not None:
116
+ self._update_block(block, new_block)
117
+
118
+ def _find_const_conditions(self) -> list[ConstantCondition]:
119
+ cconds = []
120
+
121
+ for block in self._graph:
122
+ if block.statements:
123
+ last_stmt = block.statements[-1]
124
+ if (
125
+ not isinstance(last_stmt, ConditionalJump)
126
+ or not isinstance(last_stmt.true_target, Const)
127
+ or not isinstance(last_stmt.false_target, Const)
128
+ ):
129
+ continue
130
+
131
+ if isinstance(last_stmt.condition, BinaryOp):
132
+ cond = last_stmt.condition
133
+ op = cond.op
134
+ op0, op1 = cond.operands
135
+ if isinstance(op0, Const):
136
+ op0, op1 = op1, op0
137
+ if isinstance(op0, VirtualVariable) and isinstance(op1, Const) and op1.is_int:
138
+ if op == "CmpEQ":
139
+ ccond = ConstantCondition(
140
+ op0.varid, op1, last_stmt.true_target.value, last_stmt.true_target_idx # type: ignore
141
+ )
142
+ cconds.append(ccond)
143
+ elif op == "CmpNE":
144
+ ccond = ConstantCondition(
145
+ op0.varid, op1, last_stmt.false_target.value, last_stmt.false_target_idx # type: ignore
146
+ )
147
+ cconds.append(ccond)
148
+
149
+ return cconds
File: angr/analyses/decompiler/optimization_passes/deadblock_remover.py
@@ -25,13 +25,19 @@ class DeadblockRemover(OptimizationPass):
25
25
  PLATFORMS = None
26
26
  STAGE = OptimizationPassStage.BEFORE_REGION_IDENTIFICATION
27
27
  NAME = "Remove blocks with unsatisfiable conditions"
28
- DESCRIPTION = __doc__.strip()
28
+ DESCRIPTION = __doc__.strip() # type: ignore
29
29
 
30
- def __init__(self, func, **kwargs):
30
+ def __init__(self, func, node_cutoff: int = 200, **kwargs):
31
31
  super().__init__(func, **kwargs)
32
+ self._node_cutoff = node_cutoff
32
33
  self.analyze()
33
34
 
34
35
  def _check(self):
36
+ # don't run this optimization on super large functions
37
+ assert self._graph is not None
38
+ if len(self._graph) >= self._node_cutoff:
39
+ return False, None
40
+
35
41
  cond_proc = ConditionProcessor(self.project.arch)
36
42
  if networkx.is_directed_acyclic_graph(self._graph):
37
43
  acyclic_graph = self._graph
@@ -45,7 +51,10 @@ class DeadblockRemover(OptimizationPass):
45
51
  cache = {"cond_proc": cond_proc}
46
52
  return True, cache
47
53
 
48
- def _analyze(self, cache=None):
54
+ def _analyze(self, cache: dict | None = None):
55
+ assert cache is not None
56
+ assert self._graph is not None
57
+
49
58
  cond_proc = cache["cond_proc"]
50
59
  to_remove = {
51
60
  blk
File: angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py
@@ -136,7 +136,7 @@ class InlinedStringTransformationAILEngine(
136
136
  # jumped to a node that we do not know about
137
137
  break
138
138
  block = self.nodes[self.pc]
139
- self._process(state, block=block, whitelist=None)
139
+ self.process(state, block=block, whitelist=None)
140
140
  if self.pc is None:
141
141
  # not sure where to jump...
142
142
  break
File: angr/analyses/decompiler/optimization_passes/optimization_pass.py
@@ -111,12 +111,15 @@ class OptimizationPass(BaseOptimizationPass):
111
111
  The base class for any function-level graph optimization pass.
112
112
  """
113
113
 
114
+ _graph: networkx.DiGraph
115
+
114
116
  def __init__(
115
117
  self,
116
118
  func,
119
+ *,
120
+ graph,
117
121
  blocks_by_addr=None,
118
122
  blocks_by_addr_and_idx=None,
119
- graph=None,
120
123
  variable_kb=None,
121
124
  region_identifier=None,
122
125
  reaching_definitions=None,
@@ -132,7 +135,7 @@ class OptimizationPass(BaseOptimizationPass):
132
135
  # self._blocks is just a cache
133
136
  self._blocks_by_addr: dict[int, set[ailment.Block]] = blocks_by_addr or {}
134
137
  self._blocks_by_addr_and_idx: dict[tuple[int, int | None], ailment.Block] = blocks_by_addr_and_idx or {}
135
- self._graph: networkx.DiGraph | None = graph
138
+ self._graph = graph
136
139
  self._variable_kb = variable_kb
137
140
  self._ri = region_identifier
138
141
  self._rd = reaching_definitions
File: angr/analyses/decompiler/optimization_passes/return_duplicator_base.py
@@ -44,7 +44,9 @@ class FreshVirtualVariableRewriter(AILBlockWalker):
44
44
 
45
45
  return new_stmt
46
46
 
47
- def _handle_VirtualVariable(self, expr_idx: int, expr: VirtualVariable, stmt_idx: int, stmt, block: Block | None):
47
+ def _handle_VirtualVariable( # type:ignore
48
+ self, expr_idx: int, expr: VirtualVariable, stmt_idx: int, stmt, block: Block | None
49
+ ) -> VirtualVariable | None:
48
50
  if expr.varid in self.vvar_mapping:
49
51
  return VirtualVariable(
50
52
  expr.idx,
@@ -58,7 +60,7 @@ class FreshVirtualVariableRewriter(AILBlockWalker):
58
60
  )
59
61
  return None
60
62
 
61
- def _handle_stmt(self, stmt_idx: int, stmt, block: Block):
63
+ def _handle_stmt(self, stmt_idx: int, stmt, block: Block): # type:ignore
62
64
  r = super()._handle_stmt(stmt_idx, stmt, block)
63
65
  if r is not None:
64
66
  # replace the original statement
@@ -77,10 +79,11 @@ class ReturnDuplicatorBase:
77
79
  def __init__(
78
80
  self,
79
81
  func,
82
+ *,
83
+ vvar_id_start: int,
80
84
  max_calls_in_regions: int = 2,
81
85
  minimize_copies_for_regions: bool = True,
82
86
  ri: RegionIdentifier | None = None,
83
- vvar_id_start: int | None = None,
84
87
  scratch: dict[str, Any] | None = None,
85
88
  ):
86
89
  self._max_calls_in_region = max_calls_in_regions
@@ -257,8 +260,7 @@ class ReturnDuplicatorBase:
257
260
  # not used in this branch. drop this statement
258
261
  continue
259
262
  else:
260
- phi_var = Phi(stmt.src.idx, stmt.src.bits, [((pred.addr, pred.idx), vvar_src)], **stmt.src.tags)
261
- new_stmt = Assignment(stmt.idx, stmt.dst, phi_var, **stmt.tags)
263
+ new_stmt = Assignment(stmt.idx, stmt.dst, vvar_src, **stmt.tags)
262
264
  stmts.append(new_stmt)
263
265
  continue
264
266
  stmts.append(stmt)
@@ -287,6 +289,8 @@ class ReturnDuplicatorBase:
287
289
  self, endnode_regions: dict[Any, tuple[list[tuple[Any, Any]], networkx.DiGraph]], graph: networkx.DiGraph
288
290
  ):
289
291
  updated_regions = endnode_regions.copy()
292
+ assert self._ri is not None
293
+ assert isinstance(self._ri.region, GraphRegion)
290
294
  all_region_block_addrs = list(self._find_block_sets_in_all_regions(self._ri.region).values())
291
295
  for region_head, (in_edges, region) in endnode_regions.items():
292
296
  is_single_const_ret_region = self._is_simple_return_graph(region)
@@ -356,7 +360,7 @@ class ReturnDuplicatorBase:
356
360
  return False
357
361
 
358
362
  # check if the graph is a single successor chain
359
- if not all(labeless_graph.out_degree(n) <= 1 for n in nodes):
363
+ if not all(labeless_graph.out_degree[n] <= 1 for n in nodes):
360
364
  return False
361
365
 
362
366
  # collect the statements from the top node, make sure one exists
@@ -398,7 +402,11 @@ class ReturnDuplicatorBase:
398
402
  if ret_exprs and len(ret_exprs) > 1:
399
403
  return False
400
404
 
401
- ret_expr = ReturnDuplicatorBase.unwrap_conv(ret_exprs[0]) if ret_exprs and len(ret_exprs) == 1 else None
405
+ if not ret_exprs:
406
+ # a simple return statement that does not carry any value or variable to return
407
+ return True
408
+
409
+ ret_expr = ReturnDuplicatorBase.unwrap_conv(ret_exprs[0])
402
410
  # check if ret_expr is a virtual variable or not
403
411
  if not isinstance(ret_expr, (VirtualVariable, Const)):
404
412
  return False