angr-9.2.138-py3-none-manylinux2014_x86_64.whl → angr-9.2.140-py3-none-manylinux2014_x86_64.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.

Potentially problematic release: this version of angr might be problematic.

Files changed (100)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +48 -21
  3. angr/analyses/calling_convention/fact_collector.py +59 -12
  4. angr/analyses/calling_convention/utils.py +2 -2
  5. angr/analyses/cfg/cfg_base.py +13 -0
  6. angr/analyses/cfg/cfg_fast.py +23 -4
  7. angr/analyses/decompiler/ail_simplifier.py +79 -53
  8. angr/analyses/decompiler/block_simplifier.py +0 -2
  9. angr/analyses/decompiler/callsite_maker.py +80 -14
  10. angr/analyses/decompiler/clinic.py +99 -80
  11. angr/analyses/decompiler/condition_processor.py +2 -2
  12. angr/analyses/decompiler/decompiler.py +19 -7
  13. angr/analyses/decompiler/dephication/rewriting_engine.py +16 -7
  14. angr/analyses/decompiler/expression_narrower.py +1 -1
  15. angr/analyses/decompiler/optimization_passes/__init__.py +3 -0
  16. angr/analyses/decompiler/optimization_passes/condition_constprop.py +149 -0
  17. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +8 -7
  18. angr/analyses/decompiler/optimization_passes/deadblock_remover.py +12 -3
  19. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +1 -1
  20. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +21 -13
  21. angr/analyses/decompiler/optimization_passes/optimization_pass.py +21 -12
  22. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +17 -9
  23. angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +7 -10
  24. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +12 -1
  25. angr/analyses/decompiler/peephole_optimizations/remove_redundant_conversions.py +61 -25
  26. angr/analyses/decompiler/peephole_optimizations/remove_redundant_shifts.py +50 -1
  27. angr/analyses/decompiler/presets/fast.py +2 -0
  28. angr/analyses/decompiler/presets/full.py +2 -0
  29. angr/analyses/decompiler/region_simplifiers/expr_folding.py +259 -108
  30. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +28 -9
  31. angr/analyses/decompiler/ssailification/rewriting_engine.py +20 -2
  32. angr/analyses/decompiler/ssailification/traversal_engine.py +4 -3
  33. angr/analyses/decompiler/structured_codegen/c.py +10 -3
  34. angr/analyses/decompiler/structuring/dream.py +28 -19
  35. angr/analyses/decompiler/structuring/phoenix.py +253 -89
  36. angr/analyses/decompiler/structuring/recursive_structurer.py +1 -0
  37. angr/analyses/decompiler/structuring/structurer_base.py +121 -46
  38. angr/analyses/decompiler/structuring/structurer_nodes.py +6 -1
  39. angr/analyses/decompiler/utils.py +60 -1
  40. angr/analyses/deobfuscator/api_obf_finder.py +13 -5
  41. angr/analyses/deobfuscator/api_obf_type2_finder.py +166 -0
  42. angr/analyses/deobfuscator/string_obf_finder.py +105 -18
  43. angr/analyses/forward_analysis/forward_analysis.py +1 -1
  44. angr/analyses/propagator/top_checker_mixin.py +6 -6
  45. angr/analyses/reaching_definitions/__init__.py +2 -1
  46. angr/analyses/reaching_definitions/dep_graph.py +1 -12
  47. angr/analyses/reaching_definitions/engine_vex.py +36 -31
  48. angr/analyses/reaching_definitions/function_handler.py +15 -2
  49. angr/analyses/reaching_definitions/rd_state.py +1 -37
  50. angr/analyses/reaching_definitions/reaching_definitions.py +13 -24
  51. angr/analyses/s_propagator.py +129 -87
  52. angr/analyses/s_reaching_definitions/s_rda_model.py +7 -1
  53. angr/analyses/s_reaching_definitions/s_rda_view.py +2 -2
  54. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +3 -1
  55. angr/analyses/stack_pointer_tracker.py +36 -22
  56. angr/analyses/typehoon/simple_solver.py +45 -7
  57. angr/analyses/typehoon/typeconsts.py +18 -5
  58. angr/analyses/variable_recovery/engine_ail.py +1 -1
  59. angr/analyses/variable_recovery/engine_base.py +62 -67
  60. angr/analyses/variable_recovery/engine_vex.py +1 -1
  61. angr/analyses/variable_recovery/irsb_scanner.py +2 -2
  62. angr/block.py +69 -107
  63. angr/callable.py +14 -7
  64. angr/calling_conventions.py +81 -10
  65. angr/distributed/__init__.py +1 -1
  66. angr/engines/__init__.py +7 -8
  67. angr/engines/engine.py +3 -138
  68. angr/engines/failure.py +2 -2
  69. angr/engines/hook.py +2 -2
  70. angr/engines/light/engine.py +5 -10
  71. angr/engines/pcode/emulate.py +2 -2
  72. angr/engines/pcode/engine.py +2 -14
  73. angr/engines/pcode/lifter.py +2 -2
  74. angr/engines/procedure.py +2 -2
  75. angr/engines/soot/engine.py +2 -2
  76. angr/engines/soot/statements/switch.py +1 -1
  77. angr/engines/successors.py +123 -17
  78. angr/engines/syscall.py +2 -2
  79. angr/engines/unicorn.py +3 -3
  80. angr/engines/vex/heavy/heavy.py +3 -15
  81. angr/engines/vex/lifter.py +2 -2
  82. angr/engines/vex/light/light.py +2 -2
  83. angr/factory.py +4 -19
  84. angr/knowledge_plugins/cfg/cfg_model.py +3 -2
  85. angr/knowledge_plugins/key_definitions/atoms.py +8 -4
  86. angr/knowledge_plugins/key_definitions/live_definitions.py +41 -103
  87. angr/knowledge_plugins/labels.py +2 -2
  88. angr/knowledge_plugins/obfuscations.py +1 -0
  89. angr/knowledge_plugins/xrefs/xref_manager.py +4 -0
  90. angr/sim_type.py +19 -17
  91. angr/state_plugins/plugin.py +19 -4
  92. angr/storage/memory_mixins/memory_mixin.py +1 -1
  93. angr/storage/memory_mixins/paged_memory/pages/multi_values.py +10 -5
  94. angr/utils/ssa/__init__.py +119 -4
  95. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/METADATA +6 -6
  96. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/RECORD +100 -98
  97. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/LICENSE +0 -0
  98. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/WHEEL +0 -0
  99. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/entry_points.txt +0 -0
  100. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/top_level.txt +0 -0

angr/analyses/decompiler/optimization_passes/__init__.py
@@ -32,6 +32,7 @@ from .const_prop_reverter import ConstPropOptReverter
 from .call_stmt_rewriter import CallStatementRewriter
 from .duplication_reverter import DuplicationReverter
 from .switch_reused_entry_rewriter import SwitchReusedEntryRewriter
+from .condition_constprop import ConditionConstantPropagation
 
 if TYPE_CHECKING:
     from angr.analyses.decompiler.presets import DecompilationPreset
@@ -66,6 +67,7 @@ ALL_OPTIMIZATION_PASSES = [
     InlinedStringTransformationSimplifier,
     CallStatementRewriter,
     TagSlicer,
+    ConditionConstantPropagation,
 ]
 
 # these passes may duplicate code to remove gotos or improve the structure of the graph
@@ -113,6 +115,7 @@ __all__ = (
     "BasePointerSaveSimplifier",
     "CallStatementRewriter",
     "CodeMotionOptimization",
+    "ConditionConstantPropagation",
     "ConstPropOptReverter",
     "ConstantDereferencesSimplifier",
     "CrossJumpReverter",

angr/analyses/decompiler/optimization_passes/condition_constprop.py (new file)
@@ -0,0 +1,149 @@
+from __future__ import annotations
+
+import networkx
+
+from ailment import AILBlockWalker, Block
+from ailment.statement import ConditionalJump, Statement
+from ailment.expression import Const, BinaryOp, VirtualVariable
+
+from angr.analyses.decompiler.region_identifier import RegionIdentifier
+from .optimization_pass import OptimizationPass, OptimizationPassStage
+
+
+class ConstantCondition:
+    """
+    Describes an opportunity for replacing a vvar with a constant value.
+    """
+
+    def __init__(self, vvar_id: int, value: Const, block_addr: int, block_idx: int | None):
+        self.vvar_id = vvar_id
+        self.value = value
+        self.block_addr = block_addr
+        self.block_idx = block_idx
+
+    def __repr__(self):
+        return f"<ConstCond vvar_{self.vvar_id} == {self.value} since {self.block_addr:#x}-{self.block_idx}>"
+
+
+class CCondPropBlockWalker(AILBlockWalker):
+    """
+    Block walker for ConditionConstantPropagation to replace vvars with constant values.
+    """
+
+    def __init__(self, vvar_id: int, const_value: Const):
+        super().__init__()
+        self._new_block: Block | None = None  # output
+        self.vvar_id = vvar_id
+        self.const_value = const_value
+
+    def walk(self, block: Block):
+        self._new_block = None
+        super().walk(block)
+        return self._new_block
+
+    def _handle_stmt(self, stmt_idx: int, stmt: Statement, block: Block):  # type: ignore
+        r = super()._handle_stmt(stmt_idx, stmt, block)
+        if r is not None:
+            # replace the original statement
+            if self._new_block is None:
+                self._new_block = block.copy()
+            self._new_block.statements[stmt_idx] = r
+
+    def _handle_VirtualVariable(  # type: ignore
+        self, expr_idx: int, expr: VirtualVariable, stmt_idx: int, stmt: Statement, block: Block | None
+    ) -> Const | None:
+        if expr.varid == self.vvar_id:
+            return Const(expr.idx, None, self.const_value.value, self.const_value.bits, **expr.tags)
+        return None
+
+
+class ConditionConstantPropagation(OptimizationPass):
+    """
+    Reason about constant propagation opportunities from conditionals and propagate constants in the graph accordingly.
+    """
+
+    ARCHES = None
+    PLATFORMS = None
+    STAGE = OptimizationPassStage.AFTER_SINGLE_BLOCK_SIMPLIFICATION
+    NAME = "Propagate constants using information deduced from conditionals."
+    DESCRIPTION = __doc__.strip()  # type: ignore
+
+    def __init__(self, func, **kwargs):
+        super().__init__(func, **kwargs)
+        self.analyze()
+
+    def _check(self):
+        cconds = self._find_const_conditions()
+        if not cconds:
+            return False, None
+        return True, {"cconds": cconds}
+
+    def _analyze(self, cache=None):
+        if not cache or cache.get("cconds", None) is None:  # noqa: SIM108
+            cconds = self._find_const_conditions()
+        else:
+            cconds = cache["cconds"]
+
+        if not cconds:
+            return
+
+        # group cconds according to their sources
+        cconds_by_src: dict[tuple[int, int | None], list[ConstantCondition]] = {}
+        for ccond in cconds:
+            src = ccond.block_addr, ccond.block_idx
+            if src not in cconds_by_src:
+                cconds_by_src[src] = []
+            cconds_by_src[src].append(ccond)
+
+        # calculate a dominance frontier for each block
+        entry_node_addr, entry_node_idx = self.entry_node_addr
+        entry_node = self._get_block(entry_node_addr, idx=entry_node_idx)
+        df = networkx.algorithms.dominance_frontiers(self._graph, entry_node)
+
+        for src, cconds in cconds_by_src.items():
+            head_block = self._get_block(src[0], idx=src[1])
+            if head_block is None:
+                continue
+            frontier = df.get(head_block)
+            if frontier is None:
+                continue
+            graph_slice = RegionIdentifier.slice_graph(self._graph, head_block, frontier, include_frontier=False)
+            for ccond in cconds:
+                walker = CCondPropBlockWalker(ccond.vvar_id, ccond.value)
+                for block in graph_slice:
+                    new_block = walker.walk(block)
+                    if new_block is not None:
+                        self._update_block(block, new_block)
+
+    def _find_const_conditions(self) -> list[ConstantCondition]:
+        cconds = []
+
+        for block in self._graph:
+            if block.statements:
+                last_stmt = block.statements[-1]
+                if (
+                    not isinstance(last_stmt, ConditionalJump)
+                    or not isinstance(last_stmt.true_target, Const)
+                    or not isinstance(last_stmt.false_target, Const)
+                ):
+                    continue
+
+                if isinstance(last_stmt.condition, BinaryOp):
+                    cond = last_stmt.condition
+                    op = cond.op
+                    op0, op1 = cond.operands
+                    if isinstance(op0, Const):
+                        op0, op1 = op1, op0
+                    if isinstance(op0, VirtualVariable) and isinstance(op1, Const) and op1.is_int:
+                        if op == "CmpEQ":
+                            ccond = ConstantCondition(
+                                op0.varid, op1, last_stmt.true_target.value, last_stmt.true_target_idx  # type: ignore
+                            )
+                            cconds.append(ccond)
+                        elif op == "CmpNE":
+                            ccond = ConstantCondition(
+                                op0.varid, op1, last_stmt.false_target.value, last_stmt.false_target_idx  # type: ignore
+                            )
+                            cconds.append(ccond)
+
+        return cconds
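
In effect, the new pass looks for a block that ends in a ConditionalJump whose condition compares a virtual variable against an integer constant, and then rewrites every block dominated by the matching branch target (up to its dominance frontier), replacing uses of that vvar with the constant. A minimal, hypothetical driver is sketched below; the binary path and function name are placeholders, and the pass is assumed to be applied automatically because it is registered in ALL_OPTIMIZATION_PASSES above and the decompilation presets are updated elsewhere in this release:

    import angr

    # Hypothetical target; any binary with a function whose branches compare a
    # variable against an integer constant will do.
    proj = angr.Project("./a.out", auto_load_libs=False)
    cfg = proj.analyses.CFGFast(normalize=True)
    func = proj.kb.functions["main"]

    # The decompiler applies the optimization passes selected by its preset,
    # which in 9.2.140 is expected to include ConditionConstantPropagation.
    dec = proj.analyses.Decompiler(func, cfg=cfg.model)
    print(dec.codegen.text)

At the source level, a snippet like "if (x == 5) return f(x);" would then decompile as "if (x == 5) return f(5);", which often enables further constant folding downstream.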

angr/analyses/decompiler/optimization_passes/const_prop_reverter.py
@@ -8,7 +8,7 @@ import claripy
 from ailment import Const
 from ailment.block_walker import AILBlockWalkerBase
 from ailment.statement import Call, Statement, ConditionalJump, Assignment, Store, Return
-from ailment.expression import Convert, Register, Expression
+from ailment.expression import Convert, Register, Expression, Load
 
 from .optimization_pass import OptimizationPass, OptimizationPassStage
 from angr.analyses.decompiler.structuring import SAILRStructurer, DreamStructurer
@@ -207,16 +207,17 @@ class ConstPropOptReverter(OptimizationPass):
                 continue
 
             unwrapped_sym_arg = sym_arg.operands[0] if isinstance(sym_arg, Convert) else sym_arg
-            try:
+            if (
+                isinstance(unwrapped_sym_arg, Load)
+                and isinstance(unwrapped_sym_arg.addr, Const)
+                and isinstance(unwrapped_sym_arg.addr.value, int)
+            ):
                 # TODO: make this support more than just Loads
                 # target must be a Load of a memory location
                 target_atom = MemoryLocation(unwrapped_sym_arg.addr.value, unwrapped_sym_arg.size, "Iend_LE")
                 const_state = self.rd.get_reaching_definitions_by_node(blks[calls[const_arg]].addr, OP_BEFORE)
-
-                state_load_vals = const_state.get_value_from_atom(target_atom)
-            except AttributeError:
-                continue
-            except KeyError:
+                state_load_vals = const_state.get_values(target_atom)
+            else:
                 continue
 
             if not state_load_vals:

angr/analyses/decompiler/optimization_passes/deadblock_remover.py
@@ -25,13 +25,19 @@ class DeadblockRemover(OptimizationPass):
     PLATFORMS = None
     STAGE = OptimizationPassStage.BEFORE_REGION_IDENTIFICATION
     NAME = "Remove blocks with unsatisfiable conditions"
-    DESCRIPTION = __doc__.strip()
+    DESCRIPTION = __doc__.strip()  # type: ignore
 
-    def __init__(self, func, **kwargs):
+    def __init__(self, func, node_cutoff: int = 200, **kwargs):
         super().__init__(func, **kwargs)
+        self._node_cutoff = node_cutoff
         self.analyze()
 
     def _check(self):
+        # don't run this optimization on super large functions
+        assert self._graph is not None
+        if len(self._graph) >= self._node_cutoff:
+            return False, None
+
         cond_proc = ConditionProcessor(self.project.arch)
         if networkx.is_directed_acyclic_graph(self._graph):
             acyclic_graph = self._graph
@@ -45,7 +51,10 @@ class DeadblockRemover(OptimizationPass):
         cache = {"cond_proc": cond_proc}
         return True, cache
 
-    def _analyze(self, cache=None):
+    def _analyze(self, cache: dict | None = None):
+        assert cache is not None
+        assert self._graph is not None
+
         cond_proc = cache["cond_proc"]
         to_remove = {
             blk

angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py
@@ -136,7 +136,7 @@ class InlinedStringTransformationAILEngine(
                 # jumped to a node that we do not know about
                 break
             block = self.nodes[self.pc]
-            self._process(state, block=block, whitelist=None)
+            self.process(state, block=block, whitelist=None)
             if self.pc is None:
                 # not sure where to jump...
                 break

angr/analyses/decompiler/optimization_passes/ite_region_converter.py
@@ -287,19 +287,27 @@ class ITERegionConverter(OptimizationPass):
                     ((region_head.addr, region_head.idx), original_vvars[0] if original_vvars else None)
                 )
 
-                new_phi = Phi(
-                    stmt.src.idx,
-                    stmt.src.bits,
-                    new_src_and_vvars,
-                    **stmt.src.tags,
-                )
-                new_phi_assignment = Assignment(
-                    stmt.idx,
-                    stmt.dst,
-                    new_phi,
-                    **stmt.tags,
-                )
-                stmts.append(new_phi_assignment)
+                if len(new_src_and_vvars) == 1:
+                    new_assignment = Assignment(
+                        stmt.idx,
+                        stmt.dst,
+                        new_src_and_vvars[0][1],
+                        **stmt.tags,
+                    )
+                else:
+                    new_phi = Phi(
+                        stmt.src.idx,
+                        stmt.src.bits,
+                        new_src_and_vvars,
+                        **stmt.src.tags,
+                    )
+                    new_assignment = Assignment(
+                        stmt.idx,
+                        stmt.dst,
+                        new_phi,
+                        **stmt.tags,
+                    )
+                stmts.append(new_assignment)
         new_region_tail = Block(region_tail.addr, region_tail.original_size, statements=stmts, idx=region_tail.idx)
 
         #

angr/analyses/decompiler/optimization_passes/optimization_pass.py
@@ -13,7 +13,7 @@ from angr.analyses.decompiler import RegionIdentifier
 from angr.analyses.decompiler.condition_processor import ConditionProcessor
 from angr.analyses.decompiler.goto_manager import Goto, GotoManager
 from angr.analyses.decompiler.structuring import RecursiveStructurer, SAILRStructurer
-from angr.analyses.decompiler.utils import add_labels
+from angr.analyses.decompiler.utils import add_labels, remove_edges_in_ailgraph
 from angr.analyses.decompiler.counters import ControlFlowStructureCounter
 from angr.project import Project
 
@@ -111,12 +111,15 @@ class OptimizationPass(BaseOptimizationPass):
     The base class for any function-level graph optimization pass.
     """
 
+    _graph: networkx.DiGraph
+
     def __init__(
         self,
         func,
+        *,
+        graph,
         blocks_by_addr=None,
         blocks_by_addr_and_idx=None,
-        graph=None,
         variable_kb=None,
         region_identifier=None,
         reaching_definitions=None,
@@ -126,18 +129,20 @@ class OptimizationPass(BaseOptimizationPass):
         force_loop_single_exit: bool = True,
         complete_successors: bool = False,
         avoid_vvar_ids: set[int] | None = None,
+        arg_vvars: set[int] | None = None,
         **kwargs,
     ):
         super().__init__(func)
         # self._blocks is just a cache
         self._blocks_by_addr: dict[int, set[ailment.Block]] = blocks_by_addr or {}
         self._blocks_by_addr_and_idx: dict[tuple[int, int | None], ailment.Block] = blocks_by_addr_and_idx or {}
-        self._graph: networkx.DiGraph | None = graph
+        self._graph = graph
         self._variable_kb = variable_kb
         self._ri = region_identifier
         self._rd = reaching_definitions
         self._scratch = scratch if scratch is not None else {}
         self._new_block_addrs = set()
+        self._arg_vvars = arg_vvars
         self.vvar_id_start = vvar_id_start
         self.entry_node_addr: tuple[int, int | None] = (
             entry_node_addr if entry_node_addr is not None else (func.addr, None)
@@ -328,14 +333,15 @@ class StructuringOptimizationPass(OptimizationPass):
     def __init__(
         self,
         func,
-        prevent_new_gotos=True,
-        strictly_less_gotos=False,
-        recover_structure_fails=True,
-        must_improve_rel_quality=True,
-        max_opt_iters=1,
-        simplify_ail=True,
-        require_gotos=True,
-        readd_labels=False,
+        prevent_new_gotos: bool = True,
+        strictly_less_gotos: bool = False,
+        recover_structure_fails: bool = True,
+        must_improve_rel_quality: bool = True,
+        max_opt_iters: int = 1,
+        simplify_ail: bool = True,
+        require_gotos: bool = True,
+        readd_labels: bool = False,
+        edges_to_remove: list[tuple[tuple[int, int | None], tuple[int, int | None]]] | None = None,
        **kwargs,
    ):
        super().__init__(func, **kwargs)
@@ -347,6 +353,7 @@ class StructuringOptimizationPass(OptimizationPass):
         self._require_gotos = require_gotos
         self._must_improve_rel_quality = must_improve_rel_quality
         self._readd_labels = readd_labels
+        self._edges_to_remove = edges_to_remove or []
 
         # relative quality metrics (excludes gotos)
         self._initial_structure_counter = None
@@ -449,6 +456,8 @@ class StructuringOptimizationPass(OptimizationPass):
         if readd_labels:
             graph = add_labels(graph)
 
+        remove_edges_in_ailgraph(graph, self._edges_to_remove)
+
         self._ri = self.project.analyses[RegionIdentifier].prep(kb=self.kb)(
             self._func,
             graph=graph,
@@ -479,7 +488,7 @@ class StructuringOptimizationPass(OptimizationPass):
         if not rs or not rs.result or not rs.result.nodes or rs.result_incomplete:
             return False
 
-        rs = self.project.analyses.RegionSimplifier(self._func, rs.result, kb=self.kb, variable_kb=self._variable_kb)
+        rs = self.project.analyses.RegionSimplifier(self._func, rs.result, arg_vvars=self._arg_vvars, kb=self.kb)
         if not rs or rs.goto_manager is None or rs.result is None:
             return False
 

angr/analyses/decompiler/optimization_passes/return_duplicator_base.py
@@ -44,7 +44,9 @@ class FreshVirtualVariableRewriter(AILBlockWalker):
 
         return new_stmt
 
-    def _handle_VirtualVariable(self, expr_idx: int, expr: VirtualVariable, stmt_idx: int, stmt, block: Block | None):
+    def _handle_VirtualVariable(  # type:ignore
+        self, expr_idx: int, expr: VirtualVariable, stmt_idx: int, stmt, block: Block | None
+    ) -> VirtualVariable | None:
         if expr.varid in self.vvar_mapping:
             return VirtualVariable(
                 expr.idx,
@@ -58,7 +60,7 @@ class FreshVirtualVariableRewriter(AILBlockWalker):
             )
         return None
 
-    def _handle_stmt(self, stmt_idx: int, stmt, block: Block):
+    def _handle_stmt(self, stmt_idx: int, stmt, block: Block):  # type:ignore
         r = super()._handle_stmt(stmt_idx, stmt, block)
         if r is not None:
             # replace the original statement
@@ -77,10 +79,11 @@ class ReturnDuplicatorBase:
     def __init__(
         self,
         func,
+        *,
+        vvar_id_start: int,
         max_calls_in_regions: int = 2,
         minimize_copies_for_regions: bool = True,
         ri: RegionIdentifier | None = None,
-        vvar_id_start: int | None = None,
         scratch: dict[str, Any] | None = None,
     ):
         self._max_calls_in_region = max_calls_in_regions
@@ -137,11 +140,11 @@ class ReturnDuplicatorBase:
                 ):
                     # every eligible pred gets a new region copy
                     self._copy_region([pred_node], region_head, region, graph)
+                    graph_changed = True
 
             if region_head in graph and graph.in_degree(region_head) == 0:
                 graph.remove_nodes_from(region)
-
-            graph_changed = True
+                graph_changed = True
 
         return graph_changed
 
@@ -257,8 +260,7 @@ class ReturnDuplicatorBase:
                         # not used in this branch. drop this statement
                         continue
                     else:
-                        phi_var = Phi(stmt.src.idx, stmt.src.bits, [((pred.addr, pred.idx), vvar_src)], **stmt.src.tags)
-                        new_stmt = Assignment(stmt.idx, stmt.dst, phi_var, **stmt.tags)
+                        new_stmt = Assignment(stmt.idx, stmt.dst, vvar_src, **stmt.tags)
                         stmts.append(new_stmt)
                         continue
                 stmts.append(stmt)
@@ -287,6 +289,8 @@ class ReturnDuplicatorBase:
         self, endnode_regions: dict[Any, tuple[list[tuple[Any, Any]], networkx.DiGraph]], graph: networkx.DiGraph
     ):
         updated_regions = endnode_regions.copy()
+        assert self._ri is not None
+        assert isinstance(self._ri.region, GraphRegion)
         all_region_block_addrs = list(self._find_block_sets_in_all_regions(self._ri.region).values())
         for region_head, (in_edges, region) in endnode_regions.items():
             is_single_const_ret_region = self._is_simple_return_graph(region)
@@ -356,7 +360,7 @@ class ReturnDuplicatorBase:
             return False
 
         # check if the graph is a single successor chain
-        if not all(labeless_graph.out_degree(n) <= 1 for n in nodes):
+        if not all(labeless_graph.out_degree[n] <= 1 for n in nodes):
             return False
 
         # collect the statements from the top node, make sure one exists
@@ -398,7 +402,11 @@ class ReturnDuplicatorBase:
         if ret_exprs and len(ret_exprs) > 1:
             return False
 
-        ret_expr = ReturnDuplicatorBase.unwrap_conv(ret_exprs[0]) if ret_exprs and len(ret_exprs) == 1 else None
+        if not ret_exprs:
+            # a simple return statement that does not carry any value or variable to return
+            return True
+
+        ret_expr = ReturnDuplicatorBase.unwrap_conv(ret_exprs[0])
         # check if ret_expr is a virtual variable or not
         if not isinstance(ret_expr, (VirtualVariable, Const)):
             return False

angr/analyses/decompiler/optimization_passes/return_duplicator_high.py
@@ -4,9 +4,9 @@ from typing import Any
 
 import networkx
 
+from angr.analyses.decompiler.structuring import SAILRStructurer, DreamStructurer
 from .return_duplicator_base import ReturnDuplicatorBase
 from .optimization_pass import OptimizationPass, OptimizationPassStage
-from angr.analyses.decompiler.structuring import SAILRStructurer, DreamStructurer
 
 _l = logging.getLogger(name=__name__)
 
@@ -19,7 +19,7 @@ class ReturnDuplicatorHigh(OptimizationPass, ReturnDuplicatorBase):
 
     ARCHES = None
     PLATFORMS = None
-    STAGE = OptimizationPassStage.AFTER_VARIABLE_RECOVERY
+    STAGE = OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION
     NAME = "Duplicate return-only blocks (high)"
     DESCRIPTION = __doc__
     STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
@@ -28,27 +28,22 @@ class ReturnDuplicatorHigh(OptimizationPass, ReturnDuplicatorBase):
         self,
         func,
         # settings
+        *,
+        vvar_id_start: int,
         max_calls_in_regions: int = 2,
         minimize_copies_for_regions: bool = True,
-        region_identifier=None,
-        vvar_id_start: int | None = None,
         scratch: dict[str, Any] | None = None,
         **kwargs,
     ):
-        OptimizationPass.__init__(
-            self, func, vvar_id_start=vvar_id_start, scratch=scratch, region_identifier=region_identifier, **kwargs
-        )
+        OptimizationPass.__init__(self, func, vvar_id_start=vvar_id_start, scratch=scratch, **kwargs)
         ReturnDuplicatorBase.__init__(
             self,
             func,
             max_calls_in_regions=max_calls_in_regions,
             minimize_copies_for_regions=minimize_copies_for_regions,
-            ri=region_identifier,
             vvar_id_start=vvar_id_start,
             scratch=scratch,
         )
-        # since we run before the RegionIdentification pass in the decompiler, we need to collect it early here
-        self._ri = self._recover_regions(self._graph)
 
         self.analyze()
 
@@ -60,6 +55,8 @@ class ReturnDuplicatorHigh(OptimizationPass, ReturnDuplicatorBase):
         return dst_is_const_ret
 
    def _analyze(self, cache=None):
+        # since we run before the RegionIdentification pass in the decompiler, we need to collect it early here
+        self._ri = self._recover_regions(self._graph)
        copy_graph = networkx.DiGraph(self._graph)
        if self._analyze_core(copy_graph):
            self.out_graph = self._simplify_graph(copy_graph)

angr/analyses/decompiler/peephole_optimizations/eager_eval.py
@@ -150,7 +150,12 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
         if isinstance(expr.operands[1], Const) and expr.operands[1].value == 1:
             # x * 1 => x
             return expr.operands[0]
-        if isinstance(expr.operands[0], Const) and isinstance(expr.operands[1], Const):
+        if (
+            isinstance(expr.operands[0], Const)
+            and expr.operands[0].is_int
+            and isinstance(expr.operands[1], Const)
+            and expr.operands[1].is_int
+        ):
             # constant multiplication
             mask = (1 << expr.bits) - 1
             return Const(
@@ -236,6 +241,10 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
            return expr.operands[1]
         if isinstance(expr.operands[1], Const) and expr.operands[1].value == 0:
             return expr.operands[0]
+        if isinstance(expr.operands[0], Const) and expr.operands[0].value == (1 << expr.operands[0].bits) - 1:
+            return expr.operands[0]
+        if isinstance(expr.operands[1], Const) and expr.operands[1].value == (1 << expr.operands[1].bits) - 1:
+            return expr.operands[1]
         if expr.operands[0].likes(expr.operands[1]):
             return expr.operands[0]
 
@@ -290,6 +299,7 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
     def _optimize_convert(expr: Convert):
         if (
             isinstance(expr.operand, Const)
+            and expr.operand.is_int
             and expr.from_type == Convert.TYPE_INT
             and expr.to_type == Convert.TYPE_INT
             and expr.from_bits > expr.to_bits
@@ -300,6 +310,7 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
             return Const(expr.idx, expr.operand.variable, v, expr.to_bits, **expr.operand.tags)
         if (
             isinstance(expr.operand, Const)
+            and expr.operand.is_int
             and expr.from_type == Convert.TYPE_INT
             and expr.to_type == Convert.TYPE_INT
             and expr.from_bits <= expr.to_bits
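
The two rules added to the Or handler above encode the identity x | m == m when m is the all-ones constant for the operand's width, complementing the existing x | 0 == x and x | x == x rules. A quick plain-Python sanity check of the arithmetic, using an illustrative 32-bit value:

    bits = 32
    all_ones = (1 << bits) - 1      # 0xffffffff
    x = 0x1234ABCD
    assert (x | all_ones) == all_ones   # x | 0xffffffff == 0xffffffff
    assert (x | 0) == x                 # the pre-existing rule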

angr/analyses/decompiler/peephole_optimizations/remove_redundant_conversions.py
@@ -169,29 +169,65 @@ class RemoveRedundantConversions(PeepholeOptimizationExprBase):
     @staticmethod
     def _optimize_Convert(expr: Convert):
         operand_expr = expr.operand
-        if isinstance(operand_expr, BinaryOp) and operand_expr.op in {
-            "Mul",
-            "Shl",
-            "Div",
-            "DivMod",
-            "Mod",
-            "Add",
-            "Sub",
-        }:
-            op0, op1 = operand_expr.operands
-            if (
-                isinstance(op0, Convert)
-                and isinstance(op1, Convert)
-                and op0.from_bits == op1.from_bits
-                and op0.to_bits == op1.to_bits
-                and expr.from_bits == op0.to_bits
-                and expr.to_bits == op1.from_bits
-            ):
-                return BinaryOp(
-                    operand_expr.idx,
-                    operand_expr.op,
-                    [op0.operand, op1.operand],
-                    expr.is_signed,
-                    **operand_expr.tags,
-                )
+        if isinstance(operand_expr, BinaryOp):
+            if operand_expr.op in {
+                "Mul",
+                "Shl",
+                "Div",
+                "DivMod",
+                "Mod",
+                "Add",
+                "Sub",
+            }:
+                op0, op1 = operand_expr.operands
+                if (
+                    isinstance(op0, Convert)
+                    and isinstance(op1, Convert)
+                    and op0.from_bits == op1.from_bits
+                    and op0.to_bits == op1.to_bits
+                    and expr.from_bits == op0.to_bits
+                    and expr.to_bits == op1.from_bits
+                ):
+                    return BinaryOp(
+                        operand_expr.idx,
+                        operand_expr.op,
+                        [op0.operand, op1.operand],
+                        expr.is_signed,
+                        **operand_expr.tags,
+                    )
+            elif operand_expr.op == "Or" and expr.from_bits > expr.to_bits:
+                # Conv(64->32,((vvar_183{reg 128} & 0xffffffff00000000<64>)
+                #   | Conv(32->64, Load(addr=0x200002dc<32>, size=4, endness=Iend_LE))))
+                # =>
+                # Conv(64->32, Load(addr=0x200002dc<32>, size=4, endness=Iend_LE))
+                high_mask = ((1 << expr.from_bits) - 1) - ((1 << expr.to_bits) - 1)
+                op0, op1 = operand_expr.operands
+                if (
+                    isinstance(op0, BinaryOp)
+                    and op0.op == "And"
+                    and isinstance(op0.operands[1], Const)
+                    and op0.operands[1].value == high_mask
+                ):
+                    return Convert(
+                        expr.idx,
+                        expr.from_bits,
+                        expr.to_bits,
+                        expr.is_signed,
+                        op1,
+                        **expr.tags,
+                    )
+                if (
+                    isinstance(op1, BinaryOp)
+                    and op1.op == "And"
+                    and isinstance(op1.operands[1], Const)
+                    and op1.operands[1].value == high_mask
+                ):
+                    return Convert(
+                        expr.idx,
+                        expr.from_bits,
+                        expr.to_bits,
+                        expr.is_signed,
+                        op0,
+                        **expr.tags,
+                    )
         return None
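
The new "Or" branch in _optimize_Convert above drops an OR operand of the form x & high_mask before a narrowing conversion, because truncation to to_bits cannot observe any bits covered by high_mask. A small plain-Python check of that identity with illustrative 64-to-32-bit values:

    from_bits, to_bits = 64, 32
    high_mask = ((1 << from_bits) - 1) - ((1 << to_bits) - 1)   # 0xffffffff00000000
    low_mask = (1 << to_bits) - 1                               # truncation to to_bits
    x = 0x1122334455667788
    y = 0x99AABBCC                                              # a 32-bit value zero-extended to 64 bits
    assert (((x & high_mask) | y) & low_mask) == (y & low_mask)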