angr 9.2.140__py3-none-manylinux2014_x86_64.whl → 9.2.142__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. (The original page links to more details here.)

Files changed (75)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +105 -35
  3. angr/analyses/calling_convention/fact_collector.py +44 -18
  4. angr/analyses/calling_convention/utils.py +3 -1
  5. angr/analyses/cfg/cfg_base.py +38 -4
  6. angr/analyses/cfg/cfg_fast.py +23 -7
  7. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +13 -8
  8. angr/analyses/class_identifier.py +8 -7
  9. angr/analyses/complete_calling_conventions.py +1 -1
  10. angr/analyses/decompiler/ail_simplifier.py +105 -62
  11. angr/analyses/decompiler/callsite_maker.py +24 -11
  12. angr/analyses/decompiler/clinic.py +83 -5
  13. angr/analyses/decompiler/condition_processor.py +7 -7
  14. angr/analyses/decompiler/decompilation_cache.py +2 -1
  15. angr/analyses/decompiler/decompiler.py +11 -2
  16. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +4 -6
  17. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +8 -2
  18. angr/analyses/decompiler/optimization_passes/condition_constprop.py +63 -34
  19. angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +3 -1
  20. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +21 -2
  21. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +85 -16
  22. angr/analyses/decompiler/optimization_passes/optimization_pass.py +78 -1
  23. angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +29 -7
  24. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +51 -7
  25. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +6 -0
  26. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +9 -1
  27. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +44 -7
  28. angr/analyses/decompiler/region_identifier.py +76 -51
  29. angr/analyses/decompiler/region_simplifiers/expr_folding.py +32 -18
  30. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +4 -1
  31. angr/analyses/decompiler/ssailification/rewriting.py +70 -32
  32. angr/analyses/decompiler/ssailification/rewriting_engine.py +118 -24
  33. angr/analyses/decompiler/ssailification/ssailification.py +22 -14
  34. angr/analyses/decompiler/stack_item.py +36 -0
  35. angr/analyses/decompiler/structured_codegen/c.py +86 -145
  36. angr/analyses/decompiler/structuring/dream.py +1 -1
  37. angr/analyses/decompiler/structuring/phoenix.py +9 -4
  38. angr/analyses/decompiler/structuring/structurer_base.py +2 -1
  39. angr/analyses/decompiler/utils.py +46 -20
  40. angr/analyses/find_objects_static.py +2 -1
  41. angr/analyses/reaching_definitions/engine_vex.py +13 -0
  42. angr/analyses/reaching_definitions/function_handler.py +24 -10
  43. angr/analyses/reaching_definitions/function_handler_library/stdio.py +1 -0
  44. angr/analyses/reaching_definitions/function_handler_library/stdlib.py +45 -12
  45. angr/analyses/reaching_definitions/function_handler_library/string.py +77 -21
  46. angr/analyses/reaching_definitions/function_handler_library/unistd.py +21 -1
  47. angr/analyses/reaching_definitions/rd_state.py +11 -7
  48. angr/analyses/s_liveness.py +44 -6
  49. angr/analyses/s_reaching_definitions/s_rda_model.py +4 -2
  50. angr/analyses/s_reaching_definitions/s_rda_view.py +43 -25
  51. angr/analyses/typehoon/simple_solver.py +35 -8
  52. angr/analyses/typehoon/typehoon.py +3 -1
  53. angr/analyses/variable_recovery/engine_ail.py +1 -1
  54. angr/analyses/variable_recovery/engine_vex.py +20 -4
  55. angr/calling_conventions.py +17 -12
  56. angr/factory.py +8 -3
  57. angr/knowledge_plugins/functions/function.py +5 -10
  58. angr/knowledge_plugins/variables/variable_manager.py +34 -5
  59. angr/procedures/definitions/__init__.py +3 -10
  60. angr/procedures/definitions/wdk_ntoskrnl.py +2 -0
  61. angr/procedures/win32_kernel/__fastfail.py +15 -0
  62. angr/sim_procedure.py +2 -2
  63. angr/simos/simos.py +17 -11
  64. angr/simos/windows.py +42 -1
  65. angr/utils/ail.py +41 -1
  66. angr/utils/cpp.py +17 -0
  67. angr/utils/doms.py +142 -0
  68. angr/utils/library.py +1 -1
  69. angr/utils/types.py +59 -0
  70. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/METADATA +7 -7
  71. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/RECORD +75 -70
  72. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/LICENSE +0 -0
  73. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/WHEEL +0 -0
  74. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/entry_points.txt +0 -0
  75. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/top_level.txt +0 -0
@@ -6,6 +6,7 @@ from .structured_codegen import BaseStructuredCodeGenerator
6
6
 
7
7
  if TYPE_CHECKING:
8
8
  from angr.analyses.decompiler.optimization_passes.expr_op_swapper import OpDescriptor
9
+ from angr.analyses.typehoon.typevars import TypeVariable, TypeConstraint
9
10
 
10
11
 
11
12
  class DecompilationCache:
@@ -29,7 +30,7 @@ class DecompilationCache:
29
30
  def __init__(self, addr):
30
31
  self.parameters: dict[str, Any] = {}
31
32
  self.addr = addr
32
- self.type_constraints: set | None = None
33
+ self.type_constraints: dict[TypeVariable, set[TypeConstraint]] | None = None
33
34
  self.func_typevar = None
34
35
  self.var_to_typevar: dict | None = None
35
36
  self.codegen: BaseStructuredCodeGenerator | None = None
@@ -31,6 +31,7 @@ from .presets import DECOMPILATION_PRESETS, DecompilationPreset
31
31
  if TYPE_CHECKING:
32
32
  from angr.knowledge_plugins.cfg.cfg_model import CFGModel
33
33
  from .peephole_optimizations import PeepholeOptimizationExprBase, PeepholeOptimizationStmtBase
34
+ from angr.analyses.typehoon.typevars import TypeVariable, TypeConstraint
34
35
 
35
36
  l = logging.getLogger(name=__name__)
36
37
 
@@ -135,6 +136,7 @@ class Decompiler(Analysis):
135
136
  self.unoptimized_ail_graph: networkx.DiGraph | None = None
136
137
  self.ail_graph: networkx.DiGraph | None = None
137
138
  self.vvar_id_start = None
139
+ self._copied_var_ids: set[int] = set()
138
140
  self._optimization_scratch: dict[str, Any] = {}
139
141
  self.expr_collapse_depth = expr_collapse_depth
140
142
 
@@ -267,6 +269,7 @@ class Decompiler(Analysis):
267
269
  self._variable_kb = clinic.variable_kb
268
270
  self._update_progress(70.0, text="Identifying regions")
269
271
  self.vvar_id_start = clinic.vvar_id_start
272
+ self._copied_var_ids = clinic.copied_var_ids
270
273
 
271
274
  if clinic.graph is None:
272
275
  # the function is empty
@@ -500,6 +503,8 @@ class Decompiler(Analysis):
500
503
  scratch=self._optimization_scratch,
501
504
  force_loop_single_exit=self._force_loop_single_exit,
502
505
  complete_successors=self._complete_successors,
506
+ peephole_optimizations=self._peephole_optimizations,
507
+ avoid_vvar_ids=self._copied_var_ids,
503
508
  **kwargs,
504
509
  )
505
510
 
@@ -545,7 +550,9 @@ class Decompiler(Analysis):
545
550
  SimMemoryVariable(symbol.rebased_addr, 1, name=symbol.name, ident=ident),
546
551
  )
547
552
 
548
- def reflow_variable_types(self, type_constraints: set, func_typevar, var_to_typevar: dict, codegen):
553
+ def reflow_variable_types(
554
+ self, type_constraints: dict[TypeVariable, set[TypeConstraint]], func_typevar, var_to_typevar: dict, codegen
555
+ ):
549
556
  """
550
557
  Re-run type inference on an existing variable recovery result, then rerun codegen to generate new results.
551
558
 
@@ -605,7 +612,9 @@ class Decompiler(Analysis):
605
612
  var = arg.variable
606
613
  new_type = var_manager.get_variable_type(var)
607
614
  if new_type is not None:
608
- self.func.prototype.args[i] = new_type
615
+ self.func.prototype.args = (
616
+ self.func.prototype.args[:i] + (new_type,) + self.func.prototype.args[i + 1 :]
617
+ )
609
618
  except Exception: # pylint:disable=broad-except
610
619
  l.warning(
611
620
  "Typehoon analysis failed. Variables will not have types. Please report to GitHub.", exc_info=True
@@ -283,14 +283,12 @@ class GraphDephicationVVarMapping(Analysis): # pylint:disable=abstract-method
283
283
 
284
284
  @staticmethod
285
285
  def _prepend_stmt(block, stmt):
286
- # TODO: This insertion breaks the assumption that all phi statements appear before any assignments. We must
287
- # TODO: fix the assumption elsewhere in the code base.
288
- first_nonlabel_idx = len(block.statements)
286
+ first_nonlabel_nonphi_idx = len(block.statements)
289
287
  for i, s in enumerate(block.statements):
290
- if not isinstance(s, Label):
291
- first_nonlabel_idx = i
288
+ if not isinstance(s, Label) and not is_phi_assignment(s):
289
+ first_nonlabel_nonphi_idx = i
292
290
  break
293
- block.statements.insert(first_nonlabel_idx, stmt)
291
+ block.statements.insert(first_nonlabel_nonphi_idx, stmt)
294
292
 
295
293
  @staticmethod
296
294
  def _used_in_phi(dst_block, src_block, vvar_id: int) -> bool:
@@ -4,6 +4,7 @@ import logging
4
4
 
5
5
  import ailment
6
6
 
7
+ from angr.analyses.decompiler.stack_item import StackItem, StackItemType
7
8
  from .optimization_pass import OptimizationPass, OptimizationPassStage
8
9
 
9
10
  _l = logging.getLogger(name=__name__)
@@ -62,11 +63,16 @@ class BasePointerSaveSimplifier(OptimizationPass):
62
63
  return
63
64
 
64
65
  # update the first block
65
- block, stmt_idx, _ = save_stmt
66
+ block, stmt_idx, save_dst = save_stmt
66
67
  block_copy = block.copy()
67
68
  block_copy.statements.pop(stmt_idx)
68
69
  self._update_block(block, block_copy)
69
70
 
71
+ # update stack_items
72
+ self.stack_items[save_dst.stack_offset] = StackItem(
73
+ save_dst.stack_offset, save_dst.size, "saved_bp", StackItemType.SAVED_BP
74
+ )
75
+
70
76
  # update all endpoint blocks
71
77
  if restore_stmts:
72
78
  for block, stmt_idx, _ in restore_stmts:
@@ -74,7 +80,7 @@ class BasePointerSaveSimplifier(OptimizationPass):
74
80
  block_copy.statements.pop(stmt_idx)
75
81
  self._update_block(block, block_copy)
76
82
 
77
- def _find_baseptr_save_stmt(self):
83
+ def _find_baseptr_save_stmt(self) -> tuple[ailment.Block, int, ailment.Expr.VirtualVariable] | None:
78
84
  """
79
85
  Find the AIL statement that saves the base pointer to a stack slot.
80
86
 
@@ -1,4 +1,5 @@
1
1
  from __future__ import annotations
2
+ from typing import TYPE_CHECKING
2
3
 
3
4
  import networkx
4
5
 
@@ -6,9 +7,14 @@ from ailment import AILBlockWalker, Block
6
7
  from ailment.statement import ConditionalJump, Statement
7
8
  from ailment.expression import Const, BinaryOp, VirtualVariable
8
9
 
9
- from angr.analyses.decompiler.region_identifier import RegionIdentifier
10
+ from angr.analyses.decompiler.utils import first_nonlabel_nonphi_statement
11
+ from angr.utils.graph import dominates
12
+ from angr.utils.timing import timethis
10
13
  from .optimization_pass import OptimizationPass, OptimizationPassStage
11
14
 
15
+ if TYPE_CHECKING:
16
+ from angr.analyses.s_reaching_definitions import SRDAModel
17
+
12
18
 
13
19
  class ConstantCondition:
14
20
  """
@@ -78,6 +84,7 @@ class ConditionConstantPropagation(OptimizationPass):
78
84
  return False, None
79
85
  return True, {"cconds": cconds}
80
86
 
87
+ @timethis
81
88
  def _analyze(self, cache=None):
82
89
  if not cache or cache.get("cconds", None) is None: # noqa: SIM108
83
90
  cconds = self._find_const_conditions()
@@ -98,23 +105,27 @@ class ConditionConstantPropagation(OptimizationPass):
98
105
  # calculate a dominance frontier for each block
99
106
  entry_node_addr, entry_node_idx = self.entry_node_addr
100
107
  entry_node = self._get_block(entry_node_addr, idx=entry_node_idx)
101
- df = networkx.algorithms.dominance_frontiers(self._graph, entry_node)
108
+ idoms = networkx.algorithms.immediate_dominators(self._graph, entry_node)
109
+ rda: SRDAModel = self.project.analyses.SReachingDefinitions(self._func, func_graph=self._graph).model
102
110
 
103
111
  for src, cconds in cconds_by_src.items():
104
112
  head_block = self._get_block(src[0], idx=src[1])
105
113
  if head_block is None:
106
114
  continue
107
- frontier = df.get(head_block)
108
- if frontier is None:
109
- continue
110
- graph_slice = RegionIdentifier.slice_graph(self._graph, head_block, frontier, include_frontier=False)
111
- for ccond in cconds:
112
- walker = CCondPropBlockWalker(ccond.vvar_id, ccond.value)
113
- for block in graph_slice:
114
- new_block = walker.walk(block)
115
- if new_block is not None:
116
- self._update_block(block, new_block)
117
115
 
116
+ for ccond in cconds:
117
+ for _, loc in rda.all_vvar_uses[rda.varid_to_vvar[ccond.vvar_id]]:
118
+ loc_block = self._get_block(loc.block_addr, idx=loc.block_idx)
119
+ if loc_block is None:
120
+ continue
121
+ if dominates(idoms, head_block, loc_block):
122
+ # the constant condition dominates the use site
123
+ walker = CCondPropBlockWalker(ccond.vvar_id, ccond.value)
124
+ new_block = walker.walk(loc_block)
125
+ if new_block is not None:
126
+ self._update_block(loc_block, new_block)
127
+
128
+ @timethis
118
129
  def _find_const_conditions(self) -> list[ConstantCondition]:
119
130
  cconds = []
120
131
 
@@ -122,28 +133,46 @@ class ConditionConstantPropagation(OptimizationPass):
122
133
  if block.statements:
123
134
  last_stmt = block.statements[-1]
124
135
  if (
125
- not isinstance(last_stmt, ConditionalJump)
126
- or not isinstance(last_stmt.true_target, Const)
127
- or not isinstance(last_stmt.false_target, Const)
136
+ isinstance(last_stmt, ConditionalJump)
137
+ and isinstance(last_stmt.true_target, Const)
138
+ and isinstance(last_stmt.false_target, Const)
128
139
  ):
129
- continue
130
-
131
- if isinstance(last_stmt.condition, BinaryOp):
132
- cond = last_stmt.condition
133
- op = cond.op
134
- op0, op1 = cond.operands
135
- if isinstance(op0, Const):
136
- op0, op1 = op1, op0
137
- if isinstance(op0, VirtualVariable) and isinstance(op1, Const) and op1.is_int:
138
- if op == "CmpEQ":
139
- ccond = ConstantCondition(
140
- op0.varid, op1, last_stmt.true_target.value, last_stmt.true_target_idx # type: ignore
141
- )
142
- cconds.append(ccond)
143
- elif op == "CmpNE":
144
- ccond = ConstantCondition(
145
- op0.varid, op1, last_stmt.false_target.value, last_stmt.false_target_idx # type: ignore
146
- )
147
- cconds.append(ccond)
140
+ self._extract_const_condition_from_stmt(last_stmt, cconds)
141
+ else:
142
+ # also check the first non-phi statement; rep stos may generate blocks whose conditional checks
143
+ # are at the beginning of the block
144
+
145
+ # we could have used is_head_controlled_loop_block, but at this point the block is simplified enough
146
+ # that the first non-label, non-phi statement must be a ConditionalJump that controls the execution
147
+ # of the loop body, so the following logic should work fine.
148
+
149
+ first_stmt = first_nonlabel_nonphi_statement(block)
150
+ if (
151
+ first_stmt is not last_stmt
152
+ and isinstance(first_stmt, ConditionalJump)
153
+ and isinstance(first_stmt.true_target, Const)
154
+ and isinstance(first_stmt.false_target, Const)
155
+ ):
156
+ self._extract_const_condition_from_stmt(first_stmt, cconds)
148
157
 
149
158
  return cconds
159
+
160
+ @staticmethod
161
+ def _extract_const_condition_from_stmt(stmt: ConditionalJump, cconds: list[ConstantCondition]) -> None:
162
+ if isinstance(stmt.condition, BinaryOp):
163
+ cond = stmt.condition
164
+ op = cond.op
165
+ op0, op1 = cond.operands
166
+ if isinstance(op0, Const):
167
+ op0, op1 = op1, op0
168
+ if isinstance(op0, VirtualVariable) and isinstance(op1, Const) and op1.is_int:
169
+ if op == "CmpEQ":
170
+ ccond = ConstantCondition(
171
+ op0.varid, op1, stmt.true_target.value, stmt.true_target_idx # type: ignore
172
+ )
173
+ cconds.append(ccond)
174
+ elif op == "CmpNE":
175
+ ccond = ConstantCondition(
176
+ op0.varid, op1, stmt.false_target.value, stmt.false_target_idx # type: ignore
177
+ )
178
+ cconds.append(ccond)
@@ -950,7 +950,9 @@ class DuplicationReverter(StructuringOptimizationPass):
950
950
  #
951
951
 
952
952
  def _share_subregion(self, blocks: list[Block]) -> bool:
953
- return any(all(block.addr in region for block in blocks) for region in self._ri.regions_by_block_addrs)
953
+ return any(
954
+ all((block.addr, block.idx) in region for block in blocks) for region in self._ri.regions_by_block_addrs
955
+ )
954
956
 
955
957
  def _is_valid_candidate(self, b0, b1):
956
958
  # blocks must have statements
@@ -6,7 +6,11 @@ import ailment
6
6
  from ailment.expression import Op
7
7
 
8
8
  from angr.analyses.decompiler.structuring.structurer_nodes import ConditionNode
9
- from angr.analyses.decompiler.utils import structured_node_is_simple_return, sequence_to_statements
9
+ from angr.analyses.decompiler.utils import (
10
+ structured_node_is_simple_return,
11
+ sequence_to_statements,
12
+ structured_node_has_multi_predecessors,
13
+ )
10
14
  from angr.analyses.decompiler.sequence_walker import SequenceWalker
11
15
  from .optimization_pass import SequenceOptimizationPass, OptimizationPassStage
12
16
 
@@ -43,7 +47,22 @@ class FlipBooleanWalker(SequenceWalker):
43
47
  and structured_node_is_simple_return(seq_node.nodes[idx + 1], self._graph)
44
48
  and node not in type1_condition_nodes
45
49
  ):
46
- type2_condition_nodes.append((idx, node, seq_node.nodes[idx + 1]))
50
+ # Type 2: Special Filter:
51
+ # consider code that looks like the following:
52
+ # {if (cond) {LABEL: ... } return;}; goto LABEL;
53
+ #
54
+ # if we were to do the normal flip, this happens:
55
+ # {if (!cond) return; LABEL: ...}; goto LABEL;
56
+ #
57
+ # This is incorrect because we've now created an infinite loop in the event that cond is false,
58
+ # which is not what the original code was. The gist here is that you can't ever flip these cases
59
+ # in the presence of more than one incoming edge to `...` region.
60
+ #
61
+ # To eliminate this illegal case, we simply need to find all the condition nodes of the above structure
62
+ # that have multiple incoming edges to the `...` region.
63
+ illegal_flip = structured_node_has_multi_predecessors(node.true_node, self._graph)
64
+ if not illegal_flip:
65
+ type2_condition_nodes.append((idx, node, seq_node.nodes[idx + 1]))
47
66
 
48
67
  for node in type1_condition_nodes:
49
68
  if isinstance(node.condition, Op) and structured_node_is_simple_return(node.false_node, self._graph):
@@ -7,7 +7,7 @@ import networkx
7
7
 
8
8
  from ailment import Block, AILBlockWalkerBase
9
9
  from ailment.statement import ConditionalJump, Label, Assignment, Jump
10
- from ailment.expression import Expression, BinaryOp, Const, Load
10
+ from ailment.expression import VirtualVariable, Expression, BinaryOp, Const, Load
11
11
 
12
12
  from angr.utils.graph import GraphUtils
13
13
  from angr.analyses.decompiler.utils import first_nonlabel_nonphi_statement, remove_last_statement
@@ -216,7 +216,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
216
216
  def _analyze(self, cache=None):
217
217
  variablehash_to_cases = self._find_cascading_switch_variable_comparisons()
218
218
 
219
- if not variablehash_to_cases:
219
+ if not variablehash_to_cases or all(not caselists for caselists in variablehash_to_cases.values()):
220
220
  return False
221
221
 
222
222
  graph_copy = networkx.DiGraph(self._graph)
@@ -257,6 +257,24 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
257
257
  _l.debug("Skipping switch-case conversion due to too few distinct cases for %s", real_cases[0])
258
258
  continue
259
259
 
260
+ # RULE 4: the default case should not reach other case nodes in the subregion
261
+ default_addr_and_idx = next(
262
+ ((case.target, case.target_idx) for case in cases if case.value == "default"), None
263
+ )
264
+ if default_addr_and_idx is None:
265
+ continue
266
+ default_addr, default_idx = default_addr_and_idx
267
+ default_node = self._get_block(default_addr, idx=default_idx)
268
+ default_reachable_from_case = False
269
+ for case in cases:
270
+ if case.value == "default":
271
+ continue
272
+ if self._node_reachable_from_node_in_region(case.original_node, default_node):
273
+ default_reachable_from_case = True
274
+ break
275
+ if default_reachable_from_case:
276
+ continue
277
+
260
278
  original_nodes = [case.original_node for case in real_cases]
261
279
  original_head: Block = original_nodes[0]
262
280
  original_nodes = original_nodes[1:]
@@ -320,6 +338,10 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
320
338
  node_to_heads[succ].add(new_head)
321
339
  graph_copy.remove_node(onode)
322
340
  for onode in redundant_nodes:
341
+ if onode in original_nodes:
342
+ # sometimes they overlap
343
+ # e.g., 0x402cc7 in mv_-O2
344
+ continue
323
345
  # ensure all nodes that are only reachable from onode are also removed
324
346
  # FIXME: Remove the entire path of nodes instead of only the immediate successors
325
347
  successors = list(graph_copy.successors(onode))
@@ -396,6 +418,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
396
418
  default_case_candidates = {}
397
419
  last_comp = None
398
420
  stack = [(head, 0, 0xFFFF_FFFF_FFFF_FFFF)]
421
+ head_varhash = variable_comparisons[head][1]
399
422
 
400
423
  # cursed: there is an infinite loop in the following loop that
401
424
  # occurs rarely. we need to keep track of the nodes we've seen
@@ -418,12 +441,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
418
441
  next_addr,
419
442
  next_addr_idx,
420
443
  ) = variable_comparisons[comp]
421
- last_varhash = cases[-1].variable_hash if cases else None
422
444
 
423
445
  if op == "eq":
424
446
  # eq always indicates a new case
425
447
 
426
- if last_varhash is None or last_varhash == variable_hash:
448
+ if head_varhash == variable_hash:
427
449
  if target == comp.addr and target_idx == comp.idx:
428
450
  # invalid
429
451
  break
@@ -443,9 +465,10 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
443
465
  # new variable!
444
466
  if last_comp is not None and comp.addr not in default_case_candidates:
445
467
  default_case_candidates[comp.addr] = Case(
446
- last_comp, None, last_varhash, None, "default", comp.addr, comp.idx, None
468
+ last_comp, None, head_varhash, None, "default", comp.addr, comp.idx, None
447
469
  )
448
- break
470
+ break
471
+ continue
449
472
 
450
473
  successors = [succ for succ in self._graph.successors(comp) if succ is not comp]
451
474
  succ_addrs = {(succ.addr, succ.idx) for succ in successors}
@@ -505,7 +528,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
505
528
  # gt always indicates new subtrees
506
529
  gt_addr, gt_idx, le_addr, le_idx = target, target_idx, next_addr, next_addr_idx
507
530
  # TODO: We don't yet support gt nodes acting as the head of a switch
508
- if last_varhash is not None and last_varhash == variable_hash:
531
+ if head_varhash == variable_hash:
509
532
  successors = [succ for succ in self._graph.successors(comp) if succ is not comp]
510
533
  succ_addrs = {(succ.addr, succ.idx) for succ in successors}
511
534
  if succ_addrs != {(gt_addr, gt_idx), (le_addr, le_idx)}:
@@ -526,21 +549,34 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
526
549
  le_added = True
527
550
  if gt_added or le_added:
528
551
  if not le_added:
529
- if le_addr not in default_case_candidates:
552
+ # if min_ + 1 == value, it means we actually have another case! it's not a default case
553
+ if min_ + 1 == value:
554
+ cases.append(
555
+ Case(comp, comp_type, variable_hash, expr, min_ + 1, le_addr, le_idx, None)
556
+ )
557
+ used_nodes.add(comp)
558
+ elif le_addr not in default_case_candidates:
530
559
  default_case_candidates[le_addr] = Case(
531
560
  comp, None, variable_hash, expr, "default", le_addr, le_idx, None
532
561
  )
533
- elif not gt_added and gt_addr not in default_case_candidates:
534
- default_case_candidates[gt_addr] = Case(
535
- comp, None, variable_hash, expr, "default", gt_addr, gt_idx, None
536
- )
562
+ if not gt_added:
563
+ # likewise, this means we have another non-default case
564
+ if value == max_:
565
+ cases.append(
566
+ Case(comp, comp_type, variable_hash, expr, max_, gt_addr, gt_idx, None)
567
+ )
568
+ used_nodes.add(comp)
569
+ elif gt_addr not in default_case_candidates:
570
+ default_case_candidates[gt_addr] = Case(
571
+ comp, None, variable_hash, expr, "default", gt_addr, gt_idx, None
572
+ )
537
573
  extra_cmp_nodes.append(comp)
538
574
  used_nodes.add(comp)
539
575
  else:
540
576
  break
541
577
  else:
542
578
  # checking on a new variable... it probably was not a switch-case
543
- break
579
+ continue
544
580
 
545
581
  if cases and len(default_case_candidates) <= 1:
546
582
  if default_case_candidates:
@@ -606,6 +642,27 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
606
642
 
607
643
  return varhash_to_caselists
608
644
 
645
+ def _node_reachable_from_node_in_region(self, to_node, from_node) -> bool:
646
+ # find the region that contains the to_node
647
+ to_node_region = None
648
+ from_node_region = None
649
+ for region in self._ri.regions_by_block_addrs:
650
+ if (to_node.addr, to_node.idx) in region:
651
+ to_node_region = region
652
+ if (from_node.addr, from_node.idx) in region:
653
+ from_node_region = region
654
+
655
+ if to_node_region is None or from_node_region is None:
656
+ return False
657
+ if to_node_region != from_node_region:
658
+ return False
659
+
660
+ # get a subgraph
661
+ all_nodes = [self._get_block(a, idx=idx) for a, idx in to_node_region]
662
+ subgraph = self._graph.subgraph(all_nodes)
663
+
664
+ return networkx.has_path(subgraph, from_node, to_node)
665
+
609
666
  @staticmethod
610
667
  def _find_switch_variable_comparison_type_a(
611
668
  node,
@@ -625,7 +682,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
625
682
  )
626
683
  ):
627
684
  cond = stmt.condition
628
- if isinstance(cond, BinaryOp) and isinstance(cond.operands[1], Const):
685
+ if (
686
+ isinstance(cond, BinaryOp)
687
+ and isinstance(cond.operands[0], VirtualVariable)
688
+ and isinstance(cond.operands[1], Const)
689
+ ):
629
690
  variable_hash = StableVarExprHasher(cond.operands[0]).hash
630
691
  value = cond.operands[1].value
631
692
  if cond.op == "CmpEQ":
@@ -672,7 +733,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
672
733
  )
673
734
  ):
674
735
  cond = stmt.condition
675
- if isinstance(cond, BinaryOp) and isinstance(cond.operands[1], Const):
736
+ if (
737
+ isinstance(cond, BinaryOp)
738
+ and isinstance(cond.operands[0], VirtualVariable)
739
+ and isinstance(cond.operands[1], Const)
740
+ ):
676
741
  variable_hash = StableVarExprHasher(cond.operands[0]).hash
677
742
  value = cond.operands[1].value
678
743
  if cond.op == "CmpEQ":
@@ -719,7 +784,11 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
719
784
  )
720
785
  ):
721
786
  cond = stmt.condition
722
- if isinstance(cond, BinaryOp) and isinstance(cond.operands[1], Const):
787
+ if (
788
+ isinstance(cond, BinaryOp)
789
+ and isinstance(cond.operands[0], VirtualVariable)
790
+ and isinstance(cond.operands[1], Const)
791
+ ):
723
792
  variable_hash = StableVarExprHasher(cond.operands[0]).hash
724
793
  value = cond.operands[1].value
725
794
  op = cond.op
@@ -1,8 +1,9 @@
1
1
  # pylint:disable=unused-argument
2
2
  from __future__ import annotations
3
3
  import logging
4
- from typing import Any, TYPE_CHECKING
4
+ from collections import namedtuple
5
5
  from collections.abc import Generator
6
+ from typing import Any, TYPE_CHECKING
6
7
  from enum import Enum
7
8
 
8
9
  import networkx
@@ -10,6 +11,7 @@ import networkx
10
11
  import ailment
11
12
 
12
13
  from angr.analyses.decompiler import RegionIdentifier
14
+ from angr.analyses.decompiler.ailgraph_walker import AILGraphWalker
13
15
  from angr.analyses.decompiler.condition_processor import ConditionProcessor
14
16
  from angr.analyses.decompiler.goto_manager import Goto, GotoManager
15
17
  from angr.analyses.decompiler.structuring import RecursiveStructurer, SAILRStructurer
@@ -19,11 +21,15 @@ from angr.project import Project
19
21
 
20
22
  if TYPE_CHECKING:
21
23
  from angr.knowledge_plugins.functions import Function
24
+ from angr.analyses.decompiler.stack_item import StackItem
22
25
 
23
26
 
24
27
  _l = logging.getLogger(__name__)
25
28
 
26
29
 
30
+ BlockCache = namedtuple("BlockCache", ("rd", "prop"))
31
+
32
+
27
33
  class MultipleBlocksException(Exception):
28
34
  """
29
35
  An exception that is raised in _get_block() where multiple blocks satisfy the criteria but only one block was
@@ -130,6 +136,7 @@ class OptimizationPass(BaseOptimizationPass):
130
136
  complete_successors: bool = False,
131
137
  avoid_vvar_ids: set[int] | None = None,
132
138
  arg_vvars: set[int] | None = None,
139
+ peephole_optimizations=None,
133
140
  **kwargs,
134
141
  ):
135
142
  super().__init__(func)
@@ -150,9 +157,11 @@ class OptimizationPass(BaseOptimizationPass):
150
157
  self._force_loop_single_exit = force_loop_single_exit
151
158
  self._complete_successors = complete_successors
152
159
  self._avoid_vvar_ids = avoid_vvar_ids or set()
160
+ self._peephole_optimizations = peephole_optimizations
153
161
 
154
162
  # output
155
163
  self.out_graph: networkx.DiGraph | None = None
164
+ self.stack_items: dict[int, StackItem] = {}
156
165
 
157
166
  @property
158
167
  def blocks_by_addr(self) -> dict[int, set[ailment.Block]]:
@@ -267,9 +276,77 @@ class OptimizationPass(BaseOptimizationPass):
267
276
  def _is_sub(expr):
268
277
  return isinstance(expr, ailment.Expr.BinaryOp) and expr.op == "Sub"
269
278
 
279
+ def _simplify_blocks(
280
+ self,
281
+ ail_graph: networkx.DiGraph,
282
+ cache: dict | None = None,
283
+ ):
284
+ """
285
+ Simplify all blocks in self._blocks.
286
+
287
+ :param ail_graph: The AIL function graph.
288
+ :param cache: A block-level cache that stores reaching definition analysis results and
289
+ propagation results.
290
+ :return: None
291
+ """
292
+
293
+ blocks_by_addr_and_idx: dict[tuple[int, int | None], ailment.Block] = {}
294
+
295
+ for ail_block in ail_graph.nodes():
296
+ simplified = self._simplify_block(
297
+ ail_block,
298
+ cache=cache,
299
+ )
300
+ key = ail_block.addr, ail_block.idx
301
+ blocks_by_addr_and_idx[key] = simplified
302
+
303
+ # update blocks_map to allow node_addr to node lookup
304
+ def _replace_node_handler(node):
305
+ key = node.addr, node.idx
306
+ if key in blocks_by_addr_and_idx:
307
+ return blocks_by_addr_and_idx[key]
308
+ return None
309
+
310
+ AILGraphWalker(ail_graph, _replace_node_handler, replace_nodes=True).walk()
311
+
312
+ return ail_graph
313
+
314
+ def _simplify_block(self, ail_block, cache=None):
315
+ """
316
+ Simplify a single AIL block.
317
+
318
+ :param ailment.Block ail_block: The AIL block to simplify.
319
+ :return: A simplified AIL block.
320
+ """
321
+
322
+ cached_rd, cached_prop = None, None
323
+ cache_item = None
324
+ cache_key = ail_block.addr, ail_block.idx
325
+ if cache:
326
+ cache_item = cache.get(cache_key, None)
327
+ if cache_item:
328
+ # cache hit
329
+ cached_rd = cache_item.rd
330
+ cached_prop = cache_item.prop
331
+
332
+ simp = self.project.analyses.AILBlockSimplifier(
333
+ ail_block,
334
+ self._func.addr,
335
+ peephole_optimizations=self._peephole_optimizations,
336
+ cached_reaching_definitions=cached_rd,
337
+ cached_propagator=cached_prop,
338
+ )
339
+ # update the cache
340
+ if cache is not None:
341
+ if cache_item:
342
+ del cache[cache_key]
343
+ cache[cache_key] = BlockCache(simp._reaching_definitions, simp._propagator)
344
+ return simp.result_block
345
+
270
346
  def _simplify_graph(self, graph):
271
347
  MAX_SIMP_ITERATION = 8
272
348
  for _ in range(MAX_SIMP_ITERATION):
349
+ self._simplify_blocks(graph)
273
350
  simp = self.project.analyses.AILSimplifier(
274
351
  self._func,
275
352
  func_graph=graph,