angr 9.2.140__py3-none-win_amd64.whl → 9.2.142__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of angr might be problematic.

Files changed (76)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +105 -35
  3. angr/analyses/calling_convention/fact_collector.py +44 -18
  4. angr/analyses/calling_convention/utils.py +3 -1
  5. angr/analyses/cfg/cfg_base.py +38 -4
  6. angr/analyses/cfg/cfg_fast.py +23 -7
  7. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +13 -8
  8. angr/analyses/class_identifier.py +8 -7
  9. angr/analyses/complete_calling_conventions.py +1 -1
  10. angr/analyses/decompiler/ail_simplifier.py +105 -62
  11. angr/analyses/decompiler/callsite_maker.py +24 -11
  12. angr/analyses/decompiler/clinic.py +83 -5
  13. angr/analyses/decompiler/condition_processor.py +7 -7
  14. angr/analyses/decompiler/decompilation_cache.py +2 -1
  15. angr/analyses/decompiler/decompiler.py +11 -2
  16. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +4 -6
  17. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +8 -2
  18. angr/analyses/decompiler/optimization_passes/condition_constprop.py +63 -34
  19. angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +3 -1
  20. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +21 -2
  21. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +85 -16
  22. angr/analyses/decompiler/optimization_passes/optimization_pass.py +78 -1
  23. angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +29 -7
  24. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +51 -7
  25. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +6 -0
  26. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +9 -1
  27. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +44 -7
  28. angr/analyses/decompiler/region_identifier.py +76 -51
  29. angr/analyses/decompiler/region_simplifiers/expr_folding.py +32 -18
  30. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +4 -1
  31. angr/analyses/decompiler/ssailification/rewriting.py +70 -32
  32. angr/analyses/decompiler/ssailification/rewriting_engine.py +118 -24
  33. angr/analyses/decompiler/ssailification/ssailification.py +22 -14
  34. angr/analyses/decompiler/stack_item.py +36 -0
  35. angr/analyses/decompiler/structured_codegen/c.py +86 -145
  36. angr/analyses/decompiler/structuring/dream.py +1 -1
  37. angr/analyses/decompiler/structuring/phoenix.py +9 -4
  38. angr/analyses/decompiler/structuring/structurer_base.py +2 -1
  39. angr/analyses/decompiler/utils.py +46 -20
  40. angr/analyses/find_objects_static.py +2 -1
  41. angr/analyses/reaching_definitions/engine_vex.py +13 -0
  42. angr/analyses/reaching_definitions/function_handler.py +24 -10
  43. angr/analyses/reaching_definitions/function_handler_library/stdio.py +1 -0
  44. angr/analyses/reaching_definitions/function_handler_library/stdlib.py +45 -12
  45. angr/analyses/reaching_definitions/function_handler_library/string.py +77 -21
  46. angr/analyses/reaching_definitions/function_handler_library/unistd.py +21 -1
  47. angr/analyses/reaching_definitions/rd_state.py +11 -7
  48. angr/analyses/s_liveness.py +44 -6
  49. angr/analyses/s_reaching_definitions/s_rda_model.py +4 -2
  50. angr/analyses/s_reaching_definitions/s_rda_view.py +43 -25
  51. angr/analyses/typehoon/simple_solver.py +35 -8
  52. angr/analyses/typehoon/typehoon.py +3 -1
  53. angr/analyses/variable_recovery/engine_ail.py +1 -1
  54. angr/analyses/variable_recovery/engine_vex.py +20 -4
  55. angr/calling_conventions.py +17 -12
  56. angr/factory.py +8 -3
  57. angr/knowledge_plugins/functions/function.py +5 -10
  58. angr/knowledge_plugins/variables/variable_manager.py +34 -5
  59. angr/lib/angr_native.dll +0 -0
  60. angr/procedures/definitions/__init__.py +3 -10
  61. angr/procedures/definitions/wdk_ntoskrnl.py +2 -0
  62. angr/procedures/win32_kernel/__fastfail.py +15 -0
  63. angr/sim_procedure.py +2 -2
  64. angr/simos/simos.py +17 -11
  65. angr/simos/windows.py +42 -1
  66. angr/utils/ail.py +41 -1
  67. angr/utils/cpp.py +17 -0
  68. angr/utils/doms.py +142 -0
  69. angr/utils/library.py +1 -1
  70. angr/utils/types.py +59 -0
  71. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/METADATA +7 -7
  72. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/RECORD +76 -71
  73. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/LICENSE +0 -0
  74. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/WHEEL +0 -0
  75. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/entry_points.txt +0 -0
  76. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/top_level.txt +0 -0
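
The table above is a file-level summary; the hunks that follow show the line-level changes. As a rough way to reproduce such a file-level comparison locally (not the tooling behind this report; the wheel filenames below are assumptions), the two wheels can be opened and diffed with the Python standard library:

import difflib
import zipfile

OLD = "angr-9.2.140-py3-none-win_amd64.whl"  # assumed local filename
NEW = "angr-9.2.142-py3-none-win_amd64.whl"  # assumed local filename

def wheel_sources(path: str) -> dict[str, list[str]]:
    # Map every .py member of the wheel to its decoded source lines.
    out: dict[str, list[str]] = {}
    with zipfile.ZipFile(path) as zf:
        for name in zf.namelist():
            if name.endswith(".py"):
                out[name] = zf.read(name).decode("utf-8", errors="replace").splitlines(keepends=True)
    return out

old_files, new_files = wheel_sources(OLD), wheel_sources(NEW)
for name in sorted(old_files.keys() | new_files.keys()):
    diff = list(difflib.unified_diff(old_files.get(name, []), new_files.get(name, []),
                                     fromfile=f"a/{name}", tofile=f"b/{name}"))
    if diff:
        added = sum(1 for line in diff if line.startswith("+") and not line.startswith("+++"))
        removed = sum(1 for line in diff if line.startswith("-") and not line.startswith("---"))
        print(f"{name} +{added} -{removed}")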
@@ -1,3 +1,4 @@
+ # pylint:disable=too-many-boolean-expressions
  from __future__ import annotations
  from collections.abc import Iterable
  import logging
@@ -7,6 +8,7 @@ import ailment

  from angr.calling_conventions import SimRegArg
  from angr.code_location import CodeLocation
+ from angr.analyses.decompiler.stack_item import StackItem, StackItemType
  from .optimization_pass import OptimizationPass, OptimizationPassStage


@@ -82,6 +84,14 @@ class RegisterSaveAreaSimplifier(OptimizationPass):
  # update it
  self._update_block(old_block, new_block)

+ if updated_blocks:
+ # update stack_items
+ for data in info.values():
+ for stack_offset, _ in data["stored"]:
+ self.stack_items[stack_offset] = StackItem(
+ stack_offset, self.project.arch.bytes, "regs", StackItemType.SAVED_REGS
+ )
+
  def _find_registers_stored_on_stack(self) -> list[tuple[int, int, CodeLocation]]:
  first_block = self._get_block(self._func.addr)
  if first_block is None:
@@ -94,14 +104,26 @@ class RegisterSaveAreaSimplifier(OptimizationPass):
  isinstance(stmt, ailment.Stmt.Store)
  and isinstance(stmt.addr, ailment.Expr.StackBaseOffset)
  and isinstance(stmt.addr.offset, int)
- and isinstance(stmt.data, ailment.Expr.VirtualVariable)
- and stmt.data.was_reg
  ):
- # it's storing registers to the stack!
- stack_offset = stmt.addr.offset
- reg_offset = stmt.data.reg_offset
- codeloc = CodeLocation(first_block.addr, idx, block_idx=first_block.idx, ins_addr=stmt.ins_addr)
- results.append((reg_offset, stack_offset, codeloc))
+ if isinstance(stmt.data, ailment.Expr.VirtualVariable) and stmt.data.was_reg:
+ # it's storing registers to the stack!
+ stack_offset = stmt.addr.offset
+ reg_offset = stmt.data.reg_offset
+ codeloc = CodeLocation(first_block.addr, idx, block_idx=first_block.idx, ins_addr=stmt.ins_addr)
+ results.append((reg_offset, stack_offset, codeloc))
+ elif (
+ self.project.arch.name == "AMD64"
+ and isinstance(stmt.data, ailment.Expr.Convert)
+ and isinstance(stmt.data.operand, ailment.Expr.VirtualVariable)
+ and stmt.data.operand.was_reg
+ and stmt.data.from_bits == 256
+ and stmt.data.to_bits == 128
+ ):
+ # storing xmm registers to the stack
+ stack_offset = stmt.addr.offset
+ reg_offset = stmt.data.operand.reg_offset
+ codeloc = CodeLocation(first_block.addr, idx, block_idx=first_block.idx, ins_addr=stmt.ins_addr)
+ results.append((reg_offset, stack_offset, codeloc))

  return results

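The second hunk above widens the pattern the pass recognizes: besides plain register-to-stack stores, on AMD64 it now also accepts a 256-bit register value narrowed to 128 bits before being stored (an xmm save). A self-contained sketch of that matching rule, using simplified stand-ins rather than angr's real ailment classes:

from dataclasses import dataclass

@dataclass
class StackBaseOffset:
    offset: int

@dataclass
class VirtualVariable:
    reg_offset: int
    was_reg: bool = True

@dataclass
class Convert:  # e.g. a 256-bit ymm value truncated to its 128-bit xmm half
    operand: object
    from_bits: int
    to_bits: int

@dataclass
class Store:
    addr: object
    data: object

def match_saved_register(stmt, arch_name: str):
    """Return (reg_offset, stack_offset) if stmt saves a register to the stack, else None."""
    if not (isinstance(stmt, Store) and isinstance(stmt.addr, StackBaseOffset)):
        return None
    if isinstance(stmt.data, VirtualVariable) and stmt.data.was_reg:
        return stmt.data.reg_offset, stmt.addr.offset
    # AMD64 only: a ymm register narrowed to 128 bits before being spilled (xmm save)
    if (
        arch_name == "AMD64"
        and isinstance(stmt.data, Convert)
        and isinstance(stmt.data.operand, VirtualVariable)
        and stmt.data.operand.was_reg
        and stmt.data.from_bits == 256
        and stmt.data.to_bits == 128
    ):
        return stmt.data.operand.reg_offset, stmt.addr.offset
    return None

# Example: an xmm spill at [sp-0x40]
print(match_saved_register(Store(StackBaseOffset(-0x40), Convert(VirtualVariable(224), 256, 128)), "AMD64"))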
@@ -34,13 +34,23 @@ class FreshVirtualVariableRewriter(AILBlockWalker):
  def _handle_Assignment(self, stmt_idx: int, stmt: Assignment, block: Block | None):
  new_stmt = super()._handle_Assignment(stmt_idx, stmt, block)
  dst = new_stmt.dst if new_stmt is not None else stmt.dst
+ src = new_stmt.src if new_stmt is not None else stmt.src
  if isinstance(dst, VirtualVariable):
  self.vvar_mapping[dst.varid] = self.vvar_idx
  self.vvar_idx += 1

- dst = VirtualVariable(dst.idx, self.vvar_mapping[dst.varid], dst.bits, dst.category, dst.oident, **dst.tags)
+ dst = VirtualVariable(
+ dst.idx,
+ self.vvar_mapping[dst.varid],
+ dst.bits,
+ dst.category,
+ dst.oident,
+ variable=dst.variable,
+ variable_offset=dst.variable_offset,
+ **dst.tags,
+ )

- return Assignment(stmt.idx, dst, stmt.src, **stmt.tags)
+ return Assignment(stmt.idx, dst, src, **stmt.tags)

  return new_stmt

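The rewriter above gives each rewritten virtual variable a fresh index from a running counter, and now also carries the rewritten source expression and the variable/variable_offset metadata into the rebuilt nodes. The renumbering idea itself, reduced to a dictionary (illustrative only, not angr's API):

def renumber(varids: list[int], start: int = 0) -> dict[int, int]:
    """Assign each definition a fresh id in definition order."""
    mapping: dict[int, int] = {}
    next_id = start
    for varid in varids:
        mapping[varid] = next_id
        next_id += 1
    return mapping

print(renumber([12, 7, 12, 3]))  # {12: 2, 7: 1, 3: 3} -- a later definition of the same varid gets the newest id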
@@ -133,14 +143,27 @@ class ReturnDuplicatorBase:
  self._supergraph = to_ail_supergraph(graph)
  for region_head, (in_edges, region) in endnode_regions.items():
  is_single_const_ret_region = self._is_simple_return_graph(region)
+ dup_pred_nodes = []
+ # duplicate the entire region if at least (N-2) in-edges for the region head is deemed should be duplicated.
+ # otherwise we only duplicate the edges that should be duplicated
  for in_edge in in_edges:
  pred_node = in_edge[0]
  if self._should_duplicate_dst(
  pred_node, region_head, graph, dst_is_const_ret=is_single_const_ret_region
  ):
+ dup_pred_nodes.append(pred_node)
+
+ dup_count = len(dup_pred_nodes)
+ dup_all = dup_count >= len(in_edges) - 2 > 0
+ if dup_all:
+ for pred_node in sorted((in_edge[0] for in_edge in in_edges), key=lambda x: x.addr):
  # every eligible pred gets a new region copy
  self._copy_region([pred_node], region_head, region, graph)
  graph_changed = True
+ else:
+ for pred_node in dup_pred_nodes:
+ self._copy_region([pred_node], region_head, region, graph)
+ graph_changed = True

  if region_head in graph and graph.in_degree(region_head) == 0:
  graph.remove_nodes_from(region)
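The policy added in this hunk: if at least N-2 of a return region's N in-edges qualify for duplication, the region is duplicated for every predecessor (in address order); otherwise only the qualifying predecessors get copies. A small self-contained sketch of that decision (hypothetical helper, not angr's API):

def preds_to_duplicate(in_edges: list[tuple[str, str]], should_dup) -> list[str]:
    # Collect predecessors whose edge into the return region qualifies for duplication.
    flagged = [src for src, _dst in in_edges if should_dup(src)]
    # If all but at most two in-edges qualify, duplicate for every predecessor.
    dup_all = len(flagged) >= len(in_edges) - 2 > 0
    if dup_all:
        return sorted(src for src, _dst in in_edges)
    return flagged

edges = [("a", "ret"), ("b", "ret"), ("c", "ret"), ("d", "ret")]
print(preds_to_duplicate(edges, lambda p: p in {"a", "b"}))  # 2 >= 4-2, so all of a, b, c, d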
@@ -199,10 +222,10 @@

  return end_node_regions

- def _copy_region(self, pred_nodes, region_head, region, graph):
+ def _copy_region(self, pred_nodes: list[Block], region_head, region, graph):
  # copy the entire return region
  copies: dict[Block, Block] = {}
- queue = [(pred_node, region_head) for pred_node in pred_nodes]
+ queue: list[tuple[Block, Block]] = [(pred_node, region_head) for pred_node in pred_nodes]
  vvar_mapping: dict[int, int] = {}
  while queue:
  pred, node = queue.pop(0)
@@ -224,12 +247,33 @@
  last_stmt = ConditionProcessor.get_last_statement(pred)
  if isinstance(last_stmt, Jump):
  if isinstance(last_stmt.target, Const) and last_stmt.target.value == node_copy.addr:
- last_stmt.target_idx = node_copy.idx
+ updated_last_stmt = Jump(
+ last_stmt.idx, last_stmt.target, target_idx=node_copy.idx, **last_stmt.tags
+ )
+ pred.statements[-1] = updated_last_stmt
  elif isinstance(last_stmt, ConditionalJump):
  if isinstance(last_stmt.true_target, Const) and last_stmt.true_target.value == node_copy.addr:
- last_stmt.true_target_idx = node_copy.idx
+ updated_last_stmt = ConditionalJump(
+ last_stmt.idx,
+ last_stmt.condition,
+ last_stmt.true_target,
+ last_stmt.false_target,
+ true_target_idx=node_copy.idx,
+ false_target_idx=last_stmt.false_target_idx,
+ **last_stmt.tags,
+ )
+ pred.statements[-1] = updated_last_stmt
  elif isinstance(last_stmt.false_target, Const) and last_stmt.false_target.value == node_copy.addr:
- last_stmt.false_target_idx = node_copy.idx
+ updated_last_stmt = ConditionalJump(
+ last_stmt.idx,
+ last_stmt.condition,
+ last_stmt.true_target,
+ last_stmt.false_target,
+ true_target_idx=last_stmt.true_target_idx,
+ false_target_idx=node_copy.idx,
+ **last_stmt.tags,
+ )
+ pred.statements[-1] = updated_last_stmt
  except EmptyBlockNotice:
  pass

@@ -6,6 +6,7 @@ import logging
  import ailment

  from angr.utils.bits import s2u
+ from angr.analyses.decompiler.stack_item import StackItem, StackItemType
  from .optimization_pass import OptimizationPass, OptimizationPassStage


@@ -168,6 +169,11 @@ class StackCanarySimplifier(OptimizationPass):
  first_block_copy.statements.pop(stmt_idx)
  self._update_block(first_block, first_block_copy)

+ # update stack_items
+ self.stack_items[store_offset] = StackItem(
+ store_offset, canary_init_stmt.dst.size, "canary", StackItemType.STACK_CANARY
+ )
+
  # Done!

  def _find_canary_init_stmt(self):
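Both canary simplifiers and the register-save-area pass now record the stack slots they eliminate in self.stack_items, keyed by stack offset, so later decompilation stages can still label those slots. A simplified model of that bookkeeping (illustrative only; the real StackItem/StackItemType live in angr/analyses/decompiler/stack_item.py, listed above with +36 -0):

from dataclasses import dataclass
from enum import Enum

class StackItemType(Enum):  # hypothetical subset of the item types used above
    STACK_CANARY = 1
    SAVED_REGS = 2

@dataclass
class StackItem:
    offset: int
    size: int
    name: str
    item_type: StackItemType

# Each pass that removes a canary store or a register save records what lived at that offset.
stack_items: dict[int, StackItem] = {}
stack_items[-0x8] = StackItem(-0x8, 8, "canary", StackItemType.STACK_CANARY)
stack_items[-0x18] = StackItem(-0x18, 8, "regs", StackItemType.SAVED_REGS)

for off, item in sorted(stack_items.items()):
    print(f"sp{off:+#x}: {item.name} ({item.item_type.name}, {item.size} bytes)")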
@@ -7,6 +7,7 @@ import ailment
  import cle

  from angr.utils.funcid import is_function_security_check_cookie
+ from angr.analyses.decompiler.stack_item import StackItem, StackItemType
  from .optimization_pass import OptimizationPass, OptimizationPassStage


@@ -62,7 +63,9 @@ class WinStackCanarySimplifier(OptimizationPass):
  first_block, canary_init_stmt_ids = init_stmts
  canary_init_stmt = first_block.statements[canary_init_stmt_ids[-1]]
  # where is the stack canary stored?
- if not isinstance(canary_init_stmt.addr, ailment.Expr.StackBaseOffset):
+ if not isinstance(canary_init_stmt, ailment.Stmt.Store) or not isinstance(
+ canary_init_stmt.addr, ailment.Expr.StackBaseOffset
+ ):
  _l.debug(
  "Unsupported canary storing location %s. Expects an ailment.Expr.StackBaseOffset.",
  canary_init_stmt.addr,
@@ -143,6 +146,11 @@ class WinStackCanarySimplifier(OptimizationPass):
  first_block_copy.statements.pop(stmt_idx)
  self._update_block(first_block, first_block_copy)

+ # update stack_items
+ self.stack_items[store_offset] = StackItem(
+ store_offset, canary_init_stmt.size, "canary", StackItemType.STACK_CANARY
+ )
+
  def _find_canary_init_stmt(self):
  first_block = self._get_block(self._func.addr)
  if first_block is None:
@@ -29,7 +29,12 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
  @staticmethod
  def _optimize_binaryop(expr: BinaryOp):
  if expr.op == "Add":
- if isinstance(expr.operands[0], Const) and isinstance(expr.operands[1], Const):
+ if (
+ isinstance(expr.operands[0], Const)
+ and isinstance(expr.operands[0].value, int)
+ and isinstance(expr.operands[1], Const)
+ and isinstance(expr.operands[1].value, int)
+ ):
  mask = (1 << expr.bits) - 1
  return Const(
  expr.idx, None, (expr.operands[0].value + expr.operands[1].value) & mask, expr.bits, **expr.tags
@@ -99,13 +104,19 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
  new_const = Const(const1.idx, None, const1.value + 1, const1.bits, **const1.tags)
  return BinaryOp(expr.idx, "Mul", [x1, new_const], expr.signed, **expr.tags)
  elif op0_is_mulconst and op1_is_mulconst:
+ assert x0 is not None and x1 is not None and const0 is not None and const1 is not None
  if x0.likes(x1):
  # x * A + x * B => (A + B) * x
  new_const = Const(const0.idx, None, const0.value + const1.value, const0.bits, **const0.tags)
  return BinaryOp(expr.idx, "Mul", [x0, new_const], expr.signed, **expr.tags)

  elif expr.op == "Sub":
- if isinstance(expr.operands[0], Const) and isinstance(expr.operands[1], Const):
+ if (
+ isinstance(expr.operands[0], Const)
+ and isinstance(expr.operands[0].value, int)
+ and isinstance(expr.operands[1], Const)
+ and isinstance(expr.operands[1].value, int)
+ ):
  mask = (1 << expr.bits) - 1
  return Const(
  expr.idx, None, (expr.operands[0].value - expr.operands[1].value) & mask, expr.bits, **expr.tags
@@ -138,12 +149,19 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
  return UnaryOp(expr.idx, "Neg", expr.operands[1], **expr.tags)

  if isinstance(expr.operands[0], StackBaseOffset) and isinstance(expr.operands[1], StackBaseOffset):
+ assert isinstance(expr.operands[0].offset, int) and isinstance(expr.operands[1].offset, int)
  return Const(expr.idx, None, expr.operands[0].offset - expr.operands[1].offset, expr.bits, **expr.tags)

  elif expr.op == "And":
- if isinstance(expr.operands[0], Const) and isinstance(expr.operands[1], Const):
- return Const(expr.idx, None, (expr.operands[0].value & expr.operands[1].value), expr.bits, **expr.tags)
- if isinstance(expr.operands[1], Const) and expr.operands[1].value == 0:
+ op0, op1 = expr.operands
+ if (
+ isinstance(op0, Const)
+ and isinstance(op0.value, int)
+ and isinstance(op1, Const)
+ and isinstance(op1.value, int)
+ ):
+ return Const(expr.idx, None, (op0.value & op1.value), expr.bits, **expr.tags)
+ if isinstance(op1, Const) and op1.value == 0:
  return Const(expr.idx, None, 0, expr.bits, **expr.tags)

  elif expr.op == "Mul":
@@ -156,6 +174,7 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
  and isinstance(expr.operands[1], Const)
  and expr.operands[1].is_int
  ):
+ assert isinstance(expr.operands[0].value, int) and isinstance(expr.operands[1].value, int)
  # constant multiplication
  mask = (1 << expr.bits) - 1
  return Const(
@@ -235,7 +254,13 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
  return Const(expr0.idx, None, (const_a << expr1.value) & mask, expr0.bits, **expr0.tags)

  elif expr.op == "Or":
- if isinstance(expr.operands[0], Const) and isinstance(expr.operands[1], Const):
+ op0, op1 = expr.operands
+ if (
+ isinstance(op0, Const)
+ and isinstance(op0.value, int)
+ and isinstance(op1, Const)
+ and isinstance(op1.value, int)
+ ):
  return Const(expr.idx, None, expr.operands[0].value | expr.operands[1].value, expr.bits, **expr.tags)
  if isinstance(expr.operands[0], Const) and expr.operands[0].value == 0:
  return expr.operands[1]
@@ -248,6 +273,16 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
  if expr.operands[0].likes(expr.operands[1]):
  return expr.operands[0]

+ elif expr.op == "Xor":
+ op0, op1 = expr.operands
+ if (
+ isinstance(op0, Const)
+ and isinstance(op0.value, int)
+ and isinstance(op1, Const)
+ and isinstance(op1.value, int)
+ ):
+ return Const(expr.idx, None, expr.operands[0].value ^ expr.operands[1].value, expr.bits, **expr.tags)
+
  elif expr.op in {"CmpEQ", "CmpLE", "CmpGE"}:
  if expr.operands[0].likes(expr.operands[1]):
  # x == x => 1
@@ -288,7 +323,7 @@ class EagerEvaluation(PeepholeOptimizationExprBase):

  @staticmethod
  def _optimize_unaryop(expr: UnaryOp):
- if expr.op == "Neg" and isinstance(expr.operand, Const):
+ if expr.op == "Neg" and isinstance(expr.operand, Const) and isinstance(expr.operand.value, int):
  const_a = expr.operand.value
  mask = (2**expr.bits) - 1
  return Const(expr.idx, None, (~const_a) & mask, expr.bits, **expr.tags)
@@ -304,6 +339,7 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
  and expr.to_type == Convert.TYPE_INT
  and expr.from_bits > expr.to_bits
  ):
+ assert isinstance(expr.operand.value, int)
  # truncation
  mask = (1 << expr.to_bits) - 1
  v = expr.operand.value & mask
@@ -315,6 +351,7 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
  and expr.to_type == Convert.TYPE_INT
  and expr.from_bits <= expr.to_bits
  ):
+ assert isinstance(expr.operand.value, int)
  if expr.is_signed is False:
  # unsigned extension
  return Const(expr.idx, expr.operand.variable, expr.operand.value, expr.to_bits, **expr.operand.tags)
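The EagerEvaluation changes above mostly add isinstance(..., int) guards before folding, presumably because a Const value is not guaranteed to be an integer, and extend constant folding to Xor. The folding itself is ordinary fixed-width constant arithmetic; a standalone sketch (not angr's implementation):

OPS = {
    "Add": lambda a, b: a + b,
    "Sub": lambda a, b: a - b,
    "And": lambda a, b: a & b,
    "Or":  lambda a, b: a | b,
    "Xor": lambda a, b: a ^ b,  # newly folded in this release
}

def fold(op: str, a: int, b: int, bits: int) -> int:
    """Evaluate op on integer constants a and b, masked to the expression's bit width."""
    mask = (1 << bits) - 1
    return OPS[op](a, b) & mask

print(hex(fold("Add", 0xFFFFFFFF, 1, 32)))   # 0x0, wraps at 32 bits
print(hex(fold("Sub", 0, 1, 32)))            # 0xffffffff
print(hex(fold("Xor", 0xFF00, 0x0FF0, 16)))  # 0xf0f0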
@@ -11,7 +11,8 @@ from ailment.statement import ConditionalJump, Jump
  from ailment.expression import Const

  from angr.utils.graph import GraphUtils
- from angr.utils.graph import dfs_back_edges, subgraph_between_nodes, dominates, shallow_reverse
+ from angr.utils.graph import dfs_back_edges, subgraph_between_nodes, dominates
+ from angr.utils.doms import IncrementalDominators
  from angr.errors import AngrRuntimeError
  from angr.analyses import Analysis, register_analysis
  from .structuring.structurer_nodes import MultiNode, ConditionNode, IncompleteSwitchCaseHeadStatement
@@ -106,7 +107,7 @@ class RegionIdentifier(Analysis):
  # make regions into block address lists
  self.regions_by_block_addrs = self._make_regions_by_block_addrs()

- def _make_regions_by_block_addrs(self) -> list[list[int]]:
+ def _make_regions_by_block_addrs(self) -> list[list[tuple[int, int | None]]]:
  """
  Creates a list of addr lists representing each region without recursion. A single region is defined
  as a set of only blocks, no Graphs containing nested regions. The list contains the address of each
@@ -115,22 +116,24 @@ class RegionIdentifier(Analysis):
  @return: List of addr lists
  """

- work_list = [self.region]
+ work_list: list[GraphRegion] = [self.region]  # type: ignore
  block_only_regions = []
  seen_regions = set()
  while work_list:
- children_regions = []
+ children_regions: list[GraphRegion] = []
  for region in work_list:
  children_blocks = []
  for node in region.graph.nodes:
  if isinstance(node, Block):
- children_blocks.append(node.addr)
+ children_blocks.append((node.addr, node.idx))
  elif isinstance(node, MultiNode):
- children_blocks += [n.addr for n in node.nodes]
+ children_blocks += [(n.addr, node.idx) for n in node.nodes]
  elif isinstance(node, GraphRegion):
  if node not in seen_regions:
  children_regions.append(node)
- children_blocks.append(node.head.addr)
+ children_blocks.append(
+ (node.head.addr, node.head.idx if hasattr(node.head, "idx") else None)
+ )
  seen_regions.add(node)
  else:
  continue
@@ -232,7 +235,7 @@ class RegionIdentifier(Analysis):
  break

  def _find_loop_headers(self, graph: networkx.DiGraph) -> list:
- heads = {t for _, t in dfs_back_edges(graph, self._start_node)}
+ heads = list({t for _, t in dfs_back_edges(graph, self._start_node)})
  return GraphUtils.quasi_topological_sort_nodes(graph, heads)

  def _find_initial_loop_nodes(self, graph: networkx.DiGraph, head):
@@ -390,7 +393,7 @@ class RegionIdentifier(Analysis):

  while True:
  for node in networkx.dfs_postorder_nodes(graph):
- preds = graph.predecessors(node)
+ preds = list(graph.predecessors(node))
  if len(preds) == 1:
  # merge the two nodes
  self._absorb_node(graph, preds[0], node)
@@ -471,7 +474,7 @@ class RegionIdentifier(Analysis):
  head = next(iter(n for n in subgraph.nodes() if n.addr == head.addr))
  region.head = head

- if len(graph.nodes()) == 1 and isinstance(next(iter(graph.nodes())), GraphRegion):
+ if len(graph) == 1 and isinstance(next(iter(graph.nodes())), GraphRegion):
  return next(iter(graph.nodes()))
  # create a large graph region
  new_head = self._get_start_node(graph)
@@ -489,6 +492,7 @@ class RegionIdentifier(Analysis):
  l.debug("Initial loop nodes %s", self._dbg_block_list(initial_loop_nodes))

  # Make sure no other loops are contained in the current loop
+ assert self._loop_headers is not None
  if {n for n in initial_loop_nodes if n.addr != head.addr}.intersection(self._loop_headers):
  return None

@@ -533,7 +537,7 @@ class RegionIdentifier(Analysis):
  region = self._abstract_cyclic_region(
  graph, refined_loop_nodes, head, normal_entries, abnormal_entries, normal_exit_node, abnormal_exit_nodes
  )
- if len(region.successors) > 1 and self._force_loop_single_exit:
+ if region.successors is not None and len(region.successors) > 1 and self._force_loop_single_exit:
  # multi-successor region. refinement is required
  self._refine_loop_successors_to_guarded_successors(region, graph)

@@ -703,23 +707,20 @@ class RegionIdentifier(Analysis):
  else:
  dummy_endnode = None

- # compute dominator tree
- doms = networkx.immediate_dominators(graph_copy, head)
-
- # compute post-dominator tree
- inverted_graph = shallow_reverse(graph_copy)
- postdoms = networkx.immediate_dominators(inverted_graph, endnodes[0])
-
- # dominance frontiers
- df = networkx.algorithms.dominance_frontiers(graph_copy, head)
+ # dominators and post-dominators, computed incrementally
+ doms = IncrementalDominators(graph_copy, head)
+ postdoms = IncrementalDominators(graph_copy, endnodes[0], post=True)

  # visit the nodes in post-order
- for node in networkx.dfs_postorder_nodes(graph_copy, source=head):
+ region_created = False
+ for node in list(networkx.dfs_postorder_nodes(graph_copy, source=head)):
  if node is dummy_endnode:
  # skip the dummy endnode
  continue
  if cyclic and node is head:
  continue
+ if node not in graph_copy:
+ continue

  out_degree = graph_copy.out_degree[node]
  if out_degree == 0:
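The biggest structural change in region_identifier.py replaces the precomputed dominator tree, post-dominator tree, and dominance frontiers with IncrementalDominators from the new angr/utils/doms.py (+142 lines in the table above), queried through idom(), dominates(), and df() and notified of graph rewrites via graph_updated(). The sketch below is a rough stand-in with the same query surface built on networkx, recomputing lazily rather than incrementally; it is an assumption for illustration, not the actual angr.utils.doms implementation:

import networkx

class LazyDominators:
    def __init__(self, graph: networkx.DiGraph, entry, post: bool = False):
        self._graph = graph
        self._entry = entry
        self._post = post
        self._idoms = None
        self._df = None

    def _ensure(self):
        # (Re)compute dominators and dominance frontiers on demand.
        if self._idoms is None:
            g = self._graph.reverse(copy=False) if self._post else self._graph
            self._idoms = networkx.immediate_dominators(g, self._entry)
            self._df = networkx.algorithms.dominance_frontiers(g, self._entry)

    def graph_updated(self, *_args):
        # Invalidate cached results; the real class updates them incrementally instead.
        self._idoms = None
        self._df = None

    def idom(self, node):
        self._ensure()
        return self._idoms.get(node)

    def dominates(self, a, b) -> bool:
        """True if a dominates b (post-dominates when post=True)."""
        self._ensure()
        node = b
        while True:
            if node == a:
                return True
            parent = self._idoms.get(node)
            if parent is None or parent == node:
                return False
            node = parent

    def df(self, node):
        self._ensure()
        return self._df.get(node, set())

g = networkx.DiGraph([(1, 2), (1, 3), (2, 4), (3, 4)])
doms = LazyDominators(g, 1)
print(doms.idom(4), doms.dominates(1, 4), doms.df(2))  # 1 True {4}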
@@ -738,10 +739,10 @@

  # test if this node is an entry to a single-entry, single-successor region
  levels = 0
- postdom_node = postdoms.get(node, None)
+ postdom_node = postdoms.idom(node)
  while postdom_node is not None:
  if (node, postdom_node) not in failed_region_attempts and self._check_region(
- graph_copy, node, postdom_node, doms, df
+ graph_copy, node, postdom_node, doms
  ):
  frontier = [postdom_node]
  region = self._compute_region(
@@ -750,6 +751,8 @@
  if region is not None:
  # update region.graph_with_successors
  if secondary_graph is not None:
+ assert region.graph_with_successors is not None
+ assert region.successors is not None
  if self._complete_successors:
  for nn in list(region.graph_with_successors.nodes):
  original_successors = secondary_graph.successors(nn)
@@ -780,52 +783,75 @@
  graph, region, frontier, dummy_endnode=dummy_endnode, secondary_graph=secondary_graph
  )
  # assert dummy_endnode not in graph
- return True
+ region_created = True
+ # we created a new region to replace one or more nodes in the graph.
+ replaced_nodes = set(region.graph)
+ # update graph_copy; doms and postdoms are updated as well because they hold references to
+ # graph_copy internally.
+ if graph_copy is not graph:
+ self._update_graph(graph_copy, region, replaced_nodes)
+ doms.graph_updated(region, replaced_nodes, region.head)
+ postdoms.graph_updated(region, replaced_nodes, region.head)
+ # break out of the inner loop
+ break

  failed_region_attempts.add((node, postdom_node))
- if not dominates(doms, node, postdom_node):
+ if not doms.dominates(node, postdom_node):
  break
- if postdom_node is postdoms.get(postdom_node, None):
+ if postdom_node is postdoms.idom(postdom_node):
  break
- postdom_node = postdoms.get(postdom_node, None)
+ postdom_node = postdoms.idom(postdom_node)
  levels += 1
  # l.debug("Walked back %d levels in postdom tree and did not find anything for %r. Next.", levels, node)

- return False
+ return region_created

  @staticmethod
- def _check_region(graph, start_node, end_node, doms, df):
- """
+ @staticmethod
+ def _update_graph(graph: networkx.DiGraph, new_region, replaced_nodes: set) -> None:
+ region_in_edges = RegionIdentifier._region_in_edges(graph, new_region, data=True)
+ region_out_edges = RegionIdentifier._region_out_edges(graph, new_region, data=True)
+ for node in replaced_nodes:
+ graph.remove_node(node)
+ graph.add_node(new_region)
+ for src, _, data in region_in_edges:
+ graph.add_edge(src, new_region, **data)
+ for _, dst, data in region_out_edges:
+ graph.add_edge(new_region, dst, **data)

- :param graph:
- :param start_node:
- :param end_node:
- :param doms:
- :param df:
- :return:
+ @staticmethod
+ def _check_region(graph, start_node, end_node, doms) -> bool:
+ """
+ Determine the graph slice between start_node and end_node forms a good region.
  """

  # if the exit node is the header of a loop that contains the start node, the dominance frontier should only
  # contain the exit node.
- if not dominates(doms, start_node, end_node):
- frontier = df.get(start_node, set())
- for node in frontier:
+ start_node_frontier = None
+ end_node_frontier = None
+
+ if not doms.dominates(start_node, end_node):
+ start_node_frontier = doms.df(start_node)
+ for node in start_node_frontier:
  if node is not start_node and node is not end_node:
  return False

  # no edges should enter the region.
- for node in df.get(end_node, set()):
- if dominates(doms, start_node, node) and node is not end_node:
+ end_node_frontier = doms.df(end_node)
+ for node in end_node_frontier:
+ if doms.dominates(start_node, node) and node is not end_node:
  return False

+ if start_node_frontier is None:
+ start_node_frontier = doms.df(start_node)
+
  # no edges should leave the region.
- for node in df.get(start_node, set()):
+ for node in start_node_frontier:
  if node is start_node or node is end_node:
  continue
- if node not in df.get(end_node, set()):
+ if node not in end_node_frontier:
  return False
  for pred in graph.predecessors(node):
- if dominates(doms, start_node, pred) and not dominates(doms, end_node, pred):
+ if doms.dominates(start_node, pred) and not doms.dominates(end_node, pred):
  return False

  return True
@@ -976,14 +1002,13 @@ class RegionIdentifier(Analysis):
  subgraph_with_exits.add_edge(src, dst)
  region.graph = subgraph
  region.graph_with_successors = subgraph_with_exits
- if normal_exit_node is not None:
- region.successors = [normal_exit_node]
- else:
- region.successors = []
- region.successors += list(abnormal_exit_nodes)
+ succs = [normal_exit_node] if normal_exit_node is not None else []
+ succs += list(abnormal_exit_nodes)
+ succs = sorted(set(succs), key=lambda x: x.addr)
+ region.successors = set(succs)

- for succ_0 in region.successors:
- for succ_1 in region.successors:
+ for succ_0 in succs:
+ for succ_1 in succs:
  if succ_0 is not succ_1 and graph.has_edge(succ_0, succ_1):
  region.graph_with_successors.add_edge(succ_0, succ_1)