angr 9.2.123__py3-none-manylinux2014_x86_64.whl → 9.2.125__py3-none-manylinux2014_x86_64.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (103)
  1. angr/__init__.py +1 -1
  2. angr/analyses/__init__.py +9 -1
  3. angr/analyses/cfg/indirect_jump_resolvers/mips_elf_fast.py +11 -8
  4. angr/analyses/cfg/indirect_jump_resolvers/mips_elf_got.py +2 -2
  5. angr/analyses/codecave.py +77 -0
  6. angr/analyses/decompiler/ail_simplifier.py +16 -19
  7. angr/analyses/decompiler/callsite_maker.py +8 -7
  8. angr/analyses/decompiler/ccall_rewriters/amd64_ccalls.py +24 -2
  9. angr/analyses/decompiler/clinic.py +58 -2
  10. angr/analyses/decompiler/condition_processor.py +10 -3
  11. angr/analyses/decompiler/decompilation_cache.py +2 -0
  12. angr/analyses/decompiler/decompiler.py +54 -8
  13. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +10 -2
  14. angr/analyses/decompiler/dephication/rewriting_engine.py +64 -1
  15. angr/analyses/decompiler/expression_narrower.py +5 -1
  16. angr/analyses/decompiler/optimization_passes/__init__.py +3 -0
  17. angr/analyses/decompiler/optimization_passes/div_simplifier.py +4 -1
  18. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +13 -0
  19. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +23 -4
  20. angr/analyses/decompiler/optimization_passes/optimization_pass.py +3 -1
  21. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +8 -5
  22. angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +10 -5
  23. angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +18 -7
  24. angr/analyses/decompiler/optimization_passes/switch_default_case_duplicator.py +6 -0
  25. angr/analyses/decompiler/optimization_passes/tag_slicer.py +41 -0
  26. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +2 -0
  27. angr/analyses/decompiler/peephole_optimizations/const_mull_a_shift.py +2 -0
  28. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +2 -2
  29. angr/analyses/decompiler/peephole_optimizations/remove_cascading_conversions.py +8 -2
  30. angr/analyses/decompiler/region_identifier.py +36 -0
  31. angr/analyses/decompiler/region_simplifiers/loop.py +2 -8
  32. angr/analyses/decompiler/region_simplifiers/switch_cluster_simplifier.py +9 -3
  33. angr/analyses/decompiler/ssailification/rewriting.py +5 -2
  34. angr/analyses/decompiler/ssailification/rewriting_engine.py +151 -25
  35. angr/analyses/decompiler/ssailification/rewriting_state.py +1 -0
  36. angr/analyses/decompiler/ssailification/ssailification.py +17 -9
  37. angr/analyses/decompiler/ssailification/traversal.py +3 -1
  38. angr/analyses/decompiler/ssailification/traversal_engine.py +35 -8
  39. angr/analyses/decompiler/ssailification/traversal_state.py +1 -0
  40. angr/analyses/decompiler/structured_codegen/c.py +42 -4
  41. angr/analyses/decompiler/structuring/phoenix.py +3 -0
  42. angr/analyses/patchfinder.py +137 -0
  43. angr/analyses/pathfinder.py +282 -0
  44. angr/analyses/propagator/engine_ail.py +10 -3
  45. angr/analyses/reaching_definitions/engine_ail.py +10 -15
  46. angr/analyses/s_propagator.py +16 -9
  47. angr/analyses/s_reaching_definitions/s_rda_view.py +127 -63
  48. angr/analyses/smc.py +159 -0
  49. angr/analyses/variable_recovery/engine_ail.py +14 -0
  50. angr/analyses/variable_recovery/engine_base.py +11 -0
  51. angr/angrdb/models.py +1 -2
  52. angr/engines/light/engine.py +12 -0
  53. angr/engines/vex/heavy/heavy.py +2 -0
  54. angr/exploration_techniques/spiller_db.py +1 -2
  55. angr/knowledge_plugins/__init__.py +2 -0
  56. angr/knowledge_plugins/decompilation.py +45 -0
  57. angr/knowledge_plugins/functions/function.py +4 -0
  58. angr/knowledge_plugins/functions/function_manager.py +18 -9
  59. angr/knowledge_plugins/functions/function_parser.py +1 -1
  60. angr/knowledge_plugins/functions/soot_function.py +1 -0
  61. angr/knowledge_plugins/key_definitions/atoms.py +8 -0
  62. angr/misc/ux.py +2 -2
  63. angr/procedures/definitions/parse_win32json.py +2 -1
  64. angr/project.py +17 -1
  65. angr/state_plugins/history.py +6 -4
  66. angr/storage/memory_mixins/actions_mixin.py +7 -7
  67. angr/storage/memory_mixins/address_concretization_mixin.py +5 -5
  68. angr/storage/memory_mixins/bvv_conversion_mixin.py +1 -1
  69. angr/storage/memory_mixins/clouseau_mixin.py +3 -3
  70. angr/storage/memory_mixins/conditional_store_mixin.py +3 -3
  71. angr/storage/memory_mixins/default_filler_mixin.py +3 -3
  72. angr/storage/memory_mixins/memory_mixin.py +45 -34
  73. angr/storage/memory_mixins/paged_memory/page_backer_mixins.py +15 -14
  74. angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +27 -16
  75. angr/storage/memory_mixins/paged_memory/pages/cooperation.py +18 -9
  76. angr/storage/memory_mixins/paged_memory/pages/ispo_mixin.py +5 -5
  77. angr/storage/memory_mixins/paged_memory/pages/multi_values.py +89 -55
  78. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +16 -25
  79. angr/storage/memory_mixins/paged_memory/pages/permissions_mixin.py +11 -9
  80. angr/storage/memory_mixins/paged_memory/pages/ultra_page.py +23 -7
  81. angr/storage/memory_mixins/paged_memory/privileged_mixin.py +1 -1
  82. angr/storage/memory_mixins/regioned_memory/region_meta_mixin.py +9 -7
  83. angr/storage/memory_mixins/regioned_memory/regioned_memory_mixin.py +9 -9
  84. angr/storage/memory_mixins/regioned_memory/static_find_mixin.py +1 -0
  85. angr/storage/memory_mixins/simple_interface_mixin.py +2 -2
  86. angr/storage/memory_mixins/simplification_mixin.py +2 -2
  87. angr/storage/memory_mixins/size_resolution_mixin.py +1 -1
  88. angr/storage/memory_mixins/slotted_memory.py +3 -3
  89. angr/storage/memory_mixins/smart_find_mixin.py +1 -0
  90. angr/storage/memory_mixins/underconstrained_mixin.py +5 -5
  91. angr/storage/memory_mixins/unwrapper_mixin.py +4 -4
  92. angr/storage/memory_object.py +4 -3
  93. angr/utils/bits.py +4 -0
  94. angr/utils/constants.py +1 -1
  95. angr/utils/graph.py +15 -0
  96. angr/utils/tagged_interval_map.py +112 -0
  97. angr/vaults.py +2 -2
  98. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/METADATA +6 -6
  99. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/RECORD +103 -96
  100. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/WHEEL +1 -1
  101. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/LICENSE +0 -0
  102. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/entry_points.txt +0 -0
  103. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/top_level.txt +0 -0
@@ -11,9 +11,15 @@ class RemoveCascadingConversions(PeepholeOptimizationExprBase):
11
11
  expr_classes = (Convert,)
12
12
 
13
13
  def optimize(self, expr: Convert, **kwargs):
14
- if isinstance(expr.operand, Convert):
14
+ if (
15
+ expr.from_type == Convert.TYPE_INT
16
+ and expr.to_type == Convert.TYPE_INT
17
+ and isinstance(expr.operand, Convert)
18
+ and expr.operand.from_type == Convert.TYPE_INT
19
+ and expr.operand.to_type == Convert.TYPE_INT
20
+ ):
15
21
  inner = expr.operand
16
- if inner.from_bits == expr.to_bits:
22
+ if inner.from_bits == expr.to_bits and inner.from_type == expr.to_type:
17
23
  if inner.from_bits < inner.to_bits:
18
24
  # extension -> truncation
19
25
  return inner.operand
@@ -163,6 +163,22 @@ class RegionIdentifier(Analysis):
163
163
  raise AngrRuntimeError("Cannot find the start node from the graph!") from ex
164
164
  raise AngrRuntimeError("Cannot find the start node from the graph!")
165
165
 
166
+ def _get_entry_node(self, graph: networkx.DiGraph):
167
+ if self.entry_node_addr is None:
168
+ return None
169
+ return next(
170
+ (
171
+ n
172
+ for n in graph.nodes()
173
+ if (
174
+ (n.addr, n.idx) == self.entry_node_addr
175
+ if isinstance(n, Block)
176
+ else n.addr == self.entry_node_addr[0]
177
+ )
178
+ ),
179
+ None,
180
+ )
181
+
166
182
  def _test_reducibility(self):
167
183
  # make a copy of the graph
168
184
  graph = networkx.DiGraph(self._graph)
@@ -188,8 +204,19 @@ class RegionIdentifier(Analysis):
188
204
  return len(graph.nodes) == 1
189
205
 
190
206
  def _make_supergraph(self, graph: networkx.DiGraph):
207
+
208
+ entry_node = None
209
+ if self.entry_node_addr is not None:
210
+ entry_node = next(iter(nn for nn in graph if nn.addr == self.entry_node_addr[0]), None)
211
+
191
212
  while True:
192
213
  for src, dst, data in graph.edges(data=True):
214
+ if entry_node is not None and dst is entry_node:
215
+ # the entry node must be kept instead of merged with its predecessor (which can happen in real
216
+ # binaries! e.g., 444a401b900eb825f216e95111dcb6ef94b01a81fc7b88a48599867db8c50365, function
217
+ # 0x1802BEA28, block 0x1802BEA05 and 0x1802BEA28)
218
+ continue
219
+
193
220
  type_ = data.get("type", None)
194
221
  if type_ == "fake_return":
195
222
  if len(list(graph.successors(src))) == 1 and len(list(graph.predecessors(dst))) == 1:
@@ -452,6 +479,8 @@ class RegionIdentifier(Analysis):
452
479
  #
453
480
 
454
481
  def _make_cyclic_region(self, head, graph: networkx.DiGraph):
482
+ original_entry = self._get_entry_node(graph)
483
+
455
484
  l.debug("Found cyclic region at %#08x", head.addr)
456
485
  initial_loop_nodes = self._find_initial_loop_nodes(graph, head)
457
486
  l.debug("Initial loop nodes %s", self._dbg_block_list(initial_loop_nodes))
@@ -505,6 +534,13 @@ class RegionIdentifier(Analysis):
505
534
  # multi-successor region. refinement is required
506
535
  self._refine_loop_successors(region, graph)
507
536
 
537
+ # if the head node is in the graph and it's not the head of the graph, we will need to update the head node
538
+ # address.
539
+ if original_entry is not None and original_entry in region.graph and region.head is not original_entry:
540
+ self.entry_node_addr = (head.addr, None)
541
+ # FIXME: the identified region will probably be incorrect. we may need to add a jump block that jumps to
542
+ # original_entry.
543
+
508
544
  return region
509
545
 
510
546
  def _refine_loop_successors(self, region, graph: networkx.DiGraph):
@@ -16,6 +16,7 @@ from angr.analyses.decompiler.structuring.structurer_nodes import (
16
16
  CascadingConditionNode,
17
17
  )
18
18
  from angr.analyses.decompiler.utils import is_statement_terminating, has_nonlabel_nonphi_statements
19
+ from angr.utils.ail import is_phi_assignment
19
20
 
20
21
 
21
22
  class LoopSimplifier(SequenceWalker):
@@ -104,14 +105,7 @@ class LoopSimplifier(SequenceWalker):
104
105
  )
105
106
  and (
106
107
  all(has_nonlabel_nonphi_statements(block) for block in self.continue_preludes[node])
107
- and all(
108
- not self._control_transferring_statement(block.statements[-1])
109
- for block in self.continue_preludes[node]
110
- )
111
- and all(
112
- block.statements[-1] == self.continue_preludes[node][0].statements[-1]
113
- for block in self.continue_preludes[node]
114
- )
108
+ and all(not is_phi_assignment(block.statements[-1]) for block in self.continue_preludes[node])
115
109
  )
116
110
  ):
117
111
  node.sort = "for"
@@ -284,6 +284,10 @@ def simplify_switch_clusters(
284
284
 
285
285
  for variable in var2switches:
286
286
  switch_regions = var2switches[variable]
287
+ if len(switch_regions) <= 1:
288
+ # nothing to simplify or merge if there is only one switch region
289
+ continue
290
+
287
291
  cond_regions = list(var2condnodes[variable])
288
292
 
289
293
  if not cond_regions:
@@ -449,10 +453,10 @@ def simplify_switch_clusters(
449
453
  # build the SwitchCase node and replace old nodes in the parent node
450
454
  cases_dict = OrderedDict(cases)
451
455
  new_switchcase = SwitchCaseNode(
452
- switch_regions_default_nodes[0].node.switch_expr,
456
+ switch_regions[0].node.switch_expr,
453
457
  cases_dict,
454
458
  default_node,
455
- addr=switch_regions_default_nodes[0].node.addr,
459
+ addr=switch_regions[0].node.addr,
456
460
  )
457
461
 
458
462
  # what are we trying to replace?
@@ -525,13 +529,15 @@ def simplify_lowered_switches_core(
525
529
 
526
530
  if outermost_node is None:
527
531
  return False
532
+ if not isinstance(outermost_node, ConditionNode):
533
+ return False
528
534
  if isinstance(outermost_node.condition, UnaryOp) and outermost_node.condition.op == "Not":
529
535
  # attempt to flip any simple negated comparison for normalized operations
530
536
  outermost_node.condition = negate(outermost_node.condition.operand)
531
537
 
532
538
  caseno_to_node = {}
533
539
  default_node_candidates: list[tuple[BaseNode, BaseNode]] = [] # parent to default node candidate
534
- stack: list[(ConditionNode, int, int)] = [(outermost_node, 0, 0xFFFF_FFFF_FFFF_FFFF)]
540
+ stack: list[tuple[BaseNode, int, int]] = [(outermost_node, 0, 0xFFFF_FFFF_FFFF_FFFF)]
535
541
  while stack:
536
542
  node, min_, max_ = stack.pop(0)
537
543
  if node not in node_to_condnode:
@@ -9,7 +9,7 @@ import networkx
9
9
  import ailment
10
10
  from ailment import Block
11
11
  from ailment.expression import Expression, Phi, VirtualVariable, VirtualVariableCategory
12
- from ailment.statement import Assignment, Label
12
+ from ailment.statement import Statement, Assignment, Label
13
13
 
14
14
  from angr.code_location import CodeLocation
15
15
  from angr.analyses import ForwardAnalysis
@@ -38,6 +38,7 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
38
38
  udef_to_phiid: dict[tuple, set[int]],
39
39
  phiid_to_loc: dict[int, tuple[int, int | None]],
40
40
  stackvar_locs: dict[int, int],
41
+ rewrite_tmps: bool,
41
42
  ail_manager,
42
43
  vvar_id_start: int = 0,
43
44
  ):
@@ -52,6 +53,7 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
52
53
  self._udef_to_phiid = udef_to_phiid
53
54
  self._phiid_to_loc = phiid_to_loc
54
55
  self._stackvar_locs = stackvar_locs
56
+ self._rewrite_tmps = rewrite_tmps
55
57
  self._ail_manager = ail_manager
56
58
  self._engine_ail = SimEngineSSARewriting(
57
59
  self.project.arch,
@@ -61,6 +63,7 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
61
63
  udef_to_phiid=self._udef_to_phiid,
62
64
  phiid_to_loc=self._phiid_to_loc,
63
65
  stackvar_locs=self._stackvar_locs,
66
+ rewrite_tmps=self._rewrite_tmps,
64
67
  ail_manager=ail_manager,
65
68
  vvar_id_start=vvar_id_start,
66
69
  )
@@ -71,7 +74,7 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
71
74
 
72
75
  self._analyze()
73
76
 
74
- self.def_to_vvid: dict[Expression, int] = self._engine_ail.def_to_vvid
77
+ self.def_to_vvid: dict[tuple[int, int | None, int, Expression | Statement], int] = self._engine_ail.def_to_vvid
75
78
  self.out_graph = self._make_new_graph(ail_graph)
76
79
 
77
80
  @property
@@ -1,9 +1,8 @@
1
1
  # pylint:disable=no-self-use,unused-argument
2
2
  from __future__ import annotations
3
- from typing import Any
4
3
  import logging
5
4
 
6
- from ailment.statement import Statement, Assignment, Store, Call, Return, ConditionalJump
5
+ from ailment.statement import Statement, Assignment, Store, Call, Return, ConditionalJump, DirtyStatement
7
6
  from ailment.expression import (
8
7
  Expression,
9
8
  Register,
@@ -18,6 +17,8 @@ from ailment.expression import (
18
17
  StackBaseOffset,
19
18
  VEXCCallExpression,
20
19
  ITE,
20
+ Tmp,
21
+ DirtyExpression,
21
22
  )
22
23
 
23
24
  from angr.utils.ssa import get_reg_offset_base_and_size
@@ -51,6 +52,7 @@ class SimEngineSSARewriting(
51
52
  ail_manager=None,
52
53
  vvar_id_start: int = 0,
53
54
  bp_as_gpr: bool = False,
55
+ rewrite_tmps: bool = False,
54
56
  ):
55
57
  super().__init__()
56
58
 
@@ -58,10 +60,11 @@ class SimEngineSSARewriting(
58
60
  self.project = project
59
61
  self.sp_tracker = sp_tracker
60
62
  self.bp_as_gpr = bp_as_gpr
61
- self.def_to_vvid: dict[Any, int] = {}
63
+ self.def_to_vvid: dict[tuple[int, int | None, int, Expression | Statement], int] = {}
62
64
  self.stackvar_locs = stackvar_locs
63
65
  self.udef_to_phiid = udef_to_phiid
64
66
  self.phiid_to_loc = phiid_to_loc
67
+ self.rewrite_tmps = rewrite_tmps
65
68
  self.ail_manager = ail_manager
66
69
 
67
70
  self._current_vvar_id = vvar_id_start
@@ -97,9 +100,11 @@ class SimEngineSSARewriting(
97
100
  self.state.registers[stmt.dst.reg_offset][stmt.dst.size] = stmt.dst
98
101
  elif stmt.dst.category == VirtualVariableCategory.STACK:
99
102
  self.state.stackvars[stmt.dst.stack_offset][stmt.dst.size] = stmt.dst
103
+ elif stmt.dst.category == VirtualVariableCategory.TMP:
104
+ self.state.tmps[stmt.dst.tmp_idx] = stmt.dst
100
105
  new_dst = None
101
106
  else:
102
- new_dst = self._replace_def_expr(stmt.dst)
107
+ new_dst = self._replace_def_expr(self.block.addr, self.block.idx, self.stmt_idx, stmt.dst)
103
108
 
104
109
  stmt_base_reg = None
105
110
  if new_dst is not None:
@@ -124,7 +129,9 @@ class SimEngineSSARewriting(
124
129
  **stmt.dst.tags,
125
130
  )
126
131
  existing_base_reg_vvar = self._replace_use_reg(base_reg_expr)
127
- base_reg_vvar = self._replace_def_expr(base_reg_expr)
132
+ base_reg_vvar = self._replace_def_expr(
133
+ self.block.addr, self.block.idx, self.stmt_idx, base_reg_expr
134
+ )
128
135
  stmt_base_reg = Assignment(
129
136
  self.ail_manager.next_atom(),
130
137
  base_reg_vvar,
@@ -134,6 +141,8 @@ class SimEngineSSARewriting(
134
141
  **stmt.tags,
135
142
  )
136
143
  self.state.registers[base_offset][base_size] = base_reg_vvar
144
+ elif isinstance(stmt.dst, Tmp):
145
+ pass
137
146
  else:
138
147
  raise NotImplementedError
139
148
 
@@ -151,7 +160,7 @@ class SimEngineSSARewriting(
151
160
 
152
161
  def _handle_Store(self, stmt: Store) -> Store | Assignment | None:
153
162
  new_data = self._expr(stmt.data)
154
- vvar = self._replace_def_store(stmt)
163
+ vvar = self._replace_def_store(self.block.addr, self.block.idx, self.stmt_idx, stmt)
155
164
  if vvar is not None:
156
165
  return Assignment(stmt.idx, vvar, stmt.data if new_data is None else new_data, **stmt.tags)
157
166
 
@@ -189,9 +198,19 @@ class SimEngineSSARewriting(
189
198
  return None
190
199
 
191
200
  def _handle_Call(self, stmt: Call) -> Call | None:
192
- new_target = self._replace_use_reg(stmt.target) if isinstance(stmt.target, Register) else None
193
- new_ret_expr = self._replace_def_expr(stmt.ret_expr) if stmt.ret_expr is not None else None
194
- new_fp_ret_expr = self._replace_def_expr(stmt.fp_ret_expr) if stmt.fp_ret_expr is not None else None
201
+ changed = False
202
+
203
+ new_target = self._replace_use_expr(stmt.target)
204
+ new_ret_expr = (
205
+ self._replace_def_expr(self.block.addr, self.block.idx, self.stmt_idx, stmt.ret_expr)
206
+ if stmt.ret_expr is not None
207
+ else None
208
+ )
209
+ new_fp_ret_expr = (
210
+ self._replace_def_expr(self.block.addr, self.block.idx, self.stmt_idx, stmt.fp_ret_expr)
211
+ if stmt.fp_ret_expr is not None
212
+ else None
213
+ )
195
214
 
196
215
  cc = stmt.calling_convention if stmt.calling_convention is not None else self.project.factory.cc()
197
216
  if cc is not None:
@@ -211,22 +230,50 @@ class SimEngineSSARewriting(
211
230
  self._clear_aliasing_regs(stmt.fp_ret_expr.reg_offset, stmt.fp_ret_expr.size)
212
231
  self.state.registers[stmt.fp_ret_expr.reg_offset][stmt.fp_ret_expr.size] = new_fp_ret_expr
213
232
 
233
+ new_args = None
234
+ if stmt.args is not None:
235
+ new_args = []
236
+ for arg in stmt.args:
237
+ new_arg = self._expr(arg)
238
+ if new_arg is not None:
239
+ changed = True
240
+ new_args.append(new_arg)
241
+ else:
242
+ new_args.append(arg)
243
+
214
244
  if new_target is not None or new_ret_expr is not None or new_fp_ret_expr is not None:
245
+ changed = True
246
+
247
+ if changed:
215
248
  return Call(
216
249
  stmt.idx,
217
250
  stmt.target if new_target is None else new_target,
218
251
  calling_convention=stmt.calling_convention,
219
252
  prototype=stmt.prototype,
220
- args=stmt.args,
253
+ args=new_args,
221
254
  ret_expr=stmt.ret_expr if new_ret_expr is None else new_ret_expr,
222
255
  fp_ret_expr=stmt.fp_ret_expr if new_fp_ret_expr is None else new_fp_ret_expr,
256
+ bits=stmt.bits,
223
257
  **stmt.tags,
224
258
  )
225
259
  return None
226
260
 
261
+ _handle_CallExpr = _handle_Call
262
+
263
+ def _handle_DirtyStatement(self, stmt: DirtyStatement) -> DirtyStatement | None:
264
+ dirty = self._expr(stmt.dirty)
265
+ if dirty is None or dirty is stmt.dirty:
266
+ return None
267
+ return DirtyStatement(stmt.idx, dirty, **stmt.tags)
268
+
227
269
  def _handle_Register(self, expr: Register) -> VirtualVariable | None:
228
270
  return self._replace_use_reg(expr)
229
271
 
272
+ def _handle_Tmp(self, expr: Tmp) -> VirtualVariable | None:
273
+ return (
274
+ self._replace_use_tmp(self.block.addr, self.block.idx, self.stmt_idx, expr) if self.rewrite_tmps else None
275
+ )
276
+
230
277
  def _handle_Load(self, expr: Load) -> Load | VirtualVariable | None:
231
278
  if isinstance(expr.addr, StackBaseOffset) and isinstance(expr.addr.offset, int):
232
279
  new_expr = self._replace_use_load(expr)
@@ -341,13 +388,42 @@ class SimEngineSSARewriting(
341
388
  new_operands.append(operand)
342
389
 
343
390
  if updated:
344
- return VEXCCallExpression(expr.idx, expr.cee_name, new_operands, bits=expr.bits, **expr.tags)
391
+ return VEXCCallExpression(expr.idx, expr.callee, new_operands, bits=expr.bits, **expr.tags)
345
392
  return None
346
393
 
347
- def _handle_Dummy(self, expr) -> None:
394
+ def _handle_DirtyExpression(self, expr: DirtyExpression) -> DirtyExpression | None:
395
+ updated = False
396
+ new_operands = []
397
+ for operand in expr.operands:
398
+ new_operand = self._expr(operand)
399
+ if new_operand is not None:
400
+ updated = True
401
+ new_operands.append(new_operand)
402
+ else:
403
+ new_operands.append(operand)
404
+
405
+ new_guard = None
406
+ if expr.guard is not None:
407
+ new_guard = self._expr(expr.guard)
408
+ if new_guard is not None:
409
+ updated = True
410
+
411
+ if updated:
412
+ return DirtyExpression(
413
+ expr.idx,
414
+ expr.callee,
415
+ new_operands,
416
+ guard=new_guard,
417
+ mfx=expr.mfx,
418
+ maddr=expr.maddr,
419
+ msize=expr.msize,
420
+ bits=expr.bits,
421
+ **expr.tags,
422
+ )
348
423
  return None
349
424
 
350
- _handle_DirtyExpression = _handle_Dummy
425
+ def _handle_Dummy(self, expr) -> None:
426
+ return None
351
427
 
352
428
  #
353
429
  # Expression replacement
@@ -417,23 +493,29 @@ class SimEngineSSARewriting(
417
493
  **new_base_expr.tags,
418
494
  )
419
495
 
420
- def _replace_def_expr(self, thing: Expression | Statement) -> VirtualVariable | None:
496
+ def _replace_def_expr(
497
+ self, block_addr: int, block_idx: int | None, stmt_idx: int, thing: Expression | Statement
498
+ ) -> VirtualVariable | None:
421
499
  """
422
500
  Return a new virtual variable for the given defined expression.
423
501
  """
424
502
  if isinstance(thing, Register):
425
- return self._replace_def_reg(thing)
503
+ return self._replace_def_reg(block_addr, block_idx, stmt_idx, thing)
426
504
  if isinstance(thing, Store):
427
- return self._replace_def_store(thing)
505
+ return self._replace_def_store(block_addr, block_idx, stmt_idx, thing)
506
+ if isinstance(thing, Tmp) and self.rewrite_tmps:
507
+ return self._replace_def_tmp(block_addr, block_idx, stmt_idx, thing)
428
508
  return None
429
509
 
430
- def _replace_def_reg(self, expr: Register) -> VirtualVariable:
510
+ def _replace_def_reg(
511
+ self, block_addr: int, block_idx: int | None, stmt_idx: int, expr: Register
512
+ ) -> VirtualVariable:
431
513
  """
432
514
  Return a new virtual variable for the given defined register.
433
515
  """
434
516
 
435
517
  # get the virtual variable ID
436
- vvid = self.get_vvid_by_def(expr)
518
+ vvid = self.get_vvid_by_def(block_addr, block_idx, stmt_idx, expr)
437
519
  return VirtualVariable(
438
520
  expr.idx,
439
521
  vvid,
@@ -464,14 +546,16 @@ class SimEngineSSARewriting(
464
546
  return vvar
465
547
  return self.state.registers[base_off][base_size]
466
548
 
467
- def _replace_def_store(self, stmt: Store) -> VirtualVariable | None:
549
+ def _replace_def_store(
550
+ self, block_addr: int, block_idx: int | None, stmt_idx: int, stmt: Store
551
+ ) -> VirtualVariable | None:
468
552
  if (
469
553
  isinstance(stmt.addr, StackBaseOffset)
470
554
  and isinstance(stmt.addr.offset, int)
471
555
  and stmt.addr.offset in self.stackvar_locs
472
556
  and stmt.size == self.stackvar_locs[stmt.addr.offset]
473
557
  ):
474
- vvar_id = self.get_vvid_by_def(stmt)
558
+ vvar_id = self.get_vvid_by_def(block_addr, block_idx, stmt_idx, stmt)
475
559
  vvar = VirtualVariable(
476
560
  self.ail_manager.next_atom(),
477
561
  vvar_id,
@@ -484,6 +568,31 @@ class SimEngineSSARewriting(
484
568
  return vvar
485
569
  return None
486
570
 
571
+ def _replace_def_tmp(self, block_addr: int, block_idx: int | None, stmt_idx: int, expr: Tmp) -> VirtualVariable:
572
+ vvid = self.get_vvid_by_def(block_addr, block_idx, stmt_idx, expr)
573
+ vvar = VirtualVariable(
574
+ expr.idx,
575
+ vvid,
576
+ expr.bits,
577
+ VirtualVariableCategory.TMP,
578
+ oident=expr.tmp_idx,
579
+ **expr.tags,
580
+ )
581
+ self.state.tmps[expr.tmp_idx] = vvar
582
+ return vvar
583
+
584
+ def _replace_use_expr(self, thing: Expression | Statement) -> VirtualVariable | None:
585
+ """
586
+ Return a new virtual variable for the given defined expression.
587
+ """
588
+ if isinstance(thing, Register):
589
+ return self._replace_use_reg(thing)
590
+ if isinstance(thing, Store):
591
+ raise NotImplementedError("Store expressions are not supported in _replace_use_expr.")
592
+ if isinstance(thing, Tmp) and self.rewrite_tmps:
593
+ return self._replace_use_tmp(self.block.addr, self.block.idx, self.stmt_idx, thing)
594
+ return None
595
+
487
596
  def _replace_use_reg(self, reg_expr: Register) -> VirtualVariable | Expression:
488
597
 
489
598
  if reg_expr.reg_offset in self.state.registers:
@@ -556,7 +665,8 @@ class SimEngineSSARewriting(
556
665
  and expr.size == self.stackvar_locs[expr.addr.offset]
557
666
  ):
558
667
  if expr.size not in self.state.stackvars[expr.addr.offset]:
559
- vvar_id = self.get_vvid_by_def(expr)
668
+ # create it on the fly
669
+ vvar_id = self.get_vvid_by_def(self.block.addr, self.block.idx, self.stmt_idx, expr)
560
670
  return VirtualVariable(
561
671
  self.ail_manager.next_atom(),
562
672
  vvar_id,
@@ -579,15 +689,31 @@ class SimEngineSSARewriting(
579
689
  )
580
690
  return None
581
691
 
692
+ def _replace_use_tmp(self, block_addr: int, block_idx: int | None, stmt_idx: int, expr: Tmp) -> VirtualVariable:
693
+ vvar = self.state.tmps.get(expr.tmp_idx)
694
+ if vvar is None:
695
+ return self._replace_def_tmp(block_addr, block_idx, stmt_idx, expr)
696
+ return VirtualVariable(
697
+ expr.idx,
698
+ vvar.varid,
699
+ vvar.bits,
700
+ VirtualVariableCategory.TMP,
701
+ oident=expr.tmp_idx,
702
+ **expr.tags,
703
+ )
704
+
582
705
  #
583
706
  # Utils
584
707
  #
585
708
 
586
- def get_vvid_by_def(self, thing: Expression | Statement) -> int:
587
- if thing in self.def_to_vvid:
588
- return self.def_to_vvid[thing]
709
+ def get_vvid_by_def(
710
+ self, block_addr: int, block_idx: int | None, stmt_idx: int, thing: Expression | Statement
711
+ ) -> int:
712
+ key = block_addr, block_idx, stmt_idx, thing
713
+ if key in self.def_to_vvid:
714
+ return self.def_to_vvid[key]
589
715
  vvid = self.next_vvar_id()
590
- self.def_to_vvid[thing] = vvid
716
+ self.def_to_vvid[key] = vvid
591
717
  return vvid
592
718
 
593
719
  def _clear_aliasing_regs(self, reg_offset: int, size: int, remove_base_reg: bool = True) -> None:
@@ -32,6 +32,7 @@ class RewritingState:
32
32
  self.stackvars: defaultdict[int, dict[int, VirtualVariable]] = (
33
33
  stackvars if stackvars is not None else defaultdict(dict)
34
34
  )
35
+ self.tmps: dict[int, VirtualVariable] = {}
35
36
  self.original_block = original_block
36
37
  self.out_block = None
37
38
 
@@ -5,8 +5,8 @@ from collections import defaultdict
5
5
  from itertools import count
6
6
  from bisect import bisect_left
7
7
 
8
- from ailment.expression import Register, StackBaseOffset
9
- from ailment.statement import Store
8
+ from ailment.expression import Expression, Register, StackBaseOffset, Tmp
9
+ from ailment.statement import Statement, Store
10
10
 
11
11
  from angr.knowledge_plugins.functions import Function
12
12
  from angr.code_location import CodeLocation
@@ -33,6 +33,7 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
33
33
  func_addr: int | None = None,
34
34
  ail_manager=None,
35
35
  ssa_stackvars: bool = False,
36
+ ssa_tmps: bool = False,
36
37
  vvar_id_start: int = 0,
37
38
  ):
38
39
  """
@@ -51,6 +52,7 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
51
52
  self._func_addr = func_addr
52
53
  self._ail_manager = ail_manager
53
54
  self._ssa_stackvars = ssa_stackvars
55
+ self._ssa_tmps = ssa_tmps
54
56
  self._entry = (
55
57
  entry
56
58
  if entry is not None
@@ -68,6 +70,7 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
68
70
  stack_pointer_tracker,
69
71
  bp_as_gpr,
70
72
  ssa_stackvars,
73
+ ssa_tmps,
71
74
  )
72
75
 
73
76
  # calculate virtual variables and phi nodes
@@ -86,13 +89,19 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
86
89
  self._udef_to_phiid,
87
90
  self._phiid_to_loc,
88
91
  self._stackvar_locs,
92
+ self._ssa_tmps,
89
93
  self._ail_manager,
90
94
  vvar_id_start=vvar_id_start,
91
95
  )
92
96
  self.out_graph = rewriter.out_graph
93
97
  self.max_vvar_id = rewriter.max_vvar_id
94
98
 
95
- def _calculate_virtual_variables(self, ail_graph, def_to_loc: dict, loc_to_defs: dict[CodeLocation, Any]):
99
+ def _calculate_virtual_variables(
100
+ self,
101
+ ail_graph,
102
+ def_to_loc: list[tuple[Expression | Statement, CodeLocation]],
103
+ loc_to_defs: dict[CodeLocation, Any],
104
+ ):
96
105
  """
97
106
  Calculate the mapping from defs to virtual variables as well as where to insert phi nodes.
98
107
  """
@@ -112,7 +121,7 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
112
121
  if self._ssa_stackvars:
113
122
  # for stack variables, we collect all definitions and identify stack variable locations using heuristics
114
123
 
115
- stackvar_locs = self._synthesize_stackvar_locs([def_ for def_ in def_to_loc if isinstance(def_, Store)])
124
+ stackvar_locs = self._synthesize_stackvar_locs([def_ for def_, _ in def_to_loc if isinstance(def_, Store)])
116
125
  sorted_stackvar_offs = sorted(stackvar_locs)
117
126
  else:
118
127
  stackvar_locs = {}
@@ -121,10 +130,8 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
121
130
  # computer phi node locations for each unified definition
122
131
  udef_to_defs = defaultdict(set)
123
132
  udef_to_blockkeys = defaultdict(set)
124
- for def_ in def_to_loc:
133
+ for def_, loc in def_to_loc:
125
134
  if isinstance(def_, Register):
126
- loc = def_to_loc[def_]
127
-
128
135
  base_off, base_size = get_reg_offset_base_and_size(def_.reg_offset, self.project.arch, size=def_.size)
129
136
  base_reg_bits = base_size * self.project.arch.byte_width
130
137
  udef_to_defs[("reg", base_off, base_reg_bits)].add(def_)
@@ -135,8 +142,6 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
135
142
  udef_to_defs[("reg", def_.reg_offset, reg_bits)].add((loc.block_addr, loc.block_idx))
136
143
  elif isinstance(def_, Store):
137
144
  if isinstance(def_.addr, StackBaseOffset) and isinstance(def_.addr.offset, int):
138
- loc = def_to_loc[def_]
139
-
140
145
  idx_begin = bisect_left(sorted_stackvar_offs, def_.addr.offset)
141
146
  for i in range(idx_begin, len(sorted_stackvar_offs)):
142
147
  off = sorted_stackvar_offs[i]
@@ -144,6 +149,9 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
144
149
  break
145
150
  udef_to_defs[("stack", off, stackvar_locs[off])].add(def_)
146
151
  udef_to_blockkeys[("stack", off, stackvar_locs[off])].add((loc.block_addr, loc.block_idx))
152
+ elif isinstance(def_, Tmp):
153
+ # Tmps are local to each block and do not need phi nodes
154
+ pass
147
155
  else:
148
156
  raise NotImplementedError
149
157
  # other types are not supported yet
@@ -19,10 +19,11 @@ class TraversalAnalysis(ForwardAnalysis[None, NodeType, object, object]):
19
19
  TraversalAnalysis traverses the AIL graph and collects definitions.
20
20
  """
21
21
 
22
- def __init__(self, project, func, ail_graph, sp_tracker, bp_as_gpr: bool, stackvars: bool):
22
+ def __init__(self, project, func, ail_graph, sp_tracker, bp_as_gpr: bool, stackvars: bool, tmps: bool):
23
23
 
24
24
  self.project = project
25
25
  self._stackvars = stackvars
26
+ self._tmps = tmps
26
27
  self._function = func
27
28
  self._graph_visitor = FunctionGraphVisitor(self._function, ail_graph)
28
29
 
@@ -34,6 +35,7 @@ class TraversalAnalysis(ForwardAnalysis[None, NodeType, object, object]):
34
35
  sp_tracker=sp_tracker,
35
36
  bp_as_gpr=bp_as_gpr,
36
37
  stackvars=self._stackvars,
38
+ tmps=self._tmps,
37
39
  )
38
40
 
39
41
  self._visited_blocks: set[Any] = set()