angr 9.2.140__py3-none-win_amd64.whl → 9.2.142__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (76)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +105 -35
  3. angr/analyses/calling_convention/fact_collector.py +44 -18
  4. angr/analyses/calling_convention/utils.py +3 -1
  5. angr/analyses/cfg/cfg_base.py +38 -4
  6. angr/analyses/cfg/cfg_fast.py +23 -7
  7. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +13 -8
  8. angr/analyses/class_identifier.py +8 -7
  9. angr/analyses/complete_calling_conventions.py +1 -1
  10. angr/analyses/decompiler/ail_simplifier.py +105 -62
  11. angr/analyses/decompiler/callsite_maker.py +24 -11
  12. angr/analyses/decompiler/clinic.py +83 -5
  13. angr/analyses/decompiler/condition_processor.py +7 -7
  14. angr/analyses/decompiler/decompilation_cache.py +2 -1
  15. angr/analyses/decompiler/decompiler.py +11 -2
  16. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +4 -6
  17. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +8 -2
  18. angr/analyses/decompiler/optimization_passes/condition_constprop.py +63 -34
  19. angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +3 -1
  20. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +21 -2
  21. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +85 -16
  22. angr/analyses/decompiler/optimization_passes/optimization_pass.py +78 -1
  23. angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +29 -7
  24. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +51 -7
  25. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +6 -0
  26. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +9 -1
  27. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +44 -7
  28. angr/analyses/decompiler/region_identifier.py +76 -51
  29. angr/analyses/decompiler/region_simplifiers/expr_folding.py +32 -18
  30. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +4 -1
  31. angr/analyses/decompiler/ssailification/rewriting.py +70 -32
  32. angr/analyses/decompiler/ssailification/rewriting_engine.py +118 -24
  33. angr/analyses/decompiler/ssailification/ssailification.py +22 -14
  34. angr/analyses/decompiler/stack_item.py +36 -0
  35. angr/analyses/decompiler/structured_codegen/c.py +86 -145
  36. angr/analyses/decompiler/structuring/dream.py +1 -1
  37. angr/analyses/decompiler/structuring/phoenix.py +9 -4
  38. angr/analyses/decompiler/structuring/structurer_base.py +2 -1
  39. angr/analyses/decompiler/utils.py +46 -20
  40. angr/analyses/find_objects_static.py +2 -1
  41. angr/analyses/reaching_definitions/engine_vex.py +13 -0
  42. angr/analyses/reaching_definitions/function_handler.py +24 -10
  43. angr/analyses/reaching_definitions/function_handler_library/stdio.py +1 -0
  44. angr/analyses/reaching_definitions/function_handler_library/stdlib.py +45 -12
  45. angr/analyses/reaching_definitions/function_handler_library/string.py +77 -21
  46. angr/analyses/reaching_definitions/function_handler_library/unistd.py +21 -1
  47. angr/analyses/reaching_definitions/rd_state.py +11 -7
  48. angr/analyses/s_liveness.py +44 -6
  49. angr/analyses/s_reaching_definitions/s_rda_model.py +4 -2
  50. angr/analyses/s_reaching_definitions/s_rda_view.py +43 -25
  51. angr/analyses/typehoon/simple_solver.py +35 -8
  52. angr/analyses/typehoon/typehoon.py +3 -1
  53. angr/analyses/variable_recovery/engine_ail.py +1 -1
  54. angr/analyses/variable_recovery/engine_vex.py +20 -4
  55. angr/calling_conventions.py +17 -12
  56. angr/factory.py +8 -3
  57. angr/knowledge_plugins/functions/function.py +5 -10
  58. angr/knowledge_plugins/variables/variable_manager.py +34 -5
  59. angr/lib/angr_native.dll +0 -0
  60. angr/procedures/definitions/__init__.py +3 -10
  61. angr/procedures/definitions/wdk_ntoskrnl.py +2 -0
  62. angr/procedures/win32_kernel/__fastfail.py +15 -0
  63. angr/sim_procedure.py +2 -2
  64. angr/simos/simos.py +17 -11
  65. angr/simos/windows.py +42 -1
  66. angr/utils/ail.py +41 -1
  67. angr/utils/cpp.py +17 -0
  68. angr/utils/doms.py +142 -0
  69. angr/utils/library.py +1 -1
  70. angr/utils/types.py +59 -0
  71. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/METADATA +7 -7
  72. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/RECORD +76 -71
  73. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/LICENSE +0 -0
  74. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/WHEEL +0 -0
  75. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/entry_points.txt +0 -0
  76. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/top_level.txt +0 -0
@@ -36,19 +36,21 @@ class StatementLocation(LocationBase):
36
36
  __slots__ = (
37
37
  "block_addr",
38
38
  "block_idx",
39
+ "phi_stmt",
39
40
  "stmt_idx",
40
41
  )
41
42
 
42
- def __init__(self, block_addr, block_idx, stmt_idx):
43
+ def __init__(self, block_addr, block_idx, stmt_idx, phi_stmt: bool = False):
43
44
  self.block_addr = block_addr
44
45
  self.block_idx = block_idx
45
46
  self.stmt_idx = stmt_idx
47
+ self.phi_stmt = phi_stmt
46
48
 
47
49
  def __repr__(self):
48
- return f"Loc: Statement@{self.block_addr:x}.{self.block_idx}-{self.stmt_idx}"
50
+ return f"Loc: Statement@{self.block_addr:x}.{self.block_idx}-{self.stmt_idx}{' phi' if self.phi_stmt else ''}"
49
51
 
50
52
  def __hash__(self):
51
- return hash((StatementLocation, self.block_addr, self.block_idx, self.stmt_idx))
53
+ return hash((StatementLocation, self.block_addr, self.block_idx, self.stmt_idx, self.phi_stmt))
52
54
 
53
55
  def __eq__(self, other):
54
56
  return (
@@ -56,10 +58,11 @@ class StatementLocation(LocationBase):
56
58
  and self.block_addr == other.block_addr
57
59
  and self.block_idx == other.block_idx
58
60
  and self.stmt_idx == other.stmt_idx
61
+ and self.phi_stmt == other.phi_stmt
59
62
  )
60
63
 
61
64
  def copy(self):
62
- return StatementLocation(self.block_addr, self.block_idx, self.stmt_idx)
65
+ return StatementLocation(self.block_addr, self.block_idx, self.stmt_idx, phi_stmt=self.phi_stmt)
63
66
 
64
67
 
65
68
  class ExpressionLocation(LocationBase):
@@ -71,23 +74,28 @@ class ExpressionLocation(LocationBase):
71
74
  "block_addr",
72
75
  "block_idx",
73
76
  "expr_idx",
77
+ "phi_stmt",
74
78
  "stmt_idx",
75
79
  )
76
80
 
77
- def __init__(self, block_addr, block_idx, stmt_idx, expr_idx):
81
+ def __init__(self, block_addr, block_idx, stmt_idx, expr_idx, phi_stmt: bool = False):
78
82
  self.block_addr = block_addr
79
83
  self.block_idx = block_idx
80
84
  self.stmt_idx = stmt_idx
81
85
  self.expr_idx = expr_idx
86
+ self.phi_stmt = phi_stmt
82
87
 
83
88
  def __repr__(self):
84
- return f"Loc: Expression@{self.block_addr:x}.{self.block_idx}-{self.stmt_idx}[{self.expr_idx}]"
89
+ return (
90
+ f"Loc: Expression@{self.block_addr:x}.{self.block_idx}-{self.stmt_idx}[{self.expr_idx}]"
91
+ f"{'phi' if self.phi_stmt else ''}"
92
+ )
85
93
 
86
94
  def statement_location(self) -> StatementLocation:
87
- return StatementLocation(self.block_addr, self.block_idx, self.stmt_idx)
95
+ return StatementLocation(self.block_addr, self.block_idx, self.stmt_idx, phi_stmt=self.phi_stmt)
88
96
 
89
97
  def __hash__(self):
90
- return hash((ExpressionLocation, self.block_addr, self.block_idx, self.stmt_idx, self.expr_idx))
98
+ return hash((ExpressionLocation, self.block_addr, self.block_idx, self.stmt_idx, self.expr_idx, self.phi_stmt))
91
99
 
92
100
  def __eq__(self, other):
93
101
  return (
@@ -96,6 +104,7 @@ class ExpressionLocation(LocationBase):
96
104
  and self.block_idx == other.block_idx
97
105
  and self.stmt_idx == other.stmt_idx
98
106
  and self.expr_idx == other.expr_idx
107
+ and self.phi_stmt == other.phi_stmt
99
108
  )
100
109
 
101
110
 
@@ -201,7 +210,18 @@ class ExpressionUseFinder(AILBlockWalker):
201
210
  if isinstance(expr, ailment.Expr.VirtualVariable) and expr.was_reg:
202
211
  if not (isinstance(stmt, ailment.Stmt.Assignment) and stmt.dst is expr):
203
212
  if block is not None:
204
- self.uses[expr.varid].add((expr, ExpressionLocation(block.addr, block.idx, stmt_idx, expr_idx)))
213
+ self.uses[expr.varid].add(
214
+ (
215
+ expr,
216
+ ExpressionLocation(
217
+ block.addr,
218
+ block.idx,
219
+ stmt_idx,
220
+ expr_idx,
221
+ phi_stmt=stmt is not None and is_phi_assignment(stmt),
222
+ ),
223
+ )
224
+ )
205
225
  else:
206
226
  self.uses[expr.varid].add((expr, None))
207
227
  return None
@@ -239,11 +259,7 @@ class ExpressionCounter(SequenceWalker):
239
259
  if isinstance(stmt, ailment.Stmt.Assignment):
240
260
  if is_phi_assignment(stmt):
241
261
  return
242
- if (
243
- isinstance(stmt.dst, ailment.Expr.VirtualVariable)
244
- and stmt.dst.was_reg
245
- and stmt.dst.variable is not None
246
- ):
262
+ if isinstance(stmt.dst, ailment.Expr.VirtualVariable) and stmt.dst.was_reg:
247
263
  # dependency
248
264
  dependency_finder = ExpressionUseFinder()
249
265
  dependency_finder.walk_expression(stmt.src)
@@ -260,7 +276,6 @@ class ExpressionCounter(SequenceWalker):
260
276
  isinstance(stmt, ailment.Stmt.Call)
261
277
  and isinstance(stmt.ret_expr, ailment.Expr.VirtualVariable)
262
278
  and stmt.ret_expr.was_reg
263
- and stmt.ret_expr.variable is not None
264
279
  ):
265
280
  dependency_finder = ExpressionUseFinder()
266
281
  dependency_finder.walk_expression(stmt)
@@ -279,8 +294,7 @@ class ExpressionCounter(SequenceWalker):
279
294
  use_finder = ExpressionUseFinder()
280
295
  for idx, stmt in enumerate(node.statements):
281
296
  self._handle_Statement(idx, stmt, node)
282
- if not is_phi_assignment(stmt):
283
- use_finder.walk_statement(stmt)
297
+ use_finder.walk_statement(stmt, block=node)
284
298
 
285
299
  for varid, content in use_finder.uses.items():
286
300
  if varid not in self.uses:
@@ -407,7 +421,7 @@ class InterferenceChecker(SequenceWalker):
407
421
  the_call = None
408
422
  if isinstance(stmt, Assignment) and isinstance(stmt.src, ailment.Stmt.Call):
409
423
  the_call = stmt.src
410
- elif isinstance(stmt, ailment.Stmt.Call):
424
+ elif isinstance(stmt, ailment.Stmt.Call) and not isinstance(stmt.target, str):
411
425
  the_call = stmt
412
426
  if the_call is not None:
413
427
  spotter.walk_expression(the_call.target)
@@ -116,10 +116,13 @@ class RegionSimplifier(Analysis):
116
116
  for var, uses in expr_counter.uses.items():
117
117
  if len(uses) == 1 and var in expr_counter.assignments and len(expr_counter.assignments[var]) == 1:
118
118
  definition, deps, loc, has_loads = next(iter(expr_counter.assignments[var]))
119
+ _, use_expr_loc = next(iter(uses))
120
+ if isinstance(use_expr_loc, ExpressionLocation) and use_expr_loc.phi_stmt:
121
+ # we cannot fold expressions that are used in phi statements
122
+ continue
119
123
  if has_loads:
120
124
  # the definition has at least one load expression. we need to ensure there are no store statements
121
125
  # between the definition site and the use site
122
- _, use_expr_loc = next(iter(uses))
123
126
  if isinstance(use_expr_loc, ExpressionLocation):
124
127
  use_loc = use_expr_loc.statement_location()
125
128
  else:
@@ -8,21 +8,20 @@ import networkx
8
8
 
9
9
  import ailment
10
10
  from ailment import Block
11
- from ailment.expression import Expression, Phi, VirtualVariable, VirtualVariableCategory
12
- from ailment.statement import Statement, Assignment, Label
11
+ from ailment.expression import Phi, VirtualVariable, VirtualVariableCategory
12
+ from ailment.statement import Assignment, Label
13
13
 
14
14
  from angr.code_location import CodeLocation
15
15
  from angr.analyses import ForwardAnalysis
16
- from angr.analyses.forward_analysis.visitors.graph import NodeType
17
16
  from angr.analyses.forward_analysis import FunctionGraphVisitor
18
- from .rewriting_engine import SimEngineSSARewriting
17
+ from angr.utils.ail import is_head_controlled_loop_block
18
+ from .rewriting_engine import SimEngineSSARewriting, DefExprType, AT
19
19
  from .rewriting_state import RewritingState
20
20
 
21
-
22
21
  l = logging.getLogger(__name__)
23
22
 
24
23
 
25
- class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object]):
24
+ class RewritingAnalysis(ForwardAnalysis[RewritingState, ailment.Block, object, object]):
26
25
  """
27
26
  RewritingAnalysis traverses the AIL graph, inserts phi nodes, and rewrites all expression uses to virtual variables
28
27
  when necessary.
@@ -37,7 +36,7 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
37
36
  bp_as_gpr: bool,
38
37
  udef_to_phiid: dict[tuple, set[int]],
39
38
  phiid_to_loc: dict[int, tuple[int, int | None]],
40
- stackvar_locs: dict[int, int],
39
+ stackvar_locs: dict[int, set[int]],
41
40
  rewrite_tmps: bool,
42
41
  ail_manager,
43
42
  func_args: set[VirtualVariable],
@@ -72,10 +71,22 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
72
71
  self._visited_blocks: set[Any] = set()
73
72
  self.out_blocks = {}
74
73
  self.out_states = {}
74
+ # loop_states stores states at the beginning of a loop block *after a loop iteration*, where the block is the
75
+ # following:
76
+ # 0x4036df | t4 = (rcx<8> == 0x0<64>)
77
+ # 0x4036df | if (t4) { Goto 0x4036e2<64> } else { Goto 0x4036df<64> }
78
+ # 0x4036df | STORE(addr=t3, data=t2, size=8, endness=Iend_LE, guard=None)
79
+ # 0x4036df | rdi<8> = t8
80
+ #
81
+ self.head_controlled_loop_outstates = {}
75
82
 
76
83
  self._analyze()
77
84
 
78
- self.def_to_vvid: dict[tuple[int, int | None, int, Expression | Statement], int] = self._engine_ail.def_to_vvid
85
+ self.def_to_vvid: dict[tuple[int, int | None, int, DefExprType, AT], int] = self._engine_ail.def_to_vvid
86
+ # during SSA conversion, we create secondary stack variables because they overlap and are larger than the
87
+ # actual stack variables. these secondary stack variables can be safely eliminated during dead assignment
88
+ # elimination if not used by anything else.
89
+ self.secondary_stackvars: set[int] = self._engine_ail.secondary_stackvars
79
90
  self.out_graph = self._make_new_graph(ail_graph)
80
91
 
81
92
  @property
@@ -174,24 +185,34 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
174
185
  else:
175
186
  node.statements = node.statements[:idx] + phi_stmts + node.statements[idx:]
176
187
 
177
- def _reg_predicate(self, node_, *, reg_offset: int, reg_size: int) -> tuple[bool, Any]:
178
- out_state: RewritingState = self.out_states[(node_.addr, node_.idx)]
188
+ def _reg_predicate(self, node_: Block, *, reg_offset: int, reg_size: int) -> tuple[bool, Any]:
189
+ out_state: RewritingState = (
190
+ self.head_controlled_loop_outstates[(node_.addr, node_.idx)]
191
+ if is_head_controlled_loop_block(node_)
192
+ else self.out_states[(node_.addr, node_.idx)]
193
+ )
179
194
  if reg_offset in out_state.registers and reg_size in out_state.registers[reg_offset]:
180
- if out_state.registers[reg_offset][reg_size] is None:
195
+ existing_var = out_state.registers[reg_offset][reg_size]
196
+ if existing_var is None:
181
197
  # the vvar is not set. it should never be referenced
182
198
  return True, None
183
- vvar = out_state.registers[reg_offset][reg_size].copy()
199
+ vvar = existing_var.copy()
184
200
  vvar.idx = self._ail_manager.next_atom()
185
201
  return True, vvar
186
202
  return False, None
187
203
 
188
- def _stack_predicate(self, node_, *, stack_offset: int, stackvar_size: int) -> tuple[bool, Any]:
189
- out_state: RewritingState = self.out_states[(node_.addr, node_.idx)]
204
+ def _stack_predicate(self, node_: Block, *, stack_offset: int, stackvar_size: int) -> tuple[bool, Any]:
205
+ out_state: RewritingState = (
206
+ self.head_controlled_loop_outstates[(node_.addr, node_.idx)]
207
+ if is_head_controlled_loop_block(node_)
208
+ else self.out_states[(node_.addr, node_.idx)]
209
+ )
190
210
  if stack_offset in out_state.stackvars and stackvar_size in out_state.stackvars[stack_offset]:
191
- if out_state.stackvars[stack_offset][stackvar_size] is None:
211
+ existing_var = out_state.stackvars[stack_offset][stackvar_size]
212
+ if existing_var is None:
192
213
  # the vvar is not set. it should never be referenced
193
214
  return True, None
194
- vvar = out_state.stackvars[stack_offset][stackvar_size].copy()
215
+ vvar = existing_var.copy()
195
216
  vvar.idx = self._ail_manager.next_atom()
196
217
  return True, vvar
197
218
  return False, None
@@ -215,11 +236,14 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
215
236
  )
216
237
  # update state with function arguments
217
238
  for func_arg in self._func_args:
218
- if func_arg.oident[0] == VirtualVariableCategory.REGISTER:
219
- reg_offset, reg_size = func_arg.oident[1], func_arg.size
239
+ if func_arg.parameter_category == VirtualVariableCategory.REGISTER:
240
+ reg_offset, reg_size = func_arg.parameter_reg_offset, func_arg.size
241
+ assert reg_offset is not None and reg_size is not None
220
242
  state.registers[reg_offset][reg_size] = func_arg
221
- elif func_arg.oident[0] == VirtualVariableCategory.STACK:
222
- state.stackvars[func_arg.oident[1]][func_arg.size] = func_arg
243
+ elif func_arg.parameter_category == VirtualVariableCategory.STACK:
244
+ parameter_stack_offset: int = func_arg.oident[1] # type: ignore
245
+ assert parameter_stack_offset is not None and func_arg.size is not None
246
+ state.stackvars[parameter_stack_offset][func_arg.size] = func_arg
223
247
  return state
224
248
 
225
249
  def _run_on_node(self, node, state: RewritingState):
@@ -254,18 +278,32 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
254
278
  )
255
279
 
256
280
  self._visited_blocks.add(block_key)
257
- self.out_states[block_key] = state
258
-
259
- if state.out_block is not None:
260
- assert state.out_block.addr == block.addr
261
-
262
- if self.out_blocks.get(block_key, None) == state.out_block:
263
- return True, state
264
- self.out_blocks[block_key] = state.out_block
265
- state.out_block = None
266
- return True, state
267
-
268
- return True, state
281
+ # get the output state (which is the input state for the successor node)
282
+ # if head_controlled_loop_outstate is set, then it is the output state of the successor node; in this case, the
283
+ # input state for the head-controlled loop block itself is out.state.
284
+ # otherwise (if head_controlled_loop_outstate is not set), engine.state is the input state of the successor
285
+ # node.
286
+ if engine.head_controlled_loop_outstate is None:
287
+ # this is a normal block
288
+ out_state = state
289
+ else:
290
+ # this is a head-controlled loop block
291
+ out_state = engine.head_controlled_loop_outstate
292
+ self.head_controlled_loop_outstates[block_key] = state
293
+ self.out_states[block_key] = out_state
294
+ # the final block is always in state
295
+ out_block = state.out_block
296
+
297
+ if out_block is not None:
298
+ assert out_block.addr == block.addr
299
+
300
+ if self.out_blocks.get(block_key, None) == out_block:
301
+ return True, out_state
302
+ self.out_blocks[block_key] = out_block
303
+ out_state.out_block = None
304
+ return True, out_state
305
+
306
+ return True, out_state
269
307
 
270
308
  def _intra_analysis(self):
271
309
  pass
@@ -1,7 +1,10 @@
1
1
  # pylint:disable=no-self-use,unused-argument
2
2
  from __future__ import annotations
3
+ from typing import Literal
3
4
  import logging
4
5
 
6
+ from archinfo import Endness
7
+ from ailment.block import Block
5
8
  from ailment.manager import Manager
6
9
  from ailment.statement import Statement, Assignment, Store, Call, Return, ConditionalJump, DirtyStatement, Jump
7
10
  from ailment.expression import (
@@ -22,6 +25,7 @@ from ailment.expression import (
22
25
  Reinterpret,
23
26
  )
24
27
 
28
+ from angr.knowledge_plugins.key_definitions import atoms
25
29
  from angr.engines.light.engine import SimEngineNostmtAIL
26
30
  from angr.utils.ssa import get_reg_offset_base_and_size
27
31
  from .rewriting_state import RewritingState
@@ -30,6 +34,10 @@ from .rewriting_state import RewritingState
30
34
  _l = logging.getLogger(__name__)
31
35
 
32
36
 
37
+ DefExprType = atoms.Register | atoms.Tmp | atoms.MemoryLocation
38
+ AT = Literal["l", "s"] | None
39
+
40
+
33
41
  class SimEngineSSARewriting(
34
42
  SimEngineNostmtAIL[RewritingState, Expression | None, Statement | tuple[Statement, ...], None]
35
43
  ):
@@ -38,6 +46,8 @@ class SimEngineSSARewriting(
38
46
  copies at each use location.
39
47
  """
40
48
 
49
+ state: RewritingState
50
+
41
51
  def __init__(
42
52
  self,
43
53
  project,
@@ -45,7 +55,7 @@ class SimEngineSSARewriting(
45
55
  sp_tracker,
46
56
  udef_to_phiid: dict[tuple, set[int]],
47
57
  phiid_to_loc: dict[int, tuple[int, int | None]],
48
- stackvar_locs: dict[int, int],
58
+ stackvar_locs: dict[int, set[int]],
49
59
  ail_manager: Manager,
50
60
  vvar_id_start: int = 0,
51
61
  bp_as_gpr: bool = False,
@@ -55,12 +65,15 @@ class SimEngineSSARewriting(
55
65
 
56
66
  self.sp_tracker = sp_tracker
57
67
  self.bp_as_gpr = bp_as_gpr
58
- self.def_to_vvid: dict[tuple[int, int | None, int, Expression | Statement], int] = {}
68
+ self.def_to_vvid: dict[tuple[int, int | None, int, DefExprType, AT], int] = {}
59
69
  self.stackvar_locs = stackvar_locs
60
70
  self.udef_to_phiid = udef_to_phiid
61
71
  self.phiid_to_loc = phiid_to_loc
62
72
  self.rewrite_tmps = rewrite_tmps
63
73
  self.ail_manager = ail_manager
74
+ self.head_controlled_loop_outstate: RewritingState | None = None
75
+
76
+ self.secondary_stackvars: set[int] = set()
64
77
 
65
78
  self._current_vvar_id = vvar_id_start
66
79
 
@@ -76,6 +89,12 @@ class SimEngineSSARewriting(
76
89
  # Handlers
77
90
  #
78
91
 
92
+ def process(
93
+ self, state: RewritingState, *, block: Block | None = None, whitelist: set[int] | None = None, **kwargs
94
+ ) -> None:
95
+ self.head_controlled_loop_outstate = None
96
+ super().process(state, block=block, whitelist=whitelist, **kwargs)
97
+
79
98
  def _top(self, bits):
80
99
  assert False, "Unreachable"
81
100
 
@@ -103,6 +122,7 @@ class SimEngineSSARewriting(
103
122
  elif stmt.dst.category == VirtualVariableCategory.STACK:
104
123
  self.state.stackvars[stmt.dst.stack_offset][stmt.dst.size] = stmt.dst
105
124
  elif stmt.dst.category == VirtualVariableCategory.TMP:
125
+ assert stmt.dst.tmp_idx is not None
106
126
  self.state.tmps[stmt.dst.tmp_idx] = stmt.dst
107
127
  new_dst = None
108
128
  else:
@@ -134,10 +154,11 @@ class SimEngineSSARewriting(
134
154
  base_reg_vvar = self._replace_def_expr(
135
155
  self.block.addr, self.block.idx, self.stmt_idx, base_reg_expr
136
156
  )
157
+ assert base_reg_vvar is not None
137
158
  stmt_base_reg = Assignment(
138
159
  self.ail_manager.next_atom(),
139
160
  base_reg_vvar,
140
- self._reg_update_expr(
161
+ self._partial_update_expr(
141
162
  existing_base_reg_vvar, base_offset, base_size, new_dst, stmt.dst.reg_offset, stmt.dst.size
142
163
  ),
143
164
  **stmt.tags,
@@ -160,12 +181,40 @@ class SimEngineSSARewriting(
160
181
  return new_stmt
161
182
  return None
162
183
 
163
- def _handle_stmt_Store(self, stmt: Store) -> Store | Assignment | None:
184
+ def _handle_stmt_Store(self, stmt: Store) -> Store | Assignment | tuple[Assignment, ...] | None:
164
185
  new_data = self._expr(stmt.data)
165
186
  if stmt.guard is None:
187
+ # the variable
166
188
  vvar = self._replace_def_store(self.block.addr, self.block.idx, self.stmt_idx, stmt)
167
189
  if vvar is not None:
168
- return Assignment(stmt.idx, vvar, stmt.data if new_data is None else new_data, **stmt.tags)
190
+ assert isinstance(stmt.addr, StackBaseOffset) and isinstance(stmt.addr.offset, int)
191
+
192
+ # remove everything else that overlaps with the full (base) stack variable
193
+ # the full stack variable is kept around because it's always updated immediately and will be used in
194
+ # case of partial stack variable update
195
+ self._clear_overlapping_stackvars(stmt.addr.offset, stmt.size, remove_base_stackvar=False)
196
+
197
+ data = stmt.data if new_data is None else new_data
198
+ vvar_assignment = Assignment(stmt.idx, vvar, data, **stmt.tags)
199
+
200
+ full_size = self._get_stack_var_full_size(stmt)
201
+ assert full_size is not None
202
+ if vvar.size >= full_size:
203
+ return vvar_assignment
204
+
205
+ # update the full variable
206
+ existing_full_vvar = self._replace_use_load(Load(None, stmt.addr, full_size, stmt.endness))
207
+ vvar_full = self._replace_def_store(
208
+ self.block.addr, self.block.idx, self.stmt_idx, stmt, force_size=full_size
209
+ )
210
+ if existing_full_vvar is not None and vvar_full is not None:
211
+ self.secondary_stackvars.add(vvar_full.varid)
212
+ full_data = self._partial_update_expr(
213
+ existing_full_vvar, stmt.addr.offset, full_size, vvar, stmt.addr.offset, stmt.size
214
+ )
215
+ full_assignment = Assignment(stmt.idx, vvar_full, full_data, **stmt.tags)
216
+ return vvar_assignment, full_assignment
217
+ return vvar_assignment
169
218
 
170
219
  # fall back to Store
171
220
  new_addr = self._expr(stmt.addr)
@@ -195,6 +244,11 @@ class SimEngineSSARewriting(
195
244
  new_true_target = self._expr(stmt.true_target) if stmt.true_target is not None else None
196
245
  new_false_target = self._expr(stmt.false_target) if stmt.false_target is not None else None
197
246
 
247
+ if self.stmt_idx != len(self.block.statements) - 1:
248
+ # the conditional jump is in the middle of the block (e.g., the block generated from lifting rep stosq).
249
+ # we need to make a copy of the state and use the state of this point in its successor
250
+ self.head_controlled_loop_outstate = self.state.copy()
251
+
198
252
  if new_cond is not None or new_true_target is not None or new_false_target is not None:
199
253
  return ConditionalJump(
200
254
  stmt.idx,
@@ -210,7 +264,7 @@ class SimEngineSSARewriting(
210
264
  def _handle_stmt_Call(self, stmt: Call) -> Call | None:
211
265
  changed = False
212
266
 
213
- new_target = self._replace_use_expr(stmt.target)
267
+ new_target = self._replace_use_expr(stmt.target) if not isinstance(stmt.target, str) else None
214
268
  new_ret_expr = (
215
269
  self._replace_def_expr(self.block.addr, self.block.idx, self.stmt_idx, stmt.ret_expr)
216
270
  if stmt.ret_expr is not None
@@ -275,7 +329,7 @@ class SimEngineSSARewriting(
275
329
  assert isinstance(dirty, DirtyExpression)
276
330
  return DirtyStatement(stmt.idx, dirty, **stmt.tags)
277
331
 
278
- def _handle_expr_Register(self, expr: Register) -> VirtualVariable | None:
332
+ def _handle_expr_Register(self, expr: Register) -> VirtualVariable | Expression | None:
279
333
  return self._replace_use_reg(expr)
280
334
 
281
335
  def _handle_expr_Tmp(self, expr: Tmp) -> VirtualVariable | None:
@@ -460,9 +514,9 @@ class SimEngineSSARewriting(
460
514
  # Expression replacement
461
515
  #
462
516
 
463
- def _reg_update_expr(
517
+ def _partial_update_expr(
464
518
  self,
465
- existing_vvar: VirtualVariable,
519
+ existing_vvar: Expression,
466
520
  base_offset: int,
467
521
  base_size: int,
468
522
  new_vvar: VirtualVariable,
@@ -546,7 +600,7 @@ class SimEngineSSARewriting(
546
600
  """
547
601
 
548
602
  # get the virtual variable ID
549
- vvid = self.get_vvid_by_def(block_addr, block_idx, stmt_idx, expr)
603
+ vvid = self.get_vvid_by_def(block_addr, block_idx, stmt_idx, atoms.Register(expr.reg_offset, expr.size), "s")
550
604
  return VirtualVariable(
551
605
  expr.idx,
552
606
  vvid,
@@ -578,32 +632,51 @@ class SimEngineSSARewriting(
578
632
  )
579
633
  self.state.registers[base_off][base_size] = vvar
580
634
  return vvar
581
- return self.state.registers[base_off][base_size]
635
+ existing_var = self.state.registers[base_off][base_size]
636
+ assert existing_var is not None
637
+ return existing_var
638
+
639
+ def _get_stack_var_full_size(self, stmt: Store) -> int | None:
640
+ if (
641
+ isinstance(stmt.addr, StackBaseOffset)
642
+ and isinstance(stmt.addr.offset, int)
643
+ and stmt.addr.offset in self.stackvar_locs
644
+ and stmt.size in self.stackvar_locs[stmt.addr.offset]
645
+ ):
646
+ return max(self.stackvar_locs[stmt.addr.offset])
647
+ return None
582
648
 
583
649
  def _replace_def_store(
584
- self, block_addr: int, block_idx: int | None, stmt_idx: int, stmt: Store
650
+ self, block_addr: int, block_idx: int | None, stmt_idx: int, stmt: Store, force_size: int | None = None
585
651
  ) -> VirtualVariable | None:
586
652
  if (
587
653
  isinstance(stmt.addr, StackBaseOffset)
588
654
  and isinstance(stmt.addr.offset, int)
589
655
  and stmt.addr.offset in self.stackvar_locs
590
- and stmt.size == self.stackvar_locs[stmt.addr.offset]
656
+ and stmt.size in self.stackvar_locs[stmt.addr.offset]
591
657
  ):
592
- vvar_id = self.get_vvid_by_def(block_addr, block_idx, stmt_idx, stmt)
658
+ size = stmt.size if force_size is None else force_size
659
+ vvar_id = self.get_vvid_by_def(
660
+ block_addr,
661
+ block_idx,
662
+ stmt_idx,
663
+ atoms.MemoryLocation(stmt.addr.offset, size, Endness(stmt.endness)),
664
+ "s",
665
+ )
593
666
  vvar = VirtualVariable(
594
667
  self.ail_manager.next_atom(),
595
668
  vvar_id,
596
- stmt.size * self.arch.byte_width,
669
+ size * self.arch.byte_width,
597
670
  category=VirtualVariableCategory.STACK,
598
671
  oident=stmt.addr.offset,
599
672
  **stmt.tags,
600
673
  )
601
- self.state.stackvars[stmt.addr.offset][stmt.size] = vvar
674
+ self.state.stackvars[stmt.addr.offset][size] = vvar
602
675
  return vvar
603
676
  return None
604
677
 
605
678
  def _replace_def_tmp(self, block_addr: int, block_idx: int | None, stmt_idx: int, expr: Tmp) -> VirtualVariable:
606
- vvid = self.get_vvid_by_def(block_addr, block_idx, stmt_idx, expr)
679
+ vvid = self.get_vvid_by_def(block_addr, block_idx, stmt_idx, atoms.Tmp(expr.tmp_idx, expr.size), "s")
607
680
  vvar = VirtualVariable(
608
681
  expr.idx,
609
682
  vvid,
@@ -615,7 +688,7 @@ class SimEngineSSARewriting(
615
688
  self.state.tmps[expr.tmp_idx] = vvar
616
689
  return vvar
617
690
 
618
- def _replace_use_expr(self, thing: Expression | Statement) -> VirtualVariable | None:
691
+ def _replace_use_expr(self, thing: Expression | Statement) -> VirtualVariable | Expression | None:
619
692
  """
620
693
  Return a new virtual variable for the given defined expression.
621
694
  """
@@ -670,7 +743,7 @@ class SimEngineSSARewriting(
670
743
  elif reg_expr.size > existing_size:
671
744
  # part of the variable exists... maybe it's a parameter?
672
745
  vvar = self.state.registers[reg_expr.reg_offset][existing_size]
673
- if vvar.category == VirtualVariableCategory.PARAMETER:
746
+ if vvar is not None and vvar.category == VirtualVariableCategory.PARAMETER:
674
747
  # just zero-extend it
675
748
  return Convert(
676
749
  self.ail_manager.next_atom(),
@@ -698,7 +771,7 @@ class SimEngineSSARewriting(
698
771
  shift_amount = Const(
699
772
  self.ail_manager.next_atom(),
700
773
  None,
701
- (reg_expr.reg_offset - vvar.oident) * self.arch.byte_width,
774
+ (reg_expr.reg_offset - vvar.reg_offset) * self.arch.byte_width,
702
775
  8,
703
776
  **reg_expr.tags,
704
777
  )
@@ -729,11 +802,17 @@ class SimEngineSSARewriting(
729
802
  isinstance(expr.addr, StackBaseOffset)
730
803
  and isinstance(expr.addr.offset, int)
731
804
  and expr.addr.offset in self.stackvar_locs
732
- and expr.size == self.stackvar_locs[expr.addr.offset]
805
+ and expr.size in self.stackvar_locs[expr.addr.offset]
733
806
  ):
734
807
  if expr.size not in self.state.stackvars[expr.addr.offset]:
735
808
  # create it on the fly
736
- vvar_id = self.get_vvid_by_def(self.block.addr, self.block.idx, self.stmt_idx, expr)
809
+ vvar_id = self.get_vvid_by_def(
810
+ self.block.addr,
811
+ self.block.idx,
812
+ self.stmt_idx,
813
+ atoms.MemoryLocation(expr.addr.offset, expr.size, Endness(expr.endness)),
814
+ "l",
815
+ )
737
816
  return VirtualVariable(
738
817
  self.ail_manager.next_atom(),
739
818
  vvar_id,
@@ -783,9 +862,9 @@ class SimEngineSSARewriting(
783
862
  #
784
863
 
785
864
  def get_vvid_by_def(
786
- self, block_addr: int, block_idx: int | None, stmt_idx: int, thing: Expression | Statement
865
+ self, block_addr: int, block_idx: int | None, stmt_idx: int, thing: DefExprType, access_type: AT
787
866
  ) -> int:
788
- key = block_addr, block_idx, stmt_idx, thing
867
+ key = block_addr, block_idx, stmt_idx, thing, access_type
789
868
  if key in self.def_to_vvid:
790
869
  return self.def_to_vvid[key]
791
870
  vvid = self.next_vvar_id()
@@ -802,6 +881,21 @@ class SimEngineSSARewriting(
802
881
  else:
803
882
  del self.state.registers[off]
804
883
 
884
+ def _clear_overlapping_stackvars(self, stack_offset: int, size: int, remove_base_stackvar: bool = True) -> None:
885
+ for off in range(stack_offset, stack_offset + size):
886
+ if off in self.state.stackvars:
887
+ if (
888
+ not remove_base_stackvar
889
+ and off in self.stackvar_locs
890
+ and off == stack_offset
891
+ and (base_size := max(self.stackvar_locs[off])) == size
892
+ and base_size in self.state.stackvars[off]
893
+ ):
894
+ if len(self.state.stackvars[off]) > 1:
895
+ self.state.stackvars[off] = {base_size: self.state.stackvars[off][base_size]}
896
+ else:
897
+ del self.state.stackvars[off]
898
+
805
899
  def _unreachable(self, *args, **kwargs):
806
900
  assert False
807
901