angr 9.2.142__py3-none-manylinux2014_x86_64.whl → 9.2.144__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (61)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +22 -10
  3. angr/analyses/calling_convention/fact_collector.py +72 -14
  4. angr/analyses/cfg/cfg_base.py +7 -2
  5. angr/analyses/cfg/cfg_emulated.py +13 -4
  6. angr/analyses/cfg/cfg_fast.py +21 -60
  7. angr/analyses/cfg/indirect_jump_resolvers/__init__.py +2 -0
  8. angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +12 -1
  9. angr/analyses/cfg/indirect_jump_resolvers/constant_value_manager.py +107 -0
  10. angr/analyses/cfg/indirect_jump_resolvers/default_resolvers.py +2 -1
  11. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +6 -102
  12. angr/analyses/cfg/indirect_jump_resolvers/syscall_resolver.py +92 -0
  13. angr/analyses/complete_calling_conventions.py +18 -5
  14. angr/analyses/decompiler/ail_simplifier.py +95 -65
  15. angr/analyses/decompiler/clinic.py +162 -68
  16. angr/analyses/decompiler/decompiler.py +4 -4
  17. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +1 -1
  18. angr/analyses/decompiler/optimization_passes/condition_constprop.py +49 -14
  19. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +8 -0
  20. angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -5
  21. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +5 -0
  22. angr/analyses/decompiler/peephole_optimizations/__init__.py +2 -0
  23. angr/analyses/decompiler/peephole_optimizations/a_sub_a_shr_const_shr_const.py +37 -0
  24. angr/analyses/decompiler/peephole_optimizations/simplify_pc_relative_loads.py +15 -1
  25. angr/analyses/decompiler/sequence_walker.py +8 -0
  26. angr/analyses/decompiler/ssailification/rewriting_engine.py +2 -0
  27. angr/analyses/decompiler/ssailification/ssailification.py +10 -2
  28. angr/analyses/decompiler/ssailification/traversal_engine.py +17 -2
  29. angr/analyses/decompiler/structured_codegen/c.py +25 -4
  30. angr/analyses/decompiler/utils.py +13 -0
  31. angr/analyses/disassembly.py +3 -3
  32. angr/analyses/fcp/fcp.py +1 -4
  33. angr/analyses/s_propagator.py +40 -29
  34. angr/analyses/s_reaching_definitions/s_rda_model.py +45 -36
  35. angr/analyses/s_reaching_definitions/s_rda_view.py +6 -3
  36. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +41 -42
  37. angr/analyses/typehoon/dfa.py +13 -3
  38. angr/analyses/typehoon/typehoon.py +60 -18
  39. angr/analyses/typehoon/typevars.py +11 -7
  40. angr/analyses/variable_recovery/engine_ail.py +19 -23
  41. angr/analyses/variable_recovery/engine_base.py +26 -30
  42. angr/analyses/variable_recovery/variable_recovery_fast.py +17 -21
  43. angr/calling_conventions.py +18 -8
  44. angr/knowledge_plugins/functions/function.py +29 -15
  45. angr/knowledge_plugins/key_definitions/constants.py +2 -2
  46. angr/knowledge_plugins/key_definitions/liveness.py +4 -4
  47. angr/lib/angr_native.so +0 -0
  48. angr/procedures/definitions/linux_kernel.py +5 -0
  49. angr/state_plugins/unicorn_engine.py +24 -8
  50. angr/storage/memory_mixins/paged_memory/page_backer_mixins.py +1 -2
  51. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +2 -2
  52. angr/utils/doms.py +40 -33
  53. angr/utils/graph.py +26 -20
  54. angr/utils/ssa/__init__.py +21 -14
  55. angr/utils/ssa/vvar_uses_collector.py +2 -2
  56. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/METADATA +11 -8
  57. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/RECORD +61 -58
  58. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/WHEEL +1 -1
  59. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/LICENSE +0 -0
  60. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/entry_points.txt +0 -0
  61. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/top_level.txt +0 -0
angr/analyses/decompiler/peephole_optimizations/a_sub_a_shr_const_shr_const.py ADDED
@@ -0,0 +1,37 @@
1
+ # pylint:disable=no-self-use,too-many-boolean-expressions
2
+ from __future__ import annotations
3
+ from ailment.expression import BinaryOp, Const
4
+
5
+ from .base import PeepholeOptimizationExprBase
6
+
7
+
8
+ class ASubAShrConstShrConst(PeepholeOptimizationExprBase):
9
+ """
10
+ Convert `cdq; sub eax, edx; sar eax, 1` to `eax /= 2`.
11
+ """
12
+
13
+ __slots__ = ()
14
+
15
+ NAME = "(a - (a >> 31)) >> N => a / 2 ** N (signed)"
16
+ expr_classes = (BinaryOp,)
17
+
18
+ def optimize(self, expr: BinaryOp, **kwargs):
19
+ if (
20
+ expr.op == "Sar"
21
+ and len(expr.operands) == 2
22
+ and isinstance(expr.operands[1], Const)
23
+ and expr.operands[1].is_int
24
+ and isinstance(expr.operands[0], BinaryOp)
25
+ and expr.operands[0].op == "Sub"
26
+ ):
27
+ a0, a1 = expr.operands[0].operands
28
+ if (
29
+ isinstance(a1, BinaryOp)
30
+ and a1.op == "Sar"
31
+ and isinstance(a1.operands[1], Const)
32
+ and a1.operands[1].value == 31
33
+ and a0.likes(a1.operands[0])
34
+ ):
35
+ dividend = 2 ** expr.operands[1].value
36
+ return BinaryOp(a0.idx, "Div", [a0, Const(None, None, dividend, expr.bits)], True, **expr.tags)
37
+ return None
angr/analyses/decompiler/peephole_optimizations/simplify_pc_relative_loads.py CHANGED
@@ -1,3 +1,4 @@
1
+ # pylint:disable=too-many-boolean-expressions
1
2
  from __future__ import annotations
2
3
  from ailment.expression import BinaryOp, Const, Load
3
4
 
@@ -20,10 +21,23 @@ class SimplifyPcRelativeLoads(PeepholeOptimizationExprBase):
20
21
  if expr.op == "Add" and len(expr.operands) == 2 and isinstance(expr.operands[0], Load):
21
22
  op0, op1 = expr.operands
22
23
 
24
+ assert self.project is not None
25
+ if not hasattr(expr, "ins_addr"):
26
+ return expr
27
+ assert expr.ins_addr is not None
28
+
23
29
  # check if op1 is PC
24
- if isinstance(op1, Const) and hasattr(expr, "ins_addr") and is_pc(self.project, expr.ins_addr, op1.value):
30
+ if (
31
+ isinstance(op1, Const)
32
+ and op1.is_int
33
+ and hasattr(expr, "ins_addr")
34
+ and is_pc(self.project, expr.ins_addr, op1.value) # type: ignore
35
+ and isinstance(op0.addr, Const)
36
+ and op0.addr.is_int
37
+ ):
25
38
  # check if op0.addr points to a read-only section
26
39
  addr = op0.addr.value
40
+ assert isinstance(addr, int)
27
41
  if is_in_readonly_section(self.project, addr) or is_in_readonly_segment(self.project, addr):
28
42
  # found it!
29
43
  # do the load first
angr/analyses/decompiler/sequence_walker.py CHANGED
@@ -186,6 +186,14 @@ class SequenceWalker:
186
186
  new_condition = (
187
187
  self._handle(node.condition, parent=node, label="condition") if node.condition is not None else None
188
188
  )
189
+
190
+ # note that initializer and iterator are both statements, so they can return empty tuples
191
+ # TODO: Handle the case where multiple statements are returned
192
+ if new_initializer == ():
193
+ new_initializer = None
194
+ if new_iterator == ():
195
+ new_iterator = None
196
+
189
197
  seq_node = self._handle(node.sequence_node, parent=node, label="body", index=0)
190
198
  if seq_node is not None or new_initializer is not None or new_iterator is not None or new_condition is not None:
191
199
  return LoopNode(
angr/analyses/decompiler/ssailification/rewriting_engine.py CHANGED
@@ -698,6 +698,8 @@ class SimEngineSSARewriting(
698
698
  raise NotImplementedError("Store expressions are not supported in _replace_use_expr.")
699
699
  if isinstance(thing, Tmp) and self.rewrite_tmps:
700
700
  return self._replace_use_tmp(self.block.addr, self.block.idx, self.stmt_idx, thing)
701
+ if isinstance(thing, Load):
702
+ return self._replace_use_load(thing)
701
703
  return None
702
704
 
703
705
  def _replace_use_reg(self, reg_expr: Register) -> VirtualVariable | Expression:
angr/analyses/decompiler/ssailification/ssailification.py CHANGED
@@ -5,7 +5,15 @@ from collections import defaultdict
5
5
  from itertools import count
6
6
  from bisect import bisect_left
7
7
 
8
- from ailment.expression import Expression, Register, StackBaseOffset, Tmp, VirtualVariable, VirtualVariableCategory
8
+ from ailment.expression import (
9
+ Expression,
10
+ Register,
11
+ StackBaseOffset,
12
+ Tmp,
13
+ VirtualVariable,
14
+ VirtualVariableCategory,
15
+ Load,
16
+ )
9
17
  from ailment.statement import Statement, Store
10
18
 
11
19
  from angr.knowledge_plugins.functions import Function
@@ -151,7 +159,7 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
151
159
  reg_bits = def_.size * self.project.arch.byte_width
152
160
  udef_to_defs[("reg", def_.reg_offset, reg_bits)].add(def_)
153
161
  udef_to_blockkeys[("reg", def_.reg_offset, reg_bits)].add((loc.block_addr, loc.block_idx))
154
- elif isinstance(def_, Store):
162
+ elif isinstance(def_, (Store, Load)):
155
163
  if isinstance(def_.addr, StackBaseOffset) and isinstance(def_.addr.offset, int):
156
164
  idx_begin = bisect_left(sorted_stackvar_offs, def_.addr.offset)
157
165
  for i in range(idx_begin, len(sorted_stackvar_offs)):
angr/analyses/decompiler/ssailification/traversal_engine.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
  from collections import OrderedDict
3
3
 
4
4
  from ailment.statement import Call, Store, ConditionalJump
5
- from ailment.expression import Register, BinaryOp, StackBaseOffset, ITE, VEXCCallExpression, Tmp, DirtyExpression
5
+ from ailment.expression import Register, BinaryOp, StackBaseOffset, ITE, VEXCCallExpression, Tmp, DirtyExpression, Load
6
6
 
7
7
  from angr.engines.light import SimEngineLightAIL
8
8
  from angr.project import Project
@@ -133,6 +133,22 @@ class SimEngineSSATraversal(SimEngineLightAIL[TraversalState, None, None, None])
133
133
 
134
134
  self.state.live_registers.add(base_offset)
135
135
 
136
+ def _handle_expr_Load(self, expr: Load):
137
+ self._expr(expr.addr)
138
+ if (
139
+ self.stackvars
140
+ and isinstance(expr.addr, StackBaseOffset)
141
+ and isinstance(expr.addr.offset, int)
142
+ and (expr.addr.offset, expr.size) not in self.state.live_stackvars
143
+ ):
144
+ # we must create this stack variable on the fly; we did not see its creation before it is first used
145
+ codeloc = self._codeloc()
146
+ self.def_to_loc.append((expr, codeloc))
147
+ if codeloc not in self.loc_to_defs:
148
+ self.loc_to_defs[codeloc] = OrderedSet()
149
+ self.loc_to_defs[codeloc].add(expr)
150
+ self.state.live_stackvars.add((expr.addr.offset, expr.size))
151
+
136
152
  def _handle_expr_Tmp(self, expr: Tmp):
137
153
  if self.use_tmps:
138
154
  codeloc = self._codeloc()
@@ -251,7 +267,6 @@ class SimEngineSSATraversal(SimEngineLightAIL[TraversalState, None, None, None])
251
267
 
252
268
  _handle_expr_VirtualVariable = _handle_Dummy
253
269
  _handle_expr_Phi = _handle_Dummy
254
- _handle_expr_Load = _handle_Dummy
255
270
  _handle_expr_Const = _handle_Dummy
256
271
  _handle_expr_MultiStatementExpression = _handle_Dummy
257
272
  _handle_expr_StackBaseOffset = _handle_Dummy
angr/analyses/decompiler/structured_codegen/c.py CHANGED
@@ -3426,8 +3426,13 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3426
3426
  return old_ty
3427
3427
 
3428
3428
  if expr.variable is not None:
3429
- cvar = self._variable(expr.variable, expr.size)
3430
- offset = expr.variable_offset or 0
3429
+ if "struct_member_info" in expr.tags:
3430
+ offset, var, _ = expr.struct_member_info
3431
+ cvar = self._variable(var, var.size)
3432
+ else:
3433
+ cvar = self._variable(expr.variable, expr.size)
3434
+ offset = expr.variable_offset or 0
3435
+
3431
3436
  assert type(offset) is int # I refuse to deal with the alternative
3432
3437
  return self._access_constant_offset(CUnaryOp("Reference", cvar, codegen=self), offset, ty, False, negotiate)
3433
3438
 
@@ -3649,8 +3654,24 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3649
3654
  return CMultiStatementExpression(cstmts, cexpr, tags=expr.tags, codegen=self)
3650
3655
 
3651
3656
  def _handle_VirtualVariable(self, expr: Expr.VirtualVariable, **kwargs):
3652
- if expr.variable:
3653
- cvar = self._variable(expr.variable, None, vvar_id=expr.varid)
3657
+ def negotiate(old_ty: SimType, proposed_ty: SimType) -> SimType:
3658
+ # we do not allow returning a struct for a primitive type
3659
+ if old_ty.size == proposed_ty.size and (
3660
+ not isinstance(proposed_ty, SimStruct) or isinstance(old_ty, SimStruct)
3661
+ ):
3662
+ return proposed_ty
3663
+ return old_ty
3664
+
3665
+ if expr.variable is not None:
3666
+ if "struct_member_info" in expr.tags:
3667
+ offset, var, _ = expr.struct_member_info
3668
+ cbasevar = self._variable(var, expr.size)
3669
+ cvar = self._access_constant_offset(
3670
+ self._get_variable_reference(cbasevar), offset, cbasevar.type, False, negotiate
3671
+ )
3672
+ else:
3673
+ cvar = self._variable(expr.variable, None, vvar_id=expr.varid)
3674
+
3654
3675
  if expr.variable.size != expr.size:
3655
3676
  l.warning(
3656
3677
  "VirtualVariable size (%d) and variable size (%d) do not match. Force a type cast.",
angr/analyses/decompiler/utils.py CHANGED
@@ -214,6 +214,19 @@ def switch_extract_switch_expr_from_jump_target(target: ailment.Expr.Expression)
214
214
  target = target.operands[0]
215
215
  else:
216
216
  return None
217
+ elif target.op == "And":
218
+ # it must be and-ing the target expr with a constant
219
+ if (
220
+ isinstance(target.operands[1], ailment.Expr.VirtualVariable)
221
+ and isinstance(target.operands[0], ailment.Expr.Const)
222
+ ) or (
223
+ isinstance(target.operands[0], ailment.Expr.VirtualVariable)
224
+ and isinstance(target.operands[1], ailment.Expr.Const)
225
+ ):
226
+ break
227
+ return None
228
+ else:
229
+ return None
217
230
  elif isinstance(target, ailment.Expr.Load):
218
231
  # we want the address!
219
232
  found_load = True
angr/analyses/disassembly.py CHANGED
@@ -4,7 +4,7 @@ import contextlib
4
4
  import logging
5
5
  from collections import defaultdict
6
6
  from collections.abc import Sequence
7
- from typing import Union, Any
7
+ from typing import Any
8
8
 
9
9
  import pyvex
10
10
  import archinfo
@@ -24,8 +24,8 @@ try:
24
24
  from angr.engines import pcode
25
25
  import pypcode
26
26
 
27
- IRSBType = Union[pyvex.IRSB, pcode.lifter.IRSB]
28
- IROpObjType = Union[pyvex.stmt.IRStmt, pypcode.PcodeOp]
27
+ IRSBType = pyvex.IRSB | pcode.lifter.IRSB
28
+ IROpObjType = pyvex.stmt.IRStmt | pypcode.PcodeOp
29
29
  except ImportError:
30
30
  pcode = None
31
31
  IRSBType = pyvex.IRSB
angr/analyses/fcp/fcp.py CHANGED
@@ -407,10 +407,7 @@ class FastConstantPropagation(Analysis):
407
407
  except (TypeError, ValueError):
408
408
  arg_locs = None
409
409
 
410
- if None in arg_locs:
411
- arg_locs = None
412
-
413
- if arg_locs is not None:
410
+ if arg_locs is not None and None not in arg_locs:
414
411
  for arg_loc in arg_locs:
415
412
  for loc in arg_loc.get_footprint():
416
413
  if isinstance(loc, SimStackArg):
angr/analyses/s_propagator.py CHANGED
@@ -26,6 +26,7 @@ from angr.utils.ssa import (
26
26
  get_vvar_deflocs,
27
27
  has_ite_expr,
28
28
  has_ite_stmt,
29
+ has_tmp_expr,
29
30
  is_phi_assignment,
30
31
  is_const_assignment,
31
32
  is_const_and_vvar_assignment,
@@ -126,7 +127,7 @@ class SPropagatorAnalysis(Analysis):
126
127
  # update vvar_deflocs using function arguments
127
128
  if self.func_args:
128
129
  for func_arg in self.func_args:
129
- vvar_deflocs[func_arg] = ExternalCodeLocation()
130
+ vvar_deflocs[func_arg.varid] = func_arg, ExternalCodeLocation()
130
131
 
131
132
  # find all ret sites and indirect jump sites
132
133
  retsites: set[tuple[int, int | None, int]] = set()
@@ -143,11 +144,11 @@ class SPropagatorAnalysis(Analysis):
143
144
  # find constant and other propagatable assignments
144
145
  vvarid_to_vvar = {}
145
146
  const_vvars: dict[int, Const] = {}
146
- for vvar, defloc in vvar_deflocs.items():
147
+ for vvar_id, (vvar, defloc) in vvar_deflocs.items():
147
148
  if not vvar.was_reg and not vvar.was_parameter:
148
149
  continue
149
150
 
150
- vvarid_to_vvar[vvar.varid] = vvar
151
+ vvarid_to_vvar[vvar_id] = vvar
151
152
  if isinstance(defloc, ExternalCodeLocation):
152
153
  continue
153
154
 
@@ -160,8 +161,8 @@ class SPropagatorAnalysis(Analysis):
160
161
  if r:
161
162
  # replace wherever it's used
162
163
  assert v is not None
163
- const_vvars[vvar.varid] = v
164
- for vvar_at_use, useloc in vvar_uselocs[vvar.varid]:
164
+ const_vvars[vvar_id] = v
165
+ for vvar_at_use, useloc in vvar_uselocs[vvar_id]:
165
166
  replacements[useloc][vvar_at_use] = v
166
167
  continue
167
168
 
@@ -189,10 +190,10 @@ class SPropagatorAnalysis(Analysis):
189
190
  if self.mode == "function":
190
191
  assert self.func_graph is not None
191
192
 
192
- for vvar, defloc in vvar_deflocs.items():
193
- if vvar.varid not in vvar_uselocs:
193
+ for vvar_id, (vvar, defloc) in vvar_deflocs.items():
194
+ if vvar_id not in vvar_uselocs:
194
195
  continue
195
- if vvar.varid in const_vvars:
196
+ if vvar_id in const_vvars:
196
197
  continue
197
198
  if isinstance(defloc, ExternalCodeLocation):
198
199
  continue
@@ -200,11 +201,13 @@ class SPropagatorAnalysis(Analysis):
200
201
  assert defloc.block_addr is not None
201
202
  assert defloc.stmt_idx is not None
202
203
 
204
+ vvar_uselocs_set = set(vvar_uselocs[vvar_id]) # deduplicate
205
+
203
206
  block = blocks[(defloc.block_addr, defloc.block_idx)]
204
207
  stmt = block.statements[defloc.stmt_idx]
205
208
  if (
206
209
  (vvar.was_reg or vvar.was_parameter)
207
- and len(vvar_uselocs[vvar.varid]) <= 2
210
+ and len(vvar_uselocs_set) <= 2
208
211
  and isinstance(stmt, Assignment)
209
212
  and isinstance(stmt.src, Load)
210
213
  ):
@@ -215,43 +218,46 @@ class SPropagatorAnalysis(Analysis):
215
218
  # v1 = v0 + 1;
216
219
  # }
217
220
  can_replace = True
218
- for _, vvar_useloc in vvar_uselocs[vvar.varid]:
221
+ for _, vvar_useloc in vvar_uselocs_set:
219
222
  if has_store_stmt_in_between_stmts(self.func_graph, blocks, defloc, vvar_useloc):
220
223
  can_replace = False
221
224
 
222
225
  if can_replace:
223
226
  # we can propagate this load because there is no store between its def and use
224
- for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
227
+ for vvar_used, vvar_useloc in vvar_uselocs_set:
225
228
  replacements[vvar_useloc][vvar_used] = stmt.src
226
229
  continue
227
230
 
228
231
  if (
229
232
  (vvar.was_reg or vvar.was_stack)
230
- and len(vvar_uselocs[vvar.varid]) == 2
233
+ and len(vvar_uselocs_set) == 2
234
+ and isinstance(stmt, Assignment)
231
235
  and not is_phi_assignment(stmt)
232
236
  ):
233
237
  # a special case: in a typical switch-case construct, a variable may be used once for comparison
234
238
  # for the default case and then used again for constructing the jump target. we can propagate this
235
239
  # variable for such cases.
236
- uselocs = {loc for _, loc in vvar_uselocs[vvar.varid]}
237
- if self.is_vvar_used_for_addr_loading_switch_case(uselocs, blocks):
238
- for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
240
+ uselocs = {loc for _, loc in vvar_uselocs_set}
241
+ if self.is_vvar_used_for_addr_loading_switch_case(uselocs, blocks) and not has_tmp_expr(stmt.src):
242
+ for vvar_used, vvar_useloc in vvar_uselocs_set:
239
243
  replacements[vvar_useloc][vvar_used] = stmt.src
240
244
  # mark the vvar as dead and should be removed
241
245
  self.model.dead_vvar_ids.add(vvar.varid)
242
246
  continue
243
247
 
244
248
  if vvar.was_reg or vvar.was_parameter:
245
- if len(vvar_uselocs[vvar.varid]) == 1:
246
- vvar_used, vvar_useloc = next(iter(vvar_uselocs[vvar.varid]))
247
- if is_const_vvar_load_assignment(stmt) and not has_store_stmt_in_between_stmts(
248
- self.func_graph, blocks, defloc, vvar_useloc
249
+ if len(vvar_uselocs_set) == 1:
250
+ vvar_used, vvar_useloc = next(iter(vvar_uselocs_set))
251
+ if (
252
+ is_const_vvar_load_assignment(stmt)
253
+ and not has_store_stmt_in_between_stmts(self.func_graph, blocks, defloc, vvar_useloc)
254
+ and not has_tmp_expr(stmt.src)
249
255
  ):
250
256
  # we can propagate this load because there is no store between its def and use
251
257
  replacements[vvar_useloc][vvar_used] = stmt.src
252
258
  continue
253
259
 
254
- if is_const_and_vvar_assignment(stmt):
260
+ if is_const_and_vvar_assignment(stmt) and not has_tmp_expr(stmt.src):
255
261
  # if the useloc is a phi assignment statement, ensure that stmt.src is the same as the phi
256
262
  # variable
257
263
  assert vvar_useloc.block_addr is not None
@@ -273,18 +279,22 @@ class SPropagatorAnalysis(Analysis):
273
279
  else:
274
280
  non_exitsite_uselocs = [
275
281
  loc
276
- for _, loc in vvar_uselocs[vvar.varid]
282
+ for _, loc in vvar_uselocs_set
277
283
  if (loc.block_addr, loc.block_idx, loc.stmt_idx) not in (retsites | jumpsites)
278
284
  ]
279
285
  if is_const_and_vvar_assignment(stmt):
280
286
  if len(non_exitsite_uselocs) == 1:
281
287
  # this vvar is used once if we exclude its uses at ret sites or jump sites. we can
282
288
  # propagate it
283
- for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
289
+ for vvar_used, vvar_useloc in vvar_uselocs_set:
284
290
  replacements[vvar_useloc][vvar_used] = stmt.src
285
291
  continue
286
292
 
287
- if len(set(non_exitsite_uselocs)) == 1 and not has_ite_expr(stmt.src):
293
+ if (
294
+ len(set(non_exitsite_uselocs)) == 1
295
+ and not has_ite_expr(stmt.src)
296
+ and not has_tmp_expr(stmt.src)
297
+ ):
288
298
  useloc = non_exitsite_uselocs[0]
289
299
  assert useloc.block_addr is not None
290
300
  assert useloc.stmt_idx is not None
@@ -292,13 +302,13 @@ class SPropagatorAnalysis(Analysis):
292
302
  if stmt.src.depth <= 3 and not has_ite_stmt(useloc_stmt):
293
303
  # remove duplicate use locs (e.g., if the variable is used multiple times by the
294
304
  # same statement) - but ensure stmt is simple enough
295
- for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
305
+ for vvar_used, vvar_useloc in vvar_uselocs_set:
296
306
  replacements[vvar_useloc][vvar_used] = stmt.src
297
307
  continue
298
308
 
299
309
  # special logic for global variables: if it's used once or multiple times, and the variable is never
300
310
  # updated before it's used, we will propagate the load
301
- if (vvar.was_reg or vvar.was_parameter) and isinstance(stmt, Assignment):
311
+ if (vvar.was_reg or vvar.was_parameter) and isinstance(stmt, Assignment) and not has_tmp_expr(stmt.src):
302
312
  stmt_src = stmt.src
303
313
  # unpack conversions
304
314
  while isinstance(stmt_src, Convert):
@@ -309,7 +319,7 @@ class SPropagatorAnalysis(Analysis):
309
319
  and isinstance(stmt_src.addr.value, int)
310
320
  ):
311
321
  gv_updated = False
312
- for _vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
322
+ for _vvar_used, vvar_useloc in vvar_uselocs_set:
313
323
  gv_updated |= self.is_global_variable_updated(
314
324
  self.func_graph,
315
325
  blocks,
@@ -320,12 +330,13 @@ class SPropagatorAnalysis(Analysis):
320
330
  vvar_useloc,
321
331
  )
322
332
  if not gv_updated:
323
- for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
333
+ for vvar_used, vvar_useloc in vvar_uselocs_set:
324
334
  replacements[vvar_useloc][vvar_used] = stmt.src
325
335
  continue
326
336
 
327
337
  for vvar_id, uselocs in vvar_uselocs.items():
328
338
  vvar = next(iter(uselocs))[0] if vvar_id not in vvarid_to_vvar else vvarid_to_vvar[vvar_id]
339
+ vvar_uselocs_set = set(uselocs) # deduplicate
329
340
 
330
341
  if self._sp_tracker is not None and vvar.category == VirtualVariableCategory.REGISTER:
331
342
  if vvar.oident == self.project.arch.sp_offset:
@@ -334,7 +345,7 @@ class SPropagatorAnalysis(Analysis):
334
345
  if "sp" in self.project.arch.registers
335
346
  else None
336
347
  )
337
- for vvar_at_use, useloc in vvar_uselocs[vvar.varid]:
348
+ for vvar_at_use, useloc in vvar_uselocs_set:
338
349
  sb_offset = self._sp_tracker.offset_before(useloc.ins_addr, self.project.arch.sp_offset)
339
350
  if sb_offset is not None:
340
351
  v = StackBaseOffset(None, self.project.arch.bits, sb_offset)
@@ -349,7 +360,7 @@ class SPropagatorAnalysis(Analysis):
349
360
  if "bp" in self.project.arch.registers
350
361
  else None
351
362
  )
352
- for vvar_at_use, useloc in vvar_uselocs[vvar.varid]:
363
+ for vvar_at_use, useloc in vvar_uselocs_set:
353
364
  sb_offset = self._sp_tracker.offset_before(useloc.ins_addr, self.project.arch.bp_offset)
354
365
  if sb_offset is not None:
355
366
  v = StackBaseOffset(None, self.project.arch.bits, sb_offset)
angr/analyses/s_reaching_definitions/s_rda_model.py CHANGED
@@ -20,26 +20,35 @@ class SRDAModel:
20
20
  self.func_args = func_args
21
21
  self.arch = arch
22
22
  self.varid_to_vvar: dict[int, VirtualVariable] = {}
23
- self.all_vvar_definitions: dict[VirtualVariable, CodeLocation] = {}
24
- self.all_vvar_uses: dict[VirtualVariable, set[tuple[VirtualVariable | None, CodeLocation]]] = defaultdict(set)
23
+ self.all_vvar_definitions: dict[int, CodeLocation] = {}
24
+ self.all_vvar_uses: dict[int, list[tuple[VirtualVariable | None, CodeLocation]]] = defaultdict(list)
25
25
  self.all_tmp_definitions: dict[CodeLocation, dict[atoms.Tmp, int]] = defaultdict(dict)
26
26
  self.all_tmp_uses: dict[CodeLocation, dict[atoms.Tmp, set[tuple[Tmp, int]]]] = defaultdict(dict)
27
27
  self.phi_vvar_ids: set[int] = set()
28
28
  self.phivarid_to_varids: dict[int, set[int]] = {}
29
+ self.vvar_uses_by_loc: dict[CodeLocation, list[int]] = {}
30
+
31
+ def add_vvar_use(self, vvar_id: int, expr: VirtualVariable | None, loc: CodeLocation) -> None:
32
+ self.all_vvar_uses[vvar_id].append((expr, loc))
33
+ if loc not in self.vvar_uses_by_loc:
34
+ self.vvar_uses_by_loc[loc] = []
35
+ self.vvar_uses_by_loc[loc].append(vvar_id)
29
36
 
30
37
  @property
31
38
  def all_definitions(self) -> Generator[Definition]:
32
- for vvar, defloc in self.all_vvar_definitions.items():
33
- yield Definition(atoms.VirtualVariable(vvar.varid, vvar.size, vvar.category, vvar.oident), defloc)
39
+ for vvar_id, defloc in self.all_vvar_definitions.items():
40
+ vvar = self.varid_to_vvar[vvar_id]
41
+ yield Definition(atoms.VirtualVariable(vvar_id, vvar.size, vvar.category, vvar.oident), defloc)
34
42
 
35
43
  def is_phi_vvar_id(self, idx: int) -> bool:
36
44
  return idx in self.phi_vvar_ids
37
45
 
38
46
  def get_all_definitions(self, block_loc: CodeLocation) -> set[Definition]:
39
47
  s = set()
40
- for vvar, codeloc in self.all_vvar_definitions.items():
48
+ for vvar_id, codeloc in self.all_vvar_definitions.items():
49
+ vvar = self.varid_to_vvar[vvar_id]
41
50
  if codeloc.block_addr == block_loc.block_addr and codeloc.block_idx == block_loc.block_idx:
42
- s.add(Definition(atoms.VirtualVariable(vvar.varid, vvar.size, vvar.category, vvar.oident), codeloc))
51
+ s.add(Definition(atoms.VirtualVariable(vvar_id, vvar.size, vvar.category, vvar.oident), codeloc))
43
52
  return s | self.get_all_tmp_definitions(block_loc)
44
53
 
45
54
  def get_all_tmp_definitions(self, block_loc: CodeLocation) -> set[Definition]:
@@ -64,45 +73,45 @@ class SRDAModel:
64
73
  :return: A set of definitions that are used at the given location.
65
74
  """
66
75
  if exprs:
67
- defs: set[tuple[Definition, Any]] = set()
68
- for vvar, uses in self.all_vvar_uses.items():
69
- for expr, loc_ in uses:
70
- if loc_ == loc:
71
- defs.add(
72
- (
73
- Definition(
74
- atoms.VirtualVariable(vvar.varid, vvar.size, vvar.category, vvar.oident),
75
- self.all_vvar_definitions[vvar],
76
- ),
77
- expr,
78
- )
79
- )
80
- return defs
81
-
82
- defs: set[Definition] = set()
83
- for vvar, uses in self.all_vvar_uses.items():
84
- for _, loc_ in uses:
85
- if loc_ == loc:
86
- defs.add(
76
+ def_with_exprs: set[tuple[Definition, Any]] = set()
77
+ if loc not in self.vvar_uses_by_loc:
78
+ return def_with_exprs
79
+ for vvar_id in self.vvar_uses_by_loc[loc]:
80
+ vvar = self.varid_to_vvar[vvar_id]
81
+ def_with_exprs.add(
82
+ (
87
83
  Definition(
88
- atoms.VirtualVariable(vvar.varid, vvar.size, vvar.category, vvar.oident),
89
- self.all_vvar_definitions[vvar],
90
- )
84
+ atoms.VirtualVariable(vvar_id, vvar.size, vvar.category, vvar.oident),
85
+ self.all_vvar_definitions[vvar_id],
86
+ ),
87
+ vvar,
91
88
  )
89
+ )
90
+ return def_with_exprs
91
+
92
+ defs: set[Definition] = set()
93
+ if loc not in self.vvar_uses_by_loc:
94
+ return defs
95
+ for vvar_id in self.vvar_uses_by_loc[loc]:
96
+ vvar = self.varid_to_vvar[vvar_id]
97
+ defs.add(
98
+ Definition(
99
+ atoms.VirtualVariable(vvar_id, vvar.size, vvar.category, vvar.oident),
100
+ self.all_vvar_definitions[vvar_id],
101
+ )
102
+ )
92
103
  return defs
93
104
 
94
105
  def get_vvar_uses(self, obj: VirtualVariable | atoms.VirtualVariable) -> set[CodeLocation]:
95
- the_vvar = self.varid_to_vvar.get(obj.varid, None)
96
- if the_vvar is not None:
97
- return {loc for _, loc in self.all_vvar_uses[the_vvar]}
106
+ if obj.varid in self.all_vvar_uses:
107
+ return {loc for _, loc in self.all_vvar_uses[obj.varid]}
98
108
  return set()
99
109
 
100
110
  def get_vvar_uses_with_expr(
101
111
  self, obj: VirtualVariable | atoms.VirtualVariable
102
- ) -> set[tuple[CodeLocation, VirtualVariable]]:
103
- the_vvar = self.varid_to_vvar.get(obj.varid, None)
104
- if the_vvar is not None:
105
- return {(loc, expr) for expr, loc in self.all_vvar_uses[the_vvar]}
112
+ ) -> set[tuple[VirtualVariable | None, CodeLocation]]:
113
+ if obj.varid in self.all_vvar_uses:
114
+ return set(self.all_vvar_uses[obj.varid])
106
115
  return set()
107
116
 
108
117
  def get_tmp_uses(self, obj: atoms.Tmp, block_loc: CodeLocation) -> set[CodeLocation]:
angr/analyses/s_reaching_definitions/s_rda_view.py CHANGED
@@ -185,7 +185,10 @@ class SRDAView:
185
185
  vvars.append(func_arg)
186
186
  # there might be multiple vvars; we prioritize the one whose size fits the best
187
187
  for v in vvars:
188
- if v.stack_offset == stack_offset and v.size == size:
188
+ if (
189
+ (v.was_stack and v.stack_offset == stack_offset)
190
+ or (v.was_parameter and v.parameter_stack_offset == stack_offset)
191
+ ) and v.size == size:
189
192
  return v
190
193
  return vvars[0] if vvars else None
191
194
 
@@ -239,9 +242,9 @@ class SRDAView:
239
242
  return vvars[0] if vvars else None
240
243
 
241
244
  def get_vvar_value(self, vvar: VirtualVariable) -> Expression | None:
242
- if vvar not in self.model.all_vvar_definitions:
245
+ if vvar.varid not in self.model.all_vvar_definitions:
243
246
  return None
244
- codeloc = self.model.all_vvar_definitions[vvar]
247
+ codeloc = self.model.all_vvar_definitions[vvar.varid]
245
248
 
246
249
  for block in self.model.func_graph:
247
250
  if block.addr == codeloc.block_addr and block.idx == codeloc.block_idx: