angr 9.2.143__py3-none-macosx_11_0_arm64.whl → 9.2.144__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (46)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +13 -1
  3. angr/analyses/calling_convention/fact_collector.py +41 -5
  4. angr/analyses/cfg/cfg_base.py +7 -2
  5. angr/analyses/cfg/cfg_emulated.py +13 -4
  6. angr/analyses/cfg/cfg_fast.py +21 -60
  7. angr/analyses/cfg/indirect_jump_resolvers/__init__.py +2 -0
  8. angr/analyses/cfg/indirect_jump_resolvers/constant_value_manager.py +107 -0
  9. angr/analyses/cfg/indirect_jump_resolvers/default_resolvers.py +2 -1
  10. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +2 -101
  11. angr/analyses/cfg/indirect_jump_resolvers/syscall_resolver.py +92 -0
  12. angr/analyses/decompiler/ail_simplifier.py +5 -0
  13. angr/analyses/decompiler/clinic.py +162 -68
  14. angr/analyses/decompiler/decompiler.py +4 -4
  15. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +1 -1
  16. angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -5
  17. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +5 -0
  18. angr/analyses/decompiler/peephole_optimizations/__init__.py +2 -0
  19. angr/analyses/decompiler/peephole_optimizations/a_sub_a_shr_const_shr_const.py +37 -0
  20. angr/analyses/decompiler/ssailification/rewriting_engine.py +2 -0
  21. angr/analyses/decompiler/ssailification/ssailification.py +10 -2
  22. angr/analyses/decompiler/ssailification/traversal_engine.py +17 -2
  23. angr/analyses/decompiler/structured_codegen/c.py +25 -4
  24. angr/analyses/disassembly.py +3 -3
  25. angr/analyses/fcp/fcp.py +1 -4
  26. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +21 -22
  27. angr/analyses/typehoon/dfa.py +13 -3
  28. angr/analyses/typehoon/typehoon.py +60 -18
  29. angr/analyses/typehoon/typevars.py +11 -7
  30. angr/analyses/variable_recovery/engine_ail.py +13 -17
  31. angr/analyses/variable_recovery/engine_base.py +26 -30
  32. angr/analyses/variable_recovery/variable_recovery_fast.py +17 -21
  33. angr/knowledge_plugins/functions/function.py +29 -15
  34. angr/knowledge_plugins/key_definitions/constants.py +2 -2
  35. angr/knowledge_plugins/key_definitions/liveness.py +4 -4
  36. angr/lib/angr_native.dylib +0 -0
  37. angr/state_plugins/unicorn_engine.py +24 -8
  38. angr/storage/memory_mixins/paged_memory/page_backer_mixins.py +1 -2
  39. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +2 -2
  40. angr/utils/graph.py +26 -20
  41. {angr-9.2.143.dist-info → angr-9.2.144.dist-info}/METADATA +11 -8
  42. {angr-9.2.143.dist-info → angr-9.2.144.dist-info}/RECORD +46 -43
  43. {angr-9.2.143.dist-info → angr-9.2.144.dist-info}/WHEEL +1 -1
  44. {angr-9.2.143.dist-info → angr-9.2.144.dist-info}/LICENSE +0 -0
  45. {angr-9.2.143.dist-info → angr-9.2.144.dist-info}/entry_points.txt +0 -0
  46. {angr-9.2.143.dist-info → angr-9.2.144.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,15 @@ from collections import defaultdict
5
5
  from itertools import count
6
6
  from bisect import bisect_left
7
7
 
8
- from ailment.expression import Expression, Register, StackBaseOffset, Tmp, VirtualVariable, VirtualVariableCategory
8
+ from ailment.expression import (
9
+ Expression,
10
+ Register,
11
+ StackBaseOffset,
12
+ Tmp,
13
+ VirtualVariable,
14
+ VirtualVariableCategory,
15
+ Load,
16
+ )
9
17
  from ailment.statement import Statement, Store
10
18
 
11
19
  from angr.knowledge_plugins.functions import Function
@@ -151,7 +159,7 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
151
159
  reg_bits = def_.size * self.project.arch.byte_width
152
160
  udef_to_defs[("reg", def_.reg_offset, reg_bits)].add(def_)
153
161
  udef_to_blockkeys[("reg", def_.reg_offset, reg_bits)].add((loc.block_addr, loc.block_idx))
154
- elif isinstance(def_, Store):
162
+ elif isinstance(def_, (Store, Load)):
155
163
  if isinstance(def_.addr, StackBaseOffset) and isinstance(def_.addr.offset, int):
156
164
  idx_begin = bisect_left(sorted_stackvar_offs, def_.addr.offset)
157
165
  for i in range(idx_begin, len(sorted_stackvar_offs)):
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
  from collections import OrderedDict
3
3
 
4
4
  from ailment.statement import Call, Store, ConditionalJump
5
- from ailment.expression import Register, BinaryOp, StackBaseOffset, ITE, VEXCCallExpression, Tmp, DirtyExpression
5
+ from ailment.expression import Register, BinaryOp, StackBaseOffset, ITE, VEXCCallExpression, Tmp, DirtyExpression, Load
6
6
 
7
7
  from angr.engines.light import SimEngineLightAIL
8
8
  from angr.project import Project
@@ -133,6 +133,22 @@ class SimEngineSSATraversal(SimEngineLightAIL[TraversalState, None, None, None])
133
133
 
134
134
  self.state.live_registers.add(base_offset)
135
135
 
136
+ def _handle_expr_Load(self, expr: Load):
137
+ self._expr(expr.addr)
138
+ if (
139
+ self.stackvars
140
+ and isinstance(expr.addr, StackBaseOffset)
141
+ and isinstance(expr.addr.offset, int)
142
+ and (expr.addr.offset, expr.size) not in self.state.live_stackvars
143
+ ):
144
+ # we must create this stack variable on the fly; we did not see its creation before it is first used
145
+ codeloc = self._codeloc()
146
+ self.def_to_loc.append((expr, codeloc))
147
+ if codeloc not in self.loc_to_defs:
148
+ self.loc_to_defs[codeloc] = OrderedSet()
149
+ self.loc_to_defs[codeloc].add(expr)
150
+ self.state.live_stackvars.add((expr.addr.offset, expr.size))
151
+
136
152
  def _handle_expr_Tmp(self, expr: Tmp):
137
153
  if self.use_tmps:
138
154
  codeloc = self._codeloc()
@@ -251,7 +267,6 @@ class SimEngineSSATraversal(SimEngineLightAIL[TraversalState, None, None, None])
251
267
 
252
268
  _handle_expr_VirtualVariable = _handle_Dummy
253
269
  _handle_expr_Phi = _handle_Dummy
254
- _handle_expr_Load = _handle_Dummy
255
270
  _handle_expr_Const = _handle_Dummy
256
271
  _handle_expr_MultiStatementExpression = _handle_Dummy
257
272
  _handle_expr_StackBaseOffset = _handle_Dummy
@@ -3426,8 +3426,13 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3426
3426
  return old_ty
3427
3427
 
3428
3428
  if expr.variable is not None:
3429
- cvar = self._variable(expr.variable, expr.size)
3430
- offset = expr.variable_offset or 0
3429
+ if "struct_member_info" in expr.tags:
3430
+ offset, var, _ = expr.struct_member_info
3431
+ cvar = self._variable(var, var.size)
3432
+ else:
3433
+ cvar = self._variable(expr.variable, expr.size)
3434
+ offset = expr.variable_offset or 0
3435
+
3431
3436
  assert type(offset) is int # I refuse to deal with the alternative
3432
3437
  return self._access_constant_offset(CUnaryOp("Reference", cvar, codegen=self), offset, ty, False, negotiate)
3433
3438
 
@@ -3649,8 +3654,24 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3649
3654
  return CMultiStatementExpression(cstmts, cexpr, tags=expr.tags, codegen=self)
3650
3655
 
3651
3656
  def _handle_VirtualVariable(self, expr: Expr.VirtualVariable, **kwargs):
3652
- if expr.variable:
3653
- cvar = self._variable(expr.variable, None, vvar_id=expr.varid)
3657
+ def negotiate(old_ty: SimType, proposed_ty: SimType) -> SimType:
3658
+ # we do not allow returning a struct for a primitive type
3659
+ if old_ty.size == proposed_ty.size and (
3660
+ not isinstance(proposed_ty, SimStruct) or isinstance(old_ty, SimStruct)
3661
+ ):
3662
+ return proposed_ty
3663
+ return old_ty
3664
+
3665
+ if expr.variable is not None:
3666
+ if "struct_member_info" in expr.tags:
3667
+ offset, var, _ = expr.struct_member_info
3668
+ cbasevar = self._variable(var, expr.size)
3669
+ cvar = self._access_constant_offset(
3670
+ self._get_variable_reference(cbasevar), offset, cbasevar.type, False, negotiate
3671
+ )
3672
+ else:
3673
+ cvar = self._variable(expr.variable, None, vvar_id=expr.varid)
3674
+
3654
3675
  if expr.variable.size != expr.size:
3655
3676
  l.warning(
3656
3677
  "VirtualVariable size (%d) and variable size (%d) do not match. Force a type cast.",
@@ -4,7 +4,7 @@ import contextlib
4
4
  import logging
5
5
  from collections import defaultdict
6
6
  from collections.abc import Sequence
7
- from typing import Union, Any
7
+ from typing import Any
8
8
 
9
9
  import pyvex
10
10
  import archinfo
@@ -24,8 +24,8 @@ try:
24
24
  from angr.engines import pcode
25
25
  import pypcode
26
26
 
27
- IRSBType = Union[pyvex.IRSB, pcode.lifter.IRSB]
28
- IROpObjType = Union[pyvex.stmt.IRStmt, pypcode.PcodeOp]
27
+ IRSBType = pyvex.IRSB | pcode.lifter.IRSB
28
+ IROpObjType = pyvex.stmt.IRStmt | pypcode.PcodeOp
29
29
  except ImportError:
30
30
  pcode = None
31
31
  IRSBType = pyvex.IRSB
angr/analyses/fcp/fcp.py CHANGED
@@ -407,10 +407,7 @@ class FastConstantPropagation(Analysis):
407
407
  except (TypeError, ValueError):
408
408
  arg_locs = None
409
409
 
410
- if None in arg_locs:
411
- arg_locs = None
412
-
413
- if arg_locs is not None:
410
+ if arg_locs is not None and None not in arg_locs:
414
411
  for arg_loc in arg_locs:
415
412
  for loc in arg_loc.get_footprint():
416
413
  if isinstance(loc, SimStackArg):
@@ -131,28 +131,27 @@ class SReachingDefinitionsAnalysis(Analysis):
131
131
  stmt if isinstance(stmt, Call) else stmt.src if isinstance(stmt, Assignment) else stmt.ret_exprs[0]
132
132
  )
133
133
  assert isinstance(call, Call)
134
- if call.prototype is None:
135
- # without knowing the prototype, we must conservatively add uses to all registers that are
136
- # potentially used here
137
- if call.calling_convention is not None:
138
- cc = call.calling_convention
139
- else:
140
- # just use all registers in the default calling convention because we don't know anything about
141
- # the calling convention yet
142
- cc_cls = default_cc(self.project.arch.name)
143
- assert cc_cls is not None
144
- cc = cc_cls(self.project.arch)
145
-
146
- codeloc = CodeLocation(block_addr, stmt_idx, block_idx=block_idx, ins_addr=stmt.ins_addr)
147
- arg_locs = list(cc.ARG_REGS)
148
- if cc.FP_ARG_REGS:
149
- arg_locs += [r_name for r_name in cc.FP_ARG_REGS if r_name not in arg_locs]
150
-
151
- for arg_reg_name in arg_locs:
152
- reg_offset = self.project.arch.registers[arg_reg_name][0]
153
- if reg_offset in reg_to_vvarids:
154
- vvarid = reg_to_vvarids[reg_offset]
155
- self.model.add_vvar_use(vvarid, None, codeloc)
134
+
135
+ # conservatively add uses to all registers that are potentially used here
136
+ if call.calling_convention is not None:
137
+ cc = call.calling_convention
138
+ else:
139
+ # just use all registers in the default calling convention because we don't know anything about
140
+ # the calling convention yet
141
+ cc_cls = default_cc(self.project.arch.name)
142
+ assert cc_cls is not None
143
+ cc = cc_cls(self.project.arch)
144
+
145
+ codeloc = CodeLocation(block_addr, stmt_idx, block_idx=block_idx, ins_addr=stmt.ins_addr)
146
+ arg_locs = list(cc.ARG_REGS)
147
+ if cc.FP_ARG_REGS:
148
+ arg_locs += [r_name for r_name in cc.FP_ARG_REGS if r_name not in arg_locs]
149
+
150
+ for arg_reg_name in arg_locs:
151
+ reg_offset = self.project.arch.registers[arg_reg_name][0]
152
+ if reg_offset in reg_to_vvarids:
153
+ vvarid = reg_to_vvarids[reg_offset]
154
+ self.model.add_vvar_use(vvarid, None, codeloc)
156
155
 
157
156
  if self._track_tmps:
158
157
  # track tmps
@@ -1,21 +1,22 @@
1
+ # pylint:disable=import-outside-toplevel
1
2
  from __future__ import annotations
2
3
  from typing import TYPE_CHECKING
3
4
 
4
5
  import networkx
5
6
 
6
7
  # FIXME: Remove the dependency on pyformlang
7
- from pyformlang.finite_automaton import Epsilon, EpsilonNFA, State, Symbol
8
8
 
9
9
  from angr.errors import AngrError
10
10
  from .typevars import BaseLabel, Subtype
11
11
  from .variance import Variance
12
12
 
13
13
  if TYPE_CHECKING:
14
+ from pyformlang.finite_automaton import EpsilonNFA
14
15
  from pyformlang.finite_automaton import DeterministicFiniteAutomaton
15
16
 
16
17
 
17
- START_STATE = State("START")
18
- END_STATE = State("END")
18
+ START_STATE = None
19
+ END_STATE = None
19
20
 
20
21
 
21
22
  class EmptyEpsilonNFAError(AngrError):
@@ -31,6 +32,15 @@ class DFAConstraintSolver:
31
32
 
32
33
  @staticmethod
33
34
  def graph_to_epsilon_nfa(graph: networkx.DiGraph, starts: set, ends: set) -> EpsilonNFA:
35
+ from pyformlang.finite_automaton import Epsilon, EpsilonNFA, State, Symbol # delayed import
36
+
37
+ global START_STATE, END_STATE # pylint:disable=global-statement
38
+
39
+ if START_STATE is None:
40
+ START_STATE = State("START")
41
+ if END_STATE is None:
42
+ END_STATE = State("END")
43
+
34
44
  enfa = EpsilonNFA()
35
45
 
36
46
  # print("Converting graph to eNFA")
@@ -1,17 +1,18 @@
1
1
  # pylint:disable=bad-builtin
2
2
  from __future__ import annotations
3
3
  from typing import TYPE_CHECKING
4
+ from collections import defaultdict
4
5
 
5
6
  from angr.sim_type import SimStruct, SimTypePointer, SimTypeArray
6
7
  from angr.errors import AngrRuntimeError
7
8
  from angr.analyses.analysis import Analysis, AnalysesHub
9
+ from angr.sim_variable import SimVariable, SimStackVariable
8
10
  from .simple_solver import SimpleSolver
9
11
  from .translator import TypeTranslator
10
12
  from .typeconsts import Struct, Pointer, TypeConstant, Array, TopType
11
13
  from .typevars import Equivalence, Subtype, TypeVariable
12
14
 
13
15
  if TYPE_CHECKING:
14
- from angr.sim_variable import SimVariable
15
16
  from angr.sim_type import SimType
16
17
  from .typevars import TypeConstraint
17
18
 
@@ -38,6 +39,7 @@ class Typehoon(Analysis):
38
39
  var_mapping: dict[SimVariable, set[TypeVariable]] | None = None,
39
40
  must_struct: set[TypeVariable] | None = None,
40
41
  stackvar_max_sizes: dict[TypeVariable, int] | None = None,
42
+ stack_offset_tvs: dict[int, TypeVariable] | None = None,
41
43
  ):
42
44
  """
43
45
 
@@ -54,6 +56,7 @@ class Typehoon(Analysis):
54
56
  self._var_mapping = var_mapping
55
57
  self._must_struct = must_struct
56
58
  self._stackvar_max_sizes = stackvar_max_sizes if stackvar_max_sizes is not None else {}
59
+ self._stack_offset_tvs = stack_offset_tvs if stack_offset_tvs is not None else {}
57
60
 
58
61
  self.bits = self.project.arch.bits
59
62
  self.solution = None
@@ -70,25 +73,55 @@ class Typehoon(Analysis):
70
73
  # Public methods
71
74
  #
72
75
 
73
- def update_variable_types(self, func_addr: int | str, var_to_typevars):
76
+ def update_variable_types(
77
+ self,
78
+ func_addr: int | str,
79
+ var_to_typevars: dict[SimVariable, set[TypeVariable]],
80
+ stack_offset_tvs: dict[int, TypeVariable] | None = None,
81
+ ) -> None:
82
+
83
+ if not self.simtypes_solution:
84
+ return
85
+
74
86
  for var, typevars in var_to_typevars.items():
75
- for typevar in typevars:
87
+ # if the variable is a stack variable, does the stack offset have any corresponding type variable?
88
+ typevars_list = sorted(typevars, key=lambda tv: tv.idx)
89
+ if stack_offset_tvs and isinstance(var, SimStackVariable) and var.offset in stack_offset_tvs:
90
+ typevars_list.append(stack_offset_tvs[var.offset])
91
+
92
+ type_candidates: list[SimType] = []
93
+ for typevar in typevars_list:
76
94
  type_ = self.simtypes_solution.get(typevar, None)
77
- if type_ is not None:
78
- # print("{} -> {}: {}".format(var, typevar, type_))
79
- # Hack: if a global address is of a pointer type and it is not an array, we unpack the type
80
- if (
81
- func_addr == "global"
82
- and isinstance(type_, SimTypePointer)
83
- and not isinstance(type_.pts_to, SimTypeArray)
84
- ):
85
- type_ = type_.pts_to
86
-
87
- name = None
88
- if isinstance(type_, SimStruct):
89
- name = type_.name
90
-
91
- self.kb.variables[func_addr].set_variable_type(var, type_, name=name)
95
+ # print("{} -> {}: {}".format(var, typevar, type_))
96
+ # Hack: if a global address is of a pointer type and it is not an array, we unpack the type
97
+ if (
98
+ func_addr == "global"
99
+ and isinstance(type_, SimTypePointer)
100
+ and not isinstance(type_.pts_to, SimTypeArray)
101
+ ):
102
+ type_ = type_.pts_to
103
+ type_candidates.append(type_)
104
+
105
+ # determine the best type - this logic can be made better!
106
+ if not type_candidates:
107
+ continue
108
+ if len(type_candidates) > 1:
109
+ types_by_size: dict[int, list[SimType]] = defaultdict(list)
110
+ for t in type_candidates:
111
+ if t.size is not None:
112
+ types_by_size[t.size].append(t)
113
+ if not types_by_size:
114
+ # we only have BOT and TOP? damn
115
+ the_type = type_candidates[0]
116
+ else:
117
+ max_size = max(types_by_size.keys())
118
+ the_type = types_by_size[max_size][0] # TODO: Sort it
119
+ else:
120
+ the_type = type_candidates[0]
121
+
122
+ self.kb.variables[func_addr].set_variable_type(
123
+ var, the_type, name=the_type.name if isinstance(the_type, SimStruct) else None
124
+ )
92
125
 
93
126
  def pp_constraints(self) -> None:
94
127
  """
@@ -131,6 +164,8 @@ class Typehoon(Analysis):
131
164
  sol = self.solution[typevar]
132
165
  var_and_typevar = f"{typevar_to_var[typevar]} ({typevar})" if typevar in typevar_to_var else typevar
133
166
  print(f" {var_and_typevar} -> {sol}")
167
+ for stack_off, tv in self._stack_offset_tvs.items():
168
+ print(f" stack_{stack_off:#x} ({tv}) -> {self.solution[tv]}")
134
169
  print("### end of solutions ###")
135
170
 
136
171
  #
@@ -157,6 +192,7 @@ class Typehoon(Analysis):
157
192
  if self._var_mapping:
158
193
  for variable_typevars in self._var_mapping.values():
159
194
  typevars |= variable_typevars
195
+ typevars |= set(self._stack_offset_tvs.values())
160
196
  else:
161
197
  # collect type variables from constraints
162
198
  for constraint in self._constraints[self.func_var]:
@@ -175,6 +211,9 @@ class Typehoon(Analysis):
175
211
  - structs where every element is of the same type will be converted to an array of that element type.
176
212
  """
177
213
 
214
+ if not self.solution:
215
+ return
216
+
178
217
  for tv in list(self.solution.keys()):
179
218
  if self._must_struct and tv in self._must_struct:
180
219
  continue
@@ -223,6 +262,9 @@ class Typehoon(Analysis):
223
262
  Translate solutions in type variables to solutions in SimTypes.
224
263
  """
225
264
 
265
+ if not self.solution:
266
+ return
267
+
226
268
  simtypes_solution = {}
227
269
  translator = TypeTranslator(arch=self.project.arch)
228
270
  needs_backpatch = set()
@@ -397,11 +397,13 @@ class DerivedTypeVariable(TypeVariable):
397
397
  class TypeVariables:
398
398
  __slots__ = (
399
399
  "_last_typevars",
400
+ "_typevar2var",
400
401
  "_typevars",
401
402
  )
402
403
 
403
404
  def __init__(self):
404
405
  self._typevars: dict[SimVariable, set[TypeVariable]] = {}
406
+ self._typevar2var: dict[TypeVariable, SimVariable] = {}
405
407
  self._last_typevars: dict[SimVariable, TypeVariable] = {}
406
408
 
407
409
  def copy(self):
@@ -418,22 +420,24 @@ class TypeVariables:
418
420
  # )
419
421
  return f"{{TypeVars: {len(self._typevars)} items}}"
420
422
 
421
- def add_type_variable(self, var: SimVariable, codeloc, typevar: TypeType): # pylint:disable=unused-argument
423
+ def add_type_variable(self, var: SimVariable, typevar: TypeVariable, latest: bool = True):
422
424
  if var not in self._typevars:
423
425
  self._typevars[var] = set()
424
426
  elif typevar in self._typevars[var]:
425
427
  return
426
428
  self._typevars[var].add(typevar)
427
- self._last_typevars[var] = typevar
429
+ if latest:
430
+ self._last_typevars[var] = typevar
431
+ self._typevar2var[typevar] = var
428
432
 
429
- def get_type_variable(self, var, codeloc): # pylint:disable=unused-argument
433
+ def get_type_variable(self, var): # pylint:disable=unused-argument
430
434
  return self._last_typevars[var]
431
435
 
432
- def has_type_variable_for(self, var: SimVariable, codeloc): # pylint:disable=unused-argument
436
+ def has_type_variable_for(self, var: SimVariable): # pylint:disable=unused-argument
433
437
  return var in self._typevars
434
- # if codeloc not in self._typevars[var]:
435
- # return False
436
- # return True
438
+
439
+ def typevar_to_variable(self, typevar: TypeVariable) -> SimVariable | None:
440
+ return self._typevar2var.get(typevar, None)
437
441
 
438
442
  def __getitem__(self, var):
439
443
  return self._last_typevars[var]
@@ -9,14 +9,13 @@ from unique_log_filter import UniqueLogFilter
9
9
 
10
10
  from angr.engines.light.engine import SimEngineNostmtAIL
11
11
  from angr.procedures import SIM_LIBRARIES, SIM_TYPE_COLLECTIONS
12
- from angr.utils.constants import MAX_POINTSTO_BITS
13
12
  from angr.sim_type import SimTypeFunction, dereference_simtype
14
13
  from angr.analyses.typehoon import typeconsts, typevars
15
14
  from angr.analyses.typehoon.lifter import TypeLifter
16
15
  from .engine_base import SimEngineVRBase, RichR
17
16
 
18
17
  if TYPE_CHECKING:
19
- pass
18
+ from .variable_recovery_fast import VariableRecoveryFastState # noqa: F401
20
19
 
21
20
 
22
21
  l = logging.getLogger(name=__name__)
@@ -272,6 +271,8 @@ class SimEngineVRAIL(
272
271
  if arg.typevar is not None:
273
272
  arg_type = dereference_simtype(arg_type, type_collections).with_arch(arg_type._arch)
274
273
  arg_ty = TypeLifter(self.arch.bits).lift(arg_type)
274
+ if isinstance(arg_ty, typevars.TypeConstraint) and isinstance(arg.typevar, typevars.TypeConstraint):
275
+ continue
275
276
  type_constraint = typevars.Subtype(arg.typevar, arg_ty)
276
277
  self.state.add_type_constraint(type_constraint)
277
278
 
@@ -399,28 +400,23 @@ class SimEngineVRAIL(
399
400
  return RichR(self.state.top(expr.to_bits), typevar=typevar)
400
401
 
401
402
  def _handle_expr_StackBaseOffset(self, expr: ailment.Expr.StackBaseOffset):
402
- ref_typevar = self.state.stack_offset_typevars.get(expr.offset, None)
403
-
404
- if ref_typevar is None:
403
+ refbase_typevar = self.state.stack_offset_typevars.get(expr.offset, None)
404
+ if refbase_typevar is None:
405
405
  # allocate a new type variable
406
- ref_typevar = typevars.TypeVariable()
407
- self.state.stack_offset_typevars[expr.offset] = ref_typevar
406
+ refbase_typevar = typevars.TypeVariable()
407
+ self.state.stack_offset_typevars[expr.offset] = refbase_typevar
408
+
409
+ ref_typevar = typevars.TypeVariable()
410
+ access_derived_typevar = self._create_access_typevar(ref_typevar, False, None, 0)
411
+ load_constraint = typevars.Subtype(refbase_typevar, access_derived_typevar)
412
+ self.state.add_type_constraint(load_constraint)
408
413
 
409
414
  value_v = self.state.stack_address(expr.offset)
410
415
  richr = RichR(value_v, typevar=ref_typevar)
411
416
  codeloc = self._codeloc()
412
- var_and_offsets = self._ensure_variable_existence(richr, codeloc, src_expr=expr)
417
+ self._ensure_variable_existence(richr, codeloc, src_expr=expr)
413
418
  if self._reference_spoffset:
414
419
  self._reference(richr, codeloc, src=expr)
415
- for var, off_in_var in var_and_offsets:
416
- if self.state.typevars.has_type_variable_for(var, codeloc):
417
- var_typevar = self.state.typevars.get_type_variable(var, codeloc)
418
- load_typevar = self._create_access_typevar(
419
- ref_typevar, False, MAX_POINTSTO_BITS // 8, 0 if off_in_var is None else off_in_var
420
- )
421
- type_constraint = typevars.Subtype(var_typevar, load_typevar)
422
- self.state.add_type_constraint(type_constraint)
423
-
424
420
  return richr
425
421
 
426
422
  def _handle_expr_BasePointerOffset(self, expr):
@@ -15,7 +15,7 @@ from angr.sim_variable import SimVariable, SimStackVariable, SimRegisterVariable
15
15
  from angr.code_location import CodeLocation
16
16
  from angr.analyses.typehoon import typevars, typeconsts
17
17
  from angr.analyses.typehoon.typevars import TypeVariable, DerivedTypeVariable, AddN, SubN, Load, Store
18
-
18
+ from angr.utils.constants import MAX_POINTSTO_BITS
19
19
 
20
20
  #
21
21
  # The base engine used in VariableRecoveryFast
@@ -269,9 +269,9 @@ class SimEngineVRBase(
269
269
  return
270
270
  variable, _ = existing_vars[0]
271
271
 
272
- if not self.state.typevars.has_type_variable_for(variable, codeloc):
272
+ if not self.state.typevars.has_type_variable_for(variable):
273
273
  variable_typevar = typevars.TypeVariable()
274
- self.state.typevars.add_type_variable(variable, codeloc, variable_typevar)
274
+ self.state.typevars.add_type_variable(variable, variable_typevar)
275
275
  # we do not add any type constraint here because we are not sure if the given memory address will ever be
276
276
  # accessed or not
277
277
 
@@ -350,13 +350,13 @@ class SimEngineVRBase(
350
350
  self.state.variable_manager[self.func_addr].write_to(variable, None, codeloc, atom=dst, overwrite=False)
351
351
 
352
352
  if richr.typevar is not None:
353
- if not self.state.typevars.has_type_variable_for(variable, codeloc):
353
+ if not self.state.typevars.has_type_variable_for(variable):
354
354
  # assign a new type variable to it
355
355
  typevar = typevars.TypeVariable()
356
- self.state.typevars.add_type_variable(variable, codeloc, typevar)
356
+ self.state.typevars.add_type_variable(variable, typevar)
357
357
  # create constraints
358
358
  else:
359
- typevar = self.state.typevars.get_type_variable(variable, codeloc)
359
+ typevar = self.state.typevars.get_type_variable(variable)
360
360
  self.state.add_type_constraint(typevars.Subtype(richr.typevar, typevar))
361
361
  self.state.add_type_constraint(typevars.Subtype(typevar, typeconsts.int_type(variable.size * 8)))
362
362
 
@@ -448,13 +448,13 @@ class SimEngineVRBase(
448
448
  self.state.variable_manager[self.func_addr].write_to(variable, None, codeloc, atom=dst, overwrite=False)
449
449
 
450
450
  if richr.typevar is not None:
451
- if not self.state.typevars.has_type_variable_for(variable, codeloc):
451
+ if not self.state.typevars.has_type_variable_for(variable):
452
452
  # assign a new type variable to it
453
453
  typevar = typevars.TypeVariable()
454
- self.state.typevars.add_type_variable(variable, codeloc, typevar)
454
+ self.state.typevars.add_type_variable(variable, typevar)
455
455
  # create constraints
456
456
  else:
457
- typevar = self.state.typevars.get_type_variable(variable, codeloc)
457
+ typevar = self.state.typevars.get_type_variable(variable)
458
458
  self.state.add_type_constraint(typevars.Subtype(richr.typevar, typevar))
459
459
  # the constraint below is a default constraint that may conflict with more specific ones with different
460
460
  # sizes; we post-process at the very end of VRA to remove conflicting default constraints.
@@ -564,11 +564,11 @@ class SimEngineVRBase(
564
564
 
565
565
  # create type constraints
566
566
  if data.typevar is not None:
567
- if not self.state.typevars.has_type_variable_for(variable, codeloc):
567
+ if not self.state.typevars.has_type_variable_for(variable):
568
568
  typevar = typevars.TypeVariable()
569
- self.state.typevars.add_type_variable(variable, codeloc, typevar)
569
+ self.state.typevars.add_type_variable(variable, typevar)
570
570
  else:
571
- typevar = self.state.typevars.get_type_variable(variable, codeloc)
571
+ typevar = self.state.typevars.get_type_variable(variable)
572
572
  if typevar is not None:
573
573
  self.state.add_type_constraint(typevars.Subtype(data.typevar, typevar))
574
574
  # TODO: Create a tv_sp.store.<bits>@N <: typevar type constraint for the stack pointer
@@ -640,11 +640,11 @@ class SimEngineVRBase(
640
640
  variable_manager.write_to(var, var_offset, codeloc, atom=stmt)
641
641
 
642
642
  # create type constraints
643
- if not self.state.typevars.has_type_variable_for(variable, codeloc):
643
+ if not self.state.typevars.has_type_variable_for(variable):
644
644
  typevar = typevars.TypeVariable()
645
- self.state.typevars.add_type_variable(variable, codeloc, typevar)
645
+ self.state.typevars.add_type_variable(variable, typevar)
646
646
  else:
647
- typevar = self.state.typevars.get_type_variable(variable, codeloc)
647
+ typevar = self.state.typevars.get_type_variable(variable)
648
648
 
649
649
  if offset is not None and elem_size is not None:
650
650
  # it's an array!
@@ -671,9 +671,6 @@ class SimEngineVRBase(
671
671
  self.state.add_type_constraint(typevars.Subtype(data.typevar, store_typevar))
672
672
 
673
673
  def _store_to_variable(self, richr_addr: RichR[claripy.ast.BV], data: RichR, size: int):
674
- addr_variable = richr_addr.variable
675
- codeloc = self._codeloc()
676
-
677
674
  # Storing data into a pointer
678
675
  if richr_addr.type_constraints:
679
676
  for tc in richr_addr.type_constraints:
@@ -690,8 +687,6 @@ class SimEngineVRBase(
690
687
  field_offset = 0
691
688
 
692
689
  store_typevar = self._create_access_typevar(base_typevar, True, size, field_offset)
693
- if addr_variable is not None:
694
- self.state.typevars.add_type_variable(addr_variable, codeloc, typevar)
695
690
  data_typevar = data.typevar if data.typevar is not None else typeconsts.TopType()
696
691
  self.state.add_type_constraint(typevars.Subtype(store_typevar, data_typevar))
697
692
 
@@ -823,11 +818,11 @@ class SimEngineVRBase(
823
818
  self.state.delayed_type_constraints.pop(var)
824
819
 
825
820
  # create type constraints
826
- if not self.state.typevars.has_type_variable_for(var, codeloc):
821
+ if not self.state.typevars.has_type_variable_for(var):
827
822
  typevar = typevars.TypeVariable()
828
- self.state.typevars.add_type_variable(var, codeloc, typevar)
823
+ self.state.typevars.add_type_variable(var, typevar)
829
824
  else:
830
- typevar = self.state.typevars.get_type_variable(var, codeloc)
825
+ typevar = self.state.typevars.get_type_variable(var)
831
826
 
832
827
  else:
833
828
  typevar = typevars.TypeVariable()
@@ -933,11 +928,11 @@ class SimEngineVRBase(
933
928
 
934
929
  variable, _ = next(iter(existing_vars))
935
930
  # create type constraints
936
- if not self.state.typevars.has_type_variable_for(variable, codeloc):
931
+ if not self.state.typevars.has_type_variable_for(variable):
937
932
  typevar = typevars.TypeVariable()
938
- self.state.typevars.add_type_variable(variable, codeloc, typevar)
933
+ self.state.typevars.add_type_variable(variable, typevar)
939
934
  else:
940
- typevar = self.state.typevars.get_type_variable(variable, codeloc)
935
+ typevar = self.state.typevars.get_type_variable(variable)
941
936
 
942
937
  if offset is not None and elem_size is not None:
943
938
  # it's an array!
@@ -1024,7 +1019,7 @@ class SimEngineVRBase(
1024
1019
 
1025
1020
  if var not in self.state.typevars:
1026
1021
  typevar = typevars.TypeVariable()
1027
- self.state.typevars.add_type_variable(var, codeloc, typevar)
1022
+ self.state.typevars.add_type_variable(var, typevar)
1028
1023
  else:
1029
1024
  # FIXME: This is an extremely stupid hack. Fix it later.
1030
1025
  # | typevar = next(reversed(list(self.state.typevars[var].values())))
@@ -1125,7 +1120,7 @@ class SimEngineVRBase(
1125
1120
 
1126
1121
  if var not in self.state.typevars:
1127
1122
  typevar = typevars.TypeVariable()
1128
- self.state.typevars.add_type_variable(var, codeloc, typevar)
1123
+ self.state.typevars.add_type_variable(var, typevar)
1129
1124
  else:
1130
1125
  # FIXME: This is an extremely stupid hack. Fix it later.
1131
1126
  # | typevar = next(reversed(list(self.state.typevars[var].values())))
@@ -1140,7 +1135,7 @@ class SimEngineVRBase(
1140
1135
  self,
1141
1136
  typevar: typeconsts.TypeConstant | TypeVariable | DerivedTypeVariable,
1142
1137
  is_store: bool,
1143
- size: int,
1138
+ size: int | None,
1144
1139
  offset: int,
1145
1140
  ) -> DerivedTypeVariable:
1146
1141
  if isinstance(typevar, DerivedTypeVariable):
@@ -1157,8 +1152,9 @@ class SimEngineVRBase(
1157
1152
  else:
1158
1153
  typevar = DerivedTypeVariable(typevar.type_var, None, labels=typevar.labels[:-1])
1159
1154
  lbl = Store() if is_store else Load()
1155
+ bits = size * self.project.arch.byte_width if size is not None else MAX_POINTSTO_BITS
1160
1156
  return DerivedTypeVariable(
1161
1157
  typevar,
1162
1158
  None,
1163
- labels=(lbl, typevars.HasField(size * self.project.arch.byte_width, offset)),
1159
+ labels=(lbl, typevars.HasField(bits, offset)),
1164
1160
  )