angr 9.2.148__py3-none-win_amd64.whl → 9.2.149__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr has been flagged as potentially problematic; further details were linked from the original page.

Files changed (56)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +42 -2
  3. angr/analyses/cfg/cfg_emulated.py +5 -2
  4. angr/analyses/cfg/cfg_fast.py +48 -46
  5. angr/analyses/decompiler/ail_simplifier.py +65 -32
  6. angr/analyses/decompiler/block_simplifier.py +20 -6
  7. angr/analyses/decompiler/clinic.py +80 -13
  8. angr/analyses/decompiler/dephication/rewriting_engine.py +24 -2
  9. angr/analyses/decompiler/optimization_passes/__init__.py +5 -0
  10. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +15 -13
  11. angr/analyses/decompiler/optimization_passes/determine_load_sizes.py +64 -0
  12. angr/analyses/decompiler/optimization_passes/eager_std_string_concatenation.py +165 -0
  13. angr/analyses/decompiler/optimization_passes/engine_base.py +11 -2
  14. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +17 -2
  15. angr/analyses/decompiler/optimization_passes/optimization_pass.py +10 -6
  16. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +99 -30
  17. angr/analyses/decompiler/peephole_optimizations/__init__.py +6 -0
  18. angr/analyses/decompiler/peephole_optimizations/base.py +43 -3
  19. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
  20. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +3 -0
  21. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +4 -1
  22. angr/analyses/decompiler/peephole_optimizations/remove_cxx_destructor_calls.py +32 -0
  23. angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +69 -2
  24. angr/analyses/decompiler/peephole_optimizations/rewrite_conv_mul.py +40 -0
  25. angr/analyses/decompiler/peephole_optimizations/rewrite_cxx_operator_calls.py +90 -0
  26. angr/analyses/decompiler/presets/fast.py +2 -0
  27. angr/analyses/decompiler/presets/full.py +2 -0
  28. angr/analyses/decompiler/ssailification/rewriting_engine.py +51 -4
  29. angr/analyses/decompiler/ssailification/ssailification.py +23 -3
  30. angr/analyses/decompiler/ssailification/traversal_engine.py +15 -1
  31. angr/analyses/decompiler/structured_codegen/c.py +141 -10
  32. angr/analyses/decompiler/utils.py +6 -1
  33. angr/analyses/s_reaching_definitions/s_rda_view.py +1 -0
  34. angr/analyses/typehoon/lifter.py +20 -0
  35. angr/analyses/typehoon/simple_solver.py +42 -9
  36. angr/analyses/typehoon/translator.py +4 -1
  37. angr/analyses/typehoon/typeconsts.py +17 -6
  38. angr/analyses/typehoon/typehoon.py +21 -5
  39. angr/analyses/variable_recovery/engine_ail.py +44 -5
  40. angr/analyses/variable_recovery/engine_base.py +35 -12
  41. angr/analyses/variable_recovery/variable_recovery_fast.py +33 -2
  42. angr/calling_conventions.py +23 -5
  43. angr/engines/light/engine.py +7 -0
  44. angr/knowledge_plugins/functions/function.py +68 -0
  45. angr/knowledge_plugins/propagations/states.py +5 -2
  46. angr/knowledge_plugins/variables/variable_manager.py +3 -3
  47. angr/lib/angr_native.dll +0 -0
  48. angr/procedures/definitions/__init__.py +1 -1
  49. angr/procedures/definitions/types_stl.py +22 -0
  50. angr/sim_type.py +251 -130
  51. {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/METADATA +7 -7
  52. {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/RECORD +56 -50
  53. {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/WHEEL +1 -1
  54. {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/licenses/LICENSE +3 -0
  55. {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/entry_points.txt +0 -0
  56. {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/top_level.txt +0 -0
@@ -13,6 +13,7 @@ from angr.sim_type import (
13
13
  SimTypeArray,
14
14
  SimTypeFloat,
15
15
  SimTypeDouble,
16
+ SimCppClass,
16
17
  )
17
18
  from .typeconsts import BottomType, Int8, Int16, Int32, Int64, Pointer32, Pointer64, Struct, Array, Float32, Float64
18
19
 
@@ -77,6 +78,24 @@ class TypeLifter:
77
78
  obj.field_names = field_names
78
79
  return obj
79
80
 
81
+ def _lift_SimCppClass(self, ty: SimCppClass) -> TypeConstant | BottomType:
82
+ if ty in self.memo:
83
+ return BottomType()
84
+
85
+ obj = Struct(fields={}, name=ty.name, is_cppclass=True)
86
+ self.memo[ty] = obj
87
+ converted_fields = {}
88
+ field_names = {}
89
+ ty_offsets = ty.offsets
90
+ for field_name, simtype in ty.members.items():
91
+ if field_name not in ty_offsets:
92
+ return BottomType()
93
+ converted_fields[ty_offsets[field_name]] = self.lift(simtype)
94
+ field_names[ty_offsets[field_name]] = field_name
95
+ obj.fields = converted_fields
96
+ obj.field_names = field_names
97
+ return obj
98
+
80
99
  def _lift_SimTypeArray(self, ty: SimTypeArray) -> Array:
81
100
  elem_type = self.lift(ty.elem_type)
82
101
  return Array(elem_type, count=ty.length)
@@ -96,6 +115,7 @@ _mapping = {
96
115
  SimTypeLongLong: TypeLifter._lift_SimTypeLongLong,
97
116
  SimTypePointer: TypeLifter._lift_SimTypePointer,
98
117
  SimStruct: TypeLifter._lift_SimStruct,
118
+ SimCppClass: TypeLifter._lift_SimCppClass,
99
119
  SimTypeArray: TypeLifter._lift_SimTypeArray,
100
120
  SimTypeFloat: TypeLifter._lift_SimTypeFloat,
101
121
  SimTypeDouble: TypeLifter._lift_SimTypeDouble,
@@ -181,7 +181,7 @@ class Sketch:
181
181
 
182
182
  def __init__(self, solver: SimpleSolver, root: TypeVariable):
183
183
  self.root: SketchNode = SketchNode(root)
184
- self.graph = networkx.DiGraph()
184
+ self.graph = networkx.MultiDiGraph()
185
185
  self.node_mapping: dict[TypeVariable | DerivedTypeVariable, SketchNodeBase] = {}
186
186
  self.solver = solver
187
187
 
@@ -200,7 +200,7 @@ class Sketch:
200
200
  for label in typevar.labels:
201
201
  succs = []
202
202
  for _, dst, data in self.graph.out_edges(node, data=True):
203
- if "label" in data and data["label"] == label:
203
+ if "label" in data and data["label"] == label and dst not in succs:
204
204
  succs.append(dst)
205
205
  if len(succs) > 1:
206
206
  _l.warning(
@@ -215,6 +215,11 @@ class Sketch:
215
215
  return node
216
216
 
217
217
  def add_edge(self, src: SketchNodeBase, dst: SketchNodeBase, label) -> None:
218
+ # ensure the label does not already exist in existing edges
219
+ if self.graph.has_edge(src, dst):
220
+ for data in self.graph.get_edge_data(src, dst).values():
221
+ if "label" in data and data["label"] == label:
222
+ return
218
223
  self.graph.add_edge(src, dst, label=label)
219
224
 
220
225
  def add_constraint(self, constraint: TypeConstraint) -> None:
@@ -315,7 +320,7 @@ class ConstraintGraphNode:
315
320
  tag_str = "R"
316
321
  else:
317
322
  tag_str = "U"
318
- forgotten_str = "PRE" if FORGOTTEN.PRE_FORGOTTEN else "POST"
323
+ forgotten_str = "PRE" if self.forgotten == FORGOTTEN.PRE_FORGOTTEN else "POST"
319
324
  s = f"{self.typevar}#{variance_str}.{tag_str}.{forgotten_str}"
320
325
  if ":" in s:
321
326
  return '"' + s + '"'
@@ -820,6 +825,7 @@ class SimpleSolver:
820
825
  """
821
826
 
822
827
  graph = networkx.DiGraph()
828
+ constraints = self._get_transitive_subtype_constraints(constraints)
823
829
  for constraint in constraints:
824
830
  if isinstance(constraint, Subtype):
825
831
  self._constraint_graph_add_edges(
@@ -830,6 +836,33 @@ class SimpleSolver:
830
836
  self._constraint_graph_recall_forget_split(graph)
831
837
  return graph
832
838
 
839
+ @staticmethod
840
+ def _get_transitive_subtype_constraints(constraints: set[TypeConstraint]) -> set[TypeConstraint]:
841
+ """
842
+ Apply the S-Trans rule: a <: b, b <: c => a <: c
843
+ """
844
+ tv2supertypes = defaultdict(set)
845
+ for constraint in constraints:
846
+ if isinstance(constraint, Subtype):
847
+ tv2supertypes[constraint.sub_type].add(constraint.super_type)
848
+
849
+ new_constraints = set()
850
+ while True:
851
+ changed = False
852
+ for subtype, supertypes in tv2supertypes.items():
853
+ supertypes_copy = set(supertypes)
854
+ for supertype in supertypes_copy:
855
+ if supertype in tv2supertypes:
856
+ for supertype_ in tv2supertypes[supertype]:
857
+ if supertype_ not in supertypes_copy:
858
+ changed = True
859
+ supertypes.add(supertype_)
860
+ new_constraints.add(Subtype(subtype, supertype_))
861
+ if not changed:
862
+ break
863
+
864
+ return constraints | new_constraints
865
+
833
866
  @staticmethod
834
867
  def _constraint_graph_add_recall_edges(graph: networkx.DiGraph, node: ConstraintGraphNode) -> None:
835
868
  while True:
@@ -1234,21 +1267,21 @@ class SimpleSolver:
1234
1267
  offset_to_maxsize[base] = max(offset_to_maxsize[base], (last_label.offset - base) + access_size)
1235
1268
  offset_to_sizes[base].add(access_size)
1236
1269
 
1237
- node_to_base = {}
1270
+ idx_to_base = {}
1238
1271
 
1239
- for labels, succ in path_and_successors:
1272
+ for idx, (labels, _) in enumerate(path_and_successors):
1240
1273
  last_label = labels[-1] if labels else None
1241
1274
  if isinstance(last_label, HasField):
1242
1275
  prev_offset = next(offset_to_base.irange(maximum=last_label.offset, reverse=True))
1243
- node_to_base[succ] = offset_to_base[prev_offset]
1276
+ idx_to_base[idx] = offset_to_base[prev_offset]
1244
1277
 
1245
1278
  node_by_offset = defaultdict(set)
1246
1279
 
1247
- for labels, succ in path_and_successors:
1280
+ for idx, (labels, succ) in enumerate(path_and_successors):
1248
1281
  last_label = labels[-1] if labels else None
1249
1282
  if isinstance(last_label, HasField):
1250
- if succ in node_to_base:
1251
- node_by_offset[node_to_base[succ]].add(succ)
1283
+ if idx in idx_to_base:
1284
+ node_by_offset[idx_to_base[idx]].add(succ)
1252
1285
  else:
1253
1286
  node_by_offset[last_label.offset].add(succ)
1254
1287
 
@@ -105,7 +105,10 @@ class TypeTranslator:
105
105
 
106
106
  name = tc.name if tc.name else self.struct_name()
107
107
 
108
- s = sim_type.SimStruct({}, name=name).with_arch(self.arch)
108
+ if tc.is_cppclass:
109
+ s = sim_type.SimCppClass(name=name).with_arch(self.arch)
110
+ else:
111
+ s = sim_type.SimStruct({}, name=name).with_arch(self.arch)
109
112
  self.structs[tc] = s
110
113
 
111
114
  next_offset = 0
@@ -114,6 +114,18 @@ class Int512(Int):
114
114
  return "int512"
115
115
 
116
116
 
117
+ class IntVar(Int):
118
+ def __init__(self, size):
119
+ self._size = size
120
+
121
+ @property
122
+ def size(self) -> int:
123
+ return self._size
124
+
125
+ def __repr__(self, memo=None):
126
+ return "intvar"
127
+
128
+
117
129
  class Float(TypeConstant):
118
130
  def __repr__(self, memo=None) -> str:
119
131
  return "floatbase"
@@ -211,10 +223,11 @@ class Array(TypeConstant):
211
223
 
212
224
 
213
225
  class Struct(TypeConstant):
214
- def __init__(self, fields=None, name=None, field_names=None):
226
+ def __init__(self, fields=None, name=None, field_names=None, is_cppclass: bool = False):
215
227
  self.fields = {} if fields is None else fields # offset to type
216
228
  self.name = name
217
229
  self.field_names = field_names
230
+ self.is_cppclass = is_cppclass
218
231
 
219
232
  def _hash(self, visited: set[int]):
220
233
  if id(self) in visited:
@@ -236,9 +249,9 @@ class Struct(TypeConstant):
236
249
 
237
250
  @memoize
238
251
  def __repr__(self, memo=None):
239
- prefix = "struct"
252
+ prefix = "CppClass" if self.is_cppclass else "struct"
240
253
  if self.name:
241
- prefix = f"struct {self.name}"
254
+ prefix = f"{prefix} {self.name}"
242
255
  return (
243
256
  prefix
244
257
  + "{"
@@ -312,9 +325,7 @@ def int_type(bits: int) -> Int:
312
325
  256: Int256,
313
326
  512: Int512,
314
327
  }
315
- if bits in mapping:
316
- return mapping[bits]()
317
- raise TypeError(f"Not a known size of int: {bits}")
328
+ return mapping[bits]() if bits in mapping else IntVar(bits)
318
329
 
319
330
 
320
331
  def float_type(bits: int) -> Float | None:
@@ -10,7 +10,7 @@ from angr.sim_variable import SimVariable, SimStackVariable
10
10
  from .simple_solver import SimpleSolver
11
11
  from .translator import TypeTranslator
12
12
  from .typeconsts import Struct, Pointer, TypeConstant, Array, TopType
13
- from .typevars import Equivalence, Subtype, TypeVariable
13
+ from .typevars import Equivalence, Subtype, TypeVariable, DerivedTypeVariable
14
14
 
15
15
  if TYPE_CHECKING:
16
16
  from angr.sim_type import SimType
@@ -187,6 +187,10 @@ class Typehoon(Analysis):
187
187
  if self._ground_truth and self.simtypes_solution is not None:
188
188
  self.simtypes_solution.update(self._ground_truth)
189
189
 
190
+ @staticmethod
191
+ def _resolve_derived(tv):
192
+ return tv.type_var if isinstance(tv, DerivedTypeVariable) else tv
193
+
190
194
  def _solve(self):
191
195
  typevars = set()
192
196
  if self._var_mapping:
@@ -198,9 +202,10 @@ class Typehoon(Analysis):
198
202
  for constraint in self._constraints[self.func_var]:
199
203
  if isinstance(constraint, Subtype):
200
204
  if isinstance(constraint.sub_type, TypeVariable):
201
- typevars.add(constraint.sub_type)
205
+ typevars.add(self._resolve_derived(constraint.sub_type))
202
206
  if isinstance(constraint.super_type, TypeVariable):
203
- typevars.add(constraint.super_type)
207
+ typevars.add(self._resolve_derived(constraint.super_type))
208
+
204
209
  solver = SimpleSolver(self.bits, self._constraints, typevars, stackvar_max_sizes=self._stackvar_max_sizes)
205
210
  self.solution = solver.solution
206
211
 
@@ -214,13 +219,16 @@ class Typehoon(Analysis):
214
219
  if not self.solution:
215
220
  return
216
221
 
222
+ memo = set()
217
223
  for tv in list(self.solution.keys()):
218
224
  if self._must_struct and tv in self._must_struct:
219
225
  continue
220
226
  sol = self.solution[tv]
221
- specialized = self._specialize_struct(sol)
227
+ specialized = self._specialize_struct(sol, memo=memo)
222
228
  if specialized is not None:
223
229
  self.solution[tv] = specialized
230
+ else:
231
+ memo.add(sol)
224
232
 
225
233
  def _specialize_struct(self, tc, memo: set | None = None):
226
234
  if isinstance(tc, Pointer):
@@ -240,7 +248,11 @@ class Typehoon(Analysis):
240
248
  return field0
241
249
 
242
250
  # are all fields the same?
243
- if len(tc.fields) > 1 and all(tc.fields[off] == field0 for off in offsets):
251
+ if (
252
+ len(tc.fields) > 1
253
+ and not self._is_pointer_to(field0, tc)
254
+ and all(tc.fields[off] == field0 for off in offsets)
255
+ ):
244
256
  # are all fields aligned properly?
245
257
  try:
246
258
  alignment = field0.size
@@ -257,6 +269,10 @@ class Typehoon(Analysis):
257
269
 
258
270
  return None
259
271
 
272
+ @staticmethod
273
+ def _is_pointer_to(pointer_to: TypeConstant, base_type: TypeConstant) -> bool:
274
+ return isinstance(pointer_to, Pointer) and pointer_to.basetype == base_type
275
+
260
276
  def _translate_to_simtypes(self):
261
277
  """
262
278
  Translate solutions in type variables to solutions in SimTypes.
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, cast
4
4
  import logging
5
5
 
6
6
  import ailment
7
+ from ailment.constant import UNDETERMINED_SIZE
7
8
  import claripy
8
9
  from unique_log_filter import UniqueLogFilter
9
10
 
@@ -30,8 +31,15 @@ class SimEngineVRAIL(
30
31
  The engine for variable recovery on AIL.
31
32
  """
32
33
 
33
- def __init__(self, *args, call_info=None, vvar_to_vvar: dict[int, int] | None, **kwargs):
34
- super().__init__(*args, **kwargs)
34
+ def __init__(
35
+ self,
36
+ *args,
37
+ call_info=None,
38
+ vvar_to_vvar: dict[int, int] | None,
39
+ vvar_type_hints: dict[int, typeconsts.TypeConstant] | None = None,
40
+ **kwargs,
41
+ ):
42
+ super().__init__(*args, vvar_type_hints=vvar_type_hints, **kwargs)
35
43
 
36
44
  self._reference_spoffset: bool = False
37
45
  self.call_info = call_info or {}
@@ -100,6 +108,13 @@ class SimEngineVRAIL(
100
108
  else:
101
109
  l.warning("Unsupported dst type %s.", dst_type)
102
110
 
111
+ def _handle_stmt_WeakAssignment(self, stmt) -> None:
112
+ src = self._expr(stmt.src)
113
+ dst = self._expr(stmt.dst)
114
+ if isinstance(src, RichR) and isinstance(dst, RichR) and src.typevar is not None and dst.typevar is not None:
115
+ tc = typevars.Subtype(src.typevar, dst.typevar)
116
+ self.state.add_type_constraint(tc)
117
+
103
118
  def _handle_stmt_Store(self, stmt: ailment.Stmt.Store):
104
119
  addr_r = self._expr_bv(stmt.addr)
105
120
  data = self._expr(stmt.data)
@@ -325,7 +340,9 @@ class SimEngineVRAIL(
325
340
  addr_r = self._expr_bv(expr.addr)
326
341
  size = expr.size
327
342
 
328
- return self._load(addr_r, size, expr=expr)
343
+ if size != UNDETERMINED_SIZE:
344
+ return self._load(addr_r, size, expr=expr)
345
+ return self._top(8)
329
346
 
330
347
  def _handle_expr_VirtualVariable(self, expr: ailment.Expr.VirtualVariable):
331
348
  return self._read_from_vvar(expr, expr=expr, vvar_id=self._mapped_vvarid(expr.varid))
@@ -419,6 +436,29 @@ class SimEngineVRAIL(
419
436
  self._reference(richr, codeloc, src=expr)
420
437
  return richr
421
438
 
439
+ def _handle_unop_Reference(self, expr: ailment.Expr.UnaryOp):
440
+ if isinstance(expr.operand, ailment.Expr.VirtualVariable) and expr.operand.was_stack:
441
+ off = expr.operand.stack_offset
442
+ refbase_typevar = self.state.stack_offset_typevars.get(off, None)
443
+ if refbase_typevar is None:
444
+ # allocate a new type variable
445
+ refbase_typevar = typevars.TypeVariable()
446
+ self.state.stack_offset_typevars[off] = refbase_typevar
447
+
448
+ ref_typevar = typevars.TypeVariable()
449
+ access_derived_typevar = self._create_access_typevar(ref_typevar, False, None, 0)
450
+ load_constraint = typevars.Subtype(refbase_typevar, access_derived_typevar)
451
+ self.state.add_type_constraint(load_constraint)
452
+
453
+ value_v = self.state.stack_address(off)
454
+ richr = RichR(value_v, typevar=ref_typevar)
455
+ codeloc = self._codeloc()
456
+ self._ensure_variable_existence(richr, codeloc, src_expr=expr.operand)
457
+ if self._reference_spoffset:
458
+ self._reference(richr, codeloc, src=expr.operand)
459
+ return richr
460
+ return RichR(self.state.top(expr.bits))
461
+
422
462
  def _handle_expr_BasePointerOffset(self, expr):
423
463
  # TODO
424
464
  return self._top(expr.bits)
@@ -433,7 +473,7 @@ class SimEngineVRAIL(
433
473
  def _handle_binop_Add(self, expr):
434
474
  arg0, arg1 = expr.operands
435
475
  r0, r1 = self._expr_pair(arg0, arg1)
436
- compute = r0.data + r1.data # type: ignore
476
+ compute = r0.data + r1.data if r0.data.size() == r1.data.size() else self.state.top(expr.bits) # type: ignore
437
477
 
438
478
  type_constraints = set()
439
479
  # create a new type variable and add constraints accordingly
@@ -844,7 +884,6 @@ class SimEngineVRAIL(
844
884
  self._expr(expr.operands[0])
845
885
  return RichR(self.state.top(expr.bits))
846
886
 
847
- _handle_unop_Reference = _handle_unop_Default
848
887
  _handle_unop_Dereference = _handle_unop_Default
849
888
  _handle_unop_Clz = _handle_unop_Default
850
889
  _handle_unop_Ctz = _handle_unop_Default
@@ -70,9 +70,12 @@ class SimEngineVRBase(
70
70
  and storing data.
71
71
  """
72
72
 
73
- def __init__(self, project, kb):
73
+ def __init__(self, project, kb, vvar_type_hints: dict[int, typeconsts.TypeConstant] | None = None):
74
74
  super().__init__(project)
75
75
 
76
+ self.vvar_type_hints: dict[int, typeconsts.TypeConstant] = (
77
+ vvar_type_hints if vvar_type_hints is not None else {}
78
+ )
76
79
  self.kb = kb
77
80
  self.vvar_region: dict[int, Any] = {}
78
81
 
@@ -453,13 +456,19 @@ class SimEngineVRBase(
453
456
  # assign a new type variable to it
454
457
  typevar = typevars.TypeVariable()
455
458
  self.state.typevars.add_type_variable(variable, typevar)
456
- # create constraints
457
459
  else:
458
460
  typevar = self.state.typevars.get_type_variable(variable)
461
+
462
+ # create constraints accordingly
463
+
459
464
  self.state.add_type_constraint(typevars.Subtype(richr.typevar, typevar))
460
- # the constraint below is a default constraint that may conflict with more specific ones with different
461
- # sizes; we post-process at the very end of VRA to remove conflicting default constraints.
462
- self.state.add_type_constraint(typevars.Subtype(typevar, typeconsts.int_type(variable.size * 8)))
465
+ if vvar.varid in self.vvar_type_hints:
466
+ # handle type hints
467
+ self.state.add_type_constraint(typevars.Subtype(typevar, self.vvar_type_hints[vvar.varid]))
468
+ else:
469
+ # the constraint below is a default constraint that may conflict with more specific ones with different
470
+ # sizes; we post-process at the very end of VRA to remove conflicting default constraints.
471
+ self.state.add_type_constraint(typevars.Subtype(typevar, typeconsts.int_type(variable.size * 8)))
463
472
 
464
473
  return variable
465
474
 
@@ -978,14 +987,22 @@ class SimEngineVRBase(
978
987
  value = self.state.top(size * self.project.arch.byte_width)
979
988
  if create_variable:
980
989
  # create a new variable if necessary
981
- variable = SimRegisterVariable(
982
- offset,
983
- size if force_variable_size is None else force_variable_size,
984
- ident=self.state.variable_manager[self.func_addr].next_variable_ident("register"),
985
- region=self.func_addr,
986
- )
990
+
991
+ # check if there is an existing variable for the atom at this location already
992
+ existing_vars: set[tuple[SimVariable, int]] = self.state.variable_manager[
993
+ self.func_addr
994
+ ].find_variables_by_atom(self.block.addr, self.stmt_idx, expr)
995
+ if not existing_vars:
996
+ variable = SimRegisterVariable(
997
+ offset,
998
+ size if force_variable_size is None else force_variable_size,
999
+ ident=self.state.variable_manager[self.func_addr].next_variable_ident("register"),
1000
+ region=self.func_addr,
1001
+ )
1002
+ self.state.variable_manager[self.func_addr].add_variable("register", offset, variable)
1003
+ else:
1004
+ variable = next(iter(existing_vars))[0]
987
1005
  value = self.state.annotate_with_variables(value, [(0, variable)])
988
- self.state.variable_manager[self.func_addr].add_variable("register", offset, variable)
989
1006
  self.state.register_region.store(offset, value)
990
1007
  value_list = [{value}]
991
1008
  else:
@@ -1131,6 +1148,12 @@ class SimEngineVRBase(
1131
1148
  if var is not None and var.size != vvar.size:
1132
1149
  # ignore the variable and the associated type if we are only reading part of the variable
1133
1150
  return RichR(value, variable=var)
1151
+
1152
+ # handle type hints
1153
+ if vvar.varid in self.vvar_type_hints:
1154
+ assert isinstance(typevar, typevars.TypeVariable)
1155
+ self.state.add_type_constraint(typevars.Subtype(typevar, self.vvar_type_hints[vvar.varid]))
1156
+
1134
1157
  return RichR(value, variable=var, typevar=typevar)
1135
1158
 
1136
1159
  def _create_access_typevar(
@@ -12,21 +12,25 @@ import ailment
12
12
  from ailment.expression import VirtualVariable
13
13
 
14
14
  import angr.errors
15
+ from angr import SIM_TYPE_COLLECTIONS
15
16
  from angr.analyses import AnalysesHub
16
17
  from angr.storage.memory_mixins.paged_memory.pages.multi_values import MultiValues
17
18
  from angr.block import Block
18
19
  from angr.errors import AngrVariableRecoveryError, SimEngineError
19
20
  from angr.knowledge_plugins import Function
21
+ from angr.knowledge_plugins.key_definitions import atoms
20
22
  from angr.sim_variable import SimStackVariable, SimRegisterVariable, SimVariable, SimMemoryVariable
21
23
  from angr.engines.vex.claripy.irop import vexop_to_simop
22
24
  from angr.analyses import ForwardAnalysis, visitors
23
25
  from angr.analyses.typehoon.typevars import Equivalence, TypeVariable, TypeVariables, Subtype, DerivedTypeVariable
24
- from angr.analyses.typehoon.typeconsts import Int
26
+ from angr.analyses.typehoon.typeconsts import Int, TypeConstant, BottomType, TopType
27
+ from angr.analyses.typehoon.lifter import TypeLifter
25
28
  from .variable_recovery_base import VariableRecoveryBase, VariableRecoveryStateBase
26
29
  from .engine_vex import SimEngineVRVEX
27
30
  from .engine_ail import SimEngineVRAIL
28
31
  import contextlib
29
32
 
33
+
30
34
  if TYPE_CHECKING:
31
35
  from angr.analyses.typehoon.typevars import TypeConstraint
32
36
 
@@ -241,6 +245,7 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
241
245
  unify_variables=True,
242
246
  func_arg_vvars: dict[int, tuple[VirtualVariable, SimVariable]] | None = None,
243
247
  vvar_to_vvar: dict[int, int] | None = None,
248
+ type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]] | None = None,
244
249
  ):
245
250
  if not isinstance(func, Function):
246
251
  func = self.kb.functions[func]
@@ -269,8 +274,17 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
269
274
  self._func_arg_vvars = func_arg_vvars
270
275
  self._unify_variables = unify_variables
271
276
 
277
+ # handle type hints
278
+ self.vvar_type_hints = {}
279
+ if type_hints:
280
+ self._parse_type_hints(type_hints)
281
+
272
282
  self._ail_engine: SimEngineVRAIL = SimEngineVRAIL(
273
- self.project, self.kb, call_info=call_info, vvar_to_vvar=self.vvar_to_vvar
283
+ self.project,
284
+ self.kb,
285
+ call_info=call_info,
286
+ vvar_to_vvar=self.vvar_to_vvar,
287
+ vvar_type_hints=self.vvar_type_hints,
274
288
  )
275
289
  self._vex_engine: SimEngineVRVEX = SimEngineVRVEX(self.project, self.kb, call_info=call_info)
276
290
 
@@ -617,5 +631,22 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
617
631
  if adjusted:
618
632
  state.register_region.store(self.project.arch.sp_offset, sp_v)
619
633
 
634
+ def _parse_type_hints(self, type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]]) -> None:
635
+ self.vvar_type_hints = {}
636
+ for loc, type_hint_str in type_hints:
637
+ if isinstance(loc, atoms.VirtualVariable):
638
+ type_hint = self._parse_type_hint(type_hint_str)
639
+ if type_hint is not None:
640
+ self.vvar_type_hints[loc.varid] = type_hint
641
+ # TODO: Handle other types of locations
642
+
643
+ def _parse_type_hint(self, type_hint_str: str) -> TypeConstant | None:
644
+ ty = SIM_TYPE_COLLECTIONS["cpp::std"].get(type_hint_str)
645
+ if ty is None:
646
+ return None
647
+ ty = ty.with_arch(self.project.arch)
648
+ lifted = TypeLifter(self.project.arch.bits).lift(ty)
649
+ return None if isinstance(lifted, (BottomType, TopType)) else lifted
650
+
620
651
 
621
652
  AnalysesHub.register_default("VariableRecoveryFast", VariableRecoveryFast)
@@ -1196,7 +1196,7 @@ class SimCC:
1196
1196
 
1197
1197
  @staticmethod
1198
1198
  def find_cc(
1199
- arch: archinfo.Arch, args: list[SimRegArg | SimStackArg], sp_delta: int, platform: str = "Linux"
1199
+ arch: archinfo.Arch, args: list[SimRegArg | SimStackArg], sp_delta: int, platform: str | None = "Linux"
1200
1200
  ) -> SimCC | None:
1201
1201
  """
1202
1202
  Pinpoint the best-fit calling convention and return the corresponding SimCC instance, or None if no fit is
@@ -1335,6 +1335,21 @@ class SimCCMicrosoftCdecl(SimCCCdecl):
1335
1335
  STRUCT_RETURN_THRESHOLD = 64
1336
1336
 
1337
1337
 
1338
+ class SimCCMicrosoftThiscall(SimCCCdecl):
1339
+ CALLEE_CLEANUP = True
1340
+ ARG_REGS = ["ecx"]
1341
+ CALLER_SAVED_REGS = ["eax", "ecx", "edx"]
1342
+ STRUCT_RETURN_THRESHOLD = 64
1343
+
1344
+ def arg_locs(self, prototype) -> list[SimFunctionArgument]:
1345
+ if prototype._arch is None:
1346
+ prototype = prototype.with_arch(self.arch)
1347
+ session = self.arg_session(prototype.returnty)
1348
+ if not prototype.args:
1349
+ return []
1350
+ return [SimRegArg("ecx", self.arch.bytes)] + [self.next_arg(session, arg_ty) for arg_ty in prototype.args[1:]]
1351
+
1352
+
1338
1353
  class SimCCStdcall(SimCCMicrosoftCdecl):
1339
1354
  CALLEE_CLEANUP = True
1340
1355
 
@@ -1469,7 +1484,7 @@ class SimCCSyscall(SimCC):
1469
1484
  self.ERROR_REG.set_value(state, error_reg_val)
1470
1485
  return expr
1471
1486
 
1472
- def set_return_val(self, state, val, ty, **kwargs): # pylint:disable=arguments-differ
1487
+ def set_return_val(self, state, val, ty, **kwargs): # type:ignore # pylint:disable=arguments-differ
1473
1488
  if self.ERROR_REG is not None:
1474
1489
  val = self.linux_syscall_update_error_reg(state, val)
1475
1490
  super().set_return_val(state, val, ty, **kwargs)
@@ -1607,6 +1622,7 @@ class SimCCSystemVAMD64(SimCC):
1607
1622
  classification = self._classify(ty)
1608
1623
  if any(cls == "MEMORY" for cls in classification):
1609
1624
  assert all(cls == "MEMORY" for cls in classification)
1625
+ assert ty.size is not None
1610
1626
  byte_size = ty.size // self.arch.byte_width
1611
1627
  referenced_locs = [SimStackArg(offset, self.arch.bytes) for offset in range(0, byte_size, self.arch.bytes)]
1612
1628
  referenced_loc = refine_locs_with_struct_type(self.arch, referenced_locs, ty)
@@ -1645,6 +1661,7 @@ class SimCCSystemVAMD64(SimCC):
1645
1661
  if isinstance(ty, (SimTypeFloat,)):
1646
1662
  return ["SSE"] + ["SSEUP"] * (nchunks - 1)
1647
1663
  if isinstance(ty, (SimStruct, SimTypeFixedSizeArray, SimUnion)):
1664
+ assert ty.size is not None
1648
1665
  if ty.size > 512:
1649
1666
  return ["MEMORY"] * nchunks
1650
1667
  flattened = self._flatten(ty)
@@ -1723,7 +1740,7 @@ class SimCCAMD64LinuxSyscall(SimCCSyscall):
1723
1740
  CALLER_SAVED_REGS = ["rax", "rcx", "r11"]
1724
1741
 
1725
1742
  @staticmethod
1726
- def _match(arch, args, sp_delta): # pylint: disable=unused-argument
1743
+ def _match(arch, args, sp_delta): # type:ignore # pylint: disable=unused-argument
1727
1744
  # doesn't appear anywhere but syscalls
1728
1745
  return False
1729
1746
 
@@ -1855,6 +1872,7 @@ class SimCCARM(SimCC):
1855
1872
  for suboffset, subsubty_list in subresult.items():
1856
1873
  result[offset + suboffset] += subsubty_list
1857
1874
  elif isinstance(ty, SimTypeFixedSizeArray):
1875
+ assert ty.elem_type.size is not None
1858
1876
  subresult = self._flatten(ty.elem_type)
1859
1877
  if subresult is None:
1860
1878
  return None
@@ -2273,7 +2291,7 @@ class SimCCUnknown(SimCC):
2273
2291
  """
2274
2292
 
2275
2293
  @staticmethod
2276
- def _match(arch, args, sp_delta): # pylint: disable=unused-argument
2294
+ def _match(arch, args, sp_delta): # type:ignore # pylint: disable=unused-argument
2277
2295
  # It always returns True
2278
2296
  return True
2279
2297
 
@@ -2317,7 +2335,7 @@ CC: dict[str, dict[str, list[type[SimCC]]]] = {
2317
2335
  "default": [SimCCCdecl],
2318
2336
  "Linux": [SimCCCdecl],
2319
2337
  "CGC": [SimCCCdecl],
2320
- "Win32": [SimCCMicrosoftCdecl, SimCCMicrosoftFastcall],
2338
+ "Win32": [SimCCMicrosoftCdecl, SimCCMicrosoftFastcall, SimCCMicrosoftThiscall],
2321
2339
  },
2322
2340
  "ARMEL": {
2323
2341
  "default": [SimCCARM],
@@ -533,6 +533,7 @@ class SimEngineLightAIL(
533
533
  def __init__(self, *args, **kwargs):
534
534
  self._stmt_handlers: dict[str, Callable[[Any], StmtDataType]] = {
535
535
  "Assignment": self._handle_stmt_Assignment,
536
+ "WeakAssignment": self._handle_stmt_WeakAssignment,
536
537
  "Store": self._handle_stmt_Store,
537
538
  "Jump": self._handle_stmt_Jump,
538
539
  "ConditionalJump": self._handle_stmt_ConditionalJump,
@@ -697,6 +698,9 @@ class SimEngineLightAIL(
697
698
  @abstractmethod
698
699
  def _handle_stmt_Assignment(self, stmt: ailment.statement.Assignment) -> StmtDataType: ...
699
700
 
701
+ @abstractmethod
702
+ def _handle_stmt_WeakAssignment(self, stmt: ailment.statement.WeakAssignment) -> StmtDataType: ...
703
+
700
704
  @abstractmethod
701
705
  def _handle_stmt_Store(self, stmt: ailment.statement.Store) -> StmtDataType: ...
702
706
 
@@ -1006,6 +1010,9 @@ class SimEngineNostmtAIL(
1006
1010
  def _handle_stmt_Assignment(self, stmt) -> StmtDataType | None:
1007
1011
  pass
1008
1012
 
1013
+ def _handle_stmt_WeakAssignment(self, stmt) -> StmtDataType | None:
1014
+ pass
1015
+
1009
1016
  def _handle_stmt_Store(self, stmt) -> StmtDataType | None:
1010
1017
  pass
1011
1018