angr 9.2.148__py3-none-macosx_11_0_arm64.whl → 9.2.150__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (62) hide show
  1. angr/__init__.py +1 -1
  2. angr/__main__.py +100 -37
  3. angr/analyses/calling_convention/calling_convention.py +42 -2
  4. angr/analyses/cfg/cfg_emulated.py +5 -2
  5. angr/analyses/cfg/cfg_fast.py +48 -46
  6. angr/analyses/decompiler/ail_simplifier.py +65 -32
  7. angr/analyses/decompiler/block_simplifier.py +20 -6
  8. angr/analyses/decompiler/clinic.py +80 -13
  9. angr/analyses/decompiler/dephication/rewriting_engine.py +24 -2
  10. angr/analyses/decompiler/optimization_passes/__init__.py +5 -0
  11. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +15 -13
  12. angr/analyses/decompiler/optimization_passes/determine_load_sizes.py +64 -0
  13. angr/analyses/decompiler/optimization_passes/eager_std_string_concatenation.py +165 -0
  14. angr/analyses/decompiler/optimization_passes/engine_base.py +11 -2
  15. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +2 -1
  16. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +17 -2
  17. angr/analyses/decompiler/optimization_passes/optimization_pass.py +10 -6
  18. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +99 -30
  19. angr/analyses/decompiler/peephole_optimizations/__init__.py +6 -0
  20. angr/analyses/decompiler/peephole_optimizations/base.py +43 -3
  21. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
  22. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +3 -0
  23. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +4 -1
  24. angr/analyses/decompiler/peephole_optimizations/remove_cxx_destructor_calls.py +32 -0
  25. angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +69 -2
  26. angr/analyses/decompiler/peephole_optimizations/rewrite_conv_mul.py +40 -0
  27. angr/analyses/decompiler/peephole_optimizations/rewrite_cxx_operator_calls.py +90 -0
  28. angr/analyses/decompiler/presets/fast.py +2 -0
  29. angr/analyses/decompiler/presets/full.py +2 -0
  30. angr/analyses/decompiler/ssailification/rewriting_engine.py +51 -4
  31. angr/analyses/decompiler/ssailification/ssailification.py +23 -3
  32. angr/analyses/decompiler/ssailification/traversal_engine.py +15 -1
  33. angr/analyses/decompiler/structured_codegen/c.py +141 -10
  34. angr/analyses/decompiler/utils.py +23 -1
  35. angr/analyses/disassembly.py +2 -1
  36. angr/analyses/patchfinder.py +1 -1
  37. angr/analyses/s_reaching_definitions/s_rda_view.py +1 -0
  38. angr/analyses/typehoon/lifter.py +20 -0
  39. angr/analyses/typehoon/simple_solver.py +42 -9
  40. angr/analyses/typehoon/translator.py +4 -1
  41. angr/analyses/typehoon/typeconsts.py +17 -6
  42. angr/analyses/typehoon/typehoon.py +25 -6
  43. angr/analyses/variable_recovery/engine_ail.py +44 -5
  44. angr/analyses/variable_recovery/engine_base.py +35 -12
  45. angr/analyses/variable_recovery/variable_recovery_fast.py +33 -2
  46. angr/calling_conventions.py +23 -5
  47. angr/engines/light/engine.py +7 -0
  48. angr/engines/pcode/lifter.py +7 -0
  49. angr/knowledge_plugins/functions/function.py +68 -0
  50. angr/knowledge_plugins/propagations/states.py +5 -2
  51. angr/knowledge_plugins/variables/variable_manager.py +3 -3
  52. angr/lib/angr_native.dylib +0 -0
  53. angr/procedures/definitions/__init__.py +1 -1
  54. angr/procedures/definitions/types_stl.py +22 -0
  55. angr/sim_type.py +251 -130
  56. angr/utils/graph.py +51 -27
  57. {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/METADATA +7 -7
  58. {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/RECORD +62 -56
  59. {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/WHEEL +1 -1
  60. {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/licenses/LICENSE +3 -0
  61. {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/entry_points.txt +0 -0
  62. {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/top_level.txt +0 -0
@@ -723,6 +723,23 @@ def structured_node_is_simple_return(
723
723
  return False
724
724
 
725
725
 
726
+ def structured_node_is_simple_return_strict(node: BaseNode | SequenceNode | MultiNode | ailment.Block) -> bool:
727
+ """
728
+ Returns True iff the node exclusively contains a return statement.
729
+ """
730
+ if isinstance(node, (SequenceNode, MultiNode)) and node.nodes:
731
+ flat_blocks = _flatten_structured_node(node)
732
+ if len(flat_blocks) != 1:
733
+ return False
734
+ node = flat_blocks[-1]
735
+
736
+ return (
737
+ isinstance(node, ailment.Block)
738
+ and len(node.statements) == 1
739
+ and isinstance(node.statements[0], ailment.Stmt.Return)
740
+ )
741
+
742
+
726
743
  def is_statement_terminating(stmt: ailment.statement.Statement, functions) -> bool:
727
744
  if isinstance(stmt, ailment.Stmt.Return):
728
745
  return True
@@ -851,11 +868,16 @@ def peephole_optimize_stmts(block, stmt_opts):
851
868
  r = opt.optimize(stmt, stmt_idx=stmt_idx, block=block)
852
869
  if r is not None and r is not stmt:
853
870
  stmt = r
871
+ if r == ():
872
+ # the statement is gone; no more redo
873
+ redo = False
874
+ break
854
875
  redo = True
855
876
  break
856
877
 
857
878
  if stmt is not None and stmt is not old_stmt:
858
- statements.append(stmt)
879
+ if stmt != ():
880
+ statements.append(stmt)
859
881
  any_update = True
860
882
  else:
861
883
  statements.append(old_stmt)
@@ -1159,6 +1159,7 @@ class Disassembly(Analysis):
1159
1159
  show_bytes: bool = False,
1160
1160
  ascii_only: bool | None = None,
1161
1161
  color: bool = True,
1162
+ min_edge_depth: int = 0,
1162
1163
  ) -> str:
1163
1164
  """
1164
1165
  Render the disassembly to a string, with optional edges and addresses.
@@ -1288,7 +1289,7 @@ class Disassembly(Analysis):
1288
1289
  for f, t in sorted(edges_by_line, key=lambda e: abs(e[0] - e[1])):
1289
1290
  add_edge_to_buffer(edge_buf, ref_buf, f, t, lambda s: ansi_color(s, edge_col), ascii_only=ascii_only)
1290
1291
  add_edge_to_buffer(ref_buf, ref_buf, f, t, ascii_only=ascii_only)
1291
- max_edge_depth = max(map(len, ref_buf))
1292
+ max_edge_depth = max(*map(len, ref_buf), min_edge_depth)
1292
1293
 
1293
1294
  # Justify edge and combine with disassembly
1294
1295
  for i, line in enumerate(buf):
@@ -97,7 +97,7 @@ class PatchFinderAnalysis(Analysis):
97
97
  # - Looking for instruction partials broken by a patch (nodecode)
98
98
  # - Unusual stack manipulation
99
99
 
100
- atypical_alignments: list[Function]
100
+ atypical_alignments: list[AtypicallyAlignedFunction]
101
101
  possibly_patched_out: list[PatchedOutFunctionality]
102
102
 
103
103
  def __init__(self):
@@ -130,6 +130,7 @@ class SRDAView:
130
130
 
131
131
  for stmt in reversed(stmts):
132
132
  r = predicate(stmt)
133
+ predicate_returned_true |= r
133
134
  should_break = (predicate_returned_true and r is False) if consecutive else r
134
135
  if should_break:
135
136
  break
@@ -13,6 +13,7 @@ from angr.sim_type import (
13
13
  SimTypeArray,
14
14
  SimTypeFloat,
15
15
  SimTypeDouble,
16
+ SimCppClass,
16
17
  )
17
18
  from .typeconsts import BottomType, Int8, Int16, Int32, Int64, Pointer32, Pointer64, Struct, Array, Float32, Float64
18
19
 
@@ -77,6 +78,24 @@ class TypeLifter:
77
78
  obj.field_names = field_names
78
79
  return obj
79
80
 
81
+ def _lift_SimCppClass(self, ty: SimCppClass) -> TypeConstant | BottomType:
82
+ if ty in self.memo:
83
+ return BottomType()
84
+
85
+ obj = Struct(fields={}, name=ty.name, is_cppclass=True)
86
+ self.memo[ty] = obj
87
+ converted_fields = {}
88
+ field_names = {}
89
+ ty_offsets = ty.offsets
90
+ for field_name, simtype in ty.members.items():
91
+ if field_name not in ty_offsets:
92
+ return BottomType()
93
+ converted_fields[ty_offsets[field_name]] = self.lift(simtype)
94
+ field_names[ty_offsets[field_name]] = field_name
95
+ obj.fields = converted_fields
96
+ obj.field_names = field_names
97
+ return obj
98
+
80
99
  def _lift_SimTypeArray(self, ty: SimTypeArray) -> Array:
81
100
  elem_type = self.lift(ty.elem_type)
82
101
  return Array(elem_type, count=ty.length)
@@ -96,6 +115,7 @@ _mapping = {
96
115
  SimTypeLongLong: TypeLifter._lift_SimTypeLongLong,
97
116
  SimTypePointer: TypeLifter._lift_SimTypePointer,
98
117
  SimStruct: TypeLifter._lift_SimStruct,
118
+ SimCppClass: TypeLifter._lift_SimCppClass,
99
119
  SimTypeArray: TypeLifter._lift_SimTypeArray,
100
120
  SimTypeFloat: TypeLifter._lift_SimTypeFloat,
101
121
  SimTypeDouble: TypeLifter._lift_SimTypeDouble,
@@ -181,7 +181,7 @@ class Sketch:
181
181
 
182
182
  def __init__(self, solver: SimpleSolver, root: TypeVariable):
183
183
  self.root: SketchNode = SketchNode(root)
184
- self.graph = networkx.DiGraph()
184
+ self.graph = networkx.MultiDiGraph()
185
185
  self.node_mapping: dict[TypeVariable | DerivedTypeVariable, SketchNodeBase] = {}
186
186
  self.solver = solver
187
187
 
@@ -200,7 +200,7 @@ class Sketch:
200
200
  for label in typevar.labels:
201
201
  succs = []
202
202
  for _, dst, data in self.graph.out_edges(node, data=True):
203
- if "label" in data and data["label"] == label:
203
+ if "label" in data and data["label"] == label and dst not in succs:
204
204
  succs.append(dst)
205
205
  if len(succs) > 1:
206
206
  _l.warning(
@@ -215,6 +215,11 @@ class Sketch:
215
215
  return node
216
216
 
217
217
  def add_edge(self, src: SketchNodeBase, dst: SketchNodeBase, label) -> None:
218
+ # ensure the label does not already exist in existing edges
219
+ if self.graph.has_edge(src, dst):
220
+ for data in self.graph.get_edge_data(src, dst).values():
221
+ if "label" in data and data["label"] == label:
222
+ return
218
223
  self.graph.add_edge(src, dst, label=label)
219
224
 
220
225
  def add_constraint(self, constraint: TypeConstraint) -> None:
@@ -315,7 +320,7 @@ class ConstraintGraphNode:
315
320
  tag_str = "R"
316
321
  else:
317
322
  tag_str = "U"
318
- forgotten_str = "PRE" if FORGOTTEN.PRE_FORGOTTEN else "POST"
323
+ forgotten_str = "PRE" if self.forgotten == FORGOTTEN.PRE_FORGOTTEN else "POST"
319
324
  s = f"{self.typevar}#{variance_str}.{tag_str}.{forgotten_str}"
320
325
  if ":" in s:
321
326
  return '"' + s + '"'
@@ -820,6 +825,7 @@ class SimpleSolver:
820
825
  """
821
826
 
822
827
  graph = networkx.DiGraph()
828
+ constraints = self._get_transitive_subtype_constraints(constraints)
823
829
  for constraint in constraints:
824
830
  if isinstance(constraint, Subtype):
825
831
  self._constraint_graph_add_edges(
@@ -830,6 +836,33 @@ class SimpleSolver:
830
836
  self._constraint_graph_recall_forget_split(graph)
831
837
  return graph
832
838
 
839
+ @staticmethod
840
+ def _get_transitive_subtype_constraints(constraints: set[TypeConstraint]) -> set[TypeConstraint]:
841
+ """
842
+ Apply the S-Trans rule: a <: b, b <: c => a <: c
843
+ """
844
+ tv2supertypes = defaultdict(set)
845
+ for constraint in constraints:
846
+ if isinstance(constraint, Subtype):
847
+ tv2supertypes[constraint.sub_type].add(constraint.super_type)
848
+
849
+ new_constraints = set()
850
+ while True:
851
+ changed = False
852
+ for subtype, supertypes in tv2supertypes.items():
853
+ supertypes_copy = set(supertypes)
854
+ for supertype in supertypes_copy:
855
+ if supertype in tv2supertypes:
856
+ for supertype_ in tv2supertypes[supertype]:
857
+ if supertype_ not in supertypes_copy:
858
+ changed = True
859
+ supertypes.add(supertype_)
860
+ new_constraints.add(Subtype(subtype, supertype_))
861
+ if not changed:
862
+ break
863
+
864
+ return constraints | new_constraints
865
+
833
866
  @staticmethod
834
867
  def _constraint_graph_add_recall_edges(graph: networkx.DiGraph, node: ConstraintGraphNode) -> None:
835
868
  while True:
@@ -1234,21 +1267,21 @@ class SimpleSolver:
1234
1267
  offset_to_maxsize[base] = max(offset_to_maxsize[base], (last_label.offset - base) + access_size)
1235
1268
  offset_to_sizes[base].add(access_size)
1236
1269
 
1237
- node_to_base = {}
1270
+ idx_to_base = {}
1238
1271
 
1239
- for labels, succ in path_and_successors:
1272
+ for idx, (labels, _) in enumerate(path_and_successors):
1240
1273
  last_label = labels[-1] if labels else None
1241
1274
  if isinstance(last_label, HasField):
1242
1275
  prev_offset = next(offset_to_base.irange(maximum=last_label.offset, reverse=True))
1243
- node_to_base[succ] = offset_to_base[prev_offset]
1276
+ idx_to_base[idx] = offset_to_base[prev_offset]
1244
1277
 
1245
1278
  node_by_offset = defaultdict(set)
1246
1279
 
1247
- for labels, succ in path_and_successors:
1280
+ for idx, (labels, succ) in enumerate(path_and_successors):
1248
1281
  last_label = labels[-1] if labels else None
1249
1282
  if isinstance(last_label, HasField):
1250
- if succ in node_to_base:
1251
- node_by_offset[node_to_base[succ]].add(succ)
1283
+ if idx in idx_to_base:
1284
+ node_by_offset[idx_to_base[idx]].add(succ)
1252
1285
  else:
1253
1286
  node_by_offset[last_label.offset].add(succ)
1254
1287
 
@@ -105,7 +105,10 @@ class TypeTranslator:
105
105
 
106
106
  name = tc.name if tc.name else self.struct_name()
107
107
 
108
- s = sim_type.SimStruct({}, name=name).with_arch(self.arch)
108
+ if tc.is_cppclass:
109
+ s = sim_type.SimCppClass(name=name).with_arch(self.arch)
110
+ else:
111
+ s = sim_type.SimStruct({}, name=name).with_arch(self.arch)
109
112
  self.structs[tc] = s
110
113
 
111
114
  next_offset = 0
@@ -114,6 +114,18 @@ class Int512(Int):
114
114
  return "int512"
115
115
 
116
116
 
117
+ class IntVar(Int):
118
+ def __init__(self, size):
119
+ self._size = size
120
+
121
+ @property
122
+ def size(self) -> int:
123
+ return self._size
124
+
125
+ def __repr__(self, memo=None):
126
+ return "intvar"
127
+
128
+
117
129
  class Float(TypeConstant):
118
130
  def __repr__(self, memo=None) -> str:
119
131
  return "floatbase"
@@ -211,10 +223,11 @@ class Array(TypeConstant):
211
223
 
212
224
 
213
225
  class Struct(TypeConstant):
214
- def __init__(self, fields=None, name=None, field_names=None):
226
+ def __init__(self, fields=None, name=None, field_names=None, is_cppclass: bool = False):
215
227
  self.fields = {} if fields is None else fields # offset to type
216
228
  self.name = name
217
229
  self.field_names = field_names
230
+ self.is_cppclass = is_cppclass
218
231
 
219
232
  def _hash(self, visited: set[int]):
220
233
  if id(self) in visited:
@@ -236,9 +249,9 @@ class Struct(TypeConstant):
236
249
 
237
250
  @memoize
238
251
  def __repr__(self, memo=None):
239
- prefix = "struct"
252
+ prefix = "CppClass" if self.is_cppclass else "struct"
240
253
  if self.name:
241
- prefix = f"struct {self.name}"
254
+ prefix = f"{prefix} {self.name}"
242
255
  return (
243
256
  prefix
244
257
  + "{"
@@ -312,9 +325,7 @@ def int_type(bits: int) -> Int:
312
325
  256: Int256,
313
326
  512: Int512,
314
327
  }
315
- if bits in mapping:
316
- return mapping[bits]()
317
- raise TypeError(f"Not a known size of int: {bits}")
328
+ return mapping[bits]() if bits in mapping else IntVar(bits)
318
329
 
319
330
 
320
331
  def float_type(bits: int) -> Float | None:
@@ -10,7 +10,7 @@ from angr.sim_variable import SimVariable, SimStackVariable
10
10
  from .simple_solver import SimpleSolver
11
11
  from .translator import TypeTranslator
12
12
  from .typeconsts import Struct, Pointer, TypeConstant, Array, TopType
13
- from .typevars import Equivalence, Subtype, TypeVariable
13
+ from .typevars import Equivalence, Subtype, TypeVariable, DerivedTypeVariable
14
14
 
15
15
  if TYPE_CHECKING:
16
16
  from angr.sim_type import SimType
@@ -187,6 +187,10 @@ class Typehoon(Analysis):
187
187
  if self._ground_truth and self.simtypes_solution is not None:
188
188
  self.simtypes_solution.update(self._ground_truth)
189
189
 
190
+ @staticmethod
191
+ def _resolve_derived(tv):
192
+ return tv.type_var if isinstance(tv, DerivedTypeVariable) else tv
193
+
190
194
  def _solve(self):
191
195
  typevars = set()
192
196
  if self._var_mapping:
@@ -198,9 +202,10 @@ class Typehoon(Analysis):
198
202
  for constraint in self._constraints[self.func_var]:
199
203
  if isinstance(constraint, Subtype):
200
204
  if isinstance(constraint.sub_type, TypeVariable):
201
- typevars.add(constraint.sub_type)
205
+ typevars.add(self._resolve_derived(constraint.sub_type))
202
206
  if isinstance(constraint.super_type, TypeVariable):
203
- typevars.add(constraint.super_type)
207
+ typevars.add(self._resolve_derived(constraint.super_type))
208
+
204
209
  solver = SimpleSolver(self.bits, self._constraints, typevars, stackvar_max_sizes=self._stackvar_max_sizes)
205
210
  self.solution = solver.solution
206
211
 
@@ -214,13 +219,16 @@ class Typehoon(Analysis):
214
219
  if not self.solution:
215
220
  return
216
221
 
222
+ memo = set()
217
223
  for tv in list(self.solution.keys()):
218
224
  if self._must_struct and tv in self._must_struct:
219
225
  continue
220
226
  sol = self.solution[tv]
221
- specialized = self._specialize_struct(sol)
227
+ specialized = self._specialize_struct(sol, memo=memo)
222
228
  if specialized is not None:
223
229
  self.solution[tv] = specialized
230
+ else:
231
+ memo.add(sol)
224
232
 
225
233
  def _specialize_struct(self, tc, memo: set | None = None):
226
234
  if isinstance(tc, Pointer):
@@ -240,7 +248,11 @@ class Typehoon(Analysis):
240
248
  return field0
241
249
 
242
250
  # are all fields the same?
243
- if len(tc.fields) > 1 and all(tc.fields[off] == field0 for off in offsets):
251
+ if (
252
+ len(tc.fields) > 1
253
+ and not self._is_pointer_to(field0, tc)
254
+ and all(tc.fields[off] == field0 for off in offsets)
255
+ ):
244
256
  # are all fields aligned properly?
245
257
  try:
246
258
  alignment = field0.size
@@ -251,12 +263,19 @@ class Typehoon(Analysis):
251
263
  max_offset = offsets[-1]
252
264
  field0_size = 1
253
265
  if not isinstance(field0, TopType):
254
- field0_size = field0.size
266
+ try:
267
+ field0_size = field0.size
268
+ except NotImplementedError:
269
+ field0_size = 1
255
270
  count = (max_offset + field0_size) // alignment
256
271
  return Array(field0, count=count)
257
272
 
258
273
  return None
259
274
 
275
+ @staticmethod
276
+ def _is_pointer_to(pointer_to: TypeConstant, base_type: TypeConstant) -> bool:
277
+ return isinstance(pointer_to, Pointer) and pointer_to.basetype == base_type
278
+
260
279
  def _translate_to_simtypes(self):
261
280
  """
262
281
  Translate solutions in type variables to solutions in SimTypes.
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, cast
4
4
  import logging
5
5
 
6
6
  import ailment
7
+ from ailment.constant import UNDETERMINED_SIZE
7
8
  import claripy
8
9
  from unique_log_filter import UniqueLogFilter
9
10
 
@@ -30,8 +31,15 @@ class SimEngineVRAIL(
30
31
  The engine for variable recovery on AIL.
31
32
  """
32
33
 
33
- def __init__(self, *args, call_info=None, vvar_to_vvar: dict[int, int] | None, **kwargs):
34
- super().__init__(*args, **kwargs)
34
+ def __init__(
35
+ self,
36
+ *args,
37
+ call_info=None,
38
+ vvar_to_vvar: dict[int, int] | None,
39
+ vvar_type_hints: dict[int, typeconsts.TypeConstant] | None = None,
40
+ **kwargs,
41
+ ):
42
+ super().__init__(*args, vvar_type_hints=vvar_type_hints, **kwargs)
35
43
 
36
44
  self._reference_spoffset: bool = False
37
45
  self.call_info = call_info or {}
@@ -100,6 +108,13 @@ class SimEngineVRAIL(
100
108
  else:
101
109
  l.warning("Unsupported dst type %s.", dst_type)
102
110
 
111
+ def _handle_stmt_WeakAssignment(self, stmt) -> None:
112
+ src = self._expr(stmt.src)
113
+ dst = self._expr(stmt.dst)
114
+ if isinstance(src, RichR) and isinstance(dst, RichR) and src.typevar is not None and dst.typevar is not None:
115
+ tc = typevars.Subtype(src.typevar, dst.typevar)
116
+ self.state.add_type_constraint(tc)
117
+
103
118
  def _handle_stmt_Store(self, stmt: ailment.Stmt.Store):
104
119
  addr_r = self._expr_bv(stmt.addr)
105
120
  data = self._expr(stmt.data)
@@ -325,7 +340,9 @@ class SimEngineVRAIL(
325
340
  addr_r = self._expr_bv(expr.addr)
326
341
  size = expr.size
327
342
 
328
- return self._load(addr_r, size, expr=expr)
343
+ if size != UNDETERMINED_SIZE:
344
+ return self._load(addr_r, size, expr=expr)
345
+ return self._top(8)
329
346
 
330
347
  def _handle_expr_VirtualVariable(self, expr: ailment.Expr.VirtualVariable):
331
348
  return self._read_from_vvar(expr, expr=expr, vvar_id=self._mapped_vvarid(expr.varid))
@@ -419,6 +436,29 @@ class SimEngineVRAIL(
419
436
  self._reference(richr, codeloc, src=expr)
420
437
  return richr
421
438
 
439
+ def _handle_unop_Reference(self, expr: ailment.Expr.UnaryOp):
440
+ if isinstance(expr.operand, ailment.Expr.VirtualVariable) and expr.operand.was_stack:
441
+ off = expr.operand.stack_offset
442
+ refbase_typevar = self.state.stack_offset_typevars.get(off, None)
443
+ if refbase_typevar is None:
444
+ # allocate a new type variable
445
+ refbase_typevar = typevars.TypeVariable()
446
+ self.state.stack_offset_typevars[off] = refbase_typevar
447
+
448
+ ref_typevar = typevars.TypeVariable()
449
+ access_derived_typevar = self._create_access_typevar(ref_typevar, False, None, 0)
450
+ load_constraint = typevars.Subtype(refbase_typevar, access_derived_typevar)
451
+ self.state.add_type_constraint(load_constraint)
452
+
453
+ value_v = self.state.stack_address(off)
454
+ richr = RichR(value_v, typevar=ref_typevar)
455
+ codeloc = self._codeloc()
456
+ self._ensure_variable_existence(richr, codeloc, src_expr=expr.operand)
457
+ if self._reference_spoffset:
458
+ self._reference(richr, codeloc, src=expr.operand)
459
+ return richr
460
+ return RichR(self.state.top(expr.bits))
461
+
422
462
  def _handle_expr_BasePointerOffset(self, expr):
423
463
  # TODO
424
464
  return self._top(expr.bits)
@@ -433,7 +473,7 @@ class SimEngineVRAIL(
433
473
  def _handle_binop_Add(self, expr):
434
474
  arg0, arg1 = expr.operands
435
475
  r0, r1 = self._expr_pair(arg0, arg1)
436
- compute = r0.data + r1.data # type: ignore
476
+ compute = r0.data + r1.data if r0.data.size() == r1.data.size() else self.state.top(expr.bits) # type: ignore
437
477
 
438
478
  type_constraints = set()
439
479
  # create a new type variable and add constraints accordingly
@@ -844,7 +884,6 @@ class SimEngineVRAIL(
844
884
  self._expr(expr.operands[0])
845
885
  return RichR(self.state.top(expr.bits))
846
886
 
847
- _handle_unop_Reference = _handle_unop_Default
848
887
  _handle_unop_Dereference = _handle_unop_Default
849
888
  _handle_unop_Clz = _handle_unop_Default
850
889
  _handle_unop_Ctz = _handle_unop_Default
@@ -70,9 +70,12 @@ class SimEngineVRBase(
70
70
  and storing data.
71
71
  """
72
72
 
73
- def __init__(self, project, kb):
73
+ def __init__(self, project, kb, vvar_type_hints: dict[int, typeconsts.TypeConstant] | None = None):
74
74
  super().__init__(project)
75
75
 
76
+ self.vvar_type_hints: dict[int, typeconsts.TypeConstant] = (
77
+ vvar_type_hints if vvar_type_hints is not None else {}
78
+ )
76
79
  self.kb = kb
77
80
  self.vvar_region: dict[int, Any] = {}
78
81
 
@@ -453,13 +456,19 @@ class SimEngineVRBase(
453
456
  # assign a new type variable to it
454
457
  typevar = typevars.TypeVariable()
455
458
  self.state.typevars.add_type_variable(variable, typevar)
456
- # create constraints
457
459
  else:
458
460
  typevar = self.state.typevars.get_type_variable(variable)
461
+
462
+ # create constraints accordingly
463
+
459
464
  self.state.add_type_constraint(typevars.Subtype(richr.typevar, typevar))
460
- # the constraint below is a default constraint that may conflict with more specific ones with different
461
- # sizes; we post-process at the very end of VRA to remove conflicting default constraints.
462
- self.state.add_type_constraint(typevars.Subtype(typevar, typeconsts.int_type(variable.size * 8)))
465
+ if vvar.varid in self.vvar_type_hints:
466
+ # handle type hints
467
+ self.state.add_type_constraint(typevars.Subtype(typevar, self.vvar_type_hints[vvar.varid]))
468
+ else:
469
+ # the constraint below is a default constraint that may conflict with more specific ones with different
470
+ # sizes; we post-process at the very end of VRA to remove conflicting default constraints.
471
+ self.state.add_type_constraint(typevars.Subtype(typevar, typeconsts.int_type(variable.size * 8)))
463
472
 
464
473
  return variable
465
474
 
@@ -978,14 +987,22 @@ class SimEngineVRBase(
978
987
  value = self.state.top(size * self.project.arch.byte_width)
979
988
  if create_variable:
980
989
  # create a new variable if necessary
981
- variable = SimRegisterVariable(
982
- offset,
983
- size if force_variable_size is None else force_variable_size,
984
- ident=self.state.variable_manager[self.func_addr].next_variable_ident("register"),
985
- region=self.func_addr,
986
- )
990
+
991
+ # check if there is an existing variable for the atom at this location already
992
+ existing_vars: set[tuple[SimVariable, int]] = self.state.variable_manager[
993
+ self.func_addr
994
+ ].find_variables_by_atom(self.block.addr, self.stmt_idx, expr)
995
+ if not existing_vars:
996
+ variable = SimRegisterVariable(
997
+ offset,
998
+ size if force_variable_size is None else force_variable_size,
999
+ ident=self.state.variable_manager[self.func_addr].next_variable_ident("register"),
1000
+ region=self.func_addr,
1001
+ )
1002
+ self.state.variable_manager[self.func_addr].add_variable("register", offset, variable)
1003
+ else:
1004
+ variable = next(iter(existing_vars))[0]
987
1005
  value = self.state.annotate_with_variables(value, [(0, variable)])
988
- self.state.variable_manager[self.func_addr].add_variable("register", offset, variable)
989
1006
  self.state.register_region.store(offset, value)
990
1007
  value_list = [{value}]
991
1008
  else:
@@ -1131,6 +1148,12 @@ class SimEngineVRBase(
1131
1148
  if var is not None and var.size != vvar.size:
1132
1149
  # ignore the variable and the associated type if we are only reading part of the variable
1133
1150
  return RichR(value, variable=var)
1151
+
1152
+ # handle type hints
1153
+ if vvar.varid in self.vvar_type_hints:
1154
+ assert isinstance(typevar, typevars.TypeVariable)
1155
+ self.state.add_type_constraint(typevars.Subtype(typevar, self.vvar_type_hints[vvar.varid]))
1156
+
1134
1157
  return RichR(value, variable=var, typevar=typevar)
1135
1158
 
1136
1159
  def _create_access_typevar(
@@ -12,21 +12,25 @@ import ailment
12
12
  from ailment.expression import VirtualVariable
13
13
 
14
14
  import angr.errors
15
+ from angr import SIM_TYPE_COLLECTIONS
15
16
  from angr.analyses import AnalysesHub
16
17
  from angr.storage.memory_mixins.paged_memory.pages.multi_values import MultiValues
17
18
  from angr.block import Block
18
19
  from angr.errors import AngrVariableRecoveryError, SimEngineError
19
20
  from angr.knowledge_plugins import Function
21
+ from angr.knowledge_plugins.key_definitions import atoms
20
22
  from angr.sim_variable import SimStackVariable, SimRegisterVariable, SimVariable, SimMemoryVariable
21
23
  from angr.engines.vex.claripy.irop import vexop_to_simop
22
24
  from angr.analyses import ForwardAnalysis, visitors
23
25
  from angr.analyses.typehoon.typevars import Equivalence, TypeVariable, TypeVariables, Subtype, DerivedTypeVariable
24
- from angr.analyses.typehoon.typeconsts import Int
26
+ from angr.analyses.typehoon.typeconsts import Int, TypeConstant, BottomType, TopType
27
+ from angr.analyses.typehoon.lifter import TypeLifter
25
28
  from .variable_recovery_base import VariableRecoveryBase, VariableRecoveryStateBase
26
29
  from .engine_vex import SimEngineVRVEX
27
30
  from .engine_ail import SimEngineVRAIL
28
31
  import contextlib
29
32
 
33
+
30
34
  if TYPE_CHECKING:
31
35
  from angr.analyses.typehoon.typevars import TypeConstraint
32
36
 
@@ -241,6 +245,7 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
241
245
  unify_variables=True,
242
246
  func_arg_vvars: dict[int, tuple[VirtualVariable, SimVariable]] | None = None,
243
247
  vvar_to_vvar: dict[int, int] | None = None,
248
+ type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]] | None = None,
244
249
  ):
245
250
  if not isinstance(func, Function):
246
251
  func = self.kb.functions[func]
@@ -269,8 +274,17 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
269
274
  self._func_arg_vvars = func_arg_vvars
270
275
  self._unify_variables = unify_variables
271
276
 
277
+ # handle type hints
278
+ self.vvar_type_hints = {}
279
+ if type_hints:
280
+ self._parse_type_hints(type_hints)
281
+
272
282
  self._ail_engine: SimEngineVRAIL = SimEngineVRAIL(
273
- self.project, self.kb, call_info=call_info, vvar_to_vvar=self.vvar_to_vvar
283
+ self.project,
284
+ self.kb,
285
+ call_info=call_info,
286
+ vvar_to_vvar=self.vvar_to_vvar,
287
+ vvar_type_hints=self.vvar_type_hints,
274
288
  )
275
289
  self._vex_engine: SimEngineVRVEX = SimEngineVRVEX(self.project, self.kb, call_info=call_info)
276
290
 
@@ -617,5 +631,22 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
617
631
  if adjusted:
618
632
  state.register_region.store(self.project.arch.sp_offset, sp_v)
619
633
 
634
+ def _parse_type_hints(self, type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]]) -> None:
635
+ self.vvar_type_hints = {}
636
+ for loc, type_hint_str in type_hints:
637
+ if isinstance(loc, atoms.VirtualVariable):
638
+ type_hint = self._parse_type_hint(type_hint_str)
639
+ if type_hint is not None:
640
+ self.vvar_type_hints[loc.varid] = type_hint
641
+ # TODO: Handle other types of locations
642
+
643
+ def _parse_type_hint(self, type_hint_str: str) -> TypeConstant | None:
644
+ ty = SIM_TYPE_COLLECTIONS["cpp::std"].get(type_hint_str)
645
+ if ty is None:
646
+ return None
647
+ ty = ty.with_arch(self.project.arch)
648
+ lifted = TypeLifter(self.project.arch.bits).lift(ty)
649
+ return None if isinstance(lifted, (BottomType, TopType)) else lifted
650
+
620
651
 
621
652
  AnalysesHub.register_default("VariableRecoveryFast", VariableRecoveryFast)