angr 9.2.147__py3-none-macosx_11_0_arm64.whl → 9.2.149__py3-none-macosx_11_0_arm64.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (91)
  1. angr/__init__.py +1 -1
  2. angr/analyses/analysis.py +3 -11
  3. angr/analyses/calling_convention/calling_convention.py +42 -2
  4. angr/analyses/calling_convention/fact_collector.py +5 -4
  5. angr/analyses/calling_convention/utils.py +1 -0
  6. angr/analyses/cfg/cfg_base.py +3 -59
  7. angr/analyses/cfg/cfg_emulated.py +17 -14
  8. angr/analyses/cfg/cfg_fast.py +68 -63
  9. angr/analyses/cfg/cfg_fast_soot.py +3 -3
  10. angr/analyses/decompiler/ail_simplifier.py +65 -32
  11. angr/analyses/decompiler/block_simplifier.py +20 -6
  12. angr/analyses/decompiler/callsite_maker.py +28 -18
  13. angr/analyses/decompiler/clinic.py +84 -17
  14. angr/analyses/decompiler/condition_processor.py +0 -21
  15. angr/analyses/decompiler/counters/call_counter.py +3 -0
  16. angr/analyses/decompiler/dephication/rewriting_engine.py +24 -2
  17. angr/analyses/decompiler/optimization_passes/__init__.py +5 -0
  18. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +15 -13
  19. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +1 -1
  20. angr/analyses/decompiler/optimization_passes/determine_load_sizes.py +64 -0
  21. angr/analyses/decompiler/optimization_passes/eager_std_string_concatenation.py +165 -0
  22. angr/analyses/decompiler/optimization_passes/engine_base.py +11 -2
  23. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +17 -2
  24. angr/analyses/decompiler/optimization_passes/optimization_pass.py +10 -6
  25. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +99 -30
  26. angr/analyses/decompiler/peephole_optimizations/__init__.py +6 -0
  27. angr/analyses/decompiler/peephole_optimizations/base.py +43 -3
  28. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
  29. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +3 -0
  30. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +4 -1
  31. angr/analyses/decompiler/peephole_optimizations/remove_cxx_destructor_calls.py +32 -0
  32. angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +69 -2
  33. angr/analyses/decompiler/peephole_optimizations/remove_redundant_conversions.py +14 -0
  34. angr/analyses/decompiler/peephole_optimizations/rewrite_conv_mul.py +40 -0
  35. angr/analyses/decompiler/peephole_optimizations/rewrite_cxx_operator_calls.py +90 -0
  36. angr/analyses/decompiler/presets/fast.py +2 -0
  37. angr/analyses/decompiler/presets/full.py +2 -0
  38. angr/analyses/decompiler/ssailification/rewriting_engine.py +51 -4
  39. angr/analyses/decompiler/ssailification/ssailification.py +23 -3
  40. angr/analyses/decompiler/ssailification/traversal_engine.py +15 -1
  41. angr/analyses/decompiler/structured_codegen/c.py +146 -15
  42. angr/analyses/decompiler/structuring/phoenix.py +11 -3
  43. angr/analyses/decompiler/utils.py +6 -1
  44. angr/analyses/deobfuscator/api_obf_finder.py +5 -1
  45. angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +1 -1
  46. angr/analyses/forward_analysis/visitors/graph.py +0 -8
  47. angr/analyses/identifier/runner.py +1 -1
  48. angr/analyses/reaching_definitions/function_handler.py +4 -4
  49. angr/analyses/reassembler.py +1 -1
  50. angr/analyses/s_reaching_definitions/s_rda_view.py +1 -0
  51. angr/analyses/stack_pointer_tracker.py +1 -1
  52. angr/analyses/static_hooker.py +11 -9
  53. angr/analyses/typehoon/lifter.py +20 -0
  54. angr/analyses/typehoon/simple_solver.py +42 -9
  55. angr/analyses/typehoon/translator.py +4 -1
  56. angr/analyses/typehoon/typeconsts.py +17 -6
  57. angr/analyses/typehoon/typehoon.py +21 -5
  58. angr/analyses/variable_recovery/engine_ail.py +52 -13
  59. angr/analyses/variable_recovery/engine_base.py +37 -12
  60. angr/analyses/variable_recovery/variable_recovery_fast.py +33 -2
  61. angr/calling_conventions.py +96 -27
  62. angr/engines/light/engine.py +7 -0
  63. angr/exploration_techniques/director.py +1 -1
  64. angr/knowledge_plugins/functions/function.py +109 -38
  65. angr/knowledge_plugins/functions/function_manager.py +9 -0
  66. angr/knowledge_plugins/functions/function_parser.py +9 -1
  67. angr/knowledge_plugins/functions/soot_function.py +1 -1
  68. angr/knowledge_plugins/key_definitions/key_definition_manager.py +1 -1
  69. angr/knowledge_plugins/propagations/states.py +5 -2
  70. angr/knowledge_plugins/variables/variable_manager.py +3 -3
  71. angr/lib/angr_native.dylib +0 -0
  72. angr/procedures/definitions/__init__.py +15 -12
  73. angr/procedures/definitions/types_stl.py +22 -0
  74. angr/procedures/stubs/format_parser.py +1 -1
  75. angr/project.py +23 -29
  76. angr/protos/cfg_pb2.py +14 -25
  77. angr/protos/function_pb2.py +11 -22
  78. angr/protos/primitives_pb2.py +36 -47
  79. angr/protos/variables_pb2.py +28 -39
  80. angr/protos/xrefs_pb2.py +8 -19
  81. angr/sim_type.py +251 -146
  82. angr/simos/cgc.py +1 -1
  83. angr/simos/linux.py +5 -5
  84. angr/simos/windows.py +5 -5
  85. angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +1 -1
  86. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/METADATA +9 -8
  87. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/RECORD +91 -85
  88. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/WHEEL +1 -1
  89. {angr-9.2.147.dist-info → angr-9.2.149.dist-info/licenses}/LICENSE +3 -0
  90. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/entry_points.txt +0 -0
  91. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/top_level.txt +0 -0
@@ -851,11 +851,16 @@ def peephole_optimize_stmts(block, stmt_opts):
851
851
  r = opt.optimize(stmt, stmt_idx=stmt_idx, block=block)
852
852
  if r is not None and r is not stmt:
853
853
  stmt = r
854
+ if r == ():
855
+ # the statement is gone; no more redo
856
+ redo = False
857
+ break
854
858
  redo = True
855
859
  break
856
860
 
857
861
  if stmt is not None and stmt is not old_stmt:
858
- statements.append(stmt)
862
+ if stmt != ():
863
+ statements.append(stmt)
859
864
  any_update = True
860
865
  else:
861
866
  statements.append(old_stmt)
@@ -315,7 +315,11 @@ class APIObfuscationFinder(Analysis):
315
315
 
316
316
  @staticmethod
317
317
  def is_apiname(name: str) -> bool:
318
- return any(not isinstance(lib, SimSyscallLibrary) and lib.has_prototype(name) for lib in SIM_LIBRARIES.values())
318
+ return any(
319
+ not isinstance(lib, SimSyscallLibrary) and lib.has_prototype(name)
320
+ for libs in SIM_LIBRARIES.values()
321
+ for lib in libs
322
+ )
319
323
 
320
324
 
321
325
  AnalysesHub.register_default("APIObfuscationFinder", APIObfuscationFinder)
@@ -30,7 +30,7 @@ class APIObfType1PeepholeOptimizer(PeepholeOptimizationExprBase):
30
30
  # assign a new function on-demand
31
31
  symbol = self.project.loader.extern_object.make_extern(funcname)
32
32
  hook_addr = self.project.hook_symbol(
33
- symbol.rebased_addr, SIM_LIBRARIES["linux"].get_stub(funcname, self.project.arch)
33
+ symbol.rebased_addr, SIM_LIBRARIES["linux"][0].get_stub(funcname, self.project.arch)
34
34
  )
35
35
  func = self.kb.functions.function(addr=hook_addr, name=funcname, create=True)
36
36
  func.is_simprocedure = True
@@ -3,7 +3,6 @@ from typing import TypeVar, Generic
3
3
  from collections.abc import Collection, Iterator
4
4
  from collections import defaultdict
5
5
 
6
- from angr.misc.ux import deprecated
7
6
  from angr.utils.algo import binary_insert
8
7
 
9
8
  NodeType = TypeVar("NodeType")
@@ -94,13 +93,6 @@ class GraphVisitor(Generic[NodeType]):
94
93
 
95
94
  return iter(self.sort_nodes())
96
95
 
97
- @deprecated(replacement="nodes")
98
- def nodes_iter(self):
99
- """
100
- (Deprecated) Return an iterator of nodes following an optimal traversal order. Will be removed in the future.
101
- """
102
- return self.nodes()
103
-
104
96
  # Traversal
105
97
 
106
98
  def reset(self):
@@ -29,7 +29,7 @@ assert len(FLAG_DATA) == 0x1000
29
29
  class Runner:
30
30
  def __init__(self, project, cfg):
31
31
  # this is kind of fucked up
32
- project.simos.syscall_library.update(SIM_LIBRARIES["cgcabi_tracer"])
32
+ project.simos.syscall_library.update(SIM_LIBRARIES["cgcabi_tracer"][0])
33
33
 
34
34
  self.project = project
35
35
  self.cfg = cfg
@@ -401,10 +401,10 @@ class FunctionHandler:
401
401
  )
402
402
  type_collections = []
403
403
  if prototype_libname is not None and prototype_libname in SIM_LIBRARIES:
404
- prototype_lib = SIM_LIBRARIES[prototype_libname]
405
- if prototype_lib.type_collection_names:
406
- for typelib_name in prototype_lib.type_collection_names:
407
- type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
404
+ for prototype_lib in SIM_LIBRARIES[prototype_libname]:
405
+ if prototype_lib.type_collection_names:
406
+ for typelib_name in prototype_lib.type_collection_names:
407
+ type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
408
408
  if type_collections:
409
409
  prototype = dereference_simtype(data.prototype, type_collections).with_arch(state.arch)
410
410
  data.prototype = cast(SimTypeFunction, prototype)
@@ -2410,7 +2410,7 @@ class Reassembler(Analysis):
2410
2410
 
2411
2411
  # collect address of all instructions
2412
2412
  l.debug("Collecting instruction addresses...")
2413
- for cfg_node in self.cfg.nodes():
2413
+ for cfg_node in self.cfg.model.nodes():
2414
2414
  self.all_insn_addrs |= set(cfg_node.instruction_addrs)
2415
2415
 
2416
2416
  # Functions
@@ -130,6 +130,7 @@ class SRDAView:
130
130
 
131
131
  for stmt in reversed(stmts):
132
132
  r = predicate(stmt)
133
+ predicate_returned_true |= r
133
134
  should_break = (predicate_returned_true and r is False) if consecutive else r
134
135
  if should_break:
135
136
  break
@@ -520,7 +520,7 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
520
520
  # Setting register values to fresh ones will cause problems down the line when merging with normal
521
521
  # register values happen. therefore, we set their values to BOTTOM. these BOTTOMs will be replaced once
522
522
  # a merge with normal blocks happen.
523
- initial_regs = {r: BOTTOM for r in self.reg_offsets}
523
+ initial_regs = dict.fromkeys(self.reg_offsets, BOTTOM)
524
524
 
525
525
  return StackPointerTrackerState(
526
526
  regs=initial_regs, memory={}, is_tracking_memory=self.track_mem, resilient=self._resilient
@@ -21,7 +21,7 @@ class StaticHooker(Analysis):
21
21
  def __init__(self, library, binary=None):
22
22
  self.results = {}
23
23
  try:
24
- lib = SIM_LIBRARIES[library]
24
+ libs = SIM_LIBRARIES[library]
25
25
  except KeyError as err:
26
26
  raise AngrValueError(f"No such library {library}") from err
27
27
 
@@ -36,14 +36,16 @@ class StaticHooker(Analysis):
36
36
  l.debug("Skipping %s at %#x, already hooked", func.name, func.rebased_addr)
37
37
  continue
38
38
 
39
- if lib.has_implementation(func.name):
40
- proc = lib.get(func.name, self.project.arch)
41
- self.results[func.rebased_addr] = proc
42
- if self.project.is_hooked(func.rebased_addr):
43
- l.debug("Skipping %s at %#x, already hooked", func.name, func.rebased_addr)
44
- else:
45
- self.project.hook(func.rebased_addr, proc)
46
- l.info("Hooked %s at %#x", func.name, func.rebased_addr)
39
+ for lib in libs:
40
+ if lib.has_implementation(func.name):
41
+ proc = lib.get(func.name, self.project.arch)
42
+ self.results[func.rebased_addr] = proc
43
+ if self.project.is_hooked(func.rebased_addr):
44
+ l.debug("Skipping %s at %#x, already hooked", func.name, func.rebased_addr)
45
+ else:
46
+ self.project.hook(func.rebased_addr, proc)
47
+ l.info("Hooked %s at %#x", func.name, func.rebased_addr)
48
+ break
47
49
  else:
48
50
  l.debug("Failed to hook %s at %#x", func.name, func.rebased_addr)
49
51
 
@@ -13,6 +13,7 @@ from angr.sim_type import (
13
13
  SimTypeArray,
14
14
  SimTypeFloat,
15
15
  SimTypeDouble,
16
+ SimCppClass,
16
17
  )
17
18
  from .typeconsts import BottomType, Int8, Int16, Int32, Int64, Pointer32, Pointer64, Struct, Array, Float32, Float64
18
19
 
@@ -77,6 +78,24 @@ class TypeLifter:
77
78
  obj.field_names = field_names
78
79
  return obj
79
80
 
81
+ def _lift_SimCppClass(self, ty: SimCppClass) -> TypeConstant | BottomType:
82
+ if ty in self.memo:
83
+ return BottomType()
84
+
85
+ obj = Struct(fields={}, name=ty.name, is_cppclass=True)
86
+ self.memo[ty] = obj
87
+ converted_fields = {}
88
+ field_names = {}
89
+ ty_offsets = ty.offsets
90
+ for field_name, simtype in ty.members.items():
91
+ if field_name not in ty_offsets:
92
+ return BottomType()
93
+ converted_fields[ty_offsets[field_name]] = self.lift(simtype)
94
+ field_names[ty_offsets[field_name]] = field_name
95
+ obj.fields = converted_fields
96
+ obj.field_names = field_names
97
+ return obj
98
+
80
99
  def _lift_SimTypeArray(self, ty: SimTypeArray) -> Array:
81
100
  elem_type = self.lift(ty.elem_type)
82
101
  return Array(elem_type, count=ty.length)
@@ -96,6 +115,7 @@ _mapping = {
96
115
  SimTypeLongLong: TypeLifter._lift_SimTypeLongLong,
97
116
  SimTypePointer: TypeLifter._lift_SimTypePointer,
98
117
  SimStruct: TypeLifter._lift_SimStruct,
118
+ SimCppClass: TypeLifter._lift_SimCppClass,
99
119
  SimTypeArray: TypeLifter._lift_SimTypeArray,
100
120
  SimTypeFloat: TypeLifter._lift_SimTypeFloat,
101
121
  SimTypeDouble: TypeLifter._lift_SimTypeDouble,
@@ -181,7 +181,7 @@ class Sketch:
181
181
 
182
182
  def __init__(self, solver: SimpleSolver, root: TypeVariable):
183
183
  self.root: SketchNode = SketchNode(root)
184
- self.graph = networkx.DiGraph()
184
+ self.graph = networkx.MultiDiGraph()
185
185
  self.node_mapping: dict[TypeVariable | DerivedTypeVariable, SketchNodeBase] = {}
186
186
  self.solver = solver
187
187
 
@@ -200,7 +200,7 @@ class Sketch:
200
200
  for label in typevar.labels:
201
201
  succs = []
202
202
  for _, dst, data in self.graph.out_edges(node, data=True):
203
- if "label" in data and data["label"] == label:
203
+ if "label" in data and data["label"] == label and dst not in succs:
204
204
  succs.append(dst)
205
205
  if len(succs) > 1:
206
206
  _l.warning(
@@ -215,6 +215,11 @@ class Sketch:
215
215
  return node
216
216
 
217
217
  def add_edge(self, src: SketchNodeBase, dst: SketchNodeBase, label) -> None:
218
+ # ensure the label does not already exist in existing edges
219
+ if self.graph.has_edge(src, dst):
220
+ for data in self.graph.get_edge_data(src, dst).values():
221
+ if "label" in data and data["label"] == label:
222
+ return
218
223
  self.graph.add_edge(src, dst, label=label)
219
224
 
220
225
  def add_constraint(self, constraint: TypeConstraint) -> None:
@@ -315,7 +320,7 @@ class ConstraintGraphNode:
315
320
  tag_str = "R"
316
321
  else:
317
322
  tag_str = "U"
318
- forgotten_str = "PRE" if FORGOTTEN.PRE_FORGOTTEN else "POST"
323
+ forgotten_str = "PRE" if self.forgotten == FORGOTTEN.PRE_FORGOTTEN else "POST"
319
324
  s = f"{self.typevar}#{variance_str}.{tag_str}.{forgotten_str}"
320
325
  if ":" in s:
321
326
  return '"' + s + '"'
@@ -820,6 +825,7 @@ class SimpleSolver:
820
825
  """
821
826
 
822
827
  graph = networkx.DiGraph()
828
+ constraints = self._get_transitive_subtype_constraints(constraints)
823
829
  for constraint in constraints:
824
830
  if isinstance(constraint, Subtype):
825
831
  self._constraint_graph_add_edges(
@@ -830,6 +836,33 @@ class SimpleSolver:
830
836
  self._constraint_graph_recall_forget_split(graph)
831
837
  return graph
832
838
 
839
+ @staticmethod
840
+ def _get_transitive_subtype_constraints(constraints: set[TypeConstraint]) -> set[TypeConstraint]:
841
+ """
842
+ Apply the S-Trans rule: a <: b, b <: c => a <: c
843
+ """
844
+ tv2supertypes = defaultdict(set)
845
+ for constraint in constraints:
846
+ if isinstance(constraint, Subtype):
847
+ tv2supertypes[constraint.sub_type].add(constraint.super_type)
848
+
849
+ new_constraints = set()
850
+ while True:
851
+ changed = False
852
+ for subtype, supertypes in tv2supertypes.items():
853
+ supertypes_copy = set(supertypes)
854
+ for supertype in supertypes_copy:
855
+ if supertype in tv2supertypes:
856
+ for supertype_ in tv2supertypes[supertype]:
857
+ if supertype_ not in supertypes_copy:
858
+ changed = True
859
+ supertypes.add(supertype_)
860
+ new_constraints.add(Subtype(subtype, supertype_))
861
+ if not changed:
862
+ break
863
+
864
+ return constraints | new_constraints
865
+
833
866
  @staticmethod
834
867
  def _constraint_graph_add_recall_edges(graph: networkx.DiGraph, node: ConstraintGraphNode) -> None:
835
868
  while True:
@@ -1234,21 +1267,21 @@ class SimpleSolver:
1234
1267
  offset_to_maxsize[base] = max(offset_to_maxsize[base], (last_label.offset - base) + access_size)
1235
1268
  offset_to_sizes[base].add(access_size)
1236
1269
 
1237
- node_to_base = {}
1270
+ idx_to_base = {}
1238
1271
 
1239
- for labels, succ in path_and_successors:
1272
+ for idx, (labels, _) in enumerate(path_and_successors):
1240
1273
  last_label = labels[-1] if labels else None
1241
1274
  if isinstance(last_label, HasField):
1242
1275
  prev_offset = next(offset_to_base.irange(maximum=last_label.offset, reverse=True))
1243
- node_to_base[succ] = offset_to_base[prev_offset]
1276
+ idx_to_base[idx] = offset_to_base[prev_offset]
1244
1277
 
1245
1278
  node_by_offset = defaultdict(set)
1246
1279
 
1247
- for labels, succ in path_and_successors:
1280
+ for idx, (labels, succ) in enumerate(path_and_successors):
1248
1281
  last_label = labels[-1] if labels else None
1249
1282
  if isinstance(last_label, HasField):
1250
- if succ in node_to_base:
1251
- node_by_offset[node_to_base[succ]].add(succ)
1283
+ if idx in idx_to_base:
1284
+ node_by_offset[idx_to_base[idx]].add(succ)
1252
1285
  else:
1253
1286
  node_by_offset[last_label.offset].add(succ)
1254
1287
 
@@ -105,7 +105,10 @@ class TypeTranslator:
105
105
 
106
106
  name = tc.name if tc.name else self.struct_name()
107
107
 
108
- s = sim_type.SimStruct({}, name=name).with_arch(self.arch)
108
+ if tc.is_cppclass:
109
+ s = sim_type.SimCppClass(name=name).with_arch(self.arch)
110
+ else:
111
+ s = sim_type.SimStruct({}, name=name).with_arch(self.arch)
109
112
  self.structs[tc] = s
110
113
 
111
114
  next_offset = 0
@@ -114,6 +114,18 @@ class Int512(Int):
114
114
  return "int512"
115
115
 
116
116
 
117
+ class IntVar(Int):
118
+ def __init__(self, size):
119
+ self._size = size
120
+
121
+ @property
122
+ def size(self) -> int:
123
+ return self._size
124
+
125
+ def __repr__(self, memo=None):
126
+ return "intvar"
127
+
128
+
117
129
  class Float(TypeConstant):
118
130
  def __repr__(self, memo=None) -> str:
119
131
  return "floatbase"
@@ -211,10 +223,11 @@ class Array(TypeConstant):
211
223
 
212
224
 
213
225
  class Struct(TypeConstant):
214
- def __init__(self, fields=None, name=None, field_names=None):
226
+ def __init__(self, fields=None, name=None, field_names=None, is_cppclass: bool = False):
215
227
  self.fields = {} if fields is None else fields # offset to type
216
228
  self.name = name
217
229
  self.field_names = field_names
230
+ self.is_cppclass = is_cppclass
218
231
 
219
232
  def _hash(self, visited: set[int]):
220
233
  if id(self) in visited:
@@ -236,9 +249,9 @@ class Struct(TypeConstant):
236
249
 
237
250
  @memoize
238
251
  def __repr__(self, memo=None):
239
- prefix = "struct"
252
+ prefix = "CppClass" if self.is_cppclass else "struct"
240
253
  if self.name:
241
- prefix = f"struct {self.name}"
254
+ prefix = f"{prefix} {self.name}"
242
255
  return (
243
256
  prefix
244
257
  + "{"
@@ -312,9 +325,7 @@ def int_type(bits: int) -> Int:
312
325
  256: Int256,
313
326
  512: Int512,
314
327
  }
315
- if bits in mapping:
316
- return mapping[bits]()
317
- raise TypeError(f"Not a known size of int: {bits}")
328
+ return mapping[bits]() if bits in mapping else IntVar(bits)
318
329
 
319
330
 
320
331
  def float_type(bits: int) -> Float | None:
@@ -10,7 +10,7 @@ from angr.sim_variable import SimVariable, SimStackVariable
10
10
  from .simple_solver import SimpleSolver
11
11
  from .translator import TypeTranslator
12
12
  from .typeconsts import Struct, Pointer, TypeConstant, Array, TopType
13
- from .typevars import Equivalence, Subtype, TypeVariable
13
+ from .typevars import Equivalence, Subtype, TypeVariable, DerivedTypeVariable
14
14
 
15
15
  if TYPE_CHECKING:
16
16
  from angr.sim_type import SimType
@@ -187,6 +187,10 @@ class Typehoon(Analysis):
187
187
  if self._ground_truth and self.simtypes_solution is not None:
188
188
  self.simtypes_solution.update(self._ground_truth)
189
189
 
190
+ @staticmethod
191
+ def _resolve_derived(tv):
192
+ return tv.type_var if isinstance(tv, DerivedTypeVariable) else tv
193
+
190
194
  def _solve(self):
191
195
  typevars = set()
192
196
  if self._var_mapping:
@@ -198,9 +202,10 @@ class Typehoon(Analysis):
198
202
  for constraint in self._constraints[self.func_var]:
199
203
  if isinstance(constraint, Subtype):
200
204
  if isinstance(constraint.sub_type, TypeVariable):
201
- typevars.add(constraint.sub_type)
205
+ typevars.add(self._resolve_derived(constraint.sub_type))
202
206
  if isinstance(constraint.super_type, TypeVariable):
203
- typevars.add(constraint.super_type)
207
+ typevars.add(self._resolve_derived(constraint.super_type))
208
+
204
209
  solver = SimpleSolver(self.bits, self._constraints, typevars, stackvar_max_sizes=self._stackvar_max_sizes)
205
210
  self.solution = solver.solution
206
211
 
@@ -214,13 +219,16 @@ class Typehoon(Analysis):
214
219
  if not self.solution:
215
220
  return
216
221
 
222
+ memo = set()
217
223
  for tv in list(self.solution.keys()):
218
224
  if self._must_struct and tv in self._must_struct:
219
225
  continue
220
226
  sol = self.solution[tv]
221
- specialized = self._specialize_struct(sol)
227
+ specialized = self._specialize_struct(sol, memo=memo)
222
228
  if specialized is not None:
223
229
  self.solution[tv] = specialized
230
+ else:
231
+ memo.add(sol)
224
232
 
225
233
  def _specialize_struct(self, tc, memo: set | None = None):
226
234
  if isinstance(tc, Pointer):
@@ -240,7 +248,11 @@ class Typehoon(Analysis):
240
248
  return field0
241
249
 
242
250
  # are all fields the same?
243
- if len(tc.fields) > 1 and all(tc.fields[off] == field0 for off in offsets):
251
+ if (
252
+ len(tc.fields) > 1
253
+ and not self._is_pointer_to(field0, tc)
254
+ and all(tc.fields[off] == field0 for off in offsets)
255
+ ):
244
256
  # are all fields aligned properly?
245
257
  try:
246
258
  alignment = field0.size
@@ -257,6 +269,10 @@ class Typehoon(Analysis):
257
269
 
258
270
  return None
259
271
 
272
+ @staticmethod
273
+ def _is_pointer_to(pointer_to: TypeConstant, base_type: TypeConstant) -> bool:
274
+ return isinstance(pointer_to, Pointer) and pointer_to.basetype == base_type
275
+
260
276
  def _translate_to_simtypes(self):
261
277
  """
262
278
  Translate solutions in type variables to solutions in SimTypes.
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, cast
4
4
  import logging
5
5
 
6
6
  import ailment
7
+ from ailment.constant import UNDETERMINED_SIZE
7
8
  import claripy
8
9
  from unique_log_filter import UniqueLogFilter
9
10
 
@@ -30,8 +31,15 @@ class SimEngineVRAIL(
30
31
  The engine for variable recovery on AIL.
31
32
  """
32
33
 
33
- def __init__(self, *args, call_info=None, vvar_to_vvar: dict[int, int] | None, **kwargs):
34
- super().__init__(*args, **kwargs)
34
+ def __init__(
35
+ self,
36
+ *args,
37
+ call_info=None,
38
+ vvar_to_vvar: dict[int, int] | None,
39
+ vvar_type_hints: dict[int, typeconsts.TypeConstant] | None = None,
40
+ **kwargs,
41
+ ):
42
+ super().__init__(*args, vvar_type_hints=vvar_type_hints, **kwargs)
35
43
 
36
44
  self._reference_spoffset: bool = False
37
45
  self.call_info = call_info or {}
@@ -100,6 +108,13 @@ class SimEngineVRAIL(
100
108
  else:
101
109
  l.warning("Unsupported dst type %s.", dst_type)
102
110
 
111
+ def _handle_stmt_WeakAssignment(self, stmt) -> None:
112
+ src = self._expr(stmt.src)
113
+ dst = self._expr(stmt.dst)
114
+ if isinstance(src, RichR) and isinstance(dst, RichR) and src.typevar is not None and dst.typevar is not None:
115
+ tc = typevars.Subtype(src.typevar, dst.typevar)
116
+ self.state.add_type_constraint(tc)
117
+
103
118
  def _handle_stmt_Store(self, stmt: ailment.Stmt.Store):
104
119
  addr_r = self._expr_bv(stmt.addr)
105
120
  data = self._expr(stmt.data)
@@ -169,10 +184,10 @@ class SimEngineVRAIL(
169
184
 
170
185
  type_collections = []
171
186
  if prototype_libname is not None:
172
- prototype_lib = SIM_LIBRARIES[prototype_libname]
173
- if prototype_lib.type_collection_names:
174
- for typelib_name in prototype_lib.type_collection_names:
175
- type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
187
+ for prototype_lib in SIM_LIBRARIES[prototype_libname]:
188
+ if prototype_lib.type_collection_names:
189
+ for typelib_name in prototype_lib.type_collection_names:
190
+ type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
176
191
 
177
192
  for arg, arg_type in zip(args, prototype.args):
178
193
  if arg.typevar is not None:
@@ -262,10 +277,10 @@ class SimEngineVRAIL(
262
277
 
263
278
  type_collections = []
264
279
  if prototype_libname is not None:
265
- prototype_lib = SIM_LIBRARIES[prototype_libname]
266
- if prototype_lib.type_collection_names:
267
- for typelib_name in prototype_lib.type_collection_names:
268
- type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
280
+ for prototype_lib in SIM_LIBRARIES[prototype_libname]:
281
+ if prototype_lib.type_collection_names:
282
+ for typelib_name in prototype_lib.type_collection_names:
283
+ type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
269
284
 
270
285
  for arg, arg_type in zip(args, prototype.args):
271
286
  if arg.typevar is not None:
@@ -325,7 +340,9 @@ class SimEngineVRAIL(
325
340
  addr_r = self._expr_bv(expr.addr)
326
341
  size = expr.size
327
342
 
328
- return self._load(addr_r, size, expr=expr)
343
+ if size != UNDETERMINED_SIZE:
344
+ return self._load(addr_r, size, expr=expr)
345
+ return self._top(8)
329
346
 
330
347
  def _handle_expr_VirtualVariable(self, expr: ailment.Expr.VirtualVariable):
331
348
  return self._read_from_vvar(expr, expr=expr, vvar_id=self._mapped_vvarid(expr.varid))
@@ -419,6 +436,29 @@ class SimEngineVRAIL(
419
436
  self._reference(richr, codeloc, src=expr)
420
437
  return richr
421
438
 
439
+ def _handle_unop_Reference(self, expr: ailment.Expr.UnaryOp):
440
+ if isinstance(expr.operand, ailment.Expr.VirtualVariable) and expr.operand.was_stack:
441
+ off = expr.operand.stack_offset
442
+ refbase_typevar = self.state.stack_offset_typevars.get(off, None)
443
+ if refbase_typevar is None:
444
+ # allocate a new type variable
445
+ refbase_typevar = typevars.TypeVariable()
446
+ self.state.stack_offset_typevars[off] = refbase_typevar
447
+
448
+ ref_typevar = typevars.TypeVariable()
449
+ access_derived_typevar = self._create_access_typevar(ref_typevar, False, None, 0)
450
+ load_constraint = typevars.Subtype(refbase_typevar, access_derived_typevar)
451
+ self.state.add_type_constraint(load_constraint)
452
+
453
+ value_v = self.state.stack_address(off)
454
+ richr = RichR(value_v, typevar=ref_typevar)
455
+ codeloc = self._codeloc()
456
+ self._ensure_variable_existence(richr, codeloc, src_expr=expr.operand)
457
+ if self._reference_spoffset:
458
+ self._reference(richr, codeloc, src=expr.operand)
459
+ return richr
460
+ return RichR(self.state.top(expr.bits))
461
+
422
462
  def _handle_expr_BasePointerOffset(self, expr):
423
463
  # TODO
424
464
  return self._top(expr.bits)
@@ -433,7 +473,7 @@ class SimEngineVRAIL(
433
473
  def _handle_binop_Add(self, expr):
434
474
  arg0, arg1 = expr.operands
435
475
  r0, r1 = self._expr_pair(arg0, arg1)
436
- compute = r0.data + r1.data # type: ignore
476
+ compute = r0.data + r1.data if r0.data.size() == r1.data.size() else self.state.top(expr.bits) # type: ignore
437
477
 
438
478
  type_constraints = set()
439
479
  # create a new type variable and add constraints accordingly
@@ -844,7 +884,6 @@ class SimEngineVRAIL(
844
884
  self._expr(expr.operands[0])
845
885
  return RichR(self.state.top(expr.bits))
846
886
 
847
- _handle_unop_Reference = _handle_unop_Default
848
887
  _handle_unop_Dereference = _handle_unop_Default
849
888
  _handle_unop_Clz = _handle_unop_Default
850
889
  _handle_unop_Ctz = _handle_unop_Default