angr 9.2.143__py3-none-macosx_11_0_arm64.whl → 9.2.145__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic; further details are linked from the original diff page.

Files changed (49)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +13 -1
  3. angr/analyses/calling_convention/fact_collector.py +41 -5
  4. angr/analyses/cfg/cfg_base.py +7 -2
  5. angr/analyses/cfg/cfg_emulated.py +13 -4
  6. angr/analyses/cfg/cfg_fast.py +35 -61
  7. angr/analyses/cfg/indirect_jump_resolvers/__init__.py +2 -0
  8. angr/analyses/cfg/indirect_jump_resolvers/constant_value_manager.py +107 -0
  9. angr/analyses/cfg/indirect_jump_resolvers/default_resolvers.py +2 -1
  10. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +2 -101
  11. angr/analyses/cfg/indirect_jump_resolvers/syscall_resolver.py +92 -0
  12. angr/analyses/decompiler/ail_simplifier.py +5 -0
  13. angr/analyses/decompiler/clinic.py +163 -69
  14. angr/analyses/decompiler/decompiler.py +4 -4
  15. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +1 -1
  16. angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -5
  17. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +5 -0
  18. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +58 -2
  19. angr/analyses/decompiler/peephole_optimizations/__init__.py +2 -0
  20. angr/analyses/decompiler/peephole_optimizations/a_sub_a_shr_const_shr_const.py +37 -0
  21. angr/analyses/decompiler/ssailification/rewriting_engine.py +2 -0
  22. angr/analyses/decompiler/ssailification/ssailification.py +10 -2
  23. angr/analyses/decompiler/ssailification/traversal_engine.py +17 -2
  24. angr/analyses/decompiler/structured_codegen/c.py +25 -4
  25. angr/analyses/disassembly.py +3 -3
  26. angr/analyses/fcp/fcp.py +1 -4
  27. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +21 -22
  28. angr/analyses/stack_pointer_tracker.py +61 -25
  29. angr/analyses/typehoon/dfa.py +13 -3
  30. angr/analyses/typehoon/typehoon.py +60 -18
  31. angr/analyses/typehoon/typevars.py +11 -7
  32. angr/analyses/variable_recovery/engine_ail.py +13 -17
  33. angr/analyses/variable_recovery/engine_base.py +26 -30
  34. angr/analyses/variable_recovery/variable_recovery_fast.py +17 -21
  35. angr/knowledge_plugins/functions/function.py +29 -15
  36. angr/knowledge_plugins/key_definitions/constants.py +2 -2
  37. angr/knowledge_plugins/key_definitions/liveness.py +4 -4
  38. angr/lib/angr_native.dylib +0 -0
  39. angr/state_plugins/unicorn_engine.py +24 -8
  40. angr/storage/memory_mixins/paged_memory/page_backer_mixins.py +1 -2
  41. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +2 -2
  42. angr/utils/funcid.py +27 -2
  43. angr/utils/graph.py +26 -20
  44. {angr-9.2.143.dist-info → angr-9.2.145.dist-info}/METADATA +11 -8
  45. {angr-9.2.143.dist-info → angr-9.2.145.dist-info}/RECORD +49 -46
  46. {angr-9.2.143.dist-info → angr-9.2.145.dist-info}/WHEEL +1 -1
  47. {angr-9.2.143.dist-info → angr-9.2.145.dist-info}/LICENSE +0 -0
  48. {angr-9.2.143.dist-info → angr-9.2.145.dist-info}/entry_points.txt +0 -0
  49. {angr-9.2.143.dist-info → angr-9.2.145.dist-info}/top_level.txt +0 -0
angr/analyses/cfg/indirect_jump_resolvers/jumptable.py
@@ -1,7 +1,7 @@
1
1
  # pylint:disable=wrong-import-position,wrong-import-order
2
2
  from __future__ import annotations
3
3
  import enum
4
- from typing import TYPE_CHECKING, Any, Literal, cast
4
+ from typing import TYPE_CHECKING, Literal, cast
5
5
  from collections.abc import Sequence
6
6
  from collections import defaultdict, OrderedDict
7
7
  import logging
@@ -16,7 +16,6 @@ from claripy.annotation import UninitializedAnnotation
16
16
  from angr import sim_options as o
17
17
  from angr import BP, BP_BEFORE, BP_AFTER
18
18
  from angr.misc.ux import once
19
- from angr.code_location import CodeLocation
20
19
  from angr.concretization_strategies import SimConcretizationStrategyAny
21
20
  from angr.knowledge_plugins.cfg import IndirectJump, IndirectJumpType
22
21
  from angr.engines.vex.claripy import ccall
@@ -27,13 +26,11 @@ from angr.annocfg import AnnotatedCFG
27
26
  from angr.exploration_techniques.slicecutor import Slicecutor
28
27
  from angr.exploration_techniques.local_loop_seer import LocalLoopSeer
29
28
  from angr.exploration_techniques.explorer import Explorer
30
- from angr.project import Project
31
29
  from angr.utils.constants import DEFAULT_STATEMENT
32
- from angr.analyses.propagator.vex_vars import VEXReg
33
30
  from angr.analyses.propagator.top_checker_mixin import ClaripyDataVEXEngineMixin
34
31
  from angr.engines.vex.claripy.datalayer import value
35
32
  from .resolver import IndirectJumpResolver
36
- from .propagator_utils import PropagatorLoadCallback
33
+ from .constant_value_manager import ConstantValueManager
37
34
 
38
35
  try:
39
36
  from angr.engines import pcode
@@ -41,7 +38,6 @@ except ImportError:
41
38
  pcode = None
42
39
 
43
40
  if TYPE_CHECKING:
44
- from angr import SimState
45
41
  from angr.knowledge_plugins import Function
46
42
 
47
43
  l = logging.getLogger(name=__name__)
@@ -134,101 +130,6 @@ class JumpTargetBaseAddr:
134
130
  return self.base_addr is not None
135
131
 
136
132
 
137
- #
138
- # Constant register resolving support
139
- #
140
-
141
-
142
- class ConstantValueManager:
143
- """
144
- Manages the loading of registers who hold constant values.
145
- """
146
-
147
- __slots__ = (
148
- "func",
149
- "indirect_jump_addr",
150
- "kb",
151
- "mapping",
152
- "project",
153
- )
154
-
155
- def __init__(self, project: Project, kb, func: Function, ij_addr: int):
156
- self.project = project
157
- self.kb = kb
158
- self.func = func
159
- self.indirect_jump_addr = ij_addr
160
-
161
- self.mapping: dict[Any, dict[Any, claripy.ast.Base]] | None = None
162
-
163
- def reg_read_callback(self, state: SimState):
164
- if self.mapping is None:
165
- self._build_mapping()
166
- assert self.mapping is not None
167
-
168
- codeloc = CodeLocation(state.scratch.bbl_addr, state.scratch.stmt_idx, ins_addr=state.scratch.ins_addr)
169
- if codeloc in self.mapping:
170
- reg_read_offset = state.inspect.reg_read_offset
171
- if isinstance(reg_read_offset, claripy.ast.BV) and reg_read_offset.op == "BVV":
172
- reg_read_offset = reg_read_offset.args[0]
173
- variable = VEXReg(reg_read_offset, state.inspect.reg_read_length)
174
- if variable in self.mapping[codeloc]:
175
- v = self.mapping[codeloc][variable]
176
- if isinstance(v, int):
177
- v = claripy.BVV(v, state.inspect.reg_read_length * state.arch.byte_width)
178
- state.inspect.reg_read_expr = v
179
-
180
- def _build_mapping(self):
181
- # constant propagation
182
- l.debug("JumpTable: Propagating for %r at %#x.", self.func, self.indirect_jump_addr)
183
-
184
- # determine blocks to run FCP on
185
-
186
- # - include at most three levels of superblock successors from the entrypoint
187
- self.mapping = {}
188
- startpoint = self.func.startpoint
189
- if startpoint is None:
190
- return
191
-
192
- blocks = set()
193
- succ_and_levels = [(startpoint, 0)]
194
- while succ_and_levels:
195
- new_succs = []
196
- for node, level in succ_and_levels:
197
- if node in blocks:
198
- continue
199
- blocks.add(node)
200
- if node.addr == self.indirect_jump_addr:
201
- # stop at the indirect jump block
202
- continue
203
- for _, succ, data in self.func.graph.out_edges(node, data=True):
204
- new_level = level if data.get("type") == "fake_return" else level + 1
205
- if new_level <= 3:
206
- new_succs.append((succ, new_level))
207
- succ_and_levels = new_succs
208
-
209
- # - include at most six levels of predecessors from the indirect jump block
210
- ij_block = self.func.get_node(self.indirect_jump_addr)
211
- preds = [ij_block]
212
- for _ in range(6):
213
- new_preds = []
214
- for node in preds:
215
- if node in blocks:
216
- continue
217
- blocks.add(node)
218
- new_preds += list(self.func.graph.predecessors(node))
219
- preds = new_preds
220
- if not preds:
221
- break
222
-
223
- prop = self.project.analyses.FastConstantPropagation(
224
- self.func,
225
- blocks=blocks,
226
- vex_cross_insn_opt=True,
227
- load_callback=PropagatorLoadCallback(self.project).propagator_load_callback,
228
- )
229
- self.mapping = prop.replacements
230
-
231
-
232
133
  #
233
134
  # Jump table pre-check
234
135
  #
angr/analyses/cfg/indirect_jump_resolvers/syscall_resolver.py
@@ -0,0 +1,92 @@
1
+ from __future__ import annotations
2
+ import contextlib
3
+ from typing import TYPE_CHECKING
4
+ import logging
5
+
6
+ from angr import sim_options as o
7
+ from angr import BP, BP_AFTER
8
+ from angr.errors import (
9
+ AngrUnsupportedSyscallError,
10
+ SimOperationError,
11
+ SimError,
12
+ )
13
+
14
+ from .resolver import IndirectJumpResolver
15
+ from .constant_value_manager import ConstantValueManager
16
+
17
+ if TYPE_CHECKING:
18
+ from angr import Block
19
+ from angr.engines import SimSuccessors
20
+ from angr.sim_state import SimState
21
+ from angr.sim_procedure import SimProcedure
22
+
23
+
24
+ _l = logging.getLogger(name=__name__)
25
+
26
+
27
+ class SyscallResolver(IndirectJumpResolver):
28
+ """
29
+ Resolve syscalls to SimProcedures.
30
+ """
31
+
32
+ def __init__(self, project):
33
+ super().__init__(project, timeless=True)
34
+
35
+ def filter(self, cfg, addr, func_addr, block, jumpkind):
36
+ return jumpkind.startswith("Ijk_Sys")
37
+
38
+ def resolve( # pylint:disable=unused-argument
39
+ self, cfg, addr: int, func_addr: int, block: Block, jumpkind: str, func_graph_complete: bool = True, **kwargs
40
+ ):
41
+ stub = self._resolve_syscall_to_stub(cfg, addr, func_addr, block)
42
+ return (True, [stub.addr]) if stub else (False, [])
43
+
44
+ def _resolve_syscall_to_stub(self, cfg, addr: int, func_addr: int, block: Block) -> SimProcedure | None:
45
+ if not cfg.functions.contains_addr(func_addr):
46
+ return None
47
+ func = cfg.functions.get_by_addr(func_addr)
48
+
49
+ cv_manager = ConstantValueManager(self.project, cfg.kb, func, addr)
50
+ constant_value_reg_read_bp = BP(when=BP_AFTER, enabled=True, action=cv_manager.reg_read_callback)
51
+
52
+ state = self.project.factory.blank_state(
53
+ mode="fastpath",
54
+ addr=block.addr,
55
+ add_options={o.SYMBOL_FILL_UNCONSTRAINED_MEMORY, o.SYMBOL_FILL_UNCONSTRAINED_REGISTERS},
56
+ )
57
+ state.inspect.add_breakpoint("reg_read", constant_value_reg_read_bp)
58
+
59
+ successors = self._simulate_block_with_resilience(state)
60
+ if successors:
61
+ state = self._get_syscall_state_from_successors(successors)
62
+ if state:
63
+ with contextlib.suppress(AngrUnsupportedSyscallError):
64
+ return self.project.simos.syscall(state)
65
+ return None
66
+
67
+ def _simulate_block_with_resilience(self, state: SimState) -> SimSuccessors | None:
68
+ """
69
+ Execute a basic block with "On Error Resume Next". Give up when there is no way moving forward.
70
+ """
71
+
72
+ stmt_idx = 0
73
+ successors = None # make PyCharm's linting happy
74
+
75
+ while True:
76
+ try:
77
+ successors = self.project.factory.successors(state, skip_stmts=stmt_idx)
78
+ break
79
+ except SimOperationError:
80
+ stmt_idx += 1
81
+ continue
82
+ except SimError:
83
+ return None
84
+
85
+ return successors
86
+
87
+ @staticmethod
88
+ def _get_syscall_state_from_successors(successors: SimSuccessors) -> SimState | None:
89
+ for state in successors.flat_successors:
90
+ if state.history.jumpkind and state.history.jumpkind.startswith("Ijk_Sys"):
91
+ return state
92
+ return None
angr/analyses/decompiler/ail_simplifier.py
@@ -1307,6 +1307,11 @@ class AILSimplifier(Analysis):
1307
1307
  # check its predecessors
1308
1308
  succ_predecessors = list(self.func_graph.predecessors(succ))
1309
1309
  if len(succ_predecessors) == 1:
1310
+ if succ in lst:
1311
+ # we are about to form a loop - bad!
1312
+ # example: binary ce1897b492c80bf94083dd783aefb413ab1f6d8d4981adce8420f6669d0cb3e1, block
1313
+ # 0x2976EF7.
1314
+ break
1310
1315
  lst.append(succ)
1311
1316
  else:
1312
1317
  break
angr/analyses/decompiler/clinic.py
@@ -12,6 +12,7 @@ import networkx
12
12
  import capstone
13
13
 
14
14
  import ailment
15
+ from angr import SIM_LIBRARIES, SIM_TYPE_COLLECTIONS
15
16
 
16
17
  from angr.errors import AngrDecompilationError
17
18
  from angr.knowledge_base import KnowledgeBase
@@ -22,6 +23,7 @@ from angr.utils import timethis
22
23
  from angr.utils.graph import GraphUtils
23
24
  from angr.calling_conventions import SimRegArg, SimStackArg, SimFunctionArgument
24
25
  from angr.sim_type import (
26
+ dereference_simtype,
25
27
  SimTypeChar,
26
28
  SimTypeInt,
27
29
  SimTypeLongLong,
@@ -120,6 +122,7 @@ class Clinic(Analysis):
120
122
  desired_variables: set[str] | None = None,
121
123
  force_loop_single_exit: bool = True,
122
124
  complete_successors: bool = False,
125
+ max_type_constraints: int = 750,
123
126
  ):
124
127
  if not func.normalized and mode == ClinicMode.DECOMPILE:
125
128
  raise ValueError("Decompilation must work on normalized function graphs.")
@@ -130,7 +133,7 @@ class Clinic(Analysis):
130
133
  self.cc_graph: networkx.DiGraph | None = None
131
134
  self.unoptimized_graph: networkx.DiGraph | None = None
132
135
  self.arg_list = None
133
- self.arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimRegArg]] | None = None
136
+ self.arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimVariable]] | None = None
134
137
  self.variable_kb = variable_kb
135
138
  self.externs: set[SimMemoryVariable] = set()
136
139
  self.data_refs: dict[int, list[DataRefDesc]] = {} # data address to data reference description
@@ -153,6 +156,7 @@ class Clinic(Analysis):
153
156
  self.reaching_definitions: ReachingDefinitionsAnalysis | None = None
154
157
  self._cache = cache
155
158
  self._mode = mode
159
+ self._max_type_constraints = max_type_constraints
156
160
  self.vvar_id_start = vvar_id_start
157
161
  self.vvar_to_vvar: dict[int, int] | None = None
158
162
  # during SSA conversion, we create secondary stack variables because they overlap and are larger than the
@@ -304,10 +308,10 @@ class Clinic(Analysis):
304
308
  self._update_progress(29.0, text="Recovering calling conventions (AIL mode)")
305
309
  self._recover_calling_conventions(func_graph=ail_graph)
306
310
 
307
- return ail_graph
311
+ return self._apply_callsite_prototype_and_calling_convention(ail_graph)
308
312
 
309
313
  def _slice_variables(self, ail_graph):
310
- assert self.variable_kb is not None
314
+ assert self.variable_kb is not None and self._desired_variables is not None
311
315
 
312
316
  nodes_index = {(n.addr, n.idx): n for n in ail_graph.nodes()}
313
317
 
@@ -383,7 +387,7 @@ class Clinic(Analysis):
383
387
  # replace the return statement with an assignment to the return register
384
388
  blk.statements.pop(idx)
385
389
 
386
- if stmt.ret_exprs:
390
+ if stmt.ret_exprs and self.project.arch.ret_offset is not None:
387
391
  assign_to_retreg = ailment.Stmt.Assignment(
388
392
  self._ail_manager.next_atom(),
389
393
  ailment.Expr.Register(
@@ -430,14 +434,17 @@ class Clinic(Analysis):
430
434
  if callee_clinic.arg_vvars:
431
435
  for arg_idx in sorted(callee_clinic.arg_vvars.keys()):
432
436
  param_vvar, reg_arg = callee_clinic.arg_vvars[arg_idx]
433
- reg_offset = reg_arg.reg
434
- stmt = ailment.Stmt.Assignment(
435
- self._ail_manager.next_atom(),
436
- param_vvar,
437
- ailment.Expr.Register(self._ail_manager.next_atom(), None, reg_offset, reg_arg.bits),
438
- ins_addr=caller_block.addr + caller_block.original_size,
439
- )
440
- caller_block.statements.append(stmt)
437
+ if isinstance(reg_arg, SimRegisterVariable):
438
+ reg_offset = reg_arg.reg
439
+ stmt = ailment.Stmt.Assignment(
440
+ self._ail_manager.next_atom(),
441
+ param_vvar,
442
+ ailment.Expr.Register(self._ail_manager.next_atom(), None, reg_offset, reg_arg.bits),
443
+ ins_addr=caller_block.addr + caller_block.original_size,
444
+ )
445
+ caller_block.statements.append(stmt)
446
+ else:
447
+ raise NotImplementedError("Unsupported parameter type")
441
448
 
442
449
  ail_graph.add_edge(caller_block, callee_start)
443
450
 
@@ -669,6 +676,7 @@ class Clinic(Analysis):
669
676
  self._convert_all()
670
677
 
671
678
  # there must be at least one Load or one Store
679
+ assert self._blocks_by_addr_and_size is not None
672
680
  found_load_or_store = False
673
681
  for ail_block in self._blocks_by_addr_and_size.values():
674
682
  for stmt in ail_block.statements:
@@ -726,7 +734,7 @@ class Clinic(Analysis):
726
734
  self.arg_list = None
727
735
  self.variable_kb = None
728
736
  self.cc_graph = None
729
- self.externs = None
737
+ self.externs = set()
730
738
  self.data_refs: dict[int, list[DataRefDesc]] = self._collect_data_refs(ail_graph)
731
739
 
732
740
  @staticmethod
@@ -754,7 +762,7 @@ class Clinic(Analysis):
754
762
  return graph_copy
755
763
 
756
764
  def copy_graph(self, graph=None) -> networkx.DiGraph:
757
- return self._copy_graph(graph or self.graph)
765
+ return self._copy_graph(graph or self.graph) # type:ignore
758
766
 
759
767
  @timethis
760
768
  def _set_function_graph(self):
@@ -765,6 +773,7 @@ class Clinic(Analysis):
765
773
  """
766
774
  Alignment blocks are basic blocks that only consist of nops. They should not be included in the graph.
767
775
  """
776
+ assert self._func_graph is not None
768
777
  for node in list(self._func_graph.nodes()):
769
778
  if self._func_graph.in_degree(node) == 0 and CFGBase._is_noop_block(
770
779
  self.project.arch, self.project.factory.block(node.addr, node.size)
@@ -872,7 +881,7 @@ class Clinic(Analysis):
872
881
  callsite_block_addr=callsite.addr,
873
882
  callsite_insn_addr=callsite_ins_addr,
874
883
  func_graph=func_graph,
875
- fail_fast=self._fail_fast,
884
+ fail_fast=self._fail_fast, # type:ignore
876
885
  )
877
886
 
878
887
  if cc.cc is not None and cc.prototype is not None:
@@ -953,6 +962,7 @@ class Clinic(Analysis):
953
962
 
954
963
  :return: None
955
964
  """
965
+ assert self._func_graph is not None
956
966
 
957
967
  for block_node in self._func_graph.nodes():
958
968
  ail_block = self._convert(block_node)
@@ -1063,7 +1073,9 @@ class Clinic(Analysis):
1063
1073
  self.project.hooked_by(successors[0].addr), UnresolvableCallTarget
1064
1074
  ):
1065
1075
  # found a single successor - replace the last statement
1076
+ assert isinstance(last_stmt.target, ailment.Expr.Expression) # not a string
1066
1077
  new_last_stmt = last_stmt.copy()
1078
+ assert isinstance(successors[0].addr, int)
1067
1079
  new_last_stmt.target = ailment.Expr.Const(None, None, successors[0].addr, last_stmt.target.bits)
1068
1080
  block.statements[-1] = new_last_stmt
1069
1081
 
@@ -1105,7 +1117,7 @@ class Clinic(Analysis):
1105
1117
  if self.kb.functions.contains_addr(target_addr):
1106
1118
  # replace the statement
1107
1119
  target_func = self.kb.functions.get_by_addr(target_addr)
1108
- if target_func.returning:
1120
+ if target_func.returning and self.project.arch.ret_offset is not None:
1109
1121
  ret_reg_offset = self.project.arch.ret_offset
1110
1122
  ret_expr = ailment.Expr.Register(
1111
1123
  None,
@@ -1136,6 +1148,74 @@ class Clinic(Analysis):
1136
1148
 
1137
1149
  return ail_graph
1138
1150
 
1151
+ def _apply_callsite_prototype_and_calling_convention(self, ail_graph: networkx.DiGraph) -> networkx.DiGraph:
1152
+ for block in ail_graph.nodes():
1153
+ if not block.statements:
1154
+ continue
1155
+
1156
+ last_stmt = block.statements[-1]
1157
+ if not isinstance(last_stmt, ailment.Stmt.Call):
1158
+ continue
1159
+
1160
+ cc = last_stmt.calling_convention
1161
+ prototype = last_stmt.prototype
1162
+ if cc and prototype:
1163
+ continue
1164
+
1165
+ # manually-specified call-site prototype
1166
+ has_callsite_prototype = self.kb.callsite_prototypes.has_prototype(block.addr)
1167
+ if has_callsite_prototype:
1168
+ manually_specified = self.kb.callsite_prototypes.get_prototype_type(block.addr)
1169
+ if manually_specified:
1170
+ cc = self.kb.callsite_prototypes.get_cc(block.addr)
1171
+ prototype = self.kb.callsite_prototypes.get_prototype(block.addr)
1172
+
1173
+ # function-specific prototype
1174
+ func = None
1175
+ if cc is None or prototype is None:
1176
+ target = None
1177
+ if isinstance(last_stmt.target, ailment.Expr.Const):
1178
+ target = last_stmt.target.value
1179
+
1180
+ if target is not None and target in self.kb.functions:
1181
+ # function-specific logic when the calling target is known
1182
+ func = self.kb.functions[target]
1183
+ if func.prototype is None:
1184
+ func.find_declaration()
1185
+ cc = func.calling_convention
1186
+ prototype = func.prototype
1187
+
1188
+ # automatically recovered call-site prototype
1189
+ if (cc is None or prototype is None) and has_callsite_prototype:
1190
+ cc = self.kb.callsite_prototypes.get_cc(block.addr)
1191
+ prototype = self.kb.callsite_prototypes.get_prototype(block.addr)
1192
+
1193
+ # ensure the prototype has been resolved
1194
+ if prototype is not None and func is not None:
1195
+ # make sure the function prototype is resolved.
1196
+ # TODO: Cache resolved function prototypes globally
1197
+ prototype_libname = func.prototype_libname
1198
+ type_collections = []
1199
+ if prototype_libname is not None:
1200
+ prototype_lib = SIM_LIBRARIES[prototype_libname]
1201
+ if prototype_lib.type_collection_names:
1202
+ for typelib_name in prototype_lib.type_collection_names:
1203
+ type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
1204
+ if type_collections:
1205
+ prototype = dereference_simtype(prototype, type_collections).with_arch( # type: ignore
1206
+ self.project.arch
1207
+ )
1208
+
1209
+ if cc is None:
1210
+ l.warning("Call site %#x (callee %s) has an unknown calling convention.", block.addr, repr(func))
1211
+
1212
+ new_last_stmt = last_stmt.copy()
1213
+ new_last_stmt.calling_convention = cc
1214
+ new_last_stmt.prototype = prototype
1215
+ block.statements[-1] = new_last_stmt
1216
+
1217
+ return ail_graph
1218
+
1139
1219
  @timethis
1140
1220
  def _make_ailgraph(self) -> networkx.DiGraph:
1141
1221
  return self._function_graph_to_ail_graph(self._func_graph)
@@ -1589,9 +1669,9 @@ class Clinic(Analysis):
1589
1669
  tmp_kb.functions = self.kb.functions
1590
1670
  vr = self.project.analyses.VariableRecoveryFast(
1591
1671
  self.function, # pylint:disable=unused-variable
1592
- fail_fast=self._fail_fast,
1672
+ fail_fast=self._fail_fast, # type:ignore
1593
1673
  func_graph=ail_graph,
1594
- kb=tmp_kb,
1674
+ kb=tmp_kb, # type:ignore
1595
1675
  track_sp=False,
1596
1676
  func_args=arg_list,
1597
1677
  unify_variables=False,
@@ -1610,9 +1690,13 @@ class Clinic(Analysis):
1610
1690
  stackvar_max_sizes = var_manager.get_stackvar_max_sizes(self.stack_items)
1611
1691
  tv_max_sizes = {}
1612
1692
  for v, s in stackvar_max_sizes.items():
1693
+ assert isinstance(v, SimStackVariable)
1613
1694
  if v in vr.var_to_typevars:
1614
1695
  for tv in vr.var_to_typevars[v]:
1615
1696
  tv_max_sizes[tv] = s
1697
+ if v.offset in vr.stack_offset_typevars:
1698
+ tv = vr.stack_offset_typevars[v.offset]
1699
+ tv_max_sizes[tv] = s
1616
1700
  # clean up existing types for this function
1617
1701
  var_manager.remove_types()
1618
1702
  # TODO: Type inference for global variables
@@ -1624,35 +1708,49 @@ class Clinic(Analysis):
1624
1708
  must_struct |= typevars
1625
1709
  else:
1626
1710
  must_struct = None
1627
- try:
1628
- tp = self.project.analyses.Typehoon(
1629
- vr.type_constraints,
1630
- vr.func_typevar,
1631
- kb=tmp_kb,
1632
- fail_fast=self._fail_fast,
1633
- var_mapping=vr.var_to_typevars,
1634
- must_struct=must_struct,
1635
- ground_truth=groundtruth,
1636
- stackvar_max_sizes=tv_max_sizes,
1637
- )
1638
- # tp.pp_constraints()
1639
- # tp.pp_solution()
1640
- tp.update_variable_types(
1641
- self.function.addr,
1642
- {v: t for v, t in vr.var_to_typevars.items() if isinstance(v, (SimRegisterVariable, SimStackVariable))},
1643
- )
1644
- tp.update_variable_types(
1645
- "global",
1646
- {
1647
- v: t
1648
- for v, t in vr.var_to_typevars.items()
1649
- if isinstance(v, SimMemoryVariable) and not isinstance(v, SimStackVariable)
1650
- },
1651
- )
1652
- except Exception: # pylint:disable=broad-except
1653
- l.warning(
1654
- "Typehoon analysis failed. Variables will not have types. Please report to GitHub.", exc_info=True
1711
+ total_type_constraints = sum(len(tc) for tc in vr.type_constraints.values()) if vr.type_constraints else 0
1712
+ if total_type_constraints > self._max_type_constraints:
1713
+ l.info(
1714
+ "The number of type constraints (%d) is greater than the threshold (%d). Skipping type inference.",
1715
+ total_type_constraints,
1716
+ self._max_type_constraints,
1655
1717
  )
1718
+ else:
1719
+ try:
1720
+ tp = self.project.analyses.Typehoon(
1721
+ vr.type_constraints,
1722
+ vr.func_typevar,
1723
+ kb=tmp_kb,
1724
+ fail_fast=self._fail_fast,
1725
+ var_mapping=vr.var_to_typevars,
1726
+ stack_offset_tvs=vr.stack_offset_typevars,
1727
+ must_struct=must_struct,
1728
+ ground_truth=groundtruth,
1729
+ stackvar_max_sizes=tv_max_sizes,
1730
+ )
1731
+ # tp.pp_constraints()
1732
+ # tp.pp_solution()
1733
+ tp.update_variable_types(
1734
+ self.function.addr,
1735
+ {
1736
+ v: t
1737
+ for v, t in vr.var_to_typevars.items()
1738
+ if isinstance(v, (SimRegisterVariable, SimStackVariable))
1739
+ },
1740
+ vr.stack_offset_typevars,
1741
+ )
1742
+ tp.update_variable_types(
1743
+ "global",
1744
+ {
1745
+ v: t
1746
+ for v, t in vr.var_to_typevars.items()
1747
+ if isinstance(v, SimMemoryVariable) and not isinstance(v, SimStackVariable)
1748
+ },
1749
+ )
1750
+ except Exception: # pylint:disable=broad-except
1751
+ l.warning(
1752
+ "Typehoon analysis failed. Variables will not have types. Please report to GitHub.", exc_info=True
1753
+ )
1656
1754
 
1657
1755
  # for any left-over variables, assign Bottom type (which will get "corrected" into a default type in
1658
1756
  # VariableManager)
@@ -1671,14 +1769,10 @@ class Clinic(Analysis):
1671
1769
  func_blocks=list(ail_graph),
1672
1770
  )
1673
1771
 
1674
- # Link variables to each statement
1772
+ # Link variables and struct member information to every statement and expression
1675
1773
  for block in ail_graph.nodes():
1676
1774
  self._link_variables_on_block(block, tmp_kb)
1677
1775
 
1678
- # Link struct member info to Store statements
1679
- for block in ail_graph.nodes():
1680
- self._link_struct_member_info_on_block(block, tmp_kb)
1681
-
1682
1776
  if self._cache is not None:
1683
1777
  self._cache.type_constraints = vr.type_constraints
1684
1778
  self._cache.func_typevar = vr.func_typevar
@@ -1686,22 +1780,6 @@ class Clinic(Analysis):
1686
1780
 
1687
1781
  return tmp_kb
1688
1782
 
1689
- def _link_struct_member_info_on_block(self, block, kb):
1690
- variable_manager = kb.variables[self.function.addr]
1691
- for stmt in block.statements:
1692
- if isinstance(stmt, ailment.Stmt.Store) and isinstance((var := stmt.variable), SimStackVariable):
1693
- offset = var.offset
1694
- if offset in variable_manager.stack_offset_to_struct_member_info:
1695
- stmt.tags["struct_member_info"] = variable_manager.stack_offset_to_struct_member_info[offset]
1696
- elif (
1697
- isinstance(stmt, ailment.Stmt.Assignment)
1698
- and isinstance(stmt.dst, ailment.Expr.VirtualVariable)
1699
- and stmt.dst.was_stack
1700
- ):
1701
- offset = stmt.dst.stack_offset
1702
- if offset in variable_manager.stack_offset_to_struct_member_info:
1703
- stmt.dst.tags["struct_member_info"] = variable_manager.stack_offset_to_struct_member_info[offset]
1704
-
1705
1783
  def _link_variables_on_block(self, block, kb):
1706
1784
  """
1707
1785
  Link atoms (AIL expressions) in the given block to corresponding variables identified previously.
@@ -1737,6 +1815,12 @@ class Clinic(Analysis):
1737
1815
  )
1738
1816
  self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.data)
1739
1817
 
1818
+ # link struct member info
1819
+ if isinstance(stmt.variable, SimStackVariable):
1820
+ off = stmt.variable.offset
1821
+ if off in variable_manager.stack_offset_to_struct_member_info:
1822
+ stmt.tags["struct_member_info"] = variable_manager.stack_offset_to_struct_member_info[off]
1823
+
1740
1824
  elif stmt_type is ailment.Stmt.Assignment:
1741
1825
  self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.dst)
1742
1826
  self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.src)
@@ -1804,6 +1888,11 @@ class Clinic(Analysis):
1804
1888
  expr.variable = var
1805
1889
  expr.variable_offset = offset
1806
1890
 
1891
+ if isinstance(expr, ailment.Expr.VirtualVariable) and expr.was_stack:
1892
+ off = expr.stack_offset
1893
+ if off in variable_manager.stack_offset_to_struct_member_info:
1894
+ expr.tags["struct_member_info"] = variable_manager.stack_offset_to_struct_member_info[off]
1895
+
1807
1896
  elif type(expr) is ailment.Expr.Load:
1808
1897
  variables = variable_manager.find_variables_by_atom(block.addr, stmt_idx, expr, block_idx=block.idx)
1809
1898
  if len(variables) == 0:
@@ -1838,6 +1927,11 @@ class Clinic(Analysis):
1838
1927
  expr.variable = var
1839
1928
  expr.variable_offset = offset
1840
1929
 
1930
+ if isinstance(var, SimStackVariable):
1931
+ off = var.offset
1932
+ if off in variable_manager.stack_offset_to_struct_member_info:
1933
+ expr.tags["struct_member_info"] = variable_manager.stack_offset_to_struct_member_info[off]
1934
+
1841
1935
  elif type(expr) is ailment.Expr.BinaryOp:
1842
1936
  variables = variable_manager.find_variables_by_atom(block.addr, stmt_idx, expr, block_idx=block.idx)
1843
1937
  if len(variables) >= 1:
@@ -2683,7 +2777,7 @@ class Clinic(Analysis):
2683
2777
  def _next_atom(self) -> int:
2684
2778
  return self._ail_manager.next_atom()
2685
2779
 
2686
- def parse_variable_addr(self, addr: ailment.Expr.Expression) -> tuple[Any, Any] | None:
2780
+ def parse_variable_addr(self, addr: ailment.Expr.Expression) -> tuple[Any, Any]:
2687
2781
  if isinstance(addr, ailment.Expr.Const):
2688
2782
  return addr, 0
2689
2783
  if isinstance(addr, ailment.Expr.BinaryOp) and addr.op == "Add":
@@ -2857,7 +2951,7 @@ class Clinic(Analysis):
2857
2951
  if self.project.kb.functions.contains_addr(last_stmt.target.value)
2858
2952
  else None
2859
2953
  )
2860
- if func is not None and func.name == "__chkstk":
2954
+ if func is not None and (func.name == "__chkstk" or func.info.get("is_alloca_probe", False) is True):
2861
2955
  # get rid of this call
2862
2956
  node.statements = node.statements[:-1]
2863
2957
  if self.project.arch.call_pushes_ret and node.statements: