angr 9.2.140__py3-none-manylinux2014_x86_64.whl → 9.2.142__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (75)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +105 -35
  3. angr/analyses/calling_convention/fact_collector.py +44 -18
  4. angr/analyses/calling_convention/utils.py +3 -1
  5. angr/analyses/cfg/cfg_base.py +38 -4
  6. angr/analyses/cfg/cfg_fast.py +23 -7
  7. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +13 -8
  8. angr/analyses/class_identifier.py +8 -7
  9. angr/analyses/complete_calling_conventions.py +1 -1
  10. angr/analyses/decompiler/ail_simplifier.py +105 -62
  11. angr/analyses/decompiler/callsite_maker.py +24 -11
  12. angr/analyses/decompiler/clinic.py +83 -5
  13. angr/analyses/decompiler/condition_processor.py +7 -7
  14. angr/analyses/decompiler/decompilation_cache.py +2 -1
  15. angr/analyses/decompiler/decompiler.py +11 -2
  16. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +4 -6
  17. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +8 -2
  18. angr/analyses/decompiler/optimization_passes/condition_constprop.py +63 -34
  19. angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +3 -1
  20. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +21 -2
  21. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +85 -16
  22. angr/analyses/decompiler/optimization_passes/optimization_pass.py +78 -1
  23. angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +29 -7
  24. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +51 -7
  25. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +6 -0
  26. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +9 -1
  27. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +44 -7
  28. angr/analyses/decompiler/region_identifier.py +76 -51
  29. angr/analyses/decompiler/region_simplifiers/expr_folding.py +32 -18
  30. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +4 -1
  31. angr/analyses/decompiler/ssailification/rewriting.py +70 -32
  32. angr/analyses/decompiler/ssailification/rewriting_engine.py +118 -24
  33. angr/analyses/decompiler/ssailification/ssailification.py +22 -14
  34. angr/analyses/decompiler/stack_item.py +36 -0
  35. angr/analyses/decompiler/structured_codegen/c.py +86 -145
  36. angr/analyses/decompiler/structuring/dream.py +1 -1
  37. angr/analyses/decompiler/structuring/phoenix.py +9 -4
  38. angr/analyses/decompiler/structuring/structurer_base.py +2 -1
  39. angr/analyses/decompiler/utils.py +46 -20
  40. angr/analyses/find_objects_static.py +2 -1
  41. angr/analyses/reaching_definitions/engine_vex.py +13 -0
  42. angr/analyses/reaching_definitions/function_handler.py +24 -10
  43. angr/analyses/reaching_definitions/function_handler_library/stdio.py +1 -0
  44. angr/analyses/reaching_definitions/function_handler_library/stdlib.py +45 -12
  45. angr/analyses/reaching_definitions/function_handler_library/string.py +77 -21
  46. angr/analyses/reaching_definitions/function_handler_library/unistd.py +21 -1
  47. angr/analyses/reaching_definitions/rd_state.py +11 -7
  48. angr/analyses/s_liveness.py +44 -6
  49. angr/analyses/s_reaching_definitions/s_rda_model.py +4 -2
  50. angr/analyses/s_reaching_definitions/s_rda_view.py +43 -25
  51. angr/analyses/typehoon/simple_solver.py +35 -8
  52. angr/analyses/typehoon/typehoon.py +3 -1
  53. angr/analyses/variable_recovery/engine_ail.py +1 -1
  54. angr/analyses/variable_recovery/engine_vex.py +20 -4
  55. angr/calling_conventions.py +17 -12
  56. angr/factory.py +8 -3
  57. angr/knowledge_plugins/functions/function.py +5 -10
  58. angr/knowledge_plugins/variables/variable_manager.py +34 -5
  59. angr/procedures/definitions/__init__.py +3 -10
  60. angr/procedures/definitions/wdk_ntoskrnl.py +2 -0
  61. angr/procedures/win32_kernel/__fastfail.py +15 -0
  62. angr/sim_procedure.py +2 -2
  63. angr/simos/simos.py +17 -11
  64. angr/simos/windows.py +42 -1
  65. angr/utils/ail.py +41 -1
  66. angr/utils/cpp.py +17 -0
  67. angr/utils/doms.py +142 -0
  68. angr/utils/library.py +1 -1
  69. angr/utils/types.py +59 -0
  70. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/METADATA +7 -7
  71. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/RECORD +75 -70
  72. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/LICENSE +0 -0
  73. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/WHEEL +0 -0
  74. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/entry_points.txt +0 -0
  75. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/top_level.txt +0 -0
@@ -2,9 +2,10 @@ from __future__ import annotations
2
2
 
3
3
  import networkx
4
4
  from ailment.expression import VirtualVariable
5
- from ailment.statement import Assignment, Call
5
+ from ailment.statement import Assignment, Call, ConditionalJump
6
6
 
7
7
  from angr.analyses import Analysis, register_analysis
8
+ from angr.utils.ail import is_head_controlled_loop_block, is_phi_assignment
8
9
  from angr.utils.ssa import VVarUsesCollector, phi_assignment_get_src
9
10
 
10
11
 
@@ -69,8 +70,14 @@ class SLivenessAnalysis(Analysis):
69
70
  block_key = block.addr, block.idx
70
71
  changed = False
71
72
 
73
+ head_controlled_loop = is_head_controlled_loop_block(block)
74
+
72
75
  live = set()
73
76
  for succ in graph.successors(block):
77
+ if head_controlled_loop and (block.addr, block.idx) == (succ.addr, succ.idx):
78
+ # this is a head-controlled loop block; we ignore the self-loop edge because all variables defined
79
+ # in the block after the conditional jump will be dead after leaving the current block
80
+ continue
74
81
  edge = (block.addr, block.idx), (succ.addr, succ.idx)
75
82
  if edge in live_on_edges:
76
83
  live |= live_on_edges[edge]
@@ -81,8 +88,18 @@ class SLivenessAnalysis(Analysis):
81
88
  changed = True
82
89
  live_outs[block_key] = live.copy()
83
90
 
91
+ if head_controlled_loop:
92
+ # this is a head-controlled loop block; we start scanning from the first condition jump backwards
93
+ condjump_idx = next(
94
+ iter(i for i, stmt in enumerate(block.statements) if isinstance(stmt, ConditionalJump)), None
95
+ )
96
+ assert condjump_idx is not None
97
+ stmts = block.statements[: condjump_idx + 1]
98
+ else:
99
+ stmts = block.statements
100
+
84
101
  live_in_by_pred = {}
85
- for stmt in reversed(block.statements):
102
+ for stmt in reversed(stmts):
86
103
  # handle assignments: a defined vvar is not live before the assignment
87
104
  if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
88
105
  live.discard(stmt.dst.varid)
@@ -92,6 +109,10 @@ class SLivenessAnalysis(Analysis):
92
109
  phi_expr = phi_assignment_get_src(stmt)
93
110
  if phi_expr is not None:
94
111
  for src, vvar in phi_expr.src_and_vvars:
112
+ if head_controlled_loop and src == (block.addr, block.idx):
113
+ # this is a head-controlled loop block; we ignore the self-loop edge
114
+ continue
115
+
95
116
  if src not in live_in_by_pred:
96
117
  live_in_by_pred[src] = live.copy()
97
118
  if vvar is not None:
@@ -99,9 +120,15 @@ class SLivenessAnalysis(Analysis):
99
120
  live_in_by_pred[src].discard(stmt.dst.varid)
100
121
 
101
122
  # handle the statement: add used vvars to the live set
102
- vvar_use_collector = VVarUsesCollector()
103
- vvar_use_collector.walk_statement(stmt)
104
- live |= vvar_use_collector.vvars
123
+ if head_controlled_loop and is_phi_assignment(stmt):
124
+ for src, vvar in stmt.src.src_and_vvars:
125
+ # this is a head-controlled loop block; we ignore the self-loop edge
126
+ if src != (block.addr, block.idx) and vvar is not None:
127
+ live |= {vvar.varid}
128
+ else:
129
+ vvar_use_collector = VVarUsesCollector()
130
+ vvar_use_collector.walk_statement(stmt)
131
+ live |= vvar_use_collector.vvars
105
132
 
106
133
  if live_ins[block_key] != live:
107
134
  live_ins[block_key] = live
@@ -135,7 +162,18 @@ class SLivenessAnalysis(Analysis):
135
162
 
136
163
  for block in self.func_graph.nodes():
137
164
  live = self.model.live_outs[(block.addr, block.idx)].copy()
138
- for stmt in reversed(block.statements):
165
+
166
+ if is_head_controlled_loop_block(block):
167
+ # this is a head-controlled loop block; we start scanning from the first condition jump backwards
168
+ condjump_idx = next(
169
+ iter(i for i, stmt in enumerate(block.statements) if isinstance(stmt, ConditionalJump)), None
170
+ )
171
+ assert condjump_idx is not None
172
+ stmts = block.statements[: condjump_idx + 1]
173
+ else:
174
+ stmts = block.statements
175
+
176
+ for stmt in reversed(stmts):
139
177
  if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
140
178
  def_vvar = stmt.dst.varid
141
179
  elif isinstance(stmt, Call) and isinstance(stmt.ret_expr, VirtualVariable):
@@ -91,13 +91,15 @@ class SRDAModel:
91
91
  )
92
92
  return defs
93
93
 
94
- def get_vvar_uses(self, obj: atoms.VirtualVariable) -> set[CodeLocation]:
94
+ def get_vvar_uses(self, obj: VirtualVariable | atoms.VirtualVariable) -> set[CodeLocation]:
95
95
  the_vvar = self.varid_to_vvar.get(obj.varid, None)
96
96
  if the_vvar is not None:
97
97
  return {loc for _, loc in self.all_vvar_uses[the_vvar]}
98
98
  return set()
99
99
 
100
- def get_vvar_uses_with_expr(self, obj: atoms.VirtualVariable) -> set[tuple[CodeLocation, VirtualVariable]]:
100
+ def get_vvar_uses_with_expr(
101
+ self, obj: VirtualVariable | atoms.VirtualVariable
102
+ ) -> set[tuple[CodeLocation, VirtualVariable]]:
101
103
  the_vvar = self.varid_to_vvar.get(obj.varid, None)
102
104
  if the_vvar is not None:
103
105
  return {(loc, expr) for expr, loc in self.all_vvar_uses[the_vvar]}
@@ -1,8 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
+ from collections.abc import Callable
4
5
  from collections import defaultdict
5
6
 
7
+ from ailment import Block
6
8
  from ailment.statement import Statement, Assignment, Call, Label
7
9
  from ailment.expression import VirtualVariable, VirtualVariableCategory, Expression
8
10
 
@@ -22,7 +24,7 @@ class RegVVarPredicate:
22
24
  Implements a predicate that is used in get_reg_vvar_by_stmt_idx and get_reg_vvar_by_insn.
23
25
  """
24
26
 
25
- def __init__(self, reg_offset: int, vvars: set[VirtualVariable], arch):
27
+ def __init__(self, reg_offset: int, vvars: list[VirtualVariable], arch):
26
28
  self.reg_offset = reg_offset
27
29
  self.vvars = vvars
28
30
  self.arch = arch
@@ -47,7 +49,8 @@ class RegVVarPredicate:
47
49
  and stmt.dst.was_reg
48
50
  and stmt.dst.reg_offset == self.reg_offset
49
51
  ):
50
- self.vvars.add(stmt.dst)
52
+ if stmt.dst not in self.vvars:
53
+ self.vvars.append(stmt.dst)
51
54
  return True
52
55
  if isinstance(stmt, Call):
53
56
  if (
@@ -55,7 +58,8 @@ class RegVVarPredicate:
55
58
  and stmt.ret_expr.was_reg
56
59
  and stmt.ret_expr.reg_offset == self.reg_offset
57
60
  ):
58
- self.vvars.add(stmt.ret_expr)
61
+ if stmt.ret_expr not in self.vvars:
62
+ self.vvars.append(stmt.ret_expr)
59
63
  return True
60
64
  # is it clobbered maybe?
61
65
  clobbered_regs = self._get_call_clobbered_regs(stmt)
@@ -69,7 +73,7 @@ class StackVVarPredicate:
69
73
  Implements a predicate that is used in get_stack_vvar_by_stmt_idx and get_stack_vvar_by_insn.
70
74
  """
71
75
 
72
- def __init__(self, stack_offset: int, size: int, vvars: set[VirtualVariable]):
76
+ def __init__(self, stack_offset: int, size: int, vvars: list[VirtualVariable]):
73
77
  self.stack_offset = stack_offset
74
78
  self.size = size
75
79
  self.vvars = vvars
@@ -82,7 +86,8 @@ class StackVVarPredicate:
82
86
  and stmt.dst.stack_offset <= self.stack_offset < stmt.dst.stack_offset + stmt.dst.size
83
87
  and stmt.dst.stack_offset <= self.stack_offset + self.size <= stmt.dst.stack_offset + stmt.dst.size
84
88
  ):
85
- self.vvars.add(stmt.dst)
89
+ if stmt.dst not in self.vvars:
90
+ self.vvars.append(stmt.dst)
86
91
  return True
87
92
  return False
88
93
 
@@ -96,7 +101,13 @@ class SRDAView:
96
101
  self.model = model
97
102
 
98
103
  def _get_vvar_by_stmt(
99
- self, block_addr: int, block_idx: int | None, stmt_idx: int, op_type: ObservationPointType, predicate
104
+ self,
105
+ block_addr: int,
106
+ block_idx: int | None,
107
+ stmt_idx: int,
108
+ op_type: ObservationPointType,
109
+ predicate: Callable,
110
+ consecutive: bool = False,
100
111
  ):
101
112
  # find the starting block
102
113
  for block in self.model.func_graph:
@@ -107,7 +118,10 @@ class SRDAView:
107
118
  return
108
119
 
109
120
  traversed = set()
110
- queue = [(the_block, stmt_idx if op_type == ObservationPointType.OP_BEFORE else stmt_idx + 1)]
121
+ queue: list[tuple[Block, int | None]] = [
122
+ (the_block, stmt_idx if op_type == ObservationPointType.OP_BEFORE else stmt_idx + 1)
123
+ ]
124
+ predicate_returned_true = False
111
125
  while queue:
112
126
  block, start_stmt_idx = queue.pop(0)
113
127
  traversed.add(block)
@@ -115,7 +129,8 @@ class SRDAView:
115
129
  stmts = block.statements[:start_stmt_idx] if start_stmt_idx is not None else block.statements
116
130
 
117
131
  for stmt in reversed(stmts):
118
- should_break = predicate(stmt)
132
+ r = predicate(stmt)
133
+ should_break = (predicate_returned_true and r is False) if consecutive else r
119
134
  if should_break:
120
135
  break
121
136
  else:
@@ -129,7 +144,7 @@ class SRDAView:
129
144
  self, reg_offset: int, block_addr: int, block_idx: int | None, stmt_idx: int, op_type: ObservationPointType
130
145
  ) -> VirtualVariable | None:
131
146
  reg_offset = get_reg_offset_base(reg_offset, self.model.arch)
132
- vvars = set()
147
+ vvars = []
133
148
  predicater = RegVVarPredicate(reg_offset, vvars, self.model.arch)
134
149
  self._get_vvar_by_stmt(block_addr, block_idx, stmt_idx, op_type, predicater.predicate)
135
150
 
@@ -137,14 +152,14 @@ class SRDAView:
137
152
  # not found - check function arguments
138
153
  for func_arg in self.model.func_args:
139
154
  if isinstance(func_arg, VirtualVariable):
140
- func_arg_category = func_arg.oident[0]
155
+ func_arg_category = func_arg.parameter_category
141
156
  if func_arg_category == VirtualVariableCategory.REGISTER:
142
- func_arg_regoff = func_arg.oident[1]
157
+ func_arg_regoff = func_arg.parameter_reg_offset
143
158
  if func_arg_regoff == reg_offset:
144
- vvars.add(func_arg)
159
+ vvars.append(func_arg)
145
160
 
146
161
  assert len(vvars) <= 1
147
- return next(iter(vvars), None)
162
+ return vvars[0] if vvars else None
148
163
 
149
164
  def get_stack_vvar_by_stmt( # pylint: disable=too-many-positional-arguments
150
165
  self,
@@ -155,21 +170,24 @@ class SRDAView:
155
170
  stmt_idx: int,
156
171
  op_type: ObservationPointType,
157
172
  ) -> VirtualVariable | None:
158
- vvars = set()
173
+ vvars = []
159
174
  predicater = StackVVarPredicate(stack_offset, size, vvars)
160
- self._get_vvar_by_stmt(block_addr, block_idx, stmt_idx, op_type, predicater.predicate)
175
+ self._get_vvar_by_stmt(block_addr, block_idx, stmt_idx, op_type, predicater.predicate, consecutive=True)
161
176
 
162
177
  if not vvars:
163
178
  # not found - check function arguments
164
179
  for func_arg in self.model.func_args:
165
180
  if isinstance(func_arg, VirtualVariable):
166
- func_arg_category = func_arg.oident[0]
181
+ func_arg_category = func_arg.parameter_category
167
182
  if func_arg_category == VirtualVariableCategory.STACK:
168
- func_arg_stackoff = func_arg.oident[1]
183
+ func_arg_stackoff = func_arg.oident[1] # type: ignore
169
184
  if func_arg_stackoff == stack_offset and func_arg.size == size:
170
- vvars.add(func_arg)
171
- assert len(vvars) <= 1
172
- return next(iter(vvars), None)
185
+ vvars.append(func_arg)
186
+ # there might be multiple vvars; we prioritize the one whose size fits the best
187
+ for v in vvars:
188
+ if v.stack_offset == stack_offset and v.size == size:
189
+ return v
190
+ return vvars[0] if vvars else None
173
191
 
174
192
  def _get_vvar_by_insn(self, addr: int, op_type: ObservationPointType, predicate, block_idx: int | None = None):
175
193
  # find the starting block
@@ -202,23 +220,23 @@ class SRDAView:
202
220
  self, reg_offset: int, addr: int, op_type: ObservationPointType, block_idx: int | None = None
203
221
  ) -> VirtualVariable | None:
204
222
  reg_offset = get_reg_offset_base(reg_offset, self.model.arch)
205
- vvars = set()
223
+ vvars = []
206
224
  predicater = RegVVarPredicate(reg_offset, vvars, self.model.arch)
207
225
 
208
226
  self._get_vvar_by_insn(addr, op_type, predicater.predicate, block_idx=block_idx)
209
227
 
210
228
  assert len(vvars) <= 1
211
- return next(iter(vvars), None)
229
+ return vvars[0] if vvars else None
212
230
 
213
231
  def get_stack_vvar_by_insn( # pylint: disable=too-many-positional-arguments
214
232
  self, stack_offset: int, size: int, addr: int, op_type: ObservationPointType, block_idx: int | None = None
215
233
  ) -> VirtualVariable | None:
216
- vvars = set()
234
+ vvars = []
217
235
  predicater = StackVVarPredicate(stack_offset, size, vvars)
218
236
  self._get_vvar_by_insn(addr, op_type, predicater.predicate, block_idx=block_idx)
219
237
 
220
238
  assert len(vvars) <= 1
221
- return next(iter(vvars), None)
239
+ return vvars[0] if vvars else None
222
240
 
223
241
  def get_vvar_value(self, vvar: VirtualVariable) -> Expression | None:
224
242
  if vvar not in self.model.all_vvar_definitions:
@@ -227,7 +245,7 @@ class SRDAView:
227
245
 
228
246
  for block in self.model.func_graph:
229
247
  if block.addr == codeloc.block_addr and block.idx == codeloc.block_idx:
230
- if codeloc.stmt_idx < len(block.statements):
248
+ if codeloc.stmt_idx is not None and codeloc.stmt_idx < len(block.statements):
231
249
  stmt = block.statements[codeloc.stmt_idx]
232
250
  if isinstance(stmt, Assignment) and stmt.dst.likes(vvar):
233
251
  return stmt.src
@@ -185,7 +185,9 @@ class Sketch:
185
185
  return self.node_mapping[typevar]
186
186
  node: SketchNodeBase | None = None
187
187
  if isinstance(typevar, DerivedTypeVariable):
188
- node = self.node_mapping[SimpleSolver._to_typevar_or_typeconst(typevar.type_var)]
188
+ t = SimpleSolver._to_typevar_or_typeconst(typevar.type_var)
189
+ assert isinstance(t, TypeVariable)
190
+ node = self.node_mapping[t]
189
191
  for label in typevar.labels:
190
192
  succs = []
191
193
  for _, dst, data in self.graph.out_edges(node, data=True):
@@ -210,11 +212,26 @@ class Sketch:
210
212
  # sub <: super
211
213
  if not isinstance(constraint, Subtype):
212
214
  return
213
- subtype = self.flatten_typevar(constraint.sub_type)
214
- supertype = self.flatten_typevar(constraint.super_type)
215
+ subtype, _ = self.flatten_typevar(constraint.sub_type)
216
+ supertype, try_maxsize = self.flatten_typevar(constraint.super_type)
217
+
218
+ if (
219
+ try_maxsize
220
+ and isinstance(subtype, TypeVariable)
221
+ and subtype in self.solver.stackvar_max_sizes
222
+ and isinstance(supertype, TypeConstant)
223
+ and not isinstance(supertype, BottomType)
224
+ ):
225
+ basetype = supertype
226
+ assert basetype.size is not None
227
+ max_size = self.solver.stackvar_max_sizes.get(subtype, None)
228
+ if max_size not in {0, None} and max_size // basetype.size > 0: # type: ignore
229
+ supertype = Array(element=basetype, count=max_size // basetype.size) # type: ignore
230
+
215
231
  if SimpleSolver._typevar_inside_set(subtype, PRIMITIVE_TYPES) and not SimpleSolver._typevar_inside_set(
216
232
  supertype, PRIMITIVE_TYPES
217
233
  ):
234
+ assert isinstance(supertype, (TypeVariable, DerivedTypeVariable))
218
235
  super_node = self.lookup(supertype)
219
236
  assert super_node is None or isinstance(super_node, SketchNode)
220
237
  if super_node is not None:
@@ -222,6 +239,7 @@ class Sketch:
222
239
  elif SimpleSolver._typevar_inside_set(supertype, PRIMITIVE_TYPES) and not SimpleSolver._typevar_inside_set(
223
240
  subtype, PRIMITIVE_TYPES
224
241
  ):
242
+ assert isinstance(subtype, (TypeVariable, DerivedTypeVariable))
225
243
  sub_node = self.lookup(subtype)
226
244
  assert sub_node is None or isinstance(sub_node, SketchNode)
227
245
  # assert sub_node is not None
@@ -231,7 +249,7 @@ class Sketch:
231
249
  @staticmethod
232
250
  def flatten_typevar(
233
251
  derived_typevar: TypeVariable | TypeConstant | DerivedTypeVariable,
234
- ) -> DerivedTypeVariable | TypeVariable | TypeConstant:
252
+ ) -> tuple[DerivedTypeVariable | TypeVariable | TypeConstant, bool]:
235
253
  # pylint:disable=too-many-boolean-expressions
236
254
  if (
237
255
  isinstance(derived_typevar, DerivedTypeVariable)
@@ -243,8 +261,10 @@ class Sketch:
243
261
  and derived_typevar.labels[1].offset == 0
244
262
  and derived_typevar.labels[1].bits == MAX_POINTSTO_BITS
245
263
  ):
246
- return derived_typevar.type_var.basetype
247
- return derived_typevar
264
+ bt = derived_typevar.type_var.basetype
265
+ assert bt is not None
266
+ return bt, True
267
+ return derived_typevar, False
248
268
 
249
269
 
250
270
  #
@@ -313,6 +333,11 @@ class ConstraintGraphNode:
313
333
  else:
314
334
  prefix = DerivedTypeVariable(self.typevar.type_var, None, labels=self.typevar.labels[:-1])
315
335
  variance = Variance.COVARIANT if self.variance == last_label.variance else Variance.CONTRAVARIANT
336
+ if not isinstance(prefix, (TypeVariable, DerivedTypeVariable)):
337
+ # we may see incorrectly generated type constraints that attempt to load from an int:
338
+ # int64.load
339
+ # we don't want to entertain such constraints
340
+ return None
316
341
  return (
317
342
  ConstraintGraphNode(prefix, variance, self.tag, FORGOTTEN.PRE_FORGOTTEN),
318
343
  self.typevar.labels[-1],
@@ -330,6 +355,7 @@ class ConstraintGraphNode:
330
355
  raise TypeError(f"Unsupported type {type(self.typevar)}")
331
356
  variance = Variance.COVARIANT if self.variance == label.variance else Variance.CONTRAVARIANT
332
357
  var = typevar if not labels else DerivedTypeVariable(typevar, None, labels=labels)
358
+ assert isinstance(var, (TypeVariable, DerivedTypeVariable))
333
359
  return ConstraintGraphNode(var, variance, self.tag, FORGOTTEN.PRE_FORGOTTEN)
334
360
 
335
361
  def inverse(self) -> ConstraintGraphNode:
@@ -366,13 +392,14 @@ class SimpleSolver:
366
392
  improvements.
367
393
  """
368
394
 
369
- def __init__(self, bits: int, constraints, typevars):
395
+ def __init__(self, bits: int, constraints, typevars, stackvar_max_sizes: dict[TypeVariable, int] | None = None):
370
396
  if bits not in (32, 64):
371
397
  raise ValueError(f"Pointer size {bits} is not supported. Expect 32 or 64.")
372
398
 
373
399
  self.bits = bits
374
400
  self._constraints: dict[TypeVariable, set[TypeConstraint]] = constraints
375
401
  self._typevars: set[TypeVariable] = typevars
402
+ self.stackvar_max_sizes = stackvar_max_sizes if stackvar_max_sizes is not None else {}
376
403
  self._base_lattice = BASE_LATTICES[bits]
377
404
  self._base_lattice_inverted = networkx.DiGraph()
378
405
  for src, dst in self._base_lattice.edges:
@@ -1289,7 +1316,7 @@ class SimpleSolver:
1289
1316
  for _, succ, data in out_edges:
1290
1317
  if isinstance(succ, RecursiveRefNode):
1291
1318
  ref = succ
1292
- succ: SketchNode | None = sketch.lookup(succ.target)
1319
+ succ: SketchNode | None = sketch.lookup(succ.target) # type: ignore
1293
1320
  if succ is None:
1294
1321
  # failed to resolve...
1295
1322
  _l.warning(
@@ -37,6 +37,7 @@ class Typehoon(Analysis):
37
37
  ground_truth=None,
38
38
  var_mapping: dict[SimVariable, set[TypeVariable]] | None = None,
39
39
  must_struct: set[TypeVariable] | None = None,
40
+ stackvar_max_sizes: dict[TypeVariable, int] | None = None,
40
41
  ):
41
42
  """
42
43
 
@@ -52,6 +53,7 @@ class Typehoon(Analysis):
52
53
  self._ground_truth: dict[TypeVariable, SimType] | None = ground_truth
53
54
  self._var_mapping = var_mapping
54
55
  self._must_struct = must_struct
56
+ self._stackvar_max_sizes = stackvar_max_sizes if stackvar_max_sizes is not None else {}
55
57
 
56
58
  self.bits = self.project.arch.bits
57
59
  self.solution = None
@@ -163,7 +165,7 @@ class Typehoon(Analysis):
163
165
  typevars.add(constraint.sub_type)
164
166
  if isinstance(constraint.super_type, TypeVariable):
165
167
  typevars.add(constraint.super_type)
166
- solver = SimpleSolver(self.bits, self._constraints, typevars)
168
+ solver = SimpleSolver(self.bits, self._constraints, typevars, stackvar_max_sizes=self._stackvar_max_sizes)
167
169
  self.solution = solver.solution
168
170
 
169
171
  def _specialize(self):
@@ -333,7 +333,7 @@ class SimEngineVRAIL(
333
333
  tvs = set()
334
334
  for _, vvar in expr.src_and_vvars:
335
335
  if vvar is not None:
336
- r = self._read_from_vvar(vvar, expr=expr, vvar_id=self._mapped_vvarid(vvar.varid))
336
+ r = self._read_from_vvar(vvar, expr=vvar, vvar_id=self._mapped_vvarid(vvar.varid))
337
337
  if r.typevar is not None:
338
338
  tvs.add(r.typevar)
339
339
 
@@ -8,12 +8,13 @@ from archinfo.arch_arm import is_arm_arch
8
8
 
9
9
  from angr.block import Block
10
10
  from angr.errors import SimMemoryMissingError
11
- from angr.calling_conventions import SimRegArg, SimStackArg, default_cc
11
+ from angr.calling_conventions import SimRegArg, SimStackArg, SimTypeFunction, default_cc
12
12
  from angr.engines.vex.claripy.datalayer import value as claripy_value
13
13
  from angr.engines.light import SimEngineNostmtVEX
14
14
  from angr.knowledge_plugins import Function
15
15
  from angr.storage.memory_mixins.paged_memory.pages.multi_values import MultiValues
16
16
  from angr.analyses.typehoon import typevars, typeconsts
17
+ from angr.sim_type import SimTypeBottom
17
18
  from .engine_base import SimEngineVRBase, RichR
18
19
  from .irsb_scanner import VEXIRSBScanner
19
20
 
@@ -222,24 +223,39 @@ class SimEngineVRVEX(
222
223
 
223
224
  def _process_block_end(self, stmt_result, whitelist):
224
225
  # handles block-end calls
226
+ has_call = False
225
227
  current_addr = self.state.block_addr
226
228
  for target_func in self.call_info.get(current_addr, []):
227
229
  self._handle_function_concrete(target_func)
230
+ has_call = True
228
231
 
229
- if self.block.vex.jumpkind == "Ijk_Call":
232
+ if has_call or self.block.vex.jumpkind == "Ijk_Call":
230
233
  # emulates return values from calls
231
234
  cc = None
235
+ proto: SimTypeFunction | None = None
232
236
  for target_func in self.call_info.get(self.state.block_addr, []):
233
237
  if target_func.calling_convention is not None:
234
238
  cc = target_func.calling_convention
239
+ proto = target_func.prototype
235
240
  break
236
241
  if cc is None:
237
242
  cc = default_cc(self.arch.name, platform=self.project.simos.name)(self.arch)
238
- if isinstance(cc.RETURN_VAL, SimRegArg):
239
- reg_offset, reg_size = self.arch.registers[cc.RETURN_VAL.reg_name]
243
+
244
+ if proto is not None and not isinstance(proto.returnty, SimTypeBottom):
245
+ ret_reg = cc.return_val(proto.returnty)
246
+ else:
247
+ ret_reg = cc.RETURN_VAL
248
+ if isinstance(ret_reg, SimRegArg):
249
+ reg_offset, reg_size = self.arch.registers[ret_reg.reg_name]
240
250
  data = self._top(reg_size * self.arch.byte_width)
241
251
  self._assign_to_register(reg_offset, data, reg_size, create_variable=False)
242
252
 
253
+ # handle tail-call optimizations
254
+ if self.block.vex.jumpkind == "Ijk_Boring":
255
+ self.state.ret_val_size = (
256
+ reg_size if self.state.ret_val_size is None else max(self.state.ret_val_size, reg_size)
257
+ )
258
+
243
259
  elif self.block.vex.jumpkind == "Ijk_Ret":
244
260
  # handles return statements
245
261
 
@@ -2,7 +2,8 @@
2
2
  from __future__ import annotations
3
3
  import logging
4
4
  from typing import cast
5
- from collections.abc import Iterable
5
+
6
+ from collections.abc import Iterable, Sequence
6
7
  from collections import defaultdict
7
8
  import contextlib
8
9
 
@@ -82,7 +83,8 @@ class AllocHelper:
82
83
 
83
84
  def size(self):
84
85
  val = self.translate(self.ptr, claripy.BVV(0, len(self.ptr)))
85
- assert val.op == "BVV"
86
+ assert isinstance(val, claripy.ast.Base) and val.op == "BVV"
87
+ assert isinstance(val.args[0], int)
86
88
  return abs(val.args[0])
87
89
 
88
90
  @classmethod
@@ -130,6 +132,7 @@ def refine_locs_with_struct_type(
130
132
  arg_type = SimTypeInt(label=arg_type.label).with_arch(arch)
131
133
 
132
134
  if isinstance(arg_type, (SimTypeReg, SimTypeNum, SimTypeFloat)):
135
+ assert arg_type.size is not None
133
136
  seen_bytes = 0
134
137
  pieces = []
135
138
  while seen_bytes < arg_type.size // arch.byte_width:
@@ -147,20 +150,21 @@ def refine_locs_with_struct_type(
147
150
  piece.is_fp = True
148
151
  return piece
149
152
  if isinstance(arg_type, SimTypeFixedSizeArray):
153
+ assert arg_type.elem_type.size is not None and arg_type.length is not None
150
154
  # TODO explicit stride
151
- locs = [
155
+ locs_list = [
152
156
  refine_locs_with_struct_type(
153
157
  arch, locs, arg_type.elem_type, offset=offset + i * arg_type.elem_type.size // arch.byte_width
154
158
  )
155
159
  for i in range(arg_type.length)
156
160
  ]
157
- return SimArrayArg(locs)
161
+ return SimArrayArg(locs_list)
158
162
  if isinstance(arg_type, SimStruct):
159
- locs = {
163
+ locs_dict = {
160
164
  field: refine_locs_with_struct_type(arch, locs, field_ty, offset=offset + arg_type.offsets[field])
161
165
  for field, field_ty in arg_type.fields.items()
162
166
  }
163
- return SimStructArg(arg_type, locs)
167
+ return SimStructArg(arg_type, locs_dict)
164
168
  if isinstance(arg_type, SimUnion):
165
169
  # Treat a SimUnion as functionality equivalent to its longest member
166
170
  for member in arg_type.members.values():
@@ -574,8 +578,8 @@ class SimCC:
574
578
  # (if applicable) and the arguments. Probably zero.
575
579
  STACKARG_SP_DIFF = 0 # The amount of stack space reserved for the return address
576
580
  CALLER_SAVED_REGS: list[str] = [] # Caller-saved registers
577
- RETURN_ADDR: SimFunctionArgument = None # The location where the return address is stored, as a SimFunctionArgument
578
- RETURN_VAL: SimFunctionArgument = None # The location where the return value is stored, as a SimFunctionArgument
581
+ RETURN_ADDR: SimFunctionArgument # The location where the return address is stored, as a SimFunctionArgument
582
+ RETURN_VAL: SimFunctionArgument # The location where the return value is stored, as a SimFunctionArgument
579
583
  OVERFLOW_RETURN_VAL: SimFunctionArgument | None = (
580
584
  None # The second half of the location where a double-length return value is stored
581
585
  )
@@ -728,6 +732,7 @@ class SimCC:
728
732
  l.warning("Function argument type cannot be BOT. Treating it as a 32-bit int.")
729
733
  arg_type = SimTypeInt().with_arch(self.arch)
730
734
  is_fp = isinstance(arg_type, SimTypeFloat)
735
+ assert arg_type.size is not None
731
736
  size = arg_type.size // self.arch.byte_width
732
737
  try:
733
738
  arg = next(session.fp_iter) if is_fp else next(session.int_iter)
@@ -760,7 +765,7 @@ class SimCC:
760
765
  def is_fp_value(val):
761
766
  return (
762
767
  isinstance(val, (float, claripy.ast.FP))
763
- or (isinstance(val, claripy.ast.Base) and val.op.startswith("fp"))
768
+ or (isinstance(val, claripy.ast.Base) and val.op.startswith("fp")) # type: ignore
764
769
  or (isinstance(val, claripy.ast.Base) and val.op == "Reverse" and val.args[0].op.startswith("fp"))
765
770
  )
766
771
 
@@ -1130,7 +1135,7 @@ class SimCC:
1130
1135
 
1131
1136
  @staticmethod
1132
1137
  def find_cc(
1133
- arch: archinfo.Arch, args: list[SimFunctionArgument], sp_delta: int, platform: str = "Linux"
1138
+ arch: archinfo.Arch, args: Sequence[SimFunctionArgument], sp_delta: int, platform: str = "Linux"
1134
1139
  ) -> SimCC | None:
1135
1140
  """
1136
1141
  Pinpoint the best-fit calling convention and return the corresponding SimCC instance, or None if no fit is
@@ -1428,7 +1433,7 @@ class SimCCX86LinuxSyscall(SimCCSyscall):
1428
1433
 
1429
1434
  class SimCCX86WindowsSyscall(SimCCSyscall):
1430
1435
  # TODO: Make sure the information is correct
1431
- ARG_REGS = []
1436
+ ARG_REGS = ["ecx"]
1432
1437
  FP_ARG_REGS = []
1433
1438
  RETURN_VAL = SimRegArg("eax", 4)
1434
1439
  RETURN_ADDR = SimRegArg("ip_at_syscall", 4)
@@ -1668,7 +1673,7 @@ class SimCCAMD64LinuxSyscall(SimCCSyscall):
1668
1673
 
1669
1674
  class SimCCAMD64WindowsSyscall(SimCCSyscall):
1670
1675
  # TODO: Make sure the information is correct
1671
- ARG_REGS = []
1676
+ ARG_REGS = ["rcx"]
1672
1677
  FP_ARG_REGS = []
1673
1678
  RETURN_VAL = SimRegArg("rax", 8)
1674
1679
  RETURN_ADDR = SimRegArg("ip_at_syscall", 8)
angr/factory.py CHANGED
@@ -7,6 +7,7 @@ from typing import overload, TYPE_CHECKING
7
7
  import archinfo
8
8
  from archinfo.arch_soot import ArchSoot, SootAddressDescriptor
9
9
 
10
+ from .knowledge_plugins.functions import Function
10
11
  from .sim_state import SimState
11
12
  from .calling_conventions import default_cc, SimRegArg, SimStackArg, PointerWrapper, SimCCUnknown
12
13
  from .callable import Callable
@@ -236,7 +237,7 @@ class AngrObjectFactory:
236
237
 
237
238
  def callable(
238
239
  self,
239
- addr,
240
+ addr: int | Function,
240
241
  prototype=None,
241
242
  concrete_only=False,
242
243
  perform_merge=True,
@@ -251,8 +252,9 @@ class AngrObjectFactory:
251
252
  A Callable is a representation of a function in the binary that can be interacted with like a native python
252
253
  function.
253
254
 
254
- :param addr: The address of the function to use
255
- :param prototype: The prototype of the call to use, as a string or a SimTypeFunction
255
+ :param addr: The address of the function to use. If you pass in the function object, we will take
256
+ its addr.
257
+ :param prototype: The prototype of the call to use, as a string or a SimTypeFunction
256
258
  :param concrete_only: Throw an exception if the execution splits into multiple states
257
259
  :param perform_merge: Merge all result states into one at the end (only relevant if concrete_only=False)
258
260
  :param base_state: The state from which to do these runs
@@ -263,6 +265,9 @@ class AngrObjectFactory:
263
265
  python function.
264
266
  :rtype: angr.callable.Callable
265
267
  """
268
+ if isinstance(addr, Function):
269
+ addr = addr.addr
270
+
266
271
  return Callable(
267
272
  self.project,
268
273
  addr=addr,