angr 9.2.141__py3-none-macosx_11_0_arm64.whl → 9.2.143__py3-none-macosx_11_0_arm64.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (72):
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +26 -12
  3. angr/analyses/calling_convention/fact_collector.py +31 -9
  4. angr/analyses/cfg/cfg_base.py +38 -4
  5. angr/analyses/cfg/cfg_fast.py +23 -7
  6. angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +12 -1
  7. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +8 -1
  8. angr/analyses/class_identifier.py +8 -7
  9. angr/analyses/complete_calling_conventions.py +19 -6
  10. angr/analyses/decompiler/ail_simplifier.py +138 -98
  11. angr/analyses/decompiler/clinic.py +73 -5
  12. angr/analyses/decompiler/condition_processor.py +7 -7
  13. angr/analyses/decompiler/decompilation_cache.py +2 -1
  14. angr/analyses/decompiler/decompiler.py +10 -2
  15. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +4 -6
  16. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +8 -2
  17. angr/analyses/decompiler/optimization_passes/condition_constprop.py +110 -46
  18. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +8 -0
  19. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
  20. angr/analyses/decompiler/optimization_passes/optimization_pass.py +2 -0
  21. angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +29 -7
  22. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +6 -0
  23. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +9 -1
  24. angr/analyses/decompiler/peephole_optimizations/simplify_pc_relative_loads.py +15 -1
  25. angr/analyses/decompiler/region_identifier.py +70 -47
  26. angr/analyses/decompiler/sequence_walker.py +8 -0
  27. angr/analyses/decompiler/ssailification/rewriting.py +47 -17
  28. angr/analyses/decompiler/ssailification/rewriting_engine.py +13 -0
  29. angr/analyses/decompiler/stack_item.py +36 -0
  30. angr/analyses/decompiler/structured_codegen/c.py +14 -9
  31. angr/analyses/decompiler/structuring/phoenix.py +3 -3
  32. angr/analyses/decompiler/utils.py +13 -0
  33. angr/analyses/find_objects_static.py +2 -1
  34. angr/analyses/reaching_definitions/engine_vex.py +13 -0
  35. angr/analyses/reaching_definitions/function_handler.py +24 -10
  36. angr/analyses/reaching_definitions/function_handler_library/stdio.py +1 -0
  37. angr/analyses/reaching_definitions/function_handler_library/stdlib.py +45 -12
  38. angr/analyses/reaching_definitions/function_handler_library/string.py +77 -21
  39. angr/analyses/reaching_definitions/function_handler_library/unistd.py +21 -1
  40. angr/analyses/reaching_definitions/rd_state.py +11 -7
  41. angr/analyses/s_liveness.py +44 -6
  42. angr/analyses/s_propagator.py +40 -29
  43. angr/analyses/s_reaching_definitions/s_rda_model.py +48 -37
  44. angr/analyses/s_reaching_definitions/s_rda_view.py +6 -3
  45. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +21 -21
  46. angr/analyses/typehoon/simple_solver.py +35 -8
  47. angr/analyses/typehoon/typehoon.py +3 -1
  48. angr/analyses/variable_recovery/engine_ail.py +6 -6
  49. angr/calling_conventions.py +20 -10
  50. angr/knowledge_plugins/functions/function.py +5 -10
  51. angr/knowledge_plugins/variables/variable_manager.py +27 -0
  52. angr/lib/angr_native.dylib +0 -0
  53. angr/procedures/definitions/__init__.py +3 -10
  54. angr/procedures/definitions/linux_kernel.py +5 -0
  55. angr/procedures/definitions/wdk_ntoskrnl.py +2 -0
  56. angr/procedures/win32_kernel/__fastfail.py +15 -0
  57. angr/sim_procedure.py +2 -2
  58. angr/simos/simos.py +14 -10
  59. angr/simos/windows.py +42 -1
  60. angr/utils/ail.py +41 -1
  61. angr/utils/cpp.py +17 -0
  62. angr/utils/doms.py +149 -0
  63. angr/utils/library.py +1 -1
  64. angr/utils/ssa/__init__.py +21 -14
  65. angr/utils/ssa/vvar_uses_collector.py +2 -2
  66. angr/utils/types.py +12 -1
  67. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/METADATA +7 -7
  68. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/RECORD +72 -68
  69. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/LICENSE +0 -0
  70. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/WHEEL +0 -0
  71. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/entry_points.txt +0 -0
  72. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/top_level.txt +0 -0
@@ -26,6 +26,7 @@ from angr.utils.ssa import (
26
26
  get_vvar_deflocs,
27
27
  has_ite_expr,
28
28
  has_ite_stmt,
29
+ has_tmp_expr,
29
30
  is_phi_assignment,
30
31
  is_const_assignment,
31
32
  is_const_and_vvar_assignment,
@@ -126,7 +127,7 @@ class SPropagatorAnalysis(Analysis):
126
127
  # update vvar_deflocs using function arguments
127
128
  if self.func_args:
128
129
  for func_arg in self.func_args:
129
- vvar_deflocs[func_arg] = ExternalCodeLocation()
130
+ vvar_deflocs[func_arg.varid] = func_arg, ExternalCodeLocation()
130
131
 
131
132
  # find all ret sites and indirect jump sites
132
133
  retsites: set[tuple[int, int | None, int]] = set()
@@ -143,11 +144,11 @@ class SPropagatorAnalysis(Analysis):
143
144
  # find constant and other propagatable assignments
144
145
  vvarid_to_vvar = {}
145
146
  const_vvars: dict[int, Const] = {}
146
- for vvar, defloc in vvar_deflocs.items():
147
+ for vvar_id, (vvar, defloc) in vvar_deflocs.items():
147
148
  if not vvar.was_reg and not vvar.was_parameter:
148
149
  continue
149
150
 
150
- vvarid_to_vvar[vvar.varid] = vvar
151
+ vvarid_to_vvar[vvar_id] = vvar
151
152
  if isinstance(defloc, ExternalCodeLocation):
152
153
  continue
153
154
 
@@ -160,8 +161,8 @@ class SPropagatorAnalysis(Analysis):
160
161
  if r:
161
162
  # replace wherever it's used
162
163
  assert v is not None
163
- const_vvars[vvar.varid] = v
164
- for vvar_at_use, useloc in vvar_uselocs[vvar.varid]:
164
+ const_vvars[vvar_id] = v
165
+ for vvar_at_use, useloc in vvar_uselocs[vvar_id]:
165
166
  replacements[useloc][vvar_at_use] = v
166
167
  continue
167
168
 
@@ -189,10 +190,10 @@ class SPropagatorAnalysis(Analysis):
189
190
  if self.mode == "function":
190
191
  assert self.func_graph is not None
191
192
 
192
- for vvar, defloc in vvar_deflocs.items():
193
- if vvar.varid not in vvar_uselocs:
193
+ for vvar_id, (vvar, defloc) in vvar_deflocs.items():
194
+ if vvar_id not in vvar_uselocs:
194
195
  continue
195
- if vvar.varid in const_vvars:
196
+ if vvar_id in const_vvars:
196
197
  continue
197
198
  if isinstance(defloc, ExternalCodeLocation):
198
199
  continue
@@ -200,11 +201,13 @@ class SPropagatorAnalysis(Analysis):
200
201
  assert defloc.block_addr is not None
201
202
  assert defloc.stmt_idx is not None
202
203
 
204
+ vvar_uselocs_set = set(vvar_uselocs[vvar_id]) # deduplicate
205
+
203
206
  block = blocks[(defloc.block_addr, defloc.block_idx)]
204
207
  stmt = block.statements[defloc.stmt_idx]
205
208
  if (
206
209
  (vvar.was_reg or vvar.was_parameter)
207
- and len(vvar_uselocs[vvar.varid]) <= 2
210
+ and len(vvar_uselocs_set) <= 2
208
211
  and isinstance(stmt, Assignment)
209
212
  and isinstance(stmt.src, Load)
210
213
  ):
@@ -215,43 +218,46 @@ class SPropagatorAnalysis(Analysis):
215
218
  # v1 = v0 + 1;
216
219
  # }
217
220
  can_replace = True
218
- for _, vvar_useloc in vvar_uselocs[vvar.varid]:
221
+ for _, vvar_useloc in vvar_uselocs_set:
219
222
  if has_store_stmt_in_between_stmts(self.func_graph, blocks, defloc, vvar_useloc):
220
223
  can_replace = False
221
224
 
222
225
  if can_replace:
223
226
  # we can propagate this load because there is no store between its def and use
224
- for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
227
+ for vvar_used, vvar_useloc in vvar_uselocs_set:
225
228
  replacements[vvar_useloc][vvar_used] = stmt.src
226
229
  continue
227
230
 
228
231
  if (
229
232
  (vvar.was_reg or vvar.was_stack)
230
- and len(vvar_uselocs[vvar.varid]) == 2
233
+ and len(vvar_uselocs_set) == 2
234
+ and isinstance(stmt, Assignment)
231
235
  and not is_phi_assignment(stmt)
232
236
  ):
233
237
  # a special case: in a typical switch-case construct, a variable may be used once for comparison
234
238
  # for the default case and then used again for constructing the jump target. we can propagate this
235
239
  # variable for such cases.
236
- uselocs = {loc for _, loc in vvar_uselocs[vvar.varid]}
237
- if self.is_vvar_used_for_addr_loading_switch_case(uselocs, blocks):
238
- for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
240
+ uselocs = {loc for _, loc in vvar_uselocs_set}
241
+ if self.is_vvar_used_for_addr_loading_switch_case(uselocs, blocks) and not has_tmp_expr(stmt.src):
242
+ for vvar_used, vvar_useloc in vvar_uselocs_set:
239
243
  replacements[vvar_useloc][vvar_used] = stmt.src
240
244
  # mark the vvar as dead and should be removed
241
245
  self.model.dead_vvar_ids.add(vvar.varid)
242
246
  continue
243
247
 
244
248
  if vvar.was_reg or vvar.was_parameter:
245
- if len(vvar_uselocs[vvar.varid]) == 1:
246
- vvar_used, vvar_useloc = next(iter(vvar_uselocs[vvar.varid]))
247
- if is_const_vvar_load_assignment(stmt) and not has_store_stmt_in_between_stmts(
248
- self.func_graph, blocks, defloc, vvar_useloc
249
+ if len(vvar_uselocs_set) == 1:
250
+ vvar_used, vvar_useloc = next(iter(vvar_uselocs_set))
251
+ if (
252
+ is_const_vvar_load_assignment(stmt)
253
+ and not has_store_stmt_in_between_stmts(self.func_graph, blocks, defloc, vvar_useloc)
254
+ and not has_tmp_expr(stmt.src)
249
255
  ):
250
256
  # we can propagate this load because there is no store between its def and use
251
257
  replacements[vvar_useloc][vvar_used] = stmt.src
252
258
  continue
253
259
 
254
- if is_const_and_vvar_assignment(stmt):
260
+ if is_const_and_vvar_assignment(stmt) and not has_tmp_expr(stmt.src):
255
261
  # if the useloc is a phi assignment statement, ensure that stmt.src is the same as the phi
256
262
  # variable
257
263
  assert vvar_useloc.block_addr is not None
@@ -273,18 +279,22 @@ class SPropagatorAnalysis(Analysis):
273
279
  else:
274
280
  non_exitsite_uselocs = [
275
281
  loc
276
- for _, loc in vvar_uselocs[vvar.varid]
282
+ for _, loc in vvar_uselocs_set
277
283
  if (loc.block_addr, loc.block_idx, loc.stmt_idx) not in (retsites | jumpsites)
278
284
  ]
279
285
  if is_const_and_vvar_assignment(stmt):
280
286
  if len(non_exitsite_uselocs) == 1:
281
287
  # this vvar is used once if we exclude its uses at ret sites or jump sites. we can
282
288
  # propagate it
283
- for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
289
+ for vvar_used, vvar_useloc in vvar_uselocs_set:
284
290
  replacements[vvar_useloc][vvar_used] = stmt.src
285
291
  continue
286
292
 
287
- if len(set(non_exitsite_uselocs)) == 1 and not has_ite_expr(stmt.src):
293
+ if (
294
+ len(set(non_exitsite_uselocs)) == 1
295
+ and not has_ite_expr(stmt.src)
296
+ and not has_tmp_expr(stmt.src)
297
+ ):
288
298
  useloc = non_exitsite_uselocs[0]
289
299
  assert useloc.block_addr is not None
290
300
  assert useloc.stmt_idx is not None
@@ -292,13 +302,13 @@ class SPropagatorAnalysis(Analysis):
292
302
  if stmt.src.depth <= 3 and not has_ite_stmt(useloc_stmt):
293
303
  # remove duplicate use locs (e.g., if the variable is used multiple times by the
294
304
  # same statement) - but ensure stmt is simple enough
295
- for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
305
+ for vvar_used, vvar_useloc in vvar_uselocs_set:
296
306
  replacements[vvar_useloc][vvar_used] = stmt.src
297
307
  continue
298
308
 
299
309
  # special logic for global variables: if it's used once or multiple times, and the variable is never
300
310
  # updated before it's used, we will propagate the load
301
- if (vvar.was_reg or vvar.was_parameter) and isinstance(stmt, Assignment):
311
+ if (vvar.was_reg or vvar.was_parameter) and isinstance(stmt, Assignment) and not has_tmp_expr(stmt.src):
302
312
  stmt_src = stmt.src
303
313
  # unpack conversions
304
314
  while isinstance(stmt_src, Convert):
@@ -309,7 +319,7 @@ class SPropagatorAnalysis(Analysis):
309
319
  and isinstance(stmt_src.addr.value, int)
310
320
  ):
311
321
  gv_updated = False
312
- for _vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
322
+ for _vvar_used, vvar_useloc in vvar_uselocs_set:
313
323
  gv_updated |= self.is_global_variable_updated(
314
324
  self.func_graph,
315
325
  blocks,
@@ -320,12 +330,13 @@ class SPropagatorAnalysis(Analysis):
320
330
  vvar_useloc,
321
331
  )
322
332
  if not gv_updated:
323
- for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
333
+ for vvar_used, vvar_useloc in vvar_uselocs_set:
324
334
  replacements[vvar_useloc][vvar_used] = stmt.src
325
335
  continue
326
336
 
327
337
  for vvar_id, uselocs in vvar_uselocs.items():
328
338
  vvar = next(iter(uselocs))[0] if vvar_id not in vvarid_to_vvar else vvarid_to_vvar[vvar_id]
339
+ vvar_uselocs_set = set(uselocs) # deduplicate
329
340
 
330
341
  if self._sp_tracker is not None and vvar.category == VirtualVariableCategory.REGISTER:
331
342
  if vvar.oident == self.project.arch.sp_offset:
@@ -334,7 +345,7 @@ class SPropagatorAnalysis(Analysis):
334
345
  if "sp" in self.project.arch.registers
335
346
  else None
336
347
  )
337
- for vvar_at_use, useloc in vvar_uselocs[vvar.varid]:
348
+ for vvar_at_use, useloc in vvar_uselocs_set:
338
349
  sb_offset = self._sp_tracker.offset_before(useloc.ins_addr, self.project.arch.sp_offset)
339
350
  if sb_offset is not None:
340
351
  v = StackBaseOffset(None, self.project.arch.bits, sb_offset)
@@ -349,7 +360,7 @@ class SPropagatorAnalysis(Analysis):
349
360
  if "bp" in self.project.arch.registers
350
361
  else None
351
362
  )
352
- for vvar_at_use, useloc in vvar_uselocs[vvar.varid]:
363
+ for vvar_at_use, useloc in vvar_uselocs_set:
353
364
  sb_offset = self._sp_tracker.offset_before(useloc.ins_addr, self.project.arch.bp_offset)
354
365
  if sb_offset is not None:
355
366
  v = StackBaseOffset(None, self.project.arch.bits, sb_offset)
@@ -20,26 +20,35 @@ class SRDAModel:
20
20
  self.func_args = func_args
21
21
  self.arch = arch
22
22
  self.varid_to_vvar: dict[int, VirtualVariable] = {}
23
- self.all_vvar_definitions: dict[VirtualVariable, CodeLocation] = {}
24
- self.all_vvar_uses: dict[VirtualVariable, set[tuple[VirtualVariable | None, CodeLocation]]] = defaultdict(set)
23
+ self.all_vvar_definitions: dict[int, CodeLocation] = {}
24
+ self.all_vvar_uses: dict[int, list[tuple[VirtualVariable | None, CodeLocation]]] = defaultdict(list)
25
25
  self.all_tmp_definitions: dict[CodeLocation, dict[atoms.Tmp, int]] = defaultdict(dict)
26
26
  self.all_tmp_uses: dict[CodeLocation, dict[atoms.Tmp, set[tuple[Tmp, int]]]] = defaultdict(dict)
27
27
  self.phi_vvar_ids: set[int] = set()
28
28
  self.phivarid_to_varids: dict[int, set[int]] = {}
29
+ self.vvar_uses_by_loc: dict[CodeLocation, list[int]] = {}
30
+
31
+ def add_vvar_use(self, vvar_id: int, expr: VirtualVariable | None, loc: CodeLocation) -> None:
32
+ self.all_vvar_uses[vvar_id].append((expr, loc))
33
+ if loc not in self.vvar_uses_by_loc:
34
+ self.vvar_uses_by_loc[loc] = []
35
+ self.vvar_uses_by_loc[loc].append(vvar_id)
29
36
 
30
37
  @property
31
38
  def all_definitions(self) -> Generator[Definition]:
32
- for vvar, defloc in self.all_vvar_definitions.items():
33
- yield Definition(atoms.VirtualVariable(vvar.varid, vvar.size, vvar.category, vvar.oident), defloc)
39
+ for vvar_id, defloc in self.all_vvar_definitions.items():
40
+ vvar = self.varid_to_vvar[vvar_id]
41
+ yield Definition(atoms.VirtualVariable(vvar_id, vvar.size, vvar.category, vvar.oident), defloc)
34
42
 
35
43
  def is_phi_vvar_id(self, idx: int) -> bool:
36
44
  return idx in self.phi_vvar_ids
37
45
 
38
46
  def get_all_definitions(self, block_loc: CodeLocation) -> set[Definition]:
39
47
  s = set()
40
- for vvar, codeloc in self.all_vvar_definitions.items():
48
+ for vvar_id, codeloc in self.all_vvar_definitions.items():
49
+ vvar = self.varid_to_vvar[vvar_id]
41
50
  if codeloc.block_addr == block_loc.block_addr and codeloc.block_idx == block_loc.block_idx:
42
- s.add(Definition(atoms.VirtualVariable(vvar.varid, vvar.size, vvar.category, vvar.oident), codeloc))
51
+ s.add(Definition(atoms.VirtualVariable(vvar_id, vvar.size, vvar.category, vvar.oident), codeloc))
43
52
  return s | self.get_all_tmp_definitions(block_loc)
44
53
 
45
54
  def get_all_tmp_definitions(self, block_loc: CodeLocation) -> set[Definition]:
@@ -64,43 +73,45 @@ class SRDAModel:
64
73
  :return: A set of definitions that are used at the given location.
65
74
  """
66
75
  if exprs:
67
- defs: set[tuple[Definition, Any]] = set()
68
- for vvar, uses in self.all_vvar_uses.items():
69
- for expr, loc_ in uses:
70
- if loc_ == loc:
71
- defs.add(
72
- (
73
- Definition(
74
- atoms.VirtualVariable(vvar.varid, vvar.size, vvar.category, vvar.oident),
75
- self.all_vvar_definitions[vvar],
76
- ),
77
- expr,
78
- )
79
- )
80
- return defs
81
-
82
- defs: set[Definition] = set()
83
- for vvar, uses in self.all_vvar_uses.items():
84
- for _, loc_ in uses:
85
- if loc_ == loc:
86
- defs.add(
76
+ def_with_exprs: set[tuple[Definition, Any]] = set()
77
+ if loc not in self.vvar_uses_by_loc:
78
+ return def_with_exprs
79
+ for vvar_id in self.vvar_uses_by_loc[loc]:
80
+ vvar = self.varid_to_vvar[vvar_id]
81
+ def_with_exprs.add(
82
+ (
87
83
  Definition(
88
- atoms.VirtualVariable(vvar.varid, vvar.size, vvar.category, vvar.oident),
89
- self.all_vvar_definitions[vvar],
90
- )
84
+ atoms.VirtualVariable(vvar_id, vvar.size, vvar.category, vvar.oident),
85
+ self.all_vvar_definitions[vvar_id],
86
+ ),
87
+ vvar,
91
88
  )
89
+ )
90
+ return def_with_exprs
91
+
92
+ defs: set[Definition] = set()
93
+ if loc not in self.vvar_uses_by_loc:
94
+ return defs
95
+ for vvar_id in self.vvar_uses_by_loc[loc]:
96
+ vvar = self.varid_to_vvar[vvar_id]
97
+ defs.add(
98
+ Definition(
99
+ atoms.VirtualVariable(vvar_id, vvar.size, vvar.category, vvar.oident),
100
+ self.all_vvar_definitions[vvar_id],
101
+ )
102
+ )
92
103
  return defs
93
104
 
94
- def get_vvar_uses(self, obj: atoms.VirtualVariable) -> set[CodeLocation]:
95
- the_vvar = self.varid_to_vvar.get(obj.varid, None)
96
- if the_vvar is not None:
97
- return {loc for _, loc in self.all_vvar_uses[the_vvar]}
105
+ def get_vvar_uses(self, obj: VirtualVariable | atoms.VirtualVariable) -> set[CodeLocation]:
106
+ if obj.varid in self.all_vvar_uses:
107
+ return {loc for _, loc in self.all_vvar_uses[obj.varid]}
98
108
  return set()
99
109
 
100
- def get_vvar_uses_with_expr(self, obj: atoms.VirtualVariable) -> set[tuple[CodeLocation, VirtualVariable]]:
101
- the_vvar = self.varid_to_vvar.get(obj.varid, None)
102
- if the_vvar is not None:
103
- return {(loc, expr) for expr, loc in self.all_vvar_uses[the_vvar]}
110
+ def get_vvar_uses_with_expr(
111
+ self, obj: VirtualVariable | atoms.VirtualVariable
112
+ ) -> set[tuple[VirtualVariable | None, CodeLocation]]:
113
+ if obj.varid in self.all_vvar_uses:
114
+ return set(self.all_vvar_uses[obj.varid])
104
115
  return set()
105
116
 
106
117
  def get_tmp_uses(self, obj: atoms.Tmp, block_loc: CodeLocation) -> set[CodeLocation]:
@@ -185,7 +185,10 @@ class SRDAView:
185
185
  vvars.append(func_arg)
186
186
  # there might be multiple vvars; we prioritize the one whose size fits the best
187
187
  for v in vvars:
188
- if v.stack_offset == stack_offset and v.size == size:
188
+ if (
189
+ (v.was_stack and v.stack_offset == stack_offset)
190
+ or (v.was_parameter and v.parameter_stack_offset == stack_offset)
191
+ ) and v.size == size:
189
192
  return v
190
193
  return vvars[0] if vvars else None
191
194
 
@@ -239,9 +242,9 @@ class SRDAView:
239
242
  return vvars[0] if vvars else None
240
243
 
241
244
  def get_vvar_value(self, vvar: VirtualVariable) -> Expression | None:
242
- if vvar not in self.model.all_vvar_definitions:
245
+ if vvar.varid not in self.model.all_vvar_definitions:
243
246
  return None
244
- codeloc = self.model.all_vvar_definitions[vvar]
247
+ codeloc = self.model.all_vvar_definitions[vvar.varid]
245
248
 
246
249
  for block in self.model.func_graph:
247
250
  if block.addr == codeloc.block_addr and block.idx == codeloc.block_idx:
@@ -63,7 +63,7 @@ class SReachingDefinitionsAnalysis(Analysis):
63
63
  case _:
64
64
  raise NotImplementedError
65
65
 
66
- phi_vvars = {}
66
+ phi_vvars: dict[int, set[int]] = {}
67
67
  # find all vvar definitions
68
68
  vvar_deflocs = get_vvar_deflocs(blocks.values(), phi_vvars=phi_vvars)
69
69
  # find all explicit vvar uses
@@ -72,34 +72,35 @@ class SReachingDefinitionsAnalysis(Analysis):
72
72
  # update vvar definitions using function arguments
73
73
  if self.func_args:
74
74
  for vvar in self.func_args:
75
- if vvar not in vvar_deflocs:
76
- vvar_deflocs[vvar] = ExternalCodeLocation()
75
+ if vvar.varid not in vvar_deflocs:
76
+ vvar_deflocs[vvar.varid] = vvar, ExternalCodeLocation()
77
77
  self.model.func_args = self.func_args
78
78
 
79
79
  # update model
80
- for vvar, defloc in vvar_deflocs.items():
81
- self.model.varid_to_vvar[vvar.varid] = vvar
82
- self.model.all_vvar_definitions[vvar] = defloc
83
-
84
- for vvar_at_use, useloc in vvar_uselocs[vvar.varid]:
85
- self.model.all_vvar_uses[vvar].add((vvar_at_use, useloc))
86
-
87
- self.model.phi_vvar_ids = {vvar.varid for vvar in phi_vvars}
80
+ for vvar_id, (vvar, defloc) in vvar_deflocs.items():
81
+ self.model.varid_to_vvar[vvar_id] = vvar
82
+ self.model.all_vvar_definitions[vvar_id] = defloc
83
+ if vvar_id in vvar_uselocs:
84
+ for useloc in vvar_uselocs[vvar_id]:
85
+ self.model.add_vvar_use(vvar_id, *useloc)
86
+
87
+ self.model.phi_vvar_ids = set(phi_vvars)
88
88
  self.model.phivarid_to_varids = {}
89
- for vvar, src_vvars in phi_vvars.items():
90
- self.model.phivarid_to_varids[vvar.varid] = {
91
- src_vvar.varid for src_vvar in src_vvars if src_vvar is not None
92
- }
89
+ for vvar_id, src_vvars in phi_vvars.items():
90
+ self.model.phivarid_to_varids[vvar_id] = src_vvars
93
91
 
94
92
  if self.mode == "function":
93
+
95
94
  # fix register definitions for arguments
96
- defined_vvarids = {vvar.varid for vvar in vvar_deflocs}
95
+ defined_vvarids = set(vvar_deflocs)
97
96
  undefined_vvarids = set(vvar_uselocs.keys()).difference(defined_vvarids)
98
97
  for vvar_id in undefined_vvarids:
99
98
  used_vvar = next(iter(vvar_uselocs[vvar_id]))[0]
100
- self.model.varid_to_vvar[used_vvar.varid] = used_vvar
101
- self.model.all_vvar_definitions[used_vvar] = ExternalCodeLocation()
102
- self.model.all_vvar_uses[used_vvar] |= vvar_uselocs[vvar_id]
99
+ self.model.varid_to_vvar[vvar_id] = used_vvar
100
+ self.model.all_vvar_definitions[vvar_id] = ExternalCodeLocation()
101
+ if vvar_id in vvar_uselocs:
102
+ for vvar_useloc in vvar_uselocs[vvar_id]:
103
+ self.model.add_vvar_use(vvar_id, *vvar_useloc)
103
104
 
104
105
  srda_view = SRDAView(self.model)
105
106
 
@@ -151,8 +152,7 @@ class SReachingDefinitionsAnalysis(Analysis):
151
152
  reg_offset = self.project.arch.registers[arg_reg_name][0]
152
153
  if reg_offset in reg_to_vvarids:
153
154
  vvarid = reg_to_vvarids[reg_offset]
154
- vvar = self.model.varid_to_vvar[vvarid]
155
- self.model.all_vvar_uses[vvar].add((None, codeloc))
155
+ self.model.add_vvar_use(vvarid, None, codeloc)
156
156
 
157
157
  if self._track_tmps:
158
158
  # track tmps
@@ -185,7 +185,9 @@ class Sketch:
185
185
  return self.node_mapping[typevar]
186
186
  node: SketchNodeBase | None = None
187
187
  if isinstance(typevar, DerivedTypeVariable):
188
- node = self.node_mapping[SimpleSolver._to_typevar_or_typeconst(typevar.type_var)]
188
+ t = SimpleSolver._to_typevar_or_typeconst(typevar.type_var)
189
+ assert isinstance(t, TypeVariable)
190
+ node = self.node_mapping[t]
189
191
  for label in typevar.labels:
190
192
  succs = []
191
193
  for _, dst, data in self.graph.out_edges(node, data=True):
@@ -210,11 +212,26 @@ class Sketch:
210
212
  # sub <: super
211
213
  if not isinstance(constraint, Subtype):
212
214
  return
213
- subtype = self.flatten_typevar(constraint.sub_type)
214
- supertype = self.flatten_typevar(constraint.super_type)
215
+ subtype, _ = self.flatten_typevar(constraint.sub_type)
216
+ supertype, try_maxsize = self.flatten_typevar(constraint.super_type)
217
+
218
+ if (
219
+ try_maxsize
220
+ and isinstance(subtype, TypeVariable)
221
+ and subtype in self.solver.stackvar_max_sizes
222
+ and isinstance(supertype, TypeConstant)
223
+ and not isinstance(supertype, BottomType)
224
+ ):
225
+ basetype = supertype
226
+ assert basetype.size is not None
227
+ max_size = self.solver.stackvar_max_sizes.get(subtype, None)
228
+ if max_size not in {0, None} and max_size // basetype.size > 0: # type: ignore
229
+ supertype = Array(element=basetype, count=max_size // basetype.size) # type: ignore
230
+
215
231
  if SimpleSolver._typevar_inside_set(subtype, PRIMITIVE_TYPES) and not SimpleSolver._typevar_inside_set(
216
232
  supertype, PRIMITIVE_TYPES
217
233
  ):
234
+ assert isinstance(supertype, (TypeVariable, DerivedTypeVariable))
218
235
  super_node = self.lookup(supertype)
219
236
  assert super_node is None or isinstance(super_node, SketchNode)
220
237
  if super_node is not None:
@@ -222,6 +239,7 @@ class Sketch:
222
239
  elif SimpleSolver._typevar_inside_set(supertype, PRIMITIVE_TYPES) and not SimpleSolver._typevar_inside_set(
223
240
  subtype, PRIMITIVE_TYPES
224
241
  ):
242
+ assert isinstance(subtype, (TypeVariable, DerivedTypeVariable))
225
243
  sub_node = self.lookup(subtype)
226
244
  assert sub_node is None or isinstance(sub_node, SketchNode)
227
245
  # assert sub_node is not None
@@ -231,7 +249,7 @@ class Sketch:
231
249
  @staticmethod
232
250
  def flatten_typevar(
233
251
  derived_typevar: TypeVariable | TypeConstant | DerivedTypeVariable,
234
- ) -> DerivedTypeVariable | TypeVariable | TypeConstant:
252
+ ) -> tuple[DerivedTypeVariable | TypeVariable | TypeConstant, bool]:
235
253
  # pylint:disable=too-many-boolean-expressions
236
254
  if (
237
255
  isinstance(derived_typevar, DerivedTypeVariable)
@@ -243,8 +261,10 @@ class Sketch:
243
261
  and derived_typevar.labels[1].offset == 0
244
262
  and derived_typevar.labels[1].bits == MAX_POINTSTO_BITS
245
263
  ):
246
- return derived_typevar.type_var.basetype
247
- return derived_typevar
264
+ bt = derived_typevar.type_var.basetype
265
+ assert bt is not None
266
+ return bt, True
267
+ return derived_typevar, False
248
268
 
249
269
 
250
270
  #
@@ -313,6 +333,11 @@ class ConstraintGraphNode:
313
333
  else:
314
334
  prefix = DerivedTypeVariable(self.typevar.type_var, None, labels=self.typevar.labels[:-1])
315
335
  variance = Variance.COVARIANT if self.variance == last_label.variance else Variance.CONTRAVARIANT
336
+ if not isinstance(prefix, (TypeVariable, DerivedTypeVariable)):
337
+ # we may see incorrectly generated type constraints that attempt to load from an int:
338
+ # int64.load
339
+ # we don't want to entertain such constraints
340
+ return None
316
341
  return (
317
342
  ConstraintGraphNode(prefix, variance, self.tag, FORGOTTEN.PRE_FORGOTTEN),
318
343
  self.typevar.labels[-1],
@@ -330,6 +355,7 @@ class ConstraintGraphNode:
330
355
  raise TypeError(f"Unsupported type {type(self.typevar)}")
331
356
  variance = Variance.COVARIANT if self.variance == label.variance else Variance.CONTRAVARIANT
332
357
  var = typevar if not labels else DerivedTypeVariable(typevar, None, labels=labels)
358
+ assert isinstance(var, (TypeVariable, DerivedTypeVariable))
333
359
  return ConstraintGraphNode(var, variance, self.tag, FORGOTTEN.PRE_FORGOTTEN)
334
360
 
335
361
  def inverse(self) -> ConstraintGraphNode:
@@ -366,13 +392,14 @@ class SimpleSolver:
366
392
  improvements.
367
393
  """
368
394
 
369
- def __init__(self, bits: int, constraints, typevars):
395
+ def __init__(self, bits: int, constraints, typevars, stackvar_max_sizes: dict[TypeVariable, int] | None = None):
370
396
  if bits not in (32, 64):
371
397
  raise ValueError(f"Pointer size {bits} is not supported. Expect 32 or 64.")
372
398
 
373
399
  self.bits = bits
374
400
  self._constraints: dict[TypeVariable, set[TypeConstraint]] = constraints
375
401
  self._typevars: set[TypeVariable] = typevars
402
+ self.stackvar_max_sizes = stackvar_max_sizes if stackvar_max_sizes is not None else {}
376
403
  self._base_lattice = BASE_LATTICES[bits]
377
404
  self._base_lattice_inverted = networkx.DiGraph()
378
405
  for src, dst in self._base_lattice.edges:
@@ -1289,7 +1316,7 @@ class SimpleSolver:
1289
1316
  for _, succ, data in out_edges:
1290
1317
  if isinstance(succ, RecursiveRefNode):
1291
1318
  ref = succ
1292
- succ: SketchNode | None = sketch.lookup(succ.target)
1319
+ succ: SketchNode | None = sketch.lookup(succ.target) # type: ignore
1293
1320
  if succ is None:
1294
1321
  # failed to resolve...
1295
1322
  _l.warning(
@@ -37,6 +37,7 @@ class Typehoon(Analysis):
37
37
  ground_truth=None,
38
38
  var_mapping: dict[SimVariable, set[TypeVariable]] | None = None,
39
39
  must_struct: set[TypeVariable] | None = None,
40
+ stackvar_max_sizes: dict[TypeVariable, int] | None = None,
40
41
  ):
41
42
  """
42
43
 
@@ -52,6 +53,7 @@ class Typehoon(Analysis):
52
53
  self._ground_truth: dict[TypeVariable, SimType] | None = ground_truth
53
54
  self._var_mapping = var_mapping
54
55
  self._must_struct = must_struct
56
+ self._stackvar_max_sizes = stackvar_max_sizes if stackvar_max_sizes is not None else {}
55
57
 
56
58
  self.bits = self.project.arch.bits
57
59
  self.solution = None
@@ -163,7 +165,7 @@ class Typehoon(Analysis):
163
165
  typevars.add(constraint.sub_type)
164
166
  if isinstance(constraint.super_type, TypeVariable):
165
167
  typevars.add(constraint.super_type)
166
- solver = SimpleSolver(self.bits, self._constraints, typevars)
168
+ solver = SimpleSolver(self.bits, self._constraints, typevars, stackvar_max_sizes=self._stackvar_max_sizes)
167
169
  self.solution = solver.solution
168
170
 
169
171
  def _specialize(self):
@@ -143,9 +143,9 @@ class SimEngineVRAIL(
143
143
  # this is a dynamically calculated call target
144
144
  target_expr = self._expr(target)
145
145
  funcaddr_typevar = target_expr.typevar
146
- assert funcaddr_typevar is not None
147
- load_typevar = self._create_access_typevar(funcaddr_typevar, False, self.arch.bytes, 0)
148
- self.state.add_type_constraint(typevars.Subtype(funcaddr_typevar, load_typevar))
146
+ if funcaddr_typevar is not None:
147
+ load_typevar = self._create_access_typevar(funcaddr_typevar, False, self.arch.bytes, 0)
148
+ self.state.add_type_constraint(typevars.Subtype(funcaddr_typevar, load_typevar))
149
149
 
150
150
  # discover the prototype
151
151
  prototype: SimTypeFunction | None = None
@@ -212,9 +212,9 @@ class SimEngineVRAIL(
212
212
  # this is a dynamically calculated call target
213
213
  target_expr = self._expr(target)
214
214
  funcaddr_typevar = target_expr.typevar
215
- assert funcaddr_typevar is not None
216
- load_typevar = self._create_access_typevar(funcaddr_typevar, False, self.arch.bytes, 0)
217
- self.state.add_type_constraint(typevars.Subtype(funcaddr_typevar, load_typevar))
215
+ if funcaddr_typevar is not None:
216
+ load_typevar = self._create_access_typevar(funcaddr_typevar, False, self.arch.bytes, 0)
217
+ self.state.add_type_constraint(typevars.Subtype(funcaddr_typevar, load_typevar))
218
218
 
219
219
  # discover the prototype
220
220
  prototype: SimTypeFunction | None = None