angr 9.2.140__py3-none-win_amd64.whl → 9.2.142__py3-none-win_amd64.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (76)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +105 -35
  3. angr/analyses/calling_convention/fact_collector.py +44 -18
  4. angr/analyses/calling_convention/utils.py +3 -1
  5. angr/analyses/cfg/cfg_base.py +38 -4
  6. angr/analyses/cfg/cfg_fast.py +23 -7
  7. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +13 -8
  8. angr/analyses/class_identifier.py +8 -7
  9. angr/analyses/complete_calling_conventions.py +1 -1
  10. angr/analyses/decompiler/ail_simplifier.py +105 -62
  11. angr/analyses/decompiler/callsite_maker.py +24 -11
  12. angr/analyses/decompiler/clinic.py +83 -5
  13. angr/analyses/decompiler/condition_processor.py +7 -7
  14. angr/analyses/decompiler/decompilation_cache.py +2 -1
  15. angr/analyses/decompiler/decompiler.py +11 -2
  16. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +4 -6
  17. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +8 -2
  18. angr/analyses/decompiler/optimization_passes/condition_constprop.py +63 -34
  19. angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +3 -1
  20. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +21 -2
  21. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +85 -16
  22. angr/analyses/decompiler/optimization_passes/optimization_pass.py +78 -1
  23. angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +29 -7
  24. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +51 -7
  25. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +6 -0
  26. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +9 -1
  27. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +44 -7
  28. angr/analyses/decompiler/region_identifier.py +76 -51
  29. angr/analyses/decompiler/region_simplifiers/expr_folding.py +32 -18
  30. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +4 -1
  31. angr/analyses/decompiler/ssailification/rewriting.py +70 -32
  32. angr/analyses/decompiler/ssailification/rewriting_engine.py +118 -24
  33. angr/analyses/decompiler/ssailification/ssailification.py +22 -14
  34. angr/analyses/decompiler/stack_item.py +36 -0
  35. angr/analyses/decompiler/structured_codegen/c.py +86 -145
  36. angr/analyses/decompiler/structuring/dream.py +1 -1
  37. angr/analyses/decompiler/structuring/phoenix.py +9 -4
  38. angr/analyses/decompiler/structuring/structurer_base.py +2 -1
  39. angr/analyses/decompiler/utils.py +46 -20
  40. angr/analyses/find_objects_static.py +2 -1
  41. angr/analyses/reaching_definitions/engine_vex.py +13 -0
  42. angr/analyses/reaching_definitions/function_handler.py +24 -10
  43. angr/analyses/reaching_definitions/function_handler_library/stdio.py +1 -0
  44. angr/analyses/reaching_definitions/function_handler_library/stdlib.py +45 -12
  45. angr/analyses/reaching_definitions/function_handler_library/string.py +77 -21
  46. angr/analyses/reaching_definitions/function_handler_library/unistd.py +21 -1
  47. angr/analyses/reaching_definitions/rd_state.py +11 -7
  48. angr/analyses/s_liveness.py +44 -6
  49. angr/analyses/s_reaching_definitions/s_rda_model.py +4 -2
  50. angr/analyses/s_reaching_definitions/s_rda_view.py +43 -25
  51. angr/analyses/typehoon/simple_solver.py +35 -8
  52. angr/analyses/typehoon/typehoon.py +3 -1
  53. angr/analyses/variable_recovery/engine_ail.py +1 -1
  54. angr/analyses/variable_recovery/engine_vex.py +20 -4
  55. angr/calling_conventions.py +17 -12
  56. angr/factory.py +8 -3
  57. angr/knowledge_plugins/functions/function.py +5 -10
  58. angr/knowledge_plugins/variables/variable_manager.py +34 -5
  59. angr/lib/angr_native.dll +0 -0
  60. angr/procedures/definitions/__init__.py +3 -10
  61. angr/procedures/definitions/wdk_ntoskrnl.py +2 -0
  62. angr/procedures/win32_kernel/__fastfail.py +15 -0
  63. angr/sim_procedure.py +2 -2
  64. angr/simos/simos.py +17 -11
  65. angr/simos/windows.py +42 -1
  66. angr/utils/ail.py +41 -1
  67. angr/utils/cpp.py +17 -0
  68. angr/utils/doms.py +142 -0
  69. angr/utils/library.py +1 -1
  70. angr/utils/types.py +59 -0
  71. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/METADATA +7 -7
  72. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/RECORD +76 -71
  73. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/LICENSE +0 -0
  74. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/WHEEL +0 -0
  75. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/entry_points.txt +0 -0
  76. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,8 @@
1
1
  from __future__ import annotations
2
+
2
3
  from angr.sim_type import SimCppClass, SimTypeCppFunction
3
4
  from angr.analyses import AnalysesHub
5
+ from angr.utils.cpp import is_cpp_funcname_ctor
4
6
  from . import Analysis, CFGFast, VtableFinder
5
7
 
6
8
 
@@ -33,17 +35,13 @@ class ClassIdentifier(Analysis):
33
35
  class_name = class_name.removeprefix("non-virtual thunk for ")
34
36
  if col_ind != -1:
35
37
  if class_name not in self.classes:
36
- ctor = False
37
- if func.demangled_name.find("{ctor}"):
38
- ctor = True
38
+ ctor = is_cpp_funcname_ctor(func.demangled_name)
39
39
  function_members = {func.addr: SimTypeCppFunction([], None, label=func.demangled_name, ctor=ctor)}
40
40
  new_class = SimCppClass(name=class_name, function_members=function_members)
41
41
  self.classes[class_name] = new_class
42
42
 
43
43
  else:
44
- ctor = False
45
- if func.demangled_name.find("{ctor}"):
46
- ctor = True
44
+ ctor = is_cpp_funcname_ctor(func.demangled_name)
47
45
  cur_class = self.classes[class_name]
48
46
  cur_class.function_members[func.addr] = SimTypeCppFunction(
49
47
  [], None, label=func.demangled_name, ctor=ctor
@@ -55,7 +53,10 @@ class ClassIdentifier(Analysis):
55
53
  vtable_calling_func = self.project.kb.functions.floor_func(ref.ins_addr)
56
54
  tmp_col_ind = vtable_calling_func.demangled_name.rfind("::")
57
55
  possible_constructor_class_name = vtable_calling_func.demangled_name[:tmp_col_ind]
58
- if "ctor" in vtable_calling_func.demangled_name and possible_constructor_class_name in self.classes:
56
+ if (
57
+ is_cpp_funcname_ctor(vtable_calling_func.demangled_name)
58
+ and possible_constructor_class_name in self.classes
59
+ ):
59
60
  self.classes[possible_constructor_class_name].vtable_ptrs.append(vtable.vaddr)
60
61
 
61
62
 
@@ -383,7 +383,7 @@ class CompleteCallingConventionsAnalysis(Analysis):
383
383
  return (
384
384
  cc_analysis.cc,
385
385
  cc_analysis.prototype,
386
- func.prototype_libname,
386
+ cc_analysis.prototype_libname if cc_analysis.prototype_libname is not None else func.prototype_libname,
387
387
  self.kb.variables.get_function_manager(func_addr),
388
388
  )
389
389
  _l.info("Cannot determine calling convention for %r.", func)
@@ -99,6 +99,7 @@ class AILSimplifier(Analysis):
99
99
  removed_vvar_ids: set[int] | None = None,
100
100
  arg_vvars: dict[int, tuple[VirtualVariable, SimVariable]] | None = None,
101
101
  avoid_vvar_ids: set[int] | None = None,
102
+ secondary_stackvars: set[int] | None = None,
102
103
  ):
103
104
  self.func = func
104
105
  self.func_graph = func_graph if func_graph is not None else func.graph
@@ -117,8 +118,9 @@ class AILSimplifier(Analysis):
117
118
  self._should_rewrite_ccalls = rewrite_ccalls
118
119
  self._removed_vvar_ids = removed_vvar_ids if removed_vvar_ids is not None else set()
119
120
  self._arg_vvars = arg_vvars
120
- self._avoid_vvar_ids = avoid_vvar_ids
121
+ self._avoid_vvar_ids = avoid_vvar_ids if avoid_vvar_ids is not None else set()
121
122
  self._propagator_dead_vvar_ids: set[int] = set()
123
+ self._secondary_stackvars: set[int] = secondary_stackvars if secondary_stackvars is not None else set()
122
124
 
123
125
  self._calls_to_remove: set[CodeLocation] = set()
124
126
  self._assignments_to_remove: set[CodeLocation] = set()
@@ -130,12 +132,10 @@ class AILSimplifier(Analysis):
130
132
  def _simplify(self):
131
133
  if self._narrow_expressions:
132
134
  _l.debug("Removing dead assignments before narrowing expressions")
133
- r = self._remove_dead_assignments()
135
+ r = self._iteratively_remove_dead_assignments()
134
136
  if r:
135
137
  _l.debug("... dead assignments removed")
136
138
  self.simplified = True
137
- self._rebuild_func_graph()
138
- self._clear_cache()
139
139
 
140
140
  _l.debug("Narrowing expressions")
141
141
  narrowed_exprs = self._narrow_exprs()
@@ -168,12 +168,10 @@ class AILSimplifier(Analysis):
168
168
 
169
169
  if self._unify_vars:
170
170
  _l.debug("Removing dead assignments")
171
- r = self._remove_dead_assignments()
171
+ r = self._iteratively_remove_dead_assignments()
172
172
  if r:
173
173
  _l.debug("... dead assignments removed")
174
174
  self.simplified = True
175
- self._rebuild_func_graph()
176
- self._clear_cache()
177
175
 
178
176
  _l.debug("Unifying local variables")
179
177
  r = self._unify_local_variables()
@@ -192,11 +190,10 @@ class AILSimplifier(Analysis):
192
190
  self._clear_cache()
193
191
 
194
192
  _l.debug("Removing dead assignments")
195
- r = self._remove_dead_assignments()
193
+ r = self._iteratively_remove_dead_assignments()
196
194
  if r:
197
195
  _l.debug("... dead assignments removed")
198
196
  self.simplified = True
199
- self._rebuild_func_graph()
200
197
 
201
198
  def _rebuild_func_graph(self):
202
199
  def _handler(node):
@@ -1317,14 +1314,27 @@ class AILSimplifier(Analysis):
1317
1314
 
1318
1315
  return False, None
1319
1316
 
1317
+ def _iteratively_remove_dead_assignments(self) -> bool:
1318
+ anything_removed = False
1319
+ while True:
1320
+ r = self._remove_dead_assignments()
1321
+ if not r:
1322
+ return anything_removed
1323
+ self._rebuild_func_graph()
1324
+ self._clear_cache()
1325
+
1320
1326
  def _remove_dead_assignments(self) -> bool:
1321
1327
 
1322
1328
  # keeping tracking of statements to remove and statements (as well as dead vvars) to keep allows us to handle
1323
- # cases where a statement defines more than one atoms, e.g., a call statement that defines both the return
1329
+ # cases where a statement defines more than one atom, e.g., a call statement that defines both the return
1324
1330
  # value and the floating-point return value.
1325
1331
  stmts_to_remove_per_block: dict[tuple[int, int | None], set[int]] = defaultdict(set)
1326
1332
  stmts_to_keep_per_block: dict[tuple[int, int | None], set[int]] = defaultdict(set)
1327
1333
  dead_vvar_ids: set[int] = set()
1334
+ dead_vvar_codelocs: set[CodeLocation] = set()
1335
+ blocks: dict[tuple[int, int | None], Block] = {
1336
+ (node.addr, node.idx): self.blocks.get(node, node) for node in self.func_graph.nodes()
1337
+ }
1328
1338
 
1329
1339
  # Find all statements that should be removed
1330
1340
  mask = (1 << self.project.arch.bits) - 1
@@ -1333,54 +1343,64 @@ class AILSimplifier(Analysis):
1333
1343
  stackarg_offsets = (
1334
1344
  {(tpl[1] & mask) for tpl in self._stack_arg_offsets} if self._stack_arg_offsets is not None else None
1335
1345
  )
1336
- for def_ in rd.all_definitions:
1337
- if def_.dummy:
1338
- continue
1339
- # we do not remove references to global memory regions no matter what
1340
- if isinstance(def_.atom, atoms.MemoryLocation) and isinstance(def_.atom.addr, int):
1341
- continue
1342
- if isinstance(def_.atom, atoms.VirtualVariable):
1343
- if def_.atom.varid in self._propagator_dead_vvar_ids:
1346
+ while True:
1347
+ new_dead_vars_found = False
1348
+ for vvar, codeloc in rd.all_vvar_definitions.items():
1349
+ if vvar.varid in dead_vvar_ids:
1350
+ continue
1351
+ if vvar.varid in self._propagator_dead_vvar_ids:
1344
1352
  # we are definitely removing this variable if it has no uses
1345
- uses = rd.get_vvar_uses(def_.atom)
1346
- elif def_.atom.was_stack:
1353
+ uses = rd.all_vvar_uses[vvar]
1354
+ elif vvar.was_stack:
1347
1355
  if not self._remove_dead_memdefs:
1348
- if rd.is_phi_vvar_id(def_.atom.varid):
1356
+ if rd.is_phi_vvar_id(vvar.varid):
1349
1357
  # we always remove unused phi variables
1350
1358
  pass
1359
+ elif vvar.varid in self._secondary_stackvars:
1360
+ # secondary stack variables are potentially removable
1361
+ pass
1351
1362
  elif stackarg_offsets is not None:
1352
1363
  # we always remove definitions for stack arguments
1353
- assert def_.atom.stack_offset is not None
1354
- if (def_.atom.stack_offset & mask) not in stackarg_offsets:
1364
+ assert vvar.stack_offset is not None
1365
+ if (vvar.stack_offset & mask) not in stackarg_offsets:
1355
1366
  continue
1356
1367
  else:
1357
1368
  continue
1358
- uses = rd.get_vvar_uses(def_.atom)
1369
+ uses = rd.all_vvar_uses[vvar]
1359
1370
 
1360
- elif def_.atom.was_tmp or def_.atom.was_reg or def_.atom.was_parameter:
1361
- uses = rd.get_vvar_uses(def_.atom)
1371
+ elif vvar.was_tmp or vvar.was_reg or vvar.was_parameter:
1372
+ uses = rd.all_vvar_uses[vvar]
1362
1373
 
1363
1374
  else:
1364
1375
  uses = set()
1365
1376
 
1366
- else:
1367
- continue
1368
-
1369
- if not uses:
1370
- if isinstance(def_.atom, atoms.VirtualVariable):
1371
- dead_vvar_ids.add(def_.atom.varid)
1377
+ # remove uses where vvars are going to be removed
1378
+ filtered_uses_count = 0
1379
+ for _, loc in uses:
1380
+ if loc in dead_vvar_codelocs and loc.block_addr is not None and loc.stmt_idx is not None:
1381
+ stmt = blocks[(loc.block_addr, loc.block_idx)].statements[loc.stmt_idx]
1382
+ if not self._statement_has_call_exprs(stmt) and not isinstance(stmt, (DirtyStatement, Call)):
1383
+ continue
1384
+ filtered_uses_count += 1
1385
+
1386
+ if filtered_uses_count == 0:
1387
+ new_dead_vars_found = True
1388
+ dead_vvar_ids.add(vvar.varid)
1389
+ dead_vvar_codelocs.add(codeloc)
1390
+ if not isinstance(codeloc, ExternalCodeLocation):
1391
+ assert codeloc.block_addr is not None
1392
+ assert codeloc.stmt_idx is not None
1393
+ stmts_to_remove_per_block[(codeloc.block_addr, codeloc.block_idx)].add(codeloc.stmt_idx)
1394
+ stmts_to_keep_per_block[(codeloc.block_addr, codeloc.block_idx)].discard(codeloc.stmt_idx)
1395
+ else:
1396
+ if not isinstance(codeloc, ExternalCodeLocation):
1397
+ assert codeloc.block_addr is not None
1398
+ assert codeloc.stmt_idx is not None
1399
+ stmts_to_keep_per_block[(codeloc.block_addr, codeloc.block_idx)].add(codeloc.stmt_idx)
1372
1400
 
1373
- if not isinstance(def_.codeloc, ExternalCodeLocation):
1374
- assert def_.codeloc.block_addr is not None
1375
- assert def_.codeloc.stmt_idx is not None
1376
- stmts_to_remove_per_block[(def_.codeloc.block_addr, def_.codeloc.block_idx)].add(
1377
- def_.codeloc.stmt_idx
1378
- )
1379
- else:
1380
- if not isinstance(def_.codeloc, ExternalCodeLocation):
1381
- assert def_.codeloc.block_addr is not None
1382
- assert def_.codeloc.stmt_idx is not None
1383
- stmts_to_keep_per_block[(def_.codeloc.block_addr, def_.codeloc.block_idx)].add(def_.codeloc.stmt_idx)
1401
+ if not new_dead_vars_found:
1402
+ # nothing more is found. let's end the loop
1403
+ break
1384
1404
 
1385
1405
  # find all phi variables that rely on variables that no longer exist
1386
1406
  all_removed_var_ids = self._removed_vvar_ids.copy()
@@ -1455,6 +1475,7 @@ class AILSimplifier(Analysis):
1455
1475
  if codeloc in self._assignments_to_remove:
1456
1476
  # it should be removed
1457
1477
  simplified = True
1478
+ self._assignments_to_remove.discard(codeloc)
1458
1479
  continue
1459
1480
 
1460
1481
  if self._statement_has_call_exprs(stmt):
@@ -1482,6 +1503,7 @@ class AILSimplifier(Analysis):
1482
1503
  codeloc = CodeLocation(block.addr, idx, ins_addr=stmt.ins_addr, block_idx=block.idx)
1483
1504
  if codeloc in self._calls_to_remove:
1484
1505
  # this call can be removed
1506
+ self._calls_to_remove.discard(codeloc)
1485
1507
  simplified = True
1486
1508
  continue
1487
1509
 
@@ -1503,8 +1525,36 @@ class AILSimplifier(Analysis):
1503
1525
 
1504
1526
  return simplified
1505
1527
 
1528
+ @staticmethod
1529
+ def _get_vvar_used_by(
1530
+ vvar_id: int, rd: SRDAModel, blocks_dict: dict[tuple[int, int | None], Block]
1531
+ ) -> set[int | None]:
1532
+ """
1533
+ Get all atoms that use a specified virtual variable. The atoms are in the form of virtual variable ID or None
1534
+ (indicating the virtual variable is used by another statement like Store).
1535
+
1536
+ :param vvar_id: ID of the virtual variable.
1537
+ :param rd: The SRDA model.
1538
+ :return: The set of vvar use atoms.
1539
+ """
1540
+
1541
+ vvar = rd.varid_to_vvar[vvar_id]
1542
+ used_by: set[int | None] = set()
1543
+ for used_vvar, loc in rd.all_vvar_uses[vvar]:
1544
+ if used_vvar is None:
1545
+ # no explicit reference
1546
+ used_by.add(None)
1547
+ elif loc.block_addr is not None:
1548
+ assert loc.stmt_idx is not None
1549
+ stmt = blocks_dict[(loc.block_addr, loc.block_idx)].statements[loc.stmt_idx]
1550
+ if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
1551
+ used_by.add(stmt.dst.varid)
1552
+ else:
1553
+ used_by.add(None)
1554
+ return used_by
1555
+
1506
1556
  def _find_cyclic_dependent_phis_and_dirty_vvars(self, rd: SRDAModel) -> set[int]:
1507
- blocks_dict = {(bb.addr, bb.idx): bb for bb in self.func_graph}
1557
+ blocks_dict: dict[tuple[int, int | None], Block] = {(bb.addr, bb.idx): bb for bb in self.func_graph}
1508
1558
 
1509
1559
  # find dirty vvars and vexccall vvars
1510
1560
  dirty_vvar_ids = set()
@@ -1520,25 +1570,14 @@ class AILSimplifier(Analysis):
1520
1570
 
1521
1571
  phi_and_dirty_vvar_ids = rd.phi_vvar_ids | dirty_vvar_ids
1522
1572
 
1523
- vvar_used_by: dict[int, set[int]] = defaultdict(set)
1573
+ vvar_used_by: dict[int, set[int | None]] = defaultdict(set)
1524
1574
  for var_id in phi_and_dirty_vvar_ids:
1525
1575
  if var_id in rd.phivarid_to_varids:
1526
1576
  for used_by_varid in rd.phivarid_to_varids[var_id]:
1527
- vvar_used_by[used_by_varid].add(var_id)
1528
-
1529
- vvar = rd.varid_to_vvar[var_id]
1530
- used_by = set()
1531
- for used_vvar, loc in rd.all_vvar_uses[vvar]:
1532
- if used_vvar is None:
1533
- # no explicit reference
1534
- used_by.add(None)
1535
- else:
1536
- stmt = blocks_dict[loc.block_addr, loc.block_idx].statements[loc.stmt_idx]
1537
- if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
1538
- used_by.add(stmt.dst.varid)
1539
- else:
1540
- used_by.add(None)
1541
- vvar_used_by[var_id] |= used_by
1577
+ if used_by_varid not in vvar_used_by:
1578
+ vvar_used_by[used_by_varid] |= self._get_vvar_used_by(used_by_varid, rd, blocks_dict)
1579
+ vvar_used_by[used_by_varid].add(var_id) # probably unnecessary
1580
+ vvar_used_by[var_id] |= self._get_vvar_used_by(var_id, rd, blocks_dict)
1542
1581
 
1543
1582
  g = networkx.DiGraph()
1544
1583
  dummy_vvar_id = -1
@@ -1557,8 +1596,12 @@ class AILSimplifier(Analysis):
1557
1596
 
1558
1597
  bail = False
1559
1598
  for varid in scc:
1560
- # if this vvar is a phi var, ensure this vvar is not used by anything else outside the scc
1561
- if varid in rd.phi_vvar_ids:
1599
+ # ensure this vvar is not used by anything else outside the scc (regardless of whether this vvar is a
1600
+ # phi variable or not)
1601
+ if varid in vvar_used_by and None in vvar_used_by[varid]:
1602
+ bail = True
1603
+ break
1604
+ if bail is False:
1562
1605
  succs = list(g.successors(varid))
1563
1606
  if any(succ_varid not in scc for succ_varid in succs):
1564
1607
  bail = True
@@ -45,7 +45,7 @@ class CallSiteMaker(Analysis):
45
45
  self._ail_manager = ail_manager
46
46
 
47
47
  self.result_block = None
48
- self.stack_arg_offsets: set[tuple[int, int]] | None = None # ins_addr, stack_offset
48
+ self.stack_arg_offsets: set[tuple[int, int]] | None = None # call ins addr, stack_offset
49
49
  self.removed_vvar_ids: set[int] = set()
50
50
 
51
51
  self._analyze()
@@ -372,7 +372,9 @@ class CallSiteMaker(Analysis):
372
372
 
373
373
  return None
374
374
 
375
- def _resolve_stack_argument(self, call_stmt, arg_loc) -> tuple[Any, Any]: # pylint:disable=unused-argument
375
+ def _resolve_stack_argument(
376
+ self, call_stmt: Stmt.Call, arg_loc
377
+ ) -> tuple[Any, Any]: # pylint:disable=unused-argument
376
378
  assert self._stack_pointer_tracker is not None
377
379
 
378
380
  size = arg_loc.size
@@ -399,15 +401,26 @@ class CallSiteMaker(Analysis):
399
401
  # FIXME: vvar may be larger than that we ask; we may need to chop the correct value of vvar
400
402
  value = view.get_vvar_value(vvar)
401
403
  if value is not None and not isinstance(value, Expr.Phi):
402
- return None, value
403
- return None, Expr.VirtualVariable(
404
- self._atom_idx(),
405
- vvar.varid,
406
- vvar.bits,
407
- vvar.category,
408
- oident=vvar.oident,
409
- ins_addr=call_stmt.ins_addr,
410
- )
404
+ v: Expr.Expression = value
405
+ else:
406
+ v: Expr.Expression = Expr.VirtualVariable(
407
+ self._atom_idx(),
408
+ vvar.varid,
409
+ vvar.bits,
410
+ vvar.category,
411
+ oident=vvar.oident,
412
+ ins_addr=call_stmt.ins_addr,
413
+ )
414
+ if v.size > size:
415
+ v = Expr.Convert(
416
+ self._atom_idx(),
417
+ v.bits,
418
+ size * self.project.arch.byte_width,
419
+ False,
420
+ v,
421
+ ins_addr=call_stmt.ins_addr,
422
+ )
423
+ return None, v
411
424
 
412
425
  return None, Expr.Load(
413
426
  self._atom_idx(),
@@ -13,7 +13,6 @@ import capstone
13
13
 
14
14
  import ailment
15
15
 
16
- from angr.analyses.decompiler.ssailification.ssailification import Ssailification
17
16
  from angr.errors import AngrDecompilationError
18
17
  from angr.knowledge_base import KnowledgeBase
19
18
  from angr.knowledge_plugins.functions import Function
@@ -39,6 +38,8 @@ from angr.procedures.stubs.UnresolvableJumpTarget import UnresolvableJumpTarget
39
38
  from angr.analyses import Analysis, register_analysis
40
39
  from angr.analyses.cfg.cfg_base import CFGBase
41
40
  from angr.analyses.reaching_definitions import ReachingDefinitionsAnalysis
41
+ from .ssailification.ssailification import Ssailification
42
+ from .stack_item import StackItem, StackItemType
42
43
  from .return_maker import ReturnMaker
43
44
  from .ailgraph_walker import AILGraphWalker, RemoveNodeNotice
44
45
  from .optimization_passes import (
@@ -154,6 +155,9 @@ class Clinic(Analysis):
154
155
  self._mode = mode
155
156
  self.vvar_id_start = vvar_id_start
156
157
  self.vvar_to_vvar: dict[int, int] | None = None
158
+ # during SSA conversion, we create secondary stack variables because they overlap and are larger than the
159
+ # actual stack variables. these secondary stack variables can be safely eliminated if not used by anything.
160
+ self.secondary_stackvars: set[int] = set()
157
161
 
158
162
  # inlining help
159
163
  self._sp_shift = sp_shift
@@ -167,6 +171,7 @@ class Clinic(Analysis):
167
171
 
168
172
  self._register_save_areas_removed: bool = False
169
173
  self.edges_to_remove: list[tuple[tuple[int, int | None], tuple[int, int | None]]] = []
174
+ self.copied_var_ids: set[int] = set()
170
175
 
171
176
  self._new_block_addrs = set()
172
177
 
@@ -179,6 +184,10 @@ class Clinic(Analysis):
179
184
  else:
180
185
  self._optimization_passes = []
181
186
 
187
+ self.stack_items: dict[int, StackItem] = {}
188
+ if self.project.arch.call_pushes_ret:
189
+ self.stack_items[0] = StackItem(0, self.project.arch.bytes, "ret_addr", StackItemType.RET_ADDR)
190
+
182
191
  if self._mode == ClinicMode.DECOMPILE:
183
192
  self._analyze_for_decompiling()
184
193
  elif self._mode == ClinicMode.COLLECT_DATA_REFS:
@@ -499,7 +508,7 @@ class Clinic(Analysis):
499
508
  # Run simplification passes
500
509
  self._update_progress(40.0, text="Running simplifications 1")
501
510
  ail_graph = self._run_simplification_passes(
502
- ail_graph, stage=OptimizationPassStage.AFTER_SINGLE_BLOCK_SIMPLIFICATION
511
+ ail_graph, stack_items=self.stack_items, stage=OptimizationPassStage.AFTER_SINGLE_BLOCK_SIMPLIFICATION
503
512
  )
504
513
 
505
514
  # Simplify the entire function for the first time
@@ -562,7 +571,9 @@ class Clinic(Analysis):
562
571
 
563
572
  # Run simplification passes
564
573
  self._update_progress(65.0, text="Running simplifications 3 ")
565
- ail_graph = self._run_simplification_passes(ail_graph, stage=OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION)
574
+ ail_graph = self._run_simplification_passes(
575
+ ail_graph, stack_items=self.stack_items, stage=OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION
576
+ )
566
577
 
567
578
  # Simplify the entire function for the third time
568
579
  self._update_progress(70.0, text="Simplifying function 3")
@@ -629,6 +640,7 @@ class Clinic(Analysis):
629
640
  self.cc_graph = self.copy_graph(ail_graph)
630
641
  self.externs = self._collect_externs(ail_graph, variable_kb)
631
642
  self.vvar_to_vvar = vvar2vvar
643
+ self.copied_var_ids = copied_vvar_ids
632
644
  return ail_graph
633
645
 
634
646
  def _analyze_for_data_refs(self):
@@ -777,6 +789,8 @@ class Clinic(Analysis):
777
789
  :return: None
778
790
  """
779
791
 
792
+ attempted_funcs: set[int] = set()
793
+
780
794
  for node in self.function.transition_graph:
781
795
  if (
782
796
  isinstance(node, BlockNode)
@@ -788,7 +802,12 @@ class Clinic(Analysis):
788
802
  elif isinstance(node, Function):
789
803
  target_func = node
790
804
  else:
805
+ # TODO: Enable call-site analysis for indirect calls
806
+ continue
807
+
808
+ if target_func.addr in attempted_funcs:
791
809
  continue
810
+ attempted_funcs.add(target_func.addr)
792
811
 
793
812
  # case 0: the calling convention and prototype are available
794
813
  if target_func.calling_convention is not None and target_func.prototype is not None:
@@ -808,6 +827,7 @@ class Clinic(Analysis):
808
827
  if cc.cc is not None and cc.prototype is not None:
809
828
  target_func.calling_convention = cc.cc
810
829
  target_func.prototype = cc.prototype
830
+ target_func.prototype_libname = cc.prototype_libname
811
831
  continue
812
832
 
813
833
  # case 3: the callee is a PLT function
@@ -816,6 +836,7 @@ class Clinic(Analysis):
816
836
  if cc.cc is not None and cc.prototype is not None:
817
837
  target_func.calling_convention = cc.cc
818
838
  target_func.prototype = cc.prototype
839
+ target_func.prototype_libname = cc.prototype_libname
819
840
  continue
820
841
 
821
842
  # case 4: fall back to call site analysis
@@ -967,7 +988,29 @@ class Clinic(Analysis):
967
988
  return ailment.Block(block_node.addr, 0, statements=[])
968
989
 
969
990
  block = self.project.factory.block(block_node.addr, block_node.size, cross_insn_opt=False)
970
- return self._convert_vex(block)
991
+ converted = self._convert_vex(block)
992
+
993
+ # architecture-specific setup
994
+ if block.addr == self.function.addr and self.project.arch.name in {"X86", "AMD64"}:
995
+ # setup dflag; this is a hack for most sane ABIs. we may move this logic elsewhere if there are adversarial
996
+ # binaries that mess with dflags and pass them across functions
997
+ dflag_offset, dflag_size = self.project.arch.registers["d"]
998
+ dflag = ailment.Expr.Register(
999
+ self._ail_manager.next_atom(),
1000
+ None,
1001
+ dflag_offset,
1002
+ dflag_size * self.project.arch.byte_width,
1003
+ ins_addr=block.addr,
1004
+ )
1005
+ forward = ailment.Expr.Const(
1006
+ self._ail_manager.next_atom(), None, 1, dflag_size * self.project.arch.byte_width, ins_addr=block.addr
1007
+ )
1008
+ dflag_assignment = ailment.Stmt.Assignment(
1009
+ self._ail_manager.next_atom(), dflag, forward, ins_addr=block.addr
1010
+ )
1011
+ converted.statements.insert(0, dflag_assignment)
1012
+
1013
+ return converted
971
1014
 
972
1015
  def _convert_vex(self, block):
973
1016
  if block.vex.jumpkind not in {"Ijk_Call", "Ijk_Boring", "Ijk_Ret"} and not block.vex.jumpkind.startswith(
@@ -1009,7 +1052,11 @@ class Clinic(Analysis):
1009
1052
  node = self._cfg.get_any_node(block.addr)
1010
1053
  if node is None:
1011
1054
  continue
1012
- successors = self._cfg.get_successors(node, excluding_fakeret=True, jumpkind="Ijk_Call")
1055
+ successors = [
1056
+ node
1057
+ for node, jk in self._cfg.get_successors_and_jumpkinds(node)
1058
+ if jk == "Ijk_Call" or jk.startswith("Ijk_Sys")
1059
+ ]
1013
1060
  if len(successors) == 1:
1014
1061
  succ_addr = successors[0].addr
1015
1062
  if not self.project.is_hooked(succ_addr) or not isinstance(
@@ -1239,6 +1286,7 @@ class Clinic(Analysis):
1239
1286
  rewrite_ccalls=rewrite_ccalls,
1240
1287
  removed_vvar_ids=removed_vvar_ids,
1241
1288
  arg_vvars=arg_vvars,
1289
+ secondary_stackvars=self.secondary_stackvars,
1242
1290
  )
1243
1291
  # cache the simplifier's RDA analysis
1244
1292
  self.reaching_definitions = simp._reaching_definitions
@@ -1252,6 +1300,7 @@ class Clinic(Analysis):
1252
1300
  ail_graph,
1253
1301
  stage: OptimizationPassStage = OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION,
1254
1302
  variable_kb=None,
1303
+ stack_items: dict[int, StackItem] | None = None,
1255
1304
  **kwargs,
1256
1305
  ):
1257
1306
  addr_and_idx_to_blocks: dict[tuple[int, int | None], ailment.Block] = {}
@@ -1297,6 +1346,8 @@ class Clinic(Analysis):
1297
1346
  # clear the cached RDA result
1298
1347
  self.reaching_definitions = None
1299
1348
  self.vvar_id_start = a.vvar_id_start
1349
+ if stack_items is not None and a.stack_items:
1350
+ stack_items.update(a.stack_items)
1300
1351
 
1301
1352
  return ail_graph
1302
1353
 
@@ -1364,6 +1415,7 @@ class Clinic(Analysis):
1364
1415
  vvar_id_start=self.vvar_id_start,
1365
1416
  )
1366
1417
  self.vvar_id_start = ssailification.max_vvar_id + 1
1418
+ self.secondary_stackvars = ssailification.secondary_stackvars
1367
1419
  return ssailification.out_graph
1368
1420
 
1369
1421
  @timethis
@@ -1554,6 +1606,13 @@ class Clinic(Analysis):
1554
1606
  if vartype is not None:
1555
1607
  for tv in vr.var_to_typevars[variable]:
1556
1608
  groundtruth[tv] = vartype
1609
+ # get maximum sizes of each stack variable, regardless of its original type
1610
+ stackvar_max_sizes = var_manager.get_stackvar_max_sizes(self.stack_items)
1611
+ tv_max_sizes = {}
1612
+ for v, s in stackvar_max_sizes.items():
1613
+ if v in vr.var_to_typevars:
1614
+ for tv in vr.var_to_typevars[v]:
1615
+ tv_max_sizes[tv] = s
1557
1616
  # clean up existing types for this function
1558
1617
  var_manager.remove_types()
1559
1618
  # TODO: Type inference for global variables
@@ -1574,6 +1633,7 @@ class Clinic(Analysis):
1574
1633
  var_mapping=vr.var_to_typevars,
1575
1634
  must_struct=must_struct,
1576
1635
  ground_truth=groundtruth,
1636
+ stackvar_max_sizes=tv_max_sizes,
1577
1637
  )
1578
1638
  # tp.pp_constraints()
1579
1639
  # tp.pp_solution()
@@ -1864,6 +1924,11 @@ class Clinic(Analysis):
1864
1924
  if expr.guard:
1865
1925
  self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, expr.guard)
1866
1926
 
1927
+ elif isinstance(expr, ailment.Expr.Phi):
1928
+ for _, vvar in expr.src_and_vvars:
1929
+ if vvar is not None:
1930
+ self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, vvar)
1931
+
1867
1932
  def _function_graph_to_ail_graph(self, func_graph, blocks_by_addr_and_size=None):
1868
1933
  if blocks_by_addr_and_size is None:
1869
1934
  blocks_by_addr_and_size = self._blocks_by_addr_and_size
@@ -2429,6 +2494,19 @@ class Clinic(Analysis):
2429
2494
  last_stmt.target.value = succs[0].addr
2430
2495
  elif isinstance(last_stmt, ailment.Stmt.ConditionalJump):
2431
2496
  patch_conditional_jump_target(last_stmt, node.addr, succs[0].addr)
2497
+ # if both branches jump to the same location, we replace it with a jump
2498
+ if (
2499
+ isinstance(last_stmt.true_target, ailment.Expr.Const)
2500
+ and isinstance(last_stmt.false_target, ailment.Expr.Const)
2501
+ and last_stmt.true_target.value == last_stmt.false_target.value
2502
+ ):
2503
+ last_stmt = ailment.Stmt.Jump(
2504
+ last_stmt.idx,
2505
+ last_stmt.true_target,
2506
+ target_idx=last_stmt.true_target.idx,
2507
+ ins_addr=last_stmt.ins_addr,
2508
+ )
2509
+ pred.statements[-1] = last_stmt
2432
2510
  first_cond_jump = first_conditional_jump(pred)
2433
2511
  if first_cond_jump is not None and first_cond_jump is not last_stmt:
2434
2512
  patch_conditional_jump_target(first_cond_jump, node.addr, succs[0].addr)
@@ -16,6 +16,7 @@ from angr.utils.graph import GraphUtils
16
16
  from angr.utils.lazy_import import lazy_import
17
17
  from angr.utils import is_pyinstaller
18
18
  from angr.utils.graph import dominates, inverted_idoms
19
+ from angr.utils.ail import is_head_controlled_loop_block
19
20
  from angr.block import Block, BlockNode
20
21
  from angr.errors import AngrRuntimeError
21
22
  from .peephole_optimizations import InvertNegatedLogicalConjunctionsAndDisjunctions, RemoveRedundantNots
@@ -34,7 +35,7 @@ from .structuring.structurer_nodes import (
34
35
  IncompleteSwitchCaseNode,
35
36
  )
36
37
  from .graph_region import GraphRegion
37
- from .utils import first_nonlabel_nonphi_statement, peephole_optimize_expr
38
+ from .utils import peephole_optimize_expr
38
39
 
39
40
  if is_pyinstaller():
40
41
  # PyInstaller is not happy with lazy import
@@ -671,12 +672,11 @@ class ConditionProcessor:
671
672
  return claripy.true()
672
673
 
673
674
  # sometimes the last statement is the conditional jump. sometimes it's the first statement of the block
674
- if (
675
- isinstance(src_block, ailment.Block)
676
- and src_block.statements
677
- and isinstance(first_nonlabel_nonphi_statement(src_block), ailment.Stmt.ConditionalJump)
678
- ):
679
- last_stmt = first_nonlabel_nonphi_statement(src_block)
675
+ if isinstance(src_block, ailment.Block) and src_block.statements and is_head_controlled_loop_block(src_block):
676
+ last_stmt = next(
677
+ iter(stmt for stmt in src_block.statements[:-1] if isinstance(stmt, ailment.Stmt.ConditionalJump)), None
678
+ )
679
+ assert last_stmt is not None
680
680
  else:
681
681
  last_stmt = self.get_last_statement(src_block)
682
682