angr-9.2.142-py3-none-manylinux2014_x86_64.whl → angr-9.2.144-py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

The registry flags this release as potentially problematic.

Files changed (61)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +22 -10
  3. angr/analyses/calling_convention/fact_collector.py +72 -14
  4. angr/analyses/cfg/cfg_base.py +7 -2
  5. angr/analyses/cfg/cfg_emulated.py +13 -4
  6. angr/analyses/cfg/cfg_fast.py +21 -60
  7. angr/analyses/cfg/indirect_jump_resolvers/__init__.py +2 -0
  8. angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +12 -1
  9. angr/analyses/cfg/indirect_jump_resolvers/constant_value_manager.py +107 -0
  10. angr/analyses/cfg/indirect_jump_resolvers/default_resolvers.py +2 -1
  11. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +6 -102
  12. angr/analyses/cfg/indirect_jump_resolvers/syscall_resolver.py +92 -0
  13. angr/analyses/complete_calling_conventions.py +18 -5
  14. angr/analyses/decompiler/ail_simplifier.py +95 -65
  15. angr/analyses/decompiler/clinic.py +162 -68
  16. angr/analyses/decompiler/decompiler.py +4 -4
  17. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +1 -1
  18. angr/analyses/decompiler/optimization_passes/condition_constprop.py +49 -14
  19. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +8 -0
  20. angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -5
  21. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +5 -0
  22. angr/analyses/decompiler/peephole_optimizations/__init__.py +2 -0
  23. angr/analyses/decompiler/peephole_optimizations/a_sub_a_shr_const_shr_const.py +37 -0
  24. angr/analyses/decompiler/peephole_optimizations/simplify_pc_relative_loads.py +15 -1
  25. angr/analyses/decompiler/sequence_walker.py +8 -0
  26. angr/analyses/decompiler/ssailification/rewriting_engine.py +2 -0
  27. angr/analyses/decompiler/ssailification/ssailification.py +10 -2
  28. angr/analyses/decompiler/ssailification/traversal_engine.py +17 -2
  29. angr/analyses/decompiler/structured_codegen/c.py +25 -4
  30. angr/analyses/decompiler/utils.py +13 -0
  31. angr/analyses/disassembly.py +3 -3
  32. angr/analyses/fcp/fcp.py +1 -4
  33. angr/analyses/s_propagator.py +40 -29
  34. angr/analyses/s_reaching_definitions/s_rda_model.py +45 -36
  35. angr/analyses/s_reaching_definitions/s_rda_view.py +6 -3
  36. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +41 -42
  37. angr/analyses/typehoon/dfa.py +13 -3
  38. angr/analyses/typehoon/typehoon.py +60 -18
  39. angr/analyses/typehoon/typevars.py +11 -7
  40. angr/analyses/variable_recovery/engine_ail.py +19 -23
  41. angr/analyses/variable_recovery/engine_base.py +26 -30
  42. angr/analyses/variable_recovery/variable_recovery_fast.py +17 -21
  43. angr/calling_conventions.py +18 -8
  44. angr/knowledge_plugins/functions/function.py +29 -15
  45. angr/knowledge_plugins/key_definitions/constants.py +2 -2
  46. angr/knowledge_plugins/key_definitions/liveness.py +4 -4
  47. angr/lib/angr_native.so +0 -0
  48. angr/procedures/definitions/linux_kernel.py +5 -0
  49. angr/state_plugins/unicorn_engine.py +24 -8
  50. angr/storage/memory_mixins/paged_memory/page_backer_mixins.py +1 -2
  51. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +2 -2
  52. angr/utils/doms.py +40 -33
  53. angr/utils/graph.py +26 -20
  54. angr/utils/ssa/__init__.py +21 -14
  55. angr/utils/ssa/vvar_uses_collector.py +2 -2
  56. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/METADATA +11 -8
  57. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/RECORD +61 -58
  58. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/WHEEL +1 -1
  59. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/LICENSE +0 -0
  60. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/entry_points.txt +0 -0
  61. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/top_level.txt +0 -0
angr/analyses/variable_recovery/variable_recovery_fast.py CHANGED
@@ -47,6 +47,7 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
         analysis,
         arch,
         func,
+        project,
         stack_region=None,
         register_region=None,
         global_region=None,
@@ -55,7 +56,6 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
         func_typevar=None,
         delayed_type_constraints=None,
         stack_offset_typevars=None,
-        project=None,
         ret_val_size=None,
     ):
         super().__init__(
@@ -63,6 +63,7 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
             analysis,
             arch,
             func,
+            project,
             stack_region=stack_region,
             register_region=register_region,
             global_region=global_region,
@@ -71,12 +72,11 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
             func_typevar=func_typevar,
             delayed_type_constraints=delayed_type_constraints,
             stack_offset_typevars=stack_offset_typevars,
-            project=project,
         )
         self.ret_val_size = ret_val_size

     def __repr__(self):
-        return f"<VRAbstractState@{self.block_addr:#x}: {len(self.register_region)} register variables, {len(self.stack_region)} stack variables>"
+        return f"<VRAbstractState@{self.block_addr:#x}"

     def __eq__(self, other):
         if type(other) is not VariableRecoveryFastState:
@@ -96,12 +96,14 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
             type_constraints=self.type_constraints,
             func_typevar=self.func_typevar,
             delayed_type_constraints=self.delayed_type_constraints,
-            stack_offset_typevars=dict(self.stack_offset_typevars),
+            stack_offset_typevars=self.stack_offset_typevars,
            project=self.project,
            ret_val_size=self.ret_val_size,
        )

-    def merge(self, others: tuple[VariableRecoveryFastState], successor=None) -> tuple[VariableRecoveryFastState, bool]:
+    def merge(
+        self, others: tuple[VariableRecoveryFastState, ...], successor=None
+    ) -> tuple[VariableRecoveryFastState, bool]:
        """
        Merge two abstract states.

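The widened merge() annotation fixes a real typing bug: in Python, tuple[X] means a tuple of exactly one X, while tuple[X, ...] means a homogeneous tuple of arbitrary length. A minimal, standalone illustration (names hypothetical):

    from __future__ import annotations

    def merge_one(states: tuple[int]) -> int:        # accepts exactly one element
        return states[0]

    def merge_many(states: tuple[int, ...]) -> int:  # accepts any number of elements
        return sum(states)

    merge_many((1, 2, 3))   # OK
    merge_one((1, 2, 3))    # flagged by a type checker: expected a 1-tuple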
@@ -135,10 +137,10 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
         # add subtype constraints for all replacements
         for v0, v1 in self.phi_variables.items():
             # v0 will be replaced by v1
-            if not typevars.has_type_variable_for(v1, None):
-                typevars.add_type_variable(v1, None, TypeVariable())
-            if not typevars.has_type_variable_for(v0, None):
-                typevars.add_type_variable(v0, None, TypeVariable())
+            if not typevars.has_type_variable_for(v1):
+                typevars.add_type_variable(v1, TypeVariable())
+            if not typevars.has_type_variable_for(v0):
+                typevars.add_type_variable(v0, TypeVariable())
             # Assuming v2 = phi(v0, v1), then we know that v0_typevar == v1_typevar == v2_typevar
             # However, it's possible that neither v0 nor v1 will ever be used in future blocks, which not only makes
             # this phi function useless, but also leads to the incorrect assumption that v1_typevar == v2_typevar.
@@ -146,7 +148,7 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
             # when v1 (the new variable that will end up in the state) is ever used in the future.

             # create an equivalence relationship
-            equivalence = Equivalence(typevars.get_type_variable(v1, None), typevars.get_type_variable(v0, None))
+            equivalence = Equivalence(typevars.get_type_variable(v1), typevars.get_type_variable(v0))
             delayed_typeconstraints[v1].add(equivalence)

         stack_offset_typevars = {}
@@ -281,6 +283,7 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
         self.func_typevar = TypeVariable(name=func.name)
         self.delayed_type_constraints = None
         self.ret_val_size = None
+        self.stack_offset_typevars: dict[int, TypeVariable] = {}

         self._analyze()

@@ -328,6 +331,7 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
             type_constraints=self.type_constraints,
             func_typevar=self.func_typevar,
             delayed_type_constraints=self.delayed_type_constraints,
+            stack_offset_typevars=self.stack_offset_typevars,
         )
         initial_sp = state.stack_address(self.project.arch.bytes if self.project.arch.call_pushes_ret else 0)
         if self.project.arch.sp_offset is not None:
@@ -439,20 +443,10 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
         block = self.project.factory.block(node.addr, node.size, opt_level=1, cross_insn_opt=False)
         block_key = node.addr

-        # if node.addr in self._instates:
-        #     prev_state: VariableRecoveryFastState = self._instates[node.addr]
-        #     if input_state == prev_state:
-        #         l.debug('Skip node %#x as we have reached a fixed-point', node.addr)
-        #         return False, input_state
-        #     else:
-        #         l.debug('Merging input state of node %#x with the previous state.', node.addr)
-        #         input_state, _ = prev_state.merge((input_state,), successor=node.addr)
-
         state = state.copy()
         state.block_addr = node.addr
         if isinstance(node, ailment.Block):
             state.block_idx = node.idx
-        # self._instates[node.addr] = state

         if self._node_iterations[block_key] >= self._max_iterations:
             l.debug("Skip node %#x as we have iterated %d times on it.", node.addr, self._node_iterations[node.addr])
@@ -491,10 +485,12 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
         self.variable_manager[self.function.addr].unify_variables()

         # fill in var_to_typevars
+        assert self.typevars is not None
         for var, typevar_set in self.typevars._typevars.items():
             self.var_to_typevars[var] = typevar_set

         # unify type variables for global variables
+        assert self.type_constraints is not None
         for var, typevars in self.var_to_typevars.items():
             if len(typevars) > 1 and isinstance(var, SimMemoryVariable) and not isinstance(var, SimStackVariable):
                 sorted_typevars = sorted(typevars, key=lambda x: str(x))  # pylint:disable=unnecessary-lambda
@@ -600,7 +596,7 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
         block = self._peephole_optimize(block)

         processor = self._ail_engine if isinstance(block, ailment.Block) else self._vex_engine
-        processor.process(state, block=block, fail_fast=self._fail_fast)
+        processor.process(state, block=block, fail_fast=self._fail_fast)  # type: ignore

         if self._track_sp and block.addr in self._node_to_cc:
             # readjusting sp at the end for blocks that end in a call
angr/calling_conventions.py CHANGED
@@ -254,7 +254,7 @@ class SimFunctionArgument:
         if self.size not in (4, 8):
             raise ValueError(f"What do I do with a float {self.size} bytes long")
         value = claripy.FPV(value, claripy.FSORT_FLOAT if self.size == 4 else claripy.FSORT_DOUBLE)
-        return value.raw_to_bv()
+        return value.raw_to_bv()  # type:ignore

     def check_value_get(self, value):
         if self.is_fp:
@@ -578,8 +578,12 @@ class SimCC:
     # (if applicable) and the arguments. Probably zero.
     STACKARG_SP_DIFF = 0  # The amount of stack space reserved for the return address
     CALLER_SAVED_REGS: list[str] = []  # Caller-saved registers
-    RETURN_ADDR: SimFunctionArgument  # The location where the return address is stored, as a SimFunctionArgument
-    RETURN_VAL: SimFunctionArgument  # The location where the return value is stored, as a SimFunctionArgument
+    RETURN_ADDR: SimFunctionArgument | None = (
+        None  # The location where the return address is stored, as a SimFunctionArgument
+    )
+    RETURN_VAL: SimFunctionArgument | None = (
+        None  # The location where the return value is stored, as a SimFunctionArgument
+    )
     OVERFLOW_RETURN_VAL: SimFunctionArgument | None = (
         None  # The second half of the location where a double-length return value is stored
     )
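RETURN_ADDR and RETURN_VAL were previously bare annotations, which tells a type checker the attribute always exists even though only concrete subclasses assign it. Declaring them as `SimFunctionArgument | None = None` makes the absence explicit, at the cost of None guards at each use site, which is exactly what the setup_callsite and teardown_callsite hunks below add. A minimal sketch of the pattern, with hypothetical names:

    from __future__ import annotations

    class BaseCC:
        # explicit "may be missing" default instead of a bare annotation
        RETURN_VAL: str | None = None

    class AMD64CC(BaseCC):
        RETURN_VAL = "rax"

    def describe(cc: BaseCC) -> str:
        if cc.RETURN_VAL is None:  # guard mirrors the call-site checks in this diff
            return "no return-value location"
        return f"return value lives in {cc.RETURN_VAL}"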
@@ -766,7 +770,11 @@ class SimCC:
         return (
             isinstance(val, (float, claripy.ast.FP))
             or (isinstance(val, claripy.ast.Base) and val.op.startswith("fp"))  # type: ignore
-            or (isinstance(val, claripy.ast.Base) and val.op == "Reverse" and val.args[0].op.startswith("fp"))
+            or (
+                isinstance(val, claripy.ast.Base)
+                and val.op == "Reverse"  # type:ignore
+                and val.args[0].op.startswith("fp")  # type:ignore
+            )
         )

     @staticmethod
@@ -922,8 +930,10 @@ class SimCC:
         allocator.apply(state, alloc_base)

         for loc, val in zip(arg_locs, vals):
+            assert loc is not None
             loc.set_value(state, val, stack_base=stack_base)
-        self.return_addr.set_value(state, ret_addr, stack_base=stack_base)
+        if self.return_addr is not None:
+            self.return_addr.set_value(state, ret_addr, stack_base=stack_base)

     def teardown_callsite(self, state, return_val=None, prototype=None, force_callee_cleanup=False):
         """
@@ -943,10 +953,10 @@ class SimCC:
             self.set_return_val(state, return_val, prototype.returnty)
             # ummmmmmmm hack
             loc = self.return_val(prototype.returnty)
-            if isinstance(loc, SimReferenceArgument):
+            if self.RETURN_VAL is not None and isinstance(loc, SimReferenceArgument):
                 self.RETURN_VAL.set_value(state, loc.ptr_loc.get_value(state))

-        ret_addr = self.return_addr.get_value(state)
+        ret_addr = self.return_addr.get_value(state) if self.return_addr is not None else None

         if state.arch.sp_offset is not None and prototype is not None:
             if force_callee_cleanup or self.CALLEE_CLEANUP:
@@ -975,7 +985,7 @@ class SimCC:

         if arg.buffer:
             if isinstance(arg.value, claripy.ast.Bits):
-                real_value = arg.value.chop(state.arch.byte_width)
+                real_value = arg.value.chop(state.arch.byte_width)  # type:ignore
             elif type(arg.value) in (bytes, str):
                 real_value = claripy.BVV(arg.value).chop(8)
             else:
angr/knowledge_plugins/functions/function.py CHANGED
@@ -237,21 +237,7 @@ class Function(Serializable):

         self._returning = self._get_initial_returning()

-        # Determine a calling convention
-        # If it is a SimProcedure it might have a CC already defined which can be used
-        if self.is_simprocedure and self.project is not None and self.addr in self.project._sim_procedures:
-            simproc = self.project._sim_procedures[self.addr]
-            cc = simproc.cc
-            if cc is None:
-                arch = self.project.arch
-                if self.project.arch.name in DEFAULT_CC:
-                    cc = default_cc(
-                        arch.name, platform=self.project.simos.name if self.project.simos is not None else None
-                    )(arch)
-
-            self.calling_convention: SimCC | None = cc
-        else:
-            self.calling_convention: SimCC | None = None
+        self._init_prototype_and_calling_convention()

     @property
     @deprecated(".is_alignment")
@@ -768,6 +754,34 @@ class Function(Serializable):
         # Cannot determine
         return None

+    def _init_prototype_and_calling_convention(self) -> None:
+        """
+        Initialize prototype and calling convention from a SimProcedure, if available.
+        """
+        hooker = None
+        if self.is_syscall and self.project is not None and self.project.simos.is_syscall_addr(self.addr):
+            hooker = self.project.simos.syscall_from_addr(self.addr)
+        elif self.is_simprocedure and self.project is not None:
+            hooker = self.project.hooked_by(self.addr)
+        if hooker is None or hooker.guessed_prototype:
+            return
+
+        if hooker.prototype:
+            self.prototype_libname = hooker.library_name
+            self.prototype = hooker.prototype
+            self.is_prototype_guessed = False
+
+        cc = hooker.cc
+        if cc is None and self.project is not None:
+            arch = self.project.arch
+            if arch.name in DEFAULT_CC:
+                cc_cls = default_cc(
+                    arch.name, platform=self.project.simos.name if self.project.simos is not None else None
+                )
+                if cc_cls is not None:
+                    cc = cc_cls(arch)
+        self.calling_convention = cc
+
     def _clear_transition_graph(self):
         self._block_sizes = {}
         self._addr_to_block_node = {}
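The consolidated helper also covers syscall functions now: a syscall address resolves through project.simos.syscall_from_addr, any other hooked address through project.hooked_by, and hooks with guessed prototypes are skipped. A hedged usage sketch (binary path hypothetical; a CFG must be built first so the knowledge base is populated):

    import angr

    proj = angr.Project("/path/to/binary", auto_load_libs=False)  # hypothetical path
    proj.analyses.CFGFast()

    # SimProcedure-backed functions now pick up the hook's prototype and a
    # default calling convention at Function-creation time:
    for func in proj.kb.functions.values():
        if func.is_simprocedure and func.prototype is not None:
            print(hex(func.addr), func.name, func.calling_convention)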
angr/knowledge_plugins/key_definitions/constants.py CHANGED
@@ -1,5 +1,5 @@
 from __future__ import annotations
-from typing import Literal, Union
+from typing import Literal
 import enum

 DEBUG = False
@@ -25,5 +25,5 @@ OP_BEFORE = ObservationPointType.OP_BEFORE
 OP_AFTER = ObservationPointType.OP_AFTER

 ObservationPoint = tuple[
-    Literal["insn", "node", "stmt", "exit"], Union[int, tuple[int, int], tuple[int, int, int]], ObservationPointType
+    Literal["insn", "node", "stmt", "exit"], int | tuple[int, int] | tuple[int, int, int], ObservationPointType
 ]
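This hunk, and the similar ones in liveness.py, page_backer_mixins.py, and mv_list_page.py below, are PEP 604 cleanups: typing.Union and typing.Optional become the X | Y syntax. One subtlety worth noting: under `from __future__ import annotations`, a | inside an annotation is stored as a string and never evaluated, but a module-level alias is an ordinary runtime expression and needs Python 3.10+ (which these modules evidently assume, since the aliases are evaluated at import time). A minimal illustration:

    from __future__ import annotations

    def f(x: int | None) -> int | None:  # annotation only: deferred, not evaluated
        return x

    BackerType = bytes | bytearray | list[int]  # runtime expression: requires Python >= 3.10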
angr/knowledge_plugins/key_definitions/liveness.py CHANGED
@@ -1,5 +1,5 @@
 from __future__ import annotations
-from typing import Optional, TYPE_CHECKING
+from typing import TYPE_CHECKING

 from collections import defaultdict
 from itertools import chain
@@ -14,11 +14,11 @@ if TYPE_CHECKING:
     from angr.code_location import CodeLocation


-LocationType = tuple[int, Optional[int], Optional[int]]  # block addr, block ID, stmt ID
+LocationType = tuple[int, int | None, int | None]  # block addr, block ID, stmt ID
 LocationWithPosType = tuple[
-    int, Optional[int], Optional[int], ObservationPointType
+    int, int | None, int | None, ObservationPointType
 ]  # block addr, block ID, stmt ID, before/after
-BlockAddrType = tuple[int, Optional[int]]  # block addr, block ID
+BlockAddrType = tuple[int, int | None]  # block addr, block ID


 class Liveness:
angr/lib/angr_native.so CHANGED
Binary file
angr/procedures/definitions/linux_kernel.py CHANGED
@@ -3,6 +3,7 @@ import logging

 from angr.sim_type import SimTypeFunction, SimTypePointer, SimTypeLong, SimStruct, SimTypeInt, SimTypeChar, SimTypeBottom, SimTypeFd, SimTypeLongLong
 from angr.procedures import SIM_PROCEDURES as P
+from angr.calling_conventions import SYSCALL_CC
 from . import SimSyscallLibrary

 _l = logging.getLogger(__name__)
@@ -11,6 +12,10 @@ _l = logging.getLogger(__name__)
 lib = SimSyscallLibrary()
 lib.set_library_names('linux')
 lib.add_all_from_dict(P['linux_kernel'])
+for arch, os_name_to_cc in SYSCALL_CC.items():
+    linux_syscall_cc = os_name_to_cc.get("Linux")
+    if linux_syscall_cc:
+        lib.set_default_cc(arch, linux_syscall_cc)

 lib.add('open', P['posix']['open'])
 lib.add('read', P['posix']['read'])
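The new loop gives the Linux syscall library a default calling convention per architecture, so individual syscall SimProcedures no longer need per-function CC guessing. SYSCALL_CC maps an architecture name to an {OS name: CC class} table, which is exactly how the loop consumes it; a quick way to inspect what gets registered:

    from angr.calling_conventions import SYSCALL_CC

    # Print the syscall CC class that would be registered for Linux on each arch
    for arch_name, os_to_cc in SYSCALL_CC.items():
        cc_cls = os_to_cc.get("Linux")
        if cc_cls is not None:
            print(arch_name, "->", cc_cls.__name__)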
angr/state_plugins/unicorn_engine.py CHANGED
@@ -28,9 +28,11 @@ ffi = cffi.FFI()

 try:
     import unicorn
+    from unicorn.unicorn import _uc
 except ImportError:
-    l.warning("Unicorn is not installed. Support disabled.")
-    unicorn = None
+    l.info("Unicorn is not installed. Support disabled.")
+    unicorn = None  # type: ignore
+    _uc = None  # type: ignore


 class MEM_PATCH(ctypes.Structure):
@@ -418,6 +420,7 @@ def _load_native():
         getattr(handle, func).argtypes = argtypes

     # _setup_prototype_explicit(h, 'logSetLogLevel', None, ctypes.c_uint64)
+    _setup_prototype(h, "setup_imports", ctypes.c_bool, ctypes.c_char_p)
     _setup_prototype(
         h,
         "alloc",
@@ -470,7 +473,8 @@ def _load_native():
     _setup_prototype(h, "set_tracking", None, state_t, ctypes.c_bool, ctypes.c_bool)
     _setup_prototype(h, "executed_pages", ctypes.c_uint64, state_t)
     _setup_prototype(h, "in_cache", ctypes.c_bool, state_t, ctypes.c_uint64)
-    _setup_prototype(h, "set_map_callback", None, state_t, unicorn.unicorn.UC_HOOK_MEM_INVALID_CB)
+    if unicorn is not None:
+        _setup_prototype(h, "set_map_callback", None, state_t, unicorn.unicorn.UC_HOOK_MEM_INVALID_CB)
     _setup_prototype(
         h,
         "set_vex_to_unicorn_reg_mappings",
@@ -550,7 +554,7 @@ def _load_native():

         return h
     except (OSError, AttributeError) as e:
-        l.warning('failed loading "%s", unicorn support disabled (%s)', libfile, e)
+        l.error('failed loading "%s", unicorn support disabled (%s)', libfile, e)
         raise ImportError("Unable to import native SimUnicorn support") from e


@@ -560,6 +564,10 @@ try:
 except ImportError:
     _UC_NATIVE = None

+if _uc is not None and _UC_NATIVE is not None and not _UC_NATIVE.setup_imports(_uc._name.encode()):
+    l.error("Unicorn engine has an incompatible API. Support disabled.")
+    unicorn = None
+

 class Unicorn(SimStatePlugin):
     """
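The added setup_imports call is an ABI handshake between angr_native.so and the installed unicorn build: the native library re-resolves its unicorn entry points by library name and reports failure, in which case unicorn is reset to None and angr falls back to pure-Python execution. The surrounding optional-dependency pattern, reduced to a generic sketch (all names hypothetical):

    import logging

    l = logging.getLogger(__name__)

    try:
        import native_binding  # hypothetical optional accelerator
    except ImportError:
        native_binding = None  # feature flag: accelerator unavailable

    if native_binding is not None and not native_binding.compatible("1.x"):
        l.error("incompatible native library; support disabled")
        native_binding = None

    def process(data):
        if native_binding is None:
            return sum(data)  # stand-in for the pure-Python fallback path
        return native_binding.process(data)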
@@ -675,8 +683,12 @@ class Unicorn(SimStatePlugin):

         self.time = None

-        self._bullshit_cb = ctypes.cast(
-            unicorn.unicorn.UC_HOOK_MEM_INVALID_CB(self._hook_mem_unmapped), unicorn.unicorn.UC_HOOK_MEM_INVALID_CB
+        self._bullshit_cb = (
+            ctypes.cast(
+                unicorn.unicorn.UC_HOOK_MEM_INVALID_CB(self._hook_mem_unmapped), unicorn.unicorn.UC_HOOK_MEM_INVALID_CB
+            )
+            if unicorn is not None
+            else None
         )

     @SimStatePlugin.memo
@@ -777,8 +789,12 @@ class Unicorn(SimStatePlugin):

     def __setstate__(self, s):
         self.__dict__.update(s)
-        self._bullshit_cb = ctypes.cast(
-            unicorn.unicorn.UC_HOOK_MEM_INVALID_CB(self._hook_mem_unmapped), unicorn.unicorn.UC_HOOK_MEM_INVALID_CB
+        self._bullshit_cb = (
+            ctypes.cast(
+                unicorn.unicorn.UC_HOOK_MEM_INVALID_CB(self._hook_mem_unmapped), unicorn.unicorn.UC_HOOK_MEM_INVALID_CB
+            )
+            if unicorn is not None
+            else None
         )
         self._unicount = next(_unicounter)
         self._uc_state = None
angr/storage/memory_mixins/paged_memory/page_backer_mixins.py CHANGED
@@ -1,6 +1,5 @@
 from __future__ import annotations
 from mmap import mmap
-from typing import Union
 from collections.abc import Generator
 import logging

@@ -9,7 +8,7 @@ import cle

 l = logging.getLogger(__name__)

-BackerType = Union[bytes, bytearray, list[int]]
+BackerType = bytes | bytearray | list[int]
 BackerIterType = Generator[tuple[int, BackerType], None, None]

angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py CHANGED
@@ -1,7 +1,7 @@
 # pylint:disable=abstract-method,arguments-differ,assignment-from-no-return
 from __future__ import annotations
 import logging
-from typing import Union, Any
+from typing import Any
 from collections.abc import Callable

 from angr.storage.memory_mixins.memory_mixin import MemoryMixin
@@ -13,7 +13,7 @@ from .cooperation import MemoryObjectSetMixin

 l = logging.getLogger(name=__name__)

-_MOTYPE = Union[SimMemoryObject, SimLabeledMemoryObject]
+_MOTYPE = SimMemoryObject | SimLabeledMemoryObject


 class MVListPage(
angr/utils/doms.py CHANGED
@@ -21,6 +21,7 @@ class IncrementalDominators:
         self._pre: bool = not post  # calculate dominators

         self._doms: dict[Any, Any] = {}
+        self._dfs: dict[Any, set[Any]] | None = None  # initialized on-demand
         self._inverted_dom_tree: dict[Any, Any] | None = None  # initialized on demand

         self._doms = self.init_doms()
@@ -33,6 +34,21 @@ class IncrementalDominators:
         doms = networkx.immediate_dominators(self.graph, self.start)
         return doms

+    def init_dfs(self) -> dict[Any, set[Any]]:
+        _pred = self.graph.predecessors if self._pre else self.graph.successors
+        df: dict = {}
+        for u in self._doms:
+            _preds = list(_pred(u))  # type:ignore
+            if len(_preds) >= 2:
+                for v in _preds:
+                    if v in self._doms:
+                        while v is not self._doms[u]:
+                            if v not in df:
+                                df[v] = set()
+                            df[v].add(u)
+                            v = self._doms[v]
+        return df
+
     def _update_inverted_domtree(self):
         # recalculate the dominators for dominatees of replaced nodes
         if self._inverted_dom_tree is None:
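init_dfs computes every dominance frontier in one pass with the classic Cytron et al. algorithm: for each join node u (two or more predecessors), walk each predecessor v up the immediate-dominator chain, adding u to DF(v), until reaching idom(u). This replaces the per-query BFS that the old df() performed (removed further below). The same computation as a standalone sketch over a plain networkx digraph; networkx.dominance_frontiers offers a built-in to cross-check against:

    import networkx

    def dominance_frontiers(g: networkx.DiGraph, entry) -> dict:
        idom = networkx.immediate_dominators(g, entry)
        df = {u: set() for u in idom}
        for u in idom:
            preds = [p for p in g.predecessors(u) if p in idom]  # reachable preds only
            if len(preds) >= 2:
                for v in preds:
                    while v != idom[u]:  # climb the dominator tree up to idom(u)
                        df[v].add(u)
                        v = idom[v]
        return df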
@@ -63,6 +79,18 @@ class IncrementalDominators:
                 new_node_doms.append(dtee)
         self._doms[new_node] = new_dom

+        if self._dfs is not None:
+            # update dominance frontiers
+            if replaced_head in self._dfs:
+                self._dfs[new_node] = self._dfs[replaced_head]
+            for rn in replaced_nodes:
+                if rn in self._dfs:
+                    del self._dfs[rn]
+                for df in self._dfs.values():
+                    if rn in df:
+                        df.remove(rn)
+                        df.add(new_node)
+
         # keep inverted dom tree up-to-date
         self._inverted_dom_tree[new_dom].append(new_node)
         self._inverted_dom_tree[new_node] = new_node_doms
@@ -85,39 +113,9 @@ class IncrementalDominators:
         """
         Generate the dominance frontier of a node.
         """
-
-        if node not in self.graph:
-            return set()
-
-        _pred = self.graph.predecessors if self._pre else self.graph.successors
-        _succ = self.graph.successors if self._pre else self.graph.predecessors
-        df = set()
-
-        visited = {node}
-        queue = [node]
-
-        while queue:
-            u = queue.pop(0)
-            preds = list(_pred(u))  # type: ignore
-            added = False
-            if len(preds) >= 2:
-                for v in preds:
-                    if v in self._doms:
-                        while v != self._doms[u]:
-                            if v is node:
-                                df.add(u)
-                                added = True
-                                break
-                            v = self._doms[v]
-                    if added:
-                        break
-
-            if not added:
-                for v in _succ(u):  # type: ignore
-                    if v not in visited:
-                        visited.add(v)
-                        queue.append(v)
-        return df
+        if self._dfs is None:
+            self._dfs = self.init_dfs()
+        return self._dfs.get(node, set())

     def dominates(self, dominator_node: Any, node: Any) -> bool:
         """
@@ -140,3 +138,12 @@ class IncrementalDominators:
                 if true_doms[k] != self._doms[k]:
                     print(f"{k!r}: {true_doms[k]!r} {self._doms[k]!r}")
             raise ValueError("dominators do not match")
+
+        if self._dfs is not None:
+            dfs = self.init_dfs()
+            if len(dfs) != len(self._dfs):
+                raise ValueError("dfs do not match")
+            for k in dfs:
+                if dfs[k] != self._dfs[k]:
+                    print(f"{k!r}: {dfs[k]!r} {self._dfs[k]!r}")
+                    raise ValueError("dfs do not match")
angr/utils/graph.py CHANGED
@@ -276,7 +276,7 @@ class ContainerNode:

     def __init__(self, obj):
         self._obj = obj
-        self.index = None
+        self.index: int | None = None

     @property
     def obj(self):
@@ -308,8 +308,8 @@ class Dominators:
         self._reverse = reverse  # Set it to True to generate a post-dominator tree.

         # Temporary variables
-        self._ancestor = None
-        self._semi = None
+        self._ancestor: list[ContainerNode | None] | None = None
+        self._semi: list[ContainerNode] | None = None
         self._label = None

         # Output
@@ -351,9 +351,11 @@ class Dominators:
         # parent is a dict storing the mapping from ContainerNode to ContainerNode
         # Each node in prepared_graph is a ContainerNode instance

-        bucket = defaultdict(set)
-        dom = [None] * (len(vertices))
-        self._ancestor = [None] * (len(vertices) + 1)
+        assert self._semi is not None
+
+        bucket: dict[int, set[ContainerNode]] = defaultdict(set)
+        dom: list[None | ContainerNode] = [None] * (len(vertices))
+        self._ancestor = [None] * (len(vertices) + 1)  # type: ignore

         for i in range(len(vertices) - 1, 0, -1):
             w = vertices[i]
@@ -376,6 +378,7 @@ class Dominators:
             # Step 3
             for v in bucket[parent[w].index]:
                 u = self._pd_eval(v)
+                assert u.index is not None and v.index is not None
                 if self._semi[u.index].index < self._semi[v.index].index:
                     dom[v.index] = u
                 else:
@@ -393,7 +396,7 @@ class Dominators:
         self.dom = networkx.DiGraph()  # The post-dom tree described in a directional graph
         for i in range(1, len(vertices)):
             if dom[i] is not None and vertices[i] is not None:
-                self.dom.add_edge(dom[i].obj, vertices[i].obj)
+                self.dom.add_edge(dom[i].obj, vertices[i].obj)  # type: ignore

         # Output
         self.prepared_graph = _prepared_graph
@@ -476,7 +479,7 @@ class Dominators:
         all_nodes_count = new_graph.number_of_nodes()
         self._l.debug("There should be %d nodes in all", all_nodes_count)
         counter = 0
-        vertices = [ContainerNode("placeholder")]
+        vertices: list[Any] = [ContainerNode("placeholder")]
         scanned_nodes = set()
         parent = {}
         while True:
@@ -526,15 +529,23 @@ class Dominators:
         return new_graph, vertices, parent

     def _pd_link(self, v, w):
+        assert self._ancestor is not None
         self._ancestor[w.index] = v

     def _pd_eval(self, v):
+        assert self._ancestor is not None
+        assert self._label is not None
+
         if self._ancestor[v.index] is None:
             return v
         self._pd_compress(v)
         return self._label[v.index]

     def _pd_compress(self, v):
+        assert self._ancestor is not None
+        assert self._semi is not None
+        assert self._label is not None
+
         if self._ancestor[self._ancestor[v.index].index] is not None:
             self._pd_compress(self._ancestor[v.index])
         if (
@@ -604,7 +615,7 @@ class GraphUtils:
             if graph.in_degree(node) > 1:
                 merge_points.add(node)

-        ordered_merge_points = GraphUtils.quasi_topological_sort_nodes(graph, merge_points)
+        ordered_merge_points = GraphUtils.quasi_topological_sort_nodes(graph, nodes=list(merge_points))

         return [n.addr for n in ordered_merge_points]

@@ -732,7 +743,7 @@ class GraphUtils:
                 graph_copy.add_edge(src, dst)

         # add loners
-        out_degree_zero_nodes = [node for (node, degree) in graph.out_degree() if degree == 0]
+        out_degree_zero_nodes = [node for (node, degree) in graph.out_degree() if degree == 0]  # type:ignore
         for node in out_degree_zero_nodes:
             if graph.in_degree(node) == 0:
                 graph_copy.add_node(node)
@@ -749,9 +760,7 @@ class GraphUtils:

         if nodes is None:
             return ordered_nodes
-
-        nodes = set(nodes)
-        return [n for n in ordered_nodes if n in nodes]
+        return [n for n in ordered_nodes if n in set(nodes)]

     @staticmethod
     def _components_index_node(components, node):
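One nit on the simplified filter above: `set(nodes)` now sits inside the comprehension's condition, so the set is rebuilt once per element of ordered_nodes, making the filter quadratic. Hoisting the set keeps the old linear cost without the `nodes = set(nodes)` reassignment the removed lines used (fragment of the same method):

    node_set = set(nodes)
    return [n for n in ordered_nodes if n in node_set]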
@@ -820,13 +829,10 @@ class GraphUtils:
             # panic mode that will aggressively remove edges

             if len(subgraph) > 3000 and len(subgraph.edges) > len(subgraph) * 1.4:
-                for n in scc:
-                    if subgraph.in_degree[n] >= 1 and subgraph.out_degree[n] >= 1:
-                        for src in list(subgraph.predecessors(n)):
-                            if src is not n:
-                                subgraph.remove_edge(src, n)
-                    if len(subgraph.edges) <= len(subgraph) * 1.4:
-                        break
+                for n0, n1 in sorted(dfs_back_edges(subgraph, loop_head), key=lambda x: (x[0].addr, x[0].addr)):
+                    subgraph.remove_edge(n0, n1)
+                    if len(subgraph.edges) <= len(subgraph) * 1.4:
+                        break

             ordered_nodes.extend(GraphUtils.quasi_topological_sort_nodes(subgraph))
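The rewritten panic mode removes DFS back edges instead of arbitrary predecessor edges: back edges are exactly what close the cycles inside an SCC, so deleting them drives the subgraph toward acyclicity and a valid topological order (dfs_back_edges is angr's own helper from angr.utils.graph). A generic sketch of back-edge detection with iterative DFS coloring, assuming a networkx-style graph:

    def find_back_edges(graph, start):
        WHITE, GRAY, BLACK = 0, 1, 2  # unvisited / on the DFS stack / finished
        color = dict.fromkeys(graph.nodes, WHITE)
        back_edges = []
        stack = [(start, iter(graph.successors(start)))]
        color[start] = GRAY
        while stack:
            u, it = stack[-1]
            for v in it:
                if color[v] == GRAY:  # target is an ancestor still on the stack: back edge
                    back_edges.append((u, v))
                elif color[v] == WHITE:
                    color[v] = GRAY
                    stack.append((v, iter(graph.successors(v))))
                    break
            else:
                color[u] = BLACK  # all successors handled
                stack.pop()
        return back_edges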