angr-9.2.142-py3-none-manylinux2014_aarch64.whl → angr-9.2.144-py3-none-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- angr/__init__.py +1 -1
- angr/analyses/calling_convention/calling_convention.py +22 -10
- angr/analyses/calling_convention/fact_collector.py +72 -14
- angr/analyses/cfg/cfg_base.py +7 -2
- angr/analyses/cfg/cfg_emulated.py +13 -4
- angr/analyses/cfg/cfg_fast.py +21 -60
- angr/analyses/cfg/indirect_jump_resolvers/__init__.py +2 -0
- angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +12 -1
- angr/analyses/cfg/indirect_jump_resolvers/constant_value_manager.py +107 -0
- angr/analyses/cfg/indirect_jump_resolvers/default_resolvers.py +2 -1
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +6 -102
- angr/analyses/cfg/indirect_jump_resolvers/syscall_resolver.py +92 -0
- angr/analyses/complete_calling_conventions.py +18 -5
- angr/analyses/decompiler/ail_simplifier.py +95 -65
- angr/analyses/decompiler/clinic.py +162 -68
- angr/analyses/decompiler/decompiler.py +4 -4
- angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +1 -1
- angr/analyses/decompiler/optimization_passes/condition_constprop.py +49 -14
- angr/analyses/decompiler/optimization_passes/ite_region_converter.py +8 -0
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -5
- angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +5 -0
- angr/analyses/decompiler/peephole_optimizations/__init__.py +2 -0
- angr/analyses/decompiler/peephole_optimizations/a_sub_a_shr_const_shr_const.py +37 -0
- angr/analyses/decompiler/peephole_optimizations/simplify_pc_relative_loads.py +15 -1
- angr/analyses/decompiler/sequence_walker.py +8 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +2 -0
- angr/analyses/decompiler/ssailification/ssailification.py +10 -2
- angr/analyses/decompiler/ssailification/traversal_engine.py +17 -2
- angr/analyses/decompiler/structured_codegen/c.py +25 -4
- angr/analyses/decompiler/utils.py +13 -0
- angr/analyses/disassembly.py +3 -3
- angr/analyses/fcp/fcp.py +1 -4
- angr/analyses/s_propagator.py +40 -29
- angr/analyses/s_reaching_definitions/s_rda_model.py +45 -36
- angr/analyses/s_reaching_definitions/s_rda_view.py +6 -3
- angr/analyses/s_reaching_definitions/s_reaching_definitions.py +41 -42
- angr/analyses/typehoon/dfa.py +13 -3
- angr/analyses/typehoon/typehoon.py +60 -18
- angr/analyses/typehoon/typevars.py +11 -7
- angr/analyses/variable_recovery/engine_ail.py +19 -23
- angr/analyses/variable_recovery/engine_base.py +26 -30
- angr/analyses/variable_recovery/variable_recovery_fast.py +17 -21
- angr/calling_conventions.py +18 -8
- angr/knowledge_plugins/functions/function.py +29 -15
- angr/knowledge_plugins/key_definitions/constants.py +2 -2
- angr/knowledge_plugins/key_definitions/liveness.py +4 -4
- angr/lib/angr_native.so +0 -0
- angr/procedures/definitions/linux_kernel.py +5 -0
- angr/state_plugins/unicorn_engine.py +24 -8
- angr/storage/memory_mixins/paged_memory/page_backer_mixins.py +1 -2
- angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +2 -2
- angr/utils/doms.py +40 -33
- angr/utils/graph.py +26 -20
- angr/utils/ssa/__init__.py +21 -14
- angr/utils/ssa/vvar_uses_collector.py +2 -2
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/METADATA +11 -8
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/RECORD +61 -58
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/WHEEL +1 -1
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/LICENSE +0 -0
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/entry_points.txt +0 -0
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/top_level.txt +0 -0
angr/analyses/variable_recovery/variable_recovery_fast.py
CHANGED

@@ -47,6 +47,7 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
         analysis,
         arch,
         func,
+        project,
         stack_region=None,
         register_region=None,
         global_region=None,
@@ -55,7 +56,6 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
         func_typevar=None,
         delayed_type_constraints=None,
         stack_offset_typevars=None,
-        project=None,
         ret_val_size=None,
     ):
         super().__init__(
@@ -63,6 +63,7 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
             analysis,
             arch,
             func,
+            project,
             stack_region=stack_region,
             register_region=register_region,
             global_region=global_region,
@@ -71,12 +72,11 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
             func_typevar=func_typevar,
             delayed_type_constraints=delayed_type_constraints,
             stack_offset_typevars=stack_offset_typevars,
-            project=project,
         )
         self.ret_val_size = ret_val_size

     def __repr__(self):
-        return f"<VRAbstractState@{self.block_addr:#x}
+        return f"<VRAbstractState@{self.block_addr:#x}"

     def __eq__(self, other):
         if type(other) is not VariableRecoveryFastState:
@@ -96,12 +96,14 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
             type_constraints=self.type_constraints,
             func_typevar=self.func_typevar,
             delayed_type_constraints=self.delayed_type_constraints,
-            stack_offset_typevars=
+            stack_offset_typevars=self.stack_offset_typevars,
             project=self.project,
             ret_val_size=self.ret_val_size,
         )

-    def merge(
+    def merge(
+        self, others: tuple[VariableRecoveryFastState, ...], successor=None
+    ) -> tuple[VariableRecoveryFastState, bool]:
         """
         Merge two abstract states.

@@ -135,10 +137,10 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
         # add subtype constraints for all replacements
         for v0, v1 in self.phi_variables.items():
             # v0 will be replaced by v1
-            if not typevars.has_type_variable_for(v1
-                typevars.add_type_variable(v1,
-            if not typevars.has_type_variable_for(v0
-                typevars.add_type_variable(v0,
+            if not typevars.has_type_variable_for(v1):
+                typevars.add_type_variable(v1, TypeVariable())
+            if not typevars.has_type_variable_for(v0):
+                typevars.add_type_variable(v0, TypeVariable())
         # Assuming v2 = phi(v0, v1), then we know that v0_typevar == v1_typevar == v2_typevar
         # However, it's possible that neither v0 nor v1 will ever be used in future blocks, which not only makes
         # this phi function useless, but also leads to the incorrect assumption that v1_typevar == v2_typevar.
@@ -146,7 +148,7 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
         # when v1 (the new variable that will end up in the state) is ever used in the future.

         # create an equivalence relationship
-        equivalence = Equivalence(typevars.get_type_variable(v1
+        equivalence = Equivalence(typevars.get_type_variable(v1), typevars.get_type_variable(v0))
         delayed_typeconstraints[v1].add(equivalence)

         stack_offset_typevars = {}
@@ -281,6 +283,7 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
         self.func_typevar = TypeVariable(name=func.name)
         self.delayed_type_constraints = None
         self.ret_val_size = None
+        self.stack_offset_typevars: dict[int, TypeVariable] = {}

         self._analyze()

@@ -328,6 +331,7 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
             type_constraints=self.type_constraints,
             func_typevar=self.func_typevar,
             delayed_type_constraints=self.delayed_type_constraints,
+            stack_offset_typevars=self.stack_offset_typevars,
         )
         initial_sp = state.stack_address(self.project.arch.bytes if self.project.arch.call_pushes_ret else 0)
         if self.project.arch.sp_offset is not None:
@@ -439,20 +443,10 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
         block = self.project.factory.block(node.addr, node.size, opt_level=1, cross_insn_opt=False)
         block_key = node.addr

-        # if node.addr in self._instates:
-        #     prev_state: VariableRecoveryFastState = self._instates[node.addr]
-        #     if input_state == prev_state:
-        #         l.debug('Skip node %#x as we have reached a fixed-point', node.addr)
-        #         return False, input_state
-        #     else:
-        #         l.debug('Merging input state of node %#x with the previous state.', node.addr)
-        #         input_state, _ = prev_state.merge((input_state,), successor=node.addr)
-
         state = state.copy()
         state.block_addr = node.addr
         if isinstance(node, ailment.Block):
             state.block_idx = node.idx
-        # self._instates[node.addr] = state

         if self._node_iterations[block_key] >= self._max_iterations:
             l.debug("Skip node %#x as we have iterated %d times on it.", node.addr, self._node_iterations[node.addr])
@@ -491,10 +485,12 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
         self.variable_manager[self.function.addr].unify_variables()

         # fill in var_to_typevars
+        assert self.typevars is not None
         for var, typevar_set in self.typevars._typevars.items():
             self.var_to_typevars[var] = typevar_set

         # unify type variables for global variables
+        assert self.type_constraints is not None
         for var, typevars in self.var_to_typevars.items():
             if len(typevars) > 1 and isinstance(var, SimMemoryVariable) and not isinstance(var, SimStackVariable):
                 sorted_typevars = sorted(typevars, key=lambda x: str(x))  # pylint:disable=unnecessary-lambda
@@ -600,7 +596,7 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
         block = self._peephole_optimize(block)

         processor = self._ail_engine if isinstance(block, ailment.Block) else self._vex_engine
-        processor.process(state, block=block, fail_fast=self._fail_fast)
+        processor.process(state, block=block, fail_fast=self._fail_fast)  # type: ignore

         if self._track_sp and block.addr in self._node_to_cc:
             # readjusting sp at the end for blocks that end in a call
angr/calling_conventions.py
CHANGED
@@ -254,7 +254,7 @@ class SimFunctionArgument:
         if self.size not in (4, 8):
             raise ValueError(f"What do I do with a float {self.size} bytes long")
         value = claripy.FPV(value, claripy.FSORT_FLOAT if self.size == 4 else claripy.FSORT_DOUBLE)
-        return value.raw_to_bv()
+        return value.raw_to_bv()  # type:ignore

     def check_value_get(self, value):
         if self.is_fp:
@@ -578,8 +578,12 @@ class SimCC:
     # (if applicable) and the arguments. Probably zero.
     STACKARG_SP_DIFF = 0  # The amount of stack space reserved for the return address
     CALLER_SAVED_REGS: list[str] = []  # Caller-saved registers
-    RETURN_ADDR: SimFunctionArgument
-
+    RETURN_ADDR: SimFunctionArgument | None = (
+        None  # The location where the return address is stored, as a SimFunctionArgument
+    )
+    RETURN_VAL: SimFunctionArgument | None = (
+        None  # The location where the return value is stored, as a SimFunctionArgument
+    )
     OVERFLOW_RETURN_VAL: SimFunctionArgument | None = (
         None  # The second half of the location where a double-length return value is stored
     )
@@ -766,7 +770,11 @@ class SimCC:
         return (
             isinstance(val, (float, claripy.ast.FP))
             or (isinstance(val, claripy.ast.Base) and val.op.startswith("fp"))  # type: ignore
-            or (
+            or (
+                isinstance(val, claripy.ast.Base)
+                and val.op == "Reverse"  # type:ignore
+                and val.args[0].op.startswith("fp")  # type:ignore
+            )
         )

     @staticmethod
@@ -922,8 +930,10 @@ class SimCC:
             allocator.apply(state, alloc_base)

         for loc, val in zip(arg_locs, vals):
+            assert loc is not None
             loc.set_value(state, val, stack_base=stack_base)
-        self.return_addr
+        if self.return_addr is not None:
+            self.return_addr.set_value(state, ret_addr, stack_base=stack_base)

     def teardown_callsite(self, state, return_val=None, prototype=None, force_callee_cleanup=False):
         """
@@ -943,10 +953,10 @@ class SimCC:
             self.set_return_val(state, return_val, prototype.returnty)
             # ummmmmmmm hack
             loc = self.return_val(prototype.returnty)
-            if isinstance(loc, SimReferenceArgument):
+            if self.RETURN_VAL is not None and isinstance(loc, SimReferenceArgument):
                 self.RETURN_VAL.set_value(state, loc.ptr_loc.get_value(state))

-        ret_addr = self.return_addr.get_value(state)
+        ret_addr = self.return_addr.get_value(state) if self.return_addr is not None else None

         if state.arch.sp_offset is not None and prototype is not None:
             if force_callee_cleanup or self.CALLEE_CLEANUP:
@@ -975,7 +985,7 @@ class SimCC:

         if arg.buffer:
             if isinstance(arg.value, claripy.ast.Bits):
-                real_value = arg.value.chop(state.arch.byte_width)
+                real_value = arg.value.chop(state.arch.byte_width)  # type:ignore
             elif type(arg.value) in (bytes, str):
                 real_value = claripy.BVV(arg.value).chop(8)
             else:
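Takeaway from the SimCC hunks: RETURN_ADDR and RETURN_VAL are now Optional class attributes, so downstream code must tolerate None. A minimal sketch of the guarded access pattern (binary path and state setup are illustrative only):

    import angr

    proj = angr.Project("/bin/true", auto_load_libs=False)  # illustrative path
    state = proj.factory.entry_state()
    cc = proj.factory.cc()

    # mirror the None guards introduced in setup_callsite()/teardown_callsite()
    ret_addr = cc.return_addr.get_value(state) if cc.return_addr is not None else None
    ret_val_loc = cc.RETURN_VAL  # may now be None for CCs that define no return location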
angr/knowledge_plugins/functions/function.py
CHANGED

@@ -237,21 +237,7 @@ class Function(Serializable):

         self._returning = self._get_initial_returning()

-
-        # If it is a SimProcedure it might have a CC already defined which can be used
-        if self.is_simprocedure and self.project is not None and self.addr in self.project._sim_procedures:
-            simproc = self.project._sim_procedures[self.addr]
-            cc = simproc.cc
-            if cc is None:
-                arch = self.project.arch
-                if self.project.arch.name in DEFAULT_CC:
-                    cc = default_cc(
-                        arch.name, platform=self.project.simos.name if self.project.simos is not None else None
-                    )(arch)
-
-            self.calling_convention: SimCC | None = cc
-        else:
-            self.calling_convention: SimCC | None = None
+        self._init_prototype_and_calling_convention()

     @property
     @deprecated(".is_alignment")
@@ -768,6 +754,34 @@ class Function(Serializable):
         # Cannot determine
         return None

+    def _init_prototype_and_calling_convention(self) -> None:
+        """
+        Initialize prototype and calling convention from a SimProcedure, if available.
+        """
+        hooker = None
+        if self.is_syscall and self.project is not None and self.project.simos.is_syscall_addr(self.addr):
+            hooker = self.project.simos.syscall_from_addr(self.addr)
+        elif self.is_simprocedure and self.project is not None:
+            hooker = self.project.hooked_by(self.addr)
+        if hooker is None or hooker.guessed_prototype:
+            return
+
+        if hooker.prototype:
+            self.prototype_libname = hooker.library_name
+            self.prototype = hooker.prototype
+            self.is_prototype_guessed = False
+
+        cc = hooker.cc
+        if cc is None and self.project is not None:
+            arch = self.project.arch
+            if arch.name in DEFAULT_CC:
+                cc_cls = default_cc(
+                    arch.name, platform=self.project.simos.name if self.project.simos is not None else None
+                )
+                if cc_cls is not None:
+                    cc = cc_cls(arch)
+        self.calling_convention = cc
+
     def _clear_transition_graph(self):
         self._block_sizes = {}
         self._addr_to_block_node = {}
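The new Function._init_prototype_and_calling_convention() pulls prototype, library name, and cc from whatever hooks the address (a syscall stub or a SimProcedure). A sketch of the inputs it consumes, using a hypothetical hook address and an illustrative binary path:

    import angr
    from angr.procedures import SIM_PROCEDURES

    proj = angr.Project("/bin/true", auto_load_libs=False)  # illustrative path
    hook_addr = 0x500000  # hypothetical address
    proj.hook(hook_addr, SIM_PROCEDURES["libc"]["malloc"]())

    hooker = proj.hooked_by(hook_addr)
    if hooker is not None and not hooker.guessed_prototype:
        # these are exactly the fields the helper copies onto the Function
        print(hooker.prototype, hooker.cc, hooker.library_name)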
angr/knowledge_plugins/key_definitions/constants.py
CHANGED

@@ -1,5 +1,5 @@
 from __future__ import annotations
-from typing import Literal
+from typing import Literal
 import enum

 DEBUG = False
@@ -25,5 +25,5 @@ OP_BEFORE = ObservationPointType.OP_BEFORE
 OP_AFTER = ObservationPointType.OP_AFTER

 ObservationPoint = tuple[
-    Literal["insn", "node", "stmt", "exit"],
+    Literal["insn", "node", "stmt", "exit"], int | tuple[int, int] | tuple[int, int, int], ObservationPointType
 ]
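The widened middle element of ObservationPoint accepts a plain address or block/statement index tuples. Illustrative values matching the alias (the tuple interpretations are assumptions, not documented in the diff):

    from angr.knowledge_plugins.key_definitions.constants import OP_AFTER, OP_BEFORE

    op_insn = ("insn", 0x401000, OP_BEFORE)          # a single instruction address
    op_stmt = ("stmt", (0x401000, 3), OP_AFTER)      # assumed: block addr, stmt idx
    op_exit = ("exit", (0x401000, 0, 3), OP_BEFORE)  # assumed: block addr, block idx, stmt idx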
angr/knowledge_plugins/key_definitions/liveness.py
CHANGED

@@ -1,5 +1,5 @@
 from __future__ import annotations
-from typing import
+from typing import TYPE_CHECKING

 from collections import defaultdict
 from itertools import chain
@@ -14,11 +14,11 @@ if TYPE_CHECKING:
     from angr.code_location import CodeLocation


-LocationType = tuple[int,
+LocationType = tuple[int, int | None, int | None]  # block addr, block ID, stmt ID
 LocationWithPosType = tuple[
-    int,
+    int, int | None, int | None, ObservationPointType
 ]  # block addr, block ID, stmt ID, before/after
-BlockAddrType = tuple[int,
+BlockAddrType = tuple[int, int | None]  # block addr, block ID


 class Liveness:
angr/lib/angr_native.so
CHANGED

Binary file
angr/procedures/definitions/linux_kernel.py
CHANGED

@@ -3,6 +3,7 @@ import logging

 from angr.sim_type import SimTypeFunction, SimTypePointer, SimTypeLong, SimStruct, SimTypeInt, SimTypeChar, SimTypeBottom, SimTypeFd, SimTypeLongLong
 from angr.procedures import SIM_PROCEDURES as P
+from angr.calling_conventions import SYSCALL_CC
 from . import SimSyscallLibrary

 _l = logging.getLogger(__name__)
@@ -11,6 +12,10 @@ _l = logging.getLogger(__name__)
 lib = SimSyscallLibrary()
 lib.set_library_names('linux')
 lib.add_all_from_dict(P['linux_kernel'])
+for arch, os_name_to_cc in SYSCALL_CC.items():
+    linux_syscall_cc = os_name_to_cc.get("Linux")
+    if linux_syscall_cc:
+        lib.set_default_cc(arch, linux_syscall_cc)

 lib.add('open', P['posix']['open'])
 lib.add('read', P['posix']['read'])
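The new loop seeds per-architecture default syscall calling conventions. SYSCALL_CC is a nested mapping from arch name to platform name to a SimCC subclass, so the equivalent one-off lookup looks like this (a sketch; AMD64 is chosen arbitrarily):

    import archinfo
    from angr.calling_conventions import SYSCALL_CC

    arch = archinfo.ArchAMD64()
    cc_cls = SYSCALL_CC.get(arch.name, {}).get("Linux")
    if cc_cls is not None:
        syscall_cc = cc_cls(arch)  # the same class set_default_cc() registers above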
angr/state_plugins/unicorn_engine.py
CHANGED

@@ -28,9 +28,11 @@ ffi = cffi.FFI()

 try:
     import unicorn
+    from unicorn.unicorn import _uc
 except ImportError:
-    l.
-    unicorn = None
+    l.info("Unicorn is not installed. Support disabled.")
+    unicorn = None  # type: ignore
+    _uc = None  # type: ignore


 class MEM_PATCH(ctypes.Structure):
@@ -418,6 +420,7 @@ def _load_native():
         getattr(handle, func).argtypes = argtypes

     # _setup_prototype_explicit(h, 'logSetLogLevel', None, ctypes.c_uint64)
+    _setup_prototype(h, "setup_imports", ctypes.c_bool, ctypes.c_char_p)
     _setup_prototype(
         h,
         "alloc",
@@ -470,7 +473,8 @@ def _load_native():
     _setup_prototype(h, "set_tracking", None, state_t, ctypes.c_bool, ctypes.c_bool)
     _setup_prototype(h, "executed_pages", ctypes.c_uint64, state_t)
     _setup_prototype(h, "in_cache", ctypes.c_bool, state_t, ctypes.c_uint64)
-
+    if unicorn is not None:
+        _setup_prototype(h, "set_map_callback", None, state_t, unicorn.unicorn.UC_HOOK_MEM_INVALID_CB)
     _setup_prototype(
         h,
         "set_vex_to_unicorn_reg_mappings",
@@ -550,7 +554,7 @@ def _load_native():

         return h
     except (OSError, AttributeError) as e:
-        l.
+        l.error('failed loading "%s", unicorn support disabled (%s)', libfile, e)
         raise ImportError("Unable to import native SimUnicorn support") from e


@@ -560,6 +564,10 @@ try:
 except ImportError:
     _UC_NATIVE = None

+if _uc is not None and _UC_NATIVE is not None and not _UC_NATIVE.setup_imports(_uc._name.encode()):
+    l.error("Unicorn engine has an incompatible API. Support disabled.")
+    unicorn = None
+

 class Unicorn(SimStatePlugin):
     """
@@ -675,8 +683,12 @@ class Unicorn(SimStatePlugin):

         self.time = None

-        self._bullshit_cb =
-
+        self._bullshit_cb = (
+            ctypes.cast(
+                unicorn.unicorn.UC_HOOK_MEM_INVALID_CB(self._hook_mem_unmapped), unicorn.unicorn.UC_HOOK_MEM_INVALID_CB
+            )
+            if unicorn is not None
+            else None
         )

     @SimStatePlugin.memo
@@ -777,8 +789,12 @@ class Unicorn(SimStatePlugin):

     def __setstate__(self, s):
         self.__dict__.update(s)
-        self._bullshit_cb =
-
+        self._bullshit_cb = (
+            ctypes.cast(
+                unicorn.unicorn.UC_HOOK_MEM_INVALID_CB(self._hook_mem_unmapped), unicorn.unicorn.UC_HOOK_MEM_INVALID_CB
+            )
+            if unicorn is not None
+            else None
         )
         self._unicount = next(_unicounter)
         self._uc_state = None
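Most of the unicorn_engine.py changes implement one pattern: build the ctypes callback thunk only when unicorn actually imported, otherwise store None. A self-contained sketch of that guard (the handler body is hypothetical):

    import ctypes

    try:
        import unicorn
    except ImportError:
        unicorn = None  # degrade gracefully; native support stays disabled

    def hook_mem_unmapped(uc, access, address, size, value, user_data):
        return False  # hypothetical handler

    # keep a reference so the ctypes thunk is not garbage-collected mid-emulation
    cb = (
        ctypes.cast(
            unicorn.unicorn.UC_HOOK_MEM_INVALID_CB(hook_mem_unmapped),
            unicorn.unicorn.UC_HOOK_MEM_INVALID_CB,
        )
        if unicorn is not None
        else None
    )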
angr/storage/memory_mixins/paged_memory/page_backer_mixins.py
CHANGED

@@ -1,6 +1,5 @@
 from __future__ import annotations
 from mmap import mmap
-from typing import Union
 from collections.abc import Generator
 import logging

@@ -9,7 +8,7 @@ import cle

 l = logging.getLogger(__name__)

-BackerType =
+BackerType = bytes | bytearray | list[int]
 BackerIterType = Generator[tuple[int, BackerType], None, None]

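BackerType now spells out the concrete backer types cle hands back. A short sketch of consuming them (the binary path is illustrative):

    import cle

    ld = cle.Loader("/bin/true")  # illustrative path
    for start, backer in ld.memory.backers():
        # each backer is bytes/bytearray (or a list of ints), per the new alias
        print(hex(start), len(backer))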
angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py
CHANGED

@@ -1,7 +1,7 @@
 # pylint:disable=abstract-method,arguments-differ,assignment-from-no-return
 from __future__ import annotations
 import logging
-from typing import
+from typing import Any
 from collections.abc import Callable

 from angr.storage.memory_mixins.memory_mixin import MemoryMixin
@@ -13,7 +13,7 @@ from .cooperation import MemoryObjectSetMixin

 l = logging.getLogger(name=__name__)

-_MOTYPE =
+_MOTYPE = SimMemoryObject | SimLabeledMemoryObject


 class MVListPage(
angr/utils/doms.py
CHANGED

@@ -21,6 +21,7 @@ class IncrementalDominators:
         self._pre: bool = not post  # calculate dominators

         self._doms: dict[Any, Any] = {}
+        self._dfs: dict[Any, set[Any]] | None = None  # initialized on-demand
         self._inverted_dom_tree: dict[Any, Any] | None = None  # initialized on demand

         self._doms = self.init_doms()
@@ -33,6 +34,21 @@ class IncrementalDominators:
         doms = networkx.immediate_dominators(self.graph, self.start)
         return doms

+    def init_dfs(self) -> dict[Any, set[Any]]:
+        _pred = self.graph.predecessors if self._pre else self.graph.successors
+        df: dict = {}
+        for u in self._doms:
+            _preds = list(_pred(u))  # type:ignore
+            if len(_preds) >= 2:
+                for v in _preds:
+                    if v in self._doms:
+                        while v is not self._doms[u]:
+                            if v not in df:
+                                df[v] = set()
+                            df[v].add(u)
+                            v = self._doms[v]
+        return df
+
     def _update_inverted_domtree(self):
         # recalculate the dominators for dominatees of replaced nodes
         if self._inverted_dom_tree is None:
@@ -63,6 +79,18 @@ class IncrementalDominators:
             new_node_doms.append(dtee)
         self._doms[new_node] = new_dom

+        if self._dfs is not None:
+            # update dominance frontiers
+            if replaced_head in self._dfs:
+                self._dfs[new_node] = self._dfs[replaced_head]
+            for rn in replaced_nodes:
+                if rn in self._dfs:
+                    del self._dfs[rn]
+                for df in self._dfs.values():
+                    if rn in df:
+                        df.remove(rn)
+                        df.add(new_node)
+
         # keep inverted dom tree up-to-date
         self._inverted_dom_tree[new_dom].append(new_node)
         self._inverted_dom_tree[new_node] = new_node_doms
@@ -85,39 +113,9 @@ class IncrementalDominators:
         """
         Generate the dominance frontier of a node.
         """
-
-
-
-
-        _pred = self.graph.predecessors if self._pre else self.graph.successors
-        _succ = self.graph.successors if self._pre else self.graph.predecessors
-        df = set()
-
-        visited = {node}
-        queue = [node]
-
-        while queue:
-            u = queue.pop(0)
-            preds = list(_pred(u))  # type: ignore
-            added = False
-            if len(preds) >= 2:
-                for v in preds:
-                    if v in self._doms:
-                        while v != self._doms[u]:
-                            if v is node:
-                                df.add(u)
-                                added = True
-                                break
-                            v = self._doms[v]
-                    if added:
-                        break
-
-            if not added:
-                for v in _succ(u):  # type: ignore
-                    if v not in visited:
-                        visited.add(v)
-                        queue.append(v)
-        return df
+        if self._dfs is None:
+            self._dfs = self.init_dfs()
+        return self._dfs.get(node, set())

     def dominates(self, dominator_node: Any, node: Any) -> bool:
         """
@@ -140,3 +138,12 @@ class IncrementalDominators:
         if true_doms[k] != self._doms[k]:
             print(f"{k!r}: {true_doms[k]!r} {self._doms[k]!r}")
             raise ValueError("dominators do not match")
+
+        if self._dfs is not None:
+            dfs = self.init_dfs()
+            if len(dfs) != len(self._dfs):
+                raise ValueError("dfs do not match")
+            for k in dfs:
+                if dfs[k] != self._dfs[k]:
+                    print(f"{k!r}: {dfs[k]!r} {self._dfs[k]!r}")
+                    raise ValueError("dfs do not match")
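init_dfs() above is the textbook Cytron et al. dominance-frontier pass: for every join node u, each predecessor's idom chain is walked up to idom(u), adding u to the frontier of every node visited. A self-contained sketch of the same computation over a plain networkx graph:

    import networkx

    g = networkx.DiGraph([(1, 2), (1, 3), (2, 4), (3, 4), (4, 5)])
    idom = networkx.immediate_dominators(g, 1)

    df: dict[int, set[int]] = {}
    for u in idom:
        preds = list(g.predecessors(u))
        if len(preds) >= 2:            # u is a join node
            for v in preds:
                while v != idom[u]:    # climb the dominator tree
                    df.setdefault(v, set()).add(u)
                    v = idom[v]

    assert df == {2: {4}, 3: {4}}      # 4 is in the frontier of both branches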
angr/utils/graph.py
CHANGED

@@ -276,7 +276,7 @@ class ContainerNode:

     def __init__(self, obj):
         self._obj = obj
-        self.index = None
+        self.index: int | None = None

     @property
     def obj(self):
@@ -308,8 +308,8 @@ class Dominators:
         self._reverse = reverse  # Set it to True to generate a post-dominator tree.

         # Temporary variables
-        self._ancestor = None
-        self._semi = None
+        self._ancestor: list[ContainerNode | None] | None = None
+        self._semi: list[ContainerNode] | None = None
         self._label = None

         # Output
@@ -351,9 +351,11 @@ class Dominators:
         # parent is a dict storing the mapping from ContainerNode to ContainerNode
         # Each node in prepared_graph is a ContainerNode instance

-
-
-
+        assert self._semi is not None
+
+        bucket: dict[int, set[ContainerNode]] = defaultdict(set)
+        dom: list[None | ContainerNode] = [None] * (len(vertices))
+        self._ancestor = [None] * (len(vertices) + 1)  # type: ignore

         for i in range(len(vertices) - 1, 0, -1):
             w = vertices[i]
@@ -376,6 +378,7 @@ class Dominators:
         # Step 3
         for v in bucket[parent[w].index]:
             u = self._pd_eval(v)
+            assert u.index is not None and v.index is not None
             if self._semi[u.index].index < self._semi[v.index].index:
                 dom[v.index] = u
             else:
@@ -393,7 +396,7 @@ class Dominators:
         self.dom = networkx.DiGraph()  # The post-dom tree described in a directional graph
         for i in range(1, len(vertices)):
             if dom[i] is not None and vertices[i] is not None:
-                self.dom.add_edge(dom[i].obj, vertices[i].obj)
+                self.dom.add_edge(dom[i].obj, vertices[i].obj)  # type: ignore

         # Output
         self.prepared_graph = _prepared_graph
@@ -476,7 +479,7 @@ class Dominators:
         all_nodes_count = new_graph.number_of_nodes()
         self._l.debug("There should be %d nodes in all", all_nodes_count)
         counter = 0
-        vertices = [ContainerNode("placeholder")]
+        vertices: list[Any] = [ContainerNode("placeholder")]
         scanned_nodes = set()
         parent = {}
         while True:
@@ -526,15 +529,23 @@ class Dominators:
         return new_graph, vertices, parent

     def _pd_link(self, v, w):
+        assert self._ancestor is not None
         self._ancestor[w.index] = v

     def _pd_eval(self, v):
+        assert self._ancestor is not None
+        assert self._label is not None
+
         if self._ancestor[v.index] is None:
             return v
         self._pd_compress(v)
         return self._label[v.index]

     def _pd_compress(self, v):
+        assert self._ancestor is not None
+        assert self._semi is not None
+        assert self._label is not None
+
         if self._ancestor[self._ancestor[v.index].index] is not None:
             self._pd_compress(self._ancestor[v.index])
             if (
@@ -604,7 +615,7 @@ class GraphUtils:
         if graph.in_degree(node) > 1:
             merge_points.add(node)

-        ordered_merge_points = GraphUtils.quasi_topological_sort_nodes(graph, merge_points)
+        ordered_merge_points = GraphUtils.quasi_topological_sort_nodes(graph, nodes=list(merge_points))

         return [n.addr for n in ordered_merge_points]

@@ -732,7 +743,7 @@ class GraphUtils:
         graph_copy.add_edge(src, dst)

         # add loners
-        out_degree_zero_nodes = [node for (node, degree) in graph.out_degree() if degree == 0]
+        out_degree_zero_nodes = [node for (node, degree) in graph.out_degree() if degree == 0]  # type:ignore
         for node in out_degree_zero_nodes:
             if graph.in_degree(node) == 0:
                 graph_copy.add_node(node)
@@ -749,9 +760,7 @@ class GraphUtils:

         if nodes is None:
             return ordered_nodes
-
-        nodes = set(nodes)
-        return [n for n in ordered_nodes if n in nodes]
+        return [n for n in ordered_nodes if n in set(nodes)]

     @staticmethod
     def _components_index_node(components, node):
@@ -820,13 +829,10 @@ class GraphUtils:
         # panic mode that will aggressively remove edges

         if len(subgraph) > 3000 and len(subgraph.edges) > len(subgraph) * 1.4:
-            for
-
-
-
-                subgraph.remove_edge(src, n)
-                if len(subgraph.edges) <= len(subgraph) * 1.4:
-                    break
+            for n0, n1 in sorted(dfs_back_edges(subgraph, loop_head), key=lambda x: (x[0].addr, x[0].addr)):
+                subgraph.remove_edge(n0, n1)
+                if len(subgraph.edges) <= len(subgraph) * 1.4:
+                    break

         ordered_nodes.extend(GraphUtils.quasi_topological_sort_nodes(subgraph))
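The rewritten panic-mode loop drops DFS back edges until edge density falls below the threshold. The underlying helper can be used standalone; a sketch (the graph is made up; dfs_back_edges comes from angr.utils.graph):

    import networkx
    from angr.utils.graph import dfs_back_edges

    g = networkx.DiGraph([(1, 2), (2, 3), (3, 1), (3, 4)])  # 3 -> 1 closes a loop
    for src, dst in list(dfs_back_edges(g, 1)):  # materialize before mutating
        g.remove_edge(src, dst)

    assert networkx.is_directed_acyclic_graph(g)
    order = list(networkx.topological_sort(g))   # now a valid topological order exists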