PyPI - angr - Versions diffs - 9.2.142__py3-none-manylinux2014_aarch64.whl → 9.2.143__py3-none-manylinux2014_aarch64.whl - Mend

angr 9.2.142__py3-none-manylinux2014_aarch64.whl → 9.2.143__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of angr might be problematic. Click here for more details.

Files changed (28)

angr/__init__.py +1 -1
angr/analyses/calling_convention/calling_convention.py +9 -9
angr/analyses/calling_convention/fact_collector.py +31 -9
angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +12 -1
angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +4 -1
angr/analyses/complete_calling_conventions.py +18 -5
angr/analyses/decompiler/ail_simplifier.py +90 -65
angr/analyses/decompiler/optimization_passes/condition_constprop.py +49 -14
angr/analyses/decompiler/optimization_passes/ite_region_converter.py +8 -0
angr/analyses/decompiler/peephole_optimizations/simplify_pc_relative_loads.py +15 -1
angr/analyses/decompiler/sequence_walker.py +8 -0
angr/analyses/decompiler/utils.py +13 -0
angr/analyses/s_propagator.py +40 -29
angr/analyses/s_reaching_definitions/s_rda_model.py +45 -36
angr/analyses/s_reaching_definitions/s_rda_view.py +6 -3
angr/analyses/s_reaching_definitions/s_reaching_definitions.py +21 -21
angr/analyses/variable_recovery/engine_ail.py +6 -6
angr/calling_conventions.py +18 -8
angr/procedures/definitions/linux_kernel.py +5 -0
angr/utils/doms.py +40 -33
angr/utils/ssa/__init__.py +21 -14
angr/utils/ssa/vvar_uses_collector.py +2 -2
{angr-9.2.142.dist-info → angr-9.2.143.dist-info}/METADATA +6 -6
{angr-9.2.142.dist-info → angr-9.2.143.dist-info}/RECORD +28 -28
{angr-9.2.142.dist-info → angr-9.2.143.dist-info}/LICENSE +0 -0
{angr-9.2.142.dist-info → angr-9.2.143.dist-info}/WHEEL +0 -0
{angr-9.2.142.dist-info → angr-9.2.143.dist-info}/entry_points.txt +0 -0
{angr-9.2.142.dist-info → angr-9.2.143.dist-info}/top_level.txt +0 -0

angr/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@
 # pylint: disable=wrong-import-position
 from __future__ import annotations
-__version__ = "9.2.142"
+__version__ = "9.2.143"
 if bytes is str:
     raise Exception(

angr/analyses/calling_convention/calling_convention.py CHANGED Viewed

@@ -220,9 +220,9 @@ class CallingConventionAnalysis(Analysis):
                 self.prototype = prototype  # type: ignore
             return
         if self._function.is_plt:
-            r = self._analyze_plt()
-            if r is not None:
-                self.cc, self.prototype = r
+            r_plt = self._analyze_plt()
+            if r_plt is not None:
+                self.cc, self.prototype, self.prototype_libname = r_plt
             return
         r = self._analyze_function()
@@ -278,11 +278,11 @@ class CallingConventionAnalysis(Analysis):
         self.cc = cc
         self.prototype = prototype
-    def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction | None] | None:
+    def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction | None, str | None] | None:
         """
         Get the calling convention for a PLT stub.
-        :return:    A calling convention.
+        :return:    A calling convention, the function type, as well as the library name if available.
         """
         assert self._function is not None
@@ -326,11 +326,11 @@ class CallingConventionAnalysis(Analysis):
                         # we only take the prototype from the SimProcedure if
                         # - the SimProcedure is a function
                         # - the prototype of the SimProcedure is not guessed
-                        return cc, hooker.prototype
+                        return cc, hooker.prototype, hooker.library_name
                 if real_func.prototype is not None:
-                    return cc, real_func.prototype
+                    return cc, real_func.prototype, real_func.prototype_libname
             else:
-                return cc, real_func.prototype
+                return cc, real_func.prototype, real_func.prototype_libname
         if self.analyze_callsites:
             # determine the calling convention by analyzing its callsites
@@ -344,7 +344,7 @@ class CallingConventionAnalysis(Analysis):
             prototype = self._adjust_prototype(
                 prototype, callsite_facts, update_arguments=UpdateArgumentsOption.AlwaysUpdate
             )
-            return cc, prototype
+            return cc, prototype, None
         return None

angr/analyses/calling_convention/fact_collector.py CHANGED Viewed

@@ -1,10 +1,11 @@
 # pylint:disable=too-many-boolean-expressions
 from __future__ import annotations
-from typing import Any
+from typing import Any, TYPE_CHECKING
 import pyvex
 import claripy
+from angr import SIM_LIBRARIES, SIM_TYPE_COLLECTIONS
 from angr.utils.bits import s2u, u2s
 from angr.block import Block
 from angr.analyses.analysis import Analysis
@@ -13,9 +14,12 @@ from angr.knowledge_plugins.functions import Function
 from angr.codenode import BlockNode, HookNode
 from angr.engines.light import SimEngineNostmtVEX, SimEngineLight, SpOffset, RegisterOffset
 from angr.calling_conventions import SimRegArg, SimStackArg, default_cc
-from angr.sim_type import SimTypeBottom
+from angr.sim_type import SimTypeBottom, dereference_simtype, SimTypeFunction
 from .utils import is_sane_register_variable
+if TYPE_CHECKING:
+    from angr.codenode import CodeNode
 class FactCollectorState:
     """
@@ -224,9 +228,12 @@ class FactCollector(Analysis):
         callee_restored_regs = self._analyze_endpoints_for_restored_regs()
         self._determine_input_args(end_states, callee_restored_regs)
-    def _analyze_startpoint(self):
+    def _analyze_startpoint(self) -> list[FactCollectorState]:
         func_graph = self.function.transition_graph
         startpoint = self.function.startpoint
+        if startpoint is None:
+            return []
         bp_as_gpr = self.function.info.get("bp_as_gpr", False)
         engine = SimEngineFactCollectorVEX(self.project, bp_as_gpr)
         init_state = FactCollectorState()
@@ -235,9 +242,9 @@ class FactCollector(Analysis):
         init_state.bp_value = init_state.sp_value
         traversed = set()
-        queue: list[tuple[int, FactCollectorState, BlockNode | HookNode | Function, BlockNode | HookNode | None]] = [
-            (0, init_state, startpoint, None)
-        ]
+        queue: list[
+            tuple[int, FactCollectorState, CodeNode | BlockNode | HookNode | Function, BlockNode | HookNode | None]
+        ] = [(0, init_state, startpoint, None)]
         end_states: list[FactCollectorState] = []
         while queue:
             depth, state, node, retnode = queue.pop(0)
@@ -398,9 +405,24 @@ class FactCollector(Analysis):
                             and not isinstance(func_succ.prototype.returnty, SimTypeBottom)
                         ):
                             # assume the function overwrites the return variable
-                            returnty_size = func_succ.prototype.returnty.with_arch(self.project.arch).size
-                            assert returnty_size is not None
-                            retval_size = returnty_size // self.project.arch.byte_width
+                            proto = func_succ.prototype
+                            if func_succ.prototype_libname is not None:
+                                # we need to deref the prototype in case it uses SimTypeRef internally
+                                type_collections = []
+                                prototype_lib = SIM_LIBRARIES[func_succ.prototype_libname]
+                                if prototype_lib.type_collection_names:
+                                    for typelib_name in prototype_lib.type_collection_names:
+                                        type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
+                                    proto = dereference_simtype(proto, type_collections)
+                            assert isinstance(proto, SimTypeFunction) and proto.returnty is not None
+                            returnty_size = proto.returnty.with_arch(self.project.arch).size
+                            if returnty_size is None:
+                                # it may be None if somehow we cannot resolve a SimTypeRef; we fall back to the full
+                                # machine word size
+                                retval_size = self.project.arch.bytes
+                            else:
+                                retval_size = returnty_size // self.project.arch.byte_width
                             retval_sizes.append(retval_size)
                         continue

angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py CHANGED Viewed

@@ -43,11 +43,22 @@ class ConstantResolver(IndirectJumpResolver):
     be resolved to a constant value. This resolver must be run after all other more specific resolvers.
     """
-    def __init__(self, project):
+    def __init__(self, project, max_func_nodes: int = 512):
         super().__init__(project, timeless=False)
+        self.max_func_nodes = max_func_nodes
     def filter(self, cfg, addr, func_addr, block, jumpkind):
+        if not cfg.functions.contains_addr(func_addr):
+            # the function does not exist
+            return False
+        # for performance, we don't run constant resolver if the function is too large
+        func = cfg.functions.get_by_addr(func_addr)
+        if len(func.block_addrs_set) > self.max_func_nodes:
+            return False
         # we support both an indirect call and jump since the value can be resolved
         return jumpkind in {"Ijk_Boring", "Ijk_Call"}
     def resolve(  # pylint:disable=unused-argument

angr/analyses/cfg/indirect_jump_resolvers/jumptable.py CHANGED Viewed

@@ -6,6 +6,7 @@ from collections.abc import Sequence
 from collections import defaultdict, OrderedDict
 import logging
 import functools
+import contextlib
 import pyvex
 import claripy
@@ -1798,7 +1799,9 @@ class JumpTableResolver(IndirectJumpResolver):
                         # swap the two tmps
                         jump_base_addr.tmp, jump_base_addr.tmp_1 = jump_base_addr.tmp_1, jump_base_addr.tmp
                     # Load the concrete base address
-                    jump_base_addr.base_addr = state.solver.eval(state.scratch.temps[jump_base_addr.tmp_1])
+                    with contextlib.suppress(SimError):
+                        # silently eat the claripy exception
+                        jump_base_addr.base_addr = state.solver.eval(state.scratch.temps[jump_base_addr.tmp_1])
             else:
                 # We do not support the cases where the base address involves more than one addition.
                 # One such case exists in libc-2.27.so shipped with Ubuntu x86 where esi is used as the address of the

angr/analyses/complete_calling_conventions.py CHANGED Viewed

@@ -63,7 +63,7 @@ class CompleteCallingConventionsAnalysis(Analysis):
         max_function_size: int | None = None,
         workers: int = 0,
         cc_callback: Callable | None = None,
-        prioritize_func_addrs: Iterable[int] | None = None,
+        prioritize_func_addrs: list[int] | set[int] | None = None,
         skip_other_funcs: bool = False,
         auto_start: bool = True,
         func_graphs: dict[int, networkx.DiGraph] | None = None,
@@ -130,9 +130,20 @@ class CompleteCallingConventionsAnalysis(Analysis):
         Infer calling conventions for all functions in the current project.
         """
-        # get an ordering of functions based on the call graph
-        # note that the call graph is a multi-digraph. we convert it to a digraph to speed up topological sort
-        directed_callgraph = networkx.DiGraph(self.kb.functions.callgraph)
+        # special case: if both _prioritize_func_addrs and _skip_other_funcs are set, we only need to sort part of
+        # the call graph; even better, if there is only one function set, we don't need to sort the call graph at all!
+        if self._prioritize_func_addrs and self._skip_other_funcs:
+            if len(self._prioritize_func_addrs) == 1:
+                self._func_addrs = list(self._prioritize_func_addrs)
+                self._total_funcs = 1
+                return
+            directed_callgraph = networkx.DiGraph(self.kb.functions.callgraph)
+            directed_callgraph = directed_callgraph.subgraph(self._prioritize_func_addrs)
+        else:
+            # get an ordering of functions based on the call graph
+            # note that the call graph is a multi-digraph. we convert it to a digraph to speed up topological sort
+            directed_callgraph = networkx.DiGraph(self.kb.functions.callgraph)
+        assert isinstance(directed_callgraph, networkx.DiGraph)
         sorted_funcs = GraphUtils.quasi_topological_sort_nodes(directed_callgraph)
         total_funcs = 0
@@ -148,7 +159,7 @@ class CompleteCallingConventionsAnalysis(Analysis):
                     continue
                 if self._max_function_size is not None:
-                    func_size = sum(block.size for block in func.blocks)
+                    func_size = sum(block.size for block in func.blocks if block.size is not None)
                     if func_size > self._max_function_size:
                         _l.info(
                             "Skipping variable recovery for %r since its size (%d) is greater than the cutoff "
@@ -189,6 +200,7 @@ class CompleteCallingConventionsAnalysis(Analysis):
     def work(self):
         total_funcs = self._total_funcs
+        assert total_funcs is not None
         if self._workers == 0:
             self._update_progress(0)
             for idx, func_addr in enumerate(self._func_addrs):
@@ -211,6 +223,7 @@ class CompleteCallingConventionsAnalysis(Analysis):
             self._finish_progress()
         else:
+            assert self._remaining_funcs is not None and self._func_queue is not None
             self._remaining_funcs.value = len(self._func_addrs)
             # generate a call tree (obviously, it's acyclic)

angr/analyses/decompiler/ail_simplifier.py CHANGED Viewed

@@ -36,6 +36,7 @@ from angr.knowledge_plugins.key_definitions.definition import Definition
 from angr.knowledge_plugins.key_definitions.constants import OP_BEFORE
 from angr.errors import AngrRuntimeError
 from angr.analyses import Analysis, AnalysesHub
+from angr.utils.timing import timethis
 from .ailgraph_walker import AILGraphWalker
 from .expression_narrower import ExprNarrowingInfo, NarrowingInfoExtractor, ExpressionNarrower
 from .block_simplifier import BlockSimplifier
@@ -202,6 +203,7 @@ class AILSimplifier(Analysis):
         AILGraphWalker(self.func_graph, _handler, replace_nodes=True).walk()
         self.blocks = {}
+    @timethis
     def _compute_reaching_definitions(self) -> SRDAModel:
         # Computing reaching definitions or return the cached one
         if self._reaching_definitions is not None:
@@ -217,6 +219,7 @@ class AILSimplifier(Analysis):
         self._reaching_definitions = rd
         return rd
+    @timethis
     def _compute_propagation(self) -> SPropagatorAnalysis:
         # Propagate expressions or return the existing result
         if self._propagator is not None:
@@ -233,6 +236,7 @@ class AILSimplifier(Analysis):
         self._propagator_dead_vvar_ids = prop.dead_vvar_ids
         return prop
+    @timethis
     def _compute_equivalence(self) -> set[Equivalence]:
         equivalence = set()
         for block in self.func_graph:
@@ -281,6 +285,7 @@ class AILSimplifier(Analysis):
     # Expression narrowing
     #
+    @timethis
     def _narrow_exprs(self) -> bool:
         """
         A register may be used with full width even when only the lower bytes are really needed. This results in the
@@ -511,9 +516,9 @@ class AILSimplifier(Analysis):
             atom = atom_queue.pop(0)
             seen.add(atom)
-            use_and_exprs = rd.get_vvar_uses_with_expr(atom)
+            expr_and_uses = rd.all_vvar_uses[atom.varid]
-            for loc, expr in use_and_exprs:
+            for expr, loc in set(expr_and_uses):
                 old_block = block_dict.get((loc.block_addr, loc.block_idx), None)
                 if old_block is None:
                     # missing a block for whatever reason
@@ -532,6 +537,7 @@ class AILSimplifier(Analysis):
                     )
                     if new_atom not in seen:
                         atom_queue.append(new_atom)
+                        seen.add(new_atom)
                 else:
                     result.append((atom, loc, expr))
         return result, phi_vars
@@ -659,6 +665,7 @@ class AILSimplifier(Analysis):
     # Unifying local variables
     #
+    @timethis
     def _unify_local_variables(self) -> bool:
         """
         Find variables that are definitely equivalent and then eliminate unnecessary copies.
@@ -822,14 +829,14 @@ class AILSimplifier(Analysis):
                             continue
                         # find all its uses
-                        all_arg_copy_var_uses: set[tuple[CodeLocation, Any]] = set(
-                            rd.get_vvar_uses_with_expr(arg_copy_def.atom)
+                        all_arg_copy_var_uses: set[tuple[Any, CodeLocation]] = rd.get_vvar_uses_with_expr(
+                            arg_copy_def.atom
                         )
                         all_uses_with_def = set()
                         should_abort = False
                         for use in all_arg_copy_var_uses:
-                            used_expr = use[1]
+                            used_expr = use[0]
                             if used_expr is not None and used_expr.size != arg_copy_def.size:
                                 should_abort = True
                                 break
@@ -924,15 +931,19 @@ class AILSimplifier(Analysis):
                 # find all uses of this definition
                 # we make a copy of the set since we may touch the set (uses) when replacing expressions
-                all_uses: set[tuple[CodeLocation, Any]] = set(rd.get_vvar_uses_with_expr(to_replace_def.atom))
+                all_uses: set[tuple[Any, CodeLocation]] = set(rd.all_vvar_uses[to_replace_def.atom.varid])
                 # make sure none of these uses are phi nodes (depends on more than one def)
                 all_uses_with_unique_def = set()
-                for use_and_expr in all_uses:
-                    use_loc, used_expr = use_and_expr
+                for expr_and_use in all_uses:
+                    used_expr, use_loc = expr_and_use
                     defs_and_exprs = rd.get_uses_by_location(use_loc, exprs=True)
-                    filtered_defs = {def_ for def_, expr_ in defs_and_exprs if expr_ == used_expr}
+                    filtered_defs = {
+                        def_
+                        for def_, expr_ in defs_and_exprs
+                        if expr_ is not None and used_expr is not None and expr_.varid == used_expr.varid
+                    }
                     if len(filtered_defs) == 1:
-                        all_uses_with_unique_def.add(use_and_expr)
+                        all_uses_with_unique_def.add(expr_and_use)
                     else:
                         # optimization: break early
                         break
@@ -947,7 +958,7 @@ class AILSimplifier(Analysis):
                 if not (isinstance(replace_with, VirtualVariable) and replace_with.was_parameter):
                     assignment_ctr = 0
-                    all_use_locs = {use_loc for use_loc, _ in all_uses}
+                    all_use_locs = {use_loc for _, use_loc in all_uses}
                     for use_loc in all_use_locs:
                         if use_loc == eq.codeloc:
                             continue
@@ -960,17 +971,17 @@ class AILSimplifier(Analysis):
                     if assignment_ctr > 1:
                         continue
-                all_uses_with_def = {(to_replace_def, use_and_expr) for use_and_expr in all_uses}
+                all_uses_with_def = {(to_replace_def, expr_and_use) for expr_and_use in all_uses}
                 remove_initial_assignment = False  # expression folding will take care of it
             assert replace_with is not None
-            if any(not isinstance(use_and_expr[1], VirtualVariable) for _, use_and_expr in all_uses_with_def):
+            if any(not isinstance(expr_and_use[0], VirtualVariable) for _, expr_and_use in all_uses_with_def):
                 # if any of the uses are phi assignments, we skip
                 used_in_phi_assignment = False
-                for _, use_and_expr in all_uses_with_def:
-                    u = use_and_expr[0]
+                for _, expr_and_use in all_uses_with_def:
+                    u = expr_and_use[1]
                     assert u.block_addr is not None
                     assert u.stmt_idx is not None
                     block = addr_and_idx_to_block[(u.block_addr, u.block_idx)]
@@ -983,8 +994,8 @@ class AILSimplifier(Analysis):
             # ensure the uses we consider are all after the eq location
             filtered_all_uses_with_def = []
-            for def_, use_and_expr in all_uses_with_def:
-                u = use_and_expr[0]
+            for def_, expr_and_use in all_uses_with_def:
+                u = expr_and_use[1]
                 if (
                     u.block_addr == eq.codeloc.block_addr
                     and u.block_idx == eq.codeloc.block_idx
@@ -992,7 +1003,7 @@ class AILSimplifier(Analysis):
                 ):
                     # this use happens before the assignment - ignore it
                     continue
-                filtered_all_uses_with_def.append((def_, use_and_expr))
+                filtered_all_uses_with_def.append((def_, expr_and_use))
             all_uses_with_def = filtered_all_uses_with_def
             if not all_uses_with_def:
@@ -1004,8 +1015,8 @@ class AILSimplifier(Analysis):
             # replace all uses
             all_uses_replaced = True
-            for def_, use_and_expr in all_uses_with_def:
-                u, used_expr = use_and_expr
+            for def_, expr_and_use in all_uses_with_def:
+                used_expr, u = expr_and_use
                 use_expr_defns = []
                 for d in rd.get_uses_by_location(u):
@@ -1110,6 +1121,7 @@ class AILSimplifier(Analysis):
         walker.walk_statement(stmt)
         return len(walker.temps) > 0
+    @timethis
     def _fold_call_exprs(self) -> bool:
         """
         Fold a call expression (statement) into other statements if the return value of the call expression (statement)
@@ -1183,11 +1195,11 @@ class AILSimplifier(Analysis):
                 assert the_def.codeloc.block_addr is not None
                 assert the_def.codeloc.stmt_idx is not None
-                all_uses: set[tuple[CodeLocation, Any]] = set(rd.get_vvar_uses_with_expr(the_def.atom))
+                all_uses: set[tuple[Any, CodeLocation]] = rd.get_vvar_uses_with_expr(the_def.atom)
                 if len(all_uses) != 1:
                     continue
-                u, used_expr = next(iter(all_uses))
+                used_expr, u = next(iter(all_uses))
                 if used_expr is None:
                     continue
                 assert u.block_addr is not None
@@ -1314,6 +1326,7 @@ class AILSimplifier(Analysis):
         return False, None
+    @timethis
     def _iteratively_remove_dead_assignments(self) -> bool:
         anything_removed = False
         while True:
@@ -1323,6 +1336,7 @@ class AILSimplifier(Analysis):
             self._rebuild_func_graph()
             self._clear_cache()
+    @timethis
     def _remove_dead_assignments(self) -> bool:
         # keeping tracking of statements to remove and statements (as well as dead vvars) to keep allows us to handle
@@ -1330,7 +1344,7 @@ class AILSimplifier(Analysis):
         # value and the floating-point return value.
         stmts_to_remove_per_block: dict[tuple[int, int | None], set[int]] = defaultdict(set)
         stmts_to_keep_per_block: dict[tuple[int, int | None], set[int]] = defaultdict(set)
-        dead_vvar_ids: set[int] = set()
+        dead_vvar_ids: set[int] = self._removed_vvar_ids.copy()
         dead_vvar_codelocs: set[CodeLocation] = set()
         blocks: dict[tuple[int, int | None], Block] = {
             (node.addr, node.idx): self.blocks.get(node, node) for node in self.func_graph.nodes()
@@ -1343,36 +1357,43 @@ class AILSimplifier(Analysis):
         stackarg_offsets = (
             {(tpl[1] & mask) for tpl in self._stack_arg_offsets} if self._stack_arg_offsets is not None else None
         )
         while True:
             new_dead_vars_found = False
-            for vvar, codeloc in rd.all_vvar_definitions.items():
-                if vvar.varid in dead_vvar_ids:
+            # traverse all virtual variable definitions
+            for vvar_id, codeloc in rd.all_vvar_definitions.items():
+                if vvar_id in dead_vvar_ids:
                     continue
-                if vvar.varid in self._propagator_dead_vvar_ids:
+                uses = None
+                if vvar_id in self._propagator_dead_vvar_ids:
                     # we are definitely removing this variable if it has no uses
-                    uses = rd.all_vvar_uses[vvar]
-                elif vvar.was_stack:
-                    if not self._remove_dead_memdefs:
-                        if rd.is_phi_vvar_id(vvar.varid):
-                            # we always remove unused phi variables
-                            pass
-                        elif vvar.varid in self._secondary_stackvars:
-                            # secondary stack variables are potentially removable
-                            pass
-                        elif stackarg_offsets is not None:
-                            # we always remove definitions for stack arguments
-                            assert vvar.stack_offset is not None
-                            if (vvar.stack_offset & mask) not in stackarg_offsets:
+                    uses = rd.all_vvar_uses[vvar_id]
+                if uses is None:
+                    vvar = rd.varid_to_vvar[vvar_id]
+                    if vvar.was_stack:
+                        if not self._remove_dead_memdefs:
+                            if rd.is_phi_vvar_id(vvar_id):
+                                # we always remove unused phi variables
+                                pass
+                            elif vvar_id in self._secondary_stackvars:
+                                # secondary stack variables are potentially removable
+                                pass
+                            elif stackarg_offsets is not None:
+                                # we always remove definitions for stack arguments
+                                assert vvar.stack_offset is not None
+                                if (vvar.stack_offset & mask) not in stackarg_offsets:
+                                    continue
+                            else:
                                 continue
-                        else:
-                            continue
-                    uses = rd.all_vvar_uses[vvar]
+                        uses = rd.all_vvar_uses[vvar_id]
-                elif vvar.was_tmp or vvar.was_reg or vvar.was_parameter:
-                    uses = rd.all_vvar_uses[vvar]
+                    elif vvar.was_tmp or vvar.was_reg or vvar.was_parameter:
+                        uses = rd.all_vvar_uses[vvar_id]
-                else:
-                    uses = set()
+                    else:
+                        uses = set()
                 # remove uses where vvars are going to be removed
                 filtered_uses_count = 0
@@ -1385,7 +1406,7 @@ class AILSimplifier(Analysis):
                 if filtered_uses_count == 0:
                     new_dead_vars_found = True
-                    dead_vvar_ids.add(vvar.varid)
+                    dead_vvar_ids.add(vvar_id)
                     dead_vvar_codelocs.add(codeloc)
                     if not isinstance(codeloc, ExternalCodeLocation):
                         assert codeloc.block_addr is not None
@@ -1403,30 +1424,29 @@ class AILSimplifier(Analysis):
                 break
         # find all phi variables that rely on variables that no longer exist
-        all_removed_var_ids = self._removed_vvar_ids.copy()
         removed_vvar_ids = self._removed_vvar_ids
         while True:
             new_removed_vvar_ids = set()
             for phi_varid, phi_use_varids in rd.phivarid_to_varids.items():
-                if phi_varid not in all_removed_var_ids and any(
-                    vvarid in removed_vvar_ids for vvarid in phi_use_varids
-                ):
-                    loc = rd.all_vvar_definitions[rd.varid_to_vvar[phi_varid]]
+                if phi_varid not in dead_vvar_ids and any(vvarid in removed_vvar_ids for vvarid in phi_use_varids):
+                    loc = rd.all_vvar_definitions[phi_varid]
                     assert loc.block_addr is not None and loc.stmt_idx is not None
-                    stmts_to_remove_per_block[(loc.block_addr, loc.block_idx)].add(loc.stmt_idx)
-                    new_removed_vvar_ids.add(phi_varid)
-                    all_removed_var_ids.add(phi_varid)
+                    if loc.stmt_idx not in stmts_to_remove_per_block[(loc.block_addr, loc.block_idx)]:
+                        stmts_to_remove_per_block[(loc.block_addr, loc.block_idx)].add(loc.stmt_idx)
+                        new_removed_vvar_ids.add(phi_varid)
+                        dead_vvar_ids.add(phi_varid)
             if not new_removed_vvar_ids:
                 break
             removed_vvar_ids = new_removed_vvar_ids
         # find all phi variables that are only ever used by other phi variables
-        redundant_phi_and_dirty_varids = self._find_cyclic_dependent_phis_and_dirty_vvars(rd)
+        redundant_phi_and_dirty_varids = self._find_cyclic_dependent_phis_and_dirty_vvars(rd, dead_vvar_ids)
         for varid in redundant_phi_and_dirty_varids:
-            loc = rd.all_vvar_definitions[rd.varid_to_vvar[varid]]
+            loc = rd.all_vvar_definitions[varid]
             assert loc.block_addr is not None and loc.stmt_idx is not None
-            stmts_to_remove_per_block[(loc.block_addr, loc.block_idx)].add(loc.stmt_idx)
-            stmts_to_keep_per_block[(loc.block_addr, loc.block_idx)].discard(loc.stmt_idx)
+            if loc.stmt_idx not in stmts_to_remove_per_block[(loc.block_addr, loc.block_idx)]:
+                stmts_to_remove_per_block[(loc.block_addr, loc.block_idx)].add(loc.stmt_idx)
+                stmts_to_keep_per_block[(loc.block_addr, loc.block_idx)].discard(loc.stmt_idx)
         for codeloc in self._calls_to_remove | self._assignments_to_remove:
             # this call can be removed. make sure it exists in stmts_to_remove_per_block
@@ -1481,6 +1501,7 @@ class AILSimplifier(Analysis):
                         if self._statement_has_call_exprs(stmt):
                             if codeloc in self._calls_to_remove:
                                 # it has a call and must be removed
+                                self._calls_to_remove.discard(codeloc)
                                 simplified = True
                                 continue
                             if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
@@ -1538,9 +1559,8 @@ class AILSimplifier(Analysis):
         :return:            The set of vvar use atoms.
         """
-        vvar = rd.varid_to_vvar[vvar_id]
         used_by: set[int | None] = set()
-        for used_vvar, loc in rd.all_vvar_uses[vvar]:
+        for used_vvar, loc in rd.all_vvar_uses[vvar_id]:
             if used_vvar is None:
                 # no explicit reference
                 used_by.add(None)
@@ -1553,7 +1573,7 @@ class AILSimplifier(Analysis):
                     used_by.add(None)
         return used_by
-    def _find_cyclic_dependent_phis_and_dirty_vvars(self, rd: SRDAModel) -> set[int]:
+    def _find_cyclic_dependent_phis_and_dirty_vvars(self, rd: SRDAModel, dead_vvar_ids: set[int]) -> set[int]:
         blocks_dict: dict[tuple[int, int | None], Block] = {(bb.addr, bb.idx): bb for bb in self.func_graph}
         # find dirty vvars and vexccall vvars
@@ -1568,16 +1588,21 @@ class AILSimplifier(Analysis):
                 ):
                     dirty_vvar_ids.add(stmt.dst.varid)
-        phi_and_dirty_vvar_ids = rd.phi_vvar_ids | dirty_vvar_ids
+        phi_and_dirty_vvar_ids = (rd.phi_vvar_ids | dirty_vvar_ids).difference(dead_vvar_ids)
         vvar_used_by: dict[int, set[int | None]] = defaultdict(set)
         for var_id in phi_and_dirty_vvar_ids:
             if var_id in rd.phivarid_to_varids:
                 for used_by_varid in rd.phivarid_to_varids[var_id]:
+                    if used_by_varid in dead_vvar_ids:
+                        # this variable no longer exists
+                        continue
                     if used_by_varid not in vvar_used_by:
-                        vvar_used_by[used_by_varid] |= self._get_vvar_used_by(used_by_varid, rd, blocks_dict)
+                        vvar_used_by[used_by_varid] |= self._get_vvar_used_by(
+                            used_by_varid, rd, blocks_dict
+                        ).difference(dead_vvar_ids)
                     vvar_used_by[used_by_varid].add(var_id)  # probably unnecessary
-            vvar_used_by[var_id] |= self._get_vvar_used_by(var_id, rd, blocks_dict)
+            vvar_used_by[var_id] |= self._get_vvar_used_by(var_id, rd, blocks_dict).difference(dead_vvar_ids)
         g = networkx.DiGraph()
         dummy_vvar_id = -1