PyPI - angr - Versions diffs - 9.2.140__py3-none-manylinux2014_x86_64.whl → 9.2.141__py3-none-manylinux2014_x86_64.whl - Mend

angr 9.2.140__py3-none-manylinux2014_x86_64.whl → 9.2.141__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of angr might be problematic. Click here for more details.

Files changed (40)

angr/__init__.py +1 -1
angr/analyses/calling_convention/calling_convention.py +88 -32
angr/analyses/calling_convention/fact_collector.py +44 -18
angr/analyses/calling_convention/utils.py +3 -1
angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +9 -8
angr/analyses/decompiler/ail_simplifier.py +48 -20
angr/analyses/decompiler/callsite_maker.py +24 -11
angr/analyses/decompiler/clinic.py +10 -0
angr/analyses/decompiler/decompiler.py +1 -0
angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +3 -1
angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +21 -2
angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +84 -15
angr/analyses/decompiler/optimization_passes/optimization_pass.py +76 -1
angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +51 -7
angr/analyses/decompiler/peephole_optimizations/eager_eval.py +44 -7
angr/analyses/decompiler/region_identifier.py +6 -4
angr/analyses/decompiler/region_simplifiers/expr_folding.py +32 -18
angr/analyses/decompiler/region_simplifiers/region_simplifier.py +4 -1
angr/analyses/decompiler/ssailification/rewriting.py +23 -15
angr/analyses/decompiler/ssailification/rewriting_engine.py +105 -24
angr/analyses/decompiler/ssailification/ssailification.py +22 -14
angr/analyses/decompiler/structured_codegen/c.py +73 -137
angr/analyses/decompiler/structuring/dream.py +1 -1
angr/analyses/decompiler/structuring/phoenix.py +6 -1
angr/analyses/decompiler/structuring/structurer_base.py +2 -1
angr/analyses/decompiler/utils.py +46 -20
angr/analyses/s_reaching_definitions/s_rda_view.py +43 -25
angr/analyses/variable_recovery/engine_ail.py +1 -1
angr/analyses/variable_recovery/engine_vex.py +20 -4
angr/calling_conventions.py +15 -10
angr/factory.py +8 -3
angr/knowledge_plugins/variables/variable_manager.py +7 -5
angr/simos/simos.py +3 -1
angr/utils/types.py +48 -0
{angr-9.2.140.dist-info → angr-9.2.141.dist-info}/METADATA +6 -6
{angr-9.2.140.dist-info → angr-9.2.141.dist-info}/RECORD +40 -39
{angr-9.2.140.dist-info → angr-9.2.141.dist-info}/LICENSE +0 -0
{angr-9.2.140.dist-info → angr-9.2.141.dist-info}/WHEEL +0 -0
{angr-9.2.140.dist-info → angr-9.2.141.dist-info}/entry_points.txt +0 -0
{angr-9.2.140.dist-info → angr-9.2.141.dist-info}/top_level.txt +0 -0

angr/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@
 # pylint: disable=wrong-import-position
 from __future__ import annotations
-__version__ = "9.2.140"
+__version__ = "9.2.141"
 if bytes is str:
     raise Exception(

angr/analyses/calling_convention/calling_convention.py CHANGED Viewed

@@ -33,6 +33,7 @@ from angr.knowledge_plugins.key_definitions.rd_model import ReachingDefinitionsM
 from angr.knowledge_plugins.variables.variable_access import VariableAccessSort
 from angr.knowledge_plugins.functions import Function
 from angr.utils.constants import DEFAULT_STATEMENT
+from angr.utils.ssa import get_reg_offset_base_and_size, get_reg_offset_base
 from angr import SIM_PROCEDURES
 from angr.analyses import Analysis, register_analysis, ReachingDefinitionsAnalysis
 from angr.analyses.reaching_definitions import get_all_definitions
@@ -264,7 +265,7 @@ class CallingConventionAnalysis(Analysis):
         self.cc = cc
         self.prototype = prototype
-    def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction] | None:
+    def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction | None] | None:
         """
         Get the calling convention for a PLT stub.
@@ -296,6 +297,14 @@ class CallingConventionAnalysis(Analysis):
             real_func = None
         if real_func is not None:
+            if real_func.calling_convention is None:
+                cc_cls = default_cc(self.project.arch.name)
+                if cc_cls is None:
+                    # can't determine the default calling convention for this architecture
+                    return None
+                cc = cc_cls(self.project.arch)
+            else:
+                cc = real_func.calling_convention
             if real_func.is_simprocedure:
                 if self.project.is_hooked(real_func.addr):
                     # prioritize the hooker
@@ -303,17 +312,20 @@ class CallingConventionAnalysis(Analysis):
                     if hooker is not None and (
                         not hooker.is_stub or (hooker.is_function and not hooker.guessed_prototype)
                     ):
-                        return real_func.calling_convention, hooker.prototype
-                if real_func.calling_convention and real_func.prototype:
-                    return real_func.calling_convention, real_func.prototype
+                        return cc, hooker.prototype
+                if real_func.prototype is not None:
+                    return cc, real_func.prototype
             else:
-                return real_func.calling_convention, real_func.prototype
+                return cc, real_func.prototype
         if self.analyze_callsites:
             # determine the calling convention by analyzing its callsites
             callsite_facts = self._extract_and_analyze_callsites(max_analyzing_callsites=1)
             cc_cls = default_cc(self.project.arch.name)
-            cc = cc_cls(self.project.arch) if cc_cls is not None else None
+            if cc_cls is None:
+                # can't determine the default calling convention for this architecture
+                return None
+            cc = cc_cls(self.project.arch)
             prototype = SimTypeFunction([], None)
             prototype = self._adjust_prototype(
                 prototype, callsite_facts, update_arguments=UpdateArgumentsOption.AlwaysUpdate
@@ -342,7 +354,7 @@ class CallingConventionAnalysis(Analysis):
             input_variables = vm.input_variables()
             input_args = self._args_from_vars(input_variables, vm)
         else:
-            input_args = self._input_args
+            input_args = set(self._input_args)
             retval_size = self._retval_size
         # check if this function is a variadic function
@@ -355,8 +367,14 @@ class CallingConventionAnalysis(Analysis):
         # TODO: properly determine sp_delta
         sp_delta = self.project.arch.bytes if self.project.arch.call_pushes_ret else 0
-        input_args = list(input_args)  # input_args might be modified by find_cc()
-        cc = SimCC.find_cc(self.project.arch, input_args, sp_delta, platform=self.project.simos.name)
+        full_input_args = self._consolidate_input_args(input_args)
+        full_input_args_copy = list(full_input_args)  # input_args might be modified by find_cc()
+        cc = SimCC.find_cc(self.project.arch, full_input_args_copy, sp_delta, platform=self.project.simos.name)
+        # update input_args according to the difference between full_input_args and full_input_args_copy
+        for a in full_input_args:
+            if a not in full_input_args_copy and a in input_args:
+                input_args.remove(a)
         if cc is None:
             l.warning(
@@ -657,12 +675,6 @@ class CallingConventionAnalysis(Analysis):
             else:
                 break
-        if None in temp_args:
-            first_none_idx = temp_args.index(None)
-            # test if there is at least one argument set after None; if so, we ignore the first None
-            if any(arg is not None for arg in temp_args[first_none_idx:]):
-                temp_args[first_none_idx] = expected_args[first_none_idx]
         if None in temp_args:
             # we be very conservative here and ignore all arguments starting from the first missing one
             first_none_idx = temp_args.index(None)
@@ -681,17 +693,18 @@ class CallingConventionAnalysis(Analysis):
             if all(fact.return_value_used is False for fact in facts):
                 proto.returnty = SimTypeBottom(label="void")
             else:
-                proto.returnty = SimTypeInt().with_arch(self.project.arch)
+                if proto.returnty is None or isinstance(proto.returnty, SimTypeBottom):
+                    proto.returnty = SimTypeInt().with_arch(self.project.arch)
         if (
             update_arguments == UpdateArgumentsOption.AlwaysUpdate
             or (update_arguments == UpdateArgumentsOption.UpdateWhenCCHasNoArgs and not proto.args)
         ) and len({len(fact.args) for fact in facts}) == 1:
             fact = next(iter(facts))
-            proto.args = [
+            proto.args = tuple(
                 self._guess_arg_type(arg) if arg is not None else SimTypeInt().with_arch(self.project.arch)
                 for arg in fact.args
-            ]
+            )
         return proto
@@ -730,13 +743,8 @@ class CallingConventionAnalysis(Analysis):
                 # a register variable, convert it to a register argument
                 if not is_sane_register_variable(self.project.arch, variable.reg, variable.size, def_cc=def_cc):
                     continue
-                if self.project.arch.name in {"AMD64", "X86"} and variable.size < self.project.arch.bytes:
-                    # use complete registers on AMD64 and X86
-                    reg_name = self.project.arch.translate_register_name(variable.reg, size=self.project.arch.bytes)
-                    arg = SimRegArg(reg_name, self.project.arch.bytes)
-                else:
-                    reg_name = self.project.arch.translate_register_name(variable.reg, size=variable.size)
-                    arg = SimRegArg(reg_name, variable.size)
+                reg_name = self.project.arch.translate_register_name(variable.reg, size=variable.size)
+                arg = SimRegArg(reg_name, variable.size)
                 args.add(arg)
                 accesses = var_manager.get_variable_accesses(variable)
@@ -778,15 +786,58 @@ class CallingConventionAnalysis(Analysis):
         return args.difference(restored_reg_vars)
-    def _reorder_args(self, args: list[SimRegArg | SimStackArg], cc: SimCC) -> list[SimRegArg | SimStackArg]:
+    def _consolidate_input_args(self, input_args: set[SimRegArg | SimStackArg]) -> set[SimRegArg | SimStackArg]:
+        """
+        Consolidate register arguments by converting partial registers to full registers on certain architectures.
+        :param input_args:  A set of input arguments.
+        :return:            A set of consolidated input args.
+        """
+        if self.project.arch.name in {"AMD64", "X86"}:
+            new_input_args = set()
+            for a in input_args:
+                if isinstance(a, SimRegArg) and a.size < self.project.arch.bytes:
+                    # use complete registers on AMD64 and X86
+                    reg_offset, reg_size = self.project.arch.registers[a.reg_name]
+                    full_reg_offset, full_reg_size = get_reg_offset_base_and_size(
+                        reg_offset, self.project.arch, size=reg_size
+                    )
+                    full_reg_name = self.project.arch.translate_register_name(full_reg_offset, size=full_reg_size)
+                    arg = SimRegArg(full_reg_name, full_reg_size)
+                    if arg not in new_input_args:
+                        new_input_args.add(arg)
+                else:
+                    new_input_args.add(a)
+            return new_input_args
+        return input_args
+    def _reorder_args(self, args: set[SimRegArg | SimStackArg], cc: SimCC) -> list[SimRegArg | SimStackArg]:
         """
         Reorder arguments according to the calling convention identified.
-        :param args:   A list of arguments that haven't been ordered.
+        :param args:   A set of arguments that haven't been ordered.
         :param cc:    The identified calling convention.
         :return:            A reordered list of args.
         """
+        def _is_same_reg(rn0: str, rn1: str) -> bool:
+            """
+            Check if rn0 and rn1 belong to the same base register.
+            :param rn0:     Register name of the first register.
+            :param rn1:     Register name of the second register.
+            :return:        True if they belong to the same base register; False otherwise.
+            """
+            if rn0 == rn1:
+                return True
+            off0, sz0 = self.project.arch.registers[rn0]
+            full_off0 = get_reg_offset_base(off0, self.project.arch, sz0)
+            off1, sz1 = self.project.arch.registers[rn1]
+            full_off1 = get_reg_offset_base(off1, self.project.arch, sz1)
+            return full_off0 == full_off1
         reg_args = []
         # split args into two lists
@@ -805,7 +856,7 @@ class CallingConventionAnalysis(Analysis):
         # match int args first
         for reg_name in cc.ARG_REGS:
             try:
-                arg = next(iter(a for a in int_args if isinstance(a, SimRegArg) and a.reg_name == reg_name))
+                arg = next(iter(a for a in int_args if isinstance(a, SimRegArg) and _is_same_reg(a.reg_name, reg_name)))
             except StopIteration:
                 # have we reached the end of the args list?
                 if [a for a in int_args if isinstance(a, SimRegArg)] or len(stack_int_args) > 0:
@@ -821,7 +872,9 @@ class CallingConventionAnalysis(Analysis):
         if fp_args:
             for reg_name in cc.FP_ARG_REGS:
                 try:
-                    arg = next(iter(a for a in fp_args if isinstance(a, SimRegArg) and a.reg_name == reg_name))
+                    arg = next(
+                        iter(a for a in fp_args if isinstance(a, SimRegArg) and _is_same_reg(a.reg_name, reg_name))
+                    )
                 except StopIteration:
                     # have we reached the end of the args list?
                     if [a for a in fp_args if isinstance(a, SimRegArg)] or len(stack_fp_args) > 0:
@@ -886,12 +939,15 @@ class CallingConventionAnalysis(Analysis):
             if 5 <= ret_val_size <= 8:
                 return SimTypeLongLong()
-        # fallback
-        return SimTypeInt() if cc.arch.bits == 32 else SimTypeLongLong()
+        return SimTypeBottom(label="void")
     @staticmethod
     def _likely_saving_temp_reg(ail_block: ailment.Block, d: Definition, all_reg_defs: set[Definition]) -> bool:
-        if d.codeloc.block_addr == ail_block.addr and d.codeloc.stmt_idx < len(ail_block.statements):
+        if (
+            d.codeloc.block_addr == ail_block.addr
+            and d.codeloc.stmt_idx is not None
+            and d.codeloc.stmt_idx < len(ail_block.statements)
+        ):
             stmt = ail_block.statements[d.codeloc.stmt_idx]
             if isinstance(stmt, ailment.Stmt.Assignment) and isinstance(stmt.src, ailment.Expr.Register):
                 src_offset = stmt.src.reg_offset

angr/analyses/calling_convention/fact_collector.py CHANGED Viewed

@@ -90,7 +90,7 @@ binop_handler = SimEngineNostmtVEX[FactCollectorState, claripy.ast.BV, FactColle
 class SimEngineFactCollectorVEX(
     SimEngineNostmtVEX[FactCollectorState, SpOffset | RegisterOffset | int, None],
-    SimEngineLight[type[FactCollectorState], SpOffset | RegisterOffset | int, Block, None],
+    SimEngineLight[FactCollectorState, SpOffset | RegisterOffset | int, Block, None],
 ):
     """
     THe engine for FactCollector.
@@ -101,7 +101,7 @@ class SimEngineFactCollectorVEX(
         super().__init__(project)
     def _process_block_end(self, stmt_result: list, whitelist: set[int] | None) -> None:
-        if self.block.vex.jumpkind == "Ijk_Call":
+        if self.block.vex.jumpkind == "Ijk_Call" and self.arch.ret_offset is not None:
             self.state.register_written(self.arch.ret_offset, self.arch.bytes)
     def _top(self, bits: int):
@@ -110,7 +110,7 @@ class SimEngineFactCollectorVEX(
     def _is_top(self, expr: Any) -> bool:
         raise NotImplementedError
-    def _handle_conversion(self, from_size: int, to_size: int, signed: bool, operand: pyvex.IRExpr) -> Any:
+    def _handle_conversion(self, from_size: int, to_size: int, signed: bool, operand: pyvex.expr.IRExpr) -> Any:
         return None
     def _handle_stmt_Put(self, stmt):
@@ -142,9 +142,9 @@ class SimEngineFactCollectorVEX(
         return expr.con.value
     def _handle_expr_GSPTR(self, expr):
-        return None
+        return 0
-    def _handle_expr_Get(self, expr) -> SpOffset | None:
+    def _handle_expr_Get(self, expr) -> SpOffset | RegisterOffset:
         if expr.offset == self.arch.sp_offset:
             return SpOffset(self.arch.bits, self.state.sp_value, is_base=False)
         if expr.offset == self.arch.bp_offset and not self.bp_as_gpr:
@@ -304,7 +304,10 @@ class FactCollector(Analysis):
     def _handle_function(self, state: FactCollectorState, func: Function) -> None:
         try:
-            arg_locs = func.calling_convention.arg_locs(func.prototype)
+            if func.calling_convention is not None and func.prototype is not None:
+                arg_locs = func.calling_convention.arg_locs(func.prototype)
+            else:
+                return
         except (TypeError, ValueError):
             return
@@ -355,6 +358,7 @@ class FactCollector(Analysis):
                 if isinstance(node, BlockNode) and node.size == 0:
                     continue
                 if isinstance(node, HookNode):
                     # attempt to convert it into a function
                     if self.kb.functions.contains_addr(node.addr):
@@ -369,17 +373,43 @@ class FactCollector(Analysis):
                         and not isinstance(node.prototype.returnty, SimTypeBottom)
                     ):
                         # assume the function overwrites the return variable
-                        retval_size = (
-                            node.prototype.returnty.with_arch(self.project.arch).size // self.project.arch.byte_width
-                        )
+                        returnty_size = node.prototype.returnty.with_arch(self.project.arch).size
+                        assert returnty_size is not None
+                        retval_size = returnty_size // self.project.arch.byte_width
                         retval_sizes.append(retval_size)
                     continue
+                # if this block ends with a call to a function, we process the function first
+                func_succs = [
+                    succ
+                    for succ in func_graph.successors(node)
+                    if isinstance(succ, (Function, HookNode)) or self.kb.functions.contains_addr(succ.addr)
+                ]
+                if len(func_succs) == 1:
+                    func_succ = func_succs[0]
+                    if isinstance(func_succ, (BlockNode, HookNode)) and self.kb.functions.contains_addr(func_succ.addr):
+                        # attempt to convert it into a function
+                        func_succ = self.kb.functions.get_by_addr(func_succ.addr)
+                    if isinstance(func_succ, Function):
+                        if (
+                            func_succ.calling_convention is not None
+                            and func_succ.prototype is not None
+                            and func_succ.prototype.returnty is not None
+                            and not isinstance(func_succ.prototype.returnty, SimTypeBottom)
+                        ):
+                            # assume the function overwrites the return variable
+                            returnty_size = func_succ.prototype.returnty.with_arch(self.project.arch).size
+                            assert returnty_size is not None
+                            retval_size = returnty_size // self.project.arch.byte_width
+                            retval_sizes.append(retval_size)
+                        continue
                 block = self.project.factory.block(node.addr, size=node.size)
                 # scan the block statements backwards to find writes to the return value register
                 retval_size = None
                 for stmt in reversed(block.vex.statements):
                     if isinstance(stmt, pyvex.IRStmt.Put):
+                        assert block.vex.tyenv is not None
                         size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
                         if stmt.offset == retreg_offset:
                             retval_size = max(size, 1)
@@ -391,9 +421,9 @@ class FactCollector(Analysis):
                 for pred, _, data in func_graph.in_edges(node, data=True):
                     edge_type = data.get("type")
                     if pred not in traversed and depth + 1 <= self._max_depth:
-                        if edge_type == "fake_return":
+                        if edge_type == "call":
                             continue
-                        if edge_type in {"transition", "call"}:
+                        if edge_type in {"transition", "fake_return"}:
                             queue.append((depth + 1, pred))
         self.retval_size = max(retval_sizes) if retval_sizes else None
@@ -472,6 +502,7 @@ class FactCollector(Analysis):
                                 ):
                                     tmps[stmt.tmp] = "sp"
                     if isinstance(stmt, pyvex.IRStmt.Put):
+                        assert block.vex.tyenv is not None
                         size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
                         # is the data loaded from the stack?
                         if (
@@ -532,13 +563,8 @@ class FactCollector(Analysis):
                 ):
                     continue
                 reg_offset_created.add(offset)
-                if self.project.arch.name in {"AMD64", "X86"} and size < self.project.arch.bytes:
-                    # use complete registers on AMD64 and X86
-                    reg_name = self.project.arch.translate_register_name(offset, size=self.project.arch.bytes)
-                    arg = SimRegArg(reg_name, self.project.arch.bytes)
-                else:
-                    reg_name = self.project.arch.translate_register_name(offset, size=size)
-                    arg = SimRegArg(reg_name, size)
+                reg_name = self.project.arch.translate_register_name(offset, size=size)
+                arg = SimRegArg(reg_name, size)
                 self.input_args.append(arg)
         stack_offset_created = set()

angr/analyses/calling_convention/utils.py CHANGED Viewed

@@ -9,7 +9,9 @@ from angr.calling_conventions import SimCC
 l = logging.getLogger(__name__)
-def is_sane_register_variable(arch: archinfo.Arch, reg_offset: int, reg_size: int, def_cc: SimCC | None = None) -> bool:
+def is_sane_register_variable(
+    arch: archinfo.Arch, reg_offset: int, reg_size: int, def_cc: SimCC | type[SimCC] | None = None
+) -> bool:
     """
     Filters all registers that are surly not members of function arguments.
     This can be seen as a workaround, since VariableRecoveryFast sometimes gives input variables of cc_ndep (which

angr/analyses/cfg/indirect_jump_resolvers/jumptable.py CHANGED Viewed

@@ -182,23 +182,24 @@ class ConstantValueManager:
         # determine blocks to run FCP on
-        # - include at most three levels of successors from the entrypoint
+        # - include at most three levels of superblock successors from the entrypoint
         startpoint = self.func.startpoint
         blocks = set()
-        succs = [startpoint]
-        for _ in range(3):
+        succ_and_levels = [(startpoint, 0)]
+        while succ_and_levels:
             new_succs = []
-            for node in succs:
+            for node, level in succ_and_levels:
                 if node in blocks:
                     continue
                 blocks.add(node)
                 if node.addr == self.indirect_jump_addr:
                     # stop at the indirect jump block
                     continue
-                new_succs += list(self.func.graph.successors(node))
-            succs = new_succs
-            if not succs:
-                break
+                for _, succ, data in self.func.graph.out_edges(node, data=True):
+                    new_level = level if data.get("type") == "fake_return" else level + 1
+                    if new_level <= 3:
+                        new_succs.append((succ, new_level))
+            succ_and_levels = new_succs
         # - include at most six levels of predecessors from the indirect jump block
         ij_block = self.func.get_node(self.indirect_jump_addr)

angr/analyses/decompiler/ail_simplifier.py CHANGED Viewed

@@ -99,6 +99,7 @@ class AILSimplifier(Analysis):
         removed_vvar_ids: set[int] | None = None,
         arg_vvars: dict[int, tuple[VirtualVariable, SimVariable]] | None = None,
         avoid_vvar_ids: set[int] | None = None,
+        secondary_stackvars: set[int] | None = None,
     ):
         self.func = func
         self.func_graph = func_graph if func_graph is not None else func.graph
@@ -119,6 +120,7 @@ class AILSimplifier(Analysis):
         self._arg_vvars = arg_vvars
         self._avoid_vvar_ids = avoid_vvar_ids
         self._propagator_dead_vvar_ids: set[int] = set()
+        self._secondary_stackvars: set[int] = secondary_stackvars if secondary_stackvars is not None else set()
         self._calls_to_remove: set[CodeLocation] = set()
         self._assignments_to_remove: set[CodeLocation] = set()
@@ -1348,6 +1350,9 @@ class AILSimplifier(Analysis):
                         if rd.is_phi_vvar_id(def_.atom.varid):
                             # we always remove unused phi variables
                             pass
+                        elif def_.atom.varid in self._secondary_stackvars:
+                            # secondary stack variables are potentially removable
+                            pass
                         elif stackarg_offsets is not None:
                             # we always remove definitions for stack arguments
                             assert def_.atom.stack_offset is not None
@@ -1380,7 +1385,9 @@ class AILSimplifier(Analysis):
                 if not isinstance(def_.codeloc, ExternalCodeLocation):
                     assert def_.codeloc.block_addr is not None
                     assert def_.codeloc.stmt_idx is not None
-                stmts_to_keep_per_block[(def_.codeloc.block_addr, def_.codeloc.block_idx)].add(def_.codeloc.stmt_idx)
+                    stmts_to_keep_per_block[(def_.codeloc.block_addr, def_.codeloc.block_idx)].add(
+                        def_.codeloc.stmt_idx
+                    )
         # find all phi variables that rely on variables that no longer exist
         all_removed_var_ids = self._removed_vvar_ids.copy()
@@ -1503,8 +1510,36 @@ class AILSimplifier(Analysis):
         return simplified
+    @staticmethod
+    def _get_vvar_used_by(
+        vvar_id: int, rd: SRDAModel, blocks_dict: dict[tuple[int, int | None], Block]
+    ) -> set[int | None]:
+        """
+        Get all atoms that use a specified virtual variable. The atoms are in the form of virtual variable ID or None
+        (indicating the virtual variable is used by another statement like Store).
+        :param vvar_id:     ID of the virtual variable.
+        :param rd:          The SRDA model.
+        :return:            The set of vvar use atoms.
+        """
+        vvar = rd.varid_to_vvar[vvar_id]
+        used_by: set[int | None] = set()
+        for used_vvar, loc in rd.all_vvar_uses[vvar]:
+            if used_vvar is None:
+                # no explicit reference
+                used_by.add(None)
+            elif loc.block_addr is not None:
+                assert loc.stmt_idx is not None
+                stmt = blocks_dict[(loc.block_addr, loc.block_idx)].statements[loc.stmt_idx]
+                if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
+                    used_by.add(stmt.dst.varid)
+                else:
+                    used_by.add(None)
+        return used_by
     def _find_cyclic_dependent_phis_and_dirty_vvars(self, rd: SRDAModel) -> set[int]:
-        blocks_dict = {(bb.addr, bb.idx): bb for bb in self.func_graph}
+        blocks_dict: dict[tuple[int, int | None], Block] = {(bb.addr, bb.idx): bb for bb in self.func_graph}
         # find dirty vvars and vexccall vvars
         dirty_vvar_ids = set()
@@ -1520,25 +1555,14 @@ class AILSimplifier(Analysis):
         phi_and_dirty_vvar_ids = rd.phi_vvar_ids | dirty_vvar_ids
-        vvar_used_by: dict[int, set[int]] = defaultdict(set)
+        vvar_used_by: dict[int, set[int | None]] = defaultdict(set)
         for var_id in phi_and_dirty_vvar_ids:
             if var_id in rd.phivarid_to_varids:
                 for used_by_varid in rd.phivarid_to_varids[var_id]:
-                    vvar_used_by[used_by_varid].add(var_id)
-            vvar = rd.varid_to_vvar[var_id]
-            used_by = set()
-            for used_vvar, loc in rd.all_vvar_uses[vvar]:
-                if used_vvar is None:
-                    # no explicit reference
-                    used_by.add(None)
-                else:
-                    stmt = blocks_dict[loc.block_addr, loc.block_idx].statements[loc.stmt_idx]
-                    if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
-                        used_by.add(stmt.dst.varid)
-                    else:
-                        used_by.add(None)
-            vvar_used_by[var_id] |= used_by
+                    if used_by_varid not in vvar_used_by:
+                        vvar_used_by[used_by_varid] |= self._get_vvar_used_by(used_by_varid, rd, blocks_dict)
+                    vvar_used_by[used_by_varid].add(var_id)  # probably unnecessary
+            vvar_used_by[var_id] |= self._get_vvar_used_by(var_id, rd, blocks_dict)
         g = networkx.DiGraph()
         dummy_vvar_id = -1
@@ -1557,8 +1581,12 @@ class AILSimplifier(Analysis):
             bail = False
             for varid in scc:
-                # if this vvar is a phi var, ensure this vvar is not used by anything else outside the scc
-                if varid in rd.phi_vvar_ids:
+                # ensure this vvar is not used by anything else outside the scc (regardless of whether this vvar is a
+                # phi variable or not)
+                if varid in vvar_used_by and None in vvar_used_by[varid]:
+                    bail = True
+                    break
+                if bail is False:
                     succs = list(g.successors(varid))
                     if any(succ_varid not in scc for succ_varid in succs):
                         bail = True

angr/analyses/decompiler/callsite_maker.py CHANGED Viewed

@@ -45,7 +45,7 @@ class CallSiteMaker(Analysis):
         self._ail_manager = ail_manager
         self.result_block = None
-        self.stack_arg_offsets: set[tuple[int, int]] | None = None  # ins_addr, stack_offset
+        self.stack_arg_offsets: set[tuple[int, int]] | None = None  # call ins addr, stack_offset
         self.removed_vvar_ids: set[int] = set()
         self._analyze()
@@ -372,7 +372,9 @@ class CallSiteMaker(Analysis):
         return None
-    def _resolve_stack_argument(self, call_stmt, arg_loc) -> tuple[Any, Any]:  # pylint:disable=unused-argument
+    def _resolve_stack_argument(
+        self, call_stmt: Stmt.Call, arg_loc
+    ) -> tuple[Any, Any]:  # pylint:disable=unused-argument
         assert self._stack_pointer_tracker is not None
         size = arg_loc.size
@@ -399,15 +401,26 @@ class CallSiteMaker(Analysis):
                     # FIXME: vvar may be larger than that we ask; we may need to chop the correct value of vvar
                     value = view.get_vvar_value(vvar)
                     if value is not None and not isinstance(value, Expr.Phi):
-                        return None, value
-                    return None, Expr.VirtualVariable(
-                        self._atom_idx(),
-                        vvar.varid,
-                        vvar.bits,
-                        vvar.category,
-                        oident=vvar.oident,
-                        ins_addr=call_stmt.ins_addr,
-                    )
+                        v: Expr.Expression = value
+                    else:
+                        v: Expr.Expression = Expr.VirtualVariable(
+                            self._atom_idx(),
+                            vvar.varid,
+                            vvar.bits,
+                            vvar.category,
+                            oident=vvar.oident,
+                            ins_addr=call_stmt.ins_addr,
+                        )
+                    if v.size > size:
+                        v = Expr.Convert(
+                            self._atom_idx(),
+                            v.bits,
+                            size * self.project.arch.byte_width,
+                            False,
+                            v,
+                            ins_addr=call_stmt.ins_addr,
+                        )
+                    return None, v
             return None, Expr.Load(
                 self._atom_idx(),

angr/analyses/decompiler/clinic.py CHANGED Viewed

@@ -154,6 +154,9 @@ class Clinic(Analysis):
         self._mode = mode
         self.vvar_id_start = vvar_id_start
         self.vvar_to_vvar: dict[int, int] | None = None
+        # during SSA conversion, we create secondary stack variables because they overlap and are larger than the
+        # actual stack variables. these secondary stack variables can be safely eliminated if not used by anything.
+        self.secondary_stackvars: set[int] = set()
         # inlining help
         self._sp_shift = sp_shift
@@ -1239,6 +1242,7 @@ class Clinic(Analysis):
             rewrite_ccalls=rewrite_ccalls,
             removed_vvar_ids=removed_vvar_ids,
             arg_vvars=arg_vvars,
+            secondary_stackvars=self.secondary_stackvars,
         )
         # cache the simplifier's RDA analysis
         self.reaching_definitions = simp._reaching_definitions
@@ -1364,6 +1368,7 @@ class Clinic(Analysis):
             vvar_id_start=self.vvar_id_start,
         )
         self.vvar_id_start = ssailification.max_vvar_id + 1
+        self.secondary_stackvars = ssailification.secondary_stackvars
         return ssailification.out_graph
     @timethis
@@ -1864,6 +1869,11 @@ class Clinic(Analysis):
             if expr.guard:
                 self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, expr.guard)
+        elif isinstance(expr, ailment.Expr.Phi):
+            for _, vvar in expr.src_and_vvars:
+                if vvar is not None:
+                    self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, vvar)
     def _function_graph_to_ail_graph(self, func_graph, blocks_by_addr_and_size=None):
         if blocks_by_addr_and_size is None:
             blocks_by_addr_and_size = self._blocks_by_addr_and_size

angr/analyses/decompiler/decompiler.py CHANGED Viewed

@@ -500,6 +500,7 @@ class Decompiler(Analysis):
                 scratch=self._optimization_scratch,
                 force_loop_single_exit=self._force_loop_single_exit,
                 complete_successors=self._complete_successors,
+                peephole_optimizations=self._peephole_optimizations,
                 **kwargs,
             )

angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py CHANGED Viewed

@@ -950,7 +950,9 @@ class DuplicationReverter(StructuringOptimizationPass):
     #
     def _share_subregion(self, blocks: list[Block]) -> bool:
-        return any(all(block.addr in region for block in blocks) for region in self._ri.regions_by_block_addrs)
+        return any(
+            all((block.addr, block.idx) in region for block in blocks) for region in self._ri.regions_by_block_addrs
+        )
     def _is_valid_candidate(self, b0, b1):
         # blocks must have statements