angr 9.2.138__py3-none-manylinux2014_x86_64.whl → 9.2.140__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic; see the registry's advisory page for more details.

Files changed (100)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +48 -21
  3. angr/analyses/calling_convention/fact_collector.py +59 -12
  4. angr/analyses/calling_convention/utils.py +2 -2
  5. angr/analyses/cfg/cfg_base.py +13 -0
  6. angr/analyses/cfg/cfg_fast.py +23 -4
  7. angr/analyses/decompiler/ail_simplifier.py +79 -53
  8. angr/analyses/decompiler/block_simplifier.py +0 -2
  9. angr/analyses/decompiler/callsite_maker.py +80 -14
  10. angr/analyses/decompiler/clinic.py +99 -80
  11. angr/analyses/decompiler/condition_processor.py +2 -2
  12. angr/analyses/decompiler/decompiler.py +19 -7
  13. angr/analyses/decompiler/dephication/rewriting_engine.py +16 -7
  14. angr/analyses/decompiler/expression_narrower.py +1 -1
  15. angr/analyses/decompiler/optimization_passes/__init__.py +3 -0
  16. angr/analyses/decompiler/optimization_passes/condition_constprop.py +149 -0
  17. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +8 -7
  18. angr/analyses/decompiler/optimization_passes/deadblock_remover.py +12 -3
  19. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +1 -1
  20. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +21 -13
  21. angr/analyses/decompiler/optimization_passes/optimization_pass.py +21 -12
  22. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +17 -9
  23. angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +7 -10
  24. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +12 -1
  25. angr/analyses/decompiler/peephole_optimizations/remove_redundant_conversions.py +61 -25
  26. angr/analyses/decompiler/peephole_optimizations/remove_redundant_shifts.py +50 -1
  27. angr/analyses/decompiler/presets/fast.py +2 -0
  28. angr/analyses/decompiler/presets/full.py +2 -0
  29. angr/analyses/decompiler/region_simplifiers/expr_folding.py +259 -108
  30. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +28 -9
  31. angr/analyses/decompiler/ssailification/rewriting_engine.py +20 -2
  32. angr/analyses/decompiler/ssailification/traversal_engine.py +4 -3
  33. angr/analyses/decompiler/structured_codegen/c.py +10 -3
  34. angr/analyses/decompiler/structuring/dream.py +28 -19
  35. angr/analyses/decompiler/structuring/phoenix.py +253 -89
  36. angr/analyses/decompiler/structuring/recursive_structurer.py +1 -0
  37. angr/analyses/decompiler/structuring/structurer_base.py +121 -46
  38. angr/analyses/decompiler/structuring/structurer_nodes.py +6 -1
  39. angr/analyses/decompiler/utils.py +60 -1
  40. angr/analyses/deobfuscator/api_obf_finder.py +13 -5
  41. angr/analyses/deobfuscator/api_obf_type2_finder.py +166 -0
  42. angr/analyses/deobfuscator/string_obf_finder.py +105 -18
  43. angr/analyses/forward_analysis/forward_analysis.py +1 -1
  44. angr/analyses/propagator/top_checker_mixin.py +6 -6
  45. angr/analyses/reaching_definitions/__init__.py +2 -1
  46. angr/analyses/reaching_definitions/dep_graph.py +1 -12
  47. angr/analyses/reaching_definitions/engine_vex.py +36 -31
  48. angr/analyses/reaching_definitions/function_handler.py +15 -2
  49. angr/analyses/reaching_definitions/rd_state.py +1 -37
  50. angr/analyses/reaching_definitions/reaching_definitions.py +13 -24
  51. angr/analyses/s_propagator.py +129 -87
  52. angr/analyses/s_reaching_definitions/s_rda_model.py +7 -1
  53. angr/analyses/s_reaching_definitions/s_rda_view.py +2 -2
  54. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +3 -1
  55. angr/analyses/stack_pointer_tracker.py +36 -22
  56. angr/analyses/typehoon/simple_solver.py +45 -7
  57. angr/analyses/typehoon/typeconsts.py +18 -5
  58. angr/analyses/variable_recovery/engine_ail.py +1 -1
  59. angr/analyses/variable_recovery/engine_base.py +62 -67
  60. angr/analyses/variable_recovery/engine_vex.py +1 -1
  61. angr/analyses/variable_recovery/irsb_scanner.py +2 -2
  62. angr/block.py +69 -107
  63. angr/callable.py +14 -7
  64. angr/calling_conventions.py +81 -10
  65. angr/distributed/__init__.py +1 -1
  66. angr/engines/__init__.py +7 -8
  67. angr/engines/engine.py +3 -138
  68. angr/engines/failure.py +2 -2
  69. angr/engines/hook.py +2 -2
  70. angr/engines/light/engine.py +5 -10
  71. angr/engines/pcode/emulate.py +2 -2
  72. angr/engines/pcode/engine.py +2 -14
  73. angr/engines/pcode/lifter.py +2 -2
  74. angr/engines/procedure.py +2 -2
  75. angr/engines/soot/engine.py +2 -2
  76. angr/engines/soot/statements/switch.py +1 -1
  77. angr/engines/successors.py +123 -17
  78. angr/engines/syscall.py +2 -2
  79. angr/engines/unicorn.py +3 -3
  80. angr/engines/vex/heavy/heavy.py +3 -15
  81. angr/engines/vex/lifter.py +2 -2
  82. angr/engines/vex/light/light.py +2 -2
  83. angr/factory.py +4 -19
  84. angr/knowledge_plugins/cfg/cfg_model.py +3 -2
  85. angr/knowledge_plugins/key_definitions/atoms.py +8 -4
  86. angr/knowledge_plugins/key_definitions/live_definitions.py +41 -103
  87. angr/knowledge_plugins/labels.py +2 -2
  88. angr/knowledge_plugins/obfuscations.py +1 -0
  89. angr/knowledge_plugins/xrefs/xref_manager.py +4 -0
  90. angr/sim_type.py +19 -17
  91. angr/state_plugins/plugin.py +19 -4
  92. angr/storage/memory_mixins/memory_mixin.py +1 -1
  93. angr/storage/memory_mixins/paged_memory/pages/multi_values.py +10 -5
  94. angr/utils/ssa/__init__.py +119 -4
  95. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/METADATA +6 -6
  96. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/RECORD +100 -98
  97. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/LICENSE +0 -0
  98. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/WHEEL +0 -0
  99. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/entry_points.txt +0 -0
  100. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/top_level.txt +0 -0
@@ -16,7 +16,7 @@ from ailment.expression import (
16
16
  Convert,
17
17
  Expression,
18
18
  )
19
- from ailment.statement import Assignment, Store, Return, Jump
19
+ from ailment.statement import Assignment, Store, Return, Jump, ConditionalJump
20
20
 
21
21
  from angr.knowledge_plugins.functions import Function
22
22
  from angr.code_location import CodeLocation, ExternalCodeLocation
@@ -35,6 +35,7 @@ from angr.utils.ssa import (
35
35
  get_tmp_uselocs,
36
36
  get_tmp_deflocs,
37
37
  phi_assignment_get_src,
38
+ has_store_stmt_in_between_stmts,
38
39
  )
39
40
 
40
41
 
@@ -45,6 +46,8 @@ class SPropagatorModel:
45
46
 
46
47
  def __init__(self):
47
48
  self.replacements: Mapping[CodeLocation, Mapping[Expression, Expression]] = {}
49
+ # store vvars that are definitely dead (but usually not removed by default because they are stack variables)
50
+ self.dead_vvar_ids: set[int] = set()
48
51
 
49
52
 
50
53
  class SPropagatorAnalysis(Analysis):
@@ -90,6 +93,7 @@ class SPropagatorAnalysis(Analysis):
90
93
  bp_as_gpr = the_func.info.get("bp_as_gpr", False)
91
94
  self._bp_as_gpr = bp_as_gpr
92
95
 
96
+ # output
93
97
  self.model = SPropagatorModel()
94
98
 
95
99
  self._analyze()
@@ -98,6 +102,10 @@ class SPropagatorAnalysis(Analysis):
98
102
  def replacements(self):
99
103
  return self.model.replacements
100
104
 
105
+ @property
106
+ def dead_vvar_ids(self):
107
+ return self.model.dead_vvar_ids
108
+
101
109
  def _analyze(self):
102
110
  blocks: dict[tuple[int, int | None], Block]
103
111
  match self.mode:
@@ -132,7 +140,7 @@ class SPropagatorAnalysis(Analysis):
132
140
 
133
141
  replacements = defaultdict(dict)
134
142
 
135
- # find constant assignments
143
+ # find constant and other propagatable assignments
136
144
  vvarid_to_vvar = {}
137
145
  const_vvars: dict[int, Const] = {}
138
146
  for vvar, defloc in vvar_deflocs.items():
@@ -140,7 +148,6 @@ class SPropagatorAnalysis(Analysis):
140
148
  continue
141
149
 
142
150
  vvarid_to_vvar[vvar.varid] = vvar
143
- defloc = vvar_deflocs[vvar]
144
151
  if isinstance(defloc, ExternalCodeLocation):
145
152
  continue
146
153
 
@@ -178,8 +185,29 @@ class SPropagatorAnalysis(Analysis):
178
185
  for vvar_at_use, useloc in vvar_uselocs[vvar.varid]:
179
186
  replacements[useloc][vvar_at_use] = const_value
180
187
 
181
- if self.mode == "function" and vvar.varid in vvar_uselocs:
182
- if len(vvar_uselocs[vvar.varid]) <= 2 and isinstance(stmt, Assignment) and isinstance(stmt.src, Load):
188
+ # function mode only
189
+ if self.mode == "function":
190
+ assert self.func_graph is not None
191
+
192
+ for vvar, defloc in vvar_deflocs.items():
193
+ if vvar.varid not in vvar_uselocs:
194
+ continue
195
+ if vvar.varid in const_vvars:
196
+ continue
197
+ if isinstance(defloc, ExternalCodeLocation):
198
+ continue
199
+
200
+ assert defloc.block_addr is not None
201
+ assert defloc.stmt_idx is not None
202
+
203
+ block = blocks[(defloc.block_addr, defloc.block_idx)]
204
+ stmt = block.statements[defloc.stmt_idx]
205
+ if (
206
+ (vvar.was_reg or vvar.was_parameter)
207
+ and len(vvar_uselocs[vvar.varid]) <= 2
208
+ and isinstance(stmt, Assignment)
209
+ and isinstance(stmt.src, Load)
210
+ ):
183
211
  # do we want to propagate this Load expression if it's used for less than twice?
184
212
  # it's often seen in the following pattern, where propagation will be beneficial:
185
213
  # v0 = Load(...)
@@ -188,7 +216,7 @@ class SPropagatorAnalysis(Analysis):
188
216
  # }
189
217
  can_replace = True
190
218
  for _, vvar_useloc in vvar_uselocs[vvar.varid]:
191
- if self.has_store_stmt_in_between(blocks, defloc, vvar_useloc):
219
+ if has_store_stmt_in_between_stmts(self.func_graph, blocks, defloc, vvar_useloc):
192
220
  can_replace = False
193
221
 
194
222
  if can_replace:
@@ -197,63 +225,80 @@ class SPropagatorAnalysis(Analysis):
197
225
  replacements[vvar_useloc][vvar_used] = stmt.src
198
226
  continue
199
227
 
200
- if len(vvar_uselocs[vvar.varid]) == 1:
201
- vvar_used, vvar_useloc = next(iter(vvar_uselocs[vvar.varid]))
202
- if is_const_vvar_load_assignment(stmt) and not self.has_store_stmt_in_between(
203
- blocks, defloc, vvar_useloc
204
- ):
205
- # we can propagate this load because there is no store between its def and use
206
- replacements[vvar_useloc][vvar_used] = stmt.src
228
+ if (
229
+ (vvar.was_reg or vvar.was_stack)
230
+ and len(vvar_uselocs[vvar.varid]) == 2
231
+ and not is_phi_assignment(stmt)
232
+ ):
233
+ # a special case: in a typical switch-case construct, a variable may be used once for comparison
234
+ # for the default case and then used again for constructing the jump target. we can propagate this
235
+ # variable for such cases.
236
+ uselocs = {loc for _, loc in vvar_uselocs[vvar.varid]}
237
+ if self.is_vvar_used_for_addr_loading_switch_case(uselocs, blocks):
238
+ for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
239
+ replacements[vvar_useloc][vvar_used] = stmt.src
240
+ # mark the vvar as dead and should be removed
241
+ self.model.dead_vvar_ids.add(vvar.varid)
207
242
  continue
208
243
 
209
- if is_const_and_vvar_assignment(stmt):
210
- # if the useloc is a phi assignment statement, ensure that stmt.src is the same as the phi
211
- # variable
212
- assert vvar_useloc.block_addr is not None
213
- assert vvar_useloc.stmt_idx is not None
214
- useloc_stmt = blocks[(vvar_useloc.block_addr, vvar_useloc.block_idx)].statements[
215
- vvar_useloc.stmt_idx
216
- ]
217
- if is_phi_assignment(useloc_stmt):
218
- if (
219
- isinstance(stmt.src, VirtualVariable)
220
- and stmt.src.oident == useloc_stmt.dst.oident
221
- and stmt.src.category == useloc_stmt.dst.category
222
- ):
223
- replacements[vvar_useloc][vvar_used] = stmt.src
224
- else:
244
+ if vvar.was_reg or vvar.was_parameter:
245
+ if len(vvar_uselocs[vvar.varid]) == 1:
246
+ vvar_used, vvar_useloc = next(iter(vvar_uselocs[vvar.varid]))
247
+ if is_const_vvar_load_assignment(stmt) and not has_store_stmt_in_between_stmts(
248
+ self.func_graph, blocks, defloc, vvar_useloc
249
+ ):
250
+ # we can propagate this load because there is no store between its def and use
225
251
  replacements[vvar_useloc][vvar_used] = stmt.src
226
- continue
252
+ continue
227
253
 
228
- else:
229
- non_exitsite_uselocs = [
230
- loc
231
- for _, loc in vvar_uselocs[vvar.varid]
232
- if (loc.block_addr, loc.block_idx, loc.stmt_idx) not in (retsites | jumpsites)
233
- ]
234
- if is_const_and_vvar_assignment(stmt):
235
- if len(non_exitsite_uselocs) == 1:
236
- # this vvar is used once if we exclude its uses at ret sites or jump sites. we can
237
- # propagate it
238
- for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
254
+ if is_const_and_vvar_assignment(stmt):
255
+ # if the useloc is a phi assignment statement, ensure that stmt.src is the same as the phi
256
+ # variable
257
+ assert vvar_useloc.block_addr is not None
258
+ assert vvar_useloc.stmt_idx is not None
259
+ useloc_stmt = blocks[(vvar_useloc.block_addr, vvar_useloc.block_idx)].statements[
260
+ vvar_useloc.stmt_idx
261
+ ]
262
+ if is_phi_assignment(useloc_stmt):
263
+ if (
264
+ isinstance(stmt.src, VirtualVariable)
265
+ and stmt.src.oident == useloc_stmt.dst.oident
266
+ and stmt.src.category == useloc_stmt.dst.category
267
+ ):
268
+ replacements[vvar_useloc][vvar_used] = stmt.src
269
+ else:
239
270
  replacements[vvar_useloc][vvar_used] = stmt.src
240
271
  continue
241
272
 
242
- if len(set(non_exitsite_uselocs)) == 1 and not has_ite_expr(stmt.src):
243
- useloc = non_exitsite_uselocs[0]
244
- assert useloc.block_addr is not None
245
- assert useloc.stmt_idx is not None
246
- useloc_stmt = blocks[(useloc.block_addr, useloc.block_idx)].statements[useloc.stmt_idx]
247
- if stmt.src.depth <= 3 and not has_ite_stmt(useloc_stmt):
248
- # remove duplicate use locs (e.g., if the variable is used multiple times by the same
249
- # statement) - but ensure stmt is simple enough
273
+ else:
274
+ non_exitsite_uselocs = [
275
+ loc
276
+ for _, loc in vvar_uselocs[vvar.varid]
277
+ if (loc.block_addr, loc.block_idx, loc.stmt_idx) not in (retsites | jumpsites)
278
+ ]
279
+ if is_const_and_vvar_assignment(stmt):
280
+ if len(non_exitsite_uselocs) == 1:
281
+ # this vvar is used once if we exclude its uses at ret sites or jump sites. we can
282
+ # propagate it
250
283
  for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
251
284
  replacements[vvar_useloc][vvar_used] = stmt.src
252
285
  continue
253
286
 
287
+ if len(set(non_exitsite_uselocs)) == 1 and not has_ite_expr(stmt.src):
288
+ useloc = non_exitsite_uselocs[0]
289
+ assert useloc.block_addr is not None
290
+ assert useloc.stmt_idx is not None
291
+ useloc_stmt = blocks[(useloc.block_addr, useloc.block_idx)].statements[useloc.stmt_idx]
292
+ if stmt.src.depth <= 3 and not has_ite_stmt(useloc_stmt):
293
+ # remove duplicate use locs (e.g., if the variable is used multiple times by the
294
+ # same statement) - but ensure stmt is simple enough
295
+ for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
296
+ replacements[vvar_useloc][vvar_used] = stmt.src
297
+ continue
298
+
254
299
  # special logic for global variables: if it's used once or multiple times, and the variable is never
255
300
  # updated before it's used, we will propagate the load
256
- if isinstance(stmt, Assignment):
301
+ if (vvar.was_reg or vvar.was_parameter) and isinstance(stmt, Assignment):
257
302
  stmt_src = stmt.src
258
303
  # unpack conversions
259
304
  while isinstance(stmt_src, Convert):
@@ -420,43 +465,40 @@ class SPropagatorAnalysis(Analysis):
420
465
 
421
466
  return False
422
467
 
423
- def has_store_stmt_in_between(
424
- self, blocks: dict[tuple[int, int | None], Block], defloc: CodeLocation, useloc: CodeLocation
425
- ) -> bool:
426
- assert defloc.block_addr is not None
427
- assert defloc.stmt_idx is not None
428
- assert useloc.block_addr is not None
429
- assert useloc.stmt_idx is not None
430
- assert self.func_graph is not None
431
-
432
- use_block = blocks[(useloc.block_addr, useloc.block_idx)]
433
- def_block = blocks[(defloc.block_addr, defloc.block_idx)]
434
-
435
- # traverse the graph, go from use_block until we reach def_block, and look for Store statements
436
- seen = {use_block}
437
- queue = [use_block]
438
- while queue:
439
- block = queue.pop(0)
440
-
441
- starting_stmt_idx, ending_stmt_idx = 0, len(block.statements)
442
- if block is def_block:
443
- starting_stmt_idx = defloc.stmt_idx + 1
444
- if block is use_block:
445
- ending_stmt_idx = useloc.stmt_idx + 1
446
-
447
- for i in range(starting_stmt_idx, ending_stmt_idx):
448
- if isinstance(block.statements[i], Store):
449
- return True
450
-
451
- if block is def_block:
452
- continue
453
-
454
- for pred in self.func_graph.predecessors(block):
455
- if pred not in seen:
456
- seen.add(pred)
457
- queue.append(pred)
458
-
459
- return False
468
+ @staticmethod
469
+ def is_vvar_used_for_addr_loading_switch_case(uselocs: set[CodeLocation], blocks) -> bool:
470
+ """
471
+ Check if a virtual variable is used for loading an address in a switch-case construct.
472
+
473
+ :param uselocs: The use locations of the virtual variable.
474
+ :param blocks: All blocks of the current function.
475
+ :return: True if the virtual variable is used for loading an address in a switch-case construct, False
476
+ otherwise.
477
+ """
478
+
479
+ if len(uselocs) != 2:
480
+ return False
481
+
482
+ useloc_0, useloc_1 = list(uselocs)
483
+ block_0 = blocks[(useloc_0.block_addr, useloc_0.block_idx)]
484
+ stmt_0 = block_0.statements[useloc_0.stmt_idx]
485
+ block_1 = blocks[(useloc_1.block_addr, useloc_1.block_idx)]
486
+ stmt_1 = block_1.statements[useloc_1.stmt_idx]
487
+
488
+ if isinstance(stmt_0, Jump):
489
+ stmt_0, stmt_1 = stmt_1, stmt_0
490
+ block_0, block_1 = block_1, block_0
491
+ if not isinstance(stmt_0, ConditionalJump) or not isinstance(stmt_1, Jump):
492
+ return False
493
+
494
+ # check if stmt_0 jumps to block_1
495
+ if not isinstance(stmt_0.true_target, Const) or not isinstance(stmt_0.false_target, Const):
496
+ return False
497
+ stmt_0_targets = {
498
+ (stmt_0.true_target.value, stmt_0.true_target_idx),
499
+ (stmt_0.false_target.value, stmt_0.false_target_idx),
500
+ }
501
+ return (block_1.addr, block_1.idx) in stmt_0_targets
460
502
 
461
503
 
462
504
  register_analysis(SPropagatorAnalysis, "SPropagator")
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  from collections import defaultdict
4
4
  from collections.abc import Generator
5
- from typing import Any
5
+ from typing import Any, Literal, overload
6
6
 
7
7
  from ailment.expression import VirtualVariable, Tmp
8
8
 
@@ -48,6 +48,12 @@ class SRDAModel:
48
48
  s.add(Definition(tmp_atom, CodeLocation(block_loc.block_addr, stmt_idx, block_idx=block_loc.block_idx)))
49
49
  return s
50
50
 
51
+ @overload
52
+ def get_uses_by_location(self, loc: CodeLocation, exprs: Literal[True]) -> set[tuple[Definition, Any | None]]: ...
53
+
54
+ @overload
55
+ def get_uses_by_location(self, loc: CodeLocation, exprs: Literal[False] = ...) -> set[Definition]: ...
56
+
51
57
  def get_uses_by_location(
52
58
  self, loc: CodeLocation, exprs: bool = False
53
59
  ) -> set[Definition] | set[tuple[Definition, Any | None]]:
@@ -79,8 +79,8 @@ class StackVVarPredicate:
79
79
  isinstance(stmt, Assignment)
80
80
  and isinstance(stmt.dst, VirtualVariable)
81
81
  and stmt.dst.was_stack
82
- and stmt.dst.stack_offset == self.stack_offset
83
- and stmt.dst.size == self.size
82
+ and stmt.dst.stack_offset <= self.stack_offset < stmt.dst.stack_offset + stmt.dst.size
83
+ and stmt.dst.stack_offset <= self.stack_offset + self.size <= stmt.dst.stack_offset + stmt.dst.size
84
84
  ):
85
85
  self.vvars.add(stmt.dst)
86
86
  return True
@@ -143,7 +143,9 @@ class SReachingDefinitionsAnalysis(Analysis):
143
143
  cc = cc_cls(self.project.arch)
144
144
 
145
145
  codeloc = CodeLocation(block_addr, stmt_idx, block_idx=block_idx, ins_addr=stmt.ins_addr)
146
- arg_locs = cc.ARG_REGS
146
+ arg_locs = list(cc.ARG_REGS)
147
+ if cc.FP_ARG_REGS:
148
+ arg_locs += [r_name for r_name in cc.FP_ARG_REGS if r_name not in arg_locs]
147
149
 
148
150
  for arg_reg_name in arg_locs:
149
151
  reg_offset = self.project.arch.registers[arg_reg_name][0]
@@ -22,6 +22,7 @@ try:
22
22
  from angr.engines import pcode
23
23
  except ImportError:
24
24
  pypcode = None
25
+ pcode = None
25
26
 
26
27
  if TYPE_CHECKING:
27
28
  from angr.block import Block
@@ -93,6 +94,11 @@ class Register:
93
94
  return self.offset == other.offset
94
95
  return False
95
96
 
97
+ def __add__(self, other) -> OffsetVal:
98
+ if type(other) is Constant:
99
+ return OffsetVal(self, other.val)
100
+ raise CouldNotResolveException
101
+
96
102
  def __repr__(self):
97
103
  return str(self.offset)
98
104
 
@@ -232,6 +238,7 @@ class StackPointerTrackerState:
232
238
  def give_up_on_memory_tracking(self):
233
239
  self.memory = {}
234
240
  self.is_tracking_memory = False
241
+ return self
235
242
 
236
243
  def store(self, addr, val):
237
244
  # strong update
@@ -370,7 +377,8 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
370
377
  self._mem_merge_cache = {}
371
378
 
372
379
  if initial_reg_values:
373
- self._reg_value_at_block_start[func.addr if func is not None else block.addr] = initial_reg_values
380
+ block_start_addr = func.addr if func is not None else block.addr # type: ignore
381
+ self._reg_value_at_block_start[block_start_addr] = initial_reg_values
374
382
 
375
383
  _l.debug("Running on function %r", self._func)
376
384
  self._analyze()
@@ -461,9 +469,13 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
461
469
  return any(self.inconsistent_for(r) for r in self.reg_offsets)
462
470
 
463
471
  def inconsistent_for(self, reg):
472
+ if self._func is None:
473
+ raise ValueError("inconsistent_for() is only supported in function mode")
464
474
  return any(self.offset_after_block(endpoint.addr, reg) is TOP for endpoint in self._func.endpoints)
465
475
 
466
476
  def offsets_for(self, reg):
477
+ if self._func is None:
478
+ raise ValueError("offsets_for() is only supported in function mode")
467
479
  return [
468
480
  o for block in self._func.blocks if (o := self.offset_after_block(block.addr, reg)) not in (TOP, BOTTOM)
469
481
  ]
@@ -481,7 +493,7 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
481
493
  def _post_analysis(self):
482
494
  pass
483
495
 
484
- def _get_register(self, offset):
496
+ def _get_register(self, offset) -> Register:
485
497
  name = self.project.arch.register_names[offset]
486
498
  size = self.project.arch.registers[name][1]
487
499
  return Register(offset, size * self.project.arch.byte_width)
@@ -557,7 +569,7 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
557
569
  output_state = state.freeze()
558
570
  return None, output_state
559
571
 
560
- def _process_vex_irsb(self, node, vex_block: pyvex.IRSB, state: StackPointerTrackerState) -> int:
572
+ def _process_vex_irsb(self, node, vex_block: pyvex.IRSB, state: StackPointerTrackerState) -> int | None:
561
573
  tmps = {}
562
574
  curr_stmt_start_addr = None
563
575
 
@@ -704,21 +716,16 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
704
716
  if callees:
705
717
  if len(callees) == 1:
706
718
  callee = callees[0]
719
+ track_rax = False
720
+ if (
721
+ (callee.info.get("is_rust_probestack", False) and self.project.arch.name == "AMD64")
722
+ or (callee.info.get("is_alloca_probe", False) and self.project.arch.name == "AMD64")
723
+ or callee.name == "__chkstk"
724
+ ):
725
+ # sp = sp - rax right after returning from the call
726
+ track_rax = True
707
727
 
708
- if callee.info.get("is_rust_probestack", False) is True and self.project.arch.name == "AMD64":
709
- # special-case for rust_probestack: sp = sp - rax right after returning from the call, so we
710
- # need to keep track of rax
711
- for stmt in reversed(vex_block.statements):
712
- if (
713
- isinstance(stmt, pyvex.IRStmt.Put)
714
- and stmt.offset == self.project.arch.registers["rax"][0]
715
- and isinstance(stmt.data, pyvex.IRExpr.Const)
716
- ):
717
- state.put(stmt.offset, Constant(stmt.data.con.value), force=True)
718
- break
719
- elif callee.name == "__chkstk":
720
- # special-case for __chkstk: sp = sp - rax right after returning from the call, so we need to
721
- # keep track of rax
728
+ if track_rax:
722
729
  for stmt in reversed(vex_block.statements):
723
730
  if (
724
731
  isinstance(stmt, pyvex.IRStmt.Put)
@@ -737,18 +744,20 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
737
744
  # found callee clean-up cases...
738
745
  try:
739
746
  v = state.get(self.project.arch.sp_offset)
747
+ incremented = None
740
748
  if v is BOTTOM:
741
749
  incremented = BOTTOM
742
750
  elif callee_cleanups[0].prototype is not None:
743
751
  num_args = len(callee_cleanups[0].prototype.args)
744
752
  incremented = v + Constant(self.project.arch.bytes * num_args)
745
- state.put(self.project.arch.sp_offset, incremented)
753
+ if incremented is not None:
754
+ state.put(self.project.arch.sp_offset, incremented)
746
755
  except CouldNotResolveException:
747
756
  pass
748
757
 
749
758
  return curr_stmt_start_addr
750
759
 
751
- def _process_pcode_irsb(self, node, pcode_irsb: pcode.lifter.IRSB, state: StackPointerTrackerState) -> int:
760
+ def _process_pcode_irsb(self, node, pcode_irsb: pcode.lifter.IRSB, state: StackPointerTrackerState) -> int | None:
752
761
  unique = {}
753
762
  curr_stmt_start_addr = None
754
763
 
@@ -830,18 +839,20 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
830
839
  # found callee clean-up cases...
831
840
  try:
832
841
  v = state.get(self.project.arch.sp_offset)
842
+ incremented = None
833
843
  if v is BOTTOM:
834
844
  incremented = BOTTOM
835
845
  elif callee_cleanups[0].prototype is not None:
836
846
  num_args = len(callee_cleanups[0].prototype.args)
837
847
  incremented = v + Constant(self.project.arch.bytes * num_args)
838
- state.put(self.project.arch.sp_offset, incremented)
848
+ if incremented is not None:
849
+ state.put(self.project.arch.sp_offset, incremented)
839
850
  except CouldNotResolveException:
840
851
  pass
841
852
 
842
853
  return curr_stmt_start_addr
843
854
 
844
- def _widen_states(self, *states):
855
+ def _widen_states(self, *states: FrozenStackPointerTrackerState):
845
856
  assert len(states) == 2
846
857
  merged, _ = self._merge_states(None, *states)
847
858
  if len(merged.memory) > 5:
@@ -849,13 +860,16 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
849
860
  merged = merged.unfreeze().give_up_on_memory_tracking().freeze()
850
861
  return merged
851
862
 
852
- def _merge_states(self, node, *states: StackPointerTrackerState):
863
+ def _merge_states(self, node, *states: FrozenStackPointerTrackerState):
853
864
  merged_state = states[0]
854
865
  for other in states[1:]:
855
866
  merged_state = merged_state.merge(other, node.addr, self._reg_merge_cache, self._mem_merge_cache)
856
867
  return merged_state, merged_state == states[0]
857
868
 
858
869
  def _find_callees(self, node) -> list[Function]:
870
+ if self._func is None:
871
+ raise ValueError("find_callees() is only supported in function mode")
872
+
859
873
  callees: list[Function] = []
860
874
  for _, dst, data in self._func.transition_graph.out_edges(node, data=True):
861
875
  if data.get("type") == "call" and isinstance(dst, Function):
@@ -5,6 +5,7 @@ from collections import defaultdict
5
5
  import logging
6
6
 
7
7
  import networkx
8
+ from sortedcontainers import SortedDict
8
9
 
9
10
  from angr.utils.constants import MAX_POINTSTO_BITS
10
11
  from .typevars import (
@@ -1165,25 +1166,45 @@ class SimpleSolver:
1165
1166
  # this might be a struct
1166
1167
  fields = {}
1167
1168
 
1168
- candidate_bases = defaultdict(set)
1169
+ candidate_bases = SortedDict()
1169
1170
 
1170
1171
  for labels, _succ in path_and_successors:
1171
1172
  last_label = labels[-1] if labels else None
1172
1173
  if isinstance(last_label, HasField):
1173
1174
  # TODO: Really determine the maximum possible size of the field when MAX_POINTSTO_BITS is in use
1175
+ if last_label.offset not in candidate_bases:
1176
+ candidate_bases[last_label.offset] = set()
1174
1177
  candidate_bases[last_label.offset].add(
1175
1178
  1 if last_label.bits == MAX_POINTSTO_BITS else (last_label.bits // 8)
1176
1179
  )
1177
1180
 
1181
+ # determine possible bases and map each offset to its base
1182
+ offset_to_base = SortedDict()
1183
+ for start_offset, sizes in candidate_bases.items():
1184
+ for size in sizes:
1185
+ for i in range(size):
1186
+ access_off = start_offset + i
1187
+ if access_off not in offset_to_base:
1188
+ offset_to_base[access_off] = start_offset
1189
+
1190
+ # determine again the maximum size of each field (at each offset)
1191
+ offset_to_maxsize = defaultdict(int)
1192
+ offset_to_sizes = defaultdict(set) # we do not consider offsets to each base offset
1193
+ for labels, _succ in path_and_successors:
1194
+ last_label = labels[-1] if labels else None
1195
+ if isinstance(last_label, HasField):
1196
+ base = offset_to_base[last_label.offset]
1197
+ access_size = 1 if last_label.bits == MAX_POINTSTO_BITS else (last_label.bits // 8)
1198
+ offset_to_maxsize[base] = max(offset_to_maxsize[base], (last_label.offset - base) + access_size)
1199
+ offset_to_sizes[base].add(access_size)
1200
+
1178
1201
  node_to_base = {}
1179
1202
 
1180
1203
  for labels, succ in path_and_successors:
1181
1204
  last_label = labels[-1] if labels else None
1182
1205
  if isinstance(last_label, HasField):
1183
- for start_offset, sizes in candidate_bases.items():
1184
- for size in sizes:
1185
- if last_label.offset > start_offset and last_label.offset < start_offset + size: # ???
1186
- node_to_base[succ] = start_offset
1206
+ prev_offset = next(offset_to_base.irange(maximum=last_label.offset, reverse=True))
1207
+ node_to_base[succ] = offset_to_base[prev_offset]
1187
1208
 
1188
1209
  node_by_offset = defaultdict(set)
1189
1210
 
@@ -1195,16 +1216,33 @@ class SimpleSolver:
1195
1216
  else:
1196
1217
  node_by_offset[last_label.offset].add(succ)
1197
1218
 
1198
- for offset, child_nodes in node_by_offset.items():
1219
+ sorted_offsets: list[int] = sorted(node_by_offset)
1220
+ for i in range(len(sorted_offsets)): # pylint:disable=consider-using-enumerate
1221
+ offset = sorted_offsets[i]
1222
+
1223
+ child_nodes = node_by_offset[offset]
1199
1224
  sol = self._determine(equivalent_classes, the_typevar, sketch, solution, nodes=child_nodes)
1200
1225
  if isinstance(sol, TopType):
1201
- sol = int_type(min(candidate_bases[offset]) * 8)
1226
+ # make it an array if possible
1227
+ elem_size = min(offset_to_sizes[offset])
1228
+ array_size = offset_to_maxsize[offset]
1229
+ if array_size % elem_size != 0:
1230
+ # fall back to byte_t
1231
+ elem_size = 1
1232
+ elem_type = int_type(elem_size * 8)
1233
+ sol = elem_type if array_size == elem_size else Array(elem_type, array_size // elem_size)
1202
1234
  fields[offset] = sol
1203
1235
 
1204
1236
  if not fields:
1205
1237
  result = Top_
1206
1238
  for node in nodes:
1207
1239
  self._solution_cache[node.typevar] = result
1240
+ solution[node.typevar] = result
1241
+ elif any(off < 0 for off in fields):
1242
+ result = self._pointer_class()(Bottom_)
1243
+ for node in nodes:
1244
+ self._solution_cache[node.typevar] = result
1245
+ solution[node.typevar] = result
1208
1246
  else:
1209
1247
  # back-patch
1210
1248
  struct_type.fields = fields