PyPI - angr - Versions diffs - 9.2.166__cp310-abi3-macosx_10_12_x86_64.whl → 9.2.168__cp310-abi3-macosx_10_12_x86_64.whl - Mend

angr 9.2.166__cp310-abi3-macosx_10_12_x86_64.whl → 9.2.168__cp310-abi3-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of angr might be problematic. Click here for more details.

Files changed (36) hide show

angr/__init__.py +1 -1
angr/analyses/cfg/cfb.py +7 -7
angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +8 -8
angr/analyses/decompiler/clinic.py +8 -0
angr/analyses/decompiler/condition_processor.py +44 -1
angr/analyses/decompiler/decompilation_cache.py +2 -0
angr/analyses/decompiler/decompilation_options.py +10 -0
angr/analyses/decompiler/decompiler.py +26 -2
angr/analyses/decompiler/node_replacer.py +42 -0
angr/analyses/decompiler/notes/__init__.py +9 -0
angr/analyses/decompiler/notes/decompilation_note.py +48 -0
angr/analyses/decompiler/notes/deobfuscated_strings.py +56 -0
angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -0
angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +5 -76
angr/analyses/decompiler/region_identifier.py +12 -3
angr/analyses/decompiler/sequence_walker.py +11 -7
angr/analyses/decompiler/structured_codegen/base.py +34 -1
angr/analyses/decompiler/structured_codegen/c.py +44 -10
angr/analyses/decompiler/structuring/phoenix.py +645 -305
angr/analyses/decompiler/structuring/structurer_base.py +75 -1
angr/analyses/decompiler/utils.py +71 -28
angr/analyses/deobfuscator/string_obf_finder.py +19 -16
angr/analyses/deobfuscator/string_obf_opt_passes.py +6 -3
angr/analyses/reaching_definitions/engine_vex.py +3 -2
angr/procedures/glibc/scanf.py +8 -0
angr/procedures/glibc/sscanf.py +4 -0
angr/rustylib.abi3.so +0 -0
angr/unicornlib.dylib +0 -0
angr/utils/graph.py +62 -24
{angr-9.2.166.dist-info → angr-9.2.168.dist-info}/METADATA +5 -5
{angr-9.2.166.dist-info → angr-9.2.168.dist-info}/RECORD +36 -32
{angr-9.2.166.dist-info → angr-9.2.168.dist-info}/WHEEL +0 -0
{angr-9.2.166.dist-info → angr-9.2.168.dist-info}/entry_points.txt +0 -0
{angr-9.2.166.dist-info → angr-9.2.168.dist-info}/licenses/LICENSE +0 -0
{angr-9.2.166.dist-info → angr-9.2.168.dist-info}/top_level.txt +0 -0

angr/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@
 # pylint: disable=wrong-import-position
 from __future__ import annotations
-__version__ = "9.2.166"
+__version__ = "9.2.168"
 if bytes is str:
     raise Exception(

angr/analyses/cfg/cfb.py CHANGED Viewed

@@ -119,19 +119,19 @@ class CFBlanket(Analysis):
     def _init_regions(self):
         for obj in self.project.loader.all_objects:
             if isinstance(obj, cle.MetaELF):
-                if obj.sections:
+                if obj.segments:
+                    if "segment" not in self._exclude_region_types:
+                        for segment in obj.segments:
+                            if segment.memsize > 0:
+                                mr = MemoryRegion(segment.vaddr, segment.memsize, "segment", obj, segment)
+                                self._regions.append(mr)
+                elif obj.sections:
                     if "section" not in self._exclude_region_types:
                         # Enumerate sections in an ELF file
                         for section in obj.sections:
                             if section.occupies_memory:
                                 mr = MemoryRegion(section.vaddr, section.memsize, "section", obj, section)
                                 self._regions.append(mr)
-                elif obj.segments:
-                    if "segment" not in self._exclude_region_types:
-                        for segment in obj.segments:
-                            if segment.memsize > 0:
-                                mr = MemoryRegion(segment.vaddr, segment.memsize, "segment", obj, segment)
-                                self._regions.append(mr)
                 else:
                     raise NotImplementedError(
                         "Currently ELFs without sections or segments are not supported. Please "

angr/analyses/cfg/indirect_jump_resolvers/jumptable.py CHANGED Viewed

@@ -920,32 +920,32 @@ class JumpTableResolver(IndirectJumpResolver):
         # more sanity checks
         # for a typical jump table, the current block has only one predecessor, and the predecessor to the current
-        # block has two successors (not including itself)
+        # block has two successors
         # for a typical vtable call (or jump if at the end of a function), the block as two predecessors that form a
         # diamond shape
         curr_node = func.get_node(addr)
-        if curr_node is None or curr_node not in func.graph:
+        if curr_node is None or curr_node not in func.transition_graph:
             l.debug("Could not find the node %#x in the function transition graph", addr)
             return False, None
-        preds = list(func.graph.predecessors(curr_node))
+        preds = list(func.transition_graph.predecessors(curr_node))
         pred_endaddrs = {pred.addr + pred.size for pred in preds}  # handle non-normalized CFGs
         if func_graph_complete and not is_arm and not potential_call_table:
             # on ARM you can do a single-block jump table...
             if len(pred_endaddrs) == 1:
-                pred_succs = [succ for succ in func.graph.successors(preds[0]) if succ.addr != preds[0].addr]
+                pred_succs = [succ for succ in func.transition_graph.successors(preds[0]) if succ.addr != preds[0].addr]
                 if len(pred_succs) != 2:
                     l.debug("Expect two successors to the single predecessor, found %d.", len(pred_succs))
                     return False, None
             elif len(pred_endaddrs) == 2 and len(preds) == 2:
                 pred_succs = set(
-                    [succ for succ in func.graph.successors(preds[0]) if succ.addr != preds[0].addr]
-                    + [succ for succ in func.graph.successors(preds[1]) if succ.addr != preds[1].addr]
+                    [succ for succ in func.transition_graph.successors(preds[0]) if succ.addr != preds[0].addr]
+                    + [succ for succ in func.transition_graph.successors(preds[1]) if succ.addr != preds[1].addr]
                 )
                 is_diamond = False
                 if len(pred_succs) == 2:
                     non_node_succ = next(iter(pred_succ for pred_succ in pred_succs if pred_succ is not curr_node))
-                    while func.graph.out_degree[non_node_succ] == 1:
-                        non_node_succ = next(iter(func.graph.successors(non_node_succ)))
+                    while func.transition_graph.out_degree[non_node_succ] == 1:
+                        non_node_succ = next(iter(func.transition_graph.successors(non_node_succ)))
                         if non_node_succ == curr_node:
                             is_diamond = True
                             break

angr/analyses/decompiler/clinic.py CHANGED Viewed

@@ -55,6 +55,7 @@ from .optimization_passes import (
 if TYPE_CHECKING:
     from angr.knowledge_plugins.cfg import CFGModel
+    from .notes import DecompilationNote
     from .decompilation_cache import DecompilationCache
     from .peephole_optimizations import PeepholeOptimizationStmtBase, PeepholeOptimizationExprBase
@@ -142,12 +143,14 @@ class Clinic(Analysis):
         optimization_scratch: dict[str, Any] | None = None,
         desired_variables: set[str] | None = None,
         force_loop_single_exit: bool = True,
+        refine_loops_with_single_successor: bool = False,
         complete_successors: bool = False,
         max_type_constraints: int = 100_000,
         type_constraint_set_degradation_threshold: int = 150,
         ail_graph: networkx.DiGraph | None = None,
         arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimVariable]] | None = None,
         start_stage: ClinicStage | None = ClinicStage.INITIALIZATION,
+        notes: dict[str, DecompilationNote] | None = None,
     ):
         if not func.normalized and mode == ClinicMode.DECOMPILE:
             raise ValueError("Decompilation must work on normalized function graphs.")
@@ -193,6 +196,8 @@ class Clinic(Analysis):
         # actual stack variables. these secondary stack variables can be safely eliminated if not used by anything.
         self.secondary_stackvars: set[int] = set()
+        self.notes = notes if notes is not None else {}
         #
         # intermediate variables used during decompilation
         #
@@ -212,6 +217,7 @@ class Clinic(Analysis):
         self._inlining_parents = inlining_parents or ()
         self._desired_variables = desired_variables
         self._force_loop_single_exit = force_loop_single_exit
+        self._refine_loops_with_single_successor = refine_loops_with_single_successor
         self._complete_successors = complete_successors
         self._register_save_areas_removed: bool = False
@@ -1550,8 +1556,10 @@ class Clinic(Analysis):
                 entry_node_addr=self.entry_node_addr,
                 scratch=self.optimization_scratch,
                 force_loop_single_exit=self._force_loop_single_exit,
+                refine_loops_with_single_successor=self._refine_loops_with_single_successor,
                 complete_successors=self._complete_successors,
                 stack_pointer_tracker=stack_pointer_tracker,
+                notes=self.notes,
                 **kwargs,
             )
             if a.out_graph:

angr/analyses/decompiler/condition_processor.py CHANGED Viewed

@@ -239,6 +239,24 @@ class ConditionProcessor:
         condition translation if possible.
         """
+        if isinstance(src, SequenceNode) and src.nodes and isinstance(src.nodes[-1], ConditionNode):
+            cond_node = src.nodes[-1]
+            if (
+                isinstance(cond_node.true_node, ailment.Block)
+                and isinstance(cond_node.false_node, ailment.Block)
+                and cond_node.true_node.statements
+                and cond_node.false_node.statements
+            ):
+                last_stmt_true = self.get_last_statement(cond_node.true_node)
+                last_stmt_false = self.get_last_statement(cond_node.false_node)
+                if (
+                    isinstance(last_stmt_true, ailment.Stmt.Jump)
+                    and isinstance(last_stmt_false, ailment.Stmt.Jump)
+                    and isinstance(last_stmt_true.target, ailment.Expr.Const)
+                    and isinstance(last_stmt_false.target, ailment.Expr.Const)
+                ):
+                    return {last_stmt_true.target.value, last_stmt_false.target.value} == {dst0.addr, dst1.addr}
         if src in graph and graph.out_degree[src] == 2 and graph.has_edge(src, dst0) and graph.has_edge(src, dst1):
             # sometimes the last statement is the conditional jump. sometimes it's the first statement of the block
             if isinstance(src, ailment.Block) and src.statements and is_head_controlled_loop_block(src):
@@ -247,7 +265,10 @@ class ConditionProcessor:
                 )
                 assert last_stmt is not None
             else:
-                last_stmt = self.get_last_statement(src)
+                try:
+                    last_stmt = self.get_last_statement(src)
+                except EmptyBlockNotice:
+                    last_stmt = None
             if isinstance(last_stmt, ailment.Stmt.ConditionalJump):
                 return True
@@ -258,6 +279,28 @@ class ConditionProcessor:
         return claripy.is_true(claripy.Not(edge_cond_left) == edge_cond_right)  # type: ignore
     def recover_edge_condition(self, graph: networkx.DiGraph, src, dst):
+        def _check_condnode_and_get_condition(cond_node: ConditionNode) -> claripy.ast.Bool | None:
+            for cond_block, negate in [(cond_node.true_node, False), (cond_node.false_node, True)]:
+                if isinstance(cond_block, ailment.Block) and cond_block.statements:
+                    last_stmt = self.get_last_statement(cond_block)
+                    if (
+                        isinstance(last_stmt, ailment.Stmt.Jump)
+                        and isinstance(last_stmt.target, ailment.Expr.Const)
+                        and last_stmt.target.value == dst.addr
+                    ):
+                        return claripy.Not(cond_node.condition) if negate else cond_node.condition
+            return None
+        if isinstance(src, SequenceNode) and src.nodes and isinstance(src.nodes[-1], ConditionNode):
+            predicate = _check_condnode_and_get_condition(src.nodes[-1])
+            if predicate is not None:
+                return predicate
+        if isinstance(src, ConditionNode):
+            predicate = _check_condnode_and_get_condition(src)
+            if predicate is not None:
+                return predicate
         edge = src, dst
         edge_data = graph.get_edge_data(*edge)
         edge_type = edge_data.get("type", "transition") if edge_data is not None else "transition"

angr/analyses/decompiler/decompilation_cache.py CHANGED Viewed

@@ -22,6 +22,7 @@ class DecompilationCache:
         "errors",
         "func_typevar",
         "ite_exprs",
+        "notes",
         "parameters",
         "type_constraints",
         "var_to_typevar",
@@ -38,6 +39,7 @@ class DecompilationCache:
         self.ite_exprs: set[tuple[int, Any]] | None = None
         self.binop_operators: dict[OpDescriptor, str] | None = None
         self.errors: list[str] = []
+        self.notes: dict[str, str] = {}
     @property
     def local_types(self):

angr/analyses/decompiler/decompilation_options.py CHANGED Viewed

@@ -239,6 +239,16 @@ options = [
         default_value=False,
         clears_cache=True,
     ),
+    O(
+        "Display decompilation notes as comments",
+        "Display decompilation notes in the outpu as function comments.",
+        bool,
+        "codegen",
+        "display_notes",
+        category="Display",
+        default_value=False,
+        clears_cache=False,
+    ),
     O(
         "Multi-expression statements generation",
         "Should the structuring algorithm generate multi-expression statements? If so, under what conditions?",

angr/analyses/decompiler/decompiler.py CHANGED Viewed

@@ -21,12 +21,13 @@ from .region_identifier import RegionIdentifier
 from .optimization_passes.optimization_pass import OptimizationPassStage
 from .ailgraph_walker import AILGraphWalker
 from .condition_processor import ConditionProcessor
-from .decompilation_options import DecompilationOption
+from .decompilation_options import DecompilationOption, PARAM_TO_OPTION
 from .decompilation_cache import DecompilationCache
 from .utils import remove_edges_in_ailgraph
 from .sequence_walker import SequenceWalker
 from .structuring.structurer_nodes import SequenceNode
 from .presets import DECOMPILATION_PRESETS, DecompilationPreset
+from .notes import DecompilationNote
 if TYPE_CHECKING:
     from angr.knowledge_plugins.cfg.cfg_model import CFGModel
@@ -80,7 +81,7 @@ class Decompiler(Analysis):
             func = self.kb.functions[func]
         self.func: Function = func
         self._cfg = cfg.model if isinstance(cfg, CFGFast) else cfg
-        self._options = options or []
+        self._options = self._parse_options(options) if options else []
         if preset is None and optimization_passes:
             self._optimization_passes = optimization_passes
@@ -145,6 +146,7 @@ class Decompiler(Analysis):
         self._copied_var_ids: set[int] = set()
         self._optimization_scratch: dict[str, Any] = {}
         self.expr_collapse_depth = expr_collapse_depth
+        self.notes: dict[str, DecompilationNote] = {}
         if decompile:
             with self._resilience():
@@ -171,6 +173,20 @@ class Decompiler(Analysis):
         id_checks = {"cfg", "variable_kb"}
         return all(a[k] is b[k] if k in id_checks else a[k] == b[k] for k in self._cache_parameters)
+    @staticmethod
+    def _parse_options(options: list[tuple[DecompilationOption | str, Any]]) -> list[tuple[DecompilationOption, Any]]:
+        """
+        Parse the options and return a list of option tuples.
+        """
+        converted_options = []
+        for o, v in options:
+            if isinstance(o, str):
+                # convert to DecompilationOption
+                o = PARAM_TO_OPTION[o]
+            converted_options.append((o, v))
+        return converted_options
     @timethis
     def _decompile(self):
         if self.func.is_simprocedure:
@@ -222,6 +238,7 @@ class Decompiler(Analysis):
         # determine a few arguments according to the structuring algorithm
         fold_callexprs_into_conditions = False
         self._force_loop_single_exit = True
+        self._refine_loops_with_single_successor = False
         self._complete_successors = False
         self._recursive_structurer_params = self.options_to_params(self.options_by_class["recursive_structurer"])
         if "structurer_cls" not in self._recursive_structurer_params:
@@ -229,6 +246,7 @@ class Decompiler(Analysis):
         # is the algorithm based on Phoenix (a schema-based algorithm)?
         if issubclass(self._recursive_structurer_params["structurer_cls"], PhoenixStructurer):
             self._force_loop_single_exit = False
+            # self._refine_loops_with_single_successor = True
             self._complete_successors = True
             fold_callexprs_into_conditions = True
@@ -261,10 +279,12 @@ class Decompiler(Analysis):
                 desired_variables=self._desired_variables,
                 optimization_scratch=self._optimization_scratch,
                 force_loop_single_exit=self._force_loop_single_exit,
+                refine_loops_with_single_successor=self._refine_loops_with_single_successor,
                 complete_successors=self._complete_successors,
                 ail_graph=self._clinic_graph,
                 arg_vvars=self._clinic_arg_vvars,
                 start_stage=self._clinic_start_stage,
+                notes=self.notes,
                 **self.options_to_params(self.options_by_class["clinic"]),
             )
         else:
@@ -375,6 +395,7 @@ class Decompiler(Analysis):
                 const_formats=old_codegen.const_formats if old_codegen is not None else None,
                 externs=clinic.externs,
                 binop_depth_cutoff=self.expr_collapse_depth,
+                notes=self.notes,
                 **self.options_to_params(self.options_by_class["codegen"]),
             )
@@ -396,6 +417,7 @@ class Decompiler(Analysis):
             cond_proc=condition_processor,
             update_graph=update_graph,
             force_loop_single_exit=self._force_loop_single_exit,
+            refine_loops_with_single_successor=self._refine_loops_with_single_successor,
             complete_successors=self._complete_successors,
             entry_node_addr=self.clinic.entry_node_addr,
             **self.options_to_params(self.options_by_class["region_identifier"]),
@@ -444,6 +466,7 @@ class Decompiler(Analysis):
                 entry_node_addr=self.clinic.entry_node_addr,
                 scratch=self._optimization_scratch,
                 force_loop_single_exit=self._force_loop_single_exit,
+                refine_loops_with_single_successor=self._refine_loops_with_single_successor,
                 complete_successors=self._complete_successors,
                 **kwargs,
             )
@@ -507,6 +530,7 @@ class Decompiler(Analysis):
                 entry_node_addr=self.clinic.entry_node_addr,
                 scratch=self._optimization_scratch,
                 force_loop_single_exit=self._force_loop_single_exit,
+                refine_loops_with_single_successor=self._refine_loops_with_single_successor,
                 complete_successors=self._complete_successors,
                 peephole_optimizations=self._peephole_optimizations,
                 avoid_vvar_ids=self._copied_var_ids,

angr/analyses/decompiler/node_replacer.py ADDED Viewed

@@ -0,0 +1,42 @@
+from __future__ import annotations
+from angr.ailment import Block
+from .sequence_walker import SequenceWalker
+from .structuring.structurer_nodes import BaseNode, SequenceNode, MultiNode
+class NodeReplacer(SequenceWalker):
+    """
+    Replaces nodes in a node with new nodes based on a mapping.
+    """
+    def __init__(self, root: BaseNode, replacements: dict) -> None:
+        super().__init__(update_seqnode_in_place=False)
+        self.root = root
+        self.replacements = replacements
+        self.result: BaseNode = self.walk(self.root)  # type:ignore
+    def _handle(self, node: BaseNode, **kwargs):
+        return self.replacements[node] if node in self.replacements else super()._handle(node, **kwargs)
+    def _handle_MultiNode(self, node: MultiNode, **kwargs):
+        changed = False
+        nodes_copy = list(node.nodes)
+        i = len(nodes_copy) - 1
+        has_non_block = False
+        while i > -1:
+            node_ = nodes_copy[i]
+            new_node = self._handle(node_, parent=node, index=i)
+            if new_node is not None:
+                changed = True
+                nodes_copy[i] = new_node
+                if not isinstance(new_node, Block):
+                    has_non_block = True
+            i -= 1
+        if not changed:
+            return None
+        if has_non_block:
+            return SequenceNode(node.addr, nodes=nodes_copy)
+        return MultiNode(nodes_copy, addr=node.addr, idx=node.idx)

angr/analyses/decompiler/notes/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+from __future__ import annotations
+from .decompilation_note import DecompilationNote, DecompilationNoteLevel
+__all__ = (
+    "DecompilationNote",
+    "DecompilationNoteLevel",
+)

angr/analyses/decompiler/notes/decompilation_note.py ADDED Viewed

@@ -0,0 +1,48 @@
+from __future__ import annotations
+from typing import Any
+from enum import Enum
+class DecompilationNoteLevel(Enum):
+    """
+    Enum class describing the level of each decompilation note.
+    """
+    DEBUG = 0
+    INFO = 1
+    WARNING = 2
+    CRITICAL = 3
+class DecompilationNote:
+    """
+    Describes a note that is generated during decompilation.
+    Key is a unique string for the decompilation note. It is used as an index in the decompilation notes dictionary in
+    the Decompiler class.
+    Name is string for display by default.
+    Content is the actual content of the note. It can be of any time, but for custom types, you must override `__str__`
+    so that it can be displayed.
+    Level is the level of the note. The following values are available: DecompilationNoteLevel.DEBUG,
+    DecompilationNoteLevel.INFO, DecompilationNoteLevel.WARNING, and DecompilationNoteLevel.CRITICAL.
+    """
+    __slots__ = (
+        "content",
+        "key",
+        "level",
+        "name",
+    )
+    def __init__(self, key: str, name: str, content: Any, *, level=DecompilationNoteLevel.INFO):
+        self.key = key
+        self.name = name
+        self.content = content
+        self.level = level
+    def __repr__(self):
+        return f"<DecompilationNote: {self.name}>"
+    def __str__(self):
+        return f"{self.name}: {self.content}"

angr/analyses/decompiler/notes/deobfuscated_strings.py ADDED Viewed

@@ -0,0 +1,56 @@
+from __future__ import annotations
+from .decompilation_note import DecompilationNote
+class DeobfuscatedString:
+    """
+    Represents a deobfuscated string.
+    """
+    __slots__ = ("ref_addr", "type", "value")
+    def __init__(self, value: bytes, obf_type: str, ref_addr: int | None = None):
+        self.value = value
+        self.type = obf_type
+        self.ref_addr = ref_addr
+    def __repr__(self):
+        return (
+            f"<DeobfuscatedString Type{self.type} value={self.value!r} ref={self.ref_addr:#x}>"
+            if self.ref_addr is not None
+            else f"<DeobfuscatedString Type{self.type} value={self.value!r}>"
+        )
+    def __str__(self):
+        return repr(self.value)
+class DeobfuscatedStringsNote(DecompilationNote):
+    """
+    Represents a decompilation note that describes obfuscated strings found during decompilation.
+    """
+    def __init__(self, key: str = "deobfuscated_strings", name: str = "Deobfuscated Strings"):
+        super().__init__(key, name, None)
+        self.strings: dict[int, DeobfuscatedString] = {}
+    def add_string(self, obf_type: str, value: bytes, *, ref_addr: int):
+        """
+        Add a deobfuscated string to the note.
+        :param obf_type: The type of obfuscation (e.g., "1", "2").
+        :param value: The deobfuscated string value.
+        :param ref_addr: The address where this string is referenced, if applicable.
+        """
+        deobf_str = DeobfuscatedString(value, obf_type, ref_addr=ref_addr)
+        self.strings[ref_addr] = deobf_str
+    def __str__(self):
+        lines = ["Obfuscated strings are found in decompilation and have been deobfuscated:"]
+        for addr in sorted(self.strings):
+            deobf_str = self.strings[addr]
+            lines.append(f"  Type {deobf_str.type} @ {deobf_str.ref_addr:#x}: {deobf_str.value!r}")
+        return "\n".join(lines)

angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py CHANGED Viewed

@@ -163,7 +163,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
             require_gotos=False,
             prevent_new_gotos=False,
             simplify_ail=False,
-            must_improve_rel_quality=True,
+            must_improve_rel_quality=False,
             **kwargs,
         )

angr/analyses/decompiler/optimization_passes/optimization_pass.py CHANGED Viewed

@@ -135,11 +135,13 @@ class OptimizationPass(BaseOptimizationPass):
         entry_node_addr=None,
         scratch: dict[str, Any] | None = None,
         force_loop_single_exit: bool = True,
+        refine_loops_with_single_successor: bool = False,
         complete_successors: bool = False,
         avoid_vvar_ids: set[int] | None = None,
         arg_vvars: set[int] | None = None,
         peephole_optimizations=None,
         stack_pointer_tracker=None,
+        notes: dict | None = None,
         **kwargs,
     ):
         super().__init__(func)
@@ -158,10 +160,12 @@ class OptimizationPass(BaseOptimizationPass):
             entry_node_addr if entry_node_addr is not None else (func.addr, None)
         )
         self._force_loop_single_exit = force_loop_single_exit
+        self._refine_loops_with_single_successor = refine_loops_with_single_successor
         self._complete_successors = complete_successors
         self._avoid_vvar_ids = avoid_vvar_ids or set()
         self._peephole_optimizations = peephole_optimizations
         self._stack_pointer_tracker = stack_pointer_tracker
+        self.notes = notes if notes is not None else {}
         # output
         self.out_graph: networkx.DiGraph | None = None
@@ -397,6 +401,7 @@ class OptimizationPass(BaseOptimizationPass):
             cond_proc=condition_processor or ConditionProcessor(self.project.arch),
             update_graph=update_graph,
             force_loop_single_exit=self._force_loop_single_exit,
+            refine_loops_with_single_successor=self._refine_loops_with_single_successor,
             complete_successors=self._complete_successors,
             entry_node_addr=self.entry_node_addr,
         )

angr/analyses/decompiler/optimization_passes/return_duplicator_low.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing import Any
 import networkx
 from angr.ailment import Block
-from angr.ailment.statement import ConditionalJump, Label
+from angr.ailment.statement import ConditionalJump
 from .return_duplicator_base import ReturnDuplicatorBase
 from .optimization_pass import StructuringOptimizationPass
@@ -53,7 +53,7 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
         prevent_new_gotos: bool = True,
         minimize_copies_for_regions: bool = True,
         region_identifier=None,
-        vvar_id_start: int | None = None,
+        vvar_id_start: int = 0,
         scratch: dict[str, Any] | None = None,
         max_func_blocks: int = 500,
         **kwargs,
@@ -91,8 +91,9 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
         self,
         src: Block,
         dst: Block,
-        graph: networkx.DiGraph = None,
         max_level_check=1,
+        *,
+        graph: networkx.DiGraph,
     ):
         """
         TODO: Implement a more principled way of checking if an edge is a goto edge with Phoenix's structuring info
@@ -100,6 +101,7 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
         above a goto edge as the goto src.
         """
         # Do a simple and fast check first
+        assert self._goto_manager is not None
         is_simple_goto = self._goto_manager.is_goto_edge(src, dst)
         if is_simple_goto:
             return True
@@ -155,79 +157,6 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
                 # keep testing the next edge
                 node = succ
-            # Special case 3: In Phoenix, regions full of only if-stmts can be collapsed and moved. This causes
-            # the goto manager to report gotos that are at the top of the region instead of ones in the middle of it.
-            # Because of this, we need to gather all the nodes above the original src and check if any of them
-            # go to the destination. Additionally, we need to do this on the supergraph to get rid of
-            # goto edges that are removed by Phoenix.
-            # This case is observed in the test case `TestDecompiler.test_tail_tail_bytes_ret_dup`.
-            if self._supergraph is None:
-                return False
-            super_to_og_nodes = {n: self._supergraph.nodes[n]["original_nodes"] for n in self._supergraph.nodes}
-            og_to_super_nodes = {og: super_n for super_n, ogs in super_to_og_nodes.items() for og in ogs}
-            super_src = og_to_super_nodes.get(src)
-            super_dst = og_to_super_nodes.get(dst)
-            if super_src is None or super_dst is None:
-                return False
-            # collect all nodes which have only an if-stmt in them that are ancestors of super_src
-            check_blks = {super_src}
-            level_blocks = {super_src}
-            for _ in range(10):
-                done = False
-                if_blks = set()
-                for lblock in level_blocks:
-                    preds = list(self._supergraph.predecessors(lblock))
-                    for pred in preds:
-                        only_cond_jump = all(isinstance(s, (ConditionalJump, Label)) for s in pred.statements)
-                        if only_cond_jump:
-                            if_blks.add(pred)
-                    done = len(if_blks) == 0
-                if done:
-                    break
-                check_blks |= if_blks
-                level_blocks = if_blks
-            # convert all the found if-only super-blocks back into their original blocks
-            og_check_blocks = set()
-            for blk in check_blks:
-                og_check_blocks |= set(super_to_og_nodes[blk])
-            # check if any of the original blocks are gotos to the destination
-            goto_hits = 0
-            for block in og_check_blocks:
-                if self._goto_manager.is_goto_edge(block, dst):
-                    goto_hits += 1
-            # Although it is good to find a goto in the if-only block region, having more than a single goto
-            # existing that goes to the same dst is a bad sign. This can be seen in the the following test:
-            # TestDecompiler.test_dd_iread_ret_dup_region
-            #
-            # It occurs when you have something like:
-            # ```
-            # if (a || c)
-            #     goto target;
-            # target:
-            # return 0;
-            # ```
-            #
-            #
-            # This looks like an edge from (a, target) and (c, target) but it is actually a single edge.
-            # If you allow both to duplicate you get the following:
-            # ```
-            # if (a):
-            #    return
-            # if (c):
-            #    return
-            # ```
-            # This is not the desired behavior.
-            # So we need to check if there is only a single goto that goes to the destination.
-            return goto_hits == 1
         return False
     def _analyze(self, cache=None):