angr 9.2.95__py3-none-manylinux2014_x86_64.whl → 9.2.97__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_fast.py +9 -6
  3. angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +6 -1
  4. angr/analyses/complete_calling_conventions.py +27 -11
  5. angr/analyses/decompiler/ail_simplifier.py +30 -8
  6. angr/analyses/decompiler/ccall_rewriters/amd64_ccalls.py +20 -7
  7. angr/analyses/decompiler/clinic.py +21 -5
  8. angr/analyses/decompiler/condition_processor.py +11 -0
  9. angr/analyses/decompiler/decompiler.py +58 -46
  10. angr/analyses/decompiler/optimization_passes/__init__.py +11 -5
  11. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +13 -7
  12. angr/analyses/decompiler/optimization_passes/optimization_pass.py +31 -11
  13. angr/analyses/decompiler/optimization_passes/{return_duplicator.py → return_duplicator_base.py} +54 -102
  14. angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +57 -0
  15. angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +121 -0
  16. angr/analyses/decompiler/region_identifier.py +13 -0
  17. angr/analyses/decompiler/seq_to_blocks.py +19 -0
  18. angr/analyses/decompiler/structured_codegen/c.py +21 -0
  19. angr/analyses/decompiler/structuring/phoenix.py +28 -4
  20. angr/analyses/decompiler/structuring/recursive_structurer.py +35 -1
  21. angr/analyses/decompiler/structuring/structurer_base.py +3 -0
  22. angr/analyses/decompiler/utils.py +41 -6
  23. angr/analyses/disassembly.py +4 -1
  24. angr/analyses/find_objects_static.py +15 -10
  25. angr/analyses/forward_analysis/forward_analysis.py +15 -1
  26. angr/analyses/propagator/engine_ail.py +40 -0
  27. angr/analyses/propagator/propagator.py +6 -3
  28. angr/analyses/reaching_definitions/engine_ail.py +16 -24
  29. angr/analyses/reaching_definitions/rd_state.py +14 -1
  30. angr/analyses/reaching_definitions/reaching_definitions.py +19 -2
  31. angr/analyses/variable_recovery/engine_ail.py +6 -6
  32. angr/analyses/variable_recovery/engine_base.py +22 -4
  33. angr/analyses/variable_recovery/variable_recovery_base.py +4 -1
  34. angr/engines/light/engine.py +8 -1
  35. angr/knowledge_plugins/key_definitions/atoms.py +4 -2
  36. angr/knowledge_plugins/key_definitions/environment.py +11 -0
  37. angr/knowledge_plugins/key_definitions/live_definitions.py +41 -8
  38. angr/knowledge_plugins/key_definitions/uses.py +18 -4
  39. angr/knowledge_plugins/propagations/states.py +22 -3
  40. angr/knowledge_plugins/types.py +6 -0
  41. angr/knowledge_plugins/variables/variable_manager.py +54 -5
  42. angr/simos/simos.py +2 -0
  43. angr/storage/memory_mixins/__init__.py +3 -0
  44. angr/storage/memory_mixins/multi_value_merger_mixin.py +22 -11
  45. angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +20 -2
  46. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +81 -44
  47. angr/utils/cowdict.py +4 -2
  48. angr/utils/funcid.py +6 -0
  49. angr/utils/mp.py +1 -1
  50. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/METADATA +6 -6
  51. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/RECORD +55 -52
  52. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/LICENSE +0 -0
  53. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/WHEEL +0 -0
  54. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/entry_points.txt +0 -0
  55. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/top_level.txt +0 -0

angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py

@@ -5,7 +5,7 @@ import ailment
 from ailment.expression import Op
 
 from ..structuring.structurer_nodes import ConditionNode
-from ..utils import structured_node_is_simple_return
+from ..utils import structured_node_is_simple_return, sequence_to_statements
 from ..sequence_walker import SequenceWalker
 from .optimization_pass import SequenceOptimizationPass, OptimizationPassStage
 
@@ -13,12 +13,14 @@ from .optimization_pass import SequenceOptimizationPass, OptimizationPassStage
 class FlipBooleanWalker(SequenceWalker):
     """
     Walks a SequenceNode and handles every sequence.
+    Uses the flip_size to determine when to flip the condition on large if-statement bodies.
     """
 
-    def __init__(self, graph, last_node=None):
+    def __init__(self, graph, flip_size=10, last_node=None):
         super().__init__()
         self._graph = graph
         self._last_node = last_node
+        self._flip_size = flip_size
 
     def _handle_Sequence(self, seq_node, **kwargs):
         # Type 1:
@@ -48,9 +50,12 @@ class FlipBooleanWalker(SequenceWalker):
             node.true_node, node.false_node = node.false_node, node.true_node
 
         for idx, cond_node, successor in type2_condition_nodes:
-            # flipping the condition on the last node of the program will cause
-            # the program to look strange, so avoid this case
-            if successor is not self._last_node:
+            # there are two possibilities when you might want to flip the condition and move the return statement:
+            # 1. This if-stmt if found somewhere in the middle of the function
+            # 2. This if-stmt is pretty large, but still ends in a return outside of the if-stmt
+            if (successor is not self._last_node) or (
+                len(sequence_to_statements(cond_node.true_node)) >= self._flip_size
+            ):
                 cond_node.condition = ailment.expression.negate(cond_node.condition)
                 seq_node.nodes[idx + 1] = cond_node.true_node
                 cond_node.true_node = successor
@@ -71,15 +76,16 @@ class FlipBooleanCmp(SequenceOptimizationPass):
     NAME = "Flip small ret booleans"
     DESCRIPTION = "When false node has no successors, flip condition so else scope can be simplified later"
 
-    def __init__(self, func, **kwargs):
+    def __init__(self, func, flip_size=10, **kwargs):
         super().__init__(func, **kwargs)
         self._graph = kwargs.get("graph", None)
+        self._flip_size = flip_size
         self.analyze()
 
     def _check(self):
         return bool(self.seq.nodes), None
 
     def _analyze(self, cache=None):
-        walker = FlipBooleanWalker(self._graph, last_node=self.seq.nodes[-1])
+        walker = FlipBooleanWalker(self._graph, last_node=self.seq.nodes[-1], flip_size=self._flip_size)
         walker.walk(self.seq)
         self.out_seq = self.seq
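
Note on the change above: previously the walker flipped a condition only when the if-statement was not the last node of the function; it now also flips when the true branch has at least flip_size statements. A self-contained sketch of the new decision rule (function and parameter names here are illustrative stand-ins, not angr's API):

# Minimal sketch of the flip rule introduced above; names are illustrative only.
def should_flip(successor_is_last_node: bool, true_branch_stmt_count: int, flip_size: int = 10) -> bool:
    # Flip when the if-statement sits in the middle of the function, OR when its
    # body is already large (>= flip_size statements) yet still falls through to
    # a return placed after the if-statement.
    return (not successor_is_last_node) or (true_branch_stmt_count >= flip_size)

# A 12-statement if-body at the end of the function now gets flipped too:
assert should_flip(successor_is_last_node=True, true_branch_stmt_count=12) is True
assert should_flip(successor_is_last_node=True, true_branch_stmt_count=3) is False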

angr/analyses/decompiler/optimization_passes/optimization_pass.py

@@ -6,6 +6,7 @@ import networkx # pylint:disable=unused-import
 import ailment
 
 from angr.analyses.decompiler import RegionIdentifier
+from angr.analyses.decompiler.condition_processor import ConditionProcessor
 from angr.analyses.decompiler.goto_manager import GotoManager
 from angr.analyses.decompiler.structuring import RecursiveStructurer, PhoenixStructurer
 from angr.analyses.decompiler.utils import add_labels
@@ -93,6 +94,26 @@ class BaseOptimizationPass:
         """
         raise NotImplementedError()
 
+    def _simplify_graph(self, graph):
+        simp = self.project.analyses.AILSimplifier(
+            self._func,
+            func_graph=graph,
+            use_callee_saved_regs_at_return=False,
+            gp=self._func.info.get("gp", None) if self.project.arch.name in {"MIPS32", "MIPS64"} else None,
+        )
+        return simp.func_graph if simp.simplified else graph
+
+    def _recover_regions(self, graph: networkx.DiGraph, condition_processor=None, update_graph: bool = False):
+        return self.project.analyses[RegionIdentifier].prep(kb=self.kb)(
+            self._func,
+            graph=graph,
+            cond_proc=condition_processor or ConditionProcessor(self.project.arch),
+            update_graph=update_graph,
+            # TODO: find a way to pass Phoenix/DREAM options here (see decompiler.py for correct use)
+            force_loop_single_exit=True,
+            complete_successors=False,
+        )
+
 
 class OptimizationPass(BaseOptimizationPass):
     """
@@ -256,6 +277,7 @@ class StructuringOptimizationPass(OptimizationPass):
         recover_structure_fails=True,
         max_opt_iters=1,
         simplify_ail=True,
+        require_gotos=True,
         **kwargs,
     ):
         super().__init__(func, **kwargs)
@@ -264,6 +286,7 @@ class StructuringOptimizationPass(OptimizationPass):
         self._recover_structure_fails = recover_structure_fails
         self._max_opt_iters = max_opt_iters
         self._simplify_ail = simplify_ail
+        self._require_gotos = require_gotos
 
         self._goto_manager: Optional[GotoManager] = None
         self._prev_graph: Optional[networkx.DiGraph] = None
@@ -279,6 +302,9 @@ class StructuringOptimizationPass(OptimizationPass):
             return
 
         initial_gotos = self._goto_manager.gotos.copy()
+        if self._require_gotos and not initial_gotos:
+            return
+
         # replace the normal check in OptimizationPass.analyze()
         ret, cache = self._check()
         if not ret:
@@ -304,7 +330,7 @@ class StructuringOptimizationPass(OptimizationPass):
         # simplify the AIL graph
         if self._simplify_ail:
             # this should not (TM) change the structure of the graph but is needed for later optimizations
-            self.out_graph = self._simplify_ail_graph(self.out_graph)
+            self.out_graph = self._simplify_graph(self.out_graph)
 
         if self._prevent_new_gotos:
             prev_gotos = len(initial_gotos)
@@ -317,6 +343,9 @@ class StructuringOptimizationPass(OptimizationPass):
 
     def _fixed_point_analyze(self, cache=None):
         for _ in range(self._max_opt_iters):
+            if self._require_gotos and not self._goto_manager.gotos:
+                break
+
             # backup the graph before the optimization
             if self._recover_structure_fails and self.out_graph is not None:
                 self._prev_graph = networkx.DiGraph(self.out_graph)
@@ -331,15 +360,6 @@ class StructuringOptimizationPass(OptimizationPass):
                 self.out_graph = self._prev_graph if self._recover_structure_fails else None
                 break
 
-    def _simplify_ail_graph(self, graph):
-        simp = self.project.analyses.AILSimplifier(
-            self._func,
-            func_graph=graph,
-            use_callee_saved_regs_at_return=False,
-            gp=self._func.info.get("gp", None) if self.project.arch.name in {"MIPS32", "MIPS64"} else None,
-        )
-        return simp.func_graph if simp.simplified else graph
-
     def _graph_is_structurable(self, graph, readd_labels=False) -> bool:
         """
         Checks weather the input graph is structurable under the Phoenix schema-matching structuring algorithm.
@@ -367,7 +387,7 @@ class StructuringOptimizationPass(OptimizationPass):
             func=self._func,
             structurer_cls=PhoenixStructurer,
         )
-        if not rs or not rs.result or not rs.result.nodes:
+        if not rs or not rs.result or not rs.result.nodes or rs.result_incomplete:
            return False
 
         rs = self.project.analyses.RegionSimplifier(self._func, rs.result, kb=self.kb, variable_kb=self._variable_kb)
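
Summary of the refactor above: the AIL simplification helper moves from StructuringOptimizationPass up to BaseOptimizationPass (renamed _simplify_ail_graph → _simplify_graph), a _recover_regions wrapper around RegionIdentifier is added next to it, and StructuringOptimizationPass gains a require_gotos flag that aborts the pass early and stops the fixed-point loop once no gotos remain. A hedged sketch of a subclass opting into the new flag, modeled on ReturnDuplicatorLow further down in this diff (the class, its name, and its strings are hypothetical; only the base-class hooks come from the diff):

# Hypothetical subclass; STAGE is inherited from the base class, as with the
# ReturnDuplicator passes in this release.
from angr.analyses.decompiler.optimization_passes.optimization_pass import StructuringOptimizationPass


class ExampleGotoDrivenPass(StructuringOptimizationPass):
    ARCHES = None
    PLATFORMS = None
    NAME = "Example goto-driven pass"
    DESCRIPTION = "Sketch showing the new require_gotos option"

    def __init__(self, func, **kwargs):
        # require_gotos=True: analyze() bails out when the structurer reports no
        # gotos, and _fixed_point_analyze() stops iterating once they are gone.
        super().__init__(func, max_opt_iters=4, prevent_new_gotos=True, require_gotos=True, **kwargs)
        self.analyze()

    def _check(self):
        # cheap guard; returning (False, None) skips the pass entirely
        return bool(self._func.endpoints), None

    def _analyze(self, cache=None):
        # rewrite self.out_graph here and report whether anything changed
        # (ReturnDuplicatorLow returns the result of its core analysis). With
        # simplify_ail left enabled, the base class then runs the shared
        # _simplify_graph() helper on self.out_graph.
        return False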

angr/analyses/decompiler/optimization_passes/{return_duplicator.py → return_duplicator_base.py}

@@ -1,8 +1,7 @@
-from typing import Any, Tuple, Dict, List
+from typing import Any, Tuple, Dict, List, Optional
 from itertools import count
 import copy
 import logging
-import inspect
 
 import ailment.expression
 import networkx
@@ -11,138 +10,84 @@ from ailment import Block
 from ailment.statement import Jump, ConditionalJump, Assignment, Return, Label
 from ailment.expression import Const
 
-from .optimization_pass import StructuringOptimizationPass
 from ..condition_processor import ConditionProcessor, EmptyBlockNotice
 from ..graph_region import GraphRegion
 from ..utils import remove_labels, to_ail_supergraph, calls_in_graph
-from ..structuring.structurer_nodes import MultiNode
+from ..structuring.structurer_nodes import MultiNode, ConditionNode
+from ..region_identifier import RegionIdentifier
 
 _l = logging.getLogger(name=__name__)
 
 
-class ReturnDuplicator(StructuringOptimizationPass):
+class ReturnDuplicatorBase:
     """
-    An optimization pass that reverts a subset of Irreducible Statement Condensing (ISC) optimizations, as described
-    in the USENIX 2024 paper SAILR.
-
-    Some compilers, including GCC, Clang, and MSVC, apply various optimizations to reduce the number of statements in
-    code. These optimizations will take equivalent statements, or a subset of them, and replace them with a single
-    copy that is jumped to by gotos -- optimizing for space and sometimes speed.
-
-    This optimization pass will revert those gotos by re-duplicating the condensed blocks. Since Return statements
-    are the most common, we use this optimization pass to revert only gotos to return statements. Additionally, we
-    perform some additional readability fixups, like not re-duplicating returns to shared components.
-
-    Args:
-        func: The function to optimize.
-        node_idx_start: The index to start at when creating new nodes. This is used by Clinic to ensure that
-            node indices are unique across multiple passes.
-        max_opt_iters: The maximum number of optimization iterations to perform.
-        max_calls_in_regions: The maximum number of calls that can be in a region. This is used to prevent
-            duplicating too much code.
-        prevent_new_gotos: If True, this optimization pass will prevent new gotos from being created.
-        minimize_copies_for_regions: If True, this optimization pass will minimize the number of copies by doing only
-            a single copy for connected in_edges that form a region.
+    The base class for implementing Return Duplication as described in the SAILR paper.
+    This base class describes the general algorithm for duplicating return regions in a graph.
     """
 
-    ARCHES = None
-    PLATFORMS = None
-    NAME = "Duplicate return blocks to reduce goto statements"
-    DESCRIPTION = inspect.cleandoc(__doc__[: __doc__.index("Args:")])  # pylint:disable=unsubscriptable-object
-
+    # pylint:disable=unused-argument
     def __init__(
         self,
         func,
-        # internal parameters that should be used by Clinic
         node_idx_start: int = 0,
-        # settings
-        max_opt_iters: int = 10,
         max_calls_in_regions: int = 2,
-        prevent_new_gotos: bool = True,
         minimize_copies_for_regions: bool = True,
+        ri: Optional[RegionIdentifier] = None,
         **kwargs,
     ):
-        super().__init__(func, max_opt_iters=max_opt_iters, prevent_new_gotos=prevent_new_gotos, **kwargs)
+        self.node_idx = count(start=node_idx_start)
         self._max_calls_in_region = max_calls_in_regions
         self._minimize_copies_for_regions = minimize_copies_for_regions
 
-        self.node_idx = count(start=node_idx_start)
-        self.analyze()
+        # this should also be set by the optimization passes initer
+        self._func = func
+        self._ri: Optional[RegionIdentifier] = ri
+
+    #
+    # must implement these methods
+    #
+
+    def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False) -> bool:
+        raise NotImplementedError()
+
+    #
+    # main analysis
+    #
 
     def _check(self):
         # does this function have end points?
         return bool(self._func.endpoints), None
 
-    def _analyze(self, cache=None):
+    def _analyze_core(self, graph: networkx.DiGraph) -> bool:
         """
-        This analysis is run in a loop in analyze() for a maximum of max_opt_iters times.
+        This function does the core checks and duplications to the graph passed.
+        The return value is True if the graph was changed.
         """
         graph_changed = False
-        endnode_regions = self._find_endnode_regions(self.out_graph)
+        endnode_regions = self._find_endnode_regions(graph)
 
         if self._minimize_copies_for_regions:
             # perform a second pass to minimize the number of copies by doing only a single copy
             # for connected in_edges that form a region
-            endnode_regions = self._copy_connected_edge_components(endnode_regions, self.out_graph)
+            endnode_regions = self._copy_connected_edge_components(endnode_regions, graph)
 
         for region_head, (in_edges, region) in endnode_regions.items():
             is_single_const_ret_region = self._is_simple_return_graph(region)
             for in_edge in in_edges:
                 pred_node = in_edge[0]
                 if self._should_duplicate_dst(
-                    pred_node, region_head, self.out_graph, dst_is_const_ret=is_single_const_ret_region
+                    pred_node, region_head, graph, dst_is_const_ret=is_single_const_ret_region
                 ):
                     # every eligible pred gets a new region copy
-                    self._copy_region([pred_node], region_head, region, self.out_graph)
+                    self._copy_region([pred_node], region_head, region, graph)
 
-            if region_head in self.out_graph and self.out_graph.in_degree(region_head) == 0:
-                self.out_graph.remove_nodes_from(region)
+            if region_head in graph and graph.in_degree(region_head) == 0:
+                graph.remove_nodes_from(region)
 
             graph_changed = True
 
         return graph_changed
 
-    def _is_goto_edge(
-        self,
-        src: Block,
-        dst: Block,
-        graph: networkx.DiGraph = None,
-        check_for_ifstmts=True,
-        max_level_check=1,
-    ):
-        """
-        TODO: correct how goto edge addressing works
-        This function only exists because a long-standing bug that sometimes reports the if-stmt addr
-        above a goto edge as the goto src. Because of this, we need to check for predecessors above the goto and
-        see if they are a goto. This needs to include Jump to deal with loops.
-        """
-        if check_for_ifstmts and graph is not None:
-            blocks = [src]
-            level_blocks = [src]
-            for _ in range(max_level_check):
-                new_level_blocks = []
-                for lblock in level_blocks:
-                    new_level_blocks += list(graph.predecessors(lblock))
-
-                blocks += new_level_blocks
-                level_blocks = new_level_blocks
-
-            src_direct_parents = list(graph.predecessors(src))
-            for block in blocks:
-                if not block or not block.statements:
-                    continue
-
-                # special case if-stmts that are next to each other
-                if block in src_direct_parents and isinstance(block.statements[-1], ConditionalJump):
-                    continue
-
-                if self._goto_manager.is_goto_edge(block, dst):
-                    return True
-        else:
-            return self._goto_manager.is_goto_edge(src, dst)
-
-        return False
-
     def _find_endnode_regions(self, graph) -> Dict[Any, Tuple[List[Tuple[Any, Any]], networkx.DiGraph]]:
         """
         Find all the regions that contain a node with no successors. These are the "end nodes" of the graph.
@@ -194,14 +139,6 @@ class ReturnDuplicator(StructuringOptimizationPass):
 
         return end_node_regions
 
-    def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False):
-        # returns that are only returning a constant should be duplicated always;
-        if dst_is_const_ret:
-            return True
-
-        # check above
-        return self._is_goto_edge(src, dst, graph=graph, check_for_ifstmts=True)
-
     def _copy_region(self, pred_nodes, region_head, region, graph):
         # copy the entire return region
         copies = {}
@@ -295,8 +232,14 @@
     @staticmethod
     def _is_simple_return_graph(graph: networkx.DiGraph, max_assigns=1):
         """
-        Checks if the graph is a single block, or a series of simple assignments, that ends
-        in a return statement. This is used to know when we MUST duplicate the return block.
+        Checks if the provided graph is a graph that ONLY contains a "simple" return.
+        If there were absolutely no bugs in angr, we could just check that a single return block exists.
+        However, due to some propagation bugs, these cases can all happen and are all valid:
+        1. [Jmp] -> [Jmp] -> [Ret]
+        2. [Jmp] -> [Jmp, x=0] -> [Ret x]
+        3. [Jmp] -> [Jmp, x=rdi] -> [Ret x]
+
+        To deal with this, we need to do the sketchy checks we do below.
         """
         labeless_graph = to_ail_supergraph(remove_labels(graph))
         nodes = list(labeless_graph.nodes())
@@ -466,14 +409,20 @@
 
     @staticmethod
     def _find_block_sets_in_all_regions(top_region: GraphRegion):
+        def _unpack_block_type_to_addrs(node):
+            if isinstance(node, Block):
+                return {node.addr}
+            elif isinstance(node, MultiNode):
+                return {n.addr for n in node.nodes}
+            elif isinstance(node, ConditionNode):
+                return _unpack_block_type_to_addrs(node.true_node) | _unpack_block_type_to_addrs(node.false_node)
+            return set()
+
         def _unpack_region_to_block_addrs(region: GraphRegion):
             region_addrs = set()
             for node in region.graph.nodes:
-                if isinstance(node, Block):
-                    region_addrs.add(node.addr)
-                elif isinstance(node, MultiNode):
-                    for _node in node.nodes:
-                        region_addrs.add(_node.addr)
+                if isinstance(node, (Block, MultiNode, ConditionNode)):
+                    region_addrs |= _unpack_block_type_to_addrs(node)
                 elif isinstance(node, GraphRegion):
                     region_addrs |= _unpack_region_to_block_addrs(node)
 
@@ -487,6 +436,9 @@
                 elif isinstance(node, MultiNode):
                     for _node in node.nodes:
                         addrs_by_region[region].add(_node.addr)
+                elif isinstance(node, ConditionNode):
+                    addrs_by_region[region] |= _unpack_block_type_to_addrs(node.true_node)
+                    addrs_by_region[region] |= _unpack_block_type_to_addrs(node.false_node)
                 else:
                     addrs_by_region[region] |= _unpack_region_to_block_addrs(node)
                     _unpack_every_region(node, addrs_by_region)

angr/analyses/decompiler/optimization_passes/return_duplicator_high.py (new file)

@@ -0,0 +1,57 @@
+import logging
+
+import networkx
+
+from .return_duplicator_base import ReturnDuplicatorBase
+from .optimization_pass import OptimizationPass, OptimizationPassStage
+
+_l = logging.getLogger(name=__name__)
+
+
+class ReturnDuplicatorHigh(OptimizationPass, ReturnDuplicatorBase):
+    """
+    This is a light-level goto-less version of the ReturnDuplicator optimization pass. It will only
+    duplicate return-only blocks.
+    """
+
+    ARCHES = None
+    PLATFORMS = None
+    STAGE = OptimizationPassStage.AFTER_VARIABLE_RECOVERY
+    NAME = "Duplicate return-only blocks (high)"
+    DESCRIPTION = __doc__
+
+    def __init__(
+        self,
+        func,
+        # internal parameters that should be used by Clinic
+        node_idx_start: int = 0,
+        # settings
+        max_calls_in_regions: int = 2,
+        minimize_copies_for_regions: bool = True,
+        **kwargs,
+    ):
+        ReturnDuplicatorBase.__init__(
+            self,
+            func,
+            node_idx_start=node_idx_start,
+            max_calls_in_regions=max_calls_in_regions,
+            minimize_copies_for_regions=minimize_copies_for_regions,
+            **kwargs,
+        )
+        OptimizationPass.__init__(self, func, **kwargs)
+        # since we run before the RegionIdentification pass in the decompiler, we need to collect it early here
+        self._ri = self._recover_regions(self._graph)
+
+        self.analyze()
+
+    def _check(self):
+        return ReturnDuplicatorBase._check(self)
+
+    def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False):
+        # TODO: implement a better check
+        return dst_is_const_ret
+
+    def _analyze(self, cache=None):
+        copy_graph = networkx.DiGraph(self._graph)
+        if self._analyze_core(copy_graph):
+            self.out_graph = self._simplify_graph(copy_graph)

angr/analyses/decompiler/optimization_passes/return_duplicator_low.py (new file)

@@ -0,0 +1,121 @@
+import logging
+import inspect
+
+import networkx
+
+from ailment import Block
+from ailment.statement import ConditionalJump
+
+from .return_duplicator_base import ReturnDuplicatorBase
+from .optimization_pass import StructuringOptimizationPass
+
+_l = logging.getLogger(name=__name__)
+
+
+class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
+    """
+    An optimization pass that reverts a subset of Irreducible Statement Condensing (ISC) optimizations, as described
+    in the USENIX 2024 paper SAILR. This is the heavy/goto version of the ReturnDuplicator optimization pass.
+
+    Some compilers, including GCC, Clang, and MSVC, apply various optimizations to reduce the number of statements in
+    code. These optimizations will take equivalent statements, or a subset of them, and replace them with a single
+    copy that is jumped to by gotos -- optimizing for space and sometimes speed.
+
+    This optimization pass will revert those gotos by re-duplicating the condensed blocks. Since Return statements
+    are the most common, we use this optimization pass to revert only gotos to return statements. Additionally, we
+    perform some additional readability fixups, like not re-duplicating returns to shared components.
+
+    Args:
+        func: The function to optimize.
+        node_idx_start: The index to start at when creating new nodes. This is used by Clinic to ensure that
+            node indices are unique across multiple passes.
+        max_opt_iters: The maximum number of optimization iterations to perform.
+        max_calls_in_regions: The maximum number of calls that can be in a region. This is used to prevent
+            duplicating too much code.
+        prevent_new_gotos: If True, this optimization pass will prevent new gotos from being created.
+        minimize_copies_for_regions: If True, this optimization pass will minimize the number of copies by doing only
+            a single copy for connected in_edges that form a region.
+    """
+
+    ARCHES = None
+    PLATFORMS = None
+    NAME = "Duplicate returns connect with gotos (low)"
+    DESCRIPTION = inspect.cleandoc(__doc__[: __doc__.index("Args:")])  # pylint:disable=unsubscriptable-object
+
+    def __init__(
+        self,
+        func,
+        # internal parameters that should be used by Clinic
+        node_idx_start: int = 0,
+        # settings
+        max_opt_iters: int = 4,
+        max_calls_in_regions: int = 2,
+        prevent_new_gotos: bool = True,
+        minimize_copies_for_regions: bool = True,
+        **kwargs,
+    ):
+        ReturnDuplicatorBase.__init__(
+            self,
+            func,
+            node_idx_start=node_idx_start,
+            max_calls_in_regions=max_calls_in_regions,
+            minimize_copies_for_regions=minimize_copies_for_regions,
+            **kwargs,
+        )
+        StructuringOptimizationPass.__init__(
+            self, func, max_opt_iters=max_opt_iters, prevent_new_gotos=prevent_new_gotos, require_gotos=True, **kwargs
+        )
+        self.analyze()
+
+    def _check(self):
+        return ReturnDuplicatorBase._check(self)
+
+    def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False):
+        return self._is_goto_edge(src, dst, graph=graph, check_for_ifstmts=True)
+
+    def _is_goto_edge(
+        self,
+        src: Block,
+        dst: Block,
+        graph: networkx.DiGraph = None,
+        check_for_ifstmts=True,
+        max_level_check=1,
+    ):
+        """
+        TODO: correct how goto edge addressing works
+        This function only exists because a long-standing bug that sometimes reports the if-stmt addr
+        above a goto edge as the goto src. Because of this, we need to check for predecessors above the goto and
+        see if they are a goto. This needs to include Jump to deal with loops.
+        """
+        if check_for_ifstmts and graph is not None:
+            blocks = [src]
+            level_blocks = [src]
+            for _ in range(max_level_check):
+                new_level_blocks = []
+                for lblock in level_blocks:
+                    new_level_blocks += list(graph.predecessors(lblock))
+
+                blocks += new_level_blocks
+                level_blocks = new_level_blocks
+
+            src_direct_parents = list(graph.predecessors(src))
+            for block in blocks:
+                if not block or not block.statements:
+                    continue
+
+                # special case if-stmts that are next to each other
+                if block in src_direct_parents and isinstance(block.statements[-1], ConditionalJump):
+                    continue
+
+                if self._goto_manager.is_goto_edge(block, dst):
+                    return True
+        else:
+            return self._goto_manager.is_goto_edge(src, dst)
+
+        return False
+
+    def _analyze(self, cache=None):
+        """
+        This analysis is run in a loop in analyze() for a maximum of max_opt_iters times.
+        """
+        return self._analyze_core(self.out_graph)

angr/analyses/decompiler/region_identifier.py

@@ -718,6 +718,13 @@ class RegionIdentifier(Analysis):
                         region.graph_with_successors.add_edge(nn, succ)
                     region.successors.add(succ)
 
+            # add edges between successors
+            for succ_0 in region.successors:
+                for succ_1 in region.successors:
+                    if succ_0 is not succ_1:
+                        if secondary_graph.has_edge(succ_0, succ_1):
+                            region.graph_with_successors.add_edge(succ_0, succ_1)
+
         # l.debug("Walked back %d levels in postdom tree.", levels)
         l.debug("Node %r, frontier %r.", node, frontier)
         # l.debug("Identified an acyclic region %s.", self._dbg_block_list(region.graph.nodes()))
@@ -929,6 +936,12 @@ class RegionIdentifier(Analysis):
             region.successors = []
         region.successors += list(abnormal_exit_nodes)
 
+        for succ_0 in region.successors:
+            for succ_1 in region.successors:
+                if succ_0 is not succ_1:
+                    if graph.has_edge(succ_0, succ_1):
+                        region.graph_with_successors.add_edge(succ_0, succ_1)
+
         for node in loop_nodes:
             graph.remove_node(node)
 

angr/analyses/decompiler/seq_to_blocks.py (new file)

@@ -0,0 +1,19 @@
+from ailment import Block
+
+from .sequence_walker import SequenceWalker
+
+
+class SequenceToBlocks(SequenceWalker):
+    """
+    A helper class to convert a sequence node into a list of blocks.
+    """
+
+    def __init__(self):
+        handlers = {
+            Block: self._handle_Block,
+        }
+        self.blocks = []
+        super().__init__(handlers, force_forward_scan=True, update_seqnode_in_place=False)
+
+    def _handle_Block(self, node: Block, **kwargs):  # pylint:disable=unused-argument
+        self.blocks.append(node)
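
The new SequenceToBlocks walker collects every ailment Block found under a structured SequenceNode, and is presumably what the sequence_to_statements helper imported by FlipBooleanCmp (first hunk above) builds on. A hedged usage sketch; seq_node is assumed to be a structured SequenceNode produced elsewhere in the decompiler pipeline:

# Usage sketch; only SequenceToBlocks itself is defined in this diff.
from angr.analyses.decompiler.seq_to_blocks import SequenceToBlocks

walker = SequenceToBlocks()
walker.walk(seq_node)  # SequenceWalker.walk traverses the structured node tree
blocks = walker.blocks  # ailment.Block objects collected during the walk
stmt_count = sum(len(b.statements) for b in blocks)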