PyPI - angr - Versions diffs - 9.2.114__py3-none-macosx_11_0_arm64.whl → 9.2.115__py3-none-macosx_11_0_arm64.whl - Mend

angr 9.2.114__py3-none-macosx_11_0_arm64.whl → 9.2.115__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of angr might be problematic. Click here for more details.

Files changed (41) hide show

angr/__init__.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # pylint: disable=wildcard-import
 # pylint: disable=wrong-import-position
-__version__ = "9.2.114"
+__version__ = "9.2.115"
 if bytes is str:
     raise Exception(

angr/__main__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import argparse
-from angr.analyses.decompiler.structuring import STRUCTURER_CLASSES
+from angr.analyses.decompiler.structuring import STRUCTURER_CLASSES, DEFAULT_STRUCTURER
 from angr.analyses.decompiler.utils import decompile_functions
@@ -42,7 +42,7 @@ def main():
         "--structurer",
         help="The structuring algorithm to use for decompilation.",
         choices=STRUCTURER_CLASSES.keys(),
-        default="phoenix",
+        default=DEFAULT_STRUCTURER,
     )
     args = parser.parse_args()

angr/analyses/cfg/cfg_fast.py CHANGED Viewed

@@ -2981,7 +2981,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase):  # pylin
         simsucc = self.project.factory.default_engine.process(self._initial_state, irsb, force_addr=addr)
         if len(simsucc.successors) == 1:
             ip = simsucc.successors[0].ip
-            if ip._model_concrete is not ip:
+            if claripy.backends.concrete.convert(ip) is not ip:
                 target_addr = ip.concrete_value
                 obj = self.project.loader.find_object_containing(target_addr, membership_check=False)
                 if (obj is not None and obj is not self.project.loader.main_object) or self.project.is_hooked(

angr/analyses/cfg/indirect_jump_resolvers/jumptable.py CHANGED Viewed

@@ -1734,7 +1734,7 @@ class JumpTableResolver(IndirectJumpResolver):
                 # full-function data propagation before performing jump table recovery.
                 l.debug("Multiple statements adding bases, not supported yet")  # FIXME: Just check the addresses?
-        jumptable_addr_vsa = jumptable_addr._model_vsa
+        jumptable_addr_vsa = claripy.backends.vsa.convert(jumptable_addr)
         if not isinstance(jumptable_addr_vsa, claripy.vsa.StridedInterval):
             return None
@@ -2103,7 +2103,7 @@ class JumpTableResolver(IndirectJumpResolver):
             read_length = state.inspect.mem_read_length
             if not isinstance(read_length, int):
-                read_length = read_length._model_vsa.upper_bound
+                read_length = claripy.backends.vsa.convert(read_length).upper_bound
             if read_length > 16:
                 return
             new_read_addr = state.solver.BVV(UninitReadMeta.uninit_read_base, state.arch.bits)

angr/analyses/decompiler/decompilation_options.py CHANGED Viewed

@@ -193,21 +193,11 @@ options = [
         "recursive_structurer",
         "structurer_cls",
         category="Structuring",
-        default_value="Phoenix",
-        candidate_values=["Dream", "Phoenix"],
+        default_value="SAILR",
+        candidate_values=["SAILR", "Phoenix", "DREAM"],
         clears_cache=True,
         convert=structurer_class_from_name,
     ),
-    O(
-        "Improve structuring algorithm",
-        "If applicable in deeper structurer, like Phoenix, improves decompilation output",
-        bool,
-        "recursive_structurer",
-        "improve_structurer",
-        category="Structuring",
-        default_value=True,
-        clears_cache=True,
-    ),
     O(
         "C-style null compares",
         "Rewrites the (x == 0) => (!x) && (x != 0) => (x)",

angr/analyses/decompiler/decompiler.py CHANGED Viewed

@@ -14,7 +14,7 @@ from ...knowledge_base import KnowledgeBase
 from ...sim_variable import SimMemoryVariable, SimRegisterVariable, SimStackVariable
 from ...utils import timethis
 from .. import Analysis, AnalysesHub
-from .structuring import RecursiveStructurer, PhoenixStructurer
+from .structuring import RecursiveStructurer, PhoenixStructurer, DEFAULT_STRUCTURER
 from .region_identifier import RegionIdentifier
 from .optimization_passes.optimization_pass import OptimizationPassStage
 from .optimization_passes import get_default_optimization_passes
@@ -146,8 +146,9 @@ class Decompiler(Analysis):
         self._complete_successors = False
         self._recursive_structurer_params = self.options_to_params(self.options_by_class["recursive_structurer"])
         if "structurer_cls" not in self._recursive_structurer_params:
-            self._recursive_structurer_params["structurer_cls"] = PhoenixStructurer
-        if self._recursive_structurer_params["structurer_cls"] == PhoenixStructurer:
+            self._recursive_structurer_params["structurer_cls"] = DEFAULT_STRUCTURER
+        # is the algorithm based on Phoenix (a schema-based algorithm)?
+        if issubclass(self._recursive_structurer_params["structurer_cls"], PhoenixStructurer):
             self._force_loop_single_exit = False
             self._complete_successors = True
             fold_callexprs_into_conditions = True
@@ -316,6 +317,11 @@ class Decompiler(Analysis):
                 continue
             if pass_.STRUCTURING:
                 if self._recursive_structurer_params["structurer_cls"].NAME not in pass_.STRUCTURING:
+                    l.warning(
+                        "Skipping %s because it does not support structuring algorithm: %s",
+                        pass_,
+                        self._recursive_structurer_params["structurer_cls"].NAME,
+                    )
                     continue
             a = pass_(
@@ -367,6 +373,11 @@ class Decompiler(Analysis):
                 continue
             if pass_.STRUCTURING:
                 if self._recursive_structurer_params["structurer_cls"].NAME not in pass_.STRUCTURING:
+                    l.warning(
+                        "Skipping %s because it does not support structuring algorithm: %s",
+                        pass_,
+                        self._recursive_structurer_params["structurer_cls"].NAME,
+                    )
                     continue
             a = pass_(

angr/analyses/decompiler/optimization_passes/const_prop_reverter.py CHANGED Viewed

@@ -10,6 +10,7 @@ from ailment.statement import Call, Statement, ConditionalJump, Assignment, Stor
 from ailment.expression import Convert, Register, Expression
 from .optimization_pass import OptimizationPass, OptimizationPassStage
+from ..structuring import SAILRStructurer, DreamStructurer
 from ....knowledge_plugins.key_definitions.atoms import MemoryLocation
 from ....knowledge_plugins.key_definitions.constants import OP_BEFORE
@@ -140,6 +141,8 @@ class ConstPropOptReverter(OptimizationPass):
     ARCHES = None
     PLATFORMS = None
+    # allow DREAM since it's useful for return merging
+    STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
     STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
     NAME = "Revert Constant Propagation Optimizations"
     DESCRIPTION = __doc__.strip()

angr/analyses/decompiler/optimization_passes/cross_jump_reverter.py CHANGED Viewed

@@ -21,11 +21,8 @@ class CrossJumpReverter(StructuringOptimizationPass):
     a max of max_opt_iters times. Second, it will not duplicate a block with too many calls.
     """
-    ARCHES = None
-    PLATFORMS = None
     STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
     NAME = "Duplicate linear blocks with gotos"
-    STRUCTURING = ["phoenix"]
     DESCRIPTION = inspect.cleandoc(__doc__).strip()
     def __init__(

angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py CHANGED Viewed

@@ -11,7 +11,7 @@ from ailment.expression import Expression, BinaryOp, Const, Load
 from angr.utils.graph import GraphUtils
 from ..utils import first_nonlabel_statement, remove_last_statement
 from ..structuring.structurer_nodes import IncompleteSwitchCaseHeadStatement, SequenceNode, MultiNode
-from .optimization_pass import OptimizationPassStage, MultipleBlocksException, StructuringOptimizationPass
+from .optimization_pass import MultipleBlocksException, StructuringOptimizationPass
 from ..region_simplifiers.switch_cluster_simplifier import SwitchClusterFinder
 if TYPE_CHECKING:
@@ -143,14 +143,13 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
     As a hack for now, we only run this deoptimization on Linux binaries.
     """
+    # TODO: this needs to be updated to support Windows, but detect and disable on MSVC
     PLATFORMS = ["linux"]
-    STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
     NAME = "Convert lowered switch-cases (if-else) to switch-cases"
     DESCRIPTION = (
         "Convert lowered switch-cases (if-else) to switch-cases. Only works when the Phoenix structuring "
         "algorithm is in use."
     )
-    STRUCTURING = ["phoenix"]
     def __init__(self, func, min_distinct_cases=2, **kwargs):
         super().__init__(

angr/analyses/decompiler/optimization_passes/optimization_pass.py CHANGED Viewed

@@ -10,7 +10,7 @@ import ailment
 from angr.analyses.decompiler import RegionIdentifier
 from angr.analyses.decompiler.condition_processor import ConditionProcessor
 from angr.analyses.decompiler.goto_manager import GotoManager
-from angr.analyses.decompiler.structuring import RecursiveStructurer, PhoenixStructurer
+from angr.analyses.decompiler.structuring import RecursiveStructurer, SAILRStructurer
 from angr.analyses.decompiler.utils import add_labels
 from angr.analyses.decompiler.seq_cf_structure_counter import ControlFlowStructureCounter
@@ -266,10 +266,17 @@ class StructuringOptimizationPass(OptimizationPass):
     The base class for any optimization pass that requires structuring. Optimization passes that inherit from this class
     should directly depend on structuring artifacts, such as regions and gotos. Otherwise, they should use
     OptimizationPass. This is the heaviest (computation time) optimization pass class.
+    By default this type of optimization should work:
+    - on any architecture
+    - on any platform
+    - during region identification (to have iterative structuring)
+    - only with the SAILR structuring algorithm
     """
     ARCHES = None
     PLATFORMS = None
+    STRUCTURING = [SAILRStructurer.NAME]
     STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
     def __init__(
@@ -401,7 +408,7 @@ class StructuringOptimizationPass(OptimizationPass):
                 self._ri.region,
                 cond_proc=self._ri.cond_proc,
                 func=self._func,
-                structurer_cls=PhoenixStructurer,
+                structurer_cls=SAILRStructurer,
             )
         # pylint:disable=broad-except
         except Exception:

angr/analyses/decompiler/optimization_passes/ret_deduplicator.py CHANGED Viewed

@@ -4,6 +4,7 @@ import logging
 from ailment import Block
 from ailment.statement import ConditionalJump, Return
+from ..structuring import SAILRStructurer, DreamStructurer
 from ....utils.graph import subgraph_between_nodes
 from ..utils import remove_labels, to_ail_supergraph, update_labels
 from .optimization_pass import OptimizationPass, OptimizationPassStage
@@ -28,6 +29,7 @@ class ReturnDeduplicator(OptimizationPass):
     STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
     NAME = "Deduplicates return statements that may have been duplicated"
     DESCRIPTION = __doc__.strip()
+    STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
     def __init__(self, func, **kwargs):
         super().__init__(func, **kwargs)

angr/analyses/decompiler/optimization_passes/return_duplicator_high.py CHANGED Viewed

@@ -4,6 +4,7 @@ import networkx
 from .return_duplicator_base import ReturnDuplicatorBase
 from .optimization_pass import OptimizationPass, OptimizationPassStage
+from ..structuring import SAILRStructurer, DreamStructurer
 _l = logging.getLogger(name=__name__)
@@ -19,6 +20,7 @@ class ReturnDuplicatorHigh(OptimizationPass, ReturnDuplicatorBase):
     STAGE = OptimizationPassStage.AFTER_VARIABLE_RECOVERY
     NAME = "Duplicate return-only blocks (high)"
     DESCRIPTION = __doc__
+    STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
     def __init__(
         self,

angr/analyses/decompiler/structuring/__init__.py CHANGED Viewed

@@ -2,14 +2,18 @@ from typing import Optional, Type
 from .dream import DreamStructurer
 from .phoenix import PhoenixStructurer
+from .sailr import SAILRStructurer
 from .recursive_structurer import RecursiveStructurer
 STRUCTURER_CLASSES = {
-    "dream": DreamStructurer,
-    "phoenix": PhoenixStructurer,
+    SAILRStructurer.NAME: SAILRStructurer,
+    PhoenixStructurer.NAME: PhoenixStructurer,
+    DreamStructurer.NAME: DreamStructurer,
 }
+DEFAULT_STRUCTURER = SAILRStructurer
 def structurer_class_from_name(name: str) -> type | None:
     return STRUCTURER_CLASSES.get(name.lower(), None)

angr/analyses/decompiler/structuring/phoenix.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # pylint:disable=line-too-long,import-outside-toplevel,import-error,multiple-statements,too-many-boolean-expressions
 from typing import Any, DefaultDict, Optional, TYPE_CHECKING
 from collections import OrderedDict as ODict
-from collections import defaultdict, OrderedDict
+from collections import defaultdict
 from enum import Enum
 import logging
@@ -12,7 +12,7 @@ from ailment.block import Block
 from ailment.statement import Statement, ConditionalJump, Jump, Label, Return
 from ailment.expression import Const, UnaryOp, MultiStatementExpression
-from angr.utils.graph import GraphUtils, TemporaryNode, PostDominators
+from angr.utils.graph import GraphUtils
 from ....knowledge_plugins.cfg import IndirectJumpType
 from ....utils.constants import SWITCH_MISSING_DEFAULT_NODE_ADDR
 from ....utils.graph import dominates, to_acyclic_graph, dfs_back_edges
@@ -24,7 +24,6 @@ from ..utils import (
     is_empty_or_label_only_node,
     has_nonlabel_statements,
     first_nonlabel_statement,
-    structured_node_is_simple_return,
 )
 from ..call_counter import AILCallCounter
 from .structurer_nodes import (
@@ -84,7 +83,7 @@ class PhoenixStructurer(StructurerBase):
         func: Optional["Function"] = None,
         case_entry_to_switch_head: dict[int, int] | None = None,
         parent_region=None,
-        improve_structurer=True,
+        improve_algorithm=False,
         use_multistmtexprs: MultiStmtExprMode = MultiStmtExprMode.MAX_ONE_CALL,
         **kwargs,
     ):
@@ -95,7 +94,6 @@ class PhoenixStructurer(StructurerBase):
             func=func,
             case_entry_to_switch_head=case_entry_to_switch_head,
             parent_region=parent_region,
-            improve_structurer=improve_structurer,
             **kwargs,
         )
@@ -112,13 +110,17 @@ class PhoenixStructurer(StructurerBase):
         # absorbed into other SequenceNodes
         self.dowhile_known_tail_nodes: set = set()
-        self._phoenix_improved = self._improve_structurer
+        # in reimplementing the core phoenix algorithm from the phoenix decompiler paper, two types of changes were
+        # made to the algorithm:
+        # 1. Mandatory fixes to correct flaws we found in the algorithm
+        # 2. Optional fixes to improve the results of already correct choices
+        #
+        # the improve_algorithm flag controls whether the optional fixes are applied. these are disabled by default
+        # to be as close to the original algorithm as possible. for best results, enable this flag.
+        self._improve_algorithm = improve_algorithm
         self._edge_virtualization_hints = []
         self._use_multistmtexprs = use_multistmtexprs
-        if not self._phoenix_improved:
-            self._use_multistmtexprs = MultiStmtExprMode.NEVER
         self._analyze()
     @staticmethod
@@ -246,7 +248,7 @@ class PhoenixStructurer(StructurerBase):
             self._rewrite_jumps_to_continues(loop_node.sequence_node, loop_node=loop_node)
             return True
-        if self._phoenix_improved:
+        if self._improve_algorithm:
             matched, loop_node, successor_node = self._match_cyclic_while_with_single_successor(
                 node, head, graph, full_graph
             )
@@ -379,7 +381,7 @@ class PhoenixStructurer(StructurerBase):
                             return True, loop_node, right
-                if self._phoenix_improved:
+                if self._improve_algorithm:
                     if full_graph.out_degree[node] == 1:
                         # while (true) { ...; if (...) break; }
                         _, _, head_block = self._find_node_going_to_dst(node, left, condjump_only=True)
@@ -498,7 +500,7 @@ class PhoenixStructurer(StructurerBase):
                             self._remove_last_statement_if_jump(succ)
                             drop_succ = False
-                            if self._phoenix_improved:
+                            if self._improve_algorithm:
                                 # absorb the entire succ block if possible
                                 if self._is_sequential_statement_block(succ) and self._should_use_multistmtexprs(succ):
                                     stmts = self._build_multistatementexpr_statements(succ)
@@ -1004,7 +1006,7 @@ class PhoenixStructurer(StructurerBase):
             any_matches |= matched
             if matched:
                 break
-            if self._phoenix_improved:
+            if self._improve_algorithm:
                 l.debug("... matching acyclic ITE with short-circuit conditions at %r", node)
                 matched = self._match_acyclic_short_circuit_conditions(graph, full_graph, node)
                 l.debug("... matched: %s", matched)
@@ -1307,7 +1309,7 @@ class PhoenixStructurer(StructurerBase):
         graph,
         full_graph,
     ) -> tuple[ODict, Any, set[Any]]:
-        cases: ODict[int | tuple[int], SequenceNode] = OrderedDict()
+        cases: ODict[int | tuple[int], SequenceNode] = ODict()
         to_remove = set()
         # it is possible that the default node gets duplicated by other analyses and creates a default node (addr.a)
@@ -2108,7 +2110,7 @@ class PhoenixStructurer(StructurerBase):
         return None
     def _last_resort_refinement(self, head, graph: networkx.DiGraph, full_graph: networkx.DiGraph | None) -> bool:
-        if self._phoenix_improved:
+        if self._improve_algorithm:
             while self._edge_virtualization_hints:
                 src, dst = self._edge_virtualization_hints.pop(0)
                 if graph.has_edge(src, dst):
@@ -2229,6 +2231,15 @@ class PhoenixStructurer(StructurerBase):
             remove_last_statement(src)
     def _should_use_multistmtexprs(self, node: Block | BaseNode) -> bool:
+        """
+        The original Phoenix algorithm had no support for multi-stmt expressions, such as the following:
+        if ((x = y) && z) { ... }
+        There are multiple levels at which multi-stmt expressions can be used. If the Phoenix algorithm is not
+        set to be in improved mode, then we should not use multi-stmt expressions at all.
+        """
+        if not self._improve_algorithm:
+            return False
         if self._use_multistmtexprs == MultiStmtExprMode.NEVER:
             return False
         if self._use_multistmtexprs == MultiStmtExprMode.ALWAYS:
@@ -2313,7 +2324,6 @@ class PhoenixStructurer(StructurerBase):
                     walker.block_id += 1
                 if _check(block.nodes[-1].statements[-1]):
                     walker.parent_and_block.append((walker.block_id, parent, block))
-                    return
         def _handle_BreakNode(break_node: BreakNode, parent=None, **kwargs):  # pylint:disable=unused-argument
             walker.block_id += 1
@@ -2324,7 +2334,6 @@ class PhoenixStructurer(StructurerBase):
             ):
                 # FIXME: idx is ignored
                 walker.parent_and_block.append((walker.block_id, parent, break_node))
-                return
         walker = SequenceWalker(
             handlers={
@@ -2502,84 +2511,12 @@ class PhoenixStructurer(StructurerBase):
                 break
         return None
+    # pylint: disable=unused-argument,no-self-use
     def _order_virtualizable_edges(self, graph: networkx.DiGraph, edges: list, node_seq: dict[Any, int]) -> list:
         """
         Returns a list of edges that are ordered by the best edges to virtualize first.
-        The criteria for "best" is defined by a variety of heuristics described below.
         """
-        if len(edges) <= 1:
-            return edges
-        # TODO: the graph we have here is not an accurate graph and can have no "entry node". We need a better graph.
-        try:
-            entry_node = [node for node in graph.nodes if graph.in_degree(node) == 0][0]
-        except IndexError:
-            entry_node = None
-        best_edges = edges
-        if self._phoenix_improved and entry_node is not None:
-            # the first few heuristics are based on the post-dominator count of the edge
-            # so we collect them for each candidate edge
-            edge_postdom_count = {}
-            edge_sibling_count = {}
-            for edge in edges:
-                _, dst = edge
-                graph_copy = networkx.DiGraph(graph)
-                graph_copy.remove_edge(*edge)
-                sibling_cnt = graph_copy.in_degree(dst)
-                if sibling_cnt == 0:
-                    continue
-                edge_sibling_count[edge] = sibling_cnt
-                post_dom_graph = PostDominators(graph_copy, entry_node).post_dom
-                post_doms = set()
-                for postdom_node, dominatee in post_dom_graph.edges():
-                    if not isinstance(postdom_node, TemporaryNode) and not isinstance(dominatee, TemporaryNode):
-                        post_doms.add((postdom_node, dominatee))
-                edge_postdom_count[edge] = len(post_doms)
-                # H1: the edge that has the least amount of sibling edges should be virtualized first
-                # this is believed to reduce the amount of virtualization needed in future rounds and increase
-                # the edges that enter a single outer-scope if-stmt
-                if edge_sibling_count:
-                    min_sibling_count = min(edge_sibling_count.values())
-                    best_edges = [edge for edge, cnt in edge_sibling_count.items() if cnt == min_sibling_count]
-                    if len(best_edges) == 1:
-                        return best_edges
-                    # create the next heuristic based on the best edges from the previous heuristic
-                    filtered_edge_postdom_count = edge_postdom_count.copy()
-                    for edge in list(edge_postdom_count.keys()):
-                        if edge not in best_edges:
-                            del filtered_edge_postdom_count[edge]
-                    if filtered_edge_postdom_count:
-                        edge_postdom_count = filtered_edge_postdom_count
-                # H2: the edge, when removed, that causes the most post-dominators of the graph should be virtualized
-                # first. this is believed to make the code more linear looking be reducing the amount of scopes.
-                # informally, we believe post-dominators to be an inverse indicator of the number of scopes present
-                if edge_postdom_count:
-                    max_postdom_count = max(edge_postdom_count.values())
-                    best_edges = [edge for edge, cnt in edge_postdom_count.items() if cnt == max_postdom_count]
-                    if len(best_edges) == 1:
-                        return best_edges
-                # H3: the edge that goes directly to a return statement should be virtualized first
-                # this is believed to be good because it can be corrected in later optimization by duplicating
-                # the return
-                candidate_edges = best_edges
-                best_edges = []
-                for src, dst in candidate_edges:
-                    if graph.has_node(dst) and structured_node_is_simple_return(dst, graph):
-                        best_edges.append((src, dst))
-                if len(best_edges) == 1:
-                    return best_edges
-                elif not best_edges:
-                    best_edges = candidate_edges
-        # if we have another tie, or we never used improved heuristics, then we do the chick_order.
-        return PhoenixStructurer._chick_order_edges(best_edges, node_seq)
+        return PhoenixStructurer._chick_order_edges(edges, node_seq)
     @staticmethod
     def _chick_order_edges(edges: list, node_seq: dict[Any, int]) -> list:

angr/analyses/decompiler/structuring/recursive_structurer.py CHANGED Viewed

@@ -34,14 +34,12 @@ class RecursiveStructurer(Analysis):
         cond_proc=None,
         func: Optional["Function"] = None,
         structurer_cls: type | None = None,
-        improve_structurer=True,
         **kwargs,
     ):
         self._region = region
         self.cond_proc = cond_proc if cond_proc is not None else ConditionProcessor(self.project.arch)
         self.function = func
         self.structurer_cls = structurer_cls if structurer_cls is not None else DreamStructurer
-        self.improve_structurer = improve_structurer
         self.structurer_options = kwargs
         self.result = None
@@ -91,7 +89,6 @@ class RecursiveStructurer(Analysis):
                     case_entry_to_switch_head=self._case_entry_to_switch_head,
                     func=self.function,
                     parent_region=parent_region,
-                    improve_structurer=self.improve_structurer,
                     **self.structurer_options,
                 )
                 # replace this region with the resulting node in its parent region... if it's not an orphan

angr/analyses/decompiler/structuring/sailr.py ADDED Viewed

@@ -0,0 +1,111 @@
+from typing import Any
+import networkx
+from ..utils import structured_node_is_simple_return
+from ....utils.graph import PostDominators, TemporaryNode
+from .phoenix import PhoenixStructurer
+class SAILRStructurer(PhoenixStructurer):
+    """
+    The SAILR structuring algorithm is the phoenix-based algorithm from the USENIX 2024 paper SAILR.
+    The entirety of the algorithm is implemented across this class and various optimization passes in the decompiler.
+    To find each optimization class, simply search for optimizations which reference this class.NAME.
+    At a high-level, SAILR does three things different from the traditional Phoenix schema-based algorithm:
+    1. It recursively structures the graph, rather than doing it in a single pass. This allows decisions to be made
+        based on the current state of what the decompilation would look like.
+    2. It performs deoptimizations targeting specific optimizations that introduce gotos and mis-structured code.
+        It can only do this because of the recursive nature of the algorithm.
+    3. It uses a more advanced heuristic for virtualizing edges, which is implemented in this class.
+    Additionally, some changes in Phoenix are only activated when SAILR is used.
+    """
+    NAME = "sailr"
+    def __init__(self, region, improve_phoenix=True, **kwargs):
+        super().__init__(
+            region,
+            improve_algorithm=improve_phoenix,
+            **kwargs,
+        )
+    def _order_virtualizable_edges(self, graph: networkx.DiGraph, edges: list, node_seq: dict[Any, int]) -> list:
+        """
+        The criteria for "best" is defined by a variety of heuristics described below.
+        """
+        if len(edges) <= 1:
+            return edges
+        # TODO: the graph we have here is not an accurate graph and can have no "entry node". We need a better graph.
+        try:
+            entry_node = [node for node in graph.nodes if graph.in_degree(node) == 0][0]
+        except IndexError:
+            entry_node = None
+        best_edges = edges
+        if entry_node is not None:
+            # the first few heuristics are based on the post-dominator count of the edge
+            # so we collect them for each candidate edge
+            edge_postdom_count = {}
+            edge_sibling_count = {}
+            for edge in edges:
+                _, dst = edge
+                graph_copy = networkx.DiGraph(graph)
+                graph_copy.remove_edge(*edge)
+                sibling_cnt = graph_copy.in_degree(dst)
+                if sibling_cnt == 0:
+                    continue
+                edge_sibling_count[edge] = sibling_cnt
+                post_dom_graph = PostDominators(graph_copy, entry_node).post_dom
+                post_doms = set()
+                for postdom_node, dominatee in post_dom_graph.edges():
+                    if not isinstance(postdom_node, TemporaryNode) and not isinstance(dominatee, TemporaryNode):
+                        post_doms.add((postdom_node, dominatee))
+                edge_postdom_count[edge] = len(post_doms)
+                # H1: the edge that has the least amount of sibling edges should be virtualized first
+                # this is believed to reduce the amount of virtualization needed in future rounds and increase
+                # the edges that enter a single outer-scope if-stmt
+                if edge_sibling_count:
+                    min_sibling_count = min(edge_sibling_count.values())
+                    best_edges = [edge for edge, cnt in edge_sibling_count.items() if cnt == min_sibling_count]
+                    if len(best_edges) == 1:
+                        return best_edges
+                    # create the next heuristic based on the best edges from the previous heuristic
+                    filtered_edge_postdom_count = edge_postdom_count.copy()
+                    for edge in list(edge_postdom_count.keys()):
+                        if edge not in best_edges:
+                            del filtered_edge_postdom_count[edge]
+                    if filtered_edge_postdom_count:
+                        edge_postdom_count = filtered_edge_postdom_count
+                # H2: the edge that, when removed, yields the most post-dominators in the graph should be virtualized
+                # first. this is believed to make the code more linear looking by reducing the number of scopes.
+                # informally, we believe post-dominators to be an inverse indicator of the number of scopes present
+                if edge_postdom_count:
+                    max_postdom_count = max(edge_postdom_count.values())
+                    best_edges = [edge for edge, cnt in edge_postdom_count.items() if cnt == max_postdom_count]
+                    if len(best_edges) == 1:
+                        return best_edges
+                # H3: the edge that goes directly to a return statement should be virtualized first
+                # this is believed to be good because it can be corrected in later optimization by duplicating
+                # the return
+                candidate_edges = best_edges
+                best_edges = []
+                for src, dst in candidate_edges:
+                    if graph.has_node(dst) and structured_node_is_simple_return(dst, graph):
+                        best_edges.append((src, dst))
+                if len(best_edges) == 1:
+                    return best_edges
+                elif not best_edges:
+                    best_edges = candidate_edges
+        # if we have another tie, or we never used improved heuristics, then we do the default ordering.
+        return super()._order_virtualizable_edges(graph, best_edges, node_seq)

angr/analyses/decompiler/structuring/structurer_base.py CHANGED Viewed

@@ -53,7 +53,6 @@ class StructurerBase(Analysis):
         func: Optional["Function"] = None,
         case_entry_to_switch_head: dict[int, int] | None = None,
         parent_region=None,
-        improve_structurer=True,
         **kwargs,
     ):
         self._region: "GraphRegion" = region
@@ -61,7 +60,6 @@ class StructurerBase(Analysis):
         self.function = func
         self._case_entry_to_switch_head = case_entry_to_switch_head
         self._parent_region = parent_region
-        self._improve_structurer = improve_structurer
         self.cond_proc = (
             condition_processor if condition_processor is not None else ConditionProcessor(self.project.arch)