PyPI - angr - Versions diffs - 9.2.166__cp310-abi3-macosx_10_12_x86_64.whl → 9.2.167__cp310-abi3-macosx_10_12_x86_64.whl - Mend

angr 9.2.166__cp310-abi3-macosx_10_12_x86_64.whl → 9.2.167__cp310-abi3-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of angr might be problematic. Click here for more details.

Files changed (26)

angr/__init__.py +1 -1
angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +8 -8
angr/analyses/decompiler/clinic.py +3 -0
angr/analyses/decompiler/condition_processor.py +44 -1
angr/analyses/decompiler/decompiler.py +6 -0
angr/analyses/decompiler/node_replacer.py +42 -0
angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
angr/analyses/decompiler/optimization_passes/optimization_pass.py +3 -0
angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +5 -76
angr/analyses/decompiler/region_identifier.py +12 -3
angr/analyses/decompiler/sequence_walker.py +11 -7
angr/analyses/decompiler/structuring/phoenix.py +645 -305
angr/analyses/decompiler/structuring/structurer_base.py +75 -1
angr/analyses/decompiler/utils.py +71 -28
angr/analyses/reaching_definitions/engine_vex.py +3 -2
angr/procedures/glibc/scanf.py +8 -0
angr/procedures/glibc/sscanf.py +4 -0
angr/rustylib.abi3.so +0 -0
angr/unicornlib.dylib +0 -0
angr/utils/graph.py +62 -24
{angr-9.2.166.dist-info → angr-9.2.167.dist-info}/METADATA +5 -5
{angr-9.2.166.dist-info → angr-9.2.167.dist-info}/RECORD +26 -25
{angr-9.2.166.dist-info → angr-9.2.167.dist-info}/WHEEL +0 -0
{angr-9.2.166.dist-info → angr-9.2.167.dist-info}/entry_points.txt +0 -0
{angr-9.2.166.dist-info → angr-9.2.167.dist-info}/licenses/LICENSE +0 -0
{angr-9.2.166.dist-info → angr-9.2.167.dist-info}/top_level.txt +0 -0

angr/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@
 # pylint: disable=wrong-import-position
 from __future__ import annotations
-__version__ = "9.2.166"
+__version__ = "9.2.167"
 if bytes is str:
     raise Exception(

angr/analyses/cfg/indirect_jump_resolvers/jumptable.py CHANGED Viewed

@@ -920,32 +920,32 @@ class JumpTableResolver(IndirectJumpResolver):
         # more sanity checks
         # for a typical jump table, the current block has only one predecessor, and the predecessor to the current
-        # block has two successors (not including itself)
+        # block has two successors
         # for a typical vtable call (or jump if at the end of a function), the block as two predecessors that form a
         # diamond shape
         curr_node = func.get_node(addr)
-        if curr_node is None or curr_node not in func.graph:
+        if curr_node is None or curr_node not in func.transition_graph:
             l.debug("Could not find the node %#x in the function transition graph", addr)
             return False, None
-        preds = list(func.graph.predecessors(curr_node))
+        preds = list(func.transition_graph.predecessors(curr_node))
         pred_endaddrs = {pred.addr + pred.size for pred in preds}  # handle non-normalized CFGs
         if func_graph_complete and not is_arm and not potential_call_table:
             # on ARM you can do a single-block jump table...
             if len(pred_endaddrs) == 1:
-                pred_succs = [succ for succ in func.graph.successors(preds[0]) if succ.addr != preds[0].addr]
+                pred_succs = [succ for succ in func.transition_graph.successors(preds[0]) if succ.addr != preds[0].addr]
                 if len(pred_succs) != 2:
                     l.debug("Expect two successors to the single predecessor, found %d.", len(pred_succs))
                     return False, None
             elif len(pred_endaddrs) == 2 and len(preds) == 2:
                 pred_succs = set(
-                    [succ for succ in func.graph.successors(preds[0]) if succ.addr != preds[0].addr]
-                    + [succ for succ in func.graph.successors(preds[1]) if succ.addr != preds[1].addr]
+                    [succ for succ in func.transition_graph.successors(preds[0]) if succ.addr != preds[0].addr]
+                    + [succ for succ in func.transition_graph.successors(preds[1]) if succ.addr != preds[1].addr]
                 )
                 is_diamond = False
                 if len(pred_succs) == 2:
                     non_node_succ = next(iter(pred_succ for pred_succ in pred_succs if pred_succ is not curr_node))
-                    while func.graph.out_degree[non_node_succ] == 1:
-                        non_node_succ = next(iter(func.graph.successors(non_node_succ)))
+                    while func.transition_graph.out_degree[non_node_succ] == 1:
+                        non_node_succ = next(iter(func.transition_graph.successors(non_node_succ)))
                         if non_node_succ == curr_node:
                             is_diamond = True
                             break

angr/analyses/decompiler/clinic.py CHANGED Viewed

@@ -142,6 +142,7 @@ class Clinic(Analysis):
         optimization_scratch: dict[str, Any] | None = None,
         desired_variables: set[str] | None = None,
         force_loop_single_exit: bool = True,
+        refine_loops_with_single_successor: bool = False,
         complete_successors: bool = False,
         max_type_constraints: int = 100_000,
         type_constraint_set_degradation_threshold: int = 150,
@@ -212,6 +213,7 @@ class Clinic(Analysis):
         self._inlining_parents = inlining_parents or ()
         self._desired_variables = desired_variables
         self._force_loop_single_exit = force_loop_single_exit
+        self._refine_loops_with_single_successor = refine_loops_with_single_successor
         self._complete_successors = complete_successors
         self._register_save_areas_removed: bool = False
@@ -1550,6 +1552,7 @@ class Clinic(Analysis):
                 entry_node_addr=self.entry_node_addr,
                 scratch=self.optimization_scratch,
                 force_loop_single_exit=self._force_loop_single_exit,
+                refine_loops_with_single_successor=self._refine_loops_with_single_successor,
                 complete_successors=self._complete_successors,
                 stack_pointer_tracker=stack_pointer_tracker,
                 **kwargs,

angr/analyses/decompiler/condition_processor.py CHANGED Viewed

@@ -239,6 +239,24 @@ class ConditionProcessor:
         condition translation if possible.
         """
+        if isinstance(src, SequenceNode) and src.nodes and isinstance(src.nodes[-1], ConditionNode):
+            cond_node = src.nodes[-1]
+            if (
+                isinstance(cond_node.true_node, ailment.Block)
+                and isinstance(cond_node.false_node, ailment.Block)
+                and cond_node.true_node.statements
+                and cond_node.false_node.statements
+            ):
+                last_stmt_true = self.get_last_statement(cond_node.true_node)
+                last_stmt_false = self.get_last_statement(cond_node.false_node)
+                if (
+                    isinstance(last_stmt_true, ailment.Stmt.Jump)
+                    and isinstance(last_stmt_false, ailment.Stmt.Jump)
+                    and isinstance(last_stmt_true.target, ailment.Expr.Const)
+                    and isinstance(last_stmt_false.target, ailment.Expr.Const)
+                ):
+                    return {last_stmt_true.target.value, last_stmt_false.target.value} == {dst0.addr, dst1.addr}
         if src in graph and graph.out_degree[src] == 2 and graph.has_edge(src, dst0) and graph.has_edge(src, dst1):
             # sometimes the last statement is the conditional jump. sometimes it's the first statement of the block
             if isinstance(src, ailment.Block) and src.statements and is_head_controlled_loop_block(src):
@@ -247,7 +265,10 @@ class ConditionProcessor:
                 )
                 assert last_stmt is not None
             else:
-                last_stmt = self.get_last_statement(src)
+                try:
+                    last_stmt = self.get_last_statement(src)
+                except EmptyBlockNotice:
+                    last_stmt = None
             if isinstance(last_stmt, ailment.Stmt.ConditionalJump):
                 return True
@@ -258,6 +279,28 @@ class ConditionProcessor:
         return claripy.is_true(claripy.Not(edge_cond_left) == edge_cond_right)  # type: ignore
     def recover_edge_condition(self, graph: networkx.DiGraph, src, dst):
+        def _check_condnode_and_get_condition(cond_node: ConditionNode) -> claripy.ast.Bool | None:
+            for cond_block, negate in [(cond_node.true_node, False), (cond_node.false_node, True)]:
+                if isinstance(cond_block, ailment.Block) and cond_block.statements:
+                    last_stmt = self.get_last_statement(cond_block)
+                    if (
+                        isinstance(last_stmt, ailment.Stmt.Jump)
+                        and isinstance(last_stmt.target, ailment.Expr.Const)
+                        and last_stmt.target.value == dst.addr
+                    ):
+                        return claripy.Not(cond_node.condition) if negate else cond_node.condition
+            return None
+        if isinstance(src, SequenceNode) and src.nodes and isinstance(src.nodes[-1], ConditionNode):
+            predicate = _check_condnode_and_get_condition(src.nodes[-1])
+            if predicate is not None:
+                return predicate
+        if isinstance(src, ConditionNode):
+            predicate = _check_condnode_and_get_condition(src)
+            if predicate is not None:
+                return predicate
         edge = src, dst
         edge_data = graph.get_edge_data(*edge)
         edge_type = edge_data.get("type", "transition") if edge_data is not None else "transition"

angr/analyses/decompiler/decompiler.py CHANGED Viewed

@@ -222,6 +222,7 @@ class Decompiler(Analysis):
         # determine a few arguments according to the structuring algorithm
         fold_callexprs_into_conditions = False
         self._force_loop_single_exit = True
+        self._refine_loops_with_single_successor = False
         self._complete_successors = False
         self._recursive_structurer_params = self.options_to_params(self.options_by_class["recursive_structurer"])
         if "structurer_cls" not in self._recursive_structurer_params:
@@ -229,6 +230,7 @@ class Decompiler(Analysis):
         # is the algorithm based on Phoenix (a schema-based algorithm)?
         if issubclass(self._recursive_structurer_params["structurer_cls"], PhoenixStructurer):
             self._force_loop_single_exit = False
+            # self._refine_loops_with_single_successor = True
             self._complete_successors = True
             fold_callexprs_into_conditions = True
@@ -261,6 +263,7 @@ class Decompiler(Analysis):
                 desired_variables=self._desired_variables,
                 optimization_scratch=self._optimization_scratch,
                 force_loop_single_exit=self._force_loop_single_exit,
+                refine_loops_with_single_successor=self._refine_loops_with_single_successor,
                 complete_successors=self._complete_successors,
                 ail_graph=self._clinic_graph,
                 arg_vvars=self._clinic_arg_vvars,
@@ -396,6 +399,7 @@ class Decompiler(Analysis):
             cond_proc=condition_processor,
             update_graph=update_graph,
             force_loop_single_exit=self._force_loop_single_exit,
+            refine_loops_with_single_successor=self._refine_loops_with_single_successor,
             complete_successors=self._complete_successors,
             entry_node_addr=self.clinic.entry_node_addr,
             **self.options_to_params(self.options_by_class["region_identifier"]),
@@ -444,6 +448,7 @@ class Decompiler(Analysis):
                 entry_node_addr=self.clinic.entry_node_addr,
                 scratch=self._optimization_scratch,
                 force_loop_single_exit=self._force_loop_single_exit,
+                refine_loops_with_single_successor=self._refine_loops_with_single_successor,
                 complete_successors=self._complete_successors,
                 **kwargs,
             )
@@ -507,6 +512,7 @@ class Decompiler(Analysis):
                 entry_node_addr=self.clinic.entry_node_addr,
                 scratch=self._optimization_scratch,
                 force_loop_single_exit=self._force_loop_single_exit,
+                refine_loops_with_single_successor=self._refine_loops_with_single_successor,
                 complete_successors=self._complete_successors,
                 peephole_optimizations=self._peephole_optimizations,
                 avoid_vvar_ids=self._copied_var_ids,

angr/analyses/decompiler/node_replacer.py ADDED Viewed

@@ -0,0 +1,42 @@
+from __future__ import annotations
+from angr.ailment import Block
+from .sequence_walker import SequenceWalker
+from .structuring.structurer_nodes import BaseNode, SequenceNode, MultiNode
+class NodeReplacer(SequenceWalker):
+    """
+    Walks `root` and replaces each node found in `replacements` with the node it maps to.
+    """
+    def __init__(self, root: BaseNode, replacements: dict) -> None:
+        super().__init__(update_seqnode_in_place=False)
+        self.root = root
+        self.replacements = replacements
+        self.result: BaseNode = self.walk(self.root)  # type:ignore
+    def _handle(self, node: BaseNode, **kwargs):
+        return self.replacements[node] if node in self.replacements else super()._handle(node, **kwargs)
+    def _handle_MultiNode(self, node: MultiNode, **kwargs):
+        changed = False
+        nodes_copy = list(node.nodes)
+        i = len(nodes_copy) - 1
+        has_non_block = False
+        while i > -1:
+            node_ = nodes_copy[i]
+            new_node = self._handle(node_, parent=node, index=i)
+            if new_node is not None:
+                changed = True
+                nodes_copy[i] = new_node
+                if not isinstance(new_node, Block):
+                    has_non_block = True
+            i -= 1
+        if not changed:
+            return None
+        if has_non_block:
+            return SequenceNode(node.addr, nodes=nodes_copy)
+        return MultiNode(nodes_copy, addr=node.addr, idx=node.idx)

angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py CHANGED Viewed

@@ -163,7 +163,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
             require_gotos=False,
             prevent_new_gotos=False,
             simplify_ail=False,
-            must_improve_rel_quality=True,
+            must_improve_rel_quality=False,
             **kwargs,
         )

angr/analyses/decompiler/optimization_passes/optimization_pass.py CHANGED Viewed

@@ -135,6 +135,7 @@ class OptimizationPass(BaseOptimizationPass):
         entry_node_addr=None,
         scratch: dict[str, Any] | None = None,
         force_loop_single_exit: bool = True,
+        refine_loops_with_single_successor: bool = False,
         complete_successors: bool = False,
         avoid_vvar_ids: set[int] | None = None,
         arg_vvars: set[int] | None = None,
@@ -158,6 +159,7 @@ class OptimizationPass(BaseOptimizationPass):
             entry_node_addr if entry_node_addr is not None else (func.addr, None)
         )
         self._force_loop_single_exit = force_loop_single_exit
+        self._refine_loops_with_single_successor = refine_loops_with_single_successor
         self._complete_successors = complete_successors
         self._avoid_vvar_ids = avoid_vvar_ids or set()
         self._peephole_optimizations = peephole_optimizations
@@ -397,6 +399,7 @@ class OptimizationPass(BaseOptimizationPass):
             cond_proc=condition_processor or ConditionProcessor(self.project.arch),
             update_graph=update_graph,
             force_loop_single_exit=self._force_loop_single_exit,
+            refine_loops_with_single_successor=self._refine_loops_with_single_successor,
             complete_successors=self._complete_successors,
             entry_node_addr=self.entry_node_addr,
         )

angr/analyses/decompiler/optimization_passes/return_duplicator_low.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing import Any
 import networkx
 from angr.ailment import Block
-from angr.ailment.statement import ConditionalJump, Label
+from angr.ailment.statement import ConditionalJump
 from .return_duplicator_base import ReturnDuplicatorBase
 from .optimization_pass import StructuringOptimizationPass
@@ -53,7 +53,7 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
         prevent_new_gotos: bool = True,
         minimize_copies_for_regions: bool = True,
         region_identifier=None,
-        vvar_id_start: int | None = None,
+        vvar_id_start: int = 0,
         scratch: dict[str, Any] | None = None,
         max_func_blocks: int = 500,
         **kwargs,
@@ -91,8 +91,9 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
         self,
         src: Block,
         dst: Block,
-        graph: networkx.DiGraph = None,
         max_level_check=1,
+        *,
+        graph: networkx.DiGraph,
     ):
         """
         TODO: Implement a more principled way of checking if an edge is a goto edge with Phoenix's structuring info
@@ -100,6 +101,7 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
         above a goto edge as the goto src.
         """
         # Do a simple and fast check first
+        assert self._goto_manager is not None
         is_simple_goto = self._goto_manager.is_goto_edge(src, dst)
         if is_simple_goto:
             return True
@@ -155,79 +157,6 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
                 # keep testing the next edge
                 node = succ
-            # Special case 3: In Phoenix, regions full of only if-stmts can be collapsed and moved. This causes
-            # the goto manager to report gotos that are at the top of the region instead of ones in the middle of it.
-            # Because of this, we need to gather all the nodes above the original src and check if any of them
-            # go to the destination. Additionally, we need to do this on the supergraph to get rid of
-            # goto edges that are removed by Phoenix.
-            # This case is observed in the test case `TestDecompiler.test_tail_tail_bytes_ret_dup`.
-            if self._supergraph is None:
-                return False
-            super_to_og_nodes = {n: self._supergraph.nodes[n]["original_nodes"] for n in self._supergraph.nodes}
-            og_to_super_nodes = {og: super_n for super_n, ogs in super_to_og_nodes.items() for og in ogs}
-            super_src = og_to_super_nodes.get(src)
-            super_dst = og_to_super_nodes.get(dst)
-            if super_src is None or super_dst is None:
-                return False
-            # collect all nodes which have only an if-stmt in them that are ancestors of super_src
-            check_blks = {super_src}
-            level_blocks = {super_src}
-            for _ in range(10):
-                done = False
-                if_blks = set()
-                for lblock in level_blocks:
-                    preds = list(self._supergraph.predecessors(lblock))
-                    for pred in preds:
-                        only_cond_jump = all(isinstance(s, (ConditionalJump, Label)) for s in pred.statements)
-                        if only_cond_jump:
-                            if_blks.add(pred)
-                    done = len(if_blks) == 0
-                if done:
-                    break
-                check_blks |= if_blks
-                level_blocks = if_blks
-            # convert all the found if-only super-blocks back into their original blocks
-            og_check_blocks = set()
-            for blk in check_blks:
-                og_check_blocks |= set(super_to_og_nodes[blk])
-            # check if any of the original blocks are gotos to the destination
-            goto_hits = 0
-            for block in og_check_blocks:
-                if self._goto_manager.is_goto_edge(block, dst):
-                    goto_hits += 1
-            # Although it is good to find a goto in the if-only block region, having more than a single goto
-            # existing that goes to the same dst is a bad sign. This can be seen in the the following test:
-            # TestDecompiler.test_dd_iread_ret_dup_region
-            #
-            # It occurs when you have something like:
-            # ```
-            # if (a || c)
-            #     goto target;
-            # target:
-            # return 0;
-            # ```
-            #
-            #
-            # This looks like an edge from (a, target) and (c, target) but it is actually a single edge.
-            # If you allow both to duplicate you get the following:
-            # ```
-            # if (a):
-            #    return
-            # if (c):
-            #    return
-            # ```
-            # This is not the desired behavior.
-            # So we need to check if there is only a single goto that goes to the destination.
-            return goto_hits == 1
         return False
     def _analyze(self, cache=None):

angr/analyses/decompiler/region_identifier.py CHANGED Viewed

@@ -43,6 +43,7 @@ class RegionIdentifier(Analysis):
         update_graph=True,
         largest_successor_tree_outside_loop=True,
         force_loop_single_exit=True,
+        refine_loops_with_single_successor=False,
         complete_successors=False,
         entry_node_addr: tuple[int, int | None] | None = None,
     ):
@@ -70,6 +71,7 @@ class RegionIdentifier(Analysis):
         self.regions_by_block_addrs = []
         self._largest_successor_tree_outside_loop = largest_successor_tree_outside_loop
         self._force_loop_single_exit = force_loop_single_exit
+        self._refine_loops_with_single_successor = refine_loops_with_single_successor
         self._complete_successors = complete_successors
         # we keep a dictionary of node and their traversal order in a quasi-topological traversal and update this
         # dictionary as we update the graph
@@ -265,13 +267,18 @@ class RegionIdentifier(Analysis):
         # special case: any node with more than two non-self successors are probably the head of a switch-case. we
         # should include all successors into the loop subgraph.
+        # we must be extra careful here to not include nodes that are reachable from outside the loop subgraph. an
+        # example is in binary 064e1d62c8542d658d83f7e231cc3b935a1f18153b8aea809dcccfd446a91c93, loop 0x40d7b0 should
+        # not include block 0x40d9d5 because this node has an out-of-loop-body predecessor (block 0x40d795).
         while True:
             updated = False
             for node in list(loop_subgraph):
                 nonself_successors = [succ for succ in graph.successors(node) if succ is not node]
                 if len(nonself_successors) > 2:
                     for succ in nonself_successors:
-                        if not loop_subgraph.has_edge(node, succ):
+                        if not loop_subgraph.has_edge(node, succ) and all(
+                            pred in loop_subgraph for pred in graph.predecessors(succ)
+                        ):
                             updated = True
                             loop_subgraph.add_edge(node, succ)
             if not updated:
@@ -280,7 +287,9 @@ class RegionIdentifier(Analysis):
         return set(loop_subgraph)
     def _refine_loop(self, graph: networkx.DiGraph, head, initial_loop_nodes, initial_exit_nodes):
-        if len(initial_exit_nodes) <= 1:
+        if (self._refine_loops_with_single_successor and len(initial_exit_nodes) == 0) or (
+            not self._refine_loops_with_single_successor and len(initial_exit_nodes) <= 1
+        ):
             return initial_loop_nodes, initial_exit_nodes
         refined_loop_nodes = initial_loop_nodes.copy()
@@ -713,7 +722,7 @@ class RegionIdentifier(Analysis):
         # visit the nodes in post-order
         region_created = False
-        for node in list(networkx.dfs_postorder_nodes(graph_copy, source=head)):
+        for node in list(GraphUtils.dfs_postorder_nodes_deterministic(graph_copy, head)):
             if node is dummy_endnode:
                 # skip the dummy endnode
                 continue

angr/analyses/decompiler/sequence_walker.py CHANGED Viewed

@@ -110,24 +110,28 @@ class SequenceWalker:
     def _handle_MultiNode(self, node, **kwargs):
         changed = False
-        nodes_copy = list(node.nodes)
+        nodes = node.nodes if self._update_seqnode_in_place else list(node.nodes)
         if self._force_forward_scan:
-            for i, node_ in enumerate(nodes_copy):
+            for i, node_ in enumerate(nodes):
                 new_node = self._handle(node_, parent=node, index=i)
                 if new_node is not None:
                     changed = True
-                    node.nodes[i] = new_node
+                    nodes[i] = new_node
         else:
-            i = len(nodes_copy) - 1
+            i = len(nodes) - 1
             while i > -1:
-                node_ = nodes_copy[i]
+                node_ = nodes[i]
                 new_node = self._handle(node_, parent=node, index=i)
                 if new_node is not None:
                     changed = True
-                    node.nodes[i] = new_node
+                    nodes[i] = new_node
                 i -= 1
-        return None if not changed else node
+        if not changed:
+            return None
+        if self._update_seqnode_in_place:
+            return node
+        return MultiNode(nodes, addr=node.addr, idx=node.idx)
     def _handle_SwitchCase(self, node, **kwargs):
         self._handle(node.switch_expr, parent=node, label="switch_expr")