PyPI - angr - Versions diffs - 9.2.125__py3-none-macosx_11_0_arm64.whl → 9.2.127__py3-none-macosx_11_0_arm64.whl - Mend

angr 9.2.125__py3-none-macosx_11_0_arm64.whl → 9.2.127__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of angr might be problematic. Click here for more details.

Files changed (51) hide show

angr/__init__.py +1 -1
angr/analyses/__init__.py +4 -0
angr/analyses/analysis.py +8 -2
angr/analyses/cfg/cfg_fast.py +12 -1
angr/analyses/decompiler/ail_simplifier.py +1 -0
angr/analyses/decompiler/callsite_maker.py +9 -1
angr/analyses/decompiler/clinic.py +2 -1
angr/analyses/decompiler/condition_processor.py +109 -73
angr/analyses/decompiler/decompilation_cache.py +4 -0
angr/analyses/decompiler/decompiler.py +21 -3
angr/analyses/decompiler/dephication/graph_vvar_mapping.py +1 -2
angr/analyses/decompiler/optimization_passes/__init__.py +15 -1
angr/analyses/decompiler/return_maker.py +1 -0
angr/analyses/decompiler/ssailification/rewriting.py +4 -0
angr/analyses/decompiler/ssailification/rewriting_engine.py +10 -3
angr/analyses/decompiler/ssailification/traversal.py +1 -0
angr/analyses/decompiler/ssailification/traversal_engine.py +15 -0
angr/analyses/decompiler/structured_codegen/c.py +18 -5
angr/analyses/decompiler/structured_codegen/dwarf_import.py +4 -1
angr/analyses/deobfuscator/__init__.py +18 -0
angr/analyses/deobfuscator/api_obf_finder.py +313 -0
angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +51 -0
angr/analyses/deobfuscator/irsb_reg_collector.py +85 -0
angr/analyses/deobfuscator/string_obf_finder.py +774 -0
angr/analyses/deobfuscator/string_obf_opt_passes.py +133 -0
angr/analyses/deobfuscator/string_obf_peephole_optimizer.py +47 -0
angr/analyses/reaching_definitions/function_handler_library/stdio.py +8 -1
angr/analyses/reaching_definitions/function_handler_library/string.py +2 -2
angr/analyses/s_liveness.py +3 -3
angr/analyses/s_propagator.py +74 -3
angr/analyses/unpacker/__init__.py +6 -0
angr/analyses/unpacker/obfuscation_detector.py +103 -0
angr/analyses/unpacker/packing_detector.py +138 -0
angr/angrdb/models.py +2 -1
angr/angrdb/serializers/kb.py +3 -3
angr/angrdb/serializers/structured_code.py +5 -3
angr/calling_conventions.py +4 -2
angr/engines/vex/claripy/irop.py +10 -5
angr/knowledge_base.py +1 -1
angr/knowledge_plugins/__init__.py +2 -2
angr/knowledge_plugins/obfuscations.py +36 -0
angr/knowledge_plugins/structured_code.py +1 -1
angr/lib/angr_native.dylib +0 -0
angr/utils/ssa/__init__.py +8 -3
{angr-9.2.125.dist-info → angr-9.2.127.dist-info}/METADATA +6 -6
{angr-9.2.125.dist-info → angr-9.2.127.dist-info}/RECORD +50 -40
{angr-9.2.125.dist-info → angr-9.2.127.dist-info}/WHEEL +1 -1
angr/knowledge_plugins/decompilation.py +0 -45
{angr-9.2.125.dist-info → angr-9.2.127.dist-info}/LICENSE +0 -0
{angr-9.2.125.dist-info → angr-9.2.127.dist-info}/entry_points.txt +0 -0
{angr-9.2.125.dist-info → angr-9.2.127.dist-info}/top_level.txt +0 -0

angr/analyses/deobfuscator/string_obf_opt_passes.py ADDED Viewed

@@ -0,0 +1,133 @@
+# pylint:disable=too-many-boolean-expressions
+from __future__ import annotations
+import archinfo
+from ailment import Block
+from ailment.statement import Statement, Call, Assignment
+from ailment.expression import Const, Register, VirtualVariable
+from angr.analyses.decompiler.optimization_passes.optimization_pass import OptimizationPass, OptimizationPassStage
+from angr.analyses.decompiler.optimization_passes import register_optimization_pass
+# Register-file offsets of rcx, rdx, r8, and r9. Writes to these registers are dropped from rewritten blocks on
+# AMD64. The architecture object is built once and shared by all four lookups.
+_AMD64_REGISTERS = archinfo.ArchAMD64().registers
+WIN64_REG_ARGS = {_AMD64_REGISTERS[reg_name][0] for reg_name in ("rcx", "rdx", "r8", "r9")}
+class StringObfType3Rewriter(OptimizationPass):
+    """
+    Type-3 optimization pass replaces deobfuscate_string calls with the deobfuscated strings, and then removes
+    arguments on the stack.
+    A block is rewritten when its last statement is a call, or an assignment whose source is a call, and the
+    instruction address of that statement is a key of ``kb.obfuscations.type3_deobfuscated_strings``. The value
+    stored under that key is the content of the string that replaces the call.
+    """
+    ARCHES = ["X86", "AMD64"]
+    PLATFORMS = ["windows"]
+    STAGE = OptimizationPassStage.AFTER_MAKING_CALLSITES
+    NAME = "Simplify Type 3 string deobfuscation calls"
+    DESCRIPTION = "Simplify Type 3 string deobfuscation calls"
+    stmt_classes = ()
+    def __init__(self, func, **kwargs):
+        super().__init__(func, **kwargs)
+        # run the pass right away
+        self.analyze()
+    def _check(self):
+        """
+        Report whether there is any type-3 deobfuscated string to work with.
+        :return: (True, None) if the knowledge base holds at least one type-3 deobfuscated string, (False, None)
+                 otherwise.
+        """
+        if self.kb.obfuscations.type3_deobfuscated_strings:
+            return True, None
+        return False, None
+    @staticmethod
+    def is_call_or_call_assignment(stmt) -> bool:
+        """
+        Test whether a statement is a call, either bare or as the source of an assignment.
+        """
+        return isinstance(stmt, Call) or (isinstance(stmt, Assignment) and isinstance(stmt.src, Call))
+    def _analyze(self, cache=None):
+        """
+        Rewrite every block whose last statement is a known type-3 deobfuscation call.
+        :param cache: Not used by this pass.
+        """
+        deobfuscated_strings = self.kb.obfuscations.type3_deobfuscated_strings
+        # find all blocks with type-3 deobfuscation calls. iterate over a snapshot of the graph because
+        # _update_block() is called inside the loop
+        for block in list(self._graph):
+            if not block.statements:
+                continue
+            last_stmt = block.statements[-1]
+            if not self.is_call_or_call_assignment(last_stmt):
+                continue
+            if last_stmt.ins_addr not in deobfuscated_strings:
+                continue
+            new_block = self._process_block(block, deobfuscated_strings[last_stmt.ins_addr])
+            if new_block is not None:
+                self._update_block(block, new_block)
+    def _process_block(self, block: Block, deobf_content: bytes) -> Block | None:
+        """
+        Build a copy of a block in which the trailing deobfuscation call is replaced by a call to "init_str".
+        The new call receives the first argument of the old call, a custom-string constant that refers to the
+        deobfuscated content, and the length of that content. Constant stack assignments that look like the
+        arguments of the old call are removed, and so are writes to rcx, rdx, r8, and r9 on AMD64.
+        :param block:           The block to rewrite. Its last statement must be a call or a call assignment.
+        :param deobf_content:   The deobfuscated string.
+        :return:                The rewritten copy of the block, or None if the call has no arguments and cannot be
+                                rewritten.
+        """
+        # FIXME: This rewriter is very specific to the implementation of the deobfuscation scheme. we can make it more
+        # generic when there are more cases available in the wild.
+        # TODO: Support multiple blocks
+        old_stmt: Statement = block.statements[-1]
+        old_call: Call = old_stmt.src if isinstance(old_stmt, Assignment) else old_stmt
+        if not old_call.args:
+            # without arguments there is no first argument to hand over to init_str. leave the block untouched
+            # instead of raising, and do it before a custom string gets allocated for nothing
+            return None
+        # replace the call
+        str_id = self.kb.custom_strings.allocate(deobf_content)
+        new_call = Call(
+            old_call.idx,
+            "init_str",
+            args=[
+                old_call.args[0],
+                Const(None, None, str_id, self.project.arch.bits, custom_string=True),
+                Const(None, None, len(deobf_content), self.project.arch.bits),
+            ],
+            ret_expr=old_call.ret_expr,
+            bits=old_call.bits,
+            **old_call.tags,
+        )
+        if isinstance(old_stmt, Assignment):
+            new_stmt = Assignment(old_stmt.idx, old_stmt.dst, new_call, **old_stmt.tags)
+        else:
+            new_stmt = new_call
+        statements = block.statements[:-1] + [new_stmt]
+        # remove N-2 continuous stack assignment
+        if len(deobf_content) > 2:
+            # stack offset -> index of the last statement that stores a constant no greater than 0xFF at that offset
+            stack_offset_to_stmtid: dict[int, int] = {}
+            for idx, stmt in enumerate(statements):
+                if (
+                    isinstance(stmt, Assignment)
+                    and isinstance(stmt.dst, VirtualVariable)
+                    and stmt.dst.was_stack
+                    and isinstance(stmt.dst.stack_offset, int)
+                    and isinstance(stmt.src, Const)
+                    and stmt.src.value <= 0xFF
+                ):
+                    stack_offset_to_stmtid[stmt.dst.stack_offset] = idx
+            sorted_offsets = sorted(stack_offset_to_stmtid)
+            if sorted_offsets:
+                spacing = 8  # FIXME: Make it adjustable
+                # the window cannot be wider than the number of candidates that we collected
+                distance = min(len(deobf_content) - 2, len(sorted_offsets) - 1)
+                for start_idx in range(len(sorted_offsets) - distance):
+                    # a window matches when its first and last offsets are exactly spacing * distance apart
+                    if sorted_offsets[start_idx] + spacing * distance == sorted_offsets[start_idx + distance]:
+                        # found them
+                        # remove these statements
+                        for i in range(start_idx, start_idx + distance + 1):
+                            statements[stack_offset_to_stmtid[sorted_offsets[i]]] = None
+                        break
+                statements = [stmt for stmt in statements if stmt is not None]
+        # remove writes to rdx, rcx, r8, and r9
+        if self.project.arch.name == "AMD64":
+            statements = [stmt for stmt in statements if not self._stmt_sets_win64_reg_arg(stmt)]
+        # return the new block
+        return block.copy(statements=statements)
+    @staticmethod
+    def _stmt_sets_win64_reg_arg(stmt) -> bool:
+        """
+        Test whether a statement assigns to a register whose offset is in WIN64_REG_ARGS.
+        """
+        return isinstance(stmt, Assignment) and isinstance(stmt.dst, Register) and stmt.dst.reg_offset in WIN64_REG_ARGS
+register_optimization_pass(StringObfType3Rewriter, presets=["fast", "full"])

angr/analyses/deobfuscator/string_obf_peephole_optimizer.py ADDED Viewed

@@ -0,0 +1,47 @@
+from __future__ import annotations
+from ailment.statement import Call
+from ailment.expression import Const
+import claripy
+from angr.analyses.decompiler.peephole_optimizations.base import PeepholeOptimizationExprBase
+from angr.analyses.decompiler.peephole_optimizations import EXPR_OPTS
+from angr.errors import AngrCallableMultistateError
+class StringObfType1PeepholeOptimizer(PeepholeOptimizationExprBase):
+    """
+    Integrate type-1 and type-2 deobfuscated strings into decompilation output by folding calls to string loaders
+    with all-constant arguments into the value that the loader returns.
+    """
+    __slots__ = ()
+    NAME = "Simplify Type 1/2 string deobfuscation references"
+    expr_classes = (Call,)
+    def optimize(self, expr: Call, **kwargs):
+        """
+        Try to replace a call to a type-1 or type-2 string loader with its concrete return value.
+        :param expr:    The call expression to inspect.
+        :return:        A Const holding the value returned by the loader, or None if the call is left as it is.
+        """
+        target = expr.target
+        if not isinstance(target, Const):
+            return None
+        loader_addr = target.value
+        if (
+            loader_addr not in self.kb.obfuscations.type1_string_loader_candidates
+            and loader_addr not in self.kb.obfuscations.type2_string_loader_candidates
+        ):
+            # not a call to a type1 or a type2 string loader
+            return None
+        # the call can only be optimized away when every argument is a constant
+        if not expr.args or not all(isinstance(arg, Const) for arg in expr.args):
+            return None
+        # execute the function with the given arguments
+        func = self.kb.functions[loader_addr]
+        loader = self.project.factory.callable(
+            loader_addr, concrete_only=True, cc=func.calling_convention, prototype=func.prototype
+        )
+        concrete_args = [claripy.BVV(arg.value, arg.bits) for arg in expr.args]
+        try:
+            out = loader(*concrete_args)
+        except AngrCallableMultistateError:
+            return None
+        if not out.concrete:
+            return None
+        # FIXME: use out.bits when the function prototype recovery is more reliable
+        return Const(None, None, out.concrete_value, self.project.arch.bits, **expr.tags)
+EXPR_OPTS.append(StringObfType1PeepholeOptimizer)

angr/analyses/reaching_definitions/function_handler_library/stdio.py CHANGED Viewed

@@ -197,6 +197,11 @@ def handle_printf(
             buf_data = state.get_values(buf_atoms)
             if buf_data is not None:
                 buf_data = buf_data.extract(0, len(buf_data) // 8 - 1, archinfo.Endness.BE)
+            else:
+                top_val = state.top(state.arch.bits)
+                for defn in state.get_definitions(atom):
+                    top_val = state.annotate_with_def(top_val, defn)
+                buf_data = MultiValues(top_val)
         elif fmt == "%u":
             buf_atoms = atom
             buf_data = state.get_concrete_value(buf_atoms)
@@ -217,7 +222,9 @@ def handle_printf(
         else:
             _l.warning("Unimplemented printf format string %s", fmt)
             buf_atoms = set()
-            buf_data = None
+            top_val = state.top(state.arch.bits)
+            buf_data = MultiValues(top_val)
         if result is not None and buf_data is not None:
             result = result.concat(buf_data)
         source_atoms.update(buf_atoms)

angr/analyses/reaching_definitions/function_handler_library/string.py CHANGED Viewed

@@ -46,8 +46,8 @@ class LibcStringHandlers(FunctionHandler):
     @FunctionCallDataUnwrapped.decorate
     def handle_impl_strncpy(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
-        n = state.get_concrete_value(data.args_atoms[1])
-        src_atom = state.deref(data.args_atoms[2], DerefSize.NULL_TERMINATE if n is None else n)
+        n = state.get_concrete_value(data.args_atoms[2])
+        src_atom = state.deref(data.args_atoms[1], DerefSize.NULL_TERMINATE if n is None else n)
         src_str = state.get_values(src_atom)
         if src_str is not None:
             dst_atom = state.deref(data.args_atoms[0], len(src_str) // 8)

angr/analyses/s_liveness.py CHANGED Viewed

@@ -5,7 +5,7 @@ from ailment.expression import VirtualVariable
 from ailment.statement import Assignment
 from angr.analyses import Analysis, register_analysis
-from angr.utils.ssa import is_phi_assignment, VVarUsesCollector
+from angr.utils.ssa import VVarUsesCollector, phi_assignment_get_src
 class SLivenessModel:
@@ -85,8 +85,8 @@ class SLivenessAnalysis(Analysis):
                 if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
                     live.discard(stmt.dst.varid)
-                r, phi_expr = is_phi_assignment(stmt)
-                if r:
+                phi_expr = phi_assignment_get_src(stmt)
+                if phi_expr is not None:
                     for src, vvar in phi_expr.src_and_vvars:
                         if src not in live_in_by_pred:
                             live_in_by_pred[src] = live.copy()

angr/analyses/s_propagator.py CHANGED Viewed

@@ -4,7 +4,7 @@ import contextlib
 from collections import defaultdict
 from ailment.block import Block
-from ailment.expression import Const, VirtualVariable, VirtualVariableCategory, StackBaseOffset
+from ailment.expression import Const, VirtualVariable, VirtualVariableCategory, StackBaseOffset, Load, Convert
 from ailment.statement import Assignment, Store, Return, Jump
 from angr.knowledge_plugins.functions import Function
@@ -21,6 +21,7 @@ from angr.utils.ssa import (
     is_const_vvar_tmp_assignment,
     get_tmp_uselocs,
     get_tmp_deflocs,
+    phi_assignment_get_src,
 )
@@ -129,8 +130,8 @@ class SPropagatorAnalysis(Analysis):
                     replacements[useloc][vvar_at_use] = v
                 continue
-            r, v = is_phi_assignment(stmt)
-            if r:
+            v = phi_assignment_get_src(stmt)
+            if v is not None:
                 src_varids = {vvar.varid if vvar is not None else None for _, vvar in v.src_and_vvars}
                 if None not in src_varids and all(varid in const_vvars for varid in src_varids):
                     src_values = {
@@ -182,6 +183,31 @@ class SPropagatorAnalysis(Analysis):
                         # this vvar is used once if we exclude its uses at ret sites or jump sites. we can propagate it
                         for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
                             replacements[vvar_useloc][vvar_used] = stmt.src
+                        continue
+                # special logic for global variables: if it's used once or multiple times, and the variable is never
+                # updated before it's used, we will propagate the load
+                if isinstance(stmt, Assignment):
+                    stmt_src = stmt.src
+                    # unpack conversions
+                    while isinstance(stmt_src, Convert):
+                        stmt_src = stmt_src.operand
+                    if isinstance(stmt_src, Load) and isinstance(stmt_src.addr, Const):
+                        gv_updated = False
+                        for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
+                            gv_updated |= self.is_global_variable_updated(
+                                self.func_graph,
+                                blocks,
+                                vvar.varid,
+                                stmt_src.addr.value,
+                                stmt_src.size,
+                                defloc,
+                                vvar_useloc,
+                            )
+                        if not gv_updated:
+                            for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
+                                replacements[vvar_useloc][vvar_used] = stmt.src
+                            continue
         for vvar_id, uselocs in vvar_uselocs.items():
             vvar = next(iter(uselocs))[0] if vvar_id not in vvarid_to_vvar else vvarid_to_vvar[vvar_id]
@@ -257,5 +283,50 @@ class SPropagatorAnalysis(Analysis):
         self.model.replacements = replacements
+    @staticmethod
+    def is_global_variable_updated(
+        func_graph, block_dict, varid: int, gv_addr: int, gv_size: int, defloc: CodeLocation, useloc: CodeLocation
+    ) -> bool:
+        """
+        Check whether a global variable may be overwritten between the definition and a use of a virtual variable.
+        Only Store statements with a constant address are considered; a store counts as an update when its address
+        range overlaps with [gv_addr, gv_addr + gv_size).
+        :param func_graph:  The graph of blocks; only its successors() method is used.
+        :param block_dict:  A mapping from (block address, block index) to the block.
+        :param varid:       The ID of the virtual variable that holds the loaded value.
+        :param gv_addr:     The address of the global variable.
+        :param gv_size:     The size of the global variable, in the same unit as Store.size.
+        :param defloc:      The location where the virtual variable is defined.
+        :param useloc:      The location where the virtual variable is used.
+        :return:            True if an overlapping store is found, False otherwise.
+        """
+        defblock = block_dict[(defloc.block_addr, defloc.block_idx)]
+        useblock = block_dict[(useloc.block_addr, useloc.block_idx)]
+        # traverse a graph slice from the def block to the use block and check if the global variable is updated
+        seen = {defblock}
+        # which blocks get visited and what is returned do not depend on the visiting order, so we pop from the end
+        # of the list (constant time) instead of from the front (linear time)
+        worklist = [defblock]
+        while worklist:
+            block = worklist.pop()
+            start_stmt_idx = defloc.stmt_idx if block is defblock else 0  # inclusive
+            end_stmt_idx = useloc.stmt_idx if block is useblock else len(block.statements)  # exclusive
+            for idx in range(start_stmt_idx, end_stmt_idx):
+                stmt = block.statements[idx]
+                if isinstance(stmt, Store) and isinstance(stmt.addr, Const):
+                    store_addr = stmt.addr.value
+                    store_size = stmt.size
+                    if gv_addr <= store_addr < gv_addr + gv_size or store_addr <= gv_addr < store_addr + store_size:
+                        return True
+            if block is useblock:
+                # do not go beyond the use site
+                continue
+            for succ in func_graph.successors(block):
+                if succ not in seen:
+                    abort_path = False
+                    for stmt in succ.statements:
+                        if is_phi_assignment(stmt) and any(
+                            vvar.varid == varid for _, vvar in stmt.src.src_and_vvars if vvar is not None
+                        ):
+                            # the virtual variable is no longer live after this point
+                            abort_path = True
+                            break
+                    if abort_path:
+                        continue
+                    seen.add(succ)
+                    worklist.append(succ)
+        return False
 register_analysis(SPropagatorAnalysis, "SPropagator")

angr/analyses/unpacker/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+from __future__ import annotations
+from .packing_detector import PackingDetector
+from .obfuscation_detector import ObfuscationDetector
+__all__ = ("PackingDetector", "ObfuscationDetector")

angr/analyses/unpacker/obfuscation_detector.py ADDED Viewed

@@ -0,0 +1,103 @@
+from __future__ import annotations
+import logging
+import networkx
+from angr.analyses.analysis import Analysis, AnalysesHub
+from angr.knowledge_plugins.cfg import CFGModel
+_l = logging.getLogger(__name__)
+class ObfuscationDetector(Analysis):
+    """
+    This analysis detects, usually in ways that are more robust than section name matching or signature matching, the
+    existence of obfuscation techniques in a binary.
+    The verdict is stored in `obfuscated`; the names of the suspected tools are stored in `possible_obfuscators`.
+    """
+    def __init__(self, cfg: CFGModel | None = None):
+        """
+        :param cfg: The CFG model to inspect. If None, the most accurate CFG model in the knowledge base is used.
+        """
+        self.obfuscated: bool = False
+        self.possible_obfuscators: list[str] = []
+        if cfg is None:
+            _l.warning(
+                "ObfuscationDetector is using a most accurate CFG model in the knowledge base. We assume it is "
+                "generated with force_smart_scan=False and force_complete_scan=False."
+            )
+            self._cfg = self.kb.cfgs.get_most_accurate()
+        else:
+            self._cfg = cfg
+        self.analyze()
+    def analyze(self):
+        """
+        Run every detection routine and record the tools that they report.
+        """
+        analysis_routines = [
+            self._analyze_vmprotect,
+        ]
+        for routine in analysis_routines:
+            tool = routine()
+            if tool:
+                self.obfuscated = True
+                self.possible_obfuscators.append(tool)
+    def _analyze_vmprotect(self) -> str | None:
+        """
+        We detect VMProtect v3 (with control-flow obfuscation) based on two main characteristics:
+        - In amd64 binaries, there exists a strongly connected component in the call graph with over 1,000 nodes.
+          Edge/node ratio is >= 1.3. On other architectures this criterion is treated as satisfied.
+        - There is a high number of pushf and popf instructions in the visible functions: each of the two counts
+          must exceed 0.2% of the number of nodes in the CFG.
+        :return: "vmprotect" if both characteristics are present, None otherwise.
+        """
+        high_scc_node_edge_ratio = False
+        high_pushf = False
+        high_popf = False
+        high_clc = False  # pylint:disable=unused-variable
+        if self.project.arch.name == "AMD64":
+            cg = self.kb.functions.callgraph
+            sccs = networkx.strongly_connected_components(cg)
+            for scc in sccs:
+                node_count = len(scc)
+                if node_count > 1000:
+                    # only build the subgraph for components that are large enough to matter
+                    subgraph = networkx.subgraph(cg, scc)
+                    edge_count = len(subgraph.edges)
+                    if edge_count / node_count >= 1.3:
+                        high_scc_node_edge_ratio = True
+                        break
+        else:
+            high_scc_node_edge_ratio = True
+        pushf_ctr = 0
+        popf_ctr = 0
+        clc_ctr = 0  # only used for x86
+        is_x86 = self.project.arch.name == "X86"
+        cfg_node_count = len(self._cfg.graph)
+        for node in self._cfg.nodes():
+            if node.size > 0 and node.instruction_addrs:
+                block = node.block
+                for insn in block.capstone.insns:
+                    if insn.mnemonic in {"pushf", "pushfd", "pushfq"}:
+                        pushf_ctr += 1
+                    elif insn.mnemonic in {"popf", "popfd", "popfq"}:
+                        popf_ctr += 1
+                    elif is_x86 and insn.mnemonic == "clc":
+                        clc_ctr += 1
+        if pushf_ctr > cfg_node_count * 0.002:
+            high_pushf = True
+        if popf_ctr > cfg_node_count * 0.002:
+            high_popf = True
+        # the clc indicator is computed but does not take part in the verdict below
+        if not is_x86 or clc_ctr > cfg_node_count * 0.002:
+            high_clc = True  # noqa: F841
+        if high_scc_node_edge_ratio and high_pushf and high_popf:
+            return "vmprotect"
+        return None
+AnalysesHub.register_default("ObfuscationDetector", ObfuscationDetector)

angr/analyses/unpacker/packing_detector.py ADDED Viewed

@@ -0,0 +1,138 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING
+import math
+import logging
+from angr.analyses.analysis import Analysis, AnalysesHub
+from angr.knowledge_plugins.cfg import CFGModel
+if TYPE_CHECKING:
+    from cle import Section
+_l = logging.getLogger(__name__)
+class PackingDetector(Analysis):
+    """
+    This analysis detects if a binary is likely packed or not. We may extend it to identify which packer is in use in
+    the future.
+    The verdict is stored in `packed`. A binary is considered packed when the regions of its readable and executable
+    sections that the CFG does not cover add up to at least PACKED_MIN_BYTES bytes and their entropy is at least
+    PACKED_ENTROPY_MIN_THRESHOLD.
+    """
+    PACKED_MIN_BYTES = 256
+    PACKED_ENTROPY_MIN_THRESHOLD = 0.88
+    def __init__(self, cfg: CFGModel | None = None, region_size_threshold: int = 0x20):
+        """
+        :param cfg:                     The CFG model to inspect. If None, the most accurate CFG model in the
+                                        knowledge base is used.
+        :param region_size_threshold:   Uncovered regions must be larger than this many bytes to be counted.
+        """
+        self.packed: bool = False
+        self.region_size_threshold: int = region_size_threshold
+        if cfg is None:
+            _l.warning(
+                "PackingDetector is using a most accurate CFG model in the knowledge base. We assume it is "
+                "generated with force_smart_scan=False and force_complete_scan=False."
+            )
+            self._cfg = self.kb.cfgs.get_most_accurate()
+        else:
+            self._cfg = cfg
+        self.analyze()
+    def analyze(self):
+        """
+        Collect the regions covered by CFG nodes, derive the uncovered regions, and set `packed` accordingly.
+        """
+        # assume we already have a CFG with complete scanning disabled
+        # collect all regions that are not covered by the CFG in r+x sections, and then compute the entropy. we believe
+        # the binary is packed if it is beyond a threshold
+        covered_regions: list[tuple[int, int]] = []
+        last_known_section: Section | None = None
+        for node in sorted(self._cfg.nodes(), key=lambda n: n.addr):
+            if last_known_section is None or not last_known_section.contains_addr(node.addr):
+                # the cached section does not apply; look the section up
+                section = self.project.loader.find_section_containing(node.addr)
+                if section is None:
+                    # this node does not belong to any known section - ignore it
+                    continue
+                if section.is_readable and section.is_executable:
+                    last_known_section = section
+            if node.size == 0:
+                # ignore empty nodes
+                continue
+            node_end = node.addr + node.size
+            if not covered_regions:
+                covered_regions.append((node.addr, node_end))
+            else:
+                last_item = covered_regions[-1]
+                if last_item[0] <= node.addr <= last_item[1] < node_end:
+                    # the node starts inside (or right at the end of) the last region and extends it
+                    covered_regions[-1] = last_item[0], node_end
+                else:
+                    # add a new item
+                    covered_regions.append((node.addr, node_end))
+        # now we get the uncovered regions
+        uncovered_regions: list[tuple[int, int]] = self._get_uncovered_regions(covered_regions)
+        # compute entropy
+        total_bytes, entropy = self._compute_entropy(uncovered_regions)
+        self.packed = total_bytes >= self.PACKED_MIN_BYTES and entropy >= self.PACKED_ENTROPY_MIN_THRESHOLD
+    def _get_uncovered_regions(self, covered_regions: list[tuple[int, int]]) -> list[tuple[int, int]]:
+        """
+        Find the gaps between covered regions inside the readable and executable sections of the main object.
+        :param covered_regions: (start, end) pairs in the order produced by analyze().
+        :return:                (start, end) pairs of the uncovered regions that are larger than
+                                region_size_threshold.
+        """
+        # FIXME: We only support binaries with sections. Add support for segments in the future
+        all_executable_sections = [
+            sec
+            for sec in self.project.loader.main_object.sections
+            if sec.is_executable and sec.is_readable and not sec.only_contains_uninitialized_data
+        ]
+        all_executable_sections = sorted(all_executable_sections, key=lambda sec: sec.vaddr)
+        idx = 0
+        uncovered_regions: list[tuple[int, int]] = []
+        for section in all_executable_sections:
+            if idx >= len(covered_regions):
+                # no covered regions are left, so the entire section is uncovered
+                if section.memsize > self.region_size_threshold:
+                    uncovered_regions.append((section.vaddr, section.vaddr + section.memsize))
+            else:
+                # NOTE(review): the span between the last covered region handled here and the end of the section
+                # is never reported - confirm that this is intended
+                i = idx
+                last_end = section.vaddr
+                while i < len(covered_regions):
+                    region_start, region_end = covered_regions[i]
+                    if region_end >= section.vaddr + section.memsize:
+                        # move on to the next section
+                        break
+                    if last_end < region_start and region_start - last_end > self.region_size_threshold:
+                        uncovered_regions.append((last_end, region_start))
+                    i += 1
+                    last_end = max(last_end, region_end)
+                idx = i
+        return uncovered_regions
+    def _compute_entropy(self, regions: list[tuple[int, int]]) -> tuple[int, float]:
+        """
+        Compute the entropy of the bytes in the given regions, using logarithm base 256.
+        :param regions: (start, end) pairs of the regions to load from the loader memory.
+        :return:        The number of bytes counted and their entropy; (0, 0.0) when no byte was counted.
+        """
+        byte_counts = [0] * 256
+        for start, end in regions:
+            for b in self.project.loader.memory.load(start, end - start):
+                byte_counts[b] += 1
+        total = sum(byte_counts)
+        if total == 0:
+            return 0, 0.0
+        entropy = 0.0
+        for count in byte_counts:
+            if count == 0:
+                continue
+            p = count / total
+            entropy -= p * math.log(p, 256)
+        return total, entropy
+AnalysesHub.register_default("PackingDetector", PackingDetector)

angr/angrdb/models.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from __future__ import annotations
-from sqlalchemy import Column, Integer, String, Boolean, BLOB, ForeignKey
+from sqlalchemy import Column, Integer, String, Boolean, BLOB, TEXT, ForeignKey
 from sqlalchemy.orm import declarative_base, relationship
 Base = declarative_base()
@@ -127,6 +127,7 @@ class DbStructuredCode(Base):
     configuration = Column(BLOB, nullable=True)
     const_formats = Column(BLOB, nullable=True)
     ite_exprs = Column(BLOB, nullable=True)
+    errors = Column(TEXT, nullable=True)
 class DbXRefs(Base):

angr/angrdb/serializers/kb.py CHANGED Viewed

@@ -16,7 +16,7 @@ class KnowledgeBaseSerializer:
     """
     @staticmethod
-    def dump(session, kb):
+    def dump(session, kb: KnowledgeBase):
         """
         :param session:             The database session object.
@@ -40,7 +40,7 @@ class KnowledgeBaseSerializer:
         CommentsSerializer.dump(session, db_kb, kb.comments)
         LabelsSerializer.dump(session, db_kb, kb.labels)
         VariableManagerSerializer.dump(session, db_kb, kb.variables)
-        StructuredCodeManagerSerializer.dump(session, db_kb, kb.structured_code)
+        StructuredCodeManagerSerializer.dump(session, db_kb, kb.decompilations)
     @staticmethod
     def load(session, project, name):
@@ -89,7 +89,7 @@ class KnowledgeBaseSerializer:
         # Load structured code
         structured_code = StructuredCodeManagerSerializer.load(session, db_kb, kb)
         if structured_code is not None:
-            kb.structured_code = structured_code
+            kb.decompilations = structured_code
         if cfg_model is not None:
             # CFG may not exist for all knowledge bases

angr/angrdb/serializers/structured_code.py CHANGED Viewed

@@ -37,15 +37,15 @@ class StructuredCodeManagerSerializer:
             # TODO: Cache types
             expr_comments = None
-            if cache.codegen.expr_comments:
+            if cache.codegen is not None and cache.codegen.expr_comments:
                 expr_comments = json.dumps(cache.codegen.expr_comments).encode("utf-8")
             stmt_comments = None
-            if cache.codegen.stmt_comments:
+            if cache.codegen is not None and cache.codegen.stmt_comments:
                 stmt_comments = json.dumps(cache.codegen.stmt_comments).encode("utf-8")
             const_formats = None
-            if cache.codegen.const_formats:
+            if cache.codegen is not None and cache.codegen.const_formats:
                 const_formats = pickle.dumps(cache.codegen.const_formats)
             ite_exprs = None
@@ -60,6 +60,7 @@ class StructuredCodeManagerSerializer:
                 stmt_comments=stmt_comments,
                 const_formats=const_formats,
                 ite_exprs=ite_exprs,
+                errors="\n\n\n".join(cache.errors),
                 # configuration=configuration,
             )
             session.add(db_code)
@@ -118,6 +119,7 @@ class StructuredCodeManagerSerializer:
             cache = DecompilationCache(db_code.func_addr)
             cache.codegen = dummy_codegen
             cache.ite_exprs = ite_exprs
+            cache.errors = db_code.errors.split("\n\n\n")
             manager[(db_code.func_addr, db_code.flavor)] = cache
         return manager