angr 9.2.124__py3-none-macosx_11_0_arm64.whl → 9.2.126__py3-none-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/__init__.py +13 -1
- angr/analyses/codecave.py +77 -0
- angr/analyses/decompiler/ail_simplifier.py +1 -0
- angr/analyses/decompiler/callsite_maker.py +9 -1
- angr/analyses/decompiler/clinic.py +32 -2
- angr/analyses/decompiler/condition_processor.py +104 -66
- angr/analyses/decompiler/decompiler.py +7 -0
- angr/analyses/decompiler/optimization_passes/__init__.py +18 -1
- angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +6 -0
- angr/analyses/decompiler/optimization_passes/tag_slicer.py +41 -0
- angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +2 -2
- angr/analyses/decompiler/return_maker.py +1 -0
- angr/analyses/decompiler/ssailification/rewriting.py +4 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +10 -3
- angr/analyses/decompiler/structured_codegen/c.py +18 -2
- angr/analyses/deobfuscator/__init__.py +18 -0
- angr/analyses/deobfuscator/api_obf_finder.py +313 -0
- angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +51 -0
- angr/analyses/deobfuscator/irsb_reg_collector.py +85 -0
- angr/analyses/deobfuscator/string_obf_finder.py +774 -0
- angr/analyses/deobfuscator/string_obf_opt_passes.py +133 -0
- angr/analyses/deobfuscator/string_obf_peephole_optimizer.py +47 -0
- angr/analyses/patchfinder.py +137 -0
- angr/analyses/pathfinder.py +282 -0
- angr/analyses/reaching_definitions/function_handler_library/stdio.py +8 -1
- angr/analyses/smc.py +159 -0
- angr/analyses/unpacker/__init__.py +6 -0
- angr/analyses/unpacker/obfuscation_detector.py +103 -0
- angr/analyses/unpacker/packing_detector.py +138 -0
- angr/angrdb/models.py +1 -2
- angr/calling_conventions.py +3 -1
- angr/engines/vex/claripy/irop.py +10 -5
- angr/engines/vex/heavy/heavy.py +2 -0
- angr/exploration_techniques/spiller_db.py +1 -2
- angr/knowledge_plugins/__init__.py +2 -0
- angr/knowledge_plugins/functions/function.py +4 -0
- angr/knowledge_plugins/functions/function_manager.py +18 -9
- angr/knowledge_plugins/functions/function_parser.py +1 -1
- angr/knowledge_plugins/functions/soot_function.py +1 -0
- angr/knowledge_plugins/obfuscations.py +36 -0
- angr/lib/angr_native.dylib +0 -0
- angr/misc/ux.py +2 -2
- angr/project.py +17 -1
- angr/state_plugins/history.py +6 -4
- angr/utils/bits.py +4 -0
- angr/utils/tagged_interval_map.py +112 -0
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/METADATA +6 -6
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/RECORD +53 -36
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/WHEEL +1 -1
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/LICENSE +0 -0
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/entry_points.txt +0 -0
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# pylint:disable=too-many-boolean-expressions
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import archinfo
|
|
5
|
+
|
|
6
|
+
from ailment import Block
|
|
7
|
+
from ailment.statement import Statement, Call, Assignment
|
|
8
|
+
from ailment.expression import Const, Register, VirtualVariable
|
|
9
|
+
|
|
10
|
+
from angr.analyses.decompiler.optimization_passes.optimization_pass import OptimizationPass, OptimizationPassStage
|
|
11
|
+
from angr.analyses.decompiler.optimization_passes import register_optimization_pass
|
|
12
|
+
|
|
13
|
+
# Register offsets (first element of each archinfo register entry) of rcx, rdx, r8, and r9 on AMD64. These are the
# registers whose writes the type-3 rewriter strips from a rewritten block.
WIN64_REG_ARGS = {archinfo.ArchAMD64().registers[reg_name][0] for reg_name in ("rcx", "rdx", "r8", "r9")}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class StringObfType3Rewriter(OptimizationPass):
    """
    Type-3 optimization pass replaces deobfuscate_string calls with the deobfuscated strings, and then removes
    arguments on the stack.

    A block is rewritten when its last statement is a call (or an assignment whose source is a call) and the
    instruction address of that statement is a key of ``kb.obfuscations.type3_deobfuscated_strings``.
    """

    ARCHES = ["X86", "AMD64"]
    PLATFORMS = ["windows"]
    STAGE = OptimizationPassStage.AFTER_MAKING_CALLSITES

    NAME = "Simplify Type 3 string deobfuscation calls"
    DESCRIPTION = "Simplify Type 3 string deobfuscation calls"
    stmt_classes = ()

    def __init__(self, func, **kwargs):
        super().__init__(func, **kwargs)

        self.analyze()

    def _check(self):
        """
        Run the pass only when at least one type-3 deobfuscated string has been recorded in the knowledge base.

        :return: A ``(should_run, cache)`` tuple; the cache is always None.
        """
        if self.kb.obfuscations.type3_deobfuscated_strings:
            return True, None
        return False, None

    @staticmethod
    def is_call_or_call_assignment(stmt) -> bool:
        """
        Return True if ``stmt`` is a Call, or an Assignment whose source expression is a Call.
        """
        return isinstance(stmt, Call) or (isinstance(stmt, Assignment) and isinstance(stmt.src, Call))

    def _analyze(self, cache=None):
        deobfuscated_strings = self.kb.obfuscations.type3_deobfuscated_strings

        # find all blocks with type-3 deobfuscation calls
        # iterate over a snapshot of the graph, since _update_block() is called while looping
        for block in list(self._graph):
            if not block.statements:
                continue
            last_stmt = block.statements[-1]
            if self.is_call_or_call_assignment(last_stmt) and last_stmt.ins_addr in deobfuscated_strings:
                new_block = self._process_block(block, deobfuscated_strings[last_stmt.ins_addr])
                if new_block is not None:
                    self._update_block(block, new_block)

    def _process_block(self, block: Block, deobf_content: bytes) -> Block | None:
        """
        Build a copy of ``block`` in which the trailing deobfuscation call is replaced by an ``init_str`` call that
        takes the original first argument, a custom-string constant, and the string length.

        :param block:           The block whose last statement is the deobfuscation call.
        :param deobf_content:   The deobfuscated bytes associated with that call.
        :return:                The rewritten block, or None if the call has no arguments to carry over.
        """
        # FIXME: This rewriter is very specific to the implementation of the deobfuscation scheme. we can make it more
        #  generic when there are more cases available in the wild.

        # TODO: Support multiple blocks

        # replace the call
        old_stmt: Statement = block.statements[-1]
        old_call: Call = old_stmt.src if isinstance(old_stmt, Assignment) else old_stmt
        if not old_call.args:
            # the rewritten call reuses the first argument of the original call; without it there is nothing to
            # rewrite. bail out before allocating a custom string so that the knowledge base is left untouched.
            return None

        str_id = self.kb.custom_strings.allocate(deobf_content)
        new_call = Call(
            old_call.idx,
            "init_str",
            args=[
                old_call.args[0],
                Const(None, None, str_id, self.project.arch.bits, custom_string=True),
                Const(None, None, len(deobf_content), self.project.arch.bits),
            ],
            ret_expr=old_call.ret_expr,
            bits=old_call.bits,
            **old_call.tags,
        )
        if isinstance(old_stmt, Assignment):
            new_stmt = Assignment(old_stmt.idx, old_stmt.dst, new_call, **old_stmt.tags)
        else:
            new_stmt = new_call

        statements = block.statements[:-1] + [new_stmt]

        # remove N-2 continuous stack assignment
        if len(deobf_content) > 2:
            # candidate statements: stores of a constant no larger than 0xFF into a stack variable with a known
            # integer offset. for each offset, the last such store in the block wins.
            stack_offset_to_stmtid: dict[int, int] = {}
            for idx, stmt in enumerate(statements):
                if (
                    isinstance(stmt, Assignment)
                    and isinstance(stmt.dst, VirtualVariable)
                    and stmt.dst.was_stack
                    and isinstance(stmt.dst.stack_offset, int)
                    and isinstance(stmt.src, Const)
                    and stmt.src.value <= 0xFF
                ):
                    stack_offset_to_stmtid[stmt.dst.stack_offset] = idx
            sorted_offsets = sorted(stack_offset_to_stmtid)
            if sorted_offsets:
                spacing = 8  # FIXME: Make it adjustable
                distance = min(len(deobf_content) - 2, len(sorted_offsets) - 1)
                # look for the first window of distance + 1 offsets whose two ends are exactly
                # spacing * distance apart
                for start_idx in range(len(sorted_offsets) - distance):
                    if sorted_offsets[start_idx] + spacing * distance == sorted_offsets[start_idx + distance]:
                        # found them
                        # remove these statements
                        for i in range(start_idx, start_idx + distance + 1):
                            statements[stack_offset_to_stmtid[sorted_offsets[i]]] = None
                        break
                statements = [stmt for stmt in statements if stmt is not None]

        # remove writes to rdx, rcx, r8, and r9
        if self.project.arch.name == "AMD64":
            statements = [stmt for stmt in statements if not self._stmt_sets_win64_reg_arg(stmt)]

        # return the new block
        return block.copy(statements=statements)

    @staticmethod
    def _stmt_sets_win64_reg_arg(stmt) -> bool:
        """
        Return True if ``stmt`` assigns to a register whose offset is in WIN64_REG_ARGS.
        """
        return isinstance(stmt, Assignment) and isinstance(stmt.dst, Register) and stmt.dst.reg_offset in WIN64_REG_ARGS
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# Register the type-3 rewriter with the decompiler under the "fast" and "full" presets.
register_optimization_pass(StringObfType3Rewriter, presets=["fast", "full"])
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from ailment.statement import Call
|
|
3
|
+
from ailment.expression import Const
|
|
4
|
+
import claripy
|
|
5
|
+
|
|
6
|
+
from angr.analyses.decompiler.peephole_optimizations.base import PeepholeOptimizationExprBase
|
|
7
|
+
from angr.analyses.decompiler.peephole_optimizations import EXPR_OPTS
|
|
8
|
+
from angr.errors import AngrCallableMultistateError
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class StringObfType1PeepholeOptimizer(PeepholeOptimizationExprBase):
    """
    Integrate type-1 and type-2 deobfuscated strings into decompilation output.

    Calls to known type-1 or type-2 string loader candidates whose arguments are all constants are executed
    concretely, and the call expression is replaced by the concrete return value.
    """

    __slots__ = ()

    NAME = "Simplify Type 1/2 string deobfuscation references"
    expr_classes = (Call,)

    def optimize(self, expr: Call, **kwargs):
        """
        Replace a call to a string loader with the constant it returns, when that can be computed.

        :param expr:    The call expression to optimize.
        :return:        A Const holding the concrete return value, or None if the call is not a string loader call,
                        has non-constant (or no) arguments, or does not yield a single concrete value.
        """
        if not isinstance(expr.target, Const):
            return None

        target_addr = expr.target.value
        obfuscations = self.kb.obfuscations
        if (
            target_addr not in obfuscations.type1_string_loader_candidates
            and target_addr not in obfuscations.type2_string_loader_candidates
        ):
            return None

        # this is a function calling a type1 or a type2 string loader
        # optimize this call away if possible
        if not expr.args or not all(isinstance(arg, Const) for arg in expr.args):
            return None

        # execute the function with the given argument
        func = self.kb.functions[target_addr]
        func_call = self.project.factory.callable(
            target_addr, concrete_only=True, cc=func.calling_convention, prototype=func.prototype
        )
        try:
            out = func_call(*[claripy.BVV(arg.value, arg.bits) for arg in expr.args])
        except AngrCallableMultistateError:
            return None

        # the callable yields no value when the (recovered) prototype has no return type; there is nothing to
        # substitute in that case
        if out is None or not out.concrete:
            return None

        return Const(
            None, None, out.concrete_value, self.project.arch.bits, **expr.tags
        )  # FIXME: use out.bits when the function prototype recovery is more reliable
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Add this optimizer to the list of expression-level peephole optimizations.
EXPR_OPTS.append(StringObfType1PeepholeOptimizer)
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# pylint:disable=missing-class-docstring
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
import logging
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
from sortedcontainers import SortedDict
|
|
9
|
+
|
|
10
|
+
from angr.analyses import Analysis, AnalysesHub
|
|
11
|
+
from angr.utils.bits import ffs
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from angr.knowledge_plugins import Function
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class OverlappingFunctionsAnalysis(Analysis):
    """
    Identify functions with interleaved blocks.

    After construction, ``overlapping_functions`` maps the address of a function to the addresses of the functions
    that start after it but before the end of its highest block. Alignment functions are ignored.
    """

    overlapping_functions: dict[int, list[int]]

    def __init__(self):
        self.overlapping_functions = defaultdict(list)
        # function start address -> end address (exclusive) of the highest block of that function
        func_end_by_addr = SortedDict()

        for func in self.project.kb.functions.values():
            if func.is_alignment:
                continue
            # a function that yields no blocks gets an empty extent: it cannot contain any other function, but it
            # can still be reported as lying inside another one
            func_end_by_addr[func.addr] = max((block.addr + block.size for block in func.blocks), default=func.addr)

        for idx, (addr, max_addr) in enumerate(func_end_by_addr.items()):
            # keys are sorted, so only functions starting after this one need to be checked, and the scan can
            # stop at the first one that starts at or beyond the end of this function
            for other_addr in func_end_by_addr.islice(idx + 1):
                if other_addr >= max_addr:
                    break

                self.overlapping_functions[addr].append(other_addr)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class FunctionAlignmentAnalysis(Analysis):
    """
    Determine typical function alignment

    After construction, ``alignment`` is the detected alignment in bytes, or None if no alignment could be
    determined (no functions, too few samples, or no sufficiently dominant alignment).
    """

    alignment: int | None

    def __init__(self):
        self.alignment = None

        if len(self.project.kb.functions) == 0:
            if self.project.kb.cfgs.get_most_accurate() is None:
                log.warning("Please run CFGFast analysis first, to identify functions")
            return

        # histogram: ffs(function address) -> number of functions
        alignment_bins = defaultdict(int)
        count = 0
        for func in self.project.kb.functions.values():
            if not (func.is_alignment or func.is_plt or func.is_simprocedure):
                alignment_bins[ffs(func.addr)] += 1
                count += 1

        if not alignment_bins:
            # every function was filtered out above; there is nothing to take the maximum of
            return

        # FIXME: Higher alignment values will be naturally aligned

        typical_alignment = max(alignment_bins, key=alignment_bins.get)
        if count > 10 and alignment_bins[typical_alignment] >= count / 4:  # XXX: cutoff
            # clamp negative bin keys to 0 so the shift amount is never negative
            self.alignment = 1 << max(typical_alignment, 0)
            log.debug("Function alignment appears to be %d bytes", self.alignment)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass
class AtypicallyAlignedFunction:
    """
    A function whose address is not a multiple of the alignment that was expected for functions in the binary.
    """

    # the function with the unexpected alignment
    function: Function
    # the alignment, in bytes, that the function address was checked against
    expected_alignment: int
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@dataclass
class PatchedOutFunctionality:
    """
    A pair of overlapping functions, as recorded by PatchFinderAnalysis, where the overlapping function has no
    incoming edges in the call graph.
    """

    # the function whose address range the other function starts in
    patched_function: Function
    # the overlapping function that has no incoming call-graph edges
    patched_out_function: Function
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class PatchFinderAnalysis(Analysis):
    """
    Looks for binary patches using some basic heuristics:
    - Looking for interleaved functions
    - Looking for unaligned functions

    After construction, ``possibly_patched_out`` lists pairs of overlapping functions where the overlapping function
    has no incoming call-graph edges, and ``atypical_alignments`` lists functions whose address does not match the
    alignment detected by the FunctionAlignment analysis.
    """

    # FIXME: Possible additional heuristics:
    # - Jumps out to end of function, then back
    # - Looking for patch jumps, e.g. push <addr>; ret
    # - Looking for instruction partials broken by a patch (nodecode)
    # - Unusual stack manipulation

    atypical_alignments: list[AtypicallyAlignedFunction]
    possibly_patched_out: list[PatchedOutFunctionality]

    def __init__(self):
        self.atypical_alignments = []
        self.possibly_patched_out = []

        if len(self.project.kb.functions) == 0:
            if self.project.kb.cfgs.get_most_accurate() is None:
                log.warning("Please run CFGFast analysis first, to identify functions")
            return

        # In CFGFast with scanning enabled, a function may be created from unreachable blocks within another function.
        # Search for interleaved/overlapping functions to identify possible patches.
        overlapping_functions = self.project.analyses.OverlappingFunctions().overlapping_functions
        for addr, overlapping_func_addrs in overlapping_functions.items():
            func = self.project.kb.functions[addr]

            # Are the overlapping functions reachable?
            for overlapping_addr in overlapping_func_addrs:
                overlapping_func = self.project.kb.functions[overlapping_addr]
                if self.project.kb.callgraph.in_degree(overlapping_addr) == 0:
                    self.possibly_patched_out.append(PatchedOutFunctionality(func, overlapping_func))
                    # FIXME: What does the patch do?

        # Look for unaligned functions
        expected_alignment = self.project.analyses.FunctionAlignment().alignment
        # only meaningful when the detected alignment is stricter than what the architecture requires anyway
        if expected_alignment is not None and expected_alignment > self.project.arch.instruction_alignment:
            for func in self.project.kb.functions.values():
                if not (func.is_alignment or func.is_plt or func.is_simprocedure) and func.addr & (
                    expected_alignment - 1
                ):
                    self.atypical_alignments.append(AtypicallyAlignedFunction(func, expected_alignment))
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# Expose the analysis as project.analyses.OverlappingFunctions.
AnalysesHub.register_default("OverlappingFunctions", OverlappingFunctionsAnalysis)
|
|
136
|
+
# Expose the analysis as project.analyses.FunctionAlignment.
AnalysesHub.register_default("FunctionAlignment", FunctionAlignmentAnalysis)
|
|
137
|
+
# Register the analysis under the name "PatchFinder".
AnalysesHub.register_default("PatchFinder", PatchFinderAnalysis)
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
# pylint:disable=missing-class-docstring
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from enum import Enum, auto
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from weakref import ref
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
|
|
8
|
+
from networkx import DiGraph
|
|
9
|
+
from networkx.algorithms.shortest_paths import single_target_shortest_path_length
|
|
10
|
+
|
|
11
|
+
from angr.sim_state import SimState
|
|
12
|
+
from angr.engines.successors import SimSuccessors
|
|
13
|
+
from angr.knowledge_plugins.cfg import CFGModel, CFGNode
|
|
14
|
+
from .analysis import Analysis, AnalysesHub
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Unreachable(Exception):
    """
    Raised while backtracking the search when the frontier marker has no parent left to fall back to.
    """
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(eq=False)
class SimStateMarker:
    """
    A lightweight stand-in for a state at a given address, linked to the marker it was reached from.

    Equality is disabled on the dataclass, so markers compare and hash by identity.
    """

    addr: int
    parent: SimStateMarker | None = None
    banned: bool = False
    misses: int = 0

    def __repr__(self):
        # never recurse into the parent chain; only show whether a parent exists
        parent_text = "..." if self.parent is not None else "None"
        return (
            f"SimStateMarker(addr={self.addr:#x}, parent={parent_text}, "
            f"banned={self.banned}, misses={self.misses})"
        )
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SuccessorsKind(Enum):
    """
    How the search for a successor at a requested address turned out.
    """

    # a satisfiable successor at the requested address was found
    SAT = auto()
    # the successor at the requested address is among the unsatisfiable successors
    UNSAT = auto()
    # no successor at the requested address was found
    MISSING = auto()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class TestPathReport:
    """
    The outcome of testing one address trace.
    """

    # index into the tested address trace -> marker for that position
    path_markers: dict[int, SimStateMarker]
    # how the test ended
    termination: SuccessorsKind
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def nilref():
    """
    Placeholder used where a weak reference is expected but no state exists: calling it always yields None.
    """
    return
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Pathfinder(Analysis):
    """
    Search for a state that reaches ``goal_addr`` from ``start_state``.

    The search alternates between two steps (see ``analyze``): picking a candidate address path through the CFG
    (``find_best_hypothesis_path``) and stepping states along that path (``test_path``). Markers
    for the positions that were tried are kept in ``transition_cache``.
    """

    def __init__(self, start_state: SimState, goal_addr: int, cfg: CFGModel, cache_size=10000):
        """
        :param start_state: The state to start the search from.
        :param goal_addr:   The address to reach.
        :param cfg:         The CFG model used to propose paths. Note that edges are added to its graph below.
        :param cache_size:  The maximum number of states kept in ``state_cache``.
        :raises ValueError: If the CFG has no node at ``goal_addr``.
        """
        self.start_state = start_state
        self.goal_addr = goal_addr
        self.goal_state: SimState | None = None
        self.cfg = cfg
        self.cache_size = cache_size

        # HACK HACK HACK HACK TODO FIXME FISH PLEASE GET RID OF THIS
        # For every syscall node, add an Ijk_Ret edge from the syscall node to each Ijk_FakeRet successor of each
        # of its predecessors. This modifies the graph of the CFG that was passed in.
        extra_edges = []
        for node in self.cfg.graph.nodes:
            if node.is_syscall:
                for pred in self.cfg.graph.pred[node]:
                    for succ, data in self.cfg.graph.succ[pred].items():
                        if data["jumpkind"] == "Ijk_FakeRet":
                            extra_edges.append((node, succ))
        for node, succ in extra_edges:
            self.cfg.graph.add_edge(node, succ, jumpkind="Ijk_Ret")

        goal_node = self.cfg.get_any_node(goal_addr)
        if goal_node is None:
            raise ValueError(f"Node {goal_addr:#x} is not in graph")

        self.start_marker = SimStateMarker(start_state.addr)
        # graph of markers; each node carries a "state" attribute that is called to obtain the state (or None)
        self.transition_cache: DiGraph[SimStateMarker] = DiGraph()
        self.transition_cache.add_node(self.start_marker, state=ref(start_state))
        # address -> shortest-path length to the goal node in the CFG graph; addresses that cannot reach the goal
        # are absent
        self.base_heuristic: dict[int, int] = {
            node.addr: dist for node, dist in single_target_shortest_path_length(cfg.graph, goal_node)
        }
        # states used as dict keys (values are unused); see cache_state()
        self.state_cache = {}
        self.unsat_markers = set()
        # address -> penalty added on top of base_heuristic; incremented each time the address is picked
        self.extra_weight = defaultdict(int)

        # incremental search state used by find_best_hypothesis_path() and _search_backtrack()
        self._search_frontier_marker = self.start_marker
        self._search_path: list[tuple[int, str]] = [(self.start_marker.addr, "Ijk_Boring")]
        self._search_stack = []
        self._search_backtrack_to = {self.start_marker}
        self._search_address_backtrack_points = {self.start_marker.addr: self.start_marker}

    def cache_state(self, state: SimState):
        """
        Hold a reference to ``state`` in ``state_cache``, evicting the oldest entry once the cache exceeds
        ``cache_size``.
        """
        # pop-then-reinsert moves an already-cached state to the end of the dict's insertion order
        self.state_cache[state] = self.state_cache.pop(state, None)
        if len(self.state_cache) > self.cache_size:
            # drop the first (oldest inserted) key
            self.state_cache.pop(next(iter(self.state_cache)))

    def marker_to_state(self, marker: SimStateMarker) -> SimState | None:
        """
        Return the state stored for ``marker`` in the transition cache, or None if the stored reference yields None.
        """
        return self.transition_cache.nodes[marker]["state"]()

    def analyze(self) -> bool:
        """
        Repeatedly propose and test paths until one is satisfiable all the way to the goal.

        On success, ``goal_state`` is set and True is returned. There is no other return statement: the loop only
        ends otherwise through an exception raised by the calls below.
        """
        while True:
            search_path = self.find_best_hypothesis_path()
            result = self.test_path(search_path)
            if result.termination == SuccessorsKind.SAT:
                self.goal_state = self.marker_to_state(result.path_markers[len(search_path) - 1])
                return True
            # the marker with the highest trace index is where the test stopped; ban it and schedule a backtrack
            marker = result.path_markers[max(result.path_markers)]
            marker.banned = True
            self._search_backtrack_to.add(marker)
            if result.termination == SuccessorsKind.UNSAT:
                self.unsat_markers.add(marker)

    def _search_backtrack(self):
        """
        Undo the last step of the search path: move the frontier marker to its parent and revert the effect of the
        popped edge on the search stack.

        :raises Unreachable: If the frontier marker has no parent.
        """
        # NOTE(review): this lookup raises KeyError if the frontier address has no registered backtrack point -
        # confirm that this cannot happen.
        if self._search_address_backtrack_points[self._search_frontier_marker.addr] is self._search_frontier_marker:
            self._search_address_backtrack_points.pop(self._search_frontier_marker.addr)

        self._search_frontier_marker = self._search_frontier_marker.parent
        if self._search_frontier_marker is None:
            raise Unreachable

        addr, jumpkind = self._search_path.pop()
        # mirror image of the stack handling in find_best_hypothesis_path(): a return popped an entry, so push it
        # back; a call or syscall pushed one, so pop it
        if jumpkind == "Ijk_Ret":
            self._search_stack.append(addr)
        elif jumpkind == "Ijk_Call" or jumpkind.startswith("Ijk_Sys"):
            self._search_stack.pop()

    def find_best_hypothesis_path(self) -> tuple[int, ...]:
        """
        Produce the next candidate path as a tuple of addresses, starting at the start marker's address and ending
        at ``goal_addr``.

        The search first rewinds (either fully, or by backtracking until no pending backtrack points remain), then
        greedily extends the path by the CFG successor with the lowest ``base_heuristic + extra_weight``.
        """
        assert self._search_backtrack_to, "Uhh every iteration should set at least one backtrack point"
        if self.start_marker in self._search_backtrack_to:
            # restart the search from scratch
            self._search_frontier_marker = self.start_marker
            self._search_path: list[tuple[int, str]] = [(self.start_marker.addr, "Ijk_Boring")]
            self._search_stack = []
            self._search_backtrack_to = set()
        else:
            # walk back up the parent chain until every pending backtrack point has been passed
            while self._search_backtrack_to:
                self._search_backtrack_to.discard(self._search_frontier_marker)
                try:
                    self._search_backtrack()
                except Unreachable as e:
                    raise RuntimeError("oops") from e

        while self._search_path[-1][0] != self.goal_addr:
            # addresses of already-known successors of the frontier that have been banned
            banned = {
                marker.addr for marker in self.transition_cache.succ[self._search_frontier_marker] if marker.banned
            }
            current_node = self.cfg.get_any_node(self._search_path[-1][0])
            # candidate edges: not fake returns, not banned, able to reach the goal, and - for returns - only to
            # the address on top of the search stack
            # NOTE(review): self._search_stack[-1] raises IndexError when an Ijk_Ret edge is considered while the
            # search stack is empty - confirm whether that situation can occur.
            options = [
                (node, data["jumpkind"], self.base_heuristic[node.addr] + self.extra_weight[node.addr])
                for node, data in self.cfg.graph.succ[current_node].items()
                if data["jumpkind"] != "Ijk_FakeRet"
                and node.addr not in banned
                and node.addr in self.base_heuristic
                and (data["jumpkind"] != "Ijk_Ret" or node.addr == self._search_stack[-1])
            ]
            if not options:
                # backtrack
                # here Unreachable is not caught and propagates to the caller
                self._search_frontier_marker.banned = True
                self._search_backtrack()
                continue

            # NOTE(review): options is non-empty here, so the default is never used; it is also a 2-tuple while
            # three names are unpacked.
            best_node, best_jumpkind, best_weight = min(
                options,
                default=(None, None),
                key=lambda xyz: xyz[2],
            )

            assert isinstance(best_jumpkind, str)
            assert isinstance(best_node, CFGNode)
            # penalize the chosen address so that later iterations prefer alternatives
            self.extra_weight[best_node.addr] += 1
            self._search_path.append((best_node.addr, best_jumpkind))

            if best_jumpkind == "Ijk_Call" or best_jumpkind.startswith("Ijk_Sys"):
                # push the address of the first Ijk_FakeRet successor of the current node, or None if there is none
                self._search_stack.append(
                    next(
                        iter(
                            node.addr
                            for node, data in self.cfg.graph.succ[current_node].items()
                            if data["jumpkind"] == "Ijk_FakeRet"
                        ),
                        None,
                    )
                )
            elif best_jumpkind == "Ijk_Ret":
                self._search_stack.pop()

            # reuse the existing successor marker for this address if there is one, otherwise create a new one
            # with no state attached
            frontier_marker_nullable = next(
                (
                    marker
                    for marker in self.transition_cache.succ[self._search_frontier_marker]
                    if marker.addr == best_node.addr
                ),
                None,
            )
            if frontier_marker_nullable is None:
                new_marker = SimStateMarker(best_node.addr, self._search_frontier_marker)
                self.transition_cache.add_node(new_marker, state=nilref)
                self.transition_cache.add_edge(self._search_frontier_marker, new_marker)
                self._search_frontier_marker = new_marker
            else:
                self._search_frontier_marker = frontier_marker_nullable

            # remember the first marker seen on the current path for each address
            if self._search_frontier_marker.addr not in self._search_address_backtrack_points:
                self._search_address_backtrack_points[self._search_frontier_marker.addr] = self._search_frontier_marker

            # TODO does this go above the above stanza?
            # when the best weight was tied between several options, mark this address's backtrack point for
            # revisiting
            if sum(weight == best_weight for _, _, weight in options) != 1:
                self._search_backtrack_to.add(self._search_address_backtrack_points[self._search_frontier_marker.addr])

        return tuple(addr for addr, _ in self._search_path)

    def diagnose_unsat(self, state: SimState):
        """
        Placeholder; currently does nothing.
        """
        pass

    def test_path(self, bbl_addr_trace: tuple[int, ...]) -> TestPathReport:
        """
        Step states along ``bbl_addr_trace`` and report how far the trace could be followed.

        :param bbl_addr_trace:  Block addresses to follow; the first one must be the start marker's address.
        :return:                A report whose termination is SAT if the whole trace was followed, or UNSAT/MISSING
                                for the step at which the next address could not be reached.
        """
        assert bbl_addr_trace[0] == self.start_marker.addr, "Paths must begin with the start state"

        # follow the trace through the transition cache for as long as markers already exist
        known_markers = [self.start_marker]
        for addr in bbl_addr_trace[1:]:
            for succ in self.transition_cache.succ[known_markers[-1]]:
                if succ.addr == addr:
                    break
            else:
                # no cached marker for this address; stop extending the known prefix
                break
            known_markers.append(succ)

        # starting from the deepest known marker, find the first one whose state is still available
        marker = None
        for ri, marker_ in enumerate(reversed(known_markers)):
            i = len(known_markers) - 1 - ri
            state: SimState = self.transition_cache.nodes[marker_]["state"]()
            marker = marker_
            if state is not None:
                break
        else:
            assert False, "The first item in known_markers should always have a resolvable weakref"

        # step forward from position i until the end of the trace
        while i != len(bbl_addr_trace) - 1:
            assert state.addr == bbl_addr_trace[i]

            marker.misses += 1
            successors = state.step(strict_block_end=True)
            succ, kind = find_successor(successors, bbl_addr_trace[i + 1])

            # cache state
            if i + 1 < len(known_markers):
                succ_marker = known_markers[i + 1]
            else:
                succ_marker = SimStateMarker(bbl_addr_trace[i + 1], parent=marker)
                self.transition_cache.add_node(succ_marker)
                self.transition_cache.add_edge(marker, succ_marker)
            # the transition cache only stores a weak reference; cache_state() holds the state itself
            self.transition_cache.nodes[succ_marker]["state"] = ref(succ) if succ is not None else nilref
            if succ is not None:
                self.cache_state(succ)

            if kind == SuccessorsKind.SAT:
                assert succ is not None
                state = succ
                marker = succ_marker
                i += 1
                continue
            if kind == SuccessorsKind.UNSAT:
                assert succ is not None
                return TestPathReport(
                    path_markers={i: marker, i + 1: succ_marker},
                    termination=SuccessorsKind.UNSAT,
                )
            return TestPathReport(path_markers={i: marker, i + 1: succ_marker}, termination=SuccessorsKind.MISSING)

        return TestPathReport(path_markers={i: marker}, termination=SuccessorsKind.SAT)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def find_successor(successors: SimSuccessors, target_addr: int) -> tuple[SimState | None, SuccessorsKind]:
    """
    Pick the successor state located at ``target_addr``.

    Satisfiable successors are checked first, then unsatisfiable ones. If neither contains the address, each
    unconstrained successor is copied, its instruction pointer is constrained to ``target_addr``, and the first
    copy that remains satisfiable is returned as SAT.

    :param successors:  The successors produced by stepping a state.
    :param target_addr: The address of the wanted successor.
    :return:            ``(state, kind)``; the state is None when the kind is MISSING.
    """
    sat_match = next((cand for cand in successors.flat_successors if cand.addr == target_addr), None)
    if sat_match is not None:
        return sat_match, SuccessorsKind.SAT

    unsat_match = next((cand for cand in successors.unsat_successors if cand.addr == target_addr), None)
    if unsat_match is not None:
        return unsat_match, SuccessorsKind.UNSAT

    for unconstrained in successors.unconstrained_successors:
        # work on a copy so the original unconstrained successor is left untouched
        pinned = unconstrained.copy()
        pinned.add_constraints(pinned._ip == target_addr)
        if pinned.satisfiable():
            return pinned, SuccessorsKind.SAT

    return None, SuccessorsKind.MISSING
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
# Register the analysis under the name "Pathfinder".
AnalysesHub.register_default("Pathfinder", Pathfinder)
|
|
@@ -197,6 +197,11 @@ def handle_printf(
|
|
|
197
197
|
buf_data = state.get_values(buf_atoms)
|
|
198
198
|
if buf_data is not None:
|
|
199
199
|
buf_data = buf_data.extract(0, len(buf_data) // 8 - 1, archinfo.Endness.BE)
|
|
200
|
+
else:
|
|
201
|
+
top_val = state.top(state.arch.bits)
|
|
202
|
+
for defn in state.get_definitions(atom):
|
|
203
|
+
top_val = state.annotate_with_def(top_val, defn)
|
|
204
|
+
buf_data = MultiValues(top_val)
|
|
200
205
|
elif fmt == "%u":
|
|
201
206
|
buf_atoms = atom
|
|
202
207
|
buf_data = state.get_concrete_value(buf_atoms)
|
|
@@ -217,7 +222,9 @@ def handle_printf(
|
|
|
217
222
|
else:
|
|
218
223
|
_l.warning("Unimplemented printf format string %s", fmt)
|
|
219
224
|
buf_atoms = set()
|
|
220
|
-
|
|
225
|
+
top_val = state.top(state.arch.bits)
|
|
226
|
+
buf_data = MultiValues(top_val)
|
|
227
|
+
|
|
221
228
|
if result is not None and buf_data is not None:
|
|
222
229
|
result = result.concat(buf_data)
|
|
223
230
|
source_atoms.update(buf_atoms)
|