angr 9.2.124__py3-none-manylinux2014_aarch64.whl → 9.2.126__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (52):
  1. angr/__init__.py +1 -1
  2. angr/analyses/__init__.py +13 -1
  3. angr/analyses/codecave.py +77 -0
  4. angr/analyses/decompiler/ail_simplifier.py +1 -0
  5. angr/analyses/decompiler/callsite_maker.py +9 -1
  6. angr/analyses/decompiler/clinic.py +32 -2
  7. angr/analyses/decompiler/condition_processor.py +104 -66
  8. angr/analyses/decompiler/decompiler.py +7 -0
  9. angr/analyses/decompiler/optimization_passes/__init__.py +18 -1
  10. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +6 -0
  11. angr/analyses/decompiler/optimization_passes/tag_slicer.py +41 -0
  12. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +2 -2
  13. angr/analyses/decompiler/return_maker.py +1 -0
  14. angr/analyses/decompiler/ssailification/rewriting.py +4 -0
  15. angr/analyses/decompiler/ssailification/rewriting_engine.py +10 -3
  16. angr/analyses/decompiler/structured_codegen/c.py +18 -2
  17. angr/analyses/deobfuscator/__init__.py +18 -0
  18. angr/analyses/deobfuscator/api_obf_finder.py +313 -0
  19. angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +51 -0
  20. angr/analyses/deobfuscator/irsb_reg_collector.py +85 -0
  21. angr/analyses/deobfuscator/string_obf_finder.py +774 -0
  22. angr/analyses/deobfuscator/string_obf_opt_passes.py +133 -0
  23. angr/analyses/deobfuscator/string_obf_peephole_optimizer.py +47 -0
  24. angr/analyses/patchfinder.py +137 -0
  25. angr/analyses/pathfinder.py +282 -0
  26. angr/analyses/reaching_definitions/function_handler_library/stdio.py +8 -1
  27. angr/analyses/smc.py +159 -0
  28. angr/analyses/unpacker/__init__.py +6 -0
  29. angr/analyses/unpacker/obfuscation_detector.py +103 -0
  30. angr/analyses/unpacker/packing_detector.py +138 -0
  31. angr/angrdb/models.py +1 -2
  32. angr/calling_conventions.py +3 -1
  33. angr/engines/vex/claripy/irop.py +10 -5
  34. angr/engines/vex/heavy/heavy.py +2 -0
  35. angr/exploration_techniques/spiller_db.py +1 -2
  36. angr/knowledge_plugins/__init__.py +2 -0
  37. angr/knowledge_plugins/functions/function.py +4 -0
  38. angr/knowledge_plugins/functions/function_manager.py +18 -9
  39. angr/knowledge_plugins/functions/function_parser.py +1 -1
  40. angr/knowledge_plugins/functions/soot_function.py +1 -0
  41. angr/knowledge_plugins/obfuscations.py +36 -0
  42. angr/misc/ux.py +2 -2
  43. angr/project.py +17 -1
  44. angr/state_plugins/history.py +6 -4
  45. angr/utils/bits.py +4 -0
  46. angr/utils/tagged_interval_map.py +112 -0
  47. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/METADATA +6 -6
  48. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/RECORD +52 -35
  49. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/WHEEL +1 -1
  50. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/LICENSE +0 -0
  51. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/entry_points.txt +0 -0
  52. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,133 @@
1
+ # pylint:disable=too-many-boolean-expressions
2
+ from __future__ import annotations
3
+
4
+ import archinfo
5
+
6
+ from ailment import Block
7
+ from ailment.statement import Statement, Call, Assignment
8
+ from ailment.expression import Const, Register, VirtualVariable
9
+
10
+ from angr.analyses.decompiler.optimization_passes.optimization_pass import OptimizationPass, OptimizationPassStage
11
+ from angr.analyses.decompiler.optimization_passes import register_optimization_pass
12
+
13
# Register offsets of rcx, rdx, r8 and r9 on AMD64 (element 0 of each ``registers`` entry). These are compared
# against ``Register.reg_offset`` to recognize writes to the Win64 register arguments.
_amd64_arch = archinfo.ArchAMD64()
WIN64_REG_ARGS = {_amd64_arch.registers[reg_name][0] for reg_name in ("rcx", "rdx", "r8", "r9")}
del _amd64_arch
19
+
20
+
21
class StringObfType3Rewriter(OptimizationPass):
    """
    Type-3 optimization pass replaces deobfuscate_string calls with the deobfuscated strings, and then removes
    arguments on the stack.

    The pass only runs when ``kb.obfuscations.type3_deobfuscated_strings`` is non-empty. That mapping is keyed by the
    instruction address of the deobfuscation call and yields the deobfuscated bytes.
    """

    ARCHES = ["X86", "AMD64"]
    PLATFORMS = ["windows"]
    STAGE = OptimizationPassStage.AFTER_MAKING_CALLSITES

    NAME = "Simplify Type 3 string deobfuscation calls"
    DESCRIPTION = "Simplify Type 3 string deobfuscation calls"
    stmt_classes = ()

    def __init__(self, func, **kwargs):
        super().__init__(func, **kwargs)

        self.analyze()

    def _check(self):
        """
        Enable the pass only if at least one type-3 deobfuscated string has been recorded.

        :return: ``(should_run, cache)``; the cache is always None.
        """
        return bool(self.kb.obfuscations.type3_deobfuscated_strings), None

    @staticmethod
    def is_call_or_call_assignment(stmt) -> bool:
        """
        Test whether ``stmt`` is a Call statement or an Assignment whose source is a Call.
        """
        return isinstance(stmt, Call) or (isinstance(stmt, Assignment) and isinstance(stmt.src, Call))

    def _analyze(self, cache=None):
        """
        Rewrite every block whose last statement is a known type-3 deobfuscation call.
        """
        deobfuscated_strings = self.kb.obfuscations.type3_deobfuscated_strings

        # iterate over a snapshot because _update_block() modifies the graph
        for block in list(self._graph):
            if not block.statements:
                continue
            last_stmt = block.statements[-1]
            if not self.is_call_or_call_assignment(last_stmt):
                continue
            if last_stmt.ins_addr not in deobfuscated_strings:
                continue
            new_block = self._process_block(block, deobfuscated_strings[last_stmt.ins_addr])
            if new_block is not None:
                self._update_block(block, new_block)

    def _process_block(self, block: Block, deobf_content: bytes):
        """
        Build a copy of ``block`` in which the trailing deobfuscation call is replaced by an ``init_str`` call.

        :param block:           Block whose last statement is a Call, or an Assignment from a Call.
        :param deobf_content:   The deobfuscated string content.
        :return:                The rewritten block, or None if the call has no argument to reuse as the
                                destination.
        """
        # FIXME: This rewriter is very specific to the implementation of the deobfuscation scheme. we can make it more
        #  generic when there are more cases available in the wild.

        # TODO: Support multiple blocks

        # replace the call
        old_stmt: Statement = block.statements[-1]
        old_call: Call = old_stmt.src if isinstance(old_stmt, Assignment) else old_stmt
        if not old_call.args:
            # The first argument of the original call is reused as the first argument of init_str. Without it there
            # is nothing to rewrite, so leave the block untouched (the caller skips None).
            return None

        str_id = self.kb.custom_strings.allocate(deobf_content)
        new_call = Call(
            old_call.idx,
            "init_str",
            args=[
                old_call.args[0],
                Const(None, None, str_id, self.project.arch.bits, custom_string=True),
                Const(None, None, len(deobf_content), self.project.arch.bits),
            ],
            ret_expr=old_call.ret_expr,
            bits=old_call.bits,
            **old_call.tags,
        )
        if isinstance(old_stmt, Assignment):
            new_stmt = Assignment(old_stmt.idx, old_stmt.dst, new_call, **old_stmt.tags)
        else:
            new_stmt = new_call

        statements = block.statements[:-1] + [new_stmt]

        # remove N-2 continuous stack assignment
        if len(deobf_content) > 2:
            statements = self._remove_stack_byte_stores(statements, len(deobf_content))

        # remove writes to rdx, rcx, r8, and r9
        if self.project.arch.name == "AMD64":
            statements = [stmt for stmt in statements if not self._stmt_sets_win64_reg_arg(stmt)]

        # return the new block
        return block.copy(statements=statements)

    @staticmethod
    def _remove_stack_byte_stores(statements: list, content_len: int, spacing: int = 8) -> list:
        """
        Drop one run of evenly spaced stack stores of small constants.

        Candidate statements are assignments of a constant ``<= 0xFF`` to a stack virtual variable with an integer
        stack offset. If several candidates write the same offset, the last one wins.

        :param statements:  Statements of the block (not modified).
        :param content_len: Length of the deobfuscated content; at most ``content_len - 1`` stores are removed.
        :param spacing:     Expected distance in bytes between two neighboring stores.
        :return:            The statements without the first matching run, or ``statements`` if there is none.
        """
        offset_to_stmt_idx: dict[int, int] = {}
        for idx, stmt in enumerate(statements):
            if (
                isinstance(stmt, Assignment)
                and isinstance(stmt.dst, VirtualVariable)
                and stmt.dst.was_stack
                and isinstance(stmt.dst.stack_offset, int)
                and isinstance(stmt.src, Const)
                and stmt.src.value <= 0xFF
            ):
                offset_to_stmt_idx[stmt.dst.stack_offset] = idx

        sorted_offsets = sorted(offset_to_stmt_idx)
        if not sorted_offsets:
            return statements

        distance = min(content_len - 2, len(sorted_offsets) - 1)
        for start_idx in range(len(sorted_offsets) - distance):
            # only the two ends of the window are compared against the expected span
            if sorted_offsets[start_idx] + spacing * distance == sorted_offsets[start_idx + distance]:
                # found them; remove these statements
                removed = {offset_to_stmt_idx[off] for off in sorted_offsets[start_idx : start_idx + distance + 1]}
                return [stmt for idx, stmt in enumerate(statements) if idx not in removed]
        return statements

    @staticmethod
    def _stmt_sets_win64_reg_arg(stmt) -> bool:
        """
        Test whether ``stmt`` assigns to one of the registers in WIN64_REG_ARGS.
        """
        return isinstance(stmt, Assignment) and isinstance(stmt.dst, Register) and stmt.dst.reg_offset in WIN64_REG_ARGS


register_optimization_pass(StringObfType3Rewriter, presets=["fast", "full"])
@@ -0,0 +1,47 @@
1
+ from __future__ import annotations
2
+ from ailment.statement import Call
3
+ from ailment.expression import Const
4
+ import claripy
5
+
6
+ from angr.analyses.decompiler.peephole_optimizations.base import PeepholeOptimizationExprBase
7
+ from angr.analyses.decompiler.peephole_optimizations import EXPR_OPTS
8
+ from angr.errors import AngrCallableMultistateError
9
+
10
+
11
class StringObfType1PeepholeOptimizer(PeepholeOptimizationExprBase):
    """
    Integrate type-1 and type-2 deobfuscated strings into decompilation output.

    A call to a known type-1 or type-2 string loader whose arguments are all constants is executed concretely and
    replaced by a constant holding the returned value.
    """

    __slots__ = ()

    NAME = "Simplify Type 1/2 string deobfuscation references"
    expr_classes = (Call,)

    def optimize(self, expr: Call, **kwargs):
        """
        Try to replace a string-loader call with the constant it returns.

        :param expr:    The call expression to optimize.
        :return:        A Const carrying the tags of ``expr``, or None if the call cannot be optimized away.
        """
        target = expr.target
        if not isinstance(target, Const):
            return None

        obfuscations = self.kb.obfuscations
        if (
            target.value not in obfuscations.type1_string_loader_candidates
            and target.value not in obfuscations.type2_string_loader_candidates
        ):
            return None

        # this is a function calling a type1 or a type2 string loader
        # optimize this call away if possible
        if not expr.args or not all(isinstance(arg, Const) for arg in expr.args):
            return None

        # execute the function with the given argument
        func = self.kb.functions[target.value]
        func_call = self.project.factory.callable(
            target.value, concrete_only=True, cc=func.calling_convention, prototype=func.prototype
        )
        try:
            out = func_call(*[claripy.BVV(arg.value, arg.bits) for arg in expr.args])
        except AngrCallableMultistateError:
            return None

        # The callable may hand back nothing at all (e.g. no return value could be read); treat that the same as a
        # non-concrete result instead of failing on the attribute access.
        if out is None or not out.concrete:
            return None

        # FIXME: use out.bits when the function prototype recovery is more reliable
        return Const(None, None, out.concrete_value, self.project.arch.bits, **expr.tags)


EXPR_OPTS.append(StringObfType1PeepholeOptimizer)
@@ -0,0 +1,137 @@
1
+ # pylint:disable=missing-class-docstring
2
+ from __future__ import annotations
3
+ import logging
4
+ from typing import TYPE_CHECKING
5
+ from collections import defaultdict
6
+ from dataclasses import dataclass
7
+
8
+ from sortedcontainers import SortedDict
9
+
10
+ from angr.analyses import Analysis, AnalysesHub
11
+ from angr.utils.bits import ffs
12
+
13
+ if TYPE_CHECKING:
14
+ from angr.knowledge_plugins import Function
15
+
16
+
17
+ log = logging.getLogger(__name__)
18
+
19
+
20
class OverlappingFunctionsAnalysis(Analysis):
    """
    Identify functions with interleaved blocks.

    After construction, ``overlapping_functions`` maps the address of a function to the addresses of all
    higher-addressed functions that start before the end of its last block. Alignment functions and functions
    without any block are ignored.
    """

    overlapping_functions: dict[int, list[int]]

    def __init__(self):
        self.overlapping_functions = defaultdict(list)

        # function address -> end address (exclusive) of its highest block, sorted by function address
        func_end_by_addr = SortedDict()
        for func in self.project.kb.functions.values():
            if func.is_alignment:
                continue
            func_end = max((block.addr + block.size for block in func.blocks), default=None)
            if func_end is None:
                # a function without blocks has no extent and cannot overlap anything
                continue
            func_end_by_addr[func.addr] = func_end

        for idx, (addr, func_end) in enumerate(func_end_by_addr.items()):
            # only functions at higher addresses can start inside the extent of this one
            for other_addr in func_end_by_addr.islice(idx + 1):
                if other_addr >= func_end:
                    break

                self.overlapping_functions[addr].append(other_addr)
43
+
44
+
45
class FunctionAlignmentAnalysis(Analysis):
    """
    Determine typical function alignment

    After construction, ``alignment`` is the detected alignment in bytes, or None if no alignment is common enough
    (or no suitable function exists).
    """

    alignment: int | None

    def __init__(self):
        self.alignment = None

        functions = self.project.kb.functions
        if len(functions) == 0:
            if self.project.kb.cfgs.get_most_accurate() is None:
                log.warning("Please run CFGFast analysis first, to identify functions")
            return

        # ffs(address) -> number of functions starting at an address with that value
        # NOTE(review): ffs presumably yields the index of the lowest set bit; verify against angr.utils.bits
        alignment_bins = defaultdict(int)
        for func in functions.values():
            if func.is_alignment or func.is_plt or func.is_simprocedure:
                continue
            alignment_bins[ffs(func.addr)] += 1

        if not alignment_bins:
            # every function was filtered out; there is nothing to take the maximum of
            return
        count = sum(alignment_bins.values())

        # FIXME: Higher alignment values will be naturally aligned

        typical_alignment = max(alignment_bins, key=alignment_bins.get)
        if count > 10 and alignment_bins[typical_alignment] >= count / 4:  # XXX: cutoff
            # clamp negative ffs results to 0 so the shift stays valid
            self.alignment = 1 << max(typical_alignment, 0)
            log.debug("Function alignment appears to be %d bytes", self.alignment)
73
+
74
+
75
@dataclass
class AtypicallyAlignedFunction:
    """
    A function whose start address does not satisfy the alignment expected for the binary.
    """

    # the function with the unusual start address
    function: Function
    # the alignment, in bytes, the function was checked against
    expected_alignment: int
79
+
80
+
81
@dataclass
class PatchedOutFunctionality:
    """
    A pair of overlapping functions where the second one has no incoming call-graph edge.
    """

    # the lower-addressed function whose extent covers the start of the other one
    patched_function: Function
    # the overlapping function that nothing calls
    patched_out_function: Function
85
+
86
+
87
class PatchFinderAnalysis(Analysis):
    """
    Looks for binary patches using some basic heuristics:
    - Looking for interleaved functions
    - Looking for unaligned functions

    Results are stored in ``possibly_patched_out`` (overlapping functions that nothing calls) and
    ``atypical_alignments`` (functions that violate the typical function alignment).
    """

    # FIXME: Possible additional heuristics:
    # - Jumps out to end of function, then back
    # - Looking for patch jumps, e.g. push <addr>; ret
    # - Looking for instruction partials broken by a patch (nodecode)
    # - Unusual stack manipulation

    atypical_alignments: list[AtypicallyAlignedFunction]
    possibly_patched_out: list[PatchedOutFunctionality]

    def __init__(self):
        self.atypical_alignments = []
        self.possibly_patched_out = []

        kb = self.project.kb
        if len(kb.functions) == 0:
            if kb.cfgs.get_most_accurate() is None:
                log.warning("Please run CFGFast analysis first, to identify functions")
            return

        # In CFGFast with scanning enabled, a function may be created from unreachable blocks within another function.
        # Search for interleaved/overlapping functions to identify possible patches.
        overlapping_functions = self.project.analyses.OverlappingFunctions().overlapping_functions
        for addr, overlapping_func_addrs in overlapping_functions.items():
            func = kb.functions[addr]

            # Are the overlapping functions reachable?
            for overlapping_addr in overlapping_func_addrs:
                overlapping_func = kb.functions[overlapping_addr]
                if kb.callgraph.in_degree(overlapping_addr) == 0:
                    self.possibly_patched_out.append(PatchedOutFunctionality(func, overlapping_func))
                    # FIXME: What does the patch do?

        # Look for unaligned functions
        expected_alignment = self.project.analyses.FunctionAlignment().alignment
        if expected_alignment is None or expected_alignment <= self.project.arch.instruction_alignment:
            # no typical alignment, or one that the architecture enforces on every instruction anyway
            return

        misalignment_mask = expected_alignment - 1
        for func in kb.functions.values():
            if func.is_alignment or func.is_plt or func.is_simprocedure:
                continue
            if func.addr & misalignment_mask:
                self.atypical_alignments.append(AtypicallyAlignedFunction(func, expected_alignment))


AnalysesHub.register_default("OverlappingFunctions", OverlappingFunctionsAnalysis)
AnalysesHub.register_default("FunctionAlignment", FunctionAlignmentAnalysis)
AnalysesHub.register_default("PatchFinder", PatchFinderAnalysis)
@@ -0,0 +1,282 @@
1
+ # pylint:disable=missing-class-docstring
2
+ from __future__ import annotations
3
+ from enum import Enum, auto
4
+ from dataclasses import dataclass
5
+ from weakref import ref
6
+ from collections import defaultdict
7
+
8
+ from networkx import DiGraph
9
+ from networkx.algorithms.shortest_paths import single_target_shortest_path_length
10
+
11
+ from angr.sim_state import SimState
12
+ from angr.engines.successors import SimSuccessors
13
+ from angr.knowledge_plugins.cfg import CFGModel, CFGNode
14
+ from .analysis import Analysis, AnalysesHub
15
+
16
+
17
class Unreachable(Exception):
    """
    Raised when backtracking moves past the start marker, i.e. there is no parent marker left to go back to.
    """
19
+
20
+
21
@dataclass(eq=False)
class SimStateMarker:
    """
    Lightweight stand-in for a state reached at ``addr`` by way of ``parent``.

    ``eq=False`` keeps the default identity-based comparison and hashing, so two markers with equal field values
    are still distinct objects when used in sets or as graph nodes.
    """

    # address the marked state is at
    addr: int
    # marker of the state this one was reached from; None for a root marker
    parent: SimStateMarker | None = None
    # set once the search should no longer go through this marker
    banned: bool = False
    # counter; starts at zero
    misses: int = 0

    def __repr__(self):
        # never recurse into the parent chain: only show whether there is one
        parent_repr = "..." if self.parent is not None else "None"
        return (
            f"SimStateMarker(addr={self.addr:#x}, parent={parent_repr}, "
            f"banned={self.banned}, misses={self.misses})"
        )
31
+
32
+
33
class SuccessorsKind(Enum):
    """
    Outcome of looking for a particular successor address after stepping a state.
    """

    # a satisfiable successor at the address exists
    SAT = auto()
    # a successor at the address exists, but only among the unsatisfiable ones
    UNSAT = auto()
    # no successor at the address was found
    MISSING = auto()
37
+
38
+
39
@dataclass
class TestPathReport:
    """
    Result of testing one candidate path.
    """

    # position in the tested address trace -> marker for the state at that position
    path_markers: dict[int, SimStateMarker]
    # how the test ended: SAT if the whole trace was followed, otherwise why it stopped
    termination: SuccessorsKind
43
+
44
+
45
def nilref():
    """
    Placeholder used where a weak reference is expected but there is no object: calling it yields None.
    """
    return None
47
+
48
+
49
class Pathfinder(Analysis):
    """
    Find a feasible execution path from a start state to a goal address, using a CFG as the guide.

    ``analyze`` repeatedly asks ``find_best_hypothesis_path`` for a candidate sequence of block addresses and lets
    ``test_path`` step states along it. Every transition that has been considered is recorded as a
    ``SimStateMarker`` in ``transition_cache``; markers at which a candidate failed are banned so that later
    candidates avoid them.
    """

    def __init__(self, start_state: SimState, goal_addr: int, cfg: CFGModel, cache_size=10000):
        """
        :param start_state: State to start the search from.
        :param goal_addr:   Address of the block to reach.
        :param cfg:         CFG model used to propose paths. Its graph is modified in place (see the HACK below).
        :param cache_size:  Maximum number of states kept alive by ``cache_state``.
        :raises ValueError: If ``goal_addr`` has no node in ``cfg``.
        """
        self.start_state = start_state
        self.goal_addr = goal_addr
        self.goal_state: SimState | None = None
        self.cfg = cfg
        self.cache_size = cache_size

        # HACK HACK HACK HACK TODO FIXME FISH PLEASE GET RID OF THIS
        # For every syscall node, add an Ijk_Ret edge to each fake-return successor of its predecessors.
        extra_edges = []
        for node in self.cfg.graph.nodes:
            if node.is_syscall:
                for pred in self.cfg.graph.pred[node]:
                    for succ, data in self.cfg.graph.succ[pred].items():
                        if data["jumpkind"] == "Ijk_FakeRet":
                            extra_edges.append((node, succ))
        # edges are added after the scan so the graph is not modified while it is being iterated
        for node, succ in extra_edges:
            self.cfg.graph.add_edge(node, succ, jumpkind="Ijk_Ret")

        goal_node = self.cfg.get_any_node(goal_addr)
        if goal_node is None:
            raise ValueError(f"Node {goal_addr:#x} is not in graph")

        self.start_marker = SimStateMarker(start_state.addr)
        # graph of markers; each node carries a "state" attribute that is called to obtain the state or None
        self.transition_cache: DiGraph[SimStateMarker] = DiGraph()
        self.transition_cache.add_node(self.start_marker, state=ref(start_state))
        # block address -> number of CFG edges to the goal; addresses that cannot reach the goal are absent.
        # dict() accepts both an iterator of (node, length) pairs and a mapping, whichever networkx returns.
        self.base_heuristic: dict[int, int] = {
            node.addr: dist for node, dist in dict(single_target_shortest_path_length(cfg.graph, goal_node)).items()
        }
        # insertion-ordered dict used as a bounded cache; only the keys (states) matter
        self.state_cache = {}
        self.unsat_markers = set()
        # block address -> penalty added to the heuristic each time the address is chosen
        self.extra_weight = defaultdict(int)

        # incremental search state of find_best_hypothesis_path
        self._search_frontier_marker = self.start_marker
        self._search_path: list[tuple[int, str]] = [(self.start_marker.addr, "Ijk_Boring")]
        self._search_stack = []
        self._search_backtrack_to = {self.start_marker}
        self._search_address_backtrack_points = {self.start_marker.addr: self.start_marker}

    def cache_state(self, state: SimState):
        """
        Hold a strong reference to ``state`` in the bounded state cache, marking it as most recently used.
        """
        # pop + re-insert moves the key to the end of the insertion order
        self.state_cache[state] = self.state_cache.pop(state, None)
        if len(self.state_cache) > self.cache_size:
            # evict the entry that was inserted first
            self.state_cache.pop(next(iter(self.state_cache)))

    def marker_to_state(self, marker: SimStateMarker) -> SimState | None:
        """
        Return the state recorded for ``marker``, or None if there is none (anymore).
        """
        return self.transition_cache.nodes[marker]["state"]()

    def analyze(self) -> bool:
        """
        Run the search until the goal is reached.

        :return: True once a path has been followed to the goal; ``goal_state`` is set to the final state. The
                 method has no other return; failures of the search surface as exceptions raised by
                 ``find_best_hypothesis_path``.
        """
        while True:
            search_path = self.find_best_hypothesis_path()
            result = self.test_path(search_path)
            if result.termination == SuccessorsKind.SAT:
                self.goal_state = self.marker_to_state(result.path_markers[len(search_path) - 1])
                return True
            # the marker furthest along the path is where the candidate broke down
            marker = result.path_markers[max(result.path_markers)]
            marker.banned = True
            self._search_backtrack_to.add(marker)
            if result.termination == SuccessorsKind.UNSAT:
                self.unsat_markers.add(marker)

    def _search_backtrack(self):
        """
        Undo the last step of the current search path.

        :raises Unreachable: If the frontier already is a marker without parent.
        """
        frontier = self._search_frontier_marker
        if self._search_address_backtrack_points[frontier.addr] is frontier:
            self._search_address_backtrack_points.pop(frontier.addr)

        self._search_frontier_marker = frontier.parent
        if self._search_frontier_marker is None:
            raise Unreachable

        # revert the effect the removed step had on the simulated call stack
        addr, jumpkind = self._search_path.pop()
        if jumpkind == "Ijk_Ret":
            self._search_stack.append(addr)
        elif jumpkind == "Ijk_Call" or jumpkind.startswith("Ijk_Sys"):
            self._search_stack.pop()

    def find_best_hypothesis_path(self) -> tuple[int, ...]:
        """
        Propose the next candidate path through the CFG.

        First rewinds the previous search to the pending backtrack points (or restarts it if the start marker is
        among them), then greedily extends the path with the lowest-weight non-banned successor until the goal
        address is reached.

        :return:                The block addresses of the candidate path, beginning with the start address.
        :raises RuntimeError:   If rewinding to the pending backtrack points moves past the start marker.
        :raises Unreachable:    If the greedy extension runs out of options all the way back to the start marker.
        """
        assert self._search_backtrack_to, "Uhh every iteration should set at least one backtrack point"
        if self.start_marker in self._search_backtrack_to:
            # start over
            self._search_frontier_marker = self.start_marker
            self._search_path: list[tuple[int, str]] = [(self.start_marker.addr, "Ijk_Boring")]
            self._search_stack = []
            self._search_backtrack_to = set()
        else:
            # rewind until every pending backtrack point has been passed
            while self._search_backtrack_to:
                self._search_backtrack_to.discard(self._search_frontier_marker)
                try:
                    self._search_backtrack()
                except Unreachable as e:
                    raise RuntimeError("oops") from e

        while self._search_path[-1][0] != self.goal_addr:
            banned = {
                marker.addr for marker in self.transition_cache.succ[self._search_frontier_marker] if marker.banned
            }
            current_node = self.cfg.get_any_node(self._search_path[-1][0])
            # NOTE(review): the Ijk_Ret test indexes self._search_stack[-1] without checking that the stack is
            # non-empty; a return edge met before any call edge looks like it would raise IndexError -- confirm.
            options = [
                (node, data["jumpkind"], self.base_heuristic[node.addr] + self.extra_weight[node.addr])
                for node, data in self.cfg.graph.succ[current_node].items()
                if data["jumpkind"] != "Ijk_FakeRet"
                and node.addr not in banned
                and node.addr in self.base_heuristic
                and (data["jumpkind"] != "Ijk_Ret" or node.addr == self._search_stack[-1])
            ]
            if not options:
                # backtrack
                self._search_frontier_marker.banned = True
                self._search_backtrack()
                continue

            # options is non-empty here, so min() needs no default
            best_node, best_jumpkind, best_weight = min(options, key=lambda option: option[2])

            assert isinstance(best_jumpkind, str)
            assert isinstance(best_node, CFGNode)
            self.extra_weight[best_node.addr] += 1
            self._search_path.append((best_node.addr, best_jumpkind))

            if best_jumpkind == "Ijk_Call" or best_jumpkind.startswith("Ijk_Sys"):
                # remember where the call is expected to return to: the fake-return successor, if there is one
                self._search_stack.append(
                    next(
                        (
                            node.addr
                            for node, data in self.cfg.graph.succ[current_node].items()
                            if data["jumpkind"] == "Ijk_FakeRet"
                        ),
                        None,
                    )
                )
            elif best_jumpkind == "Ijk_Ret":
                self._search_stack.pop()

            # reuse the marker of this transition if it has been seen before, otherwise create one without a state
            frontier_marker_nullable = next(
                (
                    marker
                    for marker in self.transition_cache.succ[self._search_frontier_marker]
                    if marker.addr == best_node.addr
                ),
                None,
            )
            if frontier_marker_nullable is None:
                new_marker = SimStateMarker(best_node.addr, self._search_frontier_marker)
                self.transition_cache.add_node(new_marker, state=nilref)
                self.transition_cache.add_edge(self._search_frontier_marker, new_marker)
                self._search_frontier_marker = new_marker
            else:
                self._search_frontier_marker = frontier_marker_nullable

            if self._search_frontier_marker.addr not in self._search_address_backtrack_points:
                self._search_address_backtrack_points[self._search_frontier_marker.addr] = self._search_frontier_marker

            # TODO does this go above the above stanza?
            # the choice was a tie between several options: come back here on the next round
            if sum(weight == best_weight for _, _, weight in options) != 1:
                self._search_backtrack_to.add(self._search_address_backtrack_points[self._search_frontier_marker.addr])

        return tuple(addr for addr, _ in self._search_path)

    def diagnose_unsat(self, state: SimState):
        """
        Placeholder; does nothing.
        """

    def test_path(self, bbl_addr_trace: tuple[int, ...]) -> TestPathReport:
        """
        Step states along ``bbl_addr_trace`` and report how far the trace could be followed.

        Stepping resumes from the deepest marker on the trace whose state is still available.

        :param bbl_addr_trace:  Block addresses to follow; the first one must be the start address.
        :return:                A report whose termination is SAT if the whole trace was followed, otherwise UNSAT
                                or MISSING together with the markers around the failing transition.
        """
        assert bbl_addr_trace[0] == self.start_marker.addr, "Paths must begin with the start state"

        # longest prefix of the trace that already has markers
        known_markers = [self.start_marker]
        for addr in bbl_addr_trace[1:]:
            for succ in self.transition_cache.succ[known_markers[-1]]:
                if succ.addr == addr:
                    break
            else:
                break
            known_markers.append(succ)

        # walk the prefix backwards to the deepest marker that still has a state
        marker = None
        for ri, marker_ in enumerate(reversed(known_markers)):
            i = len(known_markers) - 1 - ri
            state: SimState = self.transition_cache.nodes[marker_]["state"]()
            marker = marker_
            if state is not None:
                break
        else:
            # raised explicitly so the guard is still in place when assertions are disabled
            raise AssertionError("The first item in known_markers should always have a resolvable weakref")

        while i != len(bbl_addr_trace) - 1:
            assert state.addr == bbl_addr_trace[i]

            marker.misses += 1
            successors = state.step(strict_block_end=True)
            succ, kind = find_successor(successors, bbl_addr_trace[i + 1])

            # cache state
            if i + 1 < len(known_markers):
                succ_marker = known_markers[i + 1]
            else:
                succ_marker = SimStateMarker(bbl_addr_trace[i + 1], parent=marker)
                self.transition_cache.add_node(succ_marker)
                self.transition_cache.add_edge(marker, succ_marker)
            self.transition_cache.nodes[succ_marker]["state"] = ref(succ) if succ is not None else nilref
            if succ is not None:
                self.cache_state(succ)

            if kind == SuccessorsKind.SAT:
                assert succ is not None
                state = succ
                marker = succ_marker
                i += 1
                continue
            if kind == SuccessorsKind.UNSAT:
                assert succ is not None
                return TestPathReport(
                    path_markers={i: marker, i + 1: succ_marker},
                    termination=SuccessorsKind.UNSAT,
                )
            return TestPathReport(path_markers={i: marker, i + 1: succ_marker}, termination=SuccessorsKind.MISSING)

        return TestPathReport(path_markers={i: marker}, termination=SuccessorsKind.SAT)
265
+
266
+
267
def find_successor(successors: SimSuccessors, target_addr: int) -> tuple[SimState | None, SuccessorsKind]:
    """
    Look for a successor at ``target_addr`` among the results of one step.

    The flat successors are searched first, then the unsatisfiable ones. Finally, each unconstrained successor is
    copied and its instruction pointer constrained to ``target_addr``; the first satisfiable copy is used.

    :param successors:  The successors produced by stepping a state.
    :param target_addr: Address the successor must be at.
    :return:            The matching state (None if there is none) and the kind of match.
    """
    sat_match = next((cand for cand in successors.flat_successors if cand.addr == target_addr), None)
    if sat_match is not None:
        return sat_match, SuccessorsKind.SAT

    unsat_match = next((cand for cand in successors.unsat_successors if cand.addr == target_addr), None)
    if unsat_match is not None:
        return unsat_match, SuccessorsKind.UNSAT

    for unconstrained in successors.unconstrained_successors:
        # work on a copy so the original successor does not pick up the extra constraint
        pinned = unconstrained.copy()
        pinned.add_constraints(pinned._ip == target_addr)
        if pinned.satisfiable():
            return pinned, SuccessorsKind.SAT

    return None, SuccessorsKind.MISSING


AnalysesHub.register_default("Pathfinder", Pathfinder)
@@ -197,6 +197,11 @@ def handle_printf(
197
197
  buf_data = state.get_values(buf_atoms)
198
198
  if buf_data is not None:
199
199
  buf_data = buf_data.extract(0, len(buf_data) // 8 - 1, archinfo.Endness.BE)
200
+ else:
201
+ top_val = state.top(state.arch.bits)
202
+ for defn in state.get_definitions(atom):
203
+ top_val = state.annotate_with_def(top_val, defn)
204
+ buf_data = MultiValues(top_val)
200
205
  elif fmt == "%u":
201
206
  buf_atoms = atom
202
207
  buf_data = state.get_concrete_value(buf_atoms)
@@ -217,7 +222,9 @@ def handle_printf(
217
222
  else:
218
223
  _l.warning("Unimplemented printf format string %s", fmt)
219
224
  buf_atoms = set()
220
- buf_data = None
225
+ top_val = state.top(state.arch.bits)
226
+ buf_data = MultiValues(top_val)
227
+
221
228
  if result is not None and buf_data is not None:
222
229
  result = result.concat(buf_data)
223
230
  source_atoms.update(buf_atoms)