angr 9.2.114__py3-none-macosx_11_0_arm64.whl → 9.2.116__py3-none-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/__main__.py +2 -2
- angr/analyses/cfg/cfg_fast.py +1 -1
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +2 -2
- angr/analyses/decompiler/decompilation_options.py +2 -12
- angr/analyses/decompiler/decompiler.py +14 -3
- angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +3 -0
- angr/analyses/decompiler/optimization_passes/cross_jump_reverter.py +0 -3
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +2 -3
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +9 -2
- angr/analyses/decompiler/optimization_passes/ret_deduplicator.py +2 -0
- angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +2 -0
- angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +5 -1
- angr/analyses/decompiler/structured_codegen/c.py +10 -13
- angr/analyses/decompiler/structuring/__init__.py +6 -2
- angr/analyses/decompiler/structuring/dream.py +3 -4
- angr/analyses/decompiler/structuring/phoenix.py +29 -93
- angr/analyses/decompiler/structuring/recursive_structurer.py +0 -3
- angr/analyses/decompiler/structuring/sailr.py +111 -0
- angr/analyses/decompiler/structuring/structurer_base.py +2 -5
- angr/analyses/decompiler/structuring/structurer_nodes.py +3 -3
- angr/analyses/reaching_definitions/dep_graph.py +62 -5
- angr/analyses/reaching_definitions/function_handler.py +11 -1
- angr/analyses/reaching_definitions/function_handler_library/__init__.py +11 -0
- angr/analyses/reaching_definitions/function_handler_library/stdio.py +262 -0
- angr/analyses/reaching_definitions/function_handler_library/stdlib.py +157 -0
- angr/analyses/reaching_definitions/function_handler_library/string.py +93 -0
- angr/analyses/reaching_definitions/function_handler_library/unistd.py +23 -0
- angr/analyses/reaching_definitions/rd_state.py +28 -29
- angr/analyses/variable_recovery/engine_vex.py +0 -9
- angr/analyses/vfg.py +13 -14
- angr/code_location.py +4 -4
- angr/engines/pcode/cc.py +2 -0
- angr/engines/vex/heavy/heavy.py +1 -1
- angr/knowledge_plugins/key_definitions/live_definitions.py +12 -13
- angr/lib/angr_native.dylib +0 -0
- angr/procedures/libc/strlen.py +5 -2
- angr/sim_variable.py +3 -18
- angr/state_plugins/solver.py +3 -9
- angr/storage/memory_mixins/address_concretization_mixin.py +1 -1
- angr/storage/memory_mixins/paged_memory/pages/cooperation.py +2 -1
- angr/storage/memory_mixins/regioned_memory/abstract_merger_mixin.py +4 -2
- angr/storage/memory_mixins/regioned_memory/regioned_memory_mixin.py +5 -5
- angr/storage/memory_mixins/regioned_memory/static_find_mixin.py +3 -3
- {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/METADATA +7 -7
- {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/RECORD +50 -44
- {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/WHEEL +1 -1
- {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/LICENSE +0 -0
- {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/entry_points.txt +0 -0
- {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import networkx
|
|
4
|
+
|
|
5
|
+
from ..utils import structured_node_is_simple_return
|
|
6
|
+
from ....utils.graph import PostDominators, TemporaryNode
|
|
7
|
+
from .phoenix import PhoenixStructurer
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SAILRStructurer(PhoenixStructurer):
    """
    Phoenix-based structuring algorithm from the USENIX 2024 paper SAILR.

    The algorithm as a whole is spread over this class and a number of decompiler optimization passes; those passes
    can be located by searching for optimizations which reference this class's NAME.

    At a high level, SAILR differs from the traditional Phoenix schema-based algorithm in three ways:
        1. It structures the graph recursively rather than in a single pass, so decisions can be made based on the
           current state of what the decompilation would look like.
        2. It performs deoptimizations targeting specific optimizations that introduce gotos and mis-structured
           code. This is only possible because of the recursive nature of the algorithm.
        3. It uses a more advanced heuristic for virtualizing edges, which is implemented in this class.

    Additionally, some changes in Phoenix are only activated when SAILR is used.
    """

    NAME = "sailr"

    def __init__(self, region, improve_phoenix=True, **kwargs):
        """
        Forward everything to the Phoenix structurer, passing ``improve_phoenix`` as its ``improve_algorithm``.
        """
        super().__init__(region, improve_algorithm=improve_phoenix, **kwargs)

    def _order_virtualizable_edges(self, graph: networkx.DiGraph, edges: list, node_seq: dict[Any, int]) -> list:
        """
        Narrow the candidate edges down to the "best" ones to virtualize, using the heuristics H1-H3 below.

        :param graph:    The graph the candidate edges belong to.
        :param edges:    Candidate edges, each a (src, dst) pair.
        :param node_seq: Passed through untouched to the parent class's ordering.
        :return:         A single-edge list as soon as one heuristic singles out a winner; otherwise whatever the
                         parent class's ordering returns for the remaining candidates.
        """
        if len(edges) <= 1:
            return edges

        # TODO: the graph we have here is not an accurate graph and can have no "entry node". We need a better graph.
        entry_node = next((node for node in graph.nodes if graph.in_degree(node) == 0), None)
        if entry_node is None:
            # none of the heuristics can run without an entry node, so use the default ordering right away
            return super()._order_virtualizable_edges(graph, edges, node_seq)

        # the first heuristics are based on per-edge sibling and post-dominator counts, so collect both for each
        # candidate edge on a copy of the graph that has this edge removed
        sibling_counts = {}
        postdom_counts = {}
        for edge in edges:
            _, dst = edge
            trimmed_graph = networkx.DiGraph(graph)
            trimmed_graph.remove_edge(*edge)
            remaining_in_edges = trimmed_graph.in_degree(dst)
            if remaining_in_edges == 0:
                continue

            sibling_counts[edge] = remaining_in_edges
            post_dom_graph = PostDominators(trimmed_graph, entry_node).post_dom
            # only count post-dominator pairs that do not involve a TemporaryNode
            real_pairs = {
                (postdom_node, dominatee)
                for postdom_node, dominatee in post_dom_graph.edges()
                if not (isinstance(postdom_node, TemporaryNode) or isinstance(dominatee, TemporaryNode))
            }
            postdom_counts[edge] = len(real_pairs)

        best_edges = edges

        # H1: the edge that has the least amount of sibling edges should be virtualized first
        # this is believed to reduce the amount of virtualization needed in future rounds and increase
        # the edges that enter a single outer-scope if-stmt
        if sibling_counts:
            fewest_siblings = min(sibling_counts.values())
            best_edges = [edge for edge, cnt in sibling_counts.items() if cnt == fewest_siblings]
            if len(best_edges) == 1:
                return best_edges

            # the next heuristic only looks at the edges that survived this one
            surviving_postdom_counts = {edge: cnt for edge, cnt in postdom_counts.items() if edge in best_edges}
            if surviving_postdom_counts:
                postdom_counts = surviving_postdom_counts

        # H2: the edge, when removed, that causes the most post-dominators of the graph should be virtualized
        # first. this is believed to make the code more linear looking by reducing the amount of scopes.
        # informally, we believe post-dominators to be an inverse indicator of the number of scopes present
        if postdom_counts:
            most_postdoms = max(postdom_counts.values())
            best_edges = [edge for edge, cnt in postdom_counts.items() if cnt == most_postdoms]
            if len(best_edges) == 1:
                return best_edges

        # H3: the edge that goes directly to a return statement should be virtualized first
        # this is believed to be good because it can be corrected in later optimization by duplicating
        # the return
        return_edges = [
            (src, dst)
            for src, dst in best_edges
            if graph.has_node(dst) and structured_node_is_simple_return(dst, graph)
        ]
        if len(return_edges) == 1:
            return return_edges
        if return_edges:
            best_edges = return_edges

        # still a tie: fall back to the default ordering for what is left
        return super()._order_virtualizable_edges(graph, best_edges, node_seq)
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
# pylint:disable=unused-argument
|
|
2
2
|
from typing import Optional, Any, TYPE_CHECKING
|
|
3
|
-
from collections import OrderedDict as ODict
|
|
4
3
|
from collections import defaultdict, OrderedDict
|
|
5
4
|
import logging
|
|
6
5
|
|
|
@@ -53,7 +52,6 @@ class StructurerBase(Analysis):
|
|
|
53
52
|
func: Optional["Function"] = None,
|
|
54
53
|
case_entry_to_switch_head: dict[int, int] | None = None,
|
|
55
54
|
parent_region=None,
|
|
56
|
-
improve_structurer=True,
|
|
57
55
|
**kwargs,
|
|
58
56
|
):
|
|
59
57
|
self._region: "GraphRegion" = region
|
|
@@ -61,7 +59,6 @@ class StructurerBase(Analysis):
|
|
|
61
59
|
self.function = func
|
|
62
60
|
self._case_entry_to_switch_head = case_entry_to_switch_head
|
|
63
61
|
self._parent_region = parent_region
|
|
64
|
-
self._improve_structurer = improve_structurer
|
|
65
62
|
|
|
66
63
|
self.cond_proc = (
|
|
67
64
|
condition_processor if condition_processor is not None else ConditionProcessor(self.project.arch)
|
|
@@ -741,8 +738,8 @@ class StructurerBase(Analysis):
|
|
|
741
738
|
#
|
|
742
739
|
|
|
743
740
|
def _reorganize_switch_cases(
|
|
744
|
-
self, cases:
|
|
745
|
-
) ->
|
|
741
|
+
self, cases: OrderedDict[int | tuple[int, ...], SequenceNode]
|
|
742
|
+
) -> OrderedDict[int | tuple[int, ...], SequenceNode]:
|
|
746
743
|
new_cases = OrderedDict()
|
|
747
744
|
|
|
748
745
|
caseid2gotoaddrs = {}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# pylint:disable=missing-class-docstring
|
|
2
2
|
from typing import Any
|
|
3
|
-
from collections import OrderedDict
|
|
3
|
+
from collections import OrderedDict
|
|
4
4
|
|
|
5
5
|
import claripy
|
|
6
6
|
import ailment
|
|
@@ -358,9 +358,9 @@ class SwitchCaseNode(BaseNode):
|
|
|
358
358
|
"addr",
|
|
359
359
|
)
|
|
360
360
|
|
|
361
|
-
def __init__(self, switch_expr, cases:
|
|
361
|
+
def __init__(self, switch_expr, cases: OrderedDict[int | tuple[int, ...], SequenceNode], default_node, addr=None):
|
|
362
362
|
self.switch_expr = switch_expr
|
|
363
|
-
self.cases:
|
|
363
|
+
self.cases: OrderedDict[int | tuple[int, ...], SequenceNode] = cases
|
|
364
364
|
self.default_node = default_node
|
|
365
365
|
self.addr = addr
|
|
366
366
|
|
|
@@ -213,6 +213,68 @@ class DepGraph:
|
|
|
213
213
|
|
|
214
214
|
self.graph.add_edge(memory_location_definition, definition)
|
|
215
215
|
|
|
216
|
+
@overload
|
|
217
|
+
def find_definitions(
|
|
218
|
+
self,
|
|
219
|
+
*,
|
|
220
|
+
kind: type[A],
|
|
221
|
+
**kwargs: Any,
|
|
222
|
+
) -> list[Definition[A]]: ...
|
|
223
|
+
|
|
224
|
+
@overload
|
|
225
|
+
def find_definitions(
|
|
226
|
+
self,
|
|
227
|
+
*,
|
|
228
|
+
kind: Literal[AtomKind.REGISTER] = AtomKind.REGISTER,
|
|
229
|
+
**kwargs: Any,
|
|
230
|
+
) -> list[Definition[Register]]: ...
|
|
231
|
+
|
|
232
|
+
@overload
|
|
233
|
+
def find_definitions(
|
|
234
|
+
self,
|
|
235
|
+
*,
|
|
236
|
+
kind: Literal[AtomKind.MEMORY] = AtomKind.MEMORY,
|
|
237
|
+
**kwargs: Any,
|
|
238
|
+
) -> list[Definition[MemoryLocation]]: ...
|
|
239
|
+
|
|
240
|
+
@overload
|
|
241
|
+
def find_definitions(
|
|
242
|
+
self,
|
|
243
|
+
*,
|
|
244
|
+
kind: Literal[AtomKind.TMP] = AtomKind.TMP,
|
|
245
|
+
**kwargs: Any,
|
|
246
|
+
) -> list[Definition[Tmp]]: ...
|
|
247
|
+
|
|
248
|
+
@overload
|
|
249
|
+
def find_definitions(
|
|
250
|
+
self,
|
|
251
|
+
*,
|
|
252
|
+
kind: Literal[AtomKind.CONSTANT] = AtomKind.CONSTANT,
|
|
253
|
+
**kwargs: Any,
|
|
254
|
+
) -> list[Definition[ConstantSrc]]: ...
|
|
255
|
+
|
|
256
|
+
@overload
|
|
257
|
+
def find_definitions(
|
|
258
|
+
self,
|
|
259
|
+
*,
|
|
260
|
+
kind: Literal[AtomKind.GUARD] = AtomKind.GUARD,
|
|
261
|
+
**kwargs: Any,
|
|
262
|
+
) -> list[Definition[GuardUse]]: ...
|
|
263
|
+
|
|
264
|
+
@overload
|
|
265
|
+
def find_definitions(
|
|
266
|
+
self,
|
|
267
|
+
*,
|
|
268
|
+
reg_name: int | str = ...,
|
|
269
|
+
**kwargs: Any,
|
|
270
|
+
) -> list[Definition[Register]]: ...
|
|
271
|
+
|
|
272
|
+
@overload
|
|
273
|
+
def find_definitions(self, *, stack_offset: int = ..., **kwargs: Any) -> list[Definition[MemoryLocation]]: ...
|
|
274
|
+
|
|
275
|
+
@overload
|
|
276
|
+
def find_definitions(self, *, const_val: int = ..., **kwargs: Any) -> list[Definition[ConstantSrc]]: ...
|
|
277
|
+
|
|
216
278
|
def find_definitions(self, **kwargs) -> list[Definition]:
|
|
217
279
|
"""
|
|
218
280
|
Filter the definitions present in the graph based on various criteria.
|
|
@@ -299,11 +361,6 @@ class DepGraph:
|
|
|
299
361
|
self, starts: Definition[Atom] | Iterable[Definition[Atom]], *, const_val: int = ..., **kwargs: Any
|
|
300
362
|
) -> list[Definition[ConstantSrc]]: ...
|
|
301
363
|
|
|
302
|
-
@overload
|
|
303
|
-
def find_all_predecessors(
|
|
304
|
-
self, starts: Definition[Atom] | Iterable[Definition[Atom]], **kwargs: Any
|
|
305
|
-
) -> list[Definition[Atom]]: ...
|
|
306
|
-
|
|
307
364
|
def find_all_predecessors(self, starts, **kwargs):
|
|
308
365
|
"""
|
|
309
366
|
Filter the ancestors of the given start node or nodes that match various criteria.
|
|
@@ -254,15 +254,25 @@ class FunctionCallDataUnwrapped(FunctionCallData):
|
|
|
254
254
|
return inner
|
|
255
255
|
|
|
256
256
|
|
|
257
|
+
def _mk_wrapper(func, iself):
|
|
258
|
+
return lambda *args, **kwargs: func(iself, *args, **kwargs)
|
|
259
|
+
|
|
260
|
+
|
|
257
261
|
# pylint: disable=unused-argument, no-self-use
|
|
258
262
|
class FunctionHandler:
|
|
259
263
|
"""
|
|
260
264
|
A mechanism for summarizing a function call's effect on a program for ReachingDefinitionsAnalysis.
|
|
261
265
|
"""
|
|
262
266
|
|
|
263
|
-
def __init__(self, interfunction_level: int = 0):
|
|
267
|
+
def __init__(self, interfunction_level: int = 0, extra_impls: Iterable["FunctionHandler"] | None = None):
|
|
264
268
|
self.interfunction_level: int = interfunction_level
|
|
265
269
|
|
|
270
|
+
if extra_impls is not None:
|
|
271
|
+
for extra_handler in extra_impls:
|
|
272
|
+
for name, func in vars(extra_handler).items():
|
|
273
|
+
if name.startswith("handle_impl_"):
|
|
274
|
+
setattr(self, name, _mk_wrapper(func, self))
|
|
275
|
+
|
|
266
276
|
def hook(self, analysis: "ReachingDefinitionsAnalysis") -> "FunctionHandler":
|
|
267
277
|
"""
|
|
268
278
|
Attach this instance of the function handler to an instance of RDA.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from .stdlib import LibcStdlibHandlers, EnvironAtom, SystemAtom, ExecveAtom
|
|
2
|
+
from .stdio import LibcStdioHandlers, StdoutAtom, StdinAtom
|
|
3
|
+
from .unistd import LibcUnistdHandlers
|
|
4
|
+
from .string import LibcStringHandlers
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class LibcHandlers(LibcStdlibHandlers, LibcStdioHandlers, LibcUnistdHandlers, LibcStringHandlers):
    """
    Single class combining the stdlib, stdio, unistd and string handler classes through multiple inheritance.
    """
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
__all__ = ["EnvironAtom", "SystemAtom", "ExecveAtom", "StdoutAtom", "StdinAtom", "LibcHandlers"]
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import random
|
|
3
|
+
import logging
|
|
4
|
+
from collections.abc import Iterable
|
|
5
|
+
|
|
6
|
+
import archinfo
|
|
7
|
+
import claripy
|
|
8
|
+
|
|
9
|
+
from angr.analyses.reaching_definitions.function_handler import FunctionCallDataUnwrapped, FunctionHandler
|
|
10
|
+
from angr.analyses.reaching_definitions.rd_state import ReachingDefinitionsState
|
|
11
|
+
from angr.knowledge_plugins.key_definitions.atoms import Atom
|
|
12
|
+
from angr.knowledge_plugins.key_definitions.live_definitions import DerefSize
|
|
13
|
+
from angr.sim_type import SimType, SimTypeBottom, SimTypeChar, SimTypeFunction, SimTypeInt, SimTypePointer
|
|
14
|
+
from angr.storage.memory_mixins.paged_memory.pages.multi_values import MultiValues
|
|
15
|
+
|
|
16
|
+
# pylint: disable=no-self-use,missing-class-docstring,unused-argument
|
|
17
|
+
|
|
18
|
+
_l = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class StdoutAtom(Atom):
    """
    Atom describing data handed to an output routine; ``sink`` is the name of that routine.

    Every instance draws a random nonce at construction time and reports it as its only identity component.
    """

    def __init__(self, sink: str, size: int | None):
        """
        :param sink: Name of the output routine this atom stands for.
        :param size: Size handed to the base class; 1 is used when it is None.
        """
        self.nonce = random.randint(0, 999999999999)
        self.sink = sink
        if size is None:
            size = 1
        super().__init__(size)

    def _identity(self):
        # presumably consumed by Atom for hashing/equality - confirm against the base class
        return (self.nonce,)

    def __repr__(self):
        return "<StdoutAtom {}>".format(self.sink)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class StdinAtom(Atom):
    """
    Atom describing data obtained from an input routine; ``source`` is the name of that routine.

    Every instance draws a random nonce at construction time and reports it as its only identity component.
    """

    def __init__(self, source: str, size: int | None):
        """
        :param source: Name of the input routine this atom stands for.
        :param size:   Size handed to the base class; 1 is used when it is None.
        """
        self.nonce = random.randint(0, 999999999999)
        self.source = source
        if size is None:
            size = 1
        super().__init__(size)

    def _identity(self):
        # presumably consumed by Atom for hashing/equality - confirm against the base class
        return (self.nonce,)

    def __repr__(self):
        return "<StdinAtom {}>".format(self.source)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def parse_format_string(format_string: str) -> tuple[list[str | int], list[SimType], list[str]]:
    """
    Split a printf/scanf-style format string into literal text and argument-consuming conversion specifiers.

    :param format_string: The format string to parse.
    :return: A 3-tuple ``(pieces, types, specs)``:
             - ``pieces`` lists, in order of appearance, literal text (``str``) and specifier indices (``int``).
               Index ``i`` refers to ``types[i]`` and ``specs[i]``.
             - ``types`` holds one SimType per specifier. Only the exact specifiers ``%s``, ``%d``, ``%u`` and ``%c``
               get a concrete type; everything else gets ``SimTypeBottom``.
             - ``specs`` holds the raw text of each specifier.

    An escaped percent sign (``%%``) consumes no argument and contributes a single ``%`` to the surrounding literal
    text.
    """
    result_pieces: list[str | int] = []
    result_types: list[SimType] = []
    result_specs: list[str] = []

    literal = ""  # literal text gathered since the last argument-consuming specifier
    last_piece = 0
    idx = 0
    for argspec in re.finditer(r"\%([0 #+-]?[0-9*]*\.?\d*([hl]{0,2}|[jztL])?[diuoxXeEfgGaAcpsSn%])", format_string):
        start, end = argspec.span()
        literal += format_string[last_piece:start]
        last_piece = end

        if format_string[end - 1] == "%":
            # escaped percent: no argument is consumed, and the output contains exactly one "%"
            literal += "%"
            continue

        if literal:
            result_pieces.append(literal)
            literal = ""
        result_pieces.append(idx)
        idx += 1

        fmt = format_string[start:end]
        if fmt == "%s":
            arg = SimTypePointer(SimTypeChar())
        elif fmt == "%d":
            arg = SimTypeInt(signed=True)
        elif fmt == "%u":
            arg = SimTypeInt(signed=False)
        elif fmt == "%c":
            arg = SimTypeChar(signed=True)
        else:
            arg = SimTypeBottom()
        result_types.append(arg)
        result_specs.append(fmt)

    # whatever follows the last match is trailing literal text
    literal += format_string[last_piece:]
    if literal:
        result_pieces.append(literal)

    return result_pieces, result_types, result_specs
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class LibcStdioHandlers(FunctionHandler):
    """
    Function-handler implementations for a set of C stdio routines.

    The printf-family handlers delegate format-string processing to ``handle_printf`` and the scanf-family handlers
    to ``handle_scanf``; the integer passed to those helpers is the position of the format-string argument.
    """

    # NOTE(review): the Stdout atoms below are sized with len(result), while the buffer-writing handlers use
    # len(result) // 8 - confirm which unit StdoutAtom is supposed to receive.

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_printf(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # printf(fmt, ...)
        result, source_atoms = handle_printf(state, data, 0)
        dst_atoms = StdoutAtom("printf", len(result) if result is not None else None)
        data.depends(dst_atoms, source_atoms, value=result)

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_dprintf(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # dprintf(fd, fmt, ...)
        result, source_atoms = handle_printf(state, data, 1)
        dst_atoms = StdoutAtom("dprintf", len(result) if result is not None else None)
        data.depends(dst_atoms, source_atoms, value=result)

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_fprintf(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # fprintf(stream, fmt, ...)
        result, source_atoms = handle_printf(state, data, 1)
        dst_atoms = StdoutAtom("fprintf", len(result) if result is not None else None)
        data.depends(dst_atoms, source_atoms, value=result)

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_sprintf(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # sprintf(buf, fmt, ...): the formatted value is written through argument 0
        result, source_atoms = handle_printf(state, data, 1)
        dst_atoms = state.deref(data.args_atoms[0], size=len(result) // 8 if result is not None else 1)
        data.depends(dst_atoms, source_atoms, value=result)

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_snprintf(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # snprintf(buf, n, fmt, ...): at most n bytes are written; fall back to 2 when n is not concrete (or 0)
        result, source_atoms = handle_printf(state, data, 2)
        size = state.get_concrete_value(data.args_atoms[1]) or 2
        if result is not None:
            size = min(size, len(result) // 8)
        dst_atoms = state.deref(data.args_atoms[0], size=size)
        data.depends(dst_atoms, source_atoms, value=result)

    @FunctionCallDataUnwrapped.decorate
    def handle_impl___sprintf_chk(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # __sprintf_chk(buf, flag, buflen, fmt, ...)
        result, source_atoms = handle_printf(state, data, 3)
        dst_atoms = state.deref(data.args_atoms[0], size=len(result) // 8 if result is not None else 1)
        data.depends(dst_atoms, source_atoms, value=result)

    @FunctionCallDataUnwrapped.decorate
    def handle_impl___snprintf_chk(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # __snprintf_chk(buf, n, flag, buflen, fmt, ...): sized exactly like handle_impl_snprintf
        result, source_atoms = handle_printf(state, data, 4)
        size = state.get_concrete_value(data.args_atoms[1]) or 2
        if result is not None:
            size = min(size, len(result) // 8)
        dst_atoms = state.deref(data.args_atoms[0], size=size)
        data.depends(dst_atoms, source_atoms, value=result)

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_scanf(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # scanf(fmt, ...): everything that gets written originates from a fresh stdin atom
        handle_scanf(state, data, 0, {StdinAtom("scanf", None)})

    handle_impl___isoc99_scanf = handle_impl_scanf

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_sscanf(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # sscanf(str, fmt, ...): the source is the NUL-terminated string behind argument 0
        handle_scanf(state, data, 1, state.deref(data.args_atoms[0], DerefSize.NULL_TERMINATE))

    handle_impl___isoc99_sscanf = handle_impl_sscanf

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_fgets(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # fgets(buf, n, stream): buf receives n - 1 symbolic bytes followed by a NUL byte
        size = state.get_concrete_value(data.args_atoms[1]) or 2
        dst_atom = state.deref(data.args_atoms[0], size)
        input_value = claripy.BVS("weh", (size - 1) * 8).concat(claripy.BVV(0, 8))
        data.depends(dst_atom, StdinAtom("fgets", size), value=input_value)
        # the return value depends on the buffer pointer
        data.depends(data.ret_atoms, data.args_atoms[0])

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_fgetc(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # shared by getchar/getc through the aliases below; the atom is named after the called function
        data.depends(data.ret_atoms, StdinAtom(data.function.name, 1))

    handle_impl_getchar = handle_impl_getc = handle_impl_fgetc

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_fread(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # fread(ptr, size, nmemb, stream): element size is argument 1, element count is argument 2
        size = state.get_concrete_value(data.args_atoms[1]) or 1
        nmemb = state.get_concrete_value(data.args_atoms[2]) or 2
        dst_atom = state.deref(data.args_atoms[0], size * nmemb)
        data.depends(dst_atom, StdinAtom("fread", size * nmemb))

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_fwrite(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # fwrite(ptr, size, nmemb, stream): element size is argument 1, element count is argument 2
        size = state.get_concrete_value(data.args_atoms[1]) or 1
        nmemb = state.get_concrete_value(data.args_atoms[2]) or 2
        src_atom = state.deref(data.args_atoms[0], size * nmemb)
        data.depends(StdoutAtom("fwrite", size * nmemb), src_atom, value=state.get_values(src_atom))
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def handle_printf(
    state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped, fmt_idx: int
) -> tuple[MultiValues | None, Iterable[Atom]]:
    """
    Model the output of a printf-style call whose format string is a known constant.

    :param state:   The reaching-definitions state to read argument values from.
    :param data:    The call being handled; its prototype is extended with the types found in the format string.
    :param fmt_idx: Position of the format-string argument. The variadic arguments start right after it.
    :return:        ``(result, source_atoms)``. ``result`` is the formatted output with a trailing NUL byte, built
                    from the literal text and from every argument whose value could be determined; arguments
                    without a known value contribute nothing to it. ``source_atoms`` collects the atoms feeding the
                    output. When the format string is not a concrete value, ``(None, set())`` is returned.
    """
    format_str = state.get_concrete_value(
        state.deref(data.args_atoms[fmt_idx], DerefSize.NULL_TERMINATE), cast_to=bytes
    )
    if format_str is None:
        _l.info("Hmmm.... non-constant format string")
        return None, set()

    format_str = format_str.strip(b"\0").decode()
    arg_pieces, arg_types, formats = parse_format_string(format_str)
    data.reset_prototype(SimTypeFunction(data.prototype.args + tuple(arg_types), data.prototype.returnty), state)

    result = MultiValues(claripy.BVV(b""))
    source_atoms: set[Atom] = set()
    for piece in arg_pieces:
        if isinstance(piece, str):
            # literal text is copied into the output verbatim
            if result is not None:
                result = result.concat(piece.encode())
            continue
        atom = data.args_atoms[fmt_idx + 1 + piece]
        fmt = formats[piece]

        if fmt == "%s":
            buf_atoms = state.deref(atom, DerefSize.NULL_TERMINATE)
            buf_data = state.get_values(buf_atoms)
            if buf_data is not None:
                # keep everything except the final byte (the terminator of the NULL_TERMINATE deref)
                buf_data = buf_data.extract(0, len(buf_data) // 8 - 1, archinfo.Endness.BE)
        elif fmt == "%u":
            buf_atoms = atom
            buf_data = state.get_concrete_value(buf_atoms)
            if buf_data is not None:
                buf_data = str(buf_data).encode()
        elif fmt == "%d":
            buf_atoms = atom
            buf_data = state.get_concrete_value(buf_atoms)
            if buf_data is not None:
                # reinterpret the value as a signed 32-bit integer
                if buf_data >= 2**31:
                    buf_data -= 2**32
                buf_data = str(buf_data).encode()
        elif fmt == "%c":
            buf_atoms = atom
            buf_data = state.get_concrete_value(atom)
            if buf_data is not None:
                # %c writes exactly one byte: the argument converted to unsigned char. chr(x).encode() would
                # produce multi-byte UTF-8 for values >= 128 and raise for values above 0x10FFFF.
                buf_data = bytes([buf_data & 0xFF])
        else:
            _l.warning("Unimplemented printf format string %s", fmt)
            buf_atoms = set()
            buf_data = None
        if result is not None:
            if buf_data is not None:
                result = result.concat(buf_data)
        source_atoms.update(buf_atoms)
    if result is not None:
        result = result.concat(b"\0")

    return result, source_atoms
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def handle_scanf(
    state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped, fmt_idx: int, source_atoms: Iterable[Atom]
):
    """
    Record, for a scanf-style call with a constant format string, that each output argument depends on the input.

    :param state:        The reaching-definitions state used to read the format string and dereference pointers.
    :param data:         The call being handled; its prototype is extended with the types found in the format string.
    :param fmt_idx:      Position of the format-string argument. The output pointers start right after it.
    :param source_atoms: The atoms every written location is marked as depending on.

    Nothing is recorded when the format string is not a concrete value. Specifiers other than ``%s``, ``%u``,
    ``%d`` and ``%c`` are skipped with a warning.
    """
    raw_format = state.get_concrete_value(
        state.deref(data.args_atoms[fmt_idx], DerefSize.NULL_TERMINATE), cast_to=bytes
    )
    if raw_format is None:
        _l.info("Hmmm.... non-constant format string")
        return

    arg_pieces, arg_types, formats = parse_format_string(raw_format.strip(b"\0").decode())
    extended_args = data.prototype.args + tuple(arg_types)
    data.reset_prototype(SimTypeFunction(extended_args, data.prototype.returnty), state)

    # literal text in the format string writes nothing, so only the specifier indices matter
    for arg_no in (piece for piece in arg_pieces if not isinstance(piece, str)):
        atom = data.args_atoms[fmt_idx + 1 + arg_no]
        fmt = formats[arg_no]

        if fmt == "%s":
            # a single byte holding the NUL terminator is all that gets modelled for strings
            buf_atom, buf_data = state.deref(atom, 1), b"\0"
        elif fmt in ("%u", "%d"):
            buf_atom, buf_data = state.deref(atom, 4, state.arch.memory_endness), None
        elif fmt == "%c":
            buf_atom, buf_data = state.deref(atom, 1, state.arch.memory_endness), None
        else:
            _l.warning("Unimplemented scanf format string %s", fmt)
            continue

        data.depends(buf_atom, source_atoms, value=buf_data)
|