angr 9.2.114__py3-none-win_amd64.whl → 9.2.116__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (50) hide show
  1. angr/__init__.py +1 -1
  2. angr/__main__.py +2 -2
  3. angr/analyses/cfg/cfg_fast.py +1 -1
  4. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +2 -2
  5. angr/analyses/decompiler/decompilation_options.py +2 -12
  6. angr/analyses/decompiler/decompiler.py +14 -3
  7. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +3 -0
  8. angr/analyses/decompiler/optimization_passes/cross_jump_reverter.py +0 -3
  9. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +2 -3
  10. angr/analyses/decompiler/optimization_passes/optimization_pass.py +9 -2
  11. angr/analyses/decompiler/optimization_passes/ret_deduplicator.py +2 -0
  12. angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +2 -0
  13. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +5 -1
  14. angr/analyses/decompiler/structured_codegen/c.py +10 -13
  15. angr/analyses/decompiler/structuring/__init__.py +6 -2
  16. angr/analyses/decompiler/structuring/dream.py +3 -4
  17. angr/analyses/decompiler/structuring/phoenix.py +29 -93
  18. angr/analyses/decompiler/structuring/recursive_structurer.py +0 -3
  19. angr/analyses/decompiler/structuring/sailr.py +111 -0
  20. angr/analyses/decompiler/structuring/structurer_base.py +2 -5
  21. angr/analyses/decompiler/structuring/structurer_nodes.py +3 -3
  22. angr/analyses/reaching_definitions/dep_graph.py +62 -5
  23. angr/analyses/reaching_definitions/function_handler.py +11 -1
  24. angr/analyses/reaching_definitions/function_handler_library/__init__.py +11 -0
  25. angr/analyses/reaching_definitions/function_handler_library/stdio.py +262 -0
  26. angr/analyses/reaching_definitions/function_handler_library/stdlib.py +157 -0
  27. angr/analyses/reaching_definitions/function_handler_library/string.py +93 -0
  28. angr/analyses/reaching_definitions/function_handler_library/unistd.py +23 -0
  29. angr/analyses/reaching_definitions/rd_state.py +28 -29
  30. angr/analyses/variable_recovery/engine_vex.py +0 -9
  31. angr/analyses/vfg.py +13 -14
  32. angr/code_location.py +4 -4
  33. angr/engines/pcode/cc.py +2 -0
  34. angr/engines/vex/heavy/heavy.py +1 -1
  35. angr/knowledge_plugins/key_definitions/live_definitions.py +12 -13
  36. angr/lib/angr_native.dll +0 -0
  37. angr/procedures/libc/strlen.py +5 -2
  38. angr/sim_variable.py +3 -18
  39. angr/state_plugins/solver.py +3 -9
  40. angr/storage/memory_mixins/address_concretization_mixin.py +1 -1
  41. angr/storage/memory_mixins/paged_memory/pages/cooperation.py +2 -1
  42. angr/storage/memory_mixins/regioned_memory/abstract_merger_mixin.py +4 -2
  43. angr/storage/memory_mixins/regioned_memory/regioned_memory_mixin.py +5 -5
  44. angr/storage/memory_mixins/regioned_memory/static_find_mixin.py +3 -3
  45. {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/METADATA +7 -7
  46. {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/RECORD +50 -44
  47. {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/WHEEL +1 -1
  48. {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/LICENSE +0 -0
  49. {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/entry_points.txt +0 -0
  50. {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,111 @@
1
+ from typing import Any
2
+
3
+ import networkx
4
+
5
+ from ..utils import structured_node_is_simple_return
6
+ from ....utils.graph import PostDominators, TemporaryNode
7
+ from .phoenix import PhoenixStructurer
8
+
9
+
10
class SAILRStructurer(PhoenixStructurer):
    """
    The SAILR structuring algorithm is the phoenix-based algorithm from the USENIX 2024 paper SAILR.
    The entirety of the algorithm is implemented across this class and various optimization passes in the decompiler.
    To find each optimization class, simply search for optimizations which reference this class.NAME.

    At a high-level, SAILR does three things different from the traditional Phoenix schema-based algorithm:
    1. It recursively structures the graph, rather than doing it in a single pass. This allows decisions to be made
       based on the current state of what the decompilation would look like.
    2. It performs deoptimizations targeting specific optimizations that introduce gotos and mis-structured code.
       It can only do this because of the recursive nature of the algorithm.
    3. It uses a more advanced heuristic for virtualizing edges, which is implemented in this class.

    Additionally, some changes in Phoenix are only activated when SAILR is used.
    """

    NAME = "sailr"

    def __init__(self, region, improve_phoenix=True, **kwargs):
        """
        :param region:          The region to structure; forwarded unchanged to PhoenixStructurer.
        :param improve_phoenix: Forwarded to PhoenixStructurer as ``improve_algorithm``.
        :param kwargs:          Any further keyword arguments, forwarded unchanged.
        """
        super().__init__(
            region,
            improve_algorithm=improve_phoenix,
            **kwargs,
        )

    def _order_virtualizable_edges(self, graph: networkx.DiGraph, edges: list, node_seq: dict[Any, int]) -> list:
        """
        Narrow down and order the candidate edges for virtualization.

        The criteria for "best" is defined by a variety of heuristics (H1-H3) described below. As soon as one
        heuristic singles out exactly one edge, that edge is returned on its own; any remaining tie is handed to
        the parent class's ordering.

        :param graph:    The graph the candidate edges belong to.
        :param edges:    Candidate edges as (src, dst) pairs.
        :param node_seq: Passed through untouched to the parent implementation.
        :return:         The selected edges.
        """
        if len(edges) <= 1:
            return edges

        # NOTE(review): the nesting below was reconstructed from an indentation-less rendering of this file;
        # confirm that H3 belongs inside the `entry_node is not None` branch.

        # TODO: the graph we have here is not an accurate graph and can have no "entry node". We need a better graph.
        entry_node = next((node for node in graph.nodes if graph.in_degree(node) == 0), None)

        best_edges = edges
        if entry_node is not None:
            # the first few heuristics are based on the post-dominator count of the edge
            # so we collect them for each candidate edge
            edge_postdom_count = {}
            edge_sibling_count = {}
            for edge in edges:
                _, dst = edge
                graph_copy = networkx.DiGraph(graph)
                graph_copy.remove_edge(*edge)
                sibling_cnt = graph_copy.in_degree(dst)
                if sibling_cnt == 0:
                    # removing this edge would leave dst without any incoming edge; not a candidate for H1/H2
                    continue

                edge_sibling_count[edge] = sibling_cnt
                post_dom_graph = PostDominators(graph_copy, entry_node).post_dom
                # count post-dominator relations, ignoring any that involve a TemporaryNode
                post_doms = {
                    (postdom_node, dominatee)
                    for postdom_node, dominatee in post_dom_graph.edges()
                    if not isinstance(postdom_node, TemporaryNode) and not isinstance(dominatee, TemporaryNode)
                }
                edge_postdom_count[edge] = len(post_doms)

            # H1: the edge that has the least amount of sibling edges should be virtualized first
            # this is believed to reduce the amount of virtualization needed in future rounds and increase
            # the edges that enter a single outer-scope if-stmt
            if edge_sibling_count:
                min_sibling_count = min(edge_sibling_count.values())
                best_edges = [edge for edge, cnt in edge_sibling_count.items() if cnt == min_sibling_count]
                if len(best_edges) == 1:
                    return best_edges

                # create the next heuristic based on the best edges from the previous heuristic
                h1_winners = set(best_edges)
                filtered_edge_postdom_count = {
                    edge: cnt for edge, cnt in edge_postdom_count.items() if edge in h1_winners
                }
                if filtered_edge_postdom_count:
                    edge_postdom_count = filtered_edge_postdom_count

            # H2: the edge, when removed, that causes the most post-dominators of the graph should be virtualized
            # first. this is believed to make the code more linear looking by reducing the amount of scopes.
            # informally, we believe post-dominators to be an inverse indicator of the number of scopes present
            if edge_postdom_count:
                max_postdom_count = max(edge_postdom_count.values())
                best_edges = [edge for edge, cnt in edge_postdom_count.items() if cnt == max_postdom_count]
                if len(best_edges) == 1:
                    return best_edges

            # H3: the edge that goes directly to a return statement should be virtualized first
            # this is believed to be good because it can be corrected in later optimization by duplicating
            # the return
            candidate_edges = best_edges
            best_edges = [
                (src, dst)
                for src, dst in candidate_edges
                if graph.has_node(dst) and structured_node_is_simple_return(dst, graph)
            ]

            if len(best_edges) == 1:
                return best_edges
            if not best_edges:
                # H3 matched nothing: fall back to the candidates it started from
                best_edges = candidate_edges

        # if we have another tie, or we never used improved heuristics, then we do the default ordering.
        return super()._order_virtualizable_edges(graph, best_edges, node_seq)
@@ -1,6 +1,5 @@
1
1
  # pylint:disable=unused-argument
2
2
  from typing import Optional, Any, TYPE_CHECKING
3
- from collections import OrderedDict as ODict
4
3
  from collections import defaultdict, OrderedDict
5
4
  import logging
6
5
 
@@ -53,7 +52,6 @@ class StructurerBase(Analysis):
53
52
  func: Optional["Function"] = None,
54
53
  case_entry_to_switch_head: dict[int, int] | None = None,
55
54
  parent_region=None,
56
- improve_structurer=True,
57
55
  **kwargs,
58
56
  ):
59
57
  self._region: "GraphRegion" = region
@@ -61,7 +59,6 @@ class StructurerBase(Analysis):
61
59
  self.function = func
62
60
  self._case_entry_to_switch_head = case_entry_to_switch_head
63
61
  self._parent_region = parent_region
64
- self._improve_structurer = improve_structurer
65
62
 
66
63
  self.cond_proc = (
67
64
  condition_processor if condition_processor is not None else ConditionProcessor(self.project.arch)
@@ -741,8 +738,8 @@ class StructurerBase(Analysis):
741
738
  #
742
739
 
743
740
  def _reorganize_switch_cases(
744
- self, cases: ODict[int | tuple[int, ...], SequenceNode]
745
- ) -> ODict[int | tuple[int, ...], SequenceNode]:
741
+ self, cases: OrderedDict[int | tuple[int, ...], SequenceNode]
742
+ ) -> OrderedDict[int | tuple[int, ...], SequenceNode]:
746
743
  new_cases = OrderedDict()
747
744
 
748
745
  caseid2gotoaddrs = {}
@@ -1,6 +1,6 @@
1
1
  # pylint:disable=missing-class-docstring
2
2
  from typing import Any
3
- from collections import OrderedDict as ODict
3
+ from collections import OrderedDict
4
4
 
5
5
  import claripy
6
6
  import ailment
@@ -358,9 +358,9 @@ class SwitchCaseNode(BaseNode):
358
358
  "addr",
359
359
  )
360
360
 
361
- def __init__(self, switch_expr, cases: ODict[int | tuple[int, ...], SequenceNode], default_node, addr=None):
361
+ def __init__(self, switch_expr, cases: OrderedDict[int | tuple[int, ...], SequenceNode], default_node, addr=None):
362
362
  self.switch_expr = switch_expr
363
- self.cases: ODict[int | tuple[int, ...], SequenceNode] = cases
363
+ self.cases: OrderedDict[int | tuple[int, ...], SequenceNode] = cases
364
364
  self.default_node = default_node
365
365
  self.addr = addr
366
366
 
@@ -213,6 +213,68 @@ class DepGraph:
213
213
 
214
214
  self.graph.add_edge(memory_location_definition, definition)
215
215
 
216
+ @overload
217
+ def find_definitions(
218
+ self,
219
+ *,
220
+ kind: type[A],
221
+ **kwargs: Any,
222
+ ) -> list[Definition[A]]: ...
223
+
224
+ @overload
225
+ def find_definitions(
226
+ self,
227
+ *,
228
+ kind: Literal[AtomKind.REGISTER] = AtomKind.REGISTER,
229
+ **kwargs: Any,
230
+ ) -> list[Definition[Register]]: ...
231
+
232
+ @overload
233
+ def find_definitions(
234
+ self,
235
+ *,
236
+ kind: Literal[AtomKind.MEMORY] = AtomKind.MEMORY,
237
+ **kwargs: Any,
238
+ ) -> list[Definition[MemoryLocation]]: ...
239
+
240
+ @overload
241
+ def find_definitions(
242
+ self,
243
+ *,
244
+ kind: Literal[AtomKind.TMP] = AtomKind.TMP,
245
+ **kwargs: Any,
246
+ ) -> list[Definition[Tmp]]: ...
247
+
248
+ @overload
249
+ def find_definitions(
250
+ self,
251
+ *,
252
+ kind: Literal[AtomKind.CONSTANT] = AtomKind.CONSTANT,
253
+ **kwargs: Any,
254
+ ) -> list[Definition[ConstantSrc]]: ...
255
+
256
+ @overload
257
+ def find_definitions(
258
+ self,
259
+ *,
260
+ kind: Literal[AtomKind.GUARD] = AtomKind.GUARD,
261
+ **kwargs: Any,
262
+ ) -> list[Definition[GuardUse]]: ...
263
+
264
+ @overload
265
+ def find_definitions(
266
+ self,
267
+ *,
268
+ reg_name: int | str = ...,
269
+ **kwargs: Any,
270
+ ) -> list[Definition[Register]]: ...
271
+
272
+ @overload
273
+ def find_definitions(self, *, stack_offset: int = ..., **kwargs: Any) -> list[Definition[MemoryLocation]]: ...
274
+
275
+ @overload
276
+ def find_definitions(self, *, const_val: int = ..., **kwargs: Any) -> list[Definition[ConstantSrc]]: ...
277
+
216
278
  def find_definitions(self, **kwargs) -> list[Definition]:
217
279
  """
218
280
  Filter the definitions present in the graph based on various criteria.
@@ -299,11 +361,6 @@ class DepGraph:
299
361
  self, starts: Definition[Atom] | Iterable[Definition[Atom]], *, const_val: int = ..., **kwargs: Any
300
362
  ) -> list[Definition[ConstantSrc]]: ...
301
363
 
302
- @overload
303
- def find_all_predecessors(
304
- self, starts: Definition[Atom] | Iterable[Definition[Atom]], **kwargs: Any
305
- ) -> list[Definition[Atom]]: ...
306
-
307
364
  def find_all_predecessors(self, starts, **kwargs):
308
365
  """
309
366
  Filter the ancestors of the given start node or nodes that match various criteria.
@@ -254,15 +254,25 @@ class FunctionCallDataUnwrapped(FunctionCallData):
254
254
  return inner
255
255
 
256
256
 
257
+ def _mk_wrapper(func, iself):
258
+ return lambda *args, **kwargs: func(iself, *args, **kwargs)
259
+
260
+
257
261
  # pylint: disable=unused-argument, no-self-use
258
262
  class FunctionHandler:
259
263
  """
260
264
  A mechanism for summarizing a function call's effect on a program for ReachingDefinitionsAnalysis.
261
265
  """
262
266
 
263
- def __init__(self, interfunction_level: int = 0):
267
+ def __init__(self, interfunction_level: int = 0, extra_impls: Iterable["FunctionHandler"] | None = None):
264
268
  self.interfunction_level: int = interfunction_level
265
269
 
270
+ if extra_impls is not None:
271
+ for extra_handler in extra_impls:
272
+ for name, func in vars(extra_handler).items():
273
+ if name.startswith("handle_impl_"):
274
+ setattr(self, name, _mk_wrapper(func, self))
275
+
266
276
  def hook(self, analysis: "ReachingDefinitionsAnalysis") -> "FunctionHandler":
267
277
  """
268
278
  Attach this instance of the function handler to an instance of RDA.
@@ -0,0 +1,11 @@
1
+ from .stdlib import LibcStdlibHandlers, EnvironAtom, SystemAtom, ExecveAtom
2
+ from .stdio import LibcStdioHandlers, StdoutAtom, StdinAtom
3
+ from .unistd import LibcUnistdHandlers
4
+ from .string import LibcStringHandlers
5
+
6
+
7
class LibcHandlers(LibcStdlibHandlers, LibcStdioHandlers, LibcUnistdHandlers, LibcStringHandlers):
    """
    Single class combining the stdlib, stdio, unistd and string handler classes of this package.
    """
9
+
10
+
11
+ __all__ = ["EnvironAtom", "SystemAtom", "ExecveAtom", "StdoutAtom", "StdinAtom", "LibcHandlers"]
@@ -0,0 +1,262 @@
1
+ import re
2
+ import random
3
+ import logging
4
+ from collections.abc import Iterable
5
+
6
+ import archinfo
7
+ import claripy
8
+
9
+ from angr.analyses.reaching_definitions.function_handler import FunctionCallDataUnwrapped, FunctionHandler
10
+ from angr.analyses.reaching_definitions.rd_state import ReachingDefinitionsState
11
+ from angr.knowledge_plugins.key_definitions.atoms import Atom
12
+ from angr.knowledge_plugins.key_definitions.live_definitions import DerefSize
13
+ from angr.sim_type import SimType, SimTypeBottom, SimTypeChar, SimTypeFunction, SimTypeInt, SimTypePointer
14
+ from angr.storage.memory_mixins.paged_memory.pages.multi_values import MultiValues
15
+
16
+ # pylint: disable=no-self-use,missing-class-docstring,unused-argument
17
+
18
+ _l = logging.getLogger(__name__)
19
+
20
+
21
class StdoutAtom(Atom):
    """
    Atom standing for data handed to an output function.

    Identity is derived solely from a random per-instance nonce, so two atoms created for the same ``sink``
    are distinct unless their nonces happen to collide.
    """

    def __init__(self, sink: str, size: int | None):
        """
        :param sink: Label of the output function that produced this atom.
        :param size: Atom size; ``None`` is replaced by 1.
        """
        self.nonce = random.randint(0, 999999999999)
        self.sink = sink
        atom_size = 1 if size is None else size
        super().__init__(atom_size)

    def _identity(self):
        # only the nonce takes part in the identity; sink and size do not
        return (self.nonce,)

    def __repr__(self):
        return "<StdoutAtom {}>".format(self.sink)
32
+
33
+
34
class StdinAtom(Atom):
    """
    Atom standing for data obtained from an input function.

    Identity is derived solely from a random per-instance nonce, so two atoms created for the same ``source``
    are distinct unless their nonces happen to collide.
    """

    def __init__(self, source: str, size: int | None):
        """
        :param source: Label of the input function that produced this atom.
        :param size:   Atom size; ``None`` is replaced by 1.
        """
        self.nonce = random.randint(0, 999999999999)
        self.source = source
        atom_size = 1 if size is None else size
        super().__init__(atom_size)

    def _identity(self):
        # only the nonce takes part in the identity; source and size do not
        return (self.nonce,)

    def __repr__(self):
        return "<StdinAtom {}>".format(self.source)
45
+
46
+
47
def parse_format_string(format_string: str) -> tuple[list[str | int], list[SimType], list[str]]:
    """
    Split a printf/scanf-style format string into literal text and conversion specifications.

    :param format_string: The format string to parse.
    :return: A 3-tuple ``(pieces, types, specs)``:

             - ``pieces`` lists, in order, literal text (``str``) and conversions (``int``); each int is the
               index of that conversion in ``types`` and ``specs``.
             - ``types`` holds one SimType per conversion. Only the exact specs ``%s``, ``%d``, ``%u`` and ``%c``
               get a concrete type; everything else is a SimTypeBottom.
             - ``specs`` holds the matched text of each conversion.

             A specification ending in ``%`` (i.e. ``%%``) consumes no argument and contributes a single literal
             ``%`` to the surrounding text.
    """
    result_pieces: list[str | int] = []
    result_types: list[SimType] = []
    result_specs: list[str] = []

    pending_literal = ""  # literal text collected since the last conversion
    last_piece = 0
    idx = 0
    for argspec in re.finditer(r"\%([0 #+-]?[0-9*]*\.?\d*([hl]{0,2}|[jztL])?[diuoxXeEfgGaAcpsSn%])", format_string):
        start, end = argspec.span()
        pending_literal += format_string[last_piece:start]
        last_piece = end
        if format_string[end - 1] == "%":
            # escaped percent sign: the output contains one "%", not the two-character escape
            pending_literal += "%"
            continue
        if pending_literal:
            result_pieces.append(pending_literal)
            pending_literal = ""
        result_pieces.append(idx)
        idx += 1
        fmt = format_string[start:end]
        if fmt == "%s":
            arg = SimTypePointer(SimTypeChar())
        elif fmt == "%d":
            arg = SimTypeInt(signed=True)
        elif fmt == "%u":
            arg = SimTypeInt(signed=False)
        elif fmt == "%c":
            arg = SimTypeChar(signed=True)
        else:
            arg = SimTypeBottom()
        result_types.append(arg)
        result_specs.append(fmt)

    # whatever follows the last match is trailing literal text
    pending_literal += format_string[last_piece:]
    if pending_literal:
        result_pieces.append(pending_literal)

    return result_pieces, result_types, result_specs
80
+
81
+
82
class LibcStdioHandlers(FunctionHandler):
    """
    Function-handler implementations for libc stdio routines.

    Each ``handle_impl_<name>`` method records, through ``data.depends``, which atoms a call to ``<name>``
    defines and which atoms those definitions are derived from.
    """

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_printf(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # printf(format, ...)
        result, source_atoms = handle_printf(state, data, 0)
        dst_atoms = StdoutAtom("printf", len(result) if result is not None else None)
        data.depends(dst_atoms, source_atoms, value=result)

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_dprintf(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # dprintf(fd, format, ...)
        result, source_atoms = handle_printf(state, data, 1)
        dst_atoms = StdoutAtom("dprintf", len(result) if result is not None else None)
        data.depends(dst_atoms, source_atoms, value=result)

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_fprintf(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # fprintf(stream, format, ...)
        result, source_atoms = handle_printf(state, data, 1)
        dst_atoms = StdoutAtom("fprintf", len(result) if result is not None else None)
        data.depends(dst_atoms, source_atoms, value=result)

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_sprintf(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # sprintf(str, format, ...): the formatted value is written to the buffer behind argument 0
        result, source_atoms = handle_printf(state, data, 1)
        dst_atoms = state.deref(data.args_atoms[0], size=len(result) // 8 if result is not None else 1)
        data.depends(dst_atoms, source_atoms, value=result)

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_snprintf(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # snprintf(str, size, format, ...)
        result, source_atoms = handle_printf(state, data, 2)
        # fall back to 2 when the size argument is not concrete (or is zero)
        size = state.get_concrete_value(data.args_atoms[1]) or 2
        if result is not None:
            size = min(size, len(result) // 8)
        dst_atoms = state.deref(data.args_atoms[0], size=size)
        data.depends(dst_atoms, source_atoms, value=result)

    @FunctionCallDataUnwrapped.decorate
    def handle_impl___sprintf_chk(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # __sprintf_chk(str, flag, strlen, format, ...)
        result, source_atoms = handle_printf(state, data, 3)
        dst_atoms = state.deref(data.args_atoms[0], size=len(result) // 8 if result is not None else 1)
        data.depends(dst_atoms, source_atoms, value=result)

    @FunctionCallDataUnwrapped.decorate
    def handle_impl___snprintf_chk(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # __snprintf_chk(str, maxlen, flag, strlen, format, ...)
        result, source_atoms = handle_printf(state, data, 4)
        size = state.get_concrete_value(data.args_atoms[1]) or 2
        dst_atoms = state.deref(data.args_atoms[0], size=size)
        data.depends(dst_atoms, source_atoms, value=result)

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_scanf(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # scanf(format, ...): everything written comes from a fresh stdin atom
        handle_scanf(state, data, 0, {StdinAtom("scanf", None)})

    handle_impl___isoc99_scanf = handle_impl_scanf

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_sscanf(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # sscanf(str, format, ...): everything written comes from the input string behind argument 0
        handle_scanf(state, data, 1, state.deref(data.args_atoms[0], DerefSize.NULL_TERMINATE))

    handle_impl___isoc99_sscanf = handle_impl_sscanf

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_fgets(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # fgets(s, size, stream)
        size = state.get_concrete_value(data.args_atoms[1]) or 2
        dst_atom = state.deref(data.args_atoms[0], size)
        # size - 1 unconstrained bytes followed by a zero byte
        input_value = claripy.BVS("weh", (size - 1) * 8).concat(claripy.BVV(0, 8))
        data.depends(dst_atom, StdinAtom("fgets", size), value=input_value)
        # the return value is derived from the buffer argument
        data.depends(data.ret_atoms, data.args_atoms[0])

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_fgetc(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # shared by fgetc/getc/getchar, hence the atom is labelled with the actual function name
        data.depends(data.ret_atoms, StdinAtom(data.function.name, 1))

    handle_impl_getchar = handle_impl_getc = handle_impl_fgetc

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_fread(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # fread(ptr, size, nmemb, stream): size is argument 1, nmemb is argument 2
        size = state.get_concrete_value(data.args_atoms[1]) or 1
        nmemb = state.get_concrete_value(data.args_atoms[2]) or 2
        dst_atom = state.deref(data.args_atoms[0], size * nmemb)
        data.depends(dst_atom, StdinAtom("fread", size * nmemb))

    @FunctionCallDataUnwrapped.decorate
    def handle_impl_fwrite(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
        # fwrite(ptr, size, nmemb, stream): size is argument 1, nmemb is argument 2
        size = state.get_concrete_value(data.args_atoms[1]) or 1
        nmemb = state.get_concrete_value(data.args_atoms[2]) or 2
        src_atom = state.deref(data.args_atoms[0], size * nmemb)
        data.depends(StdoutAtom("fwrite", size * nmemb), src_atom, value=state.get_values(src_atom))
168
+
169
+
170
def handle_printf(
    state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped, fmt_idx: int
) -> tuple[MultiValues | None, Iterable[Atom]]:
    """
    Model the formatted output of a printf-family call.

    :param state:   The reaching-definitions state used to dereference and read argument values.
    :param data:    The call being handled; its prototype is extended with the types of the variadic arguments.
    :param fmt_idx: Index of the format-string argument in ``data.args_atoms``.
    :return:        ``(result, source_atoms)`` where ``result`` is the formatted value followed by a zero byte
                    and ``source_atoms`` are the atoms the output was read from. ``(None, set())`` when the
                    format string has no concrete value.
    """
    format_str = state.get_concrete_value(
        state.deref(data.args_atoms[fmt_idx], DerefSize.NULL_TERMINATE), cast_to=bytes
    )
    if format_str is None:
        _l.info("Hmmm.... non-constant format string")
        return None, set()

    format_str = format_str.strip(b"\0").decode()
    arg_pieces, arg_types, formats = parse_format_string(format_str)
    data.reset_prototype(SimTypeFunction(data.prototype.args + tuple(arg_types), data.prototype.returnty), state)

    result = MultiValues(claripy.BVV(b""))
    source_atoms: set[Atom] = set()
    for piece in arg_pieces:
        if isinstance(piece, str):
            # literal text is copied to the output as-is
            if result is not None:
                result = result.concat(piece.encode())
            continue
        # variadic arguments start right after the format string
        atom = data.args_atoms[fmt_idx + 1 + piece]
        fmt = formats[piece]

        if fmt == "%s":
            buf_atoms = state.deref(atom, DerefSize.NULL_TERMINATE)
            buf_data = state.get_values(buf_atoms)
            if buf_data is not None:
                # keep everything but the last byte (presumably the NUL terminator)
                buf_data = buf_data.extract(0, len(buf_data) // 8 - 1, archinfo.Endness.BE)
        elif fmt == "%u":
            buf_atoms = atom
            buf_data = state.get_concrete_value(buf_atoms)
            if buf_data is not None:
                buf_data = str(buf_data).encode()
        elif fmt == "%d":
            buf_atoms = atom
            buf_data = state.get_concrete_value(buf_atoms)
            if buf_data is not None:
                # reinterpret as a signed 32-bit integer
                if buf_data >= 2**31:
                    buf_data -= 2**32
                buf_data = str(buf_data).encode()
        elif fmt == "%c":
            buf_atoms = atom
            buf_data = state.get_concrete_value(atom)
            if buf_data is not None:
                # %c emits exactly one byte (the argument converted to unsigned char); chr().encode() would
                # emit two UTF-8 bytes for 128..255 and raise for values above 0x10FFFF
                buf_data = bytes([buf_data & 0xFF])
        else:
            _l.warning("Unimplemented printf format string %s", fmt)
            buf_atoms = set()
            buf_data = None
        if result is not None:
            if buf_data is not None:
                result = result.concat(buf_data)
        # NOTE(review): nesting reconstructed from an indentation-less rendering; the dependency is assumed
        # to be recorded even when the argument's value is unknown - confirm.
        source_atoms.update(buf_atoms)
    if result is not None:
        result = result.concat(b"\0")

    return result, source_atoms
228
+
229
+
230
def handle_scanf(
    state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped, fmt_idx: int, source_atoms: Iterable[Atom]
):
    """
    Record the definitions made by a scanf-family call.

    :param state:        The reaching-definitions state used to dereference the argument pointers.
    :param data:         The call being handled; its prototype is extended with the types of the variadic
                         arguments.
    :param fmt_idx:      Index of the format-string argument in ``data.args_atoms``.
    :param source_atoms: Atoms every written destination is marked as depending on.
    :return:             None. Nothing is recorded when the format string has no concrete value.
    """
    raw_format = state.get_concrete_value(
        state.deref(data.args_atoms[fmt_idx], DerefSize.NULL_TERMINATE), cast_to=bytes
    )
    if raw_format is None:
        _l.info("Hmmm.... non-constant format string")
        return
    arg_pieces, arg_types, formats = parse_format_string(raw_format.strip(b"\0").decode())
    extended_args = data.prototype.args + tuple(arg_types)
    data.reset_prototype(SimTypeFunction(extended_args, data.prototype.returnty), state)

    for slot in arg_pieces:
        if isinstance(slot, str):
            # literal text consumes no argument
            continue
        # variadic arguments start right after the format string
        pointer_atom = data.args_atoms[fmt_idx + 1 + slot]
        spec = formats[slot]

        written_value = None
        if spec == "%s":
            target = state.deref(pointer_atom, 1)
            written_value = b"\0"
        elif spec in ("%u", "%d"):
            target = state.deref(pointer_atom, 4, state.arch.memory_endness)
        elif spec == "%c":
            target = state.deref(pointer_atom, 1, state.arch.memory_endness)
        else:
            _l.warning("Unimplemented scanf format string %s", spec)
            continue
        data.depends(target, source_atoms, value=written_value)