angr 9.2.152__py3-none-manylinux2014_x86_64.whl → 9.2.154__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/analysis.py +3 -3
- angr/analyses/calling_convention/fact_collector.py +8 -14
- angr/analyses/cfg/cfg_base.py +1 -1
- angr/analyses/cfg/cfg_fast.py +40 -1
- angr/analyses/decompiler/ail_simplifier.py +0 -1
- angr/analyses/decompiler/callsite_maker.py +17 -17
- angr/analyses/decompiler/ccall_rewriters/x86_ccalls.py +210 -1
- angr/analyses/decompiler/clinic.py +51 -13
- angr/analyses/decompiler/decompilation_cache.py +1 -1
- angr/analyses/decompiler/region_identifier.py +171 -119
- angr/analyses/decompiler/ssailification/ssailification.py +1 -1
- angr/analyses/decompiler/structured_codegen/c.py +15 -15
- angr/analyses/decompiler/structuring/phoenix.py +28 -0
- angr/analyses/decompiler/structuring/structurer_nodes.py +11 -0
- angr/analyses/reaching_definitions/function_handler.py +13 -19
- angr/analyses/smc.py +3 -1
- angr/analyses/stack_pointer_tracker.py +7 -1
- angr/analyses/typehoon/simple_solver.py +143 -81
- angr/analyses/typehoon/typehoon.py +2 -1
- angr/analyses/variable_recovery/engine_ail.py +14 -25
- angr/analyses/variable_recovery/engine_base.py +1 -1
- angr/knowledge_plugins/functions/function.py +10 -4
- angr/sim_type.py +11 -70
- angr/utils/types.py +93 -1
- {angr-9.2.152.dist-info → angr-9.2.154.dist-info}/METADATA +6 -6
- {angr-9.2.152.dist-info → angr-9.2.154.dist-info}/RECORD +31 -31
- {angr-9.2.152.dist-info → angr-9.2.154.dist-info}/WHEEL +1 -1
- {angr-9.2.152.dist-info → angr-9.2.154.dist-info}/entry_points.txt +0 -0
- {angr-9.2.152.dist-info → angr-9.2.154.dist-info}/licenses/LICENSE +0 -0
- {angr-9.2.152.dist-info → angr-9.2.154.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
from typing import Any
|
|
2
3
|
from itertools import count
|
|
3
4
|
from collections import defaultdict
|
|
4
5
|
import logging
|
|
@@ -70,6 +71,9 @@ class RegionIdentifier(Analysis):
|
|
|
70
71
|
self._largest_successor_tree_outside_loop = largest_successor_tree_outside_loop
|
|
71
72
|
self._force_loop_single_exit = force_loop_single_exit
|
|
72
73
|
self._complete_successors = complete_successors
|
|
74
|
+
# we keep a dictionary of node and their traversal order in a quasi-topological traversal and update this
|
|
75
|
+
# dictionary as we update the graph
|
|
76
|
+
self._node_order: dict[Any, tuple[int, int]] = {}
|
|
73
77
|
|
|
74
78
|
self._analyze()
|
|
75
79
|
|
|
@@ -102,11 +106,30 @@ class RegionIdentifier(Analysis):
|
|
|
102
106
|
|
|
103
107
|
self._start_node = self._get_start_node(graph)
|
|
104
108
|
|
|
109
|
+
self._node_order = self._compute_node_order(graph)
|
|
110
|
+
|
|
105
111
|
self.region = self._make_regions(graph)
|
|
106
112
|
|
|
107
113
|
# make regions into block address lists
|
|
108
114
|
self.regions_by_block_addrs = self._make_regions_by_block_addrs()
|
|
109
115
|
|
|
116
|
+
@staticmethod
|
|
117
|
+
def _compute_node_order(graph: networkx.DiGraph) -> dict[Any, tuple[int, int]]:
|
|
118
|
+
sorted_nodes = GraphUtils.quasi_topological_sort_nodes(graph)
|
|
119
|
+
node_order = {}
|
|
120
|
+
for i, n in enumerate(sorted_nodes):
|
|
121
|
+
node_order[n] = i, 0
|
|
122
|
+
return node_order
|
|
123
|
+
|
|
124
|
+
def _sort_nodes(self, nodes: list | set) -> list:
|
|
125
|
+
"""
|
|
126
|
+
Sorts the nodes in the order specified in self._node_order.
|
|
127
|
+
|
|
128
|
+
:param nodes: A list or set of nodes to be sorted.
|
|
129
|
+
:return: A sorted list of nodes.
|
|
130
|
+
"""
|
|
131
|
+
return sorted(nodes, key=lambda n: self._node_order[n])
|
|
132
|
+
|
|
110
133
|
def _make_regions_by_block_addrs(self) -> list[list[tuple[int, int | None]]]:
|
|
111
134
|
"""
|
|
112
135
|
Creates a list of addr lists representing each region without recursion. A single region is defined
|
|
@@ -182,30 +205,6 @@ class RegionIdentifier(Analysis):
|
|
|
182
205
|
None,
|
|
183
206
|
)
|
|
184
207
|
|
|
185
|
-
def _test_reducibility(self):
|
|
186
|
-
# make a copy of the graph
|
|
187
|
-
graph = networkx.DiGraph(self._graph)
|
|
188
|
-
|
|
189
|
-
# preprocess: make it a super graph
|
|
190
|
-
self._make_supergraph(graph)
|
|
191
|
-
|
|
192
|
-
while True:
|
|
193
|
-
changed = False
|
|
194
|
-
|
|
195
|
-
# find a node with a back-edge, remove the edge (deleting the loop), and replace it with a MultiNode
|
|
196
|
-
changed |= self._remove_self_loop(graph)
|
|
197
|
-
|
|
198
|
-
# find a node that has only one predecessor, and merge it with its predecessor (replace them with a
|
|
199
|
-
# MultiNode)
|
|
200
|
-
changed |= self._merge_single_entry_node(graph)
|
|
201
|
-
|
|
202
|
-
if not changed:
|
|
203
|
-
# a fixed-point is reached
|
|
204
|
-
break
|
|
205
|
-
|
|
206
|
-
# Flow graph reducibility, Hecht and Ullman
|
|
207
|
-
return len(graph.nodes) == 1
|
|
208
|
-
|
|
209
208
|
def _make_supergraph(self, graph: networkx.DiGraph):
|
|
210
209
|
|
|
211
210
|
entry_node = None
|
|
@@ -236,7 +235,7 @@ class RegionIdentifier(Analysis):
|
|
|
236
235
|
|
|
237
236
|
def _find_loop_headers(self, graph: networkx.DiGraph) -> list:
|
|
238
237
|
heads = list({t for _, t in dfs_back_edges(graph, self._start_node)})
|
|
239
|
-
return
|
|
238
|
+
return self._sort_nodes(heads)
|
|
240
239
|
|
|
241
240
|
def _find_initial_loop_nodes(self, graph: networkx.DiGraph, head):
|
|
242
241
|
# TODO optimize
|
|
@@ -290,7 +289,7 @@ class RegionIdentifier(Analysis):
|
|
|
290
289
|
# node.
|
|
291
290
|
subgraph = networkx.DiGraph()
|
|
292
291
|
|
|
293
|
-
sorted_refined_exit_nodes =
|
|
292
|
+
sorted_refined_exit_nodes = self._sort_nodes(refined_exit_nodes)
|
|
294
293
|
while len(sorted_refined_exit_nodes) > 1 and new_exit_nodes:
|
|
295
294
|
# visit each node in refined_exit_nodes once and determine which nodes to consider as loop nodes
|
|
296
295
|
candidate_nodes = {}
|
|
@@ -324,7 +323,7 @@ class RegionIdentifier(Analysis):
|
|
|
324
323
|
|
|
325
324
|
sorted_refined_exit_nodes += list(new_exit_nodes)
|
|
326
325
|
sorted_refined_exit_nodes = list(set(sorted_refined_exit_nodes))
|
|
327
|
-
sorted_refined_exit_nodes =
|
|
326
|
+
sorted_refined_exit_nodes = self._sort_nodes(sorted_refined_exit_nodes)
|
|
328
327
|
|
|
329
328
|
refined_exit_nodes = set(sorted_refined_exit_nodes)
|
|
330
329
|
refined_loop_nodes = refined_loop_nodes - refined_exit_nodes
|
|
@@ -373,37 +372,6 @@ class RegionIdentifier(Analysis):
|
|
|
373
372
|
|
|
374
373
|
return refined_loop_nodes, refined_exit_nodes
|
|
375
374
|
|
|
376
|
-
def _remove_self_loop(self, graph: networkx.DiGraph):
|
|
377
|
-
r = False
|
|
378
|
-
|
|
379
|
-
while True:
|
|
380
|
-
for node in graph.nodes():
|
|
381
|
-
if node in graph[node]:
|
|
382
|
-
# found a self loop
|
|
383
|
-
self._remove_node(graph, node)
|
|
384
|
-
r = True
|
|
385
|
-
break
|
|
386
|
-
else:
|
|
387
|
-
break
|
|
388
|
-
|
|
389
|
-
return r
|
|
390
|
-
|
|
391
|
-
def _merge_single_entry_node(self, graph: networkx.DiGraph):
|
|
392
|
-
r = False
|
|
393
|
-
|
|
394
|
-
while True:
|
|
395
|
-
for node in networkx.dfs_postorder_nodes(graph):
|
|
396
|
-
preds = list(graph.predecessors(node))
|
|
397
|
-
if len(preds) == 1:
|
|
398
|
-
# merge the two nodes
|
|
399
|
-
self._absorb_node(graph, preds[0], node)
|
|
400
|
-
r = True
|
|
401
|
-
break
|
|
402
|
-
else:
|
|
403
|
-
break
|
|
404
|
-
|
|
405
|
-
return r
|
|
406
|
-
|
|
407
375
|
def _make_regions(self, graph: networkx.DiGraph):
|
|
408
376
|
structured_loop_headers = set()
|
|
409
377
|
new_regions = []
|
|
@@ -535,7 +503,14 @@ class RegionIdentifier(Analysis):
|
|
|
535
503
|
abnormal_exit_nodes = set()
|
|
536
504
|
|
|
537
505
|
region = self._abstract_cyclic_region(
|
|
538
|
-
graph,
|
|
506
|
+
graph,
|
|
507
|
+
refined_loop_nodes,
|
|
508
|
+
head,
|
|
509
|
+
normal_entries,
|
|
510
|
+
abnormal_entries,
|
|
511
|
+
normal_exit_node,
|
|
512
|
+
abnormal_exit_nodes,
|
|
513
|
+
self._node_order,
|
|
539
514
|
)
|
|
540
515
|
if region.successors is not None and len(region.successors) > 1 and self._force_loop_single_exit:
|
|
541
516
|
# multi-successor region. refinement is required
|
|
@@ -661,6 +636,10 @@ class RegionIdentifier(Analysis):
|
|
|
661
636
|
graph.remove_edge(region, succ)
|
|
662
637
|
graph.add_edge(cond, succ, **edge_data)
|
|
663
638
|
|
|
639
|
+
# compute the node order of newly created nodes
|
|
640
|
+
self._node_order[region] = region_node_order = min(self._node_order[node_] for node_ in region.graph)
|
|
641
|
+
self._node_order[cond] = region_node_order[0], region_node_order[1] + 1
|
|
642
|
+
|
|
664
643
|
#
|
|
665
644
|
# Acyclic regions
|
|
666
645
|
#
|
|
@@ -733,6 +712,7 @@ class RegionIdentifier(Analysis):
|
|
|
733
712
|
graph,
|
|
734
713
|
GraphRegion(node, subgraph, None, None, False, None, cyclic_ancestor=cyclic),
|
|
735
714
|
[],
|
|
715
|
+
self._node_order,
|
|
736
716
|
secondary_graph=secondary_graph,
|
|
737
717
|
)
|
|
738
718
|
continue
|
|
@@ -780,7 +760,12 @@ class RegionIdentifier(Analysis):
|
|
|
780
760
|
l.debug("Node %r, frontier %r.", node, frontier)
|
|
781
761
|
# l.debug("Identified an acyclic region %s.", self._dbg_block_list(region.graph.nodes()))
|
|
782
762
|
self._abstract_acyclic_region(
|
|
783
|
-
graph,
|
|
763
|
+
graph,
|
|
764
|
+
region,
|
|
765
|
+
frontier,
|
|
766
|
+
self._node_order,
|
|
767
|
+
dummy_endnode=dummy_endnode,
|
|
768
|
+
secondary_graph=secondary_graph,
|
|
784
769
|
)
|
|
785
770
|
# assert dummy_endnode not in graph
|
|
786
771
|
region_created = True
|
|
@@ -909,11 +894,17 @@ class RegionIdentifier(Analysis):
|
|
|
909
894
|
)
|
|
910
895
|
return None
|
|
911
896
|
|
|
897
|
+
@staticmethod
|
|
912
898
|
def _abstract_acyclic_region(
|
|
913
|
-
|
|
899
|
+
graph: networkx.DiGraph,
|
|
900
|
+
region,
|
|
901
|
+
frontier,
|
|
902
|
+
node_order: dict[Any, tuple[int, int]],
|
|
903
|
+
dummy_endnode=None,
|
|
904
|
+
secondary_graph=None,
|
|
914
905
|
):
|
|
915
|
-
in_edges =
|
|
916
|
-
out_edges =
|
|
906
|
+
in_edges = RegionIdentifier._region_in_edges(graph, region, data=True)
|
|
907
|
+
out_edges = RegionIdentifier._region_out_edges(graph, region, data=True)
|
|
917
908
|
|
|
918
909
|
nodes_set = set()
|
|
919
910
|
for node_ in list(region.graph.nodes()):
|
|
@@ -922,6 +913,7 @@ class RegionIdentifier(Analysis):
|
|
|
922
913
|
graph.remove_node(node_)
|
|
923
914
|
|
|
924
915
|
graph.add_node(region)
|
|
916
|
+
node_order[region] = min(node_order[node_] for node_ in nodes_set)
|
|
925
917
|
|
|
926
918
|
for src, _, data in in_edges:
|
|
927
919
|
if src not in nodes_set:
|
|
@@ -937,7 +929,7 @@ class RegionIdentifier(Analysis):
|
|
|
937
929
|
graph.add_edge(region, frontier_node)
|
|
938
930
|
|
|
939
931
|
if secondary_graph is not None:
|
|
940
|
-
|
|
932
|
+
RegionIdentifier._abstract_acyclic_region(secondary_graph, region, {}, node_order)
|
|
941
933
|
|
|
942
934
|
@staticmethod
|
|
943
935
|
def _abstract_cyclic_region(
|
|
@@ -948,6 +940,7 @@ class RegionIdentifier(Analysis):
|
|
|
948
940
|
abnormal_entries,
|
|
949
941
|
normal_exit_node,
|
|
950
942
|
abnormal_exit_nodes,
|
|
943
|
+
node_order: dict[Any, tuple[int, int]],
|
|
951
944
|
):
|
|
952
945
|
region = GraphRegion(head, None, None, None, True, None)
|
|
953
946
|
|
|
@@ -1019,6 +1012,8 @@ class RegionIdentifier(Analysis):
|
|
|
1019
1012
|
graph.add_node(region)
|
|
1020
1013
|
for src, dst, data in delayed_edges:
|
|
1021
1014
|
graph.add_edge(src, dst, **data)
|
|
1015
|
+
# update node order
|
|
1016
|
+
node_order[region] = node_order[head]
|
|
1022
1017
|
|
|
1023
1018
|
region.full_graph = full_graph
|
|
1024
1019
|
|
|
@@ -1039,25 +1034,8 @@ class RegionIdentifier(Analysis):
|
|
|
1039
1034
|
out_edges.append((region, dst, data_))
|
|
1040
1035
|
return out_edges
|
|
1041
1036
|
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
out_edges = [(src, dst, data) for (src, dst, data) in graph.out_edges(node, data=True) if dst is not node]
|
|
1045
|
-
|
|
1046
|
-
# true case: it forms a region by itself :-)
|
|
1047
|
-
new_node = None if len(in_edges) <= 1 and len(out_edges) <= 1 else MultiNode([node])
|
|
1048
|
-
|
|
1049
|
-
graph.remove_node(node)
|
|
1050
|
-
|
|
1051
|
-
if new_node is not None:
|
|
1052
|
-
for src, _, data in in_edges:
|
|
1053
|
-
graph.add_edge(src, new_node, **data)
|
|
1054
|
-
|
|
1055
|
-
for _, dst, data in out_edges:
|
|
1056
|
-
graph.add_edge(new_node, dst, **data)
|
|
1057
|
-
|
|
1058
|
-
def _merge_nodes(
|
|
1059
|
-
self, graph: networkx.DiGraph, node_a, node_b, force_multinode=False
|
|
1060
|
-
): # pylint:disable=no-self-use
|
|
1037
|
+
@staticmethod
|
|
1038
|
+
def _merge_nodes(graph: networkx.DiGraph, node_a, node_b, force_multinode=False):
|
|
1061
1039
|
in_edges = list(graph.in_edges(node_a, data=True))
|
|
1062
1040
|
out_edges = list(graph.out_edges(node_b, data=True))
|
|
1063
1041
|
|
|
@@ -1089,9 +1067,116 @@ class RegionIdentifier(Analysis):
|
|
|
1089
1067
|
|
|
1090
1068
|
return new_node
|
|
1091
1069
|
|
|
1092
|
-
def
|
|
1093
|
-
|
|
1094
|
-
|
|
1070
|
+
def _ensure_jump_at_loop_exit_ends(self, node: Block | MultiNode) -> None:
|
|
1071
|
+
if isinstance(node, Block):
|
|
1072
|
+
if not node.statements:
|
|
1073
|
+
node.statements.append(
|
|
1074
|
+
Jump(
|
|
1075
|
+
None,
|
|
1076
|
+
Const(None, None, node.addr + node.original_size, self.project.arch.bits),
|
|
1077
|
+
ins_addr=node.addr,
|
|
1078
|
+
)
|
|
1079
|
+
)
|
|
1080
|
+
else:
|
|
1081
|
+
if not isinstance(first_nonlabel_nonphi_statement(node), ConditionalJump) and not isinstance(
|
|
1082
|
+
node.statements[-1],
|
|
1083
|
+
(
|
|
1084
|
+
Jump,
|
|
1085
|
+
ConditionalJump,
|
|
1086
|
+
IncompleteSwitchCaseHeadStatement,
|
|
1087
|
+
),
|
|
1088
|
+
):
|
|
1089
|
+
node.statements.append(
|
|
1090
|
+
Jump(
|
|
1091
|
+
None,
|
|
1092
|
+
Const(None, None, node.addr + node.original_size, self.project.arch.bits),
|
|
1093
|
+
ins_addr=node.addr,
|
|
1094
|
+
)
|
|
1095
|
+
)
|
|
1096
|
+
elif isinstance(node, MultiNode) and node.nodes:
|
|
1097
|
+
self._ensure_jump_at_loop_exit_ends(node.nodes[-1])
|
|
1098
|
+
|
|
1099
|
+
@staticmethod
|
|
1100
|
+
def _dbg_block_list(blocks):
|
|
1101
|
+
return [(hex(b.addr) if hasattr(b, "addr") else repr(b)) for b in blocks]
|
|
1102
|
+
|
|
1103
|
+
#
|
|
1104
|
+
# Reducibility
|
|
1105
|
+
#
|
|
1106
|
+
|
|
1107
|
+
def test_reducibility(self) -> bool:
|
|
1108
|
+
# make a copy of the graph
|
|
1109
|
+
graph = networkx.DiGraph(self._graph)
|
|
1110
|
+
|
|
1111
|
+
# preprocess: make it a super graph
|
|
1112
|
+
self._make_supergraph(graph)
|
|
1113
|
+
|
|
1114
|
+
while True:
|
|
1115
|
+
changed = False
|
|
1116
|
+
|
|
1117
|
+
# find a node with a back-edge, remove the edge (deleting the loop), and replace it with a MultiNode
|
|
1118
|
+
changed |= self._remove_self_loop(graph)
|
|
1119
|
+
|
|
1120
|
+
# find a node that has only one predecessor, and merge it with its predecessor (replace them with a
|
|
1121
|
+
# MultiNode)
|
|
1122
|
+
changed |= self._merge_single_entry_node(graph)
|
|
1123
|
+
|
|
1124
|
+
if not changed:
|
|
1125
|
+
# a fixed-point is reached
|
|
1126
|
+
break
|
|
1127
|
+
|
|
1128
|
+
# Flow graph reducibility, Hecht and Ullman
|
|
1129
|
+
return len(graph.nodes) == 1
|
|
1130
|
+
|
|
1131
|
+
def _remove_self_loop(self, graph: networkx.DiGraph) -> bool:
|
|
1132
|
+
r = False
|
|
1133
|
+
|
|
1134
|
+
while True:
|
|
1135
|
+
for node in graph.nodes():
|
|
1136
|
+
if node in graph[node]:
|
|
1137
|
+
# found a self loop
|
|
1138
|
+
self._remove_node(graph, node)
|
|
1139
|
+
r = True
|
|
1140
|
+
break
|
|
1141
|
+
else:
|
|
1142
|
+
break
|
|
1143
|
+
|
|
1144
|
+
return r
|
|
1145
|
+
|
|
1146
|
+
def _merge_single_entry_node(self, graph: networkx.DiGraph) -> bool:
|
|
1147
|
+
r = False
|
|
1148
|
+
|
|
1149
|
+
while True:
|
|
1150
|
+
for node in networkx.dfs_postorder_nodes(graph):
|
|
1151
|
+
preds = list(graph.predecessors(node))
|
|
1152
|
+
if len(preds) == 1:
|
|
1153
|
+
# merge the two nodes
|
|
1154
|
+
self._absorb_node(graph, preds[0], node)
|
|
1155
|
+
r = True
|
|
1156
|
+
break
|
|
1157
|
+
else:
|
|
1158
|
+
break
|
|
1159
|
+
|
|
1160
|
+
return r
|
|
1161
|
+
|
|
1162
|
+
def _remove_node(self, graph: networkx.DiGraph, node): # pylint:disable=no-self-use
|
|
1163
|
+
in_edges = [(src, dst, data) for (src, dst, data) in graph.in_edges(node, data=True) if src is not node]
|
|
1164
|
+
out_edges = [(src, dst, data) for (src, dst, data) in graph.out_edges(node, data=True) if dst is not node]
|
|
1165
|
+
|
|
1166
|
+
# true case: it forms a region by itself :-)
|
|
1167
|
+
new_node = None if len(in_edges) <= 1 and len(out_edges) <= 1 else MultiNode([node])
|
|
1168
|
+
|
|
1169
|
+
graph.remove_node(node)
|
|
1170
|
+
|
|
1171
|
+
if new_node is not None:
|
|
1172
|
+
for src, _, data in in_edges:
|
|
1173
|
+
graph.add_edge(src, new_node, **data)
|
|
1174
|
+
|
|
1175
|
+
for _, dst, data in out_edges:
|
|
1176
|
+
graph.add_edge(new_node, dst, **data)
|
|
1177
|
+
|
|
1178
|
+
@staticmethod
|
|
1179
|
+
def _absorb_node(graph: networkx.DiGraph, node_mommy, node_kiddie, force_multinode=False):
|
|
1095
1180
|
in_edges_mommy = graph.in_edges(node_mommy, data=True)
|
|
1096
1181
|
out_edges_mommy = graph.out_edges(node_mommy, data=True)
|
|
1097
1182
|
out_edges_kiddie = graph.out_edges(node_kiddie, data=True)
|
|
@@ -1129,38 +1214,5 @@ class RegionIdentifier(Analysis):
|
|
|
1129
1214
|
assert node_mommy not in graph
|
|
1130
1215
|
assert node_kiddie not in graph
|
|
1131
1216
|
|
|
1132
|
-
def _ensure_jump_at_loop_exit_ends(self, node: Block | MultiNode) -> None:
|
|
1133
|
-
if isinstance(node, Block):
|
|
1134
|
-
if not node.statements:
|
|
1135
|
-
node.statements.append(
|
|
1136
|
-
Jump(
|
|
1137
|
-
None,
|
|
1138
|
-
Const(None, None, node.addr + node.original_size, self.project.arch.bits),
|
|
1139
|
-
ins_addr=node.addr,
|
|
1140
|
-
)
|
|
1141
|
-
)
|
|
1142
|
-
else:
|
|
1143
|
-
if not isinstance(first_nonlabel_nonphi_statement(node), ConditionalJump) and not isinstance(
|
|
1144
|
-
node.statements[-1],
|
|
1145
|
-
(
|
|
1146
|
-
Jump,
|
|
1147
|
-
ConditionalJump,
|
|
1148
|
-
IncompleteSwitchCaseHeadStatement,
|
|
1149
|
-
),
|
|
1150
|
-
):
|
|
1151
|
-
node.statements.append(
|
|
1152
|
-
Jump(
|
|
1153
|
-
None,
|
|
1154
|
-
Const(None, None, node.addr + node.original_size, self.project.arch.bits),
|
|
1155
|
-
ins_addr=node.addr,
|
|
1156
|
-
)
|
|
1157
|
-
)
|
|
1158
|
-
elif isinstance(node, MultiNode) and node.nodes:
|
|
1159
|
-
self._ensure_jump_at_loop_exit_ends(node.nodes[-1])
|
|
1160
|
-
|
|
1161
|
-
@staticmethod
|
|
1162
|
-
def _dbg_block_list(blocks):
|
|
1163
|
-
return [(hex(b.addr) if hasattr(b, "addr") else repr(b)) for b in blocks]
|
|
1164
|
-
|
|
1165
1217
|
|
|
1166
1218
|
register_analysis(RegionIdentifier, "RegionIdentifier")
|
|
@@ -107,7 +107,7 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
|
|
|
107
107
|
)
|
|
108
108
|
self.secondary_stackvars = rewriter.secondary_stackvars
|
|
109
109
|
self.out_graph = rewriter.out_graph
|
|
110
|
-
self.max_vvar_id = rewriter.max_vvar_id
|
|
110
|
+
self.max_vvar_id: int = rewriter.max_vvar_id if rewriter.max_vvar_id is not None else 0
|
|
111
111
|
|
|
112
112
|
def _calculate_virtual_variables(
|
|
113
113
|
self,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# pylint:disable=missing-class-docstring,too-many-boolean-expressions,unused-argument,no-self-use
|
|
2
2
|
from __future__ import annotations
|
|
3
|
-
from typing import Any, TYPE_CHECKING
|
|
3
|
+
from typing import cast, Any, TYPE_CHECKING
|
|
4
4
|
from collections.abc import Callable
|
|
5
5
|
from collections import defaultdict, Counter
|
|
6
6
|
import logging
|
|
@@ -12,7 +12,6 @@ from ailment.constant import UNDETERMINED_SIZE
|
|
|
12
12
|
from ailment.expression import StackBaseOffset, BinaryOp
|
|
13
13
|
from unique_log_filter import UniqueLogFilter
|
|
14
14
|
|
|
15
|
-
from angr.procedures import SIM_LIBRARIES, SIM_TYPE_COLLECTIONS
|
|
16
15
|
from angr.sim_type import (
|
|
17
16
|
SimTypeLongLong,
|
|
18
17
|
SimTypeInt,
|
|
@@ -32,7 +31,6 @@ from angr.sim_type import (
|
|
|
32
31
|
SimTypeFixedSizeArray,
|
|
33
32
|
SimTypeLength,
|
|
34
33
|
SimTypeReg,
|
|
35
|
-
dereference_simtype,
|
|
36
34
|
SimTypeInt128,
|
|
37
35
|
SimTypeInt256,
|
|
38
36
|
SimTypeInt512,
|
|
@@ -43,7 +41,7 @@ from angr.sim_variable import SimVariable, SimTemporaryVariable, SimStackVariabl
|
|
|
43
41
|
from angr.utils.constants import is_alignment_mask
|
|
44
42
|
from angr.utils.library import get_cpp_function_name
|
|
45
43
|
from angr.utils.loader import is_in_readonly_segment, is_in_readonly_section
|
|
46
|
-
from angr.utils.types import unpack_typeref, unpack_pointer_and_array
|
|
44
|
+
from angr.utils.types import unpack_typeref, unpack_pointer_and_array, dereference_simtype_by_lib
|
|
47
45
|
from angr.analyses.decompiler.utils import structured_node_is_simple_return
|
|
48
46
|
from angr.errors import UnsupportedNodeTypeError, AngrRuntimeError
|
|
49
47
|
from angr.knowledge_plugins.cfg.memory_data import MemoryData, MemoryDataSort
|
|
@@ -1301,12 +1299,7 @@ class CFunctionCall(CStatement, CExpression):
|
|
|
1301
1299
|
proto = self.callee_func.prototype
|
|
1302
1300
|
if self.callee_func.prototype_libname is not None:
|
|
1303
1301
|
# we need to deref the prototype in case it uses SimTypeRef internally
|
|
1304
|
-
|
|
1305
|
-
for prototype_lib in SIM_LIBRARIES[self.callee_func.prototype_libname]:
|
|
1306
|
-
if prototype_lib.type_collection_names:
|
|
1307
|
-
for typelib_name in prototype_lib.type_collection_names:
|
|
1308
|
-
type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
|
|
1309
|
-
proto = dereference_simtype(proto, type_collections)
|
|
1302
|
+
proto = cast(SimTypeFunction, dereference_simtype_by_lib(proto, self.callee_func.prototype_libname))
|
|
1310
1303
|
return proto
|
|
1311
1304
|
returnty = SimTypeInt(signed=False)
|
|
1312
1305
|
return SimTypeFunction([arg.type for arg in self.args], returnty).with_arch(self.codegen.project.arch)
|
|
@@ -1314,7 +1307,9 @@ class CFunctionCall(CStatement, CExpression):
|
|
|
1314
1307
|
@property
|
|
1315
1308
|
def type(self):
|
|
1316
1309
|
if self.is_expr:
|
|
1317
|
-
return self.prototype.returnty or SimTypeInt(
|
|
1310
|
+
return (self.prototype.returnty if self.prototype is not None else None) or SimTypeInt(
|
|
1311
|
+
signed=False
|
|
1312
|
+
).with_arch(self.codegen.project.arch)
|
|
1318
1313
|
raise AngrRuntimeError("CFunctionCall.type should not be accessed if the function call is used as a statement.")
|
|
1319
1314
|
|
|
1320
1315
|
def _is_target_ambiguous(self, func_name: str) -> bool:
|
|
@@ -1323,6 +1318,8 @@ class CFunctionCall(CStatement, CExpression):
|
|
|
1323
1318
|
"""
|
|
1324
1319
|
caller, callee = self.codegen._func, self.callee_func
|
|
1325
1320
|
|
|
1321
|
+
assert self.codegen._variables_in_use is not None
|
|
1322
|
+
|
|
1326
1323
|
for var in self.codegen._variables_in_use.values():
|
|
1327
1324
|
if func_name == var.name:
|
|
1328
1325
|
return True
|
|
@@ -1461,7 +1458,7 @@ class CGoto(CStatement):
|
|
|
1461
1458
|
def c_repr_chunks(self, indent=0, asexpr=False):
|
|
1462
1459
|
indent_str = self.indent_str(indent=indent)
|
|
1463
1460
|
lbl = None
|
|
1464
|
-
if self.codegen is not None:
|
|
1461
|
+
if self.codegen is not None and isinstance(self.target, int):
|
|
1465
1462
|
lbl = self.codegen.map_addr_to_label.get((self.target, self.target_idx))
|
|
1466
1463
|
|
|
1467
1464
|
yield indent_str, None
|
|
@@ -1609,7 +1606,9 @@ class CVariable(CExpression):
|
|
|
1609
1606
|
|
|
1610
1607
|
self.variable: SimVariable = variable
|
|
1611
1608
|
self.unified_variable: SimVariable | None = unified_variable
|
|
1612
|
-
self.variable_type: SimType =
|
|
1609
|
+
self.variable_type: SimType | None = (
|
|
1610
|
+
variable_type.with_arch(self.codegen.project.arch) if variable_type is not None else None
|
|
1611
|
+
)
|
|
1613
1612
|
self.vvar_id = vvar_id
|
|
1614
1613
|
|
|
1615
1614
|
@property
|
|
@@ -3970,7 +3969,8 @@ class MakeTypecastsImplicit(CStructuredCodeWalker):
|
|
|
3970
3969
|
return super().handle_CAssignment(obj)
|
|
3971
3970
|
|
|
3972
3971
|
def handle_CFunctionCall(self, obj: CFunctionCall):
|
|
3973
|
-
|
|
3972
|
+
prototype_args = [] if obj.prototype is None else obj.prototype.args
|
|
3973
|
+
for i, (c_arg, arg_ty) in enumerate(zip(obj.args, prototype_args)):
|
|
3974
3974
|
obj.args[i] = self.collapse(arg_ty, c_arg)
|
|
3975
3975
|
return super().handle_CFunctionCall(obj)
|
|
3976
3976
|
|
|
@@ -4033,7 +4033,7 @@ class PointerArithmeticFixer(CStructuredCodeWalker):
|
|
|
4033
4033
|
a_ptr = a_ptr + 1.
|
|
4034
4034
|
"""
|
|
4035
4035
|
|
|
4036
|
-
def handle_CBinaryOp(self, obj):
|
|
4036
|
+
def handle_CBinaryOp(self, obj: CBinaryOp): # type: ignore
|
|
4037
4037
|
obj: CBinaryOp = super().handle_CBinaryOp(obj)
|
|
4038
4038
|
if (
|
|
4039
4039
|
obj.op in ("Add", "Sub")
|
|
@@ -1261,6 +1261,10 @@ class PhoenixStructurer(StructurerBase):
|
|
|
1261
1261
|
# update node_a
|
|
1262
1262
|
node_a = next(iter(nn for nn in graph.nodes if nn.addr == target))
|
|
1263
1263
|
|
|
1264
|
+
better_node_a = node_a
|
|
1265
|
+
if isinstance(node_a, SequenceNode) and is_empty_or_label_only_node(node_a.nodes[0]) and len(node_a.nodes) == 2:
|
|
1266
|
+
better_node_a = node_a.nodes[1]
|
|
1267
|
+
|
|
1264
1268
|
case_and_entry_addrs = self._find_case_and_entry_addrs(node_a, graph, cmp_lb, jump_table)
|
|
1265
1269
|
|
|
1266
1270
|
cases, node_default, to_remove = self._switch_build_cases(
|
|
@@ -1272,6 +1276,30 @@ class PhoenixStructurer(StructurerBase):
|
|
|
1272
1276
|
full_graph,
|
|
1273
1277
|
)
|
|
1274
1278
|
|
|
1279
|
+
if isinstance(better_node_a, SwitchCaseNode) and better_node_a.default_node is None:
|
|
1280
|
+
# we found a different head for an otherwise complete edge case.
|
|
1281
|
+
# recreate the switch with it.
|
|
1282
|
+
newsc = SwitchCaseNode(better_node_a.switch_expr, better_node_a.cases, node_default, addr=node.addr)
|
|
1283
|
+
|
|
1284
|
+
if node_default is not None and set(graph.succ[node_a]) != set(graph.succ[node_default]):
|
|
1285
|
+
# if node_a and default_node have different successors we need to bail
|
|
1286
|
+
return False
|
|
1287
|
+
|
|
1288
|
+
for pgraph in (graph, full_graph):
|
|
1289
|
+
all_preds = set(pgraph.pred[node])
|
|
1290
|
+
all_succs = set(pgraph.succ[node_a])
|
|
1291
|
+
if node_default is not None:
|
|
1292
|
+
pgraph.remove_node(node_default)
|
|
1293
|
+
pgraph.remove_node(node)
|
|
1294
|
+
pgraph.remove_node(node_a)
|
|
1295
|
+
pgraph.add_node(newsc)
|
|
1296
|
+
for pred in all_preds:
|
|
1297
|
+
pgraph.add_edge(pred, newsc)
|
|
1298
|
+
for succ in all_succs:
|
|
1299
|
+
pgraph.add_edge(newsc, succ)
|
|
1300
|
+
|
|
1301
|
+
return True
|
|
1302
|
+
|
|
1275
1303
|
if node_default is None:
|
|
1276
1304
|
switch_end_addr = node_b_addr
|
|
1277
1305
|
else:
|
|
@@ -368,6 +368,17 @@ class SwitchCaseNode(BaseNode):
|
|
|
368
368
|
self.default_node = default_node
|
|
369
369
|
self.addr = addr
|
|
370
370
|
|
|
371
|
+
def dbg_repr(self, indent=0) -> str:
|
|
372
|
+
return (
|
|
373
|
+
f"SwitchCaseNode(switch_expr={self.switch_expr}, cases=["
|
|
374
|
+
+ ", ".join(
|
|
375
|
+
hex(case) if isinstance(case, int) else f"({', '.join(hex(ccase) for ccase in case)})"
|
|
376
|
+
for case in self.cases
|
|
377
|
+
)
|
|
378
|
+
+ "\n"
|
|
379
|
+
+ f"], default_node={self.default_node})"
|
|
380
|
+
)
|
|
381
|
+
|
|
371
382
|
|
|
372
383
|
class IncompleteSwitchCaseNode(BaseNode):
|
|
373
384
|
"""
|
|
@@ -9,7 +9,7 @@ from cle.backends import ELF
|
|
|
9
9
|
import claripy
|
|
10
10
|
|
|
11
11
|
from angr.storage.memory_mixins.paged_memory.pages.multi_values import MultiValues
|
|
12
|
-
from angr.sim_type import SimTypeBottom
|
|
12
|
+
from angr.sim_type import SimTypeBottom
|
|
13
13
|
from angr.knowledge_plugins.key_definitions.atoms import Atom, Register, MemoryLocation, SpOffset
|
|
14
14
|
from angr.knowledge_plugins.key_definitions.tag import Tag
|
|
15
15
|
from angr.calling_conventions import SimCC
|
|
@@ -18,7 +18,7 @@ from angr.knowledge_plugins.key_definitions.definition import Definition
|
|
|
18
18
|
from angr.knowledge_plugins.functions import Function
|
|
19
19
|
from angr.code_location import CodeLocation, ExternalCodeLocation
|
|
20
20
|
from angr.knowledge_plugins.key_definitions.constants import ObservationPointType
|
|
21
|
-
from angr import
|
|
21
|
+
from angr.utils.types import dereference_simtype_by_lib
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
if TYPE_CHECKING:
|
|
@@ -221,16 +221,16 @@ class FunctionCallDataUnwrapped(FunctionCallData):
|
|
|
221
221
|
Typechecks be gone!
|
|
222
222
|
"""
|
|
223
223
|
|
|
224
|
-
address_multi: MultiValues
|
|
225
|
-
address: int
|
|
224
|
+
address_multi: MultiValues # type: ignore[reportIncompatibleVariableOverride]
|
|
225
|
+
address: int # type: ignore[reportIncompatibleVariableOverride]
|
|
226
226
|
symbol: Symbol
|
|
227
|
-
function: Function
|
|
228
|
-
name: str
|
|
229
|
-
cc: SimCC
|
|
230
|
-
prototype: SimTypeFunction
|
|
231
|
-
args_atoms: list[set[Atom]]
|
|
232
|
-
args_values: list[MultiValues]
|
|
233
|
-
ret_atoms: set[Atom]
|
|
227
|
+
function: Function # type: ignore[reportIncompatibleVariableOverride]
|
|
228
|
+
name: str # type: ignore[reportIncompatibleVariableOverride]
|
|
229
|
+
cc: SimCC # type: ignore[reportIncompatibleVariableOverride]
|
|
230
|
+
prototype: SimTypeFunction # type: ignore[reportIncompatibleVariableOverride]
|
|
231
|
+
args_atoms: list[set[Atom]] # type: ignore[reportIncompatibleVariableOverride]
|
|
232
|
+
args_values: list[MultiValues] # type: ignore[reportIncompatibleVariableOverride]
|
|
233
|
+
ret_atoms: set[Atom] # type: ignore[reportIncompatibleVariableOverride]
|
|
234
234
|
|
|
235
235
|
def __init__(self, inner: FunctionCallData):
|
|
236
236
|
d = dict(inner.__dict__)
|
|
@@ -399,14 +399,8 @@ class FunctionHandler:
|
|
|
399
399
|
if data.function is not None and data.function.prototype_libname
|
|
400
400
|
else hook_libname
|
|
401
401
|
)
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
for prototype_lib in SIM_LIBRARIES[prototype_libname]:
|
|
405
|
-
if prototype_lib.type_collection_names:
|
|
406
|
-
for typelib_name in prototype_lib.type_collection_names:
|
|
407
|
-
type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
|
|
408
|
-
if type_collections:
|
|
409
|
-
prototype = dereference_simtype(data.prototype, type_collections).with_arch(state.arch)
|
|
402
|
+
if prototype_libname is not None:
|
|
403
|
+
prototype = dereference_simtype_by_lib(data.prototype, prototype_libname)
|
|
410
404
|
data.prototype = cast(SimTypeFunction, prototype)
|
|
411
405
|
|
|
412
406
|
if isinstance(data.prototype, SimTypeFunction):
|
angr/analyses/smc.py
CHANGED
|
@@ -42,6 +42,8 @@ class TraceClassifier:
|
|
|
42
42
|
"""
|
|
43
43
|
addr = state.solver.eval(state.inspect.mem_write_address)
|
|
44
44
|
length = state.inspect.mem_write_length
|
|
45
|
+
if length is None:
|
|
46
|
+
length = len(state.inspect.mem_write_expr) // state.arch.byte_width
|
|
45
47
|
if not isinstance(length, int):
|
|
46
48
|
length = state.solver.eval(length)
|
|
47
49
|
self.map.add(addr, length, TraceActions.WRITE)
|
|
@@ -103,7 +105,7 @@ class SelfModifyingCodeAnalysis(Analysis):
|
|
|
103
105
|
"""
|
|
104
106
|
:param subject: Subject of analysis
|
|
105
107
|
:param max_bytes: Maximum number of bytes from subject address. 0 for no limit (default).
|
|
106
|
-
:param state: State to begin executing from
|
|
108
|
+
:param state: State to begin executing from.
|
|
107
109
|
"""
|
|
108
110
|
assert self.project.selfmodifying_code
|
|
109
111
|
|