angr 9.2.84__py3-none-win_amd64.whl → 9.2.85__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/cfg/cfg_base.py +6 -1
- angr/analyses/cfg/cfg_fast.py +32 -10
- angr/analyses/decompiler/clinic.py +204 -4
- angr/analyses/decompiler/condition_processor.py +8 -2
- angr/analyses/decompiler/decompiler.py +19 -17
- angr/analyses/decompiler/goto_manager.py +34 -51
- angr/analyses/decompiler/optimization_passes/__init__.py +5 -5
- angr/analyses/decompiler/optimization_passes/div_simplifier.py +2 -0
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
- angr/analyses/decompiler/optimization_passes/mod_simplifier.py +2 -0
- angr/analyses/decompiler/optimization_passes/multi_simplifier.py +2 -0
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +131 -3
- angr/analyses/decompiler/optimization_passes/ret_deduplicator.py +3 -3
- angr/analyses/decompiler/optimization_passes/return_duplicator.py +519 -0
- angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +14 -2
- angr/analyses/decompiler/region_identifier.py +8 -2
- angr/analyses/decompiler/region_simplifiers/goto.py +5 -4
- angr/analyses/decompiler/structured_codegen/c.py +33 -1
- angr/analyses/decompiler/structuring/phoenix.py +3 -1
- angr/analyses/decompiler/structuring/structurer_nodes.py +11 -5
- angr/analyses/decompiler/utils.py +50 -0
- angr/analyses/disassembly.py +10 -3
- angr/analyses/propagator/engine_ail.py +125 -0
- angr/analyses/reaching_definitions/engine_ail.py +36 -2
- angr/analyses/reaching_definitions/rd_initializer.py +15 -1
- angr/analyses/reaching_definitions/rd_state.py +9 -4
- angr/analyses/stack_pointer_tracker.py +10 -17
- angr/analyses/variable_recovery/engine_ail.py +27 -1
- angr/angrdb/serializers/loader.py +10 -3
- angr/calling_conventions.py +2 -0
- angr/engines/pcode/behavior.py +7 -2
- angr/engines/pcode/cc.py +1 -0
- angr/engines/pcode/emulate.py +144 -104
- angr/engines/pcode/lifter.py +135 -79
- angr/knowledge_plugins/functions/function_manager.py +5 -3
- angr/knowledge_plugins/propagations/states.py +14 -0
- angr/lib/angr_native.dll +0 -0
- angr/procedures/cgc/deallocate.py +5 -2
- angr/procedures/posix/gethostbyname.py +23 -8
- angr/project.py +4 -0
- angr/simos/__init__.py +2 -0
- angr/simos/simos.py +1 -0
- angr/simos/snimmuc_nxp.py +152 -0
- angr/state_plugins/history.py +3 -1
- angr/utils/graph.py +20 -18
- {angr-9.2.84.dist-info → angr-9.2.85.dist-info}/METADATA +9 -8
- {angr-9.2.84.dist-info → angr-9.2.85.dist-info}/RECORD +57 -55
- tests/analyses/cfg/test_cfg_rust_got_resolution.py +2 -1
- tests/analyses/cfg/test_jumptables.py +2 -1
- tests/analyses/decompiler/test_decompiler.py +130 -103
- tests/engines/pcode/test_emulate.py +607 -0
- tests/serialization/test_db.py +30 -0
- angr/analyses/decompiler/optimization_passes/eager_returns.py +0 -285
- {angr-9.2.84.dist-info → angr-9.2.85.dist-info}/LICENSE +0 -0
- {angr-9.2.84.dist-info → angr-9.2.85.dist-info}/WHEEL +0 -0
- {angr-9.2.84.dist-info → angr-9.2.85.dist-info}/entry_points.txt +0 -0
- {angr-9.2.84.dist-info → angr-9.2.85.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,519 @@
|
|
|
1
|
+
from typing import Any, Tuple, Dict, List, TYPE_CHECKING, Optional
|
|
2
|
+
from itertools import count
|
|
3
|
+
import copy
|
|
4
|
+
import logging
|
|
5
|
+
import inspect
|
|
6
|
+
|
|
7
|
+
import ailment.expression
|
|
8
|
+
import networkx
|
|
9
|
+
|
|
10
|
+
from ailment import Block
|
|
11
|
+
from ailment.statement import Jump, ConditionalJump, Assignment, Statement, Return, Label
|
|
12
|
+
from ailment.expression import Const
|
|
13
|
+
from ailment.block_walker import AILBlockWalkerBase
|
|
14
|
+
|
|
15
|
+
from .optimization_pass import StructuringOptimizationPass
|
|
16
|
+
from ..condition_processor import ConditionProcessor, EmptyBlockNotice
|
|
17
|
+
from ..graph_region import GraphRegion
|
|
18
|
+
from ..utils import remove_labels, to_ail_supergraph
|
|
19
|
+
from ..structuring.structurer_nodes import MultiNode
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from ailment.statement import Call
|
|
23
|
+
|
|
24
|
+
_l = logging.getLogger(name=__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class AILCallCounter(AILBlockWalkerBase):
    """
    Block walker that tallies the AIL calls it visits.

    Calls appearing as statements and calls appearing as expressions are both counted in ``calls``. After counting,
    each handler delegates to the corresponding base-class handler.
    """

    # Running tally. Declared on the class; the first increment on an instance shadows it with an instance attribute.
    calls = 0

    def _handle_Call(self, stmt_idx: int, stmt: "Call", block: Optional["Block"]):
        # a call used as a statement
        self.calls = self.calls + 1
        super()._handle_Call(stmt_idx, stmt, block)

    def _handle_CallExpr(self, expr_idx: int, expr: "Call", stmt_idx: int, stmt: Statement, block: Optional[Block]):
        # a call used as an expression inside another statement
        self.calls = self.calls + 1
        super()._handle_CallExpr(expr_idx, expr, stmt_idx, stmt, block)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ReturnDuplicator(StructuringOptimizationPass):
|
|
44
|
+
"""
|
|
45
|
+
An optimization pass that reverts a subset of Irreducible Statement Condensing (ISC) optimizations, as described
|
|
46
|
+
in the USENIX 2024 paper SAILR.
|
|
47
|
+
|
|
48
|
+
Some compilers, including GCC, Clang, and MSVC, apply various optimizations to reduce the number of statements in
|
|
49
|
+
code. These optimizations will take equivalent statements, or a subset of them, and replace them with a single
|
|
50
|
+
copy that is jumped to by gotos -- optimizing for space and sometimes speed.
|
|
51
|
+
|
|
52
|
+
This optimization pass will revert those gotos by re-duplicating the condensed blocks. Since Return statements
|
|
53
|
+
are the most common, we use this optimization pass to revert only gotos to return statements. Additionally, we
|
|
54
|
+
perform some additional readability fixups, like not re-duplicating returns to shared components.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
func: The function to optimize.
|
|
58
|
+
node_idx_start: The index to start at when creating new nodes. This is used by Clinic to ensure that
|
|
59
|
+
node indices are unique across multiple passes.
|
|
60
|
+
max_opt_iters: The maximum number of optimization iterations to perform.
|
|
61
|
+
max_calls_in_regions: The maximum number of calls that can be in a region. This is used to prevent
|
|
62
|
+
duplicating too much code.
|
|
63
|
+
prevent_new_gotos: If True, this optimization pass will prevent new gotos from being created.
|
|
64
|
+
minimize_copies_for_regions: If True, this optimization pass will minimize the number of copies by doing only
|
|
65
|
+
a single copy for connected in_edges that form a region.
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
ARCHES = None
|
|
69
|
+
PLATFORMS = None
|
|
70
|
+
NAME = "Duplicate return blocks to reduce goto statements"
|
|
71
|
+
DESCRIPTION = inspect.cleandoc(__doc__[: __doc__.index("Args:")]) # pylint:disable=unsubscriptable-object
|
|
72
|
+
|
|
73
|
+
def __init__(
    self,
    func,
    # internal parameters that should be used by Clinic
    node_idx_start: int = 0,
    # settings
    max_opt_iters: int = 10,
    max_calls_in_regions: int = 2,
    prevent_new_gotos: bool = True,
    minimize_copies_for_regions: bool = True,
    **kwargs,
):
    """
    Store the pass settings and immediately run the analysis.

    :param func:                        The function to optimize.
    :param node_idx_start:              First index handed out to duplicated nodes.
    :param max_opt_iters:               Forwarded to the parent pass.
    :param max_calls_in_regions:        Regions containing more calls than this are never duplicated.
    :param prevent_new_gotos:           Forwarded to the parent pass.
    :param minimize_copies_for_regions: Enable the pre-pass that copies a region once per connected group of
                                        predecessors.
    :param kwargs:                      Forwarded to the parent pass.
    """
    super().__init__(
        func,
        max_opt_iters=max_opt_iters,
        prevent_new_gotos=prevent_new_gotos,
        **kwargs,
    )

    # source of fresh ``idx`` values for every node copy made by this pass
    self.node_idx = count(start=node_idx_start)

    self._minimize_copies_for_regions = minimize_copies_for_regions
    self._max_calls_in_region = max_calls_in_regions

    self.analyze()
|
|
91
|
+
|
|
92
|
+
def _check(self):
    """
    Decide whether this pass applies to the function.

    :return: A tuple ``(applicable, cache)``: ``applicable`` is True when the function has at least one endpoint;
             ``cache`` is always None.
    """
    has_endpoints = bool(self._func.endpoints)
    return has_endpoints, None
|
|
95
|
+
|
|
96
|
+
def _analyze(self, cache=None):
    """
    Run one round of return-region duplication on ``self.out_graph``.

    This analysis is run in a loop in analyze() for a maximum of max_opt_iters times.

    :param cache: Unused here.
    :return:      True if at least one end-node region was processed in this round, False otherwise.
    """
    out_graph = self.out_graph
    changed = False

    regions_by_head = self._find_endnode_regions(out_graph)
    if self._minimize_copies_for_regions:
        # second pass: predecessors that are connected to each other and form a region of their own share one copy
        regions_by_head = self._copy_connected_edge_components(regions_by_head, out_graph)

    for head, (incoming, region) in regions_by_head.items():
        always_duplicate = self._is_simple_return_graph(region)

        for pred, _ in incoming:
            if not self._should_duplicate_dst(pred, head, out_graph, dst_is_const_ret=always_duplicate):
                continue
            # every eligible predecessor gets its own copy of the region
            self._copy_region([pred], head, region, out_graph)

        # once nobody points at the original region any more, drop it
        if head in out_graph and out_graph.in_degree(head) == 0:
            out_graph.remove_nodes_from(region)

        # NOTE(review): the flag is raised for every processed region, even when no predecessor qualified for a
        # copy -- confirm this is intended.
        changed = True

    return changed
|
|
124
|
+
|
|
125
|
+
def _is_goto_edge(
    self,
    src: Block,
    dst: Block,
    graph: networkx.DiGraph = None,
    check_for_ifstmts=True,
    max_level_check=1,
):
    """
    Tell whether the edge ``src -> dst`` is recorded as a goto by the goto manager.

    TODO: correct how goto edge addressing works
    This function only exists because of a long-standing bug that sometimes reports the address of the if-stmt
    above a goto edge as the goto source. To compensate, predecessors up to ``max_level_check`` levels above
    ``src`` are also tried as the goto source. This needs to include Jump to deal with loops.

    :param src:               Source block of the edge.
    :param dst:               Destination block of the edge.
    :param graph:             Graph used to look up predecessors. When None, only ``src`` itself is checked.
    :param check_for_ifstmts: When False, only ``src`` itself is checked.
    :param max_level_check:   How many predecessor levels above ``src`` to include.
    :return:                  True if a goto to ``dst`` is found, False otherwise.
    """
    if not check_for_ifstmts or graph is None:
        # plain lookup, no predecessor search
        return self._goto_manager.is_goto_edge(src, dst)

    # gather src plus its ancestors, one predecessor level at a time
    candidates = [src]
    frontier = [src]
    for _ in range(max_level_check):
        frontier = [parent for member in frontier for parent in graph.predecessors(member)]
        candidates.extend(frontier)

    direct_parents = list(graph.predecessors(src))
    for candidate in candidates:
        if not candidate or not candidate.statements:
            continue

        # special case if-stmts that are next to each other
        ends_in_cond_jump = isinstance(candidate.statements[-1], ConditionalJump)
        if ends_in_cond_jump and candidate in direct_parents:
            continue

        if self._goto_manager.is_goto_edge(candidate, dst):
            return True

    return False
|
|
165
|
+
|
|
166
|
+
def _find_endnode_regions(self, graph) -> Dict[Any, Tuple[List[Tuple[Any, Any]], networkx.DiGraph]]:
    """
    Find all the regions that contain a node with no successors. These are the "end nodes" of the graph.

    :param graph: The graph to search.
    :return:      A dict mapping each region head to ``(in_edges, region)``, where ``in_edges`` are the edges that
                  enter the head from outside the region. Only regions with at least two such edges, no
                  indirect-jump predecessor, and no more than ``self._max_calls_in_region`` calls are returned.
    """
    # The result is keyed by the region head because different end nodes may lead to the same region head:
    # consider the typical "fork" region where the stack canary is checked in x86-64 binaries.
    regions_by_head: Dict[Any, Tuple[List[Tuple[Any, Any]], networkx.DiGraph]] = {}

    for tail in [n for n in graph.nodes() if graph.out_degree[n] == 0]:
        incoming = list(graph.in_edges(tail))
        if not incoming:
            continue

        if len(incoming) == 1:
            # back-trace until it reaches a node with two predecessors
            region, head = self._single_entry_region(graph, tail)
            # keep only the edges that enter the head from outside the region
            incoming = [(src, dst) for src, dst in graph.in_edges(head) if src not in region]
        else:
            # the end node alone is the region
            region = networkx.DiGraph()
            region.add_node(tail)
            head = tail

        # zero in-edges: a single connected component; one in-edge: nothing to duplicate
        if len(incoming) < 2:
            continue

        if any(self._is_indirect_jump_ailblock(src) for src, _ in incoming):
            continue

        # to make sure we are not copying like crazy, cap the amount of code (estimated in calls) per region
        if self._number_of_calls_in(region) > self._max_calls_in_region:
            continue

        regions_by_head[head] = incoming, region

    return regions_by_head
|
|
216
|
+
|
|
217
|
+
def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False):
    """
    Decide whether the region headed by ``dst`` should be duplicated for predecessor ``src``.

    :param src:              The predecessor node.
    :param dst:              The region head.
    :param graph:            The graph both nodes live in.
    :param dst_is_const_ret: True if the destination region is a simple return region.
    :return:                 True if the destination should be duplicated.
    """
    # Simple return regions are always duplicated; anything else only when the edge
    # (or one from a block just above it) is a goto.
    return True if dst_is_const_ret else self._is_goto_edge(src, dst, graph=graph, check_for_ifstmts=True)
|
|
224
|
+
|
|
225
|
+
def _copy_region(self, pred_nodes, region_head, region, graph):
    """
    Duplicate ``region`` once inside ``graph`` and re-route every node in ``pred_nodes`` to the copy.

    Each region node is deep-copied at most once per call and given a fresh ``idx`` from ``self.node_idx``. The
    jump at the end of each predecessor is patched so its target index refers to the copy. Finally, the original
    edges from ``pred_nodes`` to ``region_head`` are removed.

    :param pred_nodes:  Predecessors of the region head that will share the new copy.
    :param region_head: Entry node of the region.
    :param region:      The region graph to duplicate.
    :param graph:       The graph to modify in place.
    """
    duplicates = {}
    worklist = [(pred_node, region_head) for pred_node in pred_nodes]

    while worklist:
        parent, original = worklist.pop(0)

        duplicate = duplicates.get(original)
        if duplicate is None:
            duplicate = copy.deepcopy(original)
            duplicate.idx = next(self.node_idx)
            duplicates[original] = duplicate

        graph.add_edge(parent, duplicate)

        # modify Jump.target_idx and ConditionalJump.{true,false}_target_idx accordingly
        try:
            terminator = ConditionProcessor.get_last_statement(parent)
            if isinstance(terminator, Jump):
                target = terminator.target
                if isinstance(target, Const) and target.value == duplicate.addr:
                    terminator.target_idx = duplicate.idx
            elif isinstance(terminator, ConditionalJump):
                on_true = terminator.true_target
                on_false = terminator.false_target
                if isinstance(on_true, Const) and on_true.value == duplicate.addr:
                    terminator.true_target_idx = duplicate.idx
                elif isinstance(on_false, Const) and on_false.value == duplicate.addr:
                    terminator.false_target_idx = duplicate.idx
        except EmptyBlockNotice:
            # the parent has no statement to patch
            pass

        # continue into the rest of the region, hanging successors off the copy
        worklist.extend((duplicate, child) for child in region.successors(original))

    # delete the old edges to the original region head
    for pred_node in pred_nodes:
        graph.remove_edge(pred_node, region_head)
|
|
259
|
+
|
|
260
|
+
def _copy_connected_edge_components(
    self, endnode_regions: Dict[Any, Tuple[List[Tuple[Any, Any]], networkx.DiGraph]], graph: networkx.DiGraph
):
    """
    Give connected groups of predecessors a single shared copy of their end-node region.

    For each region, the predecessors of the region head are grouped into weakly connected components of the
    subgraph they induce in ``graph``. A component is duplicated as a unit when:

    - it contains more than one node,
    - at least one of its nodes should have the region duplicated (see ``_should_duplicate_dst``), and
    - the set of its block addresses is exactly the block-address set of some region found under
      ``self._ri.region``.

    ``graph`` is modified in place. The original region is removed once its head has no incoming edges left.

    :param endnode_regions: Mapping of region head to ``(in_edges, region)``.
    :param graph:           The graph to modify.
    :return:                A copy of ``endnode_regions`` in which handled in-edges have been dropped; a region
                            with no in-edges left is removed from the mapping entirely.
    """
    updated_regions = endnode_regions.copy()
    all_region_block_addrs = list(self._find_block_sets_in_all_regions(self._ri.region).values())

    for region_head, (in_edges, region) in endnode_regions.items():
        is_single_const_ret_region = self._is_simple_return_graph(region)
        pred_nodes = [src for src, _ in in_edges]
        pred_subgraph = networkx.subgraph(graph, pred_nodes)
        multi_node_components = [c for c in networkx.weakly_connected_components(pred_subgraph) if len(c) > 1]
        if not multi_node_components:
            continue

        # find components that have a node that should be duplicated
        candidate_components = [
            nodes
            for nodes in multi_node_components
            if any(
                self._should_duplicate_dst(n, region_head, graph, dst_is_const_ret=is_single_const_ret_region)
                for n in nodes
            )
        ]
        if not candidate_components:
            continue

        # No overlap check is needed: weakly connected components partition the nodes of the subgraph, so two
        # components never share a node. (The previous check intersected into an empty set and so never fired.)

        # every component needs to form its own region with ONLY those nodes in the region
        duplicatable_components = [
            component
            for component in candidate_components
            if {n.addr for n in component} in all_region_block_addrs
        ]

        new_in_edges = in_edges
        for nodes in duplicatable_components:
            self._copy_region(nodes, region_head, region, graph)
            if region_head in graph and graph.in_degree(region_head) == 0:
                graph.remove_nodes_from(region)

            # update the in_edges to remove any nodes that have been copied
            new_in_edges = [edge for edge in new_in_edges if edge[0] not in nodes]

        if not new_in_edges:
            del updated_regions[region_head]
        else:
            updated_regions[region_head] = new_in_edges, region

    return updated_regions
|
|
314
|
+
|
|
315
|
+
@staticmethod
def _is_simple_return_graph(graph: networkx.DiGraph, max_assigns=1):
    """
    Checks if the graph is a single block, or a series of simple assignments, that ends
    in a return statement. This is used to know when we MUST duplicate the return block.

    The graph qualifies when, after labels are removed and it is converted to a supergraph:

    - it is a single chain (every node has at most one successor, exactly one node has no predecessor),
    - every statement is a Return, Jump, or Assignment,
    - there are at most ``max_assigns`` assignments,
    - the last statement is a Return with at most one return expression, and
    - the assignment, if any, either assigns a constant to the returned expression or loads a stack variable
      into a register.

    :param graph:       The region graph to check.
    :param max_assigns: Maximum number of assignments allowed in the region.
    :return:            True if the graph is a simple return graph, False otherwise.
    """
    labeless_graph = to_ail_supergraph(remove_labels(graph))
    nodes = list(labeless_graph.nodes())
    if not nodes:
        return False

    # check if the graph is a single successor chain
    if not all(labeless_graph.out_degree(n) <= 1 for n in nodes):
        return False

    # collect the statements from the top node, make sure one exists
    root_nodes = [n for n in nodes if labeless_graph.in_degree(n) == 0]
    if len(root_nodes) != 1:
        return False

    queue = [root_nodes[0]]
    visited = set()
    stmts = []
    while queue:
        node = queue.pop(0)
        if node in visited:
            # the chain loops back on itself; it cannot simply end in a return
            return False
        visited.add(node)
        queue += list(labeless_graph.successors(node))
        if node.statements:
            stmts += node.statements

    # all statements must be either a return, a jump, or an assignment
    if not all(isinstance(stmt, (Return, Jump, Assignment)) for stmt in stmts):
        return False

    # The chain must actually end in a return. Without this guard, an empty chain raised IndexError and a chain
    # ending in a Jump or Assignment raised AttributeError on ``ret_exprs`` below.
    if not stmts or not isinstance(stmts[-1], Return):
        return False

    # gather all assignments
    assignments = [s for s in stmts if isinstance(s, Assignment)]
    if len(assignments) > max_assigns:
        return False

    # gather return stmts; there must be one return expression or none
    ret_exprs = stmts[-1].ret_exprs
    if ret_exprs and len(ret_exprs) > 1:
        return False
    ret_expr = ret_exprs[0] if ret_exprs and len(ret_exprs) == 1 else None

    # stop early if there are no assignments at all and just jumps and rets, or a const ret
    if not assignments:
        return True

    assign: Assignment = assignments[0]
    # const assignments are valid when they set the value being returned
    if isinstance(assign.src, Const):
        return bool(ret_expr and ret_expr.likes(assign.dst))

    # assignments to registers from the stack are valid, since cases of these assignments
    # pop up across optimized binaries
    return (
        isinstance(assign.dst, ailment.expression.Register)
        and isinstance(assign.src, ailment.expression.Load)
        and isinstance(assign.src.addr, ailment.expression.StackBaseOffset)
    )
|
|
385
|
+
|
|
386
|
+
@staticmethod
def _number_of_calls_in(graph: networkx.DiGraph) -> int:
    """
    Count the AIL calls found in the nodes of ``graph``.

    :param graph: The graph whose nodes are walked.
    :return:      The total number of calls tallied by a single AILCallCounter across all nodes.
    """
    walker = AILCallCounter()
    for block in graph.nodes:
        walker.walk(block)
    return walker.calls
|
|
393
|
+
|
|
394
|
+
@staticmethod
def _single_entry_region(graph, end_node) -> Tuple[networkx.DiGraph, Any]:
    """
    Back track on the graph from `end_node` and find the longest chain of nodes where each node has only one
    predecessor and one successor (the second-to-last node may have two successors to account for the typical
    stack-canary-detection logic).

    :param graph:       The graph to walk.
    :param end_node:    A node in the graph.
    :return:            A tuple of the region graph and its head node. The head either has no predecessors or
                        at least two predecessors.
    """

    def _is_fork_node(node_) -> bool:
        """
        Check if the node and its successors form a "fork" region. A "fork" region is a region where:
        - The entry node has two successors,
        - Each successor has only the entry node as its predecessor.
        - Each successor has no successors.
        """
        branches = list(graph.successors(node_))
        if len(branches) != 2:
            return False
        return all(graph.in_degree[b] == 1 and graph.out_degree[b] == 0 for b in branches)

    region = networkx.DiGraph()
    region.add_node(end_node)

    seen = {end_node}
    head = end_node
    while True:
        parents = list(graph.predecessors(head))
        if len(parents) != 1:
            break

        parent = parents[0]
        if parent in seen:
            break

        # True only on the first step, i.e. when ``parent`` is the second-to-last node of the chain
        first_step = head is end_node
        stop_after_adding = False

        if first_step:
            if _is_fork_node(parent):
                # add the entire "fork" to the region
                for branch in graph.successors(parent):
                    region.add_edge(parent, branch)
            elif graph.out_degree[parent] != 1:
                # the predecessor has more than one successor, and it's not a fork node
                break

            # a parent without exactly one predecessor becomes the head and ends the search
            stop_after_adding = graph.in_degree[parent] != 1
        elif graph.out_degree[parent] != 1:
            break

        region.add_edge(parent, head)
        seen.add(parent)
        head = parent

        if stop_after_adding:
            break

    return region, head
|
|
465
|
+
|
|
466
|
+
@staticmethod
def _is_indirect_jump_ailblock(block: "Block") -> bool:
    """
    Check whether the block ends in a jump whose target is not a constant.

    :param block: The AIL block to inspect.
    :return:      True if the last statement is a Jump with a non-Const target, False otherwise (including
                  when the block has no statements).
    """
    if not block.statements:
        return False

    terminator = block.statements[-1]
    # a non-constant target means an indirect jump (assuming the AIL block is properly optimized)
    return isinstance(terminator, Jump) and not isinstance(terminator.target, Const)
|
|
474
|
+
|
|
475
|
+
@staticmethod
def _is_single_return_stmt_region(region: networkx.DiGraph) -> bool:
    """
    Checks whether every statement in the region's blocks is a Return, a Jump, or a Label. Blocks may be
    chained by jumps, but none of them may be conditional. A valid case is:
    [Jmp] -> [Jmp] -> [Ret]

    Nodes that are not ``Block`` instances are ignored.

    :param region: The region graph to inspect.
    :return:       True if no Block in the region contains any other kind of statement.
    """
    allowed = (Return, Jump, Label)
    return all(
        isinstance(stmt, allowed)
        for node in region.nodes()
        if isinstance(node, Block)
        for stmt in node.statements
    )
|
|
489
|
+
|
|
490
|
+
@staticmethod
def _find_block_sets_in_all_regions(top_region: GraphRegion):
    """
    Map every region reachable from ``top_region`` to the set of block addresses it contains.

    The address set of a region includes the addresses of its own blocks, of the blocks inside its MultiNodes,
    and of everything contained in its nested regions.

    :param top_region: The outermost region.
    :return:           A dict mapping each region (``top_region`` and every nested region) to a set of addresses.
    """

    def _flatten(region: GraphRegion):
        # all block addresses under ``region``, nested regions included
        collected = set()
        for member in region.graph.nodes:
            if isinstance(member, Block):
                collected.add(member.addr)
            elif isinstance(member, MultiNode):
                collected.update(inner.addr for inner in member.nodes)
            elif isinstance(member, GraphRegion):
                collected |= _flatten(member)

        return collected

    def _visit(region: GraphRegion, addrs_by_region: dict):
        # record the address set of ``region``, then do the same for each nested region
        addrs_by_region[region] = set()
        for member in region.graph.nodes:
            if isinstance(member, Block):
                addrs_by_region[region].add(member.addr)
            elif isinstance(member, MultiNode):
                for inner in member.nodes:
                    addrs_by_region[region].add(inner.addr)
            else:
                # anything else is treated as a nested region
                addrs_by_region[region] |= _flatten(member)
                _visit(member, addrs_by_region)

    block_sets = {}
    _visit(top_region, block_sets)
    return block_sets
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from ailment.expression import Load, Const
|
|
2
|
+
from cle.backends import Blob
|
|
2
3
|
|
|
3
4
|
from .base import PeepholeOptimizationExprBase
|
|
4
5
|
|
|
@@ -22,8 +23,19 @@ class ConstantDereferences(PeepholeOptimizationExprBase):
|
|
|
22
23
|
try:
|
|
23
24
|
val = self.project.loader.memory.unpack_word(expr.addr.value, size=expr.size)
|
|
24
25
|
except KeyError:
|
|
25
|
-
return
|
|
26
|
+
return None
|
|
26
27
|
|
|
27
|
-
return Const(None, None, val, expr.bits, **expr.tags)
|
|
28
|
+
return Const(None, None, val, expr.bits, **expr.tags, deref_src_addr=expr.addr.value)
|
|
29
|
+
|
|
30
|
+
# is it loading from a blob?
|
|
31
|
+
obj = self.project.loader.find_object_containing(expr.addr.value)
|
|
32
|
+
if obj is not None and isinstance(obj, Blob):
|
|
33
|
+
# do we know the value that it's reading?
|
|
34
|
+
try:
|
|
35
|
+
val = self.project.loader.memory.unpack_word(expr.addr.value, size=self.project.arch.bytes)
|
|
36
|
+
except KeyError:
|
|
37
|
+
return None
|
|
38
|
+
|
|
39
|
+
return Const(None, None, val, expr.bits, **expr.tags, deref_src_addr=expr.addr.value)
|
|
28
40
|
|
|
29
41
|
return None
|
|
@@ -16,7 +16,7 @@ from .. import Analysis, register_analysis
|
|
|
16
16
|
from .structuring.structurer_nodes import MultiNode, ConditionNode, IncompleteSwitchCaseHeadStatement
|
|
17
17
|
from .graph_region import GraphRegion
|
|
18
18
|
from .condition_processor import ConditionProcessor
|
|
19
|
-
from .utils import replace_last_statement, first_nonlabel_statement
|
|
19
|
+
from .utils import replace_last_statement, first_nonlabel_statement, copy_graph
|
|
20
20
|
|
|
21
21
|
l = logging.getLogger(name=__name__)
|
|
22
22
|
|
|
@@ -27,7 +27,9 @@ CONDITIONNODE_ADDR = count(0xFF000000)
|
|
|
27
27
|
|
|
28
28
|
class RegionIdentifier(Analysis):
|
|
29
29
|
"""
|
|
30
|
-
Identifies regions within a function.
|
|
30
|
+
Identifies regions within a function graph and creates a recursive GraphRegion object.
|
|
31
|
+
Note, that the analysis may modify the graph in-place. If you want to keep the original graph,
|
|
32
|
+
set the `update_graph` parameter to False.
|
|
31
33
|
"""
|
|
32
34
|
|
|
33
35
|
def __init__(
|
|
@@ -35,6 +37,7 @@ class RegionIdentifier(Analysis):
|
|
|
35
37
|
func,
|
|
36
38
|
cond_proc=None,
|
|
37
39
|
graph=None,
|
|
40
|
+
update_graph=True,
|
|
38
41
|
largest_successor_tree_outside_loop=True,
|
|
39
42
|
force_loop_single_exit=True,
|
|
40
43
|
complete_successors=False,
|
|
@@ -50,6 +53,9 @@ class RegionIdentifier(Analysis):
|
|
|
50
53
|
)
|
|
51
54
|
)
|
|
52
55
|
self._graph = graph if graph is not None else self.function.graph
|
|
56
|
+
if not update_graph:
|
|
57
|
+
# copy the graph so updates don't affect the original graph
|
|
58
|
+
self._graph = copy_graph(self._graph)
|
|
53
59
|
|
|
54
60
|
self.region = None
|
|
55
61
|
self._start_node = None
|
|
@@ -164,14 +164,15 @@ class GotoSimplifier(SequenceWalker):
|
|
|
164
164
|
|
|
165
165
|
# normal Goto Label
|
|
166
166
|
if branch_target is None:
|
|
167
|
-
|
|
167
|
+
dst_target = goto_stmt.target
|
|
168
168
|
# true branch of a conditional jump
|
|
169
169
|
elif branch_target:
|
|
170
|
-
|
|
170
|
+
dst_target = goto_stmt.true_target
|
|
171
171
|
# false branch of a conditional jump
|
|
172
172
|
else:
|
|
173
|
-
|
|
173
|
+
dst_target = goto_stmt.true_target
|
|
174
174
|
|
|
175
|
-
|
|
175
|
+
src_ins_addr = goto_stmt.ins_addr if "ins_addr" in goto_stmt.tags else block.addr
|
|
176
|
+
goto = Goto(block.addr, dst_target.value, src_idx=block.idx, dst_idx=None, src_ins_addr=src_ins_addr)
|
|
176
177
|
l.debug("Storing %r goto", goto)
|
|
177
178
|
self.irreducible_gotos.add(goto)
|