angr 9.2.83__py3-none-win_amd64.whl → 9.2.85__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (62) hide show
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_base.py +6 -1
  3. angr/analyses/cfg/cfg_fast.py +32 -10
  4. angr/analyses/decompiler/clinic.py +204 -4
  5. angr/analyses/decompiler/condition_processor.py +8 -2
  6. angr/analyses/decompiler/decompilation_options.py +10 -0
  7. angr/analyses/decompiler/decompiler.py +19 -17
  8. angr/analyses/decompiler/goto_manager.py +34 -51
  9. angr/analyses/decompiler/optimization_passes/__init__.py +5 -5
  10. angr/analyses/decompiler/optimization_passes/div_simplifier.py +2 -0
  11. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
  12. angr/analyses/decompiler/optimization_passes/mod_simplifier.py +2 -0
  13. angr/analyses/decompiler/optimization_passes/multi_simplifier.py +2 -0
  14. angr/analyses/decompiler/optimization_passes/optimization_pass.py +131 -3
  15. angr/analyses/decompiler/optimization_passes/ret_deduplicator.py +3 -3
  16. angr/analyses/decompiler/optimization_passes/return_duplicator.py +519 -0
  17. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +14 -2
  18. angr/analyses/decompiler/region_identifier.py +8 -2
  19. angr/analyses/decompiler/region_simplifiers/goto.py +5 -4
  20. angr/analyses/decompiler/structured_codegen/c.py +66 -5
  21. angr/analyses/decompiler/structuring/phoenix.py +3 -1
  22. angr/analyses/decompiler/structuring/structurer_nodes.py +11 -5
  23. angr/analyses/decompiler/utils.py +50 -0
  24. angr/analyses/disassembly.py +10 -3
  25. angr/analyses/propagator/engine_ail.py +125 -0
  26. angr/analyses/reaching_definitions/engine_ail.py +36 -2
  27. angr/analyses/reaching_definitions/rd_initializer.py +15 -1
  28. angr/analyses/reaching_definitions/rd_state.py +9 -4
  29. angr/analyses/stack_pointer_tracker.py +10 -17
  30. angr/analyses/variable_recovery/engine_ail.py +27 -1
  31. angr/angrdb/serializers/loader.py +10 -3
  32. angr/calling_conventions.py +2 -0
  33. angr/engines/pcode/behavior.py +7 -2
  34. angr/engines/pcode/cc.py +1 -0
  35. angr/engines/pcode/emulate.py +144 -104
  36. angr/engines/pcode/lifter.py +135 -79
  37. angr/knowledge_plugins/functions/function.py +28 -0
  38. angr/knowledge_plugins/functions/function_manager.py +48 -5
  39. angr/knowledge_plugins/propagations/states.py +14 -0
  40. angr/lib/angr_native.dll +0 -0
  41. angr/procedures/cgc/deallocate.py +5 -2
  42. angr/procedures/posix/gethostbyname.py +23 -8
  43. angr/project.py +4 -0
  44. angr/simos/__init__.py +2 -0
  45. angr/simos/simos.py +1 -0
  46. angr/simos/snimmuc_nxp.py +152 -0
  47. angr/state_plugins/history.py +3 -1
  48. angr/utils/graph.py +20 -18
  49. {angr-9.2.83.dist-info → angr-9.2.85.dist-info}/METADATA +9 -8
  50. {angr-9.2.83.dist-info → angr-9.2.85.dist-info}/RECORD +61 -59
  51. tests/analyses/cfg/test_cfg_rust_got_resolution.py +2 -1
  52. tests/analyses/cfg/test_jumptables.py +2 -1
  53. tests/analyses/decompiler/test_decompiler.py +155 -103
  54. tests/engines/pcode/test_emulate.py +607 -0
  55. tests/engines/test_java.py +609 -663
  56. tests/knowledge_plugins/functions/test_function_manager.py +13 -0
  57. tests/serialization/test_db.py +30 -0
  58. angr/analyses/decompiler/optimization_passes/eager_returns.py +0 -285
  59. {angr-9.2.83.dist-info → angr-9.2.85.dist-info}/LICENSE +0 -0
  60. {angr-9.2.83.dist-info → angr-9.2.85.dist-info}/WHEEL +0 -0
  61. {angr-9.2.83.dist-info → angr-9.2.85.dist-info}/entry_points.txt +0 -0
  62. {angr-9.2.83.dist-info → angr-9.2.85.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,519 @@
1
+ from typing import Any, Tuple, Dict, List, TYPE_CHECKING, Optional
2
+ from itertools import count
3
+ import copy
4
+ import logging
5
+ import inspect
6
+
7
+ import ailment.expression
8
+ import networkx
9
+
10
+ from ailment import Block
11
+ from ailment.statement import Jump, ConditionalJump, Assignment, Statement, Return, Label
12
+ from ailment.expression import Const
13
+ from ailment.block_walker import AILBlockWalkerBase
14
+
15
+ from .optimization_pass import StructuringOptimizationPass
16
+ from ..condition_processor import ConditionProcessor, EmptyBlockNotice
17
+ from ..graph_region import GraphRegion
18
+ from ..utils import remove_labels, to_ail_supergraph
19
+ from ..structuring.structurer_nodes import MultiNode
20
+
21
+ if TYPE_CHECKING:
22
+ from ailment.statement import Call
23
+
24
+ _l = logging.getLogger(name=__name__)
25
+
26
+
27
class AILCallCounter(AILBlockWalkerBase):
    """
    Block walker that tallies every AIL call it visits, both call statements and call expressions.
    """

    # running total; starts as a class-level default and becomes per-instance on the first increment
    calls = 0

    def _handle_Call(self, stmt_idx: int, stmt: "Call", block: Optional["Block"]):
        # count the call statement, then let the base walker descend into it
        self.calls = self.calls + 1
        super()._handle_Call(stmt_idx, stmt, block)

    def _handle_CallExpr(self, expr_idx: int, expr: "Call", stmt_idx: int, stmt: Statement, block: Optional[Block]):
        # count the call expression, then let the base walker descend into it
        self.calls = self.calls + 1
        super()._handle_CallExpr(expr_idx, expr, stmt_idx, stmt, block)
41
+
42
+
43
+ class ReturnDuplicator(StructuringOptimizationPass):
44
+ """
45
+ An optimization pass that reverts a subset of Irreducible Statement Condensing (ISC) optimizations, as described
46
+ in the USENIX 2024 paper SAILR.
47
+
48
+ Some compilers, including GCC, Clang, and MSVC, apply various optimizations to reduce the number of statements in
49
+ code. These optimizations will take equivalent statements, or a subset of them, and replace them with a single
50
+ copy that is jumped to by gotos -- optimizing for space and sometimes speed.
51
+
52
+ This optimization pass will revert those gotos by re-duplicating the condensed blocks. Since Return statements
53
+ are the most common, we use this optimization pass to revert only gotos to return statements. Additionally, we
54
+ perform some additional readability fixups, like not re-duplicating returns to shared components.
55
+
56
+ Args:
57
+ func: The function to optimize.
58
+ node_idx_start: The index to start at when creating new nodes. This is used by Clinic to ensure that
59
+ node indices are unique across multiple passes.
60
+ max_opt_iters: The maximum number of optimization iterations to perform.
61
+ max_calls_in_regions: The maximum number of calls that can be in a region. This is used to prevent
62
+ duplicating too much code.
63
+ prevent_new_gotos: If True, this optimization pass will prevent new gotos from being created.
64
+ minimize_copies_for_regions: If True, this optimization pass will minimize the number of copies by doing only
65
+ a single copy for connected in_edges that form a region.
66
+ """
67
+
68
+ ARCHES = None
69
+ PLATFORMS = None
70
+ NAME = "Duplicate return blocks to reduce goto statements"
71
+ DESCRIPTION = inspect.cleandoc(__doc__[: __doc__.index("Args:")]) # pylint:disable=unsubscriptable-object
72
+
73
def __init__(
    self,
    func,
    # internal parameters that should be used by Clinic
    node_idx_start: int = 0,
    # settings
    max_opt_iters: int = 10,
    max_calls_in_regions: int = 2,
    prevent_new_gotos: bool = True,
    minimize_copies_for_regions: bool = True,
    **kwargs,
):
    """
    Store the pass settings and immediately run the analysis.

    :param func:                        The function to optimize.
    :param node_idx_start:              First index handed out to duplicated nodes.
    :param max_opt_iters:               Forwarded to the base class.
    :param max_calls_in_regions:        Upper bound on the number of calls a region may hold to be copied.
    :param prevent_new_gotos:           Forwarded to the base class.
    :param minimize_copies_for_regions: Whether connected predecessors should share a single region copy.
    :param kwargs:                      Remaining keyword arguments, forwarded to the base class.
    """
    base_options = dict(kwargs)
    base_options["max_opt_iters"] = max_opt_iters
    base_options["prevent_new_gotos"] = prevent_new_gotos
    super().__init__(func, **base_options)

    self._max_calls_in_region = max_calls_in_regions
    self._minimize_copies_for_regions = minimize_copies_for_regions

    # source of fresh, unique idx values for duplicated blocks
    self.node_idx = count(start=node_idx_start)
    self.analyze()
91
+
92
def _check(self):
    """
    Decide whether this pass applies: it does only when the function has at least one endpoint.

    :return: A tuple of (whether the function has endpoints, None).
    """
    has_endpoints = bool(self._func.endpoints)
    return has_endpoints, None
95
+
96
def _analyze(self, cache=None):
    """
    Perform one round of return-region duplication on the output graph. analyze() calls this repeatedly, for
    at most max_opt_iters rounds.

    :return: True if at least one end-node region was processed in this round, False otherwise.
    """
    out_graph = self.out_graph
    regions = self._find_endnode_regions(out_graph)

    if self._minimize_copies_for_regions:
        # second pass: predecessors that are connected to each other share one copy of the region
        regions = self._copy_connected_edge_components(regions, out_graph)

    changed = False
    for head, (incoming, region) in regions.items():
        const_ret_region = self._is_simple_return_graph(region)
        for incoming_edge in incoming:
            pred = incoming_edge[0]
            if self._should_duplicate_dst(pred, head, out_graph, dst_is_const_ret=const_ret_region):
                # each eligible predecessor receives its own copy of the region
                self._copy_region([pred], head, region, out_graph)

        # the original region is dead once nothing jumps to its head any more
        if head in out_graph and out_graph.in_degree(head) == 0:
            out_graph.remove_nodes_from(region)

        changed = True

    return changed
124
+
125
def _is_goto_edge(
    self,
    src: Block,
    dst: Block,
    graph: networkx.DiGraph = None,
    check_for_ifstmts=True,
    max_level_check=1,
):
    """
    Tell whether the edge src -> dst is recorded as a goto by the goto manager.

    TODO: correct how goto edge addressing works
    A long-standing bug sometimes reports the address of the if-stmt above a goto edge as the goto source.
    To compensate, when a graph is given and `check_for_ifstmts` is set, the predecessors of `src` (up to
    `max_level_check` levels) are also tried as goto sources. Jumps must be included to deal with loops.

    :param src:               Source block of the edge.
    :param dst:               Destination block of the edge.
    :param graph:             Graph used to look up predecessors; without it only src -> dst is checked.
    :param check_for_ifstmts: Whether the predecessor workaround is enabled.
    :param max_level_check:   Number of predecessor levels to include.
    :return:                  True if a goto to `dst` is found, False otherwise.
    """
    if not check_for_ifstmts or graph is None:
        return self._goto_manager.is_goto_edge(src, dst)

    # gather src plus its ancestors, one level at a time
    candidates = [src]
    frontier = [src]
    for _ in range(max_level_check):
        frontier = [parent for blk in frontier for parent in graph.predecessors(blk)]
        candidates.extend(frontier)

    direct_parents = list(graph.predecessors(src))
    for candidate in candidates:
        if not candidate or not candidate.statements:
            continue

        # special case if-stmts that are next to each other
        if candidate in direct_parents and isinstance(candidate.statements[-1], ConditionalJump):
            continue

        if self._goto_manager.is_goto_edge(candidate, dst):
            return True

    return False
165
+
166
def _find_endnode_regions(self, graph) -> Dict[Any, Tuple[List[Tuple[Any, Any]], networkx.DiGraph]]:
    """
    Collect the regions that end in a node without successors (an "end node") and that are candidates for
    duplication.

    :param graph: The graph to search.
    :return:      A dict mapping each region head to a tuple of (edges entering the region, region graph).
    """
    tails = [n for n in graph.nodes() if graph.out_degree[n] == 0]

    # the result is keyed by the region head because different end nodes may lead to the same head: consider
    # the typical "fork" region where the stack canary is checked in x86-64 binaries.
    found: Dict[Any, Tuple[List[Tuple[Any, Any]], networkx.DiGraph]] = {}

    for tail in tails:
        incoming = list(graph.in_edges(tail))
        if not incoming:
            continue

        if len(incoming) == 1:
            # back-trace until it reaches a node with two predecessors
            region, head = self._single_entry_region(graph, tail)
            # only edges that originate outside the region count as entries
            incoming = [(src, dst) for src, dst in graph.in_edges(head) if src not in region]
        else:
            region = networkx.DiGraph()
            region.add_node(tail)
            head = tail

        # zero entries: a stand-alone component; one entry: nothing to gain from duplicating
        if len(incoming) < 2:
            continue

        if any(self._is_indirect_jump_ailblock(src) for src, _ in incoming):
            continue

        # cap the amount of copied code, estimated by the number of calls in the region
        if self._number_of_calls_in(region) > self._max_calls_in_region:
            continue

        found[head] = incoming, region

    return found
216
+
217
def _should_duplicate_dst(self, src, dst, graph, dst_is_const_ret=False):
    """
    Decide whether `dst` should be duplicated for the predecessor `src`.

    :param src:              The predecessor node.
    :param dst:              The head of the region that may be duplicated.
    :param graph:            The graph both nodes live in.
    :param dst_is_const_ret: Whether the region was classified as a simple return.
    :return:                 True for simple-return regions; otherwise the result of the goto-edge check.
    """
    # simple return regions are always duplicated; everything else only when reached through a goto
    return True if dst_is_const_ret else self._is_goto_edge(src, dst, graph=graph, check_for_ifstmts=True)
224
+
225
def _copy_region(self, pred_nodes, region_head, region, graph):
    """
    Duplicate the whole region once, attach the copy to every node in `pred_nodes`, and drop the edges from
    those nodes to the original region head.

    :param pred_nodes:  Predecessors that should be redirected to the copy.
    :param region_head: Entry node of the region.
    :param region:      Graph describing the region to copy.
    :param graph:       Graph that is modified in place.
    """
    clones = {}
    worklist = [(entry_pred, region_head) for entry_pred in pred_nodes]
    while worklist:
        parent, original = worklist.pop(0)
        if original not in clones:
            # first visit: deep-copy the node and give the copy a fresh idx
            fresh = copy.deepcopy(original)
            fresh.idx = next(self.node_idx)
            clones[original] = fresh
        clone = clones[original]

        graph.add_edge(parent, clone)

        # modify Jump.target_idx and ConditionalJump.{true,false}_target_idx accordingly
        try:
            tail = ConditionProcessor.get_last_statement(parent)
        except EmptyBlockNotice:
            tail = None

        if isinstance(tail, Jump):
            if isinstance(tail.target, Const) and tail.target.value == clone.addr:
                tail.target_idx = clone.idx
        elif isinstance(tail, ConditionalJump):
            if isinstance(tail.true_target, Const) and tail.true_target.value == clone.addr:
                tail.true_target_idx = clone.idx
            elif isinstance(tail.false_target, Const) and tail.false_target.value == clone.addr:
                tail.false_target_idx = clone.idx

        worklist.extend((clone, child) for child in region.successors(original))

    # delete the old edges to the return node
    for entry_pred in pred_nodes:
        graph.remove_edge(entry_pred, region_head)
259
+
260
def _copy_connected_edge_components(
    self, endnode_regions: Dict[Any, Tuple[List[Tuple[Any, Any]], networkx.DiGraph]], graph: networkx.DiGraph
):
    """
    Minimize the number of region copies: predecessors of a region head that are connected to each other and
    that together make up exactly one identified region share a single copy of the end-node region.

    :param endnode_regions: Mapping of region head to (edges entering the region, region graph).
    :param graph:           The graph to modify in place.
    :return:                A new mapping in which the in-edges handled here have been removed; region heads
                            with no remaining in-edges are dropped entirely.
    """
    updated_regions = endnode_regions.copy()
    all_region_block_addrs = list(self._find_block_sets_in_all_regions(self._ri.region).values())
    for region_head, (in_edges, region) in endnode_regions.items():
        is_single_const_ret_region = self._is_simple_return_graph(region)
        pred_nodes = [src for src, _ in in_edges]
        pred_subgraph = networkx.subgraph(graph, pred_nodes)
        components = list(networkx.weakly_connected_components(pred_subgraph))
        multi_node_components = [c for c in components if len(c) > 1]
        if not multi_node_components:
            continue

        # find components that have a node that should be duplicated
        candidate_components = [
            nodes
            for nodes in multi_node_components
            if any(
                self._should_duplicate_dst(n, region_head, graph, dst_is_const_ret=is_single_const_ret_region)
                for n in nodes
            )
        ]
        if not candidate_components:
            continue

        # we can only handle instances where components do not overlap. the check must compare every
        # component against the nodes already seen; intersecting into an initially empty set can never
        # report an overlap.
        seen_nodes = set()
        has_overlap = False
        for component in candidate_components:
            if not seen_nodes.isdisjoint(component):
                has_overlap = True
                break
            seen_nodes |= component
        if has_overlap:
            continue

        # every component needs to form its own region with ONLY those nodes in the region
        duplicatable_components = [
            component
            for component in candidate_components
            if {n.addr for n in component} in all_region_block_addrs
        ]

        new_in_edges = in_edges
        for nodes in duplicatable_components:
            self._copy_region(nodes, region_head, region, graph)
            if region_head in graph and graph.in_degree(region_head) == 0:
                graph.remove_nodes_from(region)

            # update the in_edges to remove any nodes that have been copied
            new_in_edges = [edge for edge in new_in_edges if edge[0] not in nodes]

        if not new_in_edges:
            del updated_regions[region_head]
        else:
            updated_regions[region_head] = new_in_edges, region

    return updated_regions
314
+
315
@staticmethod
def _is_simple_return_graph(graph: networkx.DiGraph, max_assigns=1):
    """
    Checks if the graph is a single block, or a series of simple assignments, that ends
    in a return statement. This is used to know when we MUST duplicate the return block.

    :param graph:       The region graph to inspect.
    :param max_assigns: The maximum number of assignment statements the region may contain.
    :return:            True if the region qualifies as a simple return, False otherwise.
    """
    labeless_graph = to_ail_supergraph(remove_labels(graph))
    nodes = list(labeless_graph.nodes())
    if not nodes:
        return False

    # check if the graph is a single successor chain
    if not all(labeless_graph.out_degree(n) <= 1 for n in nodes):
        return False

    # collect the statements from the top node, make sure one exists
    root_nodes = [n for n in nodes if labeless_graph.in_degree(n) == 0]
    if len(root_nodes) != 1:
        return False

    queue = [root_nodes[0]]
    stmts = []
    while queue:
        node = queue.pop(0)
        queue += list(labeless_graph.successors(node))
        if node.statements:
            stmts += node.statements

    # a region without any statement cannot end in a return
    if not stmts:
        return False

    # all statements must be either a return, a jump, or an assignment
    type_white_list = (Return, Jump, Assignment)
    if not all(isinstance(stmt, type_white_list) for stmt in stmts):
        return False

    # gather all assignments
    assignments = [s for s in stmts if isinstance(s, Assignment)]
    if len(assignments) > max_assigns:
        return False

    # the final statement must be a return. jumps and assignments pass the white list above but carry no
    # ret_exprs, so a region ending in one of them is not a simple return
    ret_stmt = stmts[-1]
    if not isinstance(ret_stmt, Return):
        return False

    ret_exprs = ret_stmt.ret_exprs
    # must be 1 or none
    if ret_exprs and len(ret_exprs) > 1:
        return False

    ret_expr = ret_exprs[0] if ret_exprs and len(ret_exprs) == 1 else None
    # stop early if there are no assignments at all and just jumps and rets, or a const ret
    if not assignments:
        return True

    assign: Assignment = assignments[0]
    # const assignments are valid when the assigned destination is what gets returned
    if isinstance(assign.src, Const):
        return bool(ret_expr and ret_expr.likes(assign.dst))

    # assignments to registers from the stack are valid, since cases of these assignments
    # pop up across optimized binaries
    return (
        isinstance(assign.dst, ailment.expression.Register)
        and isinstance(assign.src, ailment.expression.Load)
        and isinstance(assign.src.addr, ailment.expression.StackBaseOffset)
    )
385
+
386
@staticmethod
def _number_of_calls_in(graph: networkx.DiGraph) -> int:
    """
    Count the AIL calls found in all nodes of `graph`.

    :param graph: The graph whose nodes are walked.
    :return:      The total reported by a single AILCallCounter after walking every node.
    """
    walker = AILCallCounter()
    for blk in graph.nodes:
        walker.walk(blk)
    total_calls = walker.calls
    return total_calls
393
+
394
@staticmethod
def _single_entry_region(graph, end_node) -> Tuple[networkx.DiGraph, Any]:
    """
    Walk backwards from `end_node` and build the longest chain of nodes in which every node has exactly one
    predecessor and one successor. The node directly above `end_node` is allowed two successors, which covers
    the typical stack-canary-detection logic.

    :param graph:       The graph to walk.
    :param end_node:    A node in the graph.
    :return:            A tuple of (region graph, region head). The walk stops extending the region when the
                        current head does not have exactly one predecessor.
    """

    def _is_fork_node(node_) -> bool:
        """
        A "fork" is a node with exactly two successors, each of which has that node as its only predecessor
        and has no successors of its own.
        """
        children = list(graph.successors(node_))
        if len(children) != 2:
            return False
        return all(graph.in_degree[child] == 1 and graph.out_degree[child] == 0 for child in children)

    region = networkx.DiGraph()
    region.add_node(end_node)

    visited = {end_node}
    head = end_node
    while True:
        parents = list(graph.predecessors(head))
        if len(parents) != 1:
            break

        parent = parents[0]
        if parent in visited:
            break

        if head is end_node:
            # first step up from the end node: a fork is accepted in addition to a plain chain link
            if _is_fork_node(parent):
                # add the entire "fork" to the region
                for child in graph.successors(parent):
                    region.add_edge(parent, child)
            elif graph.out_degree[parent] != 1:
                # the predecessor has more than one successor, and it's not a fork node
                break
            # the parent is still added below, but the walk ends there unless it has a single predecessor
            stop_after_adding = graph.in_degree[parent] != 1
        else:
            if graph.out_degree[parent] != 1:
                break
            stop_after_adding = False

        region.add_edge(parent, head)
        visited.add(parent)
        head = parent
        if stop_after_adding:
            break

    return region, head
465
+
466
@staticmethod
def _is_indirect_jump_ailblock(block: "Block") -> bool:
    """
    Tell whether the block ends in a Jump whose target is not a constant.

    :param block: The AIL block to inspect.
    :return:      True for an indirect jump (assuming the AIL block is properly optimized), False otherwise.
    """
    if not block.statements:
        return False
    tail = block.statements[-1]
    return isinstance(tail, Jump) and not isinstance(tail.target, Const)
474
+
475
@staticmethod
def _is_single_return_stmt_region(region: networkx.DiGraph) -> bool:
    """
    Checks whether every Block in the region consists solely of Return, Jump and Label statements, i.e. no
    conditional jumps or other statements. A valid case is:
    [Jmp] -> [Jmp] -> [Ret]

    :param region: The region graph to inspect. Nodes that are not Blocks are ignored.
    :return:       True if no Block contains a statement of another type.
    """
    allowed = (Return, Jump, Label)
    return all(
        isinstance(stmt, allowed)
        for node in region.nodes()
        if isinstance(node, Block)
        for stmt in node.statements
    )
489
+
490
@staticmethod
def _find_block_sets_in_all_regions(top_region: GraphRegion):
    """
    Build, for `top_region` and every region nested inside it, the set of block addresses that the region
    contains (including the addresses inside its nested regions).

    :param top_region: The outermost region.
    :return:           A dict mapping each region to its set of block addresses.
    """

    def _flatten_addrs(region: GraphRegion):
        # addresses of every block reachable through this region, descending into nested GraphRegions
        collected = set()
        for member in region.graph.nodes:
            if isinstance(member, Block):
                collected.add(member.addr)
            elif isinstance(member, MultiNode):
                collected.update(inner.addr for inner in member.nodes)
            elif isinstance(member, GraphRegion):
                collected.update(_flatten_addrs(member))

        return collected

    def _collect(region: GraphRegion, sink: dict):
        # record this region's address set, then recurse so that nested regions get their own entries
        sink[region] = set()
        for member in region.graph.nodes:
            if isinstance(member, Block):
                sink[region].add(member.addr)
            elif isinstance(member, MultiNode):
                for inner in member.nodes:
                    sink[region].add(inner.addr)
            else:
                # anything else is handled as a nested region
                sink[region] |= _flatten_addrs(member)
                _collect(member, sink)

    block_sets_by_region = {}
    _collect(top_region, block_sets_by_region)
    return block_sets_by_region
@@ -1,4 +1,5 @@
1
1
  from ailment.expression import Load, Const
2
+ from cle.backends import Blob
2
3
 
3
4
  from .base import PeepholeOptimizationExprBase
4
5
 
@@ -22,8 +23,19 @@ class ConstantDereferences(PeepholeOptimizationExprBase):
22
23
  try:
23
24
  val = self.project.loader.memory.unpack_word(expr.addr.value, size=expr.size)
24
25
  except KeyError:
25
- return expr
26
+ return None
26
27
 
27
- return Const(None, None, val, expr.bits, **expr.tags)
28
+ return Const(None, None, val, expr.bits, **expr.tags, deref_src_addr=expr.addr.value)
29
+
30
+ # is it loading from a blob?
31
+ obj = self.project.loader.find_object_containing(expr.addr.value)
32
+ if obj is not None and isinstance(obj, Blob):
33
+ # do we know the value that it's reading?
34
+ try:
35
+ val = self.project.loader.memory.unpack_word(expr.addr.value, size=self.project.arch.bytes)
36
+ except KeyError:
37
+ return None
38
+
39
+ return Const(None, None, val, expr.bits, **expr.tags, deref_src_addr=expr.addr.value)
28
40
 
29
41
  return None
@@ -16,7 +16,7 @@ from .. import Analysis, register_analysis
16
16
  from .structuring.structurer_nodes import MultiNode, ConditionNode, IncompleteSwitchCaseHeadStatement
17
17
  from .graph_region import GraphRegion
18
18
  from .condition_processor import ConditionProcessor
19
- from .utils import replace_last_statement, first_nonlabel_statement
19
+ from .utils import replace_last_statement, first_nonlabel_statement, copy_graph
20
20
 
21
21
  l = logging.getLogger(name=__name__)
22
22
 
@@ -27,7 +27,9 @@ CONDITIONNODE_ADDR = count(0xFF000000)
27
27
 
28
28
  class RegionIdentifier(Analysis):
29
29
  """
30
- Identifies regions within a function.
30
+ Identifies regions within a function graph and creates a recursive GraphRegion object.
31
+ Note, that the analysis may modify the graph in-place. If you want to keep the original graph,
32
+ set the `update_graph` parameter to False.
31
33
  """
32
34
 
33
35
  def __init__(
@@ -35,6 +37,7 @@ class RegionIdentifier(Analysis):
35
37
  func,
36
38
  cond_proc=None,
37
39
  graph=None,
40
+ update_graph=True,
38
41
  largest_successor_tree_outside_loop=True,
39
42
  force_loop_single_exit=True,
40
43
  complete_successors=False,
@@ -50,6 +53,9 @@ class RegionIdentifier(Analysis):
50
53
  )
51
54
  )
52
55
  self._graph = graph if graph is not None else self.function.graph
56
+ if not update_graph:
57
+ # copy the graph so updates don't affect the original graph
58
+ self._graph = copy_graph(self._graph)
53
59
 
54
60
  self.region = None
55
61
  self._start_node = None
@@ -164,14 +164,15 @@ class GotoSimplifier(SequenceWalker):
164
164
 
165
165
  # normal Goto Label
166
166
  if branch_target is None:
167
- stmt_target = goto_stmt.target
167
+ dst_target = goto_stmt.target
168
168
  # true branch of a conditional jump
169
169
  elif branch_target:
170
- stmt_target = goto_stmt.true_target
170
+ dst_target = goto_stmt.true_target
171
171
  # false branch of a conditional jump
172
172
  else:
173
- stmt_target = goto_stmt.true_target
173
+ dst_target = goto_stmt.true_target
174
174
 
175
- goto = Goto(block_addr=block.addr, ins_addr=goto_stmt.ins_addr, target_addr=stmt_target.value)
175
+ src_ins_addr = goto_stmt.ins_addr if "ins_addr" in goto_stmt.tags else block.addr
176
+ goto = Goto(block.addr, dst_target.value, src_idx=block.idx, dst_idx=None, src_ins_addr=src_ins_addr)
176
177
  l.debug("Storing %r goto", goto)
177
178
  self.irreducible_gotos.add(goto)