angr 9.2.92__py3-none-manylinux2014_x86_64.whl → 9.2.94__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of angr has been flagged as potentially problematic by the registry.
Files changed (45)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_base.py +20 -10
  3. angr/analyses/cfg/indirect_jump_resolvers/amd64_elf_got.py +1 -1
  4. angr/analyses/cfg/indirect_jump_resolvers/arm_elf_fast.py +89 -32
  5. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +276 -133
  6. angr/analyses/complete_calling_conventions.py +1 -1
  7. angr/analyses/decompiler/ail_simplifier.py +20 -0
  8. angr/analyses/decompiler/block_io_finder.py +293 -0
  9. angr/analyses/decompiler/block_similarity.py +190 -0
  10. angr/analyses/decompiler/callsite_maker.py +5 -0
  11. angr/analyses/decompiler/clinic.py +103 -1
  12. angr/analyses/decompiler/decompilation_cache.py +2 -0
  13. angr/analyses/decompiler/decompiler.py +21 -4
  14. angr/analyses/decompiler/optimization_passes/__init__.py +6 -0
  15. angr/analyses/decompiler/optimization_passes/code_motion.py +361 -0
  16. angr/analyses/decompiler/optimization_passes/optimization_pass.py +1 -0
  17. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +30 -18
  18. angr/analyses/decompiler/optimization_passes/switch_default_case_duplicator.py +110 -0
  19. angr/analyses/decompiler/peephole_optimizations/bswap.py +53 -2
  20. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +20 -1
  21. angr/analyses/decompiler/structured_codegen/c.py +76 -41
  22. angr/analyses/decompiler/structuring/phoenix.py +41 -9
  23. angr/analyses/decompiler/utils.py +13 -4
  24. angr/analyses/propagator/engine_ail.py +3 -0
  25. angr/analyses/reaching_definitions/engine_ail.py +3 -0
  26. angr/analyses/reaching_definitions/reaching_definitions.py +7 -0
  27. angr/analyses/stack_pointer_tracker.py +60 -10
  28. angr/analyses/typehoon/simple_solver.py +95 -24
  29. angr/analyses/typehoon/typeconsts.py +1 -1
  30. angr/calling_conventions.py +0 -3
  31. angr/engines/pcode/cc.py +1 -1
  32. angr/engines/successors.py +6 -0
  33. angr/knowledge_plugins/propagations/states.py +2 -1
  34. angr/procedures/definitions/glibc.py +3 -1
  35. angr/procedures/definitions/parse_win32json.py +2135 -383
  36. angr/procedures/definitions/wdk_ntoskrnl.py +956 -0
  37. angr/sim_type.py +53 -13
  38. angr/utils/library.py +2 -2
  39. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/METADATA +6 -6
  40. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/RECORD +44 -41
  41. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/WHEEL +1 -1
  42. angr/procedures/definitions/wdk_ntdll.py +0 -994
  43. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/LICENSE +0 -0
  44. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/entry_points.txt +0 -0
  45. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/top_level.txt +0 -0
angr/analyses/decompiler/decompiler.py
@@ -10,7 +10,7 @@ import ailment
 from angr.analyses.cfg import CFGFast
 from ...knowledge_plugins.functions.function import Function
 from ...knowledge_base import KnowledgeBase
-from ...sim_variable import SimMemoryVariable
+from ...sim_variable import SimMemoryVariable, SimRegisterVariable, SimStackVariable
 from ...utils import timethis
 from .. import Analysis, AnalysesHub
 from .structuring import RecursiveStructurer, PhoenixStructurer
@@ -403,7 +403,7 @@ class Decompiler(Analysis):
             SimMemoryVariable(symbol.rebased_addr, 1, name=symbol.name, ident=ident),
         )

-    def reflow_variable_types(self, type_constraints: Set, var_to_typevar: Dict, codegen):
+    def reflow_variable_types(self, type_constraints: Set, func_typevar, var_to_typevar: Dict, codegen):
        """
        Re-run type inference on an existing variable recovery result, then rerun codegen to generate new results.

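In the next hunk, Typehoon receives the function's own type variable, only register and stack variables have their inferred types written back, and the recovered argument types are propagated into the function prototype. For illustration only, a minimal sketch of the filtering idea, using hypothetical stand-in classes (RegVar, StackVar, and MemVar are not angr names):

    class RegVar: pass      # stand-in for SimRegisterVariable
    class StackVar: pass    # stand-in for SimStackVariable
    class MemVar: pass      # stand-in for SimMemoryVariable

    var_to_typevar = {RegVar(): "tv0", StackVar(): "tv1", MemVar(): "tv2"}

    # Keep only local (register/stack) variables, mirroring the isinstance
    # filter in the hunk below; memory/global variables are excluded.
    local_only = {v: t for v, t in var_to_typevar.items() if isinstance(v, (RegVar, StackVar))}
    assert sorted(local_only.values()) == ["tv0", "tv1"]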
@@ -439,13 +439,30 @@
         try:
             tp = self.project.analyses.Typehoon(
                 type_constraints,
+                func_typevar,
                 kb=var_kb,
                 var_mapping=var_to_typevar,
                 must_struct=must_struct,
                 ground_truth=groundtruth,
             )
-            tp.update_variable_types(self.func.addr, var_to_typevar)
-            tp.update_variable_types("global", var_to_typevar)
+            tp.update_variable_types(
+                self.func.addr,
+                {v: t for v, t in var_to_typevar.items() if isinstance(v, (SimRegisterVariable, SimStackVariable))},
+            )
+            tp.update_variable_types(
+                "global",
+                {v: t for v, t in var_to_typevar.items() if isinstance(v, (SimRegisterVariable, SimStackVariable))},
+            )
+            # update the function prototype if needed
+            if self.func.prototype is not None and self.func.prototype.args:
+                var_manager = var_kb.variables[self.func.addr]
+                for i, arg in enumerate(codegen.cfunc.arg_list):
+                    if i >= len(self.func.prototype.args):
+                        break
+                    var = arg.variable
+                    new_type = var_manager.get_variable_type(var)
+                    if new_type is not None:
+                        self.func.prototype.args[i] = new_type
         except Exception:  # pylint:disable=broad-except
             l.warning(
                 "Typehoon analysis failed. Variables will not have types. Please report to GitHub.", exc_info=True
angr/analyses/decompiler/optimization_passes/__init__.py
@@ -22,6 +22,8 @@ from .flip_boolean_cmp import FlipBooleanCmp
 from .ret_deduplicator import ReturnDeduplicator
 from .win_stack_canary_simplifier import WinStackCanarySimplifier
 from .cross_jump_reverter import CrossJumpReverter
+from .code_motion import CodeMotionOptimization
+from .switch_default_case_duplicator import SwitchDefaultCaseDuplicator

 # order matters!
 _all_optimization_passes = [
@@ -38,15 +40,19 @@ _all_optimization_passes = [
     (ITERegionConverter, True),
     (ITEExprConverter, True),
     (ExprOpSwapper, True),
+    (SwitchDefaultCaseDuplicator, True),
     (ReturnDuplicator, True),
     (LoweredSwitchSimplifier, False),
     (ReturnDeduplicator, True),
+    (CodeMotionOptimization, True),
     (CrossJumpReverter, True),
     (FlipBooleanCmp, True),
 ]

 # these passes may duplicate code to remove gotos or improve the structure of the graph
 DUPLICATING_OPTS = [ReturnDuplicator, CrossJumpReverter]
+# these passes may destroy blocks by merging them into semantically equivalent blocks
+CONDENSING_OPTS = [CodeMotionOptimization, ReturnDeduplicator]


 def get_optimization_passes(arch, platform):
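Both new passes are registered in the default pass list above. A usage sketch, assuming a Linux binary that exports main; get_optimization_passes comes from this module, while the CFGFast/Decompiler keyword arguments reflect the angr API as commonly used and may vary across versions:

    import angr
    from angr.analyses.decompiler.optimization_passes import get_optimization_passes

    proj = angr.Project("/bin/true", auto_load_libs=False)
    cfg = proj.analyses.CFGFast(normalize=True)
    func = proj.kb.functions["main"]

    # Default pass list for this arch/platform; both new passes declare
    # ARCHES = None and PLATFORMS = None, so they should apply to any target.
    passes = get_optimization_passes(proj.arch, "linux")
    dec = proj.analyses.Decompiler(func, cfg=cfg.model, optimization_passes=passes)
    print(dec.codegen.text)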
angr/analyses/decompiler/optimization_passes/code_motion.py (new file)
@@ -0,0 +1,361 @@
+import itertools
+from typing import Tuple, List, Optional, Dict
+import logging
+
+from ailment import Block
+from ailment.statement import Jump, ConditionalJump, Statement, DirtyStatement
+import networkx as nx
+
+from angr.analyses.decompiler.optimization_passes.optimization_pass import OptimizationPass, OptimizationPassStage
+from angr.analyses.decompiler.block_similarity import is_similar, index_of_similar_stmts
+from angr.analyses.decompiler.block_io_finder import BlockIOFinder
+from angr.analyses.decompiler.utils import to_ail_supergraph, remove_labels, add_labels
+
+_l = logging.getLogger(name=__name__)
+
+
+class CodeMotionOptimization(OptimizationPass):
+    """
+    Moves common statements out of blocks that share the same predecessors or the same
+    successors. This is done to reduce the number of statements in a block and to make the
+    blocks more similar to each other.
+
+    As an example:
+    if (x) {
+        b = 2;
+        a = 1;
+        c = 3;
+    } else {
+        b = 2;
+        c = 3;
+    }
+
+    Will be turned into:
+    if (x) {
+        a = 1;
+    }
+    b = 2;
+    c = 3;
+
+    Current limitations (for very conservative operation):
+    - moving statements above conditional jumps is not supported
+    - only immediate children and parents are considered for moving statements
+    - when moving statements down, a block is only considered if it already has a matching statement at the end
+    """
+
+    ARCHES = None
+    PLATFORMS = None
+    NAME = "Merge common statements in sub-scopes"
+    STAGE = OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION
+    DESCRIPTION = __doc__
+
+    def __init__(self, func, *args, max_iters=10, node_idx_start: int = 0, **kwargs):
+        super().__init__(func, *args, **kwargs)
+        self._node_idx_start = node_idx_start
+        self._max_optimization_runs = max_iters
+        self.analyze()
+
+    def _check(self):
+        return True, None
+
+    def _analyze(self, cache=None):
+        optimization_runs = 0
+        graph_copy = remove_labels(nx.DiGraph(self._graph))
+        updates = True
+        graph_changed = False
+        while optimization_runs < self._max_optimization_runs and updates:
+            optimization_runs += 1
+            super_graph = to_ail_supergraph(graph_copy)
+            updates, updated_blocks = self._move_common_code(super_graph)
+            if updates:
+                critical_fail = self.update_graph_with_super_edits(graph_copy, super_graph, updated_blocks)
+                if critical_fail:
+                    _l.error("Critical failure in updating graph with super edits, aborting")
+                    break
+                graph_changed = True
+
+        if graph_changed:
+            self.out_graph = add_labels(graph_copy)
+
+    @staticmethod
+    def update_graph_with_super_edits(
+        original_graph: nx.DiGraph, super_graph: nx.DiGraph, updated_blocks: Dict[Block, Block]
+    ) -> bool:
+        """
+        This function updates a graph when doing block edits on a supergraph version of that same graph.
+        The updated blocks must be provided as a dictionary where the keys are the original blocks in the
+        supergraph and the values are the new blocks that should replace them.
+
+        The supergraph MUST be generated using the to_ail_supergraph function, since it stores the original nodes
+        each super node represents. This is necessary to update the original graph with the new super nodes.
+        """
+        og_to_super = {}
+        for old_super, new_super in updated_blocks.items():
+            original_blocks = super_graph.nodes[old_super]["original_nodes"]
+            for original_block in original_blocks:
+                og_to_super[original_block] = new_super
+
+        for old_super, new_super in updated_blocks.items():
+            original_blocks = super_graph.nodes[old_super]["original_nodes"]
+            first_node_preds = []
+            last_node_preds = []
+            for original_block in original_blocks:
+                if original_block not in original_graph.nodes:
+                    return True
+
+                external_preds = [
+                    pred for pred in original_graph.predecessors(original_block) if pred not in original_blocks
+                ]
+                external_succs = [
+                    succ for succ in original_graph.successors(original_block) if succ not in original_blocks
+                ]
+                if external_preds:
+                    first_node_preds = external_preds
+                if external_succs:
+                    last_node_preds = external_succs
+
+            original_graph.remove_nodes_from(original_blocks)
+            original_graph.add_node(new_super)
+            for pred in first_node_preds:
+                original_graph.add_edge(og_to_super[pred] if pred in og_to_super else pred, new_super)
+            for succ in last_node_preds:
+                original_graph.add_edge(new_super, og_to_super[succ] if succ in og_to_super else succ)
+
+        return False
+
+    def _move_common_code(self, graph) -> Tuple[bool, Optional[Dict[Block, Block]]]:
+        """
+        Does two things at a high level:
+        1. rearranges code in blocks to maximize the number of similar statements at the end of the block
+        2. moves common code out of blocks
+
+        To understand the limitations of this approach, see the TODOs.
+        """
+        # TODO: how can you handle an odd-numbered switch case? or many blocks with the same child?
+        for b0, b1 in itertools.combinations(graph.nodes, 2):
+            if (
+                b0 is b1
+                or not b0.statements
+                or not b1.statements
+                or any(isinstance(stmt, DirtyStatement) for stmt in b0.statements + b1.statements)
+                or is_similar(b0, b1)
+            ):
+                continue
+
+            # TODO: add support for moving code to a shared parent block, which requires that we figure out how to
+            #   move code above conditional jumps. Hard since you need to know if the condition executes code.
+            # TODO: also, how do you deal with short-circuiting, which is a region parent, not just a block?
+
+            # target any blocks that have a shared child and move common code to the child
+            b0_succs = list(graph.successors(b0))
+            b1_succs = list(graph.successors(b1))
+            if (len(b0_succs) == len(b1_succs) == 1) and b0_succs[0] == b1_succs[0]:
+                common_succ = b0_succs[0]
+                common_succ_preds = list(graph.predecessors(common_succ))
+                # you can only safely move code to a child if all the common_succ's preds are the ones
+                # we are moving code from (2 nodes).
+                if all(csp in (b0, b1) for csp in common_succ_preds):
+                    success, updated_blocks = self._move_common_code_to_child(b0, b1, common_succ)
+                    if success:
+                        return True, updated_blocks
+
+        return False, None
+
+    def _move_common_code_to_parent(self, b0: Block, b1: Block, parent: Block):
+        # TODO: this function does not work yet because you need to figure out if you can move a stmt above
+        #   a conditional jump, which requires cross-block analysis
+        changed, new_b0, new_b1 = self._make_stmts_end_similar(b0, b1, up=True)
+        if not changed:
+            return False, None
+
+        # move the longest common prefix to the parent
+        new_b0_stmts = new_b0.statements
+        new_b1_stmts = new_b1.statements
+        common_len = 0
+        for idx, new_b0_stmt in enumerate(new_b0_stmts):
+            if not new_b0_stmt.likes(new_b1_stmts[idx]):
+                break
+            common_len += 1
+
+        if not common_len:
+            raise ValueError("No common statements found, this is unexpected")
+
+        common_stmts = [new_b0_stmts.pop(0) for _ in range(common_len)]
+        for _ in range(common_len):
+            new_b1_stmts.pop(0)
+
+        parent_stmts = parent.statements.copy() or []
+        if isinstance(parent_stmts[-1], (ConditionalJump, Jump)):
+            parent_stmts = parent_stmts[:-1] + common_stmts + [parent_stmts[-1]]
+        new_parent = parent.copy(statements=parent_stmts)
+
+        return True, {b0: new_b0, b1: new_b1, parent: new_parent}
+
+    def _move_common_code_to_child(self, b0: Block, b1: Block, child: Block):
+        changed, new_b0, new_b1 = self._make_stmts_end_similar(b0, b1, down=True)
+        if not changed:
+            return False, None
+
+        # move the longest common suffix to the child
+        new_b0_stmts = new_b0.statements
+        new_b1_stmts = new_b1.statements
+        common_len = 0
+        # start from the -1 index and go backwards
+        for idx in range(-1, -(min(len(new_b0_stmts), len(new_b1_stmts))) - 1, -1):
+            if not new_b0_stmts[idx].likes(new_b1_stmts[idx]):
+                break
+            common_len += 1
+
+        if not common_len:
+            raise ValueError("No common statements found, this is unexpected")
+
+        common_stmts = [new_b0_stmts.pop() for _ in range(common_len)]
+        for _ in range(common_len):
+            new_b1_stmts.pop()
+
+        child_stmts = child.statements.copy() or []
+        new_child = child.copy(statements=common_stmts[::-1] + child_stmts)
+
+        return True, {b0: new_b0, b1: new_b1, child: new_child}
+
+    def _make_stmts_end_similar(
+        self, b0: Block, b1: Block, up=False, down=False
+    ) -> Tuple[bool, Optional[Block], Optional[Block]]:
+        """
+        This algorithm attempts to rearrange two blocks to have the longest common sequence of statements
+        at either end of the blocks. It is flawed in that it currently only attempts this rearrangement
+        if the blocks have at least one matching statement at the end.
+
+        This algorithm iteratively removes statements from the ends of the blocks and then attempts to match
+        the ends of the blocks. It will only do this if one of the two ends has a matching statement in the other.
+        """
+        self._assert_up_or_down(up, down)
+        # copy the statements while filtering out statements that are not needed in the specific
+        # movement case (up or down)
+        curr_stmts = {}
+        for blk in (b0, b1):
+            new_stmts = blk.statements.copy()
+            if down:
+                last_stmt = new_stmts[-1]
+                if isinstance(last_stmt, Jump):
+                    new_stmts.pop()
+                elif isinstance(last_stmt, ConditionalJump):
+                    _l.warning("ConditionalJump at the end of block %s, this should never happen!", blk)
+                    return False, None, None
+
+            curr_stmts[blk] = new_stmts
+        if not curr_stmts[b0] or not curr_stmts[b1]:
+            return False, None, None
+
+        # attempt a swapping algorithm to maximize the number of similar statements at the end
+        changed = True
+        stmts_updated = False
+        matched_stmts = {b0: [], b1: []}
+        max_iters = len(curr_stmts[b0]) * len(curr_stmts[b1])
+        curr_iters = 0
+        while changed and curr_iters < max_iters:
+            changed = False
+            try_next_swap = False
+            for tgt0, tgt1 in ((b0, b1), (b1, b0)):
+                t0_stmts = curr_stmts[tgt0]
+                t1_stmts = curr_stmts[tgt1]
+                if not t0_stmts or not t1_stmts:
+                    break
+
+                if up:
+                    # maximize up
+                    while t0_stmts and t1_stmts:
+                        if t0_stmts[0].likes(t1_stmts[0]):
+                            matched_stmts[b0].append((t0_stmts.pop(0), 0))
+                            matched_stmts[b1].append((t1_stmts.pop(0), 0))
+                            try_next_swap = True
+                        else:
+                            break
+                    if not t0_stmts or not t1_stmts:
+                        break
+                elif down:
+                    # maximize down
+                    while t0_stmts and t1_stmts:
+                        if t0_stmts[-1].likes(t1_stmts[-1]):
+                            matched_stmts[b0].append((t0_stmts.pop(), -1))
+                            matched_stmts[b1].append((t1_stmts.pop(), -1))
+                            try_next_swap = True
+                        else:
+                            break
+                    if not t0_stmts or not t1_stmts:
+                        break
+
+                if not try_next_swap:
+                    continue
+
+                stmts_updated = True
+                swap_occurred, new_stmts = self._maximize_ends(t0_stmts, t1_stmts, up=up, down=down)
+                if swap_occurred:
+                    changed = True
+                    curr_stmts[b0], curr_stmts[b1] = new_stmts
+                    break
+
+                try_next_swap = True
+
+            curr_iters += 1
+            if curr_iters > max_iters:
+                raise ValueError("Exceeded max iterations, likely stuck in infinite loop")
+
+        # did any changes occur?
+        if not stmts_updated:
+            return False, None, None
+
+        # reconstruct the blocks and return them
+        new_blks = {}
+        for blk in (b0, b1):
+            new_stmts = curr_stmts[blk]
+            for stmt, idx in matched_stmts[blk][::-1]:
+                if idx == -1:
+                    new_stmts.append(stmt)
+                else:
+                    new_stmts.insert(idx, stmt)
+
+            new_blks[blk] = blk.copy(statements=new_stmts)
+
+        return True, new_blks[b0], new_blks[b1]
+
+    def _maximize_ends(
+        self, b0_stmts, b1_stmts, up=False, down=False
+    ) -> Tuple[bool, Tuple[List[Statement], List[Statement]]]:
+        self._assert_up_or_down(up, down)
+
+        similar_stmt = b0_stmts[0] if up else b0_stmts[-1]
+        idx_similar = index_of_similar_stmts([similar_stmt], b1_stmts)
+        if idx_similar is None or len(b1_stmts) == 1:
+            return False, (b0_stmts, b1_stmts)
+
+        target_stmt = b1_stmts[idx_similar]
+        success, new_b1_stmts = self._move_to_end(target_stmt, b1_stmts, up=up, down=down)
+        return (success and (b1_stmts != new_b1_stmts)), (b0_stmts, new_b1_stmts)
+
+    def _move_to_end(self, stmt, stmts, up=False, down=False) -> Tuple[bool, List[Statement]]:
+        """
+        Attempts to move a stmt to either the top or the bottom of stmts.
+        It does this by attempting to swap, 1 by 1, in either direction it is targeting.
+        """
+        new_stmts = stmts.copy()
+        stmt_idx = new_stmts.index(stmt)
+        swap_offset = -1 if up else 1
+        swap_order = range(stmt_idx + 1, len(new_stmts)) if down else range(stmt_idx - 1, -1, -1)
+        io_finder = BlockIOFinder(new_stmts, self.project)
+        for swap_pos in swap_order:
+            src_stmt = new_stmts[stmt_idx]
+            if io_finder.can_swap(src_stmt, new_stmts, 1 if down else -1):
+                new_stmts[stmt_idx], new_stmts[swap_pos] = new_stmts[swap_pos], new_stmts[stmt_idx]
+                stmt_idx += swap_offset
+            else:
+                return False, stmts
+
+        return True, new_stmts
+
+    @staticmethod
+    def _assert_up_or_down(up, down):
+        if up and down:
+            raise ValueError("Cannot maximize both up and down")
+        if not up and not down:
+            raise ValueError("Must maximize either up or down")
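For illustration, a minimal self-contained sketch of the suffix-sinking step that _move_common_code_to_child performs, with plain strings standing in for AIL statements; the real pass additionally reorders statements via _make_stmts_end_similar and validates every move with BlockIOFinder:

    def sink_common_suffix(b0, b1, child):
        # Length of the longest common suffix of the two sibling blocks.
        n = 0
        while n < min(len(b0), len(b1)) and b0[-1 - n] == b1[-1 - n]:
            n += 1
        if n == 0:
            return b0, b1, child
        # Strip the suffix from both siblings and prepend it to the shared child.
        return b0[:-n], b1[:-n], b0[len(b0) - n:] + child

    b0 = ["a = 1", "b = 2", "c = 3"]
    b1 = ["b = 2", "c = 3"]
    print(sink_common_suffix(b0, b1, ["return x"]))
    # (['a = 1'], [], ['b = 2', 'c = 3', 'return x'])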
angr/analyses/decompiler/optimization_passes/optimization_pass.py
@@ -335,6 +335,7 @@ class StructuringOptimizationPass(OptimizationPass):
         simp = self.project.analyses.AILSimplifier(
             self._func,
             func_graph=graph,
+            use_callee_saved_regs_at_return=False,
             gp=self._func.info.get("gp", None) if self.project.arch.name in {"MIPS32", "MIPS64"} else None,
         )
         return simp.func_graph if simp.simplified else graph
angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py
@@ -178,24 +178,36 @@ class StackCanarySimplifier(OptimizationPass):
         # Done!

     def _find_canary_init_stmt(self):
-        first_block = self._get_block(self._func.addr)
-        if first_block is None:
-            return None
-
-        for idx, stmt in enumerate(first_block.statements):
-            if (
-                isinstance(stmt, ailment.Stmt.Store)
-                and isinstance(stmt.addr, ailment.Expr.StackBaseOffset)
-                and isinstance(stmt.data, ailment.Expr.Load)
-                and self._is_add(stmt.data.addr)
-            ):
-                # Check addr: must be fs+0x28
-                op0, op1 = stmt.data.addr.operands
-                if isinstance(op1, ailment.Expr.Register):
-                    op0, op1 = op1, op0
-                if isinstance(op0, ailment.Expr.Register) and isinstance(op1, ailment.Expr.Const):
-                    if op0.reg_offset == self.project.arch.get_register_offset("fs") and op1.value == 0x28:
-                        return first_block, idx
+        block_addr = self._func.addr
+        traversed = set()
+
+        while True:
+            traversed.add(block_addr)
+            first_block = self._get_block(block_addr)
+            if first_block is None:
+                break
+
+            for idx, stmt in enumerate(first_block.statements):
+                if (
+                    isinstance(stmt, ailment.Stmt.Store)
+                    and isinstance(stmt.addr, ailment.Expr.StackBaseOffset)
+                    and isinstance(stmt.data, ailment.Expr.Load)
+                    and self._is_add(stmt.data.addr)
+                ):
+                    # Check addr: must be fs+0x28
+                    op0, op1 = stmt.data.addr.operands
+                    if isinstance(op1, ailment.Expr.Register):
+                        op0, op1 = op1, op0
+                    if isinstance(op0, ailment.Expr.Register) and isinstance(op1, ailment.Expr.Const):
+                        if op0.reg_offset == self.project.arch.get_register_offset("fs") and op1.value == 0x28:
+                            return first_block, idx
+
+            succs = list(self._graph.successors(first_block))
+            if len(succs) == 1:
+                block_addr = succs[0].addr
+                if block_addr not in traversed:
+                    continue
+            break

         return None

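The rewrite generalizes the canary-initialization search: instead of inspecting only the function's first block, it follows a chain of single successors from the entry block, with a traversed set guarding against loops. A generic sketch of that traversal pattern, where get_block, successors, and matches are hypothetical callables supplied by the caller:

    def walk_single_successor_chain(entry_addr, get_block, successors, matches):
        # Scan statements block by block along a single-successor chain,
        # guarding against loops with a traversed set.
        addr, traversed = entry_addr, set()
        while addr is not None and addr not in traversed:
            traversed.add(addr)
            block = get_block(addr)
            if block is None:
                return None
            for idx, stmt in enumerate(block.statements):
                if matches(stmt):
                    return block, idx
            succs = successors(block)
            addr = succs[0].addr if len(succs) == 1 else None
        return None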
angr/analyses/decompiler/optimization_passes/switch_default_case_duplicator.py (new file)
@@ -0,0 +1,110 @@
+# pylint:disable=too-many-boolean-expressions
+from itertools import count
+import logging
+
+import networkx
+
+from angr.knowledge_plugins.cfg import IndirectJumpType
+from .optimization_pass import OptimizationPass, OptimizationPassStage
+
+
+_l = logging.getLogger(name=__name__)
+
+
+def s2u(s, bits):
+    if s > 0:
+        return s
+    return (1 << bits) + s
+
+
+class SwitchDefaultCaseDuplicator(OptimizationPass):
+    """
+    For each switch-case construct (identified by jump tables), duplicate the default-case node when we detect
+    situations where the default-case node is seemingly reused by edges outside the switch-case construct. This code
+    reuse is usually caused by compiler code deduplication.
+
+    Ideally this pass should be implemented as an ISC optimization reversion.
+    """
+
+    ARCHES = None
+    PLATFORMS = None
+    STAGE = OptimizationPassStage.BEFORE_REGION_IDENTIFICATION
+    NAME = "Duplicate default-case nodes to undo default-case node reuse caused by compiler code deduplication"
+    DESCRIPTION = __doc__.strip()
+
+    def __init__(self, func, **kwargs):
+        super().__init__(func, **kwargs)
+
+        self.node_idx = count(start=0)
+
+        self.analyze()
+
+    def _check(self):
+        jumptables = self.kb.cfgs.get_most_accurate().jump_tables
+        switch_jump_block_addrs = {
+            jumptable.addr
+            for jumptable in jumptables.values()
+            if jumptable.type
+            in {IndirectJumpType.Jumptable_AddressComputed, IndirectJumpType.Jumptable_AddressLoadedFromMemory}
+        }
+        jump_node_addrs = self._func.block_addrs_set.intersection(switch_jump_block_addrs)
+        if not jump_node_addrs:
+            return False, None
+
+        default_case_node_addrs = set()
+        for node_addr in jump_node_addrs:
+            node = self._func.get_node(node_addr)
+            if self._func.graph.in_degree[node] == 1:
+                pred = list(self._func.graph.predecessors(node))[0]
+                if self._func.graph.out_degree[pred] == 2:
+                    default_case_node = next(
+                        iter(nn for nn in self._func.graph.successors(pred) if nn.addr != node_addr)
+                    )
+                    if self._func.graph.out_degree[default_case_node] == 1:
+                        default_case_node_addrs.add((pred.addr, node_addr, default_case_node.addr))
+
+        if not default_case_node_addrs:
+            return False, None
+
+        cache = {"default_case_node_addrs": default_case_node_addrs}
+        return True, cache
+
+    def _analyze(self, cache=None):
+
+        default_case_node_addrs = cache["default_case_node_addrs"]
+
+        out_graph = None
+
+        for switch_head_addr, jump_node_addr, default_addr in default_case_node_addrs:
+            default_case_node = self._func.get_node(default_addr)
+            unexpected_pred_addrs = {
+                pred.addr
+                for pred in self._func.graph.predecessors(default_case_node)
+                if pred.addr not in {switch_head_addr, jump_node_addr}
+            }
+            if unexpected_pred_addrs:
+                default_case_block = self._get_block(default_addr)
+                default_case_succ_block = list(self._graph.successors(default_case_block))[0]
+
+                jump_nodes = self._get_blocks(jump_node_addr)
+                jump_node_descendants = set()
+                for jump_node in jump_nodes:
+                    jump_node_descendants |= networkx.descendants(self._graph, jump_node)
+
+                # duplicate default_case_node for each unexpected predecessor
+                for unexpected_pred_addr in unexpected_pred_addrs:
+                    for unexpected_pred in self._get_blocks(unexpected_pred_addr):
+                        # is this predecessor reachable from the jump node? if so, we believe this is a legitimate
+                        # edge and do not duplicate it.
+                        if unexpected_pred in jump_node_descendants:
+                            continue
+
+                        default_case_block_copy = default_case_block.copy()
+                        default_case_block_copy.idx = next(self.node_idx)
+                        if out_graph is None:
+                            out_graph = self._graph
+                        out_graph.remove_edge(unexpected_pred, default_case_block)
+                        out_graph.add_edge(unexpected_pred, default_case_block_copy)
+                        out_graph.add_edge(default_case_block_copy, default_case_succ_block)
+
+        self.out_graph = out_graph
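The s2u helper maps a signed integer to its two's-complement unsigned encoding at a given bit width (note that, as written, s == 0 also takes the 1 << bits branch). A few examples:

    assert s2u(5, 32) == 5
    assert s2u(-1, 32) == 0xFFFFFFFF
    assert s2u(-0x28, 64) == 0xFFFFFFFFFFFFFFD8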