angr 9.2.123__py3-none-manylinux2014_x86_64.whl → 9.2.125__py3-none-manylinux2014_x86_64.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (103)
  1. angr/__init__.py +1 -1
  2. angr/analyses/__init__.py +9 -1
  3. angr/analyses/cfg/indirect_jump_resolvers/mips_elf_fast.py +11 -8
  4. angr/analyses/cfg/indirect_jump_resolvers/mips_elf_got.py +2 -2
  5. angr/analyses/codecave.py +77 -0
  6. angr/analyses/decompiler/ail_simplifier.py +16 -19
  7. angr/analyses/decompiler/callsite_maker.py +8 -7
  8. angr/analyses/decompiler/ccall_rewriters/amd64_ccalls.py +24 -2
  9. angr/analyses/decompiler/clinic.py +58 -2
  10. angr/analyses/decompiler/condition_processor.py +10 -3
  11. angr/analyses/decompiler/decompilation_cache.py +2 -0
  12. angr/analyses/decompiler/decompiler.py +54 -8
  13. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +10 -2
  14. angr/analyses/decompiler/dephication/rewriting_engine.py +64 -1
  15. angr/analyses/decompiler/expression_narrower.py +5 -1
  16. angr/analyses/decompiler/optimization_passes/__init__.py +3 -0
  17. angr/analyses/decompiler/optimization_passes/div_simplifier.py +4 -1
  18. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +13 -0
  19. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +23 -4
  20. angr/analyses/decompiler/optimization_passes/optimization_pass.py +3 -1
  21. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +8 -5
  22. angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +10 -5
  23. angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +18 -7
  24. angr/analyses/decompiler/optimization_passes/switch_default_case_duplicator.py +6 -0
  25. angr/analyses/decompiler/optimization_passes/tag_slicer.py +41 -0
  26. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +2 -0
  27. angr/analyses/decompiler/peephole_optimizations/const_mull_a_shift.py +2 -0
  28. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +2 -2
  29. angr/analyses/decompiler/peephole_optimizations/remove_cascading_conversions.py +8 -2
  30. angr/analyses/decompiler/region_identifier.py +36 -0
  31. angr/analyses/decompiler/region_simplifiers/loop.py +2 -8
  32. angr/analyses/decompiler/region_simplifiers/switch_cluster_simplifier.py +9 -3
  33. angr/analyses/decompiler/ssailification/rewriting.py +5 -2
  34. angr/analyses/decompiler/ssailification/rewriting_engine.py +151 -25
  35. angr/analyses/decompiler/ssailification/rewriting_state.py +1 -0
  36. angr/analyses/decompiler/ssailification/ssailification.py +17 -9
  37. angr/analyses/decompiler/ssailification/traversal.py +3 -1
  38. angr/analyses/decompiler/ssailification/traversal_engine.py +35 -8
  39. angr/analyses/decompiler/ssailification/traversal_state.py +1 -0
  40. angr/analyses/decompiler/structured_codegen/c.py +42 -4
  41. angr/analyses/decompiler/structuring/phoenix.py +3 -0
  42. angr/analyses/patchfinder.py +137 -0
  43. angr/analyses/pathfinder.py +282 -0
  44. angr/analyses/propagator/engine_ail.py +10 -3
  45. angr/analyses/reaching_definitions/engine_ail.py +10 -15
  46. angr/analyses/s_propagator.py +16 -9
  47. angr/analyses/s_reaching_definitions/s_rda_view.py +127 -63
  48. angr/analyses/smc.py +159 -0
  49. angr/analyses/variable_recovery/engine_ail.py +14 -0
  50. angr/analyses/variable_recovery/engine_base.py +11 -0
  51. angr/angrdb/models.py +1 -2
  52. angr/engines/light/engine.py +12 -0
  53. angr/engines/vex/heavy/heavy.py +2 -0
  54. angr/exploration_techniques/spiller_db.py +1 -2
  55. angr/knowledge_plugins/__init__.py +2 -0
  56. angr/knowledge_plugins/decompilation.py +45 -0
  57. angr/knowledge_plugins/functions/function.py +4 -0
  58. angr/knowledge_plugins/functions/function_manager.py +18 -9
  59. angr/knowledge_plugins/functions/function_parser.py +1 -1
  60. angr/knowledge_plugins/functions/soot_function.py +1 -0
  61. angr/knowledge_plugins/key_definitions/atoms.py +8 -0
  62. angr/misc/ux.py +2 -2
  63. angr/procedures/definitions/parse_win32json.py +2 -1
  64. angr/project.py +17 -1
  65. angr/state_plugins/history.py +6 -4
  66. angr/storage/memory_mixins/actions_mixin.py +7 -7
  67. angr/storage/memory_mixins/address_concretization_mixin.py +5 -5
  68. angr/storage/memory_mixins/bvv_conversion_mixin.py +1 -1
  69. angr/storage/memory_mixins/clouseau_mixin.py +3 -3
  70. angr/storage/memory_mixins/conditional_store_mixin.py +3 -3
  71. angr/storage/memory_mixins/default_filler_mixin.py +3 -3
  72. angr/storage/memory_mixins/memory_mixin.py +45 -34
  73. angr/storage/memory_mixins/paged_memory/page_backer_mixins.py +15 -14
  74. angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +27 -16
  75. angr/storage/memory_mixins/paged_memory/pages/cooperation.py +18 -9
  76. angr/storage/memory_mixins/paged_memory/pages/ispo_mixin.py +5 -5
  77. angr/storage/memory_mixins/paged_memory/pages/multi_values.py +89 -55
  78. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +16 -25
  79. angr/storage/memory_mixins/paged_memory/pages/permissions_mixin.py +11 -9
  80. angr/storage/memory_mixins/paged_memory/pages/ultra_page.py +23 -7
  81. angr/storage/memory_mixins/paged_memory/privileged_mixin.py +1 -1
  82. angr/storage/memory_mixins/regioned_memory/region_meta_mixin.py +9 -7
  83. angr/storage/memory_mixins/regioned_memory/regioned_memory_mixin.py +9 -9
  84. angr/storage/memory_mixins/regioned_memory/static_find_mixin.py +1 -0
  85. angr/storage/memory_mixins/simple_interface_mixin.py +2 -2
  86. angr/storage/memory_mixins/simplification_mixin.py +2 -2
  87. angr/storage/memory_mixins/size_resolution_mixin.py +1 -1
  88. angr/storage/memory_mixins/slotted_memory.py +3 -3
  89. angr/storage/memory_mixins/smart_find_mixin.py +1 -0
  90. angr/storage/memory_mixins/underconstrained_mixin.py +5 -5
  91. angr/storage/memory_mixins/unwrapper_mixin.py +4 -4
  92. angr/storage/memory_object.py +4 -3
  93. angr/utils/bits.py +4 -0
  94. angr/utils/constants.py +1 -1
  95. angr/utils/graph.py +15 -0
  96. angr/utils/tagged_interval_map.py +112 -0
  97. angr/vaults.py +2 -2
  98. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/METADATA +6 -6
  99. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/RECORD +103 -96
  100. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/WHEEL +1 -1
  101. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/LICENSE +0 -0
  102. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/entry_points.txt +0 -0
  103. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
  from collections import OrderedDict
3
3
 
4
4
  from ailment.statement import Assignment, Call, Store, ConditionalJump
5
- from ailment.expression import Register, BinaryOp, StackBaseOffset, ITE, VEXCCallExpression
5
+ from ailment.expression import Register, BinaryOp, StackBaseOffset, ITE, VEXCCallExpression, Tmp, DirtyExpression
6
6
 
7
7
  from angr.engines.light import SimEngineLight, SimEngineLightAILMixin
8
8
  from angr.utils.ssa import get_reg_offset_base
@@ -21,7 +21,14 @@ class SimEngineSSATraversal(
21
21
  state: TraversalState
22
22
 
23
23
  def __init__(
24
- self, arch, sp_tracker=None, bp_as_gpr: bool = False, def_to_loc=None, loc_to_defs=None, stackvars: bool = False
24
+ self,
25
+ arch,
26
+ sp_tracker=None,
27
+ bp_as_gpr: bool = False,
28
+ def_to_loc=None,
29
+ loc_to_defs=None,
30
+ stackvars: bool = False,
31
+ tmps: bool = False,
25
32
  ):
26
33
  super().__init__()
27
34
 
@@ -29,14 +36,15 @@ class SimEngineSSATraversal(
29
36
  self.sp_tracker = sp_tracker
30
37
  self.bp_as_gpr = bp_as_gpr
31
38
  self.stackvars = stackvars
39
+ self.tmps = tmps
32
40
 
33
- self.def_to_loc = def_to_loc if def_to_loc is not None else OrderedDict()
41
+ self.def_to_loc = def_to_loc if def_to_loc is not None else []
34
42
  self.loc_to_defs = loc_to_defs if loc_to_defs is not None else OrderedDict()
35
43
 
36
44
  def _handle_Assignment(self, stmt: Assignment):
37
45
  if isinstance(stmt.dst, Register):
38
46
  codeloc = self._codeloc()
39
- self.def_to_loc[stmt.dst] = codeloc
47
+ self.def_to_loc.append((stmt.dst, codeloc))
40
48
  if codeloc not in self.loc_to_defs:
41
49
  self.loc_to_defs[codeloc] = OrderedSet()
42
50
  self.loc_to_defs[codeloc].add(stmt.dst)
@@ -52,7 +60,7 @@ class SimEngineSSATraversal(
52
60
 
53
61
  if self.stackvars and isinstance(stmt.addr, StackBaseOffset) and isinstance(stmt.addr.offset, int):
54
62
  codeloc = self._codeloc()
55
- self.def_to_loc[stmt] = codeloc
63
+ self.def_to_loc.append((stmt, codeloc))
56
64
  if codeloc not in self.loc_to_defs:
57
65
  self.loc_to_defs[codeloc] = OrderedSet()
58
66
  self.loc_to_defs[codeloc].add(stmt)
@@ -69,7 +77,7 @@ class SimEngineSSATraversal(
69
77
  def _handle_Call(self, stmt: Call):
70
78
  if stmt.ret_expr is not None and isinstance(stmt.ret_expr, Register):
71
79
  codeloc = self._codeloc()
72
- self.def_to_loc[stmt.ret_expr] = codeloc
80
+ self.def_to_loc.append((stmt.ret_expr, codeloc))
73
81
  if codeloc not in self.loc_to_defs:
74
82
  self.loc_to_defs[codeloc] = OrderedSet()
75
83
  self.loc_to_defs[codeloc].add(stmt.ret_expr)
@@ -79,18 +87,30 @@ class SimEngineSSATraversal(
79
87
 
80
88
  super()._ail_handle_Call(stmt)
81
89
 
90
+ _handle_CallExpr = _handle_Call
91
+
82
92
  def _handle_Register(self, expr: Register):
83
93
  base_offset = get_reg_offset_base(expr.reg_offset, self.arch)
84
94
 
85
95
  if base_offset not in self.state.live_registers:
86
96
  codeloc = self._codeloc()
87
- self.def_to_loc[expr] = codeloc
97
+ self.def_to_loc.append((expr, codeloc))
88
98
  if codeloc not in self.loc_to_defs:
89
99
  self.loc_to_defs[codeloc] = OrderedSet()
90
100
  self.loc_to_defs[codeloc].add(expr)
91
101
 
92
102
  self.state.live_registers.add(base_offset)
93
103
 
104
+ def _handle_Tmp(self, expr: Tmp):
105
+ if self.tmps:
106
+ codeloc = self._codeloc()
107
+ self.def_to_loc.append((expr, codeloc))
108
+ if codeloc not in self.loc_to_defs:
109
+ self.loc_to_defs[codeloc] = OrderedSet()
110
+ self.loc_to_defs[codeloc].add(expr)
111
+
112
+ self.state.live_tmps.add(expr.tmp_idx)
113
+
94
114
  def _handle_Cmp(self, expr: BinaryOp):
95
115
  self._expr(expr.operands[0])
96
116
  self._expr(expr.operands[1])
@@ -123,9 +143,16 @@ class SimEngineSSATraversal(
123
143
  for operand in expr.operands:
124
144
  self._expr(operand)
125
145
 
146
+ def _handle_DirtyExpression(self, expr: DirtyExpression):
147
+ for operand in expr.operands:
148
+ self._expr(operand)
149
+ if expr.guard is not None:
150
+ self._expr(expr.guard)
151
+ if expr.maddr is not None:
152
+ self._expr(expr.maddr)
153
+
126
154
  def _handle_Dummy(self, expr):
127
155
  pass
128
156
 
129
157
  _handle_VirtualVariable = _handle_Dummy
130
158
  _handle_Phi = _handle_Dummy
131
- _handle_DirtyExpression = _handle_Dummy
@@ -18,6 +18,7 @@ class TraversalState:
18
18
 
19
19
  self.live_registers: set[int] = set() if live_registers is None else live_registers
20
20
  self.live_stackvars: set[tuple[int, int]] = set() if live_stackvars is None else live_stackvars
21
+ self.live_tmps: set[int] = set() # tmps are internal to a block only and never propagated from another state
21
22
 
22
23
  def copy(self) -> TraversalState:
23
24
  return TraversalState(
@@ -1408,7 +1408,7 @@ class CUnsupportedStatement(CStatement):
1408
1408
  class CDirtyStatement(CExpression):
1409
1409
  __slots__ = ("dirty",)
1410
1410
 
1411
- def __init__(self, dirty, **kwargs):
1411
+ def __init__(self, dirty: CDirtyExpression, **kwargs):
1412
1412
  super().__init__(**kwargs)
1413
1413
  self.dirty = dirty
1414
1414
 
@@ -1420,7 +1420,7 @@ class CDirtyStatement(CExpression):
1420
1420
  indent_str = self.indent_str(indent=indent)
1421
1421
 
1422
1422
  yield indent_str, None
1423
- yield str(self.dirty), None
1423
+ yield from self.dirty.c_repr_chunks()
1424
1424
  yield "\n", None
1425
1425
 
1426
1426
 
@@ -2303,6 +2303,38 @@ class CMultiStatementExpression(CExpression):
2303
2303
  yield ")", paren
2304
2304
 
2305
2305
 
2306
+ class CVEXCCallExpression(CExpression):
2307
+ """
2308
+ ccall_name(arg0, arg1, ...)
2309
+ """
2310
+
2311
+ __slots__ = (
2312
+ "callee",
2313
+ "operands",
2314
+ "tags",
2315
+ )
2316
+
2317
+ def __init__(self, callee: str, operands: list[CExpression], tags=None, **kwargs):
2318
+ super().__init__(**kwargs)
2319
+ self.callee = callee
2320
+ self.operands = operands
2321
+ self.tags = tags
2322
+
2323
+ @property
2324
+ def type(self):
2325
+ return SimTypeInt().with_arch(self.codegen.project.arch)
2326
+
2327
+ def c_repr_chunks(self, indent=0, asexpr=False):
2328
+ paren = CClosingObject("(")
2329
+ yield f"{self.callee}", self
2330
+ yield "(", paren
2331
+ for idx, operand in enumerate(self.operands):
2332
+ if idx != 0:
2333
+ yield ", ", None
2334
+ yield from operand.c_repr_chunks()
2335
+ yield ")", paren
2336
+
2337
+
2306
2338
  class CDirtyExpression(CExpression):
2307
2339
  """
2308
2340
  Ideally all dirty expressions should be handled and converted to proper conversions during conversion from VEX to
@@ -2424,6 +2456,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2424
2456
  Expr.BinaryOp: self._handle_Expr_BinaryOp,
2425
2457
  Expr.Convert: self._handle_Expr_Convert,
2426
2458
  Expr.StackBaseOffset: self._handle_Expr_StackBaseOffset,
2459
+ Expr.VEXCCallExpression: self._handle_Expr_VEXCCallExpression,
2427
2460
  Expr.DirtyExpression: self._handle_Expr_Dirty,
2428
2461
  Expr.ITE: self._handle_Expr_ITE,
2429
2462
  Expr.Reinterpret: self._handle_Reinterpret,
@@ -3318,7 +3351,8 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3318
3351
  return clabel
3319
3352
 
3320
3353
  def _handle_Stmt_Dirty(self, stmt: Stmt.DirtyStatement, **kwargs):
3321
- return CDirtyStatement(stmt, codegen=self)
3354
+ dirty = self._handle(stmt.dirty)
3355
+ return CDirtyStatement(dirty, codegen=self)
3322
3356
 
3323
3357
  #
3324
3358
  # AIL expression handlers
@@ -3519,7 +3553,11 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3519
3553
 
3520
3554
  return CTypeCast(None, dst_type.with_arch(self.project.arch), child, tags=expr.tags, codegen=self)
3521
3555
 
3522
- def _handle_Expr_Dirty(self, expr, **kwargs):
3556
+ def _handle_Expr_VEXCCallExpression(self, expr: Expr.VEXCCallExpression, **kwargs):
3557
+ operands = [self._handle(arg) for arg in expr.operands]
3558
+ return CVEXCCallExpression(expr.callee, operands, tags=expr.tags, codegen=self)
3559
+
3560
+ def _handle_Expr_Dirty(self, expr: Expr.DirtyExpression, **kwargs):
3523
3561
  return CDirtyExpression(expr, codegen=self)
3524
3562
 
3525
3563
  def _handle_Expr_ITE(self, expr: Expr.ITE, **kwargs):
@@ -566,6 +566,9 @@ class PhoenixStructurer(StructurerBase):
566
566
 
567
567
  if next_node is node:
568
568
  break
569
+ if next_node is head:
570
+ # we don't want a loop with region head not as the first node of the body!
571
+ return False, None
569
572
  if next_node is not node and next_node in seen_nodes:
570
573
  return False, None
571
574
 
@@ -0,0 +1,137 @@
1
+ # pylint:disable=missing-class-docstring
2
+ from __future__ import annotations
3
+ import logging
4
+ from typing import TYPE_CHECKING
5
+ from collections import defaultdict
6
+ from dataclasses import dataclass
7
+
8
+ from sortedcontainers import SortedDict
9
+
10
+ from angr.analyses import Analysis, AnalysesHub
11
+ from angr.utils.bits import ffs
12
+
13
+ if TYPE_CHECKING:
14
+ from angr.knowledge_plugins import Function
15
+
16
+
17
+ log = logging.getLogger(__name__)
18
+
19
+
20
+ class OverlappingFunctionsAnalysis(Analysis):
21
+ """
22
+ Identify functions with interleaved blocks.
23
+ """
24
+
25
+ overlapping_functions: dict[int, list[int]]
26
+
27
+ def __init__(self):
28
+ self.overlapping_functions = defaultdict(list)
29
+ addr_to_func_max_addr = SortedDict()
30
+
31
+ for func in self.project.kb.functions.values():
32
+ if func.is_alignment:
33
+ continue
34
+ func_max_addr = max((block.addr + block.size) for block in func.blocks)
35
+ addr_to_func_max_addr[func.addr] = (func, func_max_addr)
36
+
37
+ for idx, (addr, (func, max_addr)) in enumerate(addr_to_func_max_addr.items()):
38
+ for other_addr in addr_to_func_max_addr.islice(idx + 1):
39
+ if other_addr >= max_addr:
40
+ break
41
+
42
+ self.overlapping_functions[addr].append(other_addr)
43
+
44
+
45
+ class FunctionAlignmentAnalysis(Analysis):
46
+ """
47
+ Determine typical function alignment
48
+ """
49
+
50
+ alignment: int | None
51
+
52
+ def __init__(self):
53
+ self.alignment = None
54
+
55
+ if len(self.project.kb.functions) == 0:
56
+ if self.project.kb.cfgs.get_most_accurate() is None:
57
+ log.warning("Please run CFGFast analysis first, to identify functions")
58
+ return
59
+
60
+ alignment_bins = defaultdict(int)
61
+ count = 0
62
+ for func in self.project.kb.functions.values():
63
+ if not (func.is_alignment or func.is_plt or func.is_simprocedure):
64
+ alignment_bins[ffs(func.addr)] += 1
65
+ count += 1
66
+
67
+ # FIXME: Higher alignment values will be naturally aligned
68
+
69
+ typical_alignment = max(alignment_bins, key=lambda k: alignment_bins[k])
70
+ if count > 10 and alignment_bins[typical_alignment] >= count / 4: # XXX: cutoff
71
+ self.alignment = 1 << max(typical_alignment, 0)
72
+ log.debug("Function alignment appears to be %d bytes", self.alignment)
73
+
74
+
75
+ @dataclass
76
+ class AtypicallyAlignedFunction:
77
+ function: Function
78
+ expected_alignment: int
79
+
80
+
81
+ @dataclass
82
+ class PatchedOutFunctionality:
83
+ patched_function: Function
84
+ patched_out_function: Function
85
+
86
+
87
+ class PatchFinderAnalysis(Analysis):
88
+ """
89
+ Looks for binary patches using some basic heuristics:
90
+ - Looking for interleaved functions
91
+ - Looking for unaligned functions
92
+ """
93
+
94
+ # FIXME: Possible additional heuristics:
95
+ # - Jumps out to end of function, then back
96
+ # - Looking for patch jumps, e.g. push <addr>; ret
97
+ # - Looking for instruction partials broken by a patch (nodecode)
98
+ # - Unusual stack manipulation
99
+
100
+ atypical_alignments: list[Function]
101
+ possibly_patched_out: list[PatchedOutFunctionality]
102
+
103
+ def __init__(self):
104
+ self.atypical_alignments = []
105
+ self.possibly_patched_out = []
106
+
107
+ if len(self.project.kb.functions) == 0:
108
+ if self.project.kb.cfgs.get_most_accurate() is None:
109
+ log.warning("Please run CFGFast analysis first, to identify functions")
110
+ return
111
+
112
+ # In CFGFast with scanning enabled, a function may be created from unreachable blocks within another function.
113
+ # Search for interleaved/overlapping functions to identify possible patches.
114
+ overlapping_functions = self.project.analyses.OverlappingFunctions().overlapping_functions
115
+ for addr, overlapping_func_addrs in overlapping_functions.items():
116
+ func = self.project.kb.functions[addr]
117
+
118
+ # Are the overlapping functions reachable?
119
+ for overlapping_addr in overlapping_func_addrs:
120
+ overlapping_func = self.project.kb.functions[overlapping_addr]
121
+ if self.project.kb.callgraph.in_degree(overlapping_addr) == 0:
122
+ self.possibly_patched_out.append(PatchedOutFunctionality(func, overlapping_func))
123
+ # FIXME: What does the patch do?
124
+
125
+ # Look for unaligned functions
126
+ expected_alignment = self.project.analyses.FunctionAlignment().alignment
127
+ if expected_alignment is not None and expected_alignment > self.project.arch.instruction_alignment:
128
+ for func in self.project.kb.functions.values():
129
+ if not (func.is_alignment or func.is_plt or func.is_simprocedure) and func.addr & (
130
+ expected_alignment - 1
131
+ ):
132
+ self.atypical_alignments.append(AtypicallyAlignedFunction(func, expected_alignment))
133
+
134
+
135
+ AnalysesHub.register_default("OverlappingFunctions", OverlappingFunctionsAnalysis)
136
+ AnalysesHub.register_default("FunctionAlignment", FunctionAlignmentAnalysis)
137
+ AnalysesHub.register_default("PatchFinder", PatchFinderAnalysis)
@@ -0,0 +1,282 @@
1
+ # pylint:disable=missing-class-docstring
2
+ from __future__ import annotations
3
+ from enum import Enum, auto
4
+ from dataclasses import dataclass
5
+ from weakref import ref
6
+ from collections import defaultdict
7
+
8
+ from networkx import DiGraph
9
+ from networkx.algorithms.shortest_paths import single_target_shortest_path_length
10
+
11
+ from angr.sim_state import SimState
12
+ from angr.engines.successors import SimSuccessors
13
+ from angr.knowledge_plugins.cfg import CFGModel, CFGNode
14
+ from .analysis import Analysis, AnalysesHub
15
+
16
+
17
+ class Unreachable(Exception):
18
+ pass
19
+
20
+
21
+ @dataclass(eq=False)
22
+ class SimStateMarker:
23
+ addr: int
24
+ parent: SimStateMarker | None = None
25
+ banned: bool = False
26
+ misses: int = 0
27
+
28
+ def __repr__(self):
29
+ inner_repr = "None" if self.parent is None else "..."
30
+ return f"SimStateMarker(addr={self.addr:#x}, parent={inner_repr}, banned={self.banned}, misses={self.misses})"
31
+
32
+
33
+ class SuccessorsKind(Enum):
34
+ SAT = auto()
35
+ UNSAT = auto()
36
+ MISSING = auto()
37
+
38
+
39
+ @dataclass
40
+ class TestPathReport:
41
+ path_markers: dict[int, SimStateMarker]
42
+ termination: SuccessorsKind
43
+
44
+
45
+ def nilref():
46
+ return None
47
+
48
+
49
+ class Pathfinder(Analysis):
50
+ def __init__(self, start_state: SimState, goal_addr: int, cfg: CFGModel, cache_size=10000):
51
+ self.start_state = start_state
52
+ self.goal_addr = goal_addr
53
+ self.goal_state: SimState | None = None
54
+ self.cfg = cfg
55
+ self.cache_size = cache_size
56
+
57
+ # HACK HACK HACK HACK TODO FIXME FISH PLEASE GET RID OF THIS
58
+ extra_edges = []
59
+ for node in self.cfg.graph.nodes:
60
+ if node.is_syscall:
61
+ for pred in self.cfg.graph.pred[node]:
62
+ for succ, data in self.cfg.graph.succ[pred].items():
63
+ if data["jumpkind"] == "Ijk_FakeRet":
64
+ extra_edges.append((node, succ))
65
+ for node, succ in extra_edges:
66
+ self.cfg.graph.add_edge(node, succ, jumpkind="Ijk_Ret")
67
+
68
+ goal_node = self.cfg.get_any_node(goal_addr)
69
+ if goal_node is None:
70
+ raise ValueError(f"Node {goal_addr:#x} is not in graph")
71
+
72
+ self.start_marker = SimStateMarker(start_state.addr)
73
+ self.transition_cache: DiGraph[SimStateMarker] = DiGraph()
74
+ self.transition_cache.add_node(self.start_marker, state=ref(start_state))
75
+ self.base_heuristic: dict[int, int] = {
76
+ node.addr: dist for node, dist in single_target_shortest_path_length(cfg.graph, goal_node)
77
+ }
78
+ self.state_cache = {}
79
+ self.unsat_markers = set()
80
+ self.extra_weight = defaultdict(int)
81
+
82
+ self._search_frontier_marker = self.start_marker
83
+ self._search_path: list[tuple[int, str]] = [(self.start_marker.addr, "Ijk_Boring")]
84
+ self._search_stack = []
85
+ self._search_backtrack_to = {self.start_marker}
86
+ self._search_address_backtrack_points = {self.start_marker.addr: self.start_marker}
87
+
88
+ def cache_state(self, state: SimState):
89
+ self.state_cache[state] = self.state_cache.pop(state, None)
90
+ if len(self.state_cache) > self.cache_size:
91
+ self.state_cache.pop(next(iter(self.state_cache)))
92
+
93
+ def marker_to_state(self, marker: SimStateMarker) -> SimState | None:
94
+ return self.transition_cache.nodes[marker]["state"]()
95
+
96
+ def analyze(self) -> bool:
97
+ while True:
98
+ search_path = self.find_best_hypothesis_path()
99
+ result = self.test_path(search_path)
100
+ if result.termination == SuccessorsKind.SAT:
101
+ self.goal_state = self.marker_to_state(result.path_markers[len(search_path) - 1])
102
+ return True
103
+ marker = result.path_markers[max(result.path_markers)]
104
+ marker.banned = True
105
+ self._search_backtrack_to.add(marker)
106
+ if result.termination == SuccessorsKind.UNSAT:
107
+ self.unsat_markers.add(marker)
108
+
109
+ def _search_backtrack(self):
110
+ if self._search_address_backtrack_points[self._search_frontier_marker.addr] is self._search_frontier_marker:
111
+ self._search_address_backtrack_points.pop(self._search_frontier_marker.addr)
112
+
113
+ self._search_frontier_marker = self._search_frontier_marker.parent
114
+ if self._search_frontier_marker is None:
115
+ raise Unreachable
116
+
117
+ addr, jumpkind = self._search_path.pop()
118
+ if jumpkind == "Ijk_Ret":
119
+ self._search_stack.append(addr)
120
+ elif jumpkind == "Ijk_Call" or jumpkind.startswith("Ijk_Sys"):
121
+ self._search_stack.pop()
122
+
123
+ def find_best_hypothesis_path(self) -> tuple[int, ...]:
124
+ assert self._search_backtrack_to, "Uhh every iteration should set at least one backtrack point"
125
+ if self.start_marker in self._search_backtrack_to:
126
+ self._search_frontier_marker = self.start_marker
127
+ self._search_path: list[tuple[int, str]] = [(self.start_marker.addr, "Ijk_Boring")]
128
+ self._search_stack = []
129
+ self._search_backtrack_to = set()
130
+ else:
131
+ while self._search_backtrack_to:
132
+ self._search_backtrack_to.discard(self._search_frontier_marker)
133
+ try:
134
+ self._search_backtrack()
135
+ except Unreachable as e:
136
+ raise RuntimeError("oops") from e
137
+
138
+ while self._search_path[-1][0] != self.goal_addr:
139
+ banned = {
140
+ marker.addr for marker in self.transition_cache.succ[self._search_frontier_marker] if marker.banned
141
+ }
142
+ current_node = self.cfg.get_any_node(self._search_path[-1][0])
143
+ options = [
144
+ (node, data["jumpkind"], self.base_heuristic[node.addr] + self.extra_weight[node.addr])
145
+ for node, data in self.cfg.graph.succ[current_node].items()
146
+ if data["jumpkind"] != "Ijk_FakeRet"
147
+ and node.addr not in banned
148
+ and node.addr in self.base_heuristic
149
+ and (data["jumpkind"] != "Ijk_Ret" or node.addr == self._search_stack[-1])
150
+ ]
151
+ if not options:
152
+ # backtrack
153
+ self._search_frontier_marker.banned = True
154
+ self._search_backtrack()
155
+ continue
156
+
157
+ best_node, best_jumpkind, best_weight = min(
158
+ options,
159
+ default=(None, None),
160
+ key=lambda xyz: xyz[2],
161
+ )
162
+
163
+ assert isinstance(best_jumpkind, str)
164
+ assert isinstance(best_node, CFGNode)
165
+ self.extra_weight[best_node.addr] += 1
166
+ self._search_path.append((best_node.addr, best_jumpkind))
167
+
168
+ if best_jumpkind == "Ijk_Call" or best_jumpkind.startswith("Ijk_Sys"):
169
+ self._search_stack.append(
170
+ next(
171
+ iter(
172
+ node.addr
173
+ for node, data in self.cfg.graph.succ[current_node].items()
174
+ if data["jumpkind"] == "Ijk_FakeRet"
175
+ ),
176
+ None,
177
+ )
178
+ )
179
+ elif best_jumpkind == "Ijk_Ret":
180
+ self._search_stack.pop()
181
+
182
+ frontier_marker_nullable = next(
183
+ (
184
+ marker
185
+ for marker in self.transition_cache.succ[self._search_frontier_marker]
186
+ if marker.addr == best_node.addr
187
+ ),
188
+ None,
189
+ )
190
+ if frontier_marker_nullable is None:
191
+ new_marker = SimStateMarker(best_node.addr, self._search_frontier_marker)
192
+ self.transition_cache.add_node(new_marker, state=nilref)
193
+ self.transition_cache.add_edge(self._search_frontier_marker, new_marker)
194
+ self._search_frontier_marker = new_marker
195
+ else:
196
+ self._search_frontier_marker = frontier_marker_nullable
197
+
198
+ if self._search_frontier_marker.addr not in self._search_address_backtrack_points:
199
+ self._search_address_backtrack_points[self._search_frontier_marker.addr] = self._search_frontier_marker
200
+
201
+ # TODO does this go above the above stanza?
202
+ if sum(weight == best_weight for _, _, weight in options) != 1:
203
+ self._search_backtrack_to.add(self._search_address_backtrack_points[self._search_frontier_marker.addr])
204
+
205
+ return tuple(addr for addr, _ in self._search_path)
206
+
207
+ def diagnose_unsat(self, state: SimState):
208
+ pass
209
+
210
+ def test_path(self, bbl_addr_trace: tuple[int, ...]) -> TestPathReport:
211
+ assert bbl_addr_trace[0] == self.start_marker.addr, "Paths must begin with the start state"
212
+
213
+ known_markers = [self.start_marker]
214
+ for addr in bbl_addr_trace[1:]:
215
+ for succ in self.transition_cache.succ[known_markers[-1]]:
216
+ if succ.addr == addr:
217
+ break
218
+ else:
219
+ break
220
+ known_markers.append(succ)
221
+
222
+ marker = None
223
+ for ri, marker_ in enumerate(reversed(known_markers)):
224
+ i = len(known_markers) - 1 - ri
225
+ state: SimState = self.transition_cache.nodes[marker_]["state"]()
226
+ marker = marker_
227
+ if state is not None:
228
+ break
229
+ else:
230
+ assert False, "The first item in known_markers should always have a resolvable weakref"
231
+
232
+ while i != len(bbl_addr_trace) - 1:
233
+ assert state.addr == bbl_addr_trace[i]
234
+
235
+ marker.misses += 1
236
+ successors = state.step(strict_block_end=True)
237
+ succ, kind = find_successor(successors, bbl_addr_trace[i + 1])
238
+
239
+ # cache state
240
+ if i + 1 < len(known_markers):
241
+ succ_marker = known_markers[i + 1]
242
+ else:
243
+ succ_marker = SimStateMarker(bbl_addr_trace[i + 1], parent=marker)
244
+ self.transition_cache.add_node(succ_marker)
245
+ self.transition_cache.add_edge(marker, succ_marker)
246
+ self.transition_cache.nodes[succ_marker]["state"] = ref(succ) if succ is not None else nilref
247
+ if succ is not None:
248
+ self.cache_state(succ)
249
+
250
+ if kind == SuccessorsKind.SAT:
251
+ assert succ is not None
252
+ state = succ
253
+ marker = succ_marker
254
+ i += 1
255
+ continue
256
+ if kind == SuccessorsKind.UNSAT:
257
+ assert succ is not None
258
+ return TestPathReport(
259
+ path_markers={i: marker, i + 1: succ_marker},
260
+ termination=SuccessorsKind.UNSAT,
261
+ )
262
+ return TestPathReport(path_markers={i: marker, i + 1: succ_marker}, termination=SuccessorsKind.MISSING)
263
+
264
+ return TestPathReport(path_markers={i: marker}, termination=SuccessorsKind.SAT)
265
+
266
+
267
+ def find_successor(successors: SimSuccessors, target_addr: int) -> tuple[SimState | None, SuccessorsKind]:
268
+ for succ in successors.flat_successors:
269
+ if succ.addr == target_addr:
270
+ return succ, SuccessorsKind.SAT
271
+ for succ in successors.unsat_successors:
272
+ if succ.addr == target_addr:
273
+ return succ, SuccessorsKind.UNSAT
274
+ for succ in successors.unconstrained_successors:
275
+ succ2 = succ.copy()
276
+ succ2.add_constraints(succ2._ip == target_addr)
277
+ if succ2.satisfiable():
278
+ return succ2, SuccessorsKind.SAT
279
+ return None, SuccessorsKind.MISSING
280
+
281
+
282
+ AnalysesHub.register_default("Pathfinder", Pathfinder)
@@ -740,9 +740,16 @@ class SimEnginePropagatorAIL(
740
740
  return PropValue.from_value_and_details(v, expr.size, expr, self._codeloc())
741
741
 
742
742
  def _ail_handle_DirtyExpression(self, expr: Expr.DirtyExpression) -> PropValue | None: # pylint:disable=no-self-use
743
- if isinstance(expr.dirty_expr, Expr.VEXCCallExpression):
744
- for operand in expr.dirty_expr.operands:
745
- _ = self._expr(operand)
743
+ for operand in expr.operands:
744
+ _ = self._expr(operand)
745
+
746
+ return PropValue.from_value_and_details(self.state.top(expr.bits), expr.size, expr, self._codeloc())
747
+
748
+ def _ail_handle_VEXCCallExpression(
749
+ self, expr: Expr.VEXCCallExpression
750
+ ) -> PropValue | None: # pylint:disable=no-self-use
751
+ for operand in expr.operands:
752
+ _ = self._expr(operand)
746
753
 
747
754
  return PropValue.from_value_and_details(self.state.top(expr.bits), expr.size, expr, self._codeloc())
748
755