angr 9.2.123__py3-none-manylinux2014_x86_64.whl → 9.2.125__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic.

Files changed (103)
  1. angr/__init__.py +1 -1
  2. angr/analyses/__init__.py +9 -1
  3. angr/analyses/cfg/indirect_jump_resolvers/mips_elf_fast.py +11 -8
  4. angr/analyses/cfg/indirect_jump_resolvers/mips_elf_got.py +2 -2
  5. angr/analyses/codecave.py +77 -0
  6. angr/analyses/decompiler/ail_simplifier.py +16 -19
  7. angr/analyses/decompiler/callsite_maker.py +8 -7
  8. angr/analyses/decompiler/ccall_rewriters/amd64_ccalls.py +24 -2
  9. angr/analyses/decompiler/clinic.py +58 -2
  10. angr/analyses/decompiler/condition_processor.py +10 -3
  11. angr/analyses/decompiler/decompilation_cache.py +2 -0
  12. angr/analyses/decompiler/decompiler.py +54 -8
  13. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +10 -2
  14. angr/analyses/decompiler/dephication/rewriting_engine.py +64 -1
  15. angr/analyses/decompiler/expression_narrower.py +5 -1
  16. angr/analyses/decompiler/optimization_passes/__init__.py +3 -0
  17. angr/analyses/decompiler/optimization_passes/div_simplifier.py +4 -1
  18. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +13 -0
  19. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +23 -4
  20. angr/analyses/decompiler/optimization_passes/optimization_pass.py +3 -1
  21. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +8 -5
  22. angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +10 -5
  23. angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +18 -7
  24. angr/analyses/decompiler/optimization_passes/switch_default_case_duplicator.py +6 -0
  25. angr/analyses/decompiler/optimization_passes/tag_slicer.py +41 -0
  26. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +2 -0
  27. angr/analyses/decompiler/peephole_optimizations/const_mull_a_shift.py +2 -0
  28. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +2 -2
  29. angr/analyses/decompiler/peephole_optimizations/remove_cascading_conversions.py +8 -2
  30. angr/analyses/decompiler/region_identifier.py +36 -0
  31. angr/analyses/decompiler/region_simplifiers/loop.py +2 -8
  32. angr/analyses/decompiler/region_simplifiers/switch_cluster_simplifier.py +9 -3
  33. angr/analyses/decompiler/ssailification/rewriting.py +5 -2
  34. angr/analyses/decompiler/ssailification/rewriting_engine.py +151 -25
  35. angr/analyses/decompiler/ssailification/rewriting_state.py +1 -0
  36. angr/analyses/decompiler/ssailification/ssailification.py +17 -9
  37. angr/analyses/decompiler/ssailification/traversal.py +3 -1
  38. angr/analyses/decompiler/ssailification/traversal_engine.py +35 -8
  39. angr/analyses/decompiler/ssailification/traversal_state.py +1 -0
  40. angr/analyses/decompiler/structured_codegen/c.py +42 -4
  41. angr/analyses/decompiler/structuring/phoenix.py +3 -0
  42. angr/analyses/patchfinder.py +137 -0
  43. angr/analyses/pathfinder.py +282 -0
  44. angr/analyses/propagator/engine_ail.py +10 -3
  45. angr/analyses/reaching_definitions/engine_ail.py +10 -15
  46. angr/analyses/s_propagator.py +16 -9
  47. angr/analyses/s_reaching_definitions/s_rda_view.py +127 -63
  48. angr/analyses/smc.py +159 -0
  49. angr/analyses/variable_recovery/engine_ail.py +14 -0
  50. angr/analyses/variable_recovery/engine_base.py +11 -0
  51. angr/angrdb/models.py +1 -2
  52. angr/engines/light/engine.py +12 -0
  53. angr/engines/vex/heavy/heavy.py +2 -0
  54. angr/exploration_techniques/spiller_db.py +1 -2
  55. angr/knowledge_plugins/__init__.py +2 -0
  56. angr/knowledge_plugins/decompilation.py +45 -0
  57. angr/knowledge_plugins/functions/function.py +4 -0
  58. angr/knowledge_plugins/functions/function_manager.py +18 -9
  59. angr/knowledge_plugins/functions/function_parser.py +1 -1
  60. angr/knowledge_plugins/functions/soot_function.py +1 -0
  61. angr/knowledge_plugins/key_definitions/atoms.py +8 -0
  62. angr/misc/ux.py +2 -2
  63. angr/procedures/definitions/parse_win32json.py +2 -1
  64. angr/project.py +17 -1
  65. angr/state_plugins/history.py +6 -4
  66. angr/storage/memory_mixins/actions_mixin.py +7 -7
  67. angr/storage/memory_mixins/address_concretization_mixin.py +5 -5
  68. angr/storage/memory_mixins/bvv_conversion_mixin.py +1 -1
  69. angr/storage/memory_mixins/clouseau_mixin.py +3 -3
  70. angr/storage/memory_mixins/conditional_store_mixin.py +3 -3
  71. angr/storage/memory_mixins/default_filler_mixin.py +3 -3
  72. angr/storage/memory_mixins/memory_mixin.py +45 -34
  73. angr/storage/memory_mixins/paged_memory/page_backer_mixins.py +15 -14
  74. angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +27 -16
  75. angr/storage/memory_mixins/paged_memory/pages/cooperation.py +18 -9
  76. angr/storage/memory_mixins/paged_memory/pages/ispo_mixin.py +5 -5
  77. angr/storage/memory_mixins/paged_memory/pages/multi_values.py +89 -55
  78. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +16 -25
  79. angr/storage/memory_mixins/paged_memory/pages/permissions_mixin.py +11 -9
  80. angr/storage/memory_mixins/paged_memory/pages/ultra_page.py +23 -7
  81. angr/storage/memory_mixins/paged_memory/privileged_mixin.py +1 -1
  82. angr/storage/memory_mixins/regioned_memory/region_meta_mixin.py +9 -7
  83. angr/storage/memory_mixins/regioned_memory/regioned_memory_mixin.py +9 -9
  84. angr/storage/memory_mixins/regioned_memory/static_find_mixin.py +1 -0
  85. angr/storage/memory_mixins/simple_interface_mixin.py +2 -2
  86. angr/storage/memory_mixins/simplification_mixin.py +2 -2
  87. angr/storage/memory_mixins/size_resolution_mixin.py +1 -1
  88. angr/storage/memory_mixins/slotted_memory.py +3 -3
  89. angr/storage/memory_mixins/smart_find_mixin.py +1 -0
  90. angr/storage/memory_mixins/underconstrained_mixin.py +5 -5
  91. angr/storage/memory_mixins/unwrapper_mixin.py +4 -4
  92. angr/storage/memory_object.py +4 -3
  93. angr/utils/bits.py +4 -0
  94. angr/utils/constants.py +1 -1
  95. angr/utils/graph.py +15 -0
  96. angr/utils/tagged_interval_map.py +112 -0
  97. angr/vaults.py +2 -2
  98. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/METADATA +6 -6
  99. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/RECORD +103 -96
  100. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/WHEEL +1 -1
  101. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/LICENSE +0 -0
  102. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/entry_points.txt +0 -0
  103. {angr-9.2.123.dist-info → angr-9.2.125.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,6 @@ import logging
7
7
  import archinfo
8
8
  import claripy
9
9
  import ailment
10
- import pyvex
11
10
  from claripy import FSORT_DOUBLE, FSORT_FLOAT
12
11
 
13
12
  from angr.engines.light import SimEngineLight, SimEngineLightAILMixin, SpOffset
@@ -364,17 +363,7 @@ class SimEngineRDAIL(
364
363
  # self.state.add_use(Register(self.project.arch.sp_offset, self.project.arch.bits // 8))
365
364
 
366
365
  def _ail_handle_DirtyStatement(self, stmt: ailment.Stmt.DirtyStatement):
367
- # TODO: The logic below is subject to change when ailment.Stmt.DirtyStatement is changed
368
-
369
- if isinstance(stmt.dirty_stmt, pyvex.stmt.Dirty):
370
- # TODO: We need dirty helpers for a more complete understanding of clobbered registers
371
- tmp = stmt.dirty_stmt.tmp
372
- if tmp in (-1, 0xFFFFFFFF):
373
- return
374
- size = 32 # FIXME: We don't know the size.
375
- self.state.kill_and_add_definition(Tmp(tmp, size), MultiValues(self.state.top(size)))
376
- else:
377
- l.warning("Unexpected type of dirty statement %s.", type(stmt.dirty_stmt))
366
+ self._expr(stmt.dirty)
378
367
 
379
368
  #
380
369
  # AIL expression handlers
@@ -1125,12 +1114,18 @@ class SimEngineRDAIL(
1125
1114
  stack_addr = self.state.stack_address(expr.offset)
1126
1115
  return MultiValues(stack_addr)
1127
1116
 
1117
+ def _ail_handle_VEXCCallExpression(self, expr: ailment.Expr.VEXCCallExpression) -> MultiValues:
1118
+ for operand in expr.operands:
1119
+ self._expr(operand)
1120
+
1121
+ top = self.state.top(expr.bits)
1122
+ return MultiValues(top)
1123
+
1128
1124
  def _ail_handle_DirtyExpression(
1129
1125
  self, expr: ailment.Expr.DirtyExpression
1130
1126
  ) -> MultiValues: # pylint:disable=no-self-use
1131
- if isinstance(expr.dirty_expr, ailment.Expr.VEXCCallExpression):
1132
- for operand in expr.dirty_expr.operands:
1133
- self._expr(operand)
1127
+ for operand in expr.operands:
1128
+ self._expr(operand)
1134
1129
 
1135
1130
  top = self.state.top(expr.bits)
1136
1131
  return MultiValues(top)
@@ -238,15 +238,22 @@ class SPropagatorAnalysis(Analysis):
238
238
 
239
239
  if len(tmp_uses) <= 2:
240
240
  tmp_used, tmp_use_stmtidx = next(iter(tmp_uses))
241
- if is_const_vvar_load_dirty_assignment(stmt) and not any(
242
- isinstance(stmt_, Store)
243
- for stmt_ in block.statements[tmp_def_stmtidx + 1 : tmp_use_stmtidx]
244
- ):
245
- # we can propagate this load because there is no store between its def and use
246
- replacements[
247
- CodeLocation(block_loc.block_addr, tmp_use_stmtidx, block_idx=block_loc.block_idx)
248
- ][tmp_used] = stmt.src
249
- continue
241
+ if is_const_vvar_load_dirty_assignment(stmt):
242
+ same_inst = (
243
+ block.statements[tmp_def_stmtidx].ins_addr == block.statements[tmp_use_stmtidx].ins_addr
244
+ )
245
+ has_store = any(
246
+ isinstance(stmt_, Store)
247
+ for stmt_ in block.statements[tmp_def_stmtidx + 1 : tmp_use_stmtidx]
248
+ )
249
+ if same_inst or not has_store:
250
+ # we can propagate this load because either we do not consider memory aliasing problem
251
+ # within the same instruction (blocks must be originally lifted with
252
+ # CROSS_INSN_OPT=False), or there is no store between its def and use.
253
+ replacements[
254
+ CodeLocation(block_loc.block_addr, tmp_use_stmtidx, block_idx=block_loc.block_idx)
255
+ ][tmp_used] = stmt.src
256
+ continue
250
257
 
251
258
  self.model.replacements = replacements
252
259
 
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  import logging
4
4
  from collections import defaultdict
5
5
 
6
- from ailment.statement import Assignment, Call, Label
6
+ from ailment.statement import Statement, Assignment, Call, Label
7
7
  from ailment.expression import VirtualVariable, Expression
8
8
 
9
9
  from angr.utils.ail import is_phi_assignment
@@ -17,27 +17,141 @@ from .s_rda_model import SRDAModel
17
17
  log = logging.getLogger(__name__)
18
18
 
19
19
 
20
- class SRDAView:
20
+ class RegVVarPredicate:
21
21
  """
22
- A view of SRDA model that provides various functionalities for querying the model.
22
+ Implements a predicate that is used in get_reg_vvar_by_stmt_idx and get_reg_vvar_by_insn.
23
23
  """
24
24
 
25
- def __init__(self, model: SRDAModel):
26
- self.model = model
25
+ def __init__(self, reg_offset: int, vvars: set[VirtualVariable], arch):
26
+ self.reg_offset = reg_offset
27
+ self.vvars = vvars
28
+ self.arch = arch
27
29
 
28
30
  def _get_call_clobbered_regs(self, stmt: Call) -> set[int]:
29
31
  cc = stmt.calling_convention
30
32
  if cc is None:
31
33
  # get the default calling convention
32
- cc = default_cc(self.model.arch.name) # TODO: platform and language
34
+ cc = default_cc(self.arch.name) # TODO: platform and language
33
35
  if cc is not None:
34
36
  reg_list = cc.CALLER_SAVED_REGS
35
37
  if isinstance(cc.RETURN_VAL, SimRegArg):
36
38
  reg_list.append(cc.RETURN_VAL.reg_name)
37
- return {self.model.arch.registers[reg_name][0] for reg_name in reg_list}
39
+ return {self.arch.registers[reg_name][0] for reg_name in reg_list}
38
40
  log.warning("Cannot determine registers that are clobbered by call statement %r.", stmt)
39
41
  return set()
40
42
 
43
+ def predicate(self, stmt: Statement) -> bool:
44
+ if (
45
+ isinstance(stmt, Assignment)
46
+ and isinstance(stmt.dst, VirtualVariable)
47
+ and stmt.dst.was_reg
48
+ and stmt.dst.reg_offset == self.reg_offset
49
+ ):
50
+ self.vvars.add(stmt.dst)
51
+ return True
52
+ if isinstance(stmt, Call):
53
+ if (
54
+ isinstance(stmt.ret_expr, VirtualVariable)
55
+ and stmt.ret_expr.was_reg
56
+ and stmt.ret_expr.reg_offset == self.reg_offset
57
+ ):
58
+ self.vvars.add(stmt.ret_expr)
59
+ return True
60
+ # is it clobbered maybe?
61
+ clobbered_regs = self._get_call_clobbered_regs(stmt)
62
+ if self.reg_offset in clobbered_regs:
63
+ return True
64
+ return False
65
+
66
+
67
+ class StackVVarPredicate:
68
+ """
69
+ Implements a predicate that is used in get_stack_vvar_by_stmt_idx and get_stack_vvar_by_insn.
70
+ """
71
+
72
+ def __init__(self, stack_offset: int, size: int, vvars: set[VirtualVariable]):
73
+ self.stack_offset = stack_offset
74
+ self.size = size
75
+ self.vvars = vvars
76
+
77
+ def predicate(self, stmt: Statement) -> bool:
78
+ if (
79
+ isinstance(stmt, Assignment)
80
+ and isinstance(stmt.dst, VirtualVariable)
81
+ and stmt.dst.was_stack
82
+ and stmt.dst.stack_offset == self.stack_offset
83
+ and stmt.dst.size == self.size
84
+ ):
85
+ self.vvars.add(stmt.dst)
86
+ return True
87
+ return False
88
+
89
+
90
+ class SRDAView:
91
+ """
92
+ A view of SRDA model that provides various functionalities for querying the model.
93
+ """
94
+
95
+ def __init__(self, model: SRDAModel):
96
+ self.model = model
97
+
98
+ def _get_vvar_by_stmt(
99
+ self, block_addr: int, block_idx: int | None, stmt_idx: int, op_type: ObservationPointType, predicate
100
+ ):
101
+ # find the starting block
102
+ for block in self.model.func_graph:
103
+ if block.addr == block_addr and block.idx == block_idx:
104
+ the_block = block
105
+ break
106
+ else:
107
+ return
108
+
109
+ traversed = set()
110
+ queue = [(the_block, stmt_idx if op_type == ObservationPointType.OP_BEFORE else stmt_idx + 1)]
111
+ while queue:
112
+ block, start_stmt_idx = queue.pop(0)
113
+ traversed.add(block)
114
+
115
+ stmts = block.statements[:start_stmt_idx] if start_stmt_idx is not None else block.statements
116
+
117
+ for stmt in reversed(stmts):
118
+ should_break = predicate(stmt)
119
+ if should_break:
120
+ break
121
+ else:
122
+ # not found
123
+ for pred in self.model.func_graph.predecessors(block):
124
+ if pred not in traversed:
125
+ traversed.add(pred)
126
+ queue.append((pred, None))
127
+
128
+ def get_reg_vvar_by_stmt(
129
+ self, reg_offset: int, block_addr: int, block_idx: int | None, stmt_idx: int, op_type: ObservationPointType
130
+ ) -> VirtualVariable | None:
131
+ reg_offset = get_reg_offset_base(reg_offset, self.model.arch)
132
+ vvars = set()
133
+ predicater = RegVVarPredicate(reg_offset, vvars, self.model.arch)
134
+ self._get_vvar_by_stmt(block_addr, block_idx, stmt_idx, op_type, predicater.predicate)
135
+
136
+ assert len(vvars) <= 1
137
+ return next(iter(vvars), None)
138
+
139
+ def get_stack_vvar_by_stmt( # pylint: disable=too-many-positional-arguments
140
+ self,
141
+ stack_offset: int,
142
+ size: int,
143
+ block_addr: int,
144
+ block_idx: int | None,
145
+ stmt_idx: int,
146
+ op_type: ObservationPointType,
147
+ ) -> VirtualVariable | None:
148
+ vvars = set()
149
+ predicater = StackVVarPredicate(stack_offset, size, vvars)
150
+ self._get_vvar_by_stmt(block_addr, block_idx, stmt_idx, op_type, predicater.predicate)
151
+
152
+ assert len(vvars) <= 1
153
+ return next(iter(vvars), None)
154
+
41
155
  def _get_vvar_by_insn(self, addr: int, op_type: ObservationPointType, predicate, block_idx: int | None = None):
42
156
  # find the starting block
43
157
  for block in self.model.func_graph:
@@ -47,6 +161,7 @@ class SRDAView:
47
161
  else:
48
162
  return
49
163
 
164
+ # determine the starting stmt_idx
50
165
  starting_stmt_idx = len(the_block.statements) if op_type == ObservationPointType.OP_AFTER else 0
51
166
  for stmt_idx, stmt in enumerate(the_block.statements):
52
167
  # skip all labels and phi assignments
@@ -65,55 +180,16 @@ class SRDAView:
65
180
  starting_stmt_idx = stmt_idx
66
181
  break
67
182
 
68
- traversed = set()
69
- queue = [(the_block, starting_stmt_idx)]
70
- while queue:
71
- block, start_stmt_idx = queue.pop(0)
72
- traversed.add(block)
73
-
74
- stmts = block.statements[:start_stmt_idx] if start_stmt_idx is not None else block.statements
75
-
76
- for stmt in reversed(stmts):
77
- should_break = predicate(stmt)
78
- if should_break:
79
- break
80
- else:
81
- # not found
82
- for pred in self.model.func_graph.predecessors(block):
83
- if pred not in traversed:
84
- traversed.add(pred)
85
- queue.append((pred, None))
183
+ self._get_vvar_by_stmt(the_block.addr, the_block.idx, starting_stmt_idx, op_type, predicate)
86
184
 
87
185
  def get_reg_vvar_by_insn(
88
186
  self, reg_offset: int, addr: int, op_type: ObservationPointType, block_idx: int | None = None
89
187
  ) -> VirtualVariable | None:
90
188
  reg_offset = get_reg_offset_base(reg_offset, self.model.arch)
91
189
  vvars = set()
190
+ predicater = RegVVarPredicate(reg_offset, vvars, self.model.arch)
92
191
 
93
- def _predicate(stmt) -> bool:
94
- if (
95
- isinstance(stmt, Assignment)
96
- and isinstance(stmt.dst, VirtualVariable)
97
- and stmt.dst.was_reg
98
- and stmt.dst.reg_offset == reg_offset
99
- ):
100
- vvars.add(stmt.dst)
101
- return True
102
- if isinstance(stmt, Call):
103
- if (
104
- isinstance(stmt.ret_expr, VirtualVariable)
105
- and stmt.ret_expr.was_reg
106
- and stmt.ret_expr.reg_offset == reg_offset
107
- ):
108
- vvars.add(stmt.ret_expr)
109
- return True
110
- # is it clobbered maybe?
111
- clobbered_regs = self._get_call_clobbered_regs(stmt)
112
- if reg_offset in clobbered_regs:
113
- return True
114
- return False
115
-
116
- self._get_vvar_by_insn(addr, op_type, _predicate, block_idx=block_idx)
192
+ self._get_vvar_by_insn(addr, op_type, predicater.predicate, block_idx=block_idx)
117
193
 
118
194
  assert len(vvars) <= 1
119
195
  return next(iter(vvars), None)
@@ -122,20 +198,8 @@ class SRDAView:
122
198
  self, stack_offset: int, size: int, addr: int, op_type: ObservationPointType, block_idx: int | None = None
123
199
  ) -> VirtualVariable | None:
124
200
  vvars = set()
125
-
126
- def _predicate(stmt) -> bool:
127
- if (
128
- isinstance(stmt, Assignment)
129
- and isinstance(stmt.dst, VirtualVariable)
130
- and stmt.dst.was_stack
131
- and stmt.dst.stack_offset == stack_offset
132
- and stmt.dst.size == size
133
- ):
134
- vvars.add(stmt.dst)
135
- return True
136
- return False
137
-
138
- self._get_vvar_by_insn(addr, op_type, _predicate, block_idx=block_idx)
201
+ predicater = StackVVarPredicate(stack_offset, size, vvars)
202
+ self._get_vvar_by_insn(addr, op_type, predicater.predicate, block_idx=block_idx)
139
203
 
140
204
  assert len(vvars) <= 1
141
205
  return next(iter(vvars), None)
angr/analyses/smc.py ADDED
@@ -0,0 +1,159 @@
1
+ from __future__ import annotations
2
+ import logging
3
+ import random
4
+
5
+ from enum import auto, IntFlag
6
+ from collections.abc import Generator
7
+
8
+ import angr
9
+ from angr.analyses import Analysis, AnalysesHub
10
+ from angr.knowledge_plugins import Function
11
+ from angr.sim_state import SimState
12
+
13
+ from angr.utils.tagged_interval_map import TaggedIntervalMap
14
+
15
+
16
+ log = logging.getLogger(__name__)
17
+ log.setLevel(logging.INFO)
18
+
19
+
20
+ class TraceActions(IntFlag):
21
+ """
22
+ Describe memory access actions.
23
+ """
24
+
25
+ WRITE = auto()
26
+ EXECUTE = auto()
27
+
28
+
29
+ class TraceClassifier:
30
+ """
31
+ Classify traces.
32
+ """
33
+
34
+ def __init__(self, state: SimState | None = None):
35
+ self.map = TaggedIntervalMap()
36
+ if state:
37
+ self.instrument(state)
38
+
39
+ def act_mem_write(self, state) -> None:
40
+ """
41
+ SimInspect callback for memory writes.
42
+ """
43
+ addr = state.solver.eval(state.inspect.mem_write_address)
44
+ length = state.inspect.mem_write_length
45
+ if not isinstance(length, int):
46
+ length = state.solver.eval(length)
47
+ self.map.add(addr, length, TraceActions.WRITE)
48
+
49
+ def act_instruction(self, state) -> None:
50
+ """
51
+ SimInspect callback for instruction execution.
52
+ """
53
+ addr = state.inspect.instruction
54
+ if addr is None:
55
+ log.warning("Symbolic addr")
56
+ return
57
+
58
+ # FIXME: Ensure block size is correct
59
+ self.map.add(addr, state.block().size, TraceActions.EXECUTE)
60
+
61
+ def instrument(self, state) -> None:
62
+ """
63
+ Instrument `state` for tracing.
64
+ """
65
+ state.inspect.b("mem_write", when=angr.BP_BEFORE, action=self.act_mem_write)
66
+ state.inspect.b("instruction", when=angr.BP_BEFORE, action=self.act_instruction)
67
+
68
+ def get_smc_address_and_lengths(self) -> Generator[tuple[int, int]]:
69
+ """
70
+ Evaluate the trace to find which areas of memory were both written to and executed.
71
+ """
72
+ smc_flags = TraceActions.WRITE | TraceActions.EXECUTE
73
+ for addr, size, flags in self.map.irange():
74
+ if (flags & smc_flags) == smc_flags:
75
+ yield (addr, size)
76
+
77
+ def determine_smc(self) -> bool:
78
+ """
79
+ Evaluate the trace to find areas of memory that were both written to and executed.
80
+ """
81
+ return any(self.get_smc_address_and_lengths())
82
+
83
+ def pp(self):
84
+ for a, b, c in self.map.irange():
85
+ print(f"{a:8x} {b} {c}")
86
+
87
+
88
+ class SelfModifyingCodeAnalysis(Analysis):
89
+ """
90
+ Determine if some piece of code is self-modifying.
91
+
92
+ This determination is made by simply executing. If an address is executed
93
+ that is also written to, the code is determined to be self-modifying. The
94
+ determination is stored in the `result` property. The `regions` property
95
+ contains a list of (addr, length) regions that were both written to and
96
+ executed.
97
+ """
98
+
99
+ result: bool
100
+ regions: list[tuple[int, int]]
101
+
102
+ def __init__(self, subject: None | int | str | Function, max_bytes: int = 0, state: SimState | None = None):
103
+ """
104
+ :param subject: Subject of analysis
105
+ :param max_bytes: Maximum number of bytes from subject address. 0 for no limit (default).
106
+ :param state: State to begin executing from from.
107
+ """
108
+ assert self.project.selfmodifying_code
109
+
110
+ if subject is None:
111
+ subject = self.project.entry
112
+ if isinstance(subject, str):
113
+ try:
114
+ addr = self.project.kb.labels.lookup(subject)
115
+ except KeyError:
116
+ addr = self.project.kb.functions[subject].addr
117
+ elif isinstance(subject, Function):
118
+ addr = subject.addr
119
+ elif isinstance(subject, int):
120
+ addr = subject
121
+ else:
122
+ raise ValueError("Not a supported subject")
123
+
124
+ if state is None:
125
+ init_state = self.project.factory.call_state(addr)
126
+ else:
127
+ init_state = state.copy()
128
+ init_state.regs.pc = addr
129
+
130
+ init_state.options -= angr.sim_options.simplification
131
+
132
+ self._trace_classifier = TraceClassifier(init_state)
133
+ simgr = self.project.factory.simgr(init_state)
134
+
135
+ kwargs = {}
136
+ if max_bytes:
137
+ kwargs["filter_func"] = lambda s: (
138
+ "active" if s.solver.eval(addr <= s.regs.pc) and s.solver.eval(s.regs.pc < addr + max_bytes) else "oob"
139
+ )
140
+
141
+ # FIXME: Early out on SMC detect
142
+ # FIXME: Configurable step threshold
143
+ # FIXME: Loop analysis
144
+
145
+ for n in range(100):
146
+ self._update_progress(n)
147
+ simgr.step(n=3)
148
+ random.shuffle(simgr.active)
149
+ simgr.split(from_stash="active", to_stash=simgr.DROP, limit=10)
150
+
151
+ # Classify any out of bound entrypoints
152
+ for state_ in simgr.stashes["oob"]:
153
+ self._trace_classifier.act_instruction(state_)
154
+
155
+ self.regions = list(self._trace_classifier.get_smc_address_and_lengths())
156
+ self.result = len(self.regions) > 0
157
+
158
+
159
+ AnalysesHub.register_default("SMC", SelfModifyingCodeAnalysis)
@@ -223,6 +223,20 @@ class SimEngineVRAIL(
223
223
  for ret_expr in stmt.ret_exprs:
224
224
  self._expr(ret_expr)
225
225
 
226
+ def _ail_handle_DirtyExpression(self, expr: ailment.Expr.DirtyExpression) -> RichR:
227
+ for op in expr.operands:
228
+ self._expr(op)
229
+ if expr.guard:
230
+ self._expr(expr.guard)
231
+ if expr.maddr:
232
+ self._expr(expr.maddr)
233
+ return RichR(self.state.top(expr.bits))
234
+
235
+ def _ail_handle_VEXCCallExpression(self, expr: ailment.Expr.VEXCCallExpression) -> RichR:
236
+ for op in expr.operands:
237
+ self._expr(op)
238
+ return RichR(self.state.top(expr.bits))
239
+
226
240
  # Expression handlers
227
241
 
228
242
  def _expr(self, expr: ailment.Expr.Expression):
@@ -435,6 +435,14 @@ class SimEngineVRBase(SimEngineLight):
435
435
  region=self.func_addr,
436
436
  )
437
437
  self.variable_manager[self.func_addr].add_variable("register", vvar.oident, variable)
438
+ elif vvar.was_tmp:
439
+ # FIXME: we treat all tmp vvars as registers
440
+ variable = SimRegisterVariable(
441
+ 4096 + vvar.tmp_idx,
442
+ vvar.size,
443
+ ident=self.variable_manager[self.func_addr].next_variable_ident("register"),
444
+ region=self.func_addr,
445
+ )
438
446
  else:
439
447
  raise NotImplementedError
440
448
  else:
@@ -1071,6 +1079,9 @@ class SimEngineVRBase(SimEngineLight):
1071
1079
  self.variable_manager[self.func_addr].add_variable("stack", vvar.stack_offset, variable)
1072
1080
  elif vvar.category == ailment.Expr.VirtualVariableCategory.PARAMETER:
1073
1081
  raise KeyError(f"Missing virtual variable for parameter {vvar}")
1082
+ elif vvar.category == ailment.Expr.VirtualVariableCategory.TMP:
1083
+ # we don't track variables for tmps
1084
+ pass
1074
1085
  else:
1075
1086
  raise NotImplementedError
1076
1087
 
angr/angrdb/models.py CHANGED
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
  from sqlalchemy import Column, Integer, String, Boolean, BLOB, ForeignKey
3
- from sqlalchemy.orm import relationship
4
- from sqlalchemy.ext.declarative import declarative_base
3
+ from sqlalchemy.orm import declarative_base, relationship
5
4
 
6
5
  Base = declarative_base()
7
6
 
@@ -957,6 +957,9 @@ class SimEngineLightAILMixin(SimEngineLightMixin):
957
957
  def _ail_handle_Return(self, stmt):
958
958
  pass
959
959
 
960
+ def _ail_handle_DirtyStatement(self, stmt):
961
+ self._expr(stmt.dirty)
962
+
960
963
  #
961
964
  # Expression handlers
962
965
  #
@@ -1009,6 +1012,15 @@ class SimEngineLightAILMixin(SimEngineLightMixin):
1009
1012
 
1010
1013
  return expr
1011
1014
 
1015
+ def _ail_handle_DirtyExpression(self, expr: ailment.Expr.DirtyExpression):
1016
+ for operand in expr.operands:
1017
+ self._expr(operand)
1018
+ if expr.guard is not None:
1019
+ self._expr(expr.guard)
1020
+ if expr.maddr is not None:
1021
+ self._expr(expr.maddr)
1022
+ return expr
1023
+
1012
1024
  def _ail_handle_UnaryOp(self, expr):
1013
1025
  handler_name = f"_handle_{expr.op}"
1014
1026
  try:
@@ -90,6 +90,7 @@ class HeavyVEXMixin(SuccessorsMixin, ClaripyDataMixin, SimStateStorageMixin, VEX
90
90
  num_inst=None,
91
91
  extra_stop_points=None,
92
92
  opt_level=None,
93
+ strict_block_end=None,
93
94
  **kwargs,
94
95
  ):
95
96
  if not pyvex.lifting.lifters[self.state.arch.name] or type(successors.addr) is not int:
@@ -144,6 +145,7 @@ class HeavyVEXMixin(SuccessorsMixin, ClaripyDataMixin, SimStateStorageMixin, VEX
144
145
  num_inst=num_inst,
145
146
  extra_stop_points=extra_stop_points,
146
147
  opt_level=opt_level,
148
+ strict_block_end=strict_block_end,
147
149
  )
148
150
 
149
151
  if (
@@ -5,8 +5,7 @@ import datetime
5
5
  try:
6
6
  import sqlalchemy
7
7
  from sqlalchemy import Column, Integer, String, Boolean, DateTime, create_engine
8
- from sqlalchemy.orm import sessionmaker
9
- from sqlalchemy.ext.declarative import declarative_base
8
+ from sqlalchemy.orm import declarative_base, sessionmaker
10
9
  from sqlalchemy.exc import OperationalError
11
10
 
12
11
  Base = declarative_base()
@@ -17,6 +17,7 @@ from .structured_code import StructuredCodeManager
17
17
  from .types import TypesStore
18
18
  from .callsite_prototypes import CallsitePrototypes
19
19
  from .custom_strings import CustomStrings
20
+ from .decompilation import DecompilationManager
20
21
 
21
22
 
22
23
  __all__ = (
@@ -38,4 +39,5 @@ __all__ = (
38
39
  "TypesStore",
39
40
  "CallsitePrototypes",
40
41
  "CustomStrings",
42
+ "DecompilationManager",
41
43
  )
@@ -0,0 +1,45 @@
1
+ # pylint:disable=import-outside-toplevel
2
+ from __future__ import annotations
3
+
4
+ from typing import Any, TYPE_CHECKING
5
+
6
+ from .plugin import KnowledgeBasePlugin
7
+
8
+ if TYPE_CHECKING:
9
+ from angr.analyses.decompiler.decompilation_cache import DecompilationCache
10
+
11
+
12
+ class DecompilationManager(KnowledgeBasePlugin):
13
+ """A knowledge base plugin to store decompilation results."""
14
+
15
+ def __init__(self, kb):
16
+ super().__init__(kb=kb)
17
+ self.cached: dict[Any, DecompilationCache] = {}
18
+
19
+ def _normalize_key(self, item: int | str):
20
+ if type(item) is str:
21
+ item = (self._kb.labels.lookup(item[0]), *item[1:])
22
+ return item
23
+
24
+ def __getitem__(self, item) -> DecompilationCache:
25
+ return self.cached[self._normalize_key(item)]
26
+
27
+ def __setitem__(self, key, value: DecompilationCache):
28
+ self.cached[self._normalize_key(key)] = value
29
+
30
+ def __contains__(self, key):
31
+ return self._normalize_key(key) in self.cached
32
+
33
+ def __delitem__(self, key):
34
+ del self.cached[self._normalize_key(key)]
35
+
36
+ def discard(self, key):
37
+ normalized_key = self._normalize_key(key)
38
+ if normalized_key in self.cached:
39
+ del self.cached[normalized_key]
40
+
41
+ def copy(self):
42
+ raise NotImplementedError
43
+
44
+
45
+ KnowledgeBasePlugin.register_default("decompilations", DecompilationManager)