angr 9.2.134__py3-none-macosx_11_0_arm64.whl → 9.2.136__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (174) hide show
  1. angr/__init__.py +1 -1
  2. angr/analyses/__init__.py +5 -8
  3. angr/analyses/analysis.py +4 -0
  4. angr/analyses/backward_slice.py +1 -2
  5. angr/analyses/binary_optimizer.py +3 -4
  6. angr/analyses/bindiff.py +4 -6
  7. angr/analyses/boyscout.py +1 -3
  8. angr/analyses/callee_cleanup_finder.py +4 -4
  9. angr/analyses/calling_convention/__init__.py +6 -0
  10. angr/analyses/{calling_convention.py → calling_convention/calling_convention.py} +32 -64
  11. angr/analyses/calling_convention/fact_collector.py +502 -0
  12. angr/analyses/calling_convention/utils.py +57 -0
  13. angr/analyses/cdg.py +1 -2
  14. angr/analyses/cfg/cfb.py +1 -3
  15. angr/analyses/cfg/cfg.py +2 -2
  16. angr/analyses/cfg/cfg_base.py +37 -35
  17. angr/analyses/cfg/cfg_emulated.py +1 -1
  18. angr/analyses/cfg/cfg_fast.py +62 -15
  19. angr/analyses/cfg/cfg_fast_soot.py +1 -1
  20. angr/analyses/cfg/indirect_jump_resolvers/__init__.py +2 -0
  21. angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +46 -10
  22. angr/analyses/cfg/indirect_jump_resolvers/default_resolvers.py +5 -1
  23. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +50 -14
  24. angr/analyses/cfg/indirect_jump_resolvers/memload_resolver.py +81 -0
  25. angr/analyses/cfg/indirect_jump_resolvers/propagator_utils.py +24 -5
  26. angr/analyses/cfg/indirect_jump_resolvers/x86_pe_iat.py +2 -5
  27. angr/analyses/complete_calling_conventions.py +32 -3
  28. angr/analyses/congruency_check.py +2 -3
  29. angr/analyses/data_dep/data_dependency_analysis.py +2 -2
  30. angr/analyses/ddg.py +1 -4
  31. angr/analyses/decompiler/ail_simplifier.py +3 -4
  32. angr/analyses/decompiler/clinic.py +42 -7
  33. angr/analyses/decompiler/optimization_passes/duplication_reverter/ail_merge_graph.py +2 -2
  34. angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +2 -2
  35. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +1 -1
  36. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
  37. angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +0 -6
  38. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +2 -7
  39. angr/analyses/decompiler/optimization_passes/switch_default_case_duplicator.py +0 -6
  40. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +0 -6
  41. angr/analyses/decompiler/structuring/phoenix.py +1 -1
  42. angr/analyses/disassembly.py +5 -5
  43. angr/analyses/fcp/__init__.py +4 -0
  44. angr/analyses/fcp/fcp.py +429 -0
  45. angr/analyses/identifier/identify.py +1 -3
  46. angr/analyses/loopfinder.py +4 -3
  47. angr/analyses/patchfinder.py +1 -1
  48. angr/analyses/propagator/engine_base.py +4 -3
  49. angr/analyses/propagator/propagator.py +14 -53
  50. angr/analyses/reassembler.py +1 -2
  51. angr/analyses/s_propagator.py +1 -3
  52. angr/analyses/soot_class_hierarchy.py +1 -2
  53. angr/analyses/stack_pointer_tracker.py +18 -2
  54. angr/analyses/static_hooker.py +1 -2
  55. angr/analyses/typehoon/simple_solver.py +2 -2
  56. angr/analyses/variable_recovery/engine_vex.py +5 -0
  57. angr/analyses/variable_recovery/variable_recovery_fast.py +1 -2
  58. angr/analyses/veritesting.py +4 -7
  59. angr/analyses/vfg.py +1 -1
  60. angr/analyses/vsa_ddg.py +1 -2
  61. angr/block.py +3 -2
  62. angr/callable.py +1 -3
  63. angr/calling_conventions.py +15 -7
  64. angr/codenode.py +5 -1
  65. angr/concretization_strategies/__init__.py +1 -83
  66. angr/concretization_strategies/any.py +2 -1
  67. angr/concretization_strategies/any_named.py +1 -1
  68. angr/concretization_strategies/base.py +81 -0
  69. angr/concretization_strategies/controlled_data.py +2 -1
  70. angr/concretization_strategies/eval.py +2 -1
  71. angr/concretization_strategies/logging.py +3 -1
  72. angr/concretization_strategies/max.py +2 -1
  73. angr/concretization_strategies/nonzero.py +2 -1
  74. angr/concretization_strategies/nonzero_range.py +2 -1
  75. angr/concretization_strategies/norepeats.py +2 -1
  76. angr/concretization_strategies/norepeats_range.py +2 -1
  77. angr/concretization_strategies/range.py +2 -1
  78. angr/concretization_strategies/signed_add.py +2 -1
  79. angr/concretization_strategies/single.py +2 -1
  80. angr/concretization_strategies/solutions.py +2 -1
  81. angr/concretization_strategies/unlimited_range.py +2 -1
  82. angr/engines/__init__.py +8 -5
  83. angr/engines/engine.py +3 -5
  84. angr/engines/failure.py +4 -5
  85. angr/engines/procedure.py +5 -7
  86. angr/engines/soot/expressions/__init__.py +22 -23
  87. angr/engines/soot/expressions/base.py +4 -4
  88. angr/engines/soot/expressions/invoke.py +1 -2
  89. angr/engines/soot/statements/__init__.py +9 -10
  90. angr/engines/soot/values/__init__.py +9 -10
  91. angr/engines/soot/values/arrayref.py +3 -3
  92. angr/engines/soot/values/instancefieldref.py +3 -2
  93. angr/engines/successors.py +7 -6
  94. angr/engines/syscall.py +4 -6
  95. angr/engines/unicorn.py +3 -2
  96. angr/engines/vex/claripy/ccall.py +8 -10
  97. angr/engines/vex/claripy/datalayer.py +4 -5
  98. angr/exploration_techniques/__init__.py +0 -2
  99. angr/exploration_techniques/spiller.py +1 -3
  100. angr/exploration_techniques/stochastic.py +2 -3
  101. angr/factory.py +3 -9
  102. angr/knowledge_plugins/cfg/cfg_model.py +20 -17
  103. angr/knowledge_plugins/functions/function.py +74 -77
  104. angr/knowledge_plugins/functions/function_manager.py +14 -7
  105. angr/knowledge_plugins/functions/function_parser.py +1 -1
  106. angr/knowledge_plugins/functions/soot_function.py +16 -16
  107. angr/knowledge_plugins/propagations/propagation_model.py +4 -5
  108. angr/knowledge_plugins/propagations/states.py +0 -511
  109. angr/lib/angr_native.dylib +0 -0
  110. angr/procedures/libc/memcpy.py +4 -4
  111. angr/procedures/procedure_dict.py +3 -2
  112. angr/protos/__init__.py +2 -5
  113. angr/protos/cfg_pb2.py +21 -18
  114. angr/protos/function_pb2.py +17 -14
  115. angr/protos/primitives_pb2.py +44 -39
  116. angr/protos/variables_pb2.py +36 -31
  117. angr/protos/xrefs_pb2.py +15 -12
  118. angr/sim_procedure.py +15 -16
  119. angr/sim_variable.py +13 -1
  120. angr/simos/__init__.py +2 -0
  121. angr/simos/javavm.py +4 -6
  122. angr/simos/xbox.py +32 -0
  123. angr/state_plugins/__init__.py +0 -2
  124. angr/state_plugins/callstack.py +4 -4
  125. angr/state_plugins/cgc.py +3 -2
  126. angr/state_plugins/gdb.py +6 -5
  127. angr/state_plugins/globals.py +1 -2
  128. angr/state_plugins/heap/heap_brk.py +1 -2
  129. angr/state_plugins/history.py +10 -12
  130. angr/state_plugins/inspect.py +3 -5
  131. angr/state_plugins/libc.py +2 -2
  132. angr/state_plugins/log.py +8 -10
  133. angr/state_plugins/loop_data.py +1 -2
  134. angr/state_plugins/posix.py +7 -7
  135. angr/state_plugins/preconstrainer.py +2 -3
  136. angr/state_plugins/scratch.py +5 -8
  137. angr/state_plugins/sim_action.py +3 -3
  138. angr/state_plugins/solver.py +8 -3
  139. angr/state_plugins/symbolizer.py +5 -4
  140. angr/state_plugins/uc_manager.py +3 -3
  141. angr/state_plugins/unicorn_engine.py +5 -1
  142. angr/state_plugins/view.py +3 -5
  143. angr/storage/file.py +3 -5
  144. angr/storage/memory_mixins/address_concretization_mixin.py +2 -2
  145. angr/storage/memory_mixins/bvv_conversion_mixin.py +3 -3
  146. angr/storage/memory_mixins/clouseau_mixin.py +1 -3
  147. angr/storage/memory_mixins/name_resolution_mixin.py +1 -3
  148. angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +13 -15
  149. angr/storage/memory_mixins/paged_memory/pages/__init__.py +1 -22
  150. angr/storage/memory_mixins/paged_memory/pages/base.py +31 -0
  151. angr/storage/memory_mixins/paged_memory/pages/list_page.py +1 -1
  152. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +1 -1
  153. angr/storage/memory_mixins/paged_memory/pages/ultra_page.py +2 -4
  154. angr/storage/memory_mixins/paged_memory/privileged_mixin.py +3 -4
  155. angr/storage/memory_mixins/regioned_memory/abstract_merger_mixin.py +4 -2
  156. angr/storage/memory_mixins/smart_find_mixin.py +1 -1
  157. angr/storage/memory_mixins/underconstrained_mixin.py +1 -1
  158. angr/storage/memory_mixins/unwrapper_mixin.py +1 -3
  159. angr/utils/bits.py +13 -0
  160. angr/utils/enums_conv.py +28 -12
  161. angr/utils/segment_list.py +25 -22
  162. angr/utils/timing.py +18 -1
  163. angr/vaults.py +5 -6
  164. {angr-9.2.134.dist-info → angr-9.2.136.dist-info}/METADATA +6 -6
  165. {angr-9.2.134.dist-info → angr-9.2.136.dist-info}/RECORD +169 -165
  166. {angr-9.2.134.dist-info → angr-9.2.136.dist-info}/WHEEL +1 -1
  167. angr/analyses/propagator/outdated_definition_walker.py +0 -159
  168. angr/analyses/propagator/tmpvar_finder.py +0 -18
  169. angr/engines/concrete.py +0 -180
  170. angr/exploration_techniques/symbion.py +0 -80
  171. angr/state_plugins/concrete.py +0 -295
  172. {angr-9.2.134.dist-info → angr-9.2.136.dist-info}/LICENSE +0 -0
  173. {angr-9.2.134.dist-info → angr-9.2.136.dist-info}/entry_points.txt +0 -0
  174. {angr-9.2.134.dist-info → angr-9.2.136.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,502 @@
1
+ from __future__ import annotations
2
+ from typing import Any
3
+
4
+ import pyvex
5
+ import claripy
6
+
7
+ from angr.utils.bits import s2u, u2s
8
+ from angr.block import Block
9
+ from angr.analyses.analysis import Analysis
10
+ from angr.analyses import AnalysesHub
11
+ from angr.knowledge_plugins.functions import Function
12
+ from angr.codenode import BlockNode, HookNode
13
+ from angr.engines.light import SimEngineNostmtVEX, SimEngineLight, SpOffset, RegisterOffset
14
+ from angr.calling_conventions import SimRegArg, SimStackArg, default_cc
15
+ from angr.sim_type import SimTypeBottom
16
+ from .utils import is_sane_register_variable
17
+
18
+
19
class FactCollectorState:
    """
    The abstract state for FactCollector.

    One instance describes a single explored path. It records which register and stack offsets were read
    before being written, which byte offsets were written, which registers were stored onto the stack, and the
    current stack-pointer / base-pointer offsets.
    """

    __slots__ = (
        "bp_value",
        "callee_stored_regs",
        "reg_reads",
        "reg_writes",
        "simple_stack",
        "sp_value",
        "stack_reads",
        "stack_writes",
        "tmps",
    )

    def __init__(self):
        # per-block temporaries; only carried over by copy(with_tmps=True)
        self.tmps = {}
        # stack offset -> value stored at that offset
        self.simple_stack = {}

        self.callee_stored_regs: dict[int, int] = {}  # reg offset -> stack offset
        self.reg_reads: dict[int, int] = {}  # reg offset -> largest read size in bytes
        self.reg_writes: set[int] = set()  # every register byte offset that has been written
        self.stack_reads: dict[int, int] = {}  # stack offset -> largest read size in bytes
        self.stack_writes: set[int] = set()  # every stack byte offset that has been written
        self.sp_value = 0
        self.bp_value = 0

    def register_read(self, offset: int, size_in_bytes: int) -> None:
        """
        Record a register read, keeping the largest size seen for each offset.

        The read is ignored when `offset` itself has already been written. Only the first byte of the access
        is checked against the written set.

        :param offset:          The register offset that is read.
        :param size_in_bytes:   The size of the read, in bytes.
        """
        if offset in self.reg_writes:
            return
        self.reg_reads[offset] = max(self.reg_reads.get(offset, size_in_bytes), size_in_bytes)

    def register_written(self, offset: int, size_in_bytes: int) -> None:
        """
        Mark every byte in [offset, offset + size_in_bytes) of the register file as written.

        :param offset:          The register offset that is written.
        :param size_in_bytes:   The size of the write, in bytes.
        """
        self.reg_writes.update(range(offset, offset + size_in_bytes))

    def stack_read(self, offset: int, size_in_bytes: int) -> None:
        """
        Record a stack read, keeping the largest size seen for each offset.

        The read is ignored when `offset` itself has already been written. Only the first byte of the access
        is checked against the written set.

        :param offset:          The stack offset that is read.
        :param size_in_bytes:   The size of the read, in bytes.
        """
        if offset in self.stack_writes:
            return
        self.stack_reads[offset] = max(self.stack_reads.get(offset, size_in_bytes), size_in_bytes)

    def stack_written(self, offset: int, size_int_bytes: int) -> None:
        """
        Mark every byte in [offset, offset + size_int_bytes) of the stack as written.

        :param offset:          The stack offset that is written.
        :param size_int_bytes:  The size of the write, in bytes. The spelling of this parameter name is kept
                                as is so that keyword callers continue to work.
        """
        self.stack_writes.update(range(offset, offset + size_int_bytes))

    def copy(self, with_tmps: bool = False) -> FactCollectorState:
        """
        Create a copy of this state whose containers are independent of the original's.

        :param with_tmps:   Also copy the temporaries. By default the new state starts with no temporaries.
        :return:            The new state.
        """
        new_state = FactCollectorState()
        new_state.reg_reads = self.reg_reads.copy()
        new_state.stack_reads = self.stack_reads.copy()
        new_state.stack_writes = self.stack_writes.copy()
        new_state.reg_writes = self.reg_writes.copy()
        new_state.callee_stored_regs = self.callee_stored_regs.copy()
        new_state.sp_value = self.sp_value
        new_state.bp_value = self.bp_value
        new_state.simple_stack = self.simple_stack.copy()
        if with_tmps:
            new_state.tmps = self.tmps.copy()
        return new_state
85
+
86
+
87
binop_handler = SimEngineNostmtVEX[FactCollectorState, claripy.ast.BV, FactCollectorState].binop_handler


class SimEngineFactCollectorVEX(
    SimEngineNostmtVEX[FactCollectorState, SpOffset | RegisterOffset | int, None],
    SimEngineLight[type[FactCollectorState], SpOffset | RegisterOffset | int, Block, None],
):
    """
    The engine for FactCollector.

    Expressions evaluate to an SpOffset (a stack-pointer-relative value), a RegisterOffset (the value read from a
    register), an int (a constant), or None when the value is not tracked. Register and stack accesses are
    recorded on the FactCollectorState as a side effect.
    """

    def __init__(self, project, bp_as_gpr: bool):
        """
        :param project:     The project, passed through to the base engine.
        :param bp_as_gpr:   When True, reads of the base pointer are handled like reads of any other register
                            instead of being interpreted as a stack offset.
        """
        self.bp_as_gpr = bp_as_gpr
        super().__init__(project)

    def _process_block_end(self, stmt_result: list, whitelist: set[int] | None) -> None:
        # a block that ends with a call is treated as overwriting the register at arch.ret_offset
        if self.block.vex.jumpkind == "Ijk_Call":
            self.state.register_written(self.arch.ret_offset, self.arch.bytes)

    def _top(self, bits: int):
        # untracked values are represented as None
        return None

    def _is_top(self, expr: Any) -> bool:
        raise NotImplementedError

    def _handle_conversion(self, from_size: int, to_size: int, signed: bool, operand: pyvex.IRExpr) -> Any:
        # conversion results are not tracked
        return None

    def _handle_stmt_Put(self, stmt):
        """
        Update the tracked sp/bp offset when a stack offset is written to sp/bp; record a register write for
        every other Put.
        """
        v = self._expr(stmt.data)
        if stmt.offset == self.arch.sp_offset and isinstance(v, SpOffset):
            self.state.sp_value = v.offset
        elif stmt.offset == self.arch.bp_offset and isinstance(v, SpOffset):
            self.state.bp_value = v.offset
        else:
            self.state.register_written(stmt.offset, stmt.data.result_size(self.tyenv) // self.arch.byte_width)

    def _handle_stmt_Store(self, stmt: pyvex.IRStmt.Store):
        """
        Record stores whose address is a stack offset. Stores to any other address are ignored, and their data
        expression is not evaluated here.
        """
        addr = self._expr(stmt.addr)
        if isinstance(addr, SpOffset):
            self.state.stack_written(addr.offset, stmt.data.result_size(self.tyenv) // self.arch.byte_width)
            data = self._expr(stmt.data)
            if isinstance(data, RegisterOffset) and not isinstance(data, SpOffset):
                # push reg; we record the stored register as well as the stack slot offset
                self.state.callee_stored_regs[data.reg] = u2s(addr.offset, self.arch.bits)
            if isinstance(data, SpOffset):
                # remember stack offsets that are spilled to the stack so that a later load can recover them
                self.state.simple_stack[addr.offset] = data

    def _handle_stmt_WrTmp(self, stmt: pyvex.IRStmt.WrTmp):
        # only tracked (non-None) values are stored; reading an unset tmp later yields None
        v = self._expr(stmt.data)
        if v is not None:
            self.state.tmps[stmt.tmp] = v

    def _handle_expr_Const(self, expr: pyvex.IRExpr.Const):
        return expr.con.value

    def _handle_expr_GSPTR(self, expr):
        return None

    def _handle_expr_Get(self, expr) -> SpOffset | RegisterOffset:
        """
        Evaluate a register read.

        Reading sp (and bp, unless bp_as_gpr is set) yields the tracked stack offset and is not recorded as a
        register read. Every other read is recorded on the state and yields a RegisterOffset.
        """
        if expr.offset == self.arch.sp_offset:
            return SpOffset(self.arch.bits, self.state.sp_value, is_base=False)
        if expr.offset == self.arch.bp_offset and not self.bp_as_gpr:
            return SpOffset(self.arch.bits, self.state.bp_value, is_base=False)
        bits = expr.result_size(self.tyenv)
        self.state.register_read(expr.offset, bits // self.arch.byte_width)
        return RegisterOffset(bits, expr.offset, 0)

    def _handle_expr_GetI(self, expr):
        return None

    def _handle_expr_ITE(self, expr):
        return None

    def _handle_expr_Load(self, expr):
        """
        Record loads whose address is a stack offset and return the stack offset previously stored in that slot,
        if any. Loads from any other address are not tracked.
        """
        addr = self._expr(expr.addr)
        if isinstance(addr, SpOffset):
            self.state.stack_read(addr.offset, expr.result_size(self.tyenv) // self.arch.byte_width)
            return self.state.simple_stack.get(addr.offset)
        return None

    def _handle_expr_RdTmp(self, expr):
        return self.state.tmps.get(expr.tmp, None)

    def _handle_expr_VECRET(self, expr):
        return None

    @binop_handler
    def _handle_binop_Add(self, expr):
        # addition is commutative: sp + const and const + sp both yield a stack offset
        op0, op1 = self._expr(expr.args[0]), self._expr(expr.args[1])
        if isinstance(op0, SpOffset) and isinstance(op1, int):
            return SpOffset(op0.bits, s2u(op0.offset + op1, op0.bits), is_base=op0.is_base)
        if isinstance(op1, SpOffset) and isinstance(op0, int):
            return SpOffset(op1.bits, s2u(op1.offset + op0, op1.bits), is_base=op1.is_base)
        return None

    @binop_handler
    def _handle_binop_Sub(self, expr):
        # both operands are always evaluated so that register/stack reads inside them are recorded
        op0, op1 = self._expr(expr.args[0]), self._expr(expr.args[1])
        if isinstance(op0, SpOffset) and isinstance(op1, int):
            return SpOffset(op0.bits, s2u(op0.offset - op1, op0.bits), is_base=op0.is_base)
        # subtraction is not commutative: `const - sp` negates the stack pointer and is therefore not a stack
        # offset, so it is left untracked instead of being mistaken for `sp - const`
        return None

    @binop_handler
    def _handle_binop_And(self, expr):
        # the stack offset operand is passed through unchanged and the other operand is ignored
        # NOTE(review): presumably this is meant to see through stack-alignment masks - confirm
        op0, op1 = self._expr(expr.args[0]), self._expr(expr.args[1])
        if isinstance(op0, SpOffset):
            return op0
        if isinstance(op1, SpOffset):
            return op1
        return None
200
+
201
+
202
class FactCollector(Analysis):
    """
    An extremely fast analysis that extracts necessary facts of a function for CallingConventionAnalysis to make
    decision on the calling convention and prototype of a function.

    After construction, the results are available as:

    - input_args:   the register and stack arguments that the function appears to take.
    - retval_size:  the size of the return value in bytes, or None if it could not be determined.
    """

    def __init__(self, func: Function, max_depth: int = 5):
        """
        :param func:        The function to analyze.
        :param max_depth:   The maximum depth of the forward traversal from the start point of the function. It
                            is also used as an additional bound when enqueueing predecessors in the two endpoint
                            scans.
        """
        self.function = func
        self._max_depth = max_depth

        self.input_args: list[SimRegArg | SimStackArg] | None = None
        self.retval_size: int | None = None

        self._analyze()

    def _analyze(self) -> None:
        # breadth-first search using function graph, collect registers and stack variables that are written to as well
        # as read from, until max_depth is reached

        end_states = self._analyze_startpoint()
        self._analyze_endpoints_for_retval_size()
        callee_restored_regs = self._analyze_endpoints_for_restored_regs()
        self._determine_input_args(end_states, callee_restored_regs)

    def _analyze_startpoint(self) -> list[FactCollectorState]:
        """
        Traverse the function graph forwards from the start point, running the fact-collecting engine on every
        block, and return the state at the end of every explored path.
        """
        func_graph = self.function.transition_graph
        startpoint = self.function.startpoint
        bp_as_gpr = self.function.info.get("bp_as_gpr", False)
        engine = SimEngineFactCollectorVEX(self.project, bp_as_gpr)
        init_state = FactCollectorState()
        if self.project.arch.call_pushes_ret:
            # on entry the return address is already on the stack, so start one word above offset 0
            init_state.sp_value = self.project.arch.bytes
        init_state.bp_value = init_state.sp_value

        traversed = set()
        # queue items are (depth, state, node, node to continue with after a call returns)
        queue: list[tuple[int, FactCollectorState, BlockNode | HookNode | Function, BlockNode | HookNode | None]] = [
            (0, init_state, startpoint, None)
        ]
        end_states: list[FactCollectorState] = []
        while queue:
            depth, state, node, retnode = queue.pop(0)
            # nodes are marked when dequeued, so a node can be enqueued more than once before its first visit
            traversed.add(node)

            if depth > self._max_depth:
                end_states.append(state)
                break

            if isinstance(node, BlockNode) and node.size == 0:
                continue
            if isinstance(node, HookNode):
                # attempt to convert it into a function
                if self.kb.functions.contains_addr(node.addr):
                    node = self.kb.functions.get_by_addr(node.addr)
                else:
                    continue
            if isinstance(node, Function):
                if node.calling_convention is not None and node.prototype is not None:
                    # consume args and overwrite the return register
                    self._handle_function(state, node)
                if node.returning is False or retnode is None:
                    # the function call does not return
                    end_states.append(state)
                else:
                    # enqueue the retnode, but we don't increment the depth
                    new_state = state.copy()
                    if self.project.arch.call_pushes_ret:
                        # the callee has popped the return address by the time it returns
                        new_state.sp_value += self.project.arch.bytes
                    queue.append((depth, new_state, retnode, None))
                continue

            block = self.project.factory.block(node.addr, size=node.size)
            engine.process(state, block=block)

            successor_added = False
            call_succ, ret_succ = None, None
            for _, succ, data in func_graph.out_edges(node, data=True):
                edge_type = data.get("type")
                if succ not in traversed and depth + 1 <= self._max_depth:
                    if edge_type == "fake_return":
                        ret_succ = succ
                    elif edge_type == "transition":
                        successor_added = True
                        queue.append((depth + 1, state.copy(), succ, None))
                    elif edge_type == "call":
                        call_succ = succ
            # the call target is enqueued after the loop so that it can be paired with the fake-return successor
            if call_succ is not None:
                successor_added = True
                queue.append((depth + 1, state.copy(), call_succ, ret_succ))

            if not successor_added:
                end_states.append(state)

        return end_states

    def _handle_function(self, state: FactCollectorState, func: Function) -> None:
        """
        Apply the effects of calling `func` to `state`: its argument locations are recorded as read and its
        caller-saved registers are recorded as written.

        Nothing is recorded when the argument locations cannot be computed or contain None.
        """
        try:
            arg_locs = func.calling_convention.arg_locs(func.prototype)
        except (TypeError, ValueError):
            return

        if None in arg_locs:
            return

        for arg_loc in arg_locs:
            for loc in arg_loc.get_footprint():
                if isinstance(loc, SimRegArg):
                    state.register_read(self.project.arch.registers[loc.reg_name][0] + loc.reg_offset, loc.size)
                elif isinstance(loc, SimStackArg):
                    sp_value = state.sp_value
                    if sp_value is not None:
                        state.stack_read(sp_value + loc.stack_offset, loc.size)

        # clobber caller-saved regs
        for reg_name in func.calling_convention.CALLER_SAVED_REGS:
            offset = self.project.arch.registers[reg_name][0]
            state.register_written(offset, self.project.arch.registers[reg_name][1])

    def _analyze_endpoints_for_retval_size(self) -> None:
        """
        Analyze all endpoints to determine the return value size.

        The result is stored in self.retval_size. It is left untouched when the default calling convention does
        not return values in a register.
        """
        func_graph = self.function.transition_graph
        cc_cls = default_cc(
            self.project.arch.name, platform=self.project.simos.name if self.project.simos is not None else None
        )
        cc = cc_cls(self.project.arch)
        if isinstance(cc.RETURN_VAL, SimRegArg):
            retreg_offset = cc.RETURN_VAL.check_offset(self.project.arch)
        else:
            return

        retval_sizes = []
        for endpoint in self.function.endpoints:
            traversed = set()
            queue: list[tuple[int, BlockNode | HookNode]] = [(0, endpoint)]
            while queue:
                depth, node = queue.pop(0)
                traversed.add(node)

                # NOTE(review): the backward search is cut off at a hard-coded depth of 3, while predecessors are
                # enqueued against self._max_depth below; confirm which bound is intended
                if depth > 3:
                    break

                if isinstance(node, BlockNode) and node.size == 0:
                    continue
                if isinstance(node, HookNode):
                    # attempt to convert it into a function
                    if self.kb.functions.contains_addr(node.addr):
                        node = self.kb.functions.get_by_addr(node.addr)
                    else:
                        continue
                if isinstance(node, Function):
                    if (
                        node.calling_convention is not None
                        and node.prototype is not None
                        and node.prototype.returnty is not None
                        and not isinstance(node.prototype.returnty, SimTypeBottom)
                    ):
                        # assume the function overwrites the return variable
                        retval_size = (
                            node.prototype.returnty.with_arch(self.project.arch).size // self.project.arch.byte_width
                        )
                        retval_sizes.append(retval_size)
                    continue

                block = self.project.factory.block(node.addr, size=node.size)
                # scan the block statements backwards to find writes to the return value register
                # NOTE(review): the loop does not stop at the first match, so the size that is kept is the one of
                # the earliest matching Put in the block; confirm that this is intended
                retval_size = None
                for stmt in reversed(block.vex.statements):
                    if isinstance(stmt, pyvex.IRStmt.Put):
                        size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
                        if stmt.offset == retreg_offset:
                            retval_size = max(size, 1)

                if retval_size is not None:
                    # this block writes the return register; do not look at its predecessors
                    retval_sizes.append(retval_size)
                    continue

                for pred, _, data in func_graph.in_edges(node, data=True):
                    edge_type = data.get("type")
                    if pred not in traversed and depth + 1 <= self._max_depth:
                        if edge_type == "fake_return":
                            continue
                        if edge_type in {"transition", "call"}:
                            queue.append((depth + 1, pred))

        self.retval_size = max(retval_sizes) if retval_sizes else None

    def _analyze_endpoints_for_restored_regs(self) -> set[int]:
        """
        Analyze all endpoints to determine the restored registers.

        :return:    The offsets of all registers that are written with a full-word value loaded from the stack.
        """
        func_graph = self.function.transition_graph
        callee_restored_regs = set()

        for endpoint in self.function.endpoints:
            traversed = set()
            queue: list[tuple[int, BlockNode | HookNode]] = [(0, endpoint)]
            while queue:
                depth, node = queue.pop(0)
                traversed.add(node)

                # NOTE(review): hard-coded cut-off of 3, see the predecessor check at the end of the loop
                if depth > 3:
                    break

                if isinstance(node, BlockNode) and node.size == 0:
                    continue
                if isinstance(node, (HookNode, Function)):
                    continue

                block = self.project.factory.block(node.addr, size=node.size)
                # scan the block statements in order and classify each tmp as a stack pointer ("sp"), a value
                # loaded from the stack ("stack_value"), or a constant ("const"), to find all statements that
                # restore registers from the stack
                tmps = {}
                for stmt in block.vex.statements:
                    if isinstance(stmt, pyvex.IRStmt.WrTmp):
                        if isinstance(stmt.data, pyvex.IRExpr.Get) and stmt.data.offset in {
                            self.project.arch.bp_offset,
                            self.project.arch.sp_offset,
                        }:
                            tmps[stmt.tmp] = "sp"
                        elif (
                            isinstance(stmt.data, pyvex.IRExpr.Load)
                            and isinstance(stmt.data.addr, pyvex.IRExpr.RdTmp)
                            and tmps.get(stmt.data.addr.tmp) == "sp"
                        ):
                            tmps[stmt.tmp] = "stack_value"
                        elif isinstance(stmt.data, pyvex.IRExpr.Const):
                            tmps[stmt.tmp] = "const"
                        elif isinstance(stmt.data, pyvex.IRExpr.Binop) and (  # noqa:SIM102
                            stmt.data.op.startswith("Iop_Add") or stmt.data.op.startswith("Iop_Sub")
                        ):
                            # sp +/- anything is still considered a stack pointer
                            if (
                                isinstance(stmt.data.args[0], pyvex.IRExpr.RdTmp)
                                and tmps.get(stmt.data.args[0].tmp) == "sp"
                            ) or (
                                isinstance(stmt.data.args[1], pyvex.IRExpr.RdTmp)
                                and tmps.get(stmt.data.args[1].tmp) == "sp"
                            ):
                                tmps[stmt.tmp] = "sp"
                    if isinstance(stmt, pyvex.IRStmt.Put):
                        size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
                        # is the data loaded from the stack?
                        if (
                            size == self.project.arch.bytes
                            and isinstance(stmt.data, pyvex.IRExpr.RdTmp)
                            and tmps.get(stmt.data.tmp) == "stack_value"
                        ):
                            callee_restored_regs.add(stmt.offset)

                # only plain transitions are followed backwards
                for pred, _, data in func_graph.in_edges(node, data=True):
                    edge_type = data.get("type")
                    if pred not in traversed and depth + 1 <= self._max_depth and edge_type == "transition":
                        queue.append((depth + 1, pred))

        return callee_restored_regs

    def _determine_input_args(self, end_states: list[FactCollectorState], callee_restored_regs: set[int]) -> None:
        """
        Populate self.input_args from the reads that are recorded in the end states.

        :param end_states:              The states at the end of all explored paths.
        :param callee_restored_regs:    Offsets of registers that are restored from the stack at the endpoints.
        """
        self.input_args = []
        reg_offset_created = set()
        callee_saved_regs = set()
        callee_saved_reg_stack_offsets = set()

        # determine callee-saved registers
        # a register counts as callee-saved if it is both stored to the stack and restored at an endpoint
        for state in end_states:
            for reg_offset, stack_offset in state.callee_stored_regs.items():
                if reg_offset in callee_restored_regs:
                    callee_saved_regs.add(reg_offset)
                    callee_saved_reg_stack_offsets.add(stack_offset)

        for state in end_states:
            for offset, size in state.reg_reads.items():
                if (
                    offset in reg_offset_created
                    or offset == self.project.arch.bp_offset
                    or not is_sane_register_variable(self.project.arch, offset, size)
                    or offset in callee_saved_regs
                ):
                    continue
                reg_offset_created.add(offset)
                if self.project.arch.name in {"AMD64", "X86"} and size < self.project.arch.bytes:
                    # use complete registers on AMD64 and X86
                    reg_name = self.project.arch.translate_register_name(offset, size=self.project.arch.bytes)
                    arg = SimRegArg(reg_name, self.project.arch.bytes)
                else:
                    reg_name = self.project.arch.translate_register_name(offset, size=size)
                    arg = SimRegArg(reg_name, size)
                self.input_args.append(arg)

        stack_offset_created = set()
        ret_addr_offset = 0 if not self.project.arch.call_pushes_ret else self.project.arch.bytes
        for state in end_states:
            for offset, size in state.stack_reads.items():
                # stack offsets are stored as unsigned values; convert back to signed before comparing
                offset = u2s(offset, self.project.arch.bits)
                # only reads strictly above the return address slot are turned into stack arguments
                if offset - ret_addr_offset > 0:
                    if offset in stack_offset_created or offset in callee_saved_reg_stack_offsets:
                        continue
                    stack_offset_created.add(offset)
                    arg = SimStackArg(offset - ret_addr_offset, size)
                    self.input_args.append(arg)


AnalysesHub.register_default("FunctionFactCollector", FactCollector)
@@ -0,0 +1,57 @@
1
+ from __future__ import annotations
2
+ import logging
3
+
4
+ import archinfo
5
+ from archinfo.arch_arm import is_arm_arch, ArchARMHF
6
+
7
+ from angr.calling_conventions import SimCC
8
+
9
+ l = logging.getLogger(__name__)
10
+
11
+
12
def is_sane_register_variable(arch: archinfo.Arch, reg_offset: int, reg_size: int, def_cc: SimCC | None = None) -> bool:
    """
    Filter out registers that are surely not used for passing function arguments.

    This can be seen as a workaround, since VariableRecoveryFast sometimes gives input variables of cc_ndep (which
    is a VEX-specific register) :-(

    :param arch:        The architecture that the register belongs to.
    :param reg_offset:  The register offset.
    :param reg_size:    The register size.
    :param def_cc:      An optional calling convention. It is only consulted for pcode architectures, where a
                        register is accepted only if its name is in def_cc.ARG_REGS.
    :return:            True if it is an acceptable function argument, False otherwise.
    :rtype:             bool
    """

    def _within(*bounds: tuple[int, int]) -> bool:
        # True if reg_offset falls into any of the half-open [low, high) ranges
        return any(low <= reg_offset < high for low, high in bounds)

    name = arch.name

    if ":" in name:
        # for pcode architectures, we only leave registers that are known to be used as input arguments
        if def_cc is None:
            return True
        return arch.translate_register_name(reg_offset, size=reg_size) in def_cc.ARG_REGS

    # VEX
    if name == "AARCH64":
        return _within((16, 80))  # x0-x7

    if name == "AMD64":
        # rcx, rdx; then rsi, rdi, r8, r9, r10
        # the xmm0-xmm7 range (224 <= reg_offset < 480) is currently not checked
        return _within((24, 40), (64, 104))

    if is_arm_arch(arch):
        if isinstance(arch, ArchARMHF):
            # the same core register range as plain ARM below, plus s0 - s7, or d0 - d4
            return _within((8, 24), (128, 160))
        return _within((8, 24))  # r0-r3

    if name == "MIPS32":
        return _within((24, 40))  # a0-a3

    if name == "MIPS64":
        return _within((48, 80), (112, 208))  # a0-a3 or t4-t7

    if name == "PPC32":
        return _within((28, 60))  # r3-r10

    if name == "X86":
        # eax, ebx, ecx, edx; then xmm0-xmm7
        return _within((8, 24), (160, 288))

    l.critical("Unsupported architecture %s.", arch.name)
    return True
angr/analyses/cdg.py CHANGED
@@ -3,6 +3,7 @@ import logging
3
3
 
4
4
  import networkx
5
5
 
6
+ from angr.analyses import AnalysesHub
6
7
  from angr.utils.graph import compute_dominance_frontier, PostDominators, TemporaryNode
7
8
  from . import Analysis
8
9
 
@@ -185,6 +186,4 @@ class CDG(Analysis):
185
186
  _l.debug("%s is not in post dominator dict.", b2)
186
187
 
187
188
 
188
- from angr.analyses import AnalysesHub
189
-
190
189
  AnalysesHub.register_default("CDG", CDG)
angr/analyses/cfg/cfb.py CHANGED
@@ -6,9 +6,9 @@ from collections.abc import Callable
6
6
  import cle
7
7
  from cle.backends.externs import KernelObject, ExternObject
8
8
  from cle.backends.tls.elf_tls import ELFTLSObject
9
-
10
9
  from sortedcontainers import SortedDict
11
10
 
11
+ from angr.analyses import AnalysesHub
12
12
  from angr.knowledge_plugins.cfg.memory_data import MemoryDataSort, MemoryData
13
13
  from angr.analyses.analysis import Analysis
14
14
 
@@ -424,7 +424,5 @@ class CFBlanket(Analysis):
424
424
  addr = max_addr
425
425
 
426
426
 
427
- from angr.analyses import AnalysesHub
428
-
429
427
  AnalysesHub.register_default("CFB", CFBlanket)
430
428
  AnalysesHub.register_default("CFBlanket", CFBlanket)
angr/analyses/cfg/cfg.py CHANGED
@@ -1,6 +1,8 @@
1
1
  from __future__ import annotations
2
+
2
3
  import sys
3
4
 
5
+ from angr.analyses import AnalysesHub
4
6
  from .cfg_fast import CFGFast
5
7
 
6
8
 
@@ -69,6 +71,4 @@ class CFG(CFGFast): # pylint: disable=abstract-method
69
71
  CFGFast.__init__(self, **kwargs)
70
72
 
71
73
 
72
- from angr.analyses import AnalysesHub
73
-
74
74
  AnalysesHub.register_default("CFG", CFG)