angr 9.2.142__py3-none-win_amd64.whl → 9.2.144__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/calling_convention/calling_convention.py +22 -10
- angr/analyses/calling_convention/fact_collector.py +72 -14
- angr/analyses/cfg/cfg_base.py +7 -2
- angr/analyses/cfg/cfg_emulated.py +13 -4
- angr/analyses/cfg/cfg_fast.py +21 -60
- angr/analyses/cfg/indirect_jump_resolvers/__init__.py +2 -0
- angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +12 -1
- angr/analyses/cfg/indirect_jump_resolvers/constant_value_manager.py +107 -0
- angr/analyses/cfg/indirect_jump_resolvers/default_resolvers.py +2 -1
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +6 -102
- angr/analyses/cfg/indirect_jump_resolvers/syscall_resolver.py +92 -0
- angr/analyses/complete_calling_conventions.py +18 -5
- angr/analyses/decompiler/ail_simplifier.py +95 -65
- angr/analyses/decompiler/clinic.py +162 -68
- angr/analyses/decompiler/decompiler.py +4 -4
- angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +1 -1
- angr/analyses/decompiler/optimization_passes/condition_constprop.py +49 -14
- angr/analyses/decompiler/optimization_passes/ite_region_converter.py +8 -0
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -5
- angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +5 -0
- angr/analyses/decompiler/peephole_optimizations/__init__.py +2 -0
- angr/analyses/decompiler/peephole_optimizations/a_sub_a_shr_const_shr_const.py +37 -0
- angr/analyses/decompiler/peephole_optimizations/simplify_pc_relative_loads.py +15 -1
- angr/analyses/decompiler/sequence_walker.py +8 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +2 -0
- angr/analyses/decompiler/ssailification/ssailification.py +10 -2
- angr/analyses/decompiler/ssailification/traversal_engine.py +17 -2
- angr/analyses/decompiler/structured_codegen/c.py +25 -4
- angr/analyses/decompiler/utils.py +13 -0
- angr/analyses/disassembly.py +3 -3
- angr/analyses/fcp/fcp.py +1 -4
- angr/analyses/s_propagator.py +40 -29
- angr/analyses/s_reaching_definitions/s_rda_model.py +45 -36
- angr/analyses/s_reaching_definitions/s_rda_view.py +6 -3
- angr/analyses/s_reaching_definitions/s_reaching_definitions.py +41 -42
- angr/analyses/typehoon/dfa.py +13 -3
- angr/analyses/typehoon/typehoon.py +60 -18
- angr/analyses/typehoon/typevars.py +11 -7
- angr/analyses/variable_recovery/engine_ail.py +19 -23
- angr/analyses/variable_recovery/engine_base.py +26 -30
- angr/analyses/variable_recovery/variable_recovery_fast.py +17 -21
- angr/calling_conventions.py +18 -8
- angr/knowledge_plugins/functions/function.py +29 -15
- angr/knowledge_plugins/key_definitions/constants.py +2 -2
- angr/knowledge_plugins/key_definitions/liveness.py +4 -4
- angr/lib/angr_native.dll +0 -0
- angr/procedures/definitions/linux_kernel.py +5 -0
- angr/state_plugins/unicorn_engine.py +24 -8
- angr/storage/memory_mixins/paged_memory/page_backer_mixins.py +1 -2
- angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +2 -2
- angr/utils/doms.py +40 -33
- angr/utils/graph.py +26 -20
- angr/utils/ssa/__init__.py +21 -14
- angr/utils/ssa/vvar_uses_collector.py +2 -2
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/METADATA +11 -8
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/RECORD +61 -58
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/WHEEL +1 -1
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/LICENSE +0 -0
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/entry_points.txt +0 -0
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
|
angr/analyses/calling_convention/calling_convention.py
CHANGED
|
@@ -220,9 +220,9 @@ class CallingConventionAnalysis(Analysis):
|
|
|
220
220
|
self.prototype = prototype # type: ignore
|
|
221
221
|
return
|
|
222
222
|
if self._function.is_plt:
|
|
223
|
-
|
|
224
|
-
if
|
|
225
|
-
self.cc, self.prototype =
|
|
223
|
+
r_plt = self._analyze_plt()
|
|
224
|
+
if r_plt is not None:
|
|
225
|
+
self.cc, self.prototype, self.prototype_libname = r_plt
|
|
226
226
|
return
|
|
227
227
|
|
|
228
228
|
r = self._analyze_function()
|
|
@@ -278,11 +278,11 @@ class CallingConventionAnalysis(Analysis):
|
|
|
278
278
|
self.cc = cc
|
|
279
279
|
self.prototype = prototype
|
|
280
280
|
|
|
281
|
-
def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction | None] | None:
|
|
281
|
+
def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction | None, str | None] | None:
|
|
282
282
|
"""
|
|
283
283
|
Get the calling convention for a PLT stub.
|
|
284
284
|
|
|
285
|
-
:return: A calling convention.
|
|
285
|
+
:return: A calling convention, the function type, as well as the library name if available.
|
|
286
286
|
"""
|
|
287
287
|
assert self._function is not None
|
|
288
288
|
|
|
@@ -326,11 +326,11 @@ class CallingConventionAnalysis(Analysis):
|
|
|
326
326
|
# we only take the prototype from the SimProcedure if
|
|
327
327
|
# - the SimProcedure is a function
|
|
328
328
|
# - the prototype of the SimProcedure is not guessed
|
|
329
|
-
return cc, hooker.prototype
|
|
329
|
+
return cc, hooker.prototype, hooker.library_name
|
|
330
330
|
if real_func.prototype is not None:
|
|
331
|
-
return cc, real_func.prototype
|
|
331
|
+
return cc, real_func.prototype, real_func.prototype_libname
|
|
332
332
|
else:
|
|
333
|
-
return cc, real_func.prototype
|
|
333
|
+
return cc, real_func.prototype, real_func.prototype_libname
|
|
334
334
|
|
|
335
335
|
if self.analyze_callsites:
|
|
336
336
|
# determine the calling convention by analyzing its callsites
|
|
@@ -344,7 +344,7 @@ class CallingConventionAnalysis(Analysis):
|
|
|
344
344
|
prototype = self._adjust_prototype(
|
|
345
345
|
prototype, callsite_facts, update_arguments=UpdateArgumentsOption.AlwaysUpdate
|
|
346
346
|
)
|
|
347
|
-
return cc, prototype
|
|
347
|
+
return cc, prototype, None
|
|
348
348
|
|
|
349
349
|
return None
|
|
350
350
|
|
|
@@ -864,7 +864,19 @@ class CallingConventionAnalysis(Analysis):
|
|
|
864
864
|
else:
|
|
865
865
|
int_args.append(arg)
|
|
866
866
|
|
|
867
|
-
|
|
867
|
+
initial_stack_args = sorted([a for a in args if isinstance(a, SimStackArg)], key=lambda a: a.stack_offset)
|
|
868
|
+
# ensure stack args are consecutive if necessary
|
|
869
|
+
if cc.STACKARG_SP_DIFF is not None and initial_stack_args:
|
|
870
|
+
arg_by_offset = {a.stack_offset: a for a in initial_stack_args}
|
|
871
|
+
init_stackarg_offset = cc.STACKARG_SP_DIFF + cc.STACKARG_SP_BUFF
|
|
872
|
+
int_arg_size = self.project.arch.bytes
|
|
873
|
+
for stackarg_offset in range(init_stackarg_offset, max(arg_by_offset), int_arg_size):
|
|
874
|
+
if stackarg_offset not in arg_by_offset:
|
|
875
|
+
arg_by_offset[stackarg_offset] = SimStackArg(stackarg_offset, int_arg_size)
|
|
876
|
+
stack_args = [arg_by_offset[offset] for offset in sorted(arg_by_offset)]
|
|
877
|
+
else:
|
|
878
|
+
stack_args = initial_stack_args
|
|
879
|
+
|
|
868
880
|
stack_int_args = [a for a in stack_args if not a.is_fp]
|
|
869
881
|
stack_fp_args = [a for a in stack_args if a.is_fp]
|
|
870
882
|
# match int args first
|
|
angr/analyses/calling_convention/fact_collector.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
# pylint:disable=too-many-boolean-expressions
|
|
2
2
|
from __future__ import annotations
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any, TYPE_CHECKING
|
|
4
|
+
from collections import defaultdict
|
|
4
5
|
|
|
5
6
|
import pyvex
|
|
6
7
|
import claripy
|
|
7
8
|
|
|
9
|
+
from angr import SIM_LIBRARIES, SIM_TYPE_COLLECTIONS
|
|
8
10
|
from angr.utils.bits import s2u, u2s
|
|
9
11
|
from angr.block import Block
|
|
10
12
|
from angr.analyses.analysis import Analysis
|
|
@@ -13,9 +15,12 @@ from angr.knowledge_plugins.functions import Function
|
|
|
13
15
|
from angr.codenode import BlockNode, HookNode
|
|
14
16
|
from angr.engines.light import SimEngineNostmtVEX, SimEngineLight, SpOffset, RegisterOffset
|
|
15
17
|
from angr.calling_conventions import SimRegArg, SimStackArg, default_cc
|
|
16
|
-
from angr.sim_type import SimTypeBottom
|
|
18
|
+
from angr.sim_type import SimTypeBottom, dereference_simtype, SimTypeFunction
|
|
17
19
|
from .utils import is_sane_register_variable
|
|
18
20
|
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from angr.codenode import CodeNode
|
|
23
|
+
|
|
19
24
|
|
|
20
25
|
class FactCollectorState:
|
|
21
26
|
"""
|
|
@@ -26,6 +31,7 @@ class FactCollectorState:
|
|
|
26
31
|
"bp_value",
|
|
27
32
|
"callee_stored_regs",
|
|
28
33
|
"reg_reads",
|
|
34
|
+
"reg_reads_count",
|
|
29
35
|
"reg_writes",
|
|
30
36
|
"simple_stack",
|
|
31
37
|
"sp_value",
|
|
@@ -40,6 +46,7 @@ class FactCollectorState:
|
|
|
40
46
|
|
|
41
47
|
self.callee_stored_regs: dict[int, int] = {} # reg offset -> stack offset
|
|
42
48
|
self.reg_reads = {}
|
|
49
|
+
self.reg_reads_count = defaultdict(int)
|
|
43
50
|
self.reg_writes: set[int] = set()
|
|
44
51
|
self.stack_reads = {}
|
|
45
52
|
self.stack_writes: set[int] = set()
|
|
@@ -47,6 +54,7 @@ class FactCollectorState:
|
|
|
47
54
|
self.bp_value = 0
|
|
48
55
|
|
|
49
56
|
def register_read(self, offset: int, size_in_bytes: int):
|
|
57
|
+
self.reg_reads_count[offset] += 1
|
|
50
58
|
if offset in self.reg_writes:
|
|
51
59
|
return
|
|
52
60
|
if offset not in self.reg_reads:
|
|
@@ -54,6 +62,14 @@ class FactCollectorState:
|
|
|
54
62
|
else:
|
|
55
63
|
self.reg_reads[offset] = max(self.reg_reads[offset], size_in_bytes)
|
|
56
64
|
|
|
65
|
+
def register_read_undo(self, offset: int) -> None:
|
|
66
|
+
if offset not in self.reg_reads or offset not in self.reg_reads_count:
|
|
67
|
+
return
|
|
68
|
+
self.reg_reads_count[offset] -= 1
|
|
69
|
+
if self.reg_reads_count[offset] == 0:
|
|
70
|
+
self.reg_reads.pop(offset)
|
|
71
|
+
self.reg_reads_count.pop(offset)
|
|
72
|
+
|
|
57
73
|
def register_written(self, offset: int, size_in_bytes: int):
|
|
58
74
|
for o in range(size_in_bytes):
|
|
59
75
|
self.reg_writes.add(offset + o)
|
|
@@ -80,6 +96,7 @@ class FactCollectorState:
|
|
|
80
96
|
new_state.sp_value = self.sp_value
|
|
81
97
|
new_state.bp_value = self.bp_value
|
|
82
98
|
new_state.simple_stack = self.simple_stack.copy()
|
|
99
|
+
new_state.reg_reads_count = self.reg_reads_count.copy()
|
|
83
100
|
if with_tmps:
|
|
84
101
|
new_state.tmps = self.tmps.copy()
|
|
85
102
|
return new_state
|
|
@@ -115,6 +132,26 @@ class SimEngineFactCollectorVEX(
|
|
|
115
132
|
|
|
116
133
|
def _handle_stmt_Put(self, stmt):
|
|
117
134
|
v = self._expr(stmt.data)
|
|
135
|
+
# there are cases like VMOV.F32 S0, S0
|
|
136
|
+
# so we need to check if this register write is actually a no-op
|
|
137
|
+
if isinstance(stmt.data, pyvex.IRExpr.RdTmp):
|
|
138
|
+
t = self.state.tmps.get(stmt.data.tmp, None)
|
|
139
|
+
if isinstance(t, RegisterOffset) and t.reg == stmt.offset:
|
|
140
|
+
same_ins_read = False
|
|
141
|
+
for i in range(self.stmt_idx, -1, -1):
|
|
142
|
+
if i >= self.block.vex.stmts_used:
|
|
143
|
+
break
|
|
144
|
+
prev_stmt = self.block.vex.statements[i]
|
|
145
|
+
if isinstance(prev_stmt, pyvex.IRStmt.IMark):
|
|
146
|
+
break
|
|
147
|
+
if isinstance(prev_stmt, pyvex.IRStmt.WrTmp) and prev_stmt.tmp == stmt.data.tmp:
|
|
148
|
+
same_ins_read = True
|
|
149
|
+
break
|
|
150
|
+
if same_ins_read:
|
|
151
|
+
# we need to revert the read operation as well
|
|
152
|
+
self.state.register_read_undo(stmt.offset)
|
|
153
|
+
return
|
|
154
|
+
|
|
118
155
|
if stmt.offset == self.arch.sp_offset and isinstance(v, SpOffset):
|
|
119
156
|
self.state.sp_value = v.offset
|
|
120
157
|
elif stmt.offset == self.arch.bp_offset and isinstance(v, SpOffset):
|
|
@@ -206,7 +243,7 @@ class FactCollector(Analysis):
|
|
|
206
243
|
decision on the calling convention and prototype of a function.
|
|
207
244
|
"""
|
|
208
245
|
|
|
209
|
-
def __init__(self, func: Function, max_depth: int =
|
|
246
|
+
def __init__(self, func: Function, max_depth: int = 30):
|
|
210
247
|
self.function = func
|
|
211
248
|
self._max_depth = max_depth
|
|
212
249
|
|
|
@@ -224,9 +261,12 @@ class FactCollector(Analysis):
|
|
|
224
261
|
callee_restored_regs = self._analyze_endpoints_for_restored_regs()
|
|
225
262
|
self._determine_input_args(end_states, callee_restored_regs)
|
|
226
263
|
|
|
227
|
-
def _analyze_startpoint(self):
|
|
264
|
+
def _analyze_startpoint(self) -> list[FactCollectorState]:
|
|
228
265
|
func_graph = self.function.transition_graph
|
|
229
266
|
startpoint = self.function.startpoint
|
|
267
|
+
if startpoint is None:
|
|
268
|
+
return []
|
|
269
|
+
|
|
230
270
|
bp_as_gpr = self.function.info.get("bp_as_gpr", False)
|
|
231
271
|
engine = SimEngineFactCollectorVEX(self.project, bp_as_gpr)
|
|
232
272
|
init_state = FactCollectorState()
|
|
@@ -235,9 +275,9 @@ class FactCollector(Analysis):
|
|
|
235
275
|
init_state.bp_value = init_state.sp_value
|
|
236
276
|
|
|
237
277
|
traversed = set()
|
|
238
|
-
queue: list[
|
|
239
|
-
|
|
240
|
-
]
|
|
278
|
+
queue: list[
|
|
279
|
+
tuple[int, FactCollectorState, CodeNode | BlockNode | HookNode | Function, BlockNode | HookNode | None]
|
|
280
|
+
] = [(0, init_state, startpoint, None)]
|
|
241
281
|
end_states: list[FactCollectorState] = []
|
|
242
282
|
while queue:
|
|
243
283
|
depth, state, node, retnode = queue.pop(0)
|
|
@@ -278,14 +318,17 @@ class FactCollector(Analysis):
|
|
|
278
318
|
for _, succ, data in func_graph.out_edges(node, data=True):
|
|
279
319
|
edge_type = data.get("type")
|
|
280
320
|
outside = data.get("outside", False)
|
|
281
|
-
if
|
|
321
|
+
if depth + 1 <= self._max_depth:
|
|
282
322
|
if edge_type == "fake_return":
|
|
283
|
-
|
|
323
|
+
if succ not in traversed:
|
|
324
|
+
ret_succ = succ
|
|
284
325
|
elif edge_type == "transition" and not outside:
|
|
285
|
-
|
|
286
|
-
|
|
326
|
+
if succ not in traversed:
|
|
327
|
+
successor_added = True
|
|
328
|
+
queue.append((depth + 1, state.copy(), succ, None))
|
|
287
329
|
elif edge_type == "call" or (edge_type == "transition" and outside):
|
|
288
330
|
# a call or a tail-call
|
|
331
|
+
# note that it's ok to traverse a called function multiple times
|
|
289
332
|
if not isinstance(succ, Function):
|
|
290
333
|
if self.kb.functions.contains_addr(succ.addr):
|
|
291
334
|
succ = self.kb.functions.get_by_addr(succ.addr)
|
|
@@ -398,9 +441,24 @@ class FactCollector(Analysis):
|
|
|
398
441
|
and not isinstance(func_succ.prototype.returnty, SimTypeBottom)
|
|
399
442
|
):
|
|
400
443
|
# assume the function overwrites the return variable
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
444
|
+
proto = func_succ.prototype
|
|
445
|
+
if func_succ.prototype_libname is not None:
|
|
446
|
+
# we need to deref the prototype in case it uses SimTypeRef internally
|
|
447
|
+
type_collections = []
|
|
448
|
+
prototype_lib = SIM_LIBRARIES[func_succ.prototype_libname]
|
|
449
|
+
if prototype_lib.type_collection_names:
|
|
450
|
+
for typelib_name in prototype_lib.type_collection_names:
|
|
451
|
+
type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
|
|
452
|
+
proto = dereference_simtype(proto, type_collections)
|
|
453
|
+
|
|
454
|
+
assert isinstance(proto, SimTypeFunction) and proto.returnty is not None
|
|
455
|
+
returnty_size = proto.returnty.with_arch(self.project.arch).size
|
|
456
|
+
if returnty_size is None:
|
|
457
|
+
# it may be None if somehow we cannot resolve a SimTypeRef; we fall back to the full
|
|
458
|
+
# machine word size
|
|
459
|
+
retval_size = self.project.arch.bytes
|
|
460
|
+
else:
|
|
461
|
+
retval_size = returnty_size // self.project.arch.byte_width
|
|
404
462
|
retval_sizes.append(retval_size)
|
|
405
463
|
continue
|
|
406
464
|
|
angr/analyses/cfg/cfg_base.py
CHANGED
|
@@ -1701,7 +1701,12 @@ class CFGBase(Analysis):
|
|
|
1701
1701
|
self._update_progress(progress)
|
|
1702
1702
|
|
|
1703
1703
|
self._graph_bfs_custom(
|
|
1704
|
-
self.graph,
|
|
1704
|
+
self.graph,
|
|
1705
|
+
[fn],
|
|
1706
|
+
self._graph_traversal_handler,
|
|
1707
|
+
blockaddr_to_function,
|
|
1708
|
+
tmp_functions,
|
|
1709
|
+
traversed_cfg_nodes,
|
|
1705
1710
|
)
|
|
1706
1711
|
|
|
1707
1712
|
to_remove = set()
|
|
@@ -2731,7 +2736,7 @@ class CFGBase(Analysis):
|
|
|
2731
2736
|
relifted = self.project.factory.block(block.addr, size=block.size, opt_level=1, cross_insn_opt=True).vex
|
|
2732
2737
|
except SimError:
|
|
2733
2738
|
return False, []
|
|
2734
|
-
if isinstance(relifted.next, pyvex.IRExpr.Const):
|
|
2739
|
+
if not relifted.jumpkind.startswith("Ijk_Sys") and isinstance(relifted.next, pyvex.IRExpr.Const):
|
|
2735
2740
|
# yes!
|
|
2736
2741
|
return True, [relifted.next.con.value]
|
|
2737
2742
|
|
|
angr/analyses/cfg/cfg_emulated.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
2
3
|
import itertools
|
|
3
4
|
import logging
|
|
4
5
|
import sys
|
|
5
6
|
from collections import defaultdict
|
|
6
7
|
from functools import reduce
|
|
8
|
+
import contextlib
|
|
7
9
|
|
|
8
10
|
import angr
|
|
9
11
|
import claripy
|
|
@@ -45,7 +47,10 @@ from angr.analyses.backward_slice import BackwardSlice
|
|
|
45
47
|
from angr.analyses.loopfinder import LoopFinder, Loop
|
|
46
48
|
from .cfg_base import CFGBase
|
|
47
49
|
from .cfg_job_base import BlockID, CFGJobBase
|
|
48
|
-
|
|
50
|
+
|
|
51
|
+
if TYPE_CHECKING:
|
|
52
|
+
from angr.knowledge_plugins.cfg import CFGNode
|
|
53
|
+
|
|
49
54
|
|
|
50
55
|
l = logging.getLogger(name=__name__)
|
|
51
56
|
|
|
@@ -505,6 +510,8 @@ class CFGEmulated(ForwardAnalysis, CFGBase): # pylint: disable=abstract-method
|
|
|
505
510
|
:return: None
|
|
506
511
|
"""
|
|
507
512
|
|
|
513
|
+
assert self._starts is not None
|
|
514
|
+
|
|
508
515
|
if not isinstance(max_loop_unrolling_times, int) or max_loop_unrolling_times < 0:
|
|
509
516
|
raise AngrCFGError(
|
|
510
517
|
"Max loop unrolling times must be set to an integer greater than or equal to 0 if "
|
|
@@ -586,6 +593,7 @@ class CFGEmulated(ForwardAnalysis, CFGBase): # pylint: disable=abstract-method
|
|
|
586
593
|
|
|
587
594
|
graph_copy.remove_node(new_end_node)
|
|
588
595
|
src, dst = loop_backedge
|
|
596
|
+
assert src is not None and dst is not None
|
|
589
597
|
if graph_copy.has_edge(src, dst): # It might have been removed before
|
|
590
598
|
# Duplicate the dst node
|
|
591
599
|
new_dst = dst.copy()
|
|
@@ -713,9 +721,10 @@ class CFGEmulated(ForwardAnalysis, CFGBase): # pylint: disable=abstract-method
|
|
|
713
721
|
# FIXME: start should also take a CFGNode instance
|
|
714
722
|
|
|
715
723
|
start_node = self.get_any_node(start)
|
|
724
|
+
assert start_node is not None
|
|
716
725
|
|
|
717
726
|
node_wrapper = (start_node, 0)
|
|
718
|
-
stack = [node_wrapper]
|
|
727
|
+
stack: list[tuple[CFGNode, int]] = [node_wrapper]
|
|
719
728
|
traversed_nodes = {start_node}
|
|
720
729
|
subgraph_nodes = {start_node}
|
|
721
730
|
|
|
@@ -727,6 +736,7 @@ class CFGEmulated(ForwardAnalysis, CFGBase): # pylint: disable=abstract-method
|
|
|
727
736
|
edges = self.graph.out_edges(n, data=True)
|
|
728
737
|
|
|
729
738
|
for _, dst, data in edges:
|
|
739
|
+
assert dst is not None
|
|
730
740
|
if dst not in traversed_nodes:
|
|
731
741
|
# We see a new node!
|
|
732
742
|
traversed_nodes.add(dst)
|
|
@@ -1687,9 +1697,8 @@ class CFGEmulated(ForwardAnalysis, CFGBase): # pylint: disable=abstract-method
|
|
|
1687
1697
|
|
|
1688
1698
|
for block_id in pending_exits_to_remove:
|
|
1689
1699
|
l.debug(
|
|
1690
|
-
"Removing all pending exits to %#x since the target function
|
|
1700
|
+
"Removing all pending exits to %#x since the target function does not return",
|
|
1691
1701
|
self._block_id_addr(block_id),
|
|
1692
|
-
next(iter(self._pending_jobs[block_id])).returning_source,
|
|
1693
1702
|
)
|
|
1694
1703
|
|
|
1695
1704
|
for to_remove in self._pending_jobs[block_id]:
|
angr/analyses/cfg/cfg_fast.py
CHANGED
|
@@ -31,13 +31,10 @@ from angr import sim_options as o
|
|
|
31
31
|
from angr.errors import (
|
|
32
32
|
AngrCFGError,
|
|
33
33
|
AngrSkipJobNotice,
|
|
34
|
-
AngrUnsupportedSyscallError,
|
|
35
34
|
SimEngineError,
|
|
36
35
|
SimMemoryError,
|
|
37
36
|
SimTranslationError,
|
|
38
37
|
SimValueError,
|
|
39
|
-
SimOperationError,
|
|
40
|
-
SimError,
|
|
41
38
|
SimIRSBNoDecodeError,
|
|
42
39
|
)
|
|
43
40
|
from angr.utils.constants import DEFAULT_STATEMENT
|
|
@@ -200,7 +197,7 @@ class PendingJobs:
|
|
|
200
197
|
return self._pop_job(next(reversed(self._jobs.keys())))
|
|
201
198
|
|
|
202
199
|
# Prioritize returning functions
|
|
203
|
-
for func_addr in reversed(self._jobs
|
|
200
|
+
for func_addr in reversed(self._jobs):
|
|
204
201
|
if func_addr not in self._returning_functions:
|
|
205
202
|
continue
|
|
206
203
|
return self._pop_job(func_addr)
|
|
@@ -621,6 +618,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
621
618
|
nodecode_window_size=512,
|
|
622
619
|
nodecode_threshold=0.3,
|
|
623
620
|
nodecode_step=16483,
|
|
621
|
+
check_funcret_max_job=500,
|
|
624
622
|
indirect_calls_always_return: bool | None = None,
|
|
625
623
|
jumptable_resolver_resolves_calls: bool | None = None,
|
|
626
624
|
start=None, # deprecated
|
|
@@ -680,6 +678,12 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
680
678
|
table resolver and must be resolved using their specific resolvers. By default,
|
|
681
679
|
we will only disable JumpTableResolver from resolving indirect calls for large
|
|
682
680
|
binaries (region > 50 KB).
|
|
681
|
+
:param check_funcret_max_job When popping return-site jobs out of the job queue, angr will prioritize jobs
|
|
682
|
+
for which the callee is known to return. This check may be slow when there are
|
|
683
|
+
a large amount of jobs in different caller functions, and this situation often
|
|
684
|
+
occurs in obfuscated binaries where many functions never return. This parameter
|
|
685
|
+
acts as a threshold to disable this check when the number of jobs in the queue
|
|
686
|
+
exceeds this threshold.
|
|
683
687
|
:param int start: (Deprecated) The beginning address of CFG recovery.
|
|
684
688
|
:param int end: (Deprecated) The end address of CFG recovery.
|
|
685
689
|
:param CFGArchOptions arch_options: Architecture-specific options.
|
|
@@ -768,6 +772,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
768
772
|
self._force_complete_scan = force_complete_scan
|
|
769
773
|
self._use_elf_eh_frame = elf_eh_frame
|
|
770
774
|
self._use_exceptions = exceptions
|
|
775
|
+
self._check_funcret_max_job = check_funcret_max_job
|
|
771
776
|
|
|
772
777
|
self._nodecode_window_size = nodecode_window_size
|
|
773
778
|
self._nodecode_threshold = nodecode_threshold
|
|
@@ -2576,38 +2581,16 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
2576
2581
|
jobs: list[CFGJob] = []
|
|
2577
2582
|
|
|
2578
2583
|
if is_syscall:
|
|
2579
|
-
|
|
2580
|
-
|
|
2581
|
-
mode="fastpath",
|
|
2582
|
-
addr=cfg_node.addr,
|
|
2583
|
-
add_options={o.SYMBOL_FILL_UNCONSTRAINED_MEMORY, o.SYMBOL_FILL_UNCONSTRAINED_REGISTERS},
|
|
2584
|
+
resolved, resolved_targets, ij = self._indirect_jump_encountered(
|
|
2585
|
+
addr, cfg_node, irsb, current_function_addr, stmt_idx
|
|
2584
2586
|
)
|
|
2585
|
-
|
|
2586
|
-
|
|
2587
|
-
|
|
2588
|
-
|
|
2589
|
-
iter(
|
|
2590
|
-
succ
|
|
2591
|
-
for succ in successors.flat_successors
|
|
2592
|
-
if succ.history.jumpkind and succ.history.jumpkind.startswith("Ijk_Sys")
|
|
2593
|
-
),
|
|
2594
|
-
None,
|
|
2595
|
-
)
|
|
2596
|
-
else:
|
|
2597
|
-
succ = None
|
|
2598
|
-
if succ is None:
|
|
2599
|
-
# For some reason, there is no such successor with a syscall jumpkind
|
|
2600
|
-
target_addr = self._unresolvable_call_target_addr
|
|
2587
|
+
target_addr = None
|
|
2588
|
+
if resolved:
|
|
2589
|
+
if len(resolved_targets) == 1:
|
|
2590
|
+
(target_addr,) = resolved_targets
|
|
2601
2591
|
else:
|
|
2602
|
-
|
|
2603
|
-
|
|
2604
|
-
if syscall_stub: # can be None if simos is not a subclass of SimUserspace
|
|
2605
|
-
syscall_addr = syscall_stub.addr
|
|
2606
|
-
target_addr = syscall_addr
|
|
2607
|
-
else:
|
|
2608
|
-
target_addr = self._unresolvable_call_target_addr
|
|
2609
|
-
except AngrUnsupportedSyscallError:
|
|
2610
|
-
target_addr = self._unresolvable_call_target_addr
|
|
2592
|
+
if ij is not None:
|
|
2593
|
+
self._indirect_jumps_to_resolve.add(ij)
|
|
2611
2594
|
|
|
2612
2595
|
new_function_addr = target_addr.method if isinstance(target_addr, SootAddressDescriptor) else target_addr
|
|
2613
2596
|
|
|
@@ -2732,30 +2715,6 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
2732
2715
|
|
|
2733
2716
|
return jobs
|
|
2734
2717
|
|
|
2735
|
-
def _simulate_block_with_resilience(self, state):
|
|
2736
|
-
"""
|
|
2737
|
-
Execute a basic block with "On Error Resume Next". Give up when there is no way moving forward.
|
|
2738
|
-
|
|
2739
|
-
:param SimState state: The initial state to start simulation with.
|
|
2740
|
-
:return: A SimSuccessors instance or None if we are unable to resume execution with resilience.
|
|
2741
|
-
:rtype: SimSuccessors or None
|
|
2742
|
-
"""
|
|
2743
|
-
|
|
2744
|
-
stmt_idx = 0
|
|
2745
|
-
successors = None # make PyCharm's linting happy
|
|
2746
|
-
|
|
2747
|
-
while True:
|
|
2748
|
-
try:
|
|
2749
|
-
successors = self.project.factory.successors(state, skip_stmts=stmt_idx)
|
|
2750
|
-
break
|
|
2751
|
-
except SimOperationError as ex:
|
|
2752
|
-
stmt_idx = ex.stmt_idx + 1
|
|
2753
|
-
continue
|
|
2754
|
-
except SimError:
|
|
2755
|
-
return None
|
|
2756
|
-
|
|
2757
|
-
return successors
|
|
2758
|
-
|
|
2759
2718
|
def _is_branching_to_outside(self, src_addr, target_addr, current_function_addr):
|
|
2760
2719
|
"""
|
|
2761
2720
|
Determine if a branch is branching to a different function (i.e., branching to outside the current function).
|
|
@@ -3236,7 +3195,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
3236
3195
|
if jump.jumpkind == "Ijk_Boring":
|
|
3237
3196
|
unresolvable_target_addr = self._unresolvable_jump_target_addr
|
|
3238
3197
|
simprocedure_name = "UnresolvableJumpTarget"
|
|
3239
|
-
elif jump.jumpkind == "Ijk_Call":
|
|
3198
|
+
elif jump.jumpkind == "Ijk_Call" or jump.jumpkind.startswith("Ijk_Sys"):
|
|
3240
3199
|
unresolvable_target_addr = self._unresolvable_call_target_addr
|
|
3241
3200
|
simprocedure_name = "UnresolvableCallTarget"
|
|
3242
3201
|
else:
|
|
@@ -3707,7 +3666,9 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
3707
3666
|
|
|
3708
3667
|
def _pop_pending_job(self, returning=True) -> CFGJob | None:
|
|
3709
3668
|
while self._pending_jobs:
|
|
3710
|
-
job = self._pending_jobs.pop_job(
|
|
3669
|
+
job = self._pending_jobs.pop_job(
|
|
3670
|
+
returning=returning if len(self._pending_jobs) < self._check_funcret_max_job else False
|
|
3671
|
+
)
|
|
3711
3672
|
if job is not None and job.job_type == CFGJobType.DATAREF_HINTS and self._seg_list.is_occupied(job.addr):
|
|
3712
3673
|
# ignore this hint from data refs because the target address has already been analyzed
|
|
3713
3674
|
continue
|
|
angr/analyses/cfg/indirect_jump_resolvers/__init__.py
CHANGED
|
@@ -10,6 +10,7 @@ from .arm_elf_fast import ArmElfFastResolver
|
|
|
10
10
|
from .const_resolver import ConstantResolver
|
|
11
11
|
from .amd64_pe_iat import AMD64PeIatResolver
|
|
12
12
|
from .memload_resolver import MemoryLoadResolver
|
|
13
|
+
from .syscall_resolver import SyscallResolver
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
__all__ = (
|
|
@@ -21,6 +22,7 @@ __all__ = (
|
|
|
21
22
|
"MemoryLoadResolver",
|
|
22
23
|
"MipsElfFastResolver",
|
|
23
24
|
"MipsElfGotResolver",
|
|
25
|
+
"SyscallResolver",
|
|
24
26
|
"X86ElfPicPltResolver",
|
|
25
27
|
"X86PeIatResolver",
|
|
26
28
|
)
|
|
angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py
CHANGED
|
@@ -43,11 +43,22 @@ class ConstantResolver(IndirectJumpResolver):
|
|
|
43
43
|
be resolved to a constant value. This resolver must be run after all other more specific resolvers.
|
|
44
44
|
"""
|
|
45
45
|
|
|
46
|
-
def __init__(self, project):
|
|
46
|
+
def __init__(self, project, max_func_nodes: int = 512):
|
|
47
47
|
super().__init__(project, timeless=False)
|
|
48
|
+
self.max_func_nodes = max_func_nodes
|
|
48
49
|
|
|
49
50
|
def filter(self, cfg, addr, func_addr, block, jumpkind):
|
|
51
|
+
if not cfg.functions.contains_addr(func_addr):
|
|
52
|
+
# the function does not exist
|
|
53
|
+
return False
|
|
54
|
+
|
|
55
|
+
# for performance, we don't run constant resolver if the function is too large
|
|
56
|
+
func = cfg.functions.get_by_addr(func_addr)
|
|
57
|
+
if len(func.block_addrs_set) > self.max_func_nodes:
|
|
58
|
+
return False
|
|
59
|
+
|
|
50
60
|
# we support both an indirect call and jump since the value can be resolved
|
|
61
|
+
|
|
51
62
|
return jumpkind in {"Ijk_Boring", "Ijk_Call"}
|
|
52
63
|
|
|
53
64
|
def resolve( # pylint:disable=unused-argument
|
|
angr/analyses/cfg/indirect_jump_resolvers/constant_value_manager.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import TYPE_CHECKING, Any
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
import claripy
|
|
6
|
+
|
|
7
|
+
from angr.code_location import CodeLocation
|
|
8
|
+
from angr.project import Project
|
|
9
|
+
from angr.analyses.propagator.vex_vars import VEXReg
|
|
10
|
+
from .propagator_utils import PropagatorLoadCallback
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from angr import SimState
|
|
14
|
+
from angr.knowledge_plugins import Function
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
l = logging.getLogger(name=__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ConstantValueManager:
|
|
21
|
+
"""
|
|
22
|
+
Manages the loading of registers who hold constant values.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
__slots__ = (
|
|
26
|
+
"func",
|
|
27
|
+
"indirect_jump_addr",
|
|
28
|
+
"kb",
|
|
29
|
+
"mapping",
|
|
30
|
+
"project",
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
def __init__(self, project: Project, kb, func: Function, ij_addr: int):
|
|
34
|
+
self.project = project
|
|
35
|
+
self.kb = kb
|
|
36
|
+
self.func = func
|
|
37
|
+
self.indirect_jump_addr = ij_addr
|
|
38
|
+
|
|
39
|
+
self.mapping: dict[Any, dict[Any, claripy.ast.Base]] | None = None
|
|
40
|
+
|
|
41
|
+
def reg_read_callback(self, state: SimState):
|
|
42
|
+
if self.mapping is None:
|
|
43
|
+
self._build_mapping()
|
|
44
|
+
assert self.mapping is not None
|
|
45
|
+
|
|
46
|
+
codeloc = CodeLocation(state.scratch.bbl_addr, state.scratch.stmt_idx, ins_addr=state.scratch.ins_addr)
|
|
47
|
+
if codeloc in self.mapping:
|
|
48
|
+
reg_read_offset = state.inspect.reg_read_offset
|
|
49
|
+
if isinstance(reg_read_offset, claripy.ast.BV) and reg_read_offset.op == "BVV":
|
|
50
|
+
reg_read_offset = reg_read_offset.args[0]
|
|
51
|
+
variable = VEXReg(reg_read_offset, state.inspect.reg_read_length)
|
|
52
|
+
if variable in self.mapping[codeloc]:
|
|
53
|
+
v = self.mapping[codeloc][variable]
|
|
54
|
+
if isinstance(v, int):
|
|
55
|
+
v = claripy.BVV(v, state.inspect.reg_read_length * state.arch.byte_width)
|
|
56
|
+
state.inspect.reg_read_expr = v
|
|
57
|
+
|
|
58
|
+
def _build_mapping(self):
|
|
59
|
+
# constant propagation
|
|
60
|
+
l.debug("JumpTable: Propagating for %r at %#x.", self.func, self.indirect_jump_addr)
|
|
61
|
+
|
|
62
|
+
# determine blocks to run FCP on
|
|
63
|
+
|
|
64
|
+
# - include at most three levels of superblock successors from the entrypoint
|
|
65
|
+
self.mapping = {}
|
|
66
|
+
startpoint = self.func.startpoint
|
|
67
|
+
if startpoint is None:
|
|
68
|
+
return
|
|
69
|
+
|
|
70
|
+
blocks = set()
|
|
71
|
+
succ_and_levels = [(startpoint, 0)]
|
|
72
|
+
while succ_and_levels:
|
|
73
|
+
new_succs = []
|
|
74
|
+
for node, level in succ_and_levels:
|
|
75
|
+
if node in blocks:
|
|
76
|
+
continue
|
|
77
|
+
blocks.add(node)
|
|
78
|
+
if node.addr == self.indirect_jump_addr:
|
|
79
|
+
# stop at the indirect jump block
|
|
80
|
+
continue
|
|
81
|
+
for _, succ, data in self.func.graph.out_edges(node, data=True):
|
|
82
|
+
new_level = level if data.get("type") == "fake_return" else level + 1
|
|
83
|
+
if new_level <= 3:
|
|
84
|
+
new_succs.append((succ, new_level))
|
|
85
|
+
succ_and_levels = new_succs
|
|
86
|
+
|
|
87
|
+
# - include at most six levels of predecessors from the indirect jump block
|
|
88
|
+
ij_block = self.func.get_node(self.indirect_jump_addr)
|
|
89
|
+
preds = [ij_block]
|
|
90
|
+
for _ in range(6):
|
|
91
|
+
new_preds = []
|
|
92
|
+
for node in preds:
|
|
93
|
+
if node in blocks:
|
|
94
|
+
continue
|
|
95
|
+
blocks.add(node)
|
|
96
|
+
new_preds += list(self.func.graph.predecessors(node))
|
|
97
|
+
preds = new_preds
|
|
98
|
+
if not preds:
|
|
99
|
+
break
|
|
100
|
+
|
|
101
|
+
prop = self.project.analyses.FastConstantPropagation(
|
|
102
|
+
self.func,
|
|
103
|
+
blocks=blocks,
|
|
104
|
+
vex_cross_insn_opt=True,
|
|
105
|
+
load_callback=PropagatorLoadCallback(self.project).propagator_load_callback,
|
|
106
|
+
)
|
|
107
|
+
self.mapping = prop.replacements
|
|
angr/analyses/cfg/indirect_jump_resolvers/default_resolvers.py
CHANGED
|
@@ -11,6 +11,7 @@ from . import ConstantResolver
|
|
|
11
11
|
from . import ArmElfFastResolver
|
|
12
12
|
from . import AMD64PeIatResolver
|
|
13
13
|
from . import MipsElfGotResolver
|
|
14
|
+
from . import SyscallResolver
|
|
14
15
|
|
|
15
16
|
DEFAULT_RESOLVERS = {
|
|
16
17
|
"X86": {
|
|
@@ -58,7 +59,7 @@ DEFAULT_RESOLVERS = {
|
|
|
58
59
|
ArmElfFastResolver,
|
|
59
60
|
]
|
|
60
61
|
},
|
|
61
|
-
"ALL": [MemoryLoadResolver, JumpTableResolver, ConstantResolver],
|
|
62
|
+
"ALL": [MemoryLoadResolver, JumpTableResolver, ConstantResolver, SyscallResolver],
|
|
62
63
|
}
|
|
63
64
|
|
|
64
65
|
|