angr 9.2.147__py3-none-manylinux2014_x86_64.whl → 9.2.148__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/analysis.py +3 -11
- angr/analyses/calling_convention/fact_collector.py +5 -4
- angr/analyses/calling_convention/utils.py +1 -0
- angr/analyses/cfg/cfg_base.py +3 -59
- angr/analyses/cfg/cfg_emulated.py +12 -12
- angr/analyses/cfg/cfg_fast.py +20 -17
- angr/analyses/cfg/cfg_fast_soot.py +3 -3
- angr/analyses/decompiler/callsite_maker.py +28 -18
- angr/analyses/decompiler/clinic.py +4 -4
- angr/analyses/decompiler/condition_processor.py +0 -21
- angr/analyses/decompiler/counters/call_counter.py +3 -0
- angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +1 -1
- angr/analyses/decompiler/peephole_optimizations/remove_redundant_conversions.py +14 -0
- angr/analyses/decompiler/structured_codegen/c.py +5 -5
- angr/analyses/decompiler/structuring/phoenix.py +11 -3
- angr/analyses/deobfuscator/api_obf_finder.py +5 -1
- angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +1 -1
- angr/analyses/forward_analysis/visitors/graph.py +0 -8
- angr/analyses/identifier/runner.py +1 -1
- angr/analyses/reaching_definitions/function_handler.py +4 -4
- angr/analyses/reassembler.py +1 -1
- angr/analyses/stack_pointer_tracker.py +1 -1
- angr/analyses/static_hooker.py +11 -9
- angr/analyses/variable_recovery/engine_ail.py +8 -8
- angr/analyses/variable_recovery/engine_base.py +2 -0
- angr/calling_conventions.py +74 -23
- angr/exploration_techniques/director.py +1 -1
- angr/knowledge_plugins/functions/function.py +41 -38
- angr/knowledge_plugins/functions/function_manager.py +9 -0
- angr/knowledge_plugins/functions/function_parser.py +9 -1
- angr/knowledge_plugins/functions/soot_function.py +1 -1
- angr/knowledge_plugins/key_definitions/key_definition_manager.py +1 -1
- angr/procedures/definitions/__init__.py +14 -11
- angr/procedures/stubs/format_parser.py +1 -1
- angr/project.py +23 -29
- angr/protos/cfg_pb2.py +14 -25
- angr/protos/function_pb2.py +11 -22
- angr/protos/primitives_pb2.py +36 -47
- angr/protos/variables_pb2.py +28 -39
- angr/protos/xrefs_pb2.py +8 -19
- angr/sim_type.py +0 -16
- angr/simos/cgc.py +1 -1
- angr/simos/linux.py +5 -5
- angr/simos/windows.py +5 -5
- angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +1 -1
- {angr-9.2.147.dist-info → angr-9.2.148.dist-info}/METADATA +8 -7
- {angr-9.2.147.dist-info → angr-9.2.148.dist-info}/RECORD +52 -52
- {angr-9.2.147.dist-info → angr-9.2.148.dist-info}/WHEEL +1 -1
- {angr-9.2.147.dist-info → angr-9.2.148.dist-info}/entry_points.txt +0 -0
- {angr-9.2.147.dist-info → angr-9.2.148.dist-info/licenses}/LICENSE +0 -0
- {angr-9.2.147.dist-info → angr-9.2.148.dist-info}/top_level.txt +0 -0
|
@@ -3,7 +3,6 @@ from typing import TypeVar, Generic
|
|
|
3
3
|
from collections.abc import Collection, Iterator
|
|
4
4
|
from collections import defaultdict
|
|
5
5
|
|
|
6
|
-
from angr.misc.ux import deprecated
|
|
7
6
|
from angr.utils.algo import binary_insert
|
|
8
7
|
|
|
9
8
|
NodeType = TypeVar("NodeType")
|
|
@@ -94,13 +93,6 @@ class GraphVisitor(Generic[NodeType]):
|
|
|
94
93
|
|
|
95
94
|
return iter(self.sort_nodes())
|
|
96
95
|
|
|
97
|
-
@deprecated(replacement="nodes")
|
|
98
|
-
def nodes_iter(self):
|
|
99
|
-
"""
|
|
100
|
-
(Deprecated) Return an iterator of nodes following an optimal traversal order. Will be removed in the future.
|
|
101
|
-
"""
|
|
102
|
-
return self.nodes()
|
|
103
|
-
|
|
104
96
|
# Traversal
|
|
105
97
|
|
|
106
98
|
def reset(self):
|
|
@@ -29,7 +29,7 @@ assert len(FLAG_DATA) == 0x1000
|
|
|
29
29
|
class Runner:
|
|
30
30
|
def __init__(self, project, cfg):
|
|
31
31
|
# this is kind of fucked up
|
|
32
|
-
project.simos.syscall_library.update(SIM_LIBRARIES["cgcabi_tracer"])
|
|
32
|
+
project.simos.syscall_library.update(SIM_LIBRARIES["cgcabi_tracer"][0])
|
|
33
33
|
|
|
34
34
|
self.project = project
|
|
35
35
|
self.cfg = cfg
|
|
@@ -401,10 +401,10 @@ class FunctionHandler:
|
|
|
401
401
|
)
|
|
402
402
|
type_collections = []
|
|
403
403
|
if prototype_libname is not None and prototype_libname in SIM_LIBRARIES:
|
|
404
|
-
prototype_lib
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
404
|
+
for prototype_lib in SIM_LIBRARIES[prototype_libname]:
|
|
405
|
+
if prototype_lib.type_collection_names:
|
|
406
|
+
for typelib_name in prototype_lib.type_collection_names:
|
|
407
|
+
type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
|
|
408
408
|
if type_collections:
|
|
409
409
|
prototype = dereference_simtype(data.prototype, type_collections).with_arch(state.arch)
|
|
410
410
|
data.prototype = cast(SimTypeFunction, prototype)
|
angr/analyses/reassembler.py
CHANGED
|
@@ -2410,7 +2410,7 @@ class Reassembler(Analysis):
|
|
|
2410
2410
|
|
|
2411
2411
|
# collect address of all instructions
|
|
2412
2412
|
l.debug("Collecting instruction addresses...")
|
|
2413
|
-
for cfg_node in self.cfg.nodes():
|
|
2413
|
+
for cfg_node in self.cfg.model.nodes():
|
|
2414
2414
|
self.all_insn_addrs |= set(cfg_node.instruction_addrs)
|
|
2415
2415
|
|
|
2416
2416
|
# Functions
|
|
@@ -520,7 +520,7 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
|
|
|
520
520
|
# Setting register values to fresh ones will cause problems down the line when merging with normal
|
|
521
521
|
# register values happen. therefore, we set their values to BOTTOM. these BOTTOMs will be replaced once
|
|
522
522
|
# a merge with normal blocks happen.
|
|
523
|
-
initial_regs =
|
|
523
|
+
initial_regs = dict.fromkeys(self.reg_offsets, BOTTOM)
|
|
524
524
|
|
|
525
525
|
return StackPointerTrackerState(
|
|
526
526
|
regs=initial_regs, memory={}, is_tracking_memory=self.track_mem, resilient=self._resilient
|
angr/analyses/static_hooker.py
CHANGED
|
@@ -21,7 +21,7 @@ class StaticHooker(Analysis):
|
|
|
21
21
|
def __init__(self, library, binary=None):
|
|
22
22
|
self.results = {}
|
|
23
23
|
try:
|
|
24
|
-
|
|
24
|
+
libs = SIM_LIBRARIES[library]
|
|
25
25
|
except KeyError as err:
|
|
26
26
|
raise AngrValueError(f"No such library {library}") from err
|
|
27
27
|
|
|
@@ -36,14 +36,16 @@ class StaticHooker(Analysis):
|
|
|
36
36
|
l.debug("Skipping %s at %#x, already hooked", func.name, func.rebased_addr)
|
|
37
37
|
continue
|
|
38
38
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
39
|
+
for lib in libs:
|
|
40
|
+
if lib.has_implementation(func.name):
|
|
41
|
+
proc = lib.get(func.name, self.project.arch)
|
|
42
|
+
self.results[func.rebased_addr] = proc
|
|
43
|
+
if self.project.is_hooked(func.rebased_addr):
|
|
44
|
+
l.debug("Skipping %s at %#x, already hooked", func.name, func.rebased_addr)
|
|
45
|
+
else:
|
|
46
|
+
self.project.hook(func.rebased_addr, proc)
|
|
47
|
+
l.info("Hooked %s at %#x", func.name, func.rebased_addr)
|
|
48
|
+
break
|
|
47
49
|
else:
|
|
48
50
|
l.debug("Failed to hook %s at %#x", func.name, func.rebased_addr)
|
|
49
51
|
|
|
@@ -169,10 +169,10 @@ class SimEngineVRAIL(
|
|
|
169
169
|
|
|
170
170
|
type_collections = []
|
|
171
171
|
if prototype_libname is not None:
|
|
172
|
-
prototype_lib
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
172
|
+
for prototype_lib in SIM_LIBRARIES[prototype_libname]:
|
|
173
|
+
if prototype_lib.type_collection_names:
|
|
174
|
+
for typelib_name in prototype_lib.type_collection_names:
|
|
175
|
+
type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
|
|
176
176
|
|
|
177
177
|
for arg, arg_type in zip(args, prototype.args):
|
|
178
178
|
if arg.typevar is not None:
|
|
@@ -262,10 +262,10 @@ class SimEngineVRAIL(
|
|
|
262
262
|
|
|
263
263
|
type_collections = []
|
|
264
264
|
if prototype_libname is not None:
|
|
265
|
-
prototype_lib
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
265
|
+
for prototype_lib in SIM_LIBRARIES[prototype_libname]:
|
|
266
|
+
if prototype_lib.type_collection_names:
|
|
267
|
+
for typelib_name in prototype_lib.type_collection_names:
|
|
268
|
+
type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
|
|
269
269
|
|
|
270
270
|
for arg, arg_type in zip(args, prototype.args):
|
|
271
271
|
if arg.typevar is not None:
|
|
@@ -417,6 +417,7 @@ class SimEngineVRBase(
|
|
|
417
417
|
vvar.size,
|
|
418
418
|
ident=self.state.variable_manager[self.func_addr].next_variable_ident("stack"),
|
|
419
419
|
region=self.func_addr,
|
|
420
|
+
base="bp",
|
|
420
421
|
)
|
|
421
422
|
self.state.variable_manager[self.func_addr].add_variable("stack", vvar.stack_offset, variable)
|
|
422
423
|
elif vvar.was_parameter:
|
|
@@ -1079,6 +1080,7 @@ class SimEngineVRBase(
|
|
|
1079
1080
|
vvar.size,
|
|
1080
1081
|
ident=self.state.variable_manager[self.func_addr].next_variable_ident("stack"),
|
|
1081
1082
|
region=self.func_addr,
|
|
1083
|
+
base="bp",
|
|
1082
1084
|
)
|
|
1083
1085
|
value = self.state.annotate_with_variables(value, [(0, variable)])
|
|
1084
1086
|
self.state.variable_manager[self.func_addr].add_variable("stack", vvar.stack_offset, variable)
|
angr/calling_conventions.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# pylint:disable=line-too-long,missing-class-docstring,no-self-use
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
import logging
|
|
4
|
-
from typing import cast
|
|
4
|
+
from typing import Generic, cast, TypeVar
|
|
5
5
|
|
|
6
|
-
from collections.abc import Iterable
|
|
6
|
+
from collections.abc import Iterable
|
|
7
7
|
from collections import defaultdict
|
|
8
8
|
import contextlib
|
|
9
9
|
|
|
@@ -39,6 +39,8 @@ from .state_plugins.sim_action_object import SimActionObject
|
|
|
39
39
|
l = logging.getLogger(name=__name__)
|
|
40
40
|
l.addFilter(UniqueLogFilter())
|
|
41
41
|
|
|
42
|
+
T = TypeVar("T", bound="SimFunctionArgument")
|
|
43
|
+
|
|
42
44
|
|
|
43
45
|
class PointerWrapper:
|
|
44
46
|
def __init__(self, value, buffer=False):
|
|
@@ -386,12 +388,12 @@ class SimStackArg(SimFunctionArgument):
|
|
|
386
388
|
return SimStackArg(self.stack_offset + offset, size, is_fp)
|
|
387
389
|
|
|
388
390
|
|
|
389
|
-
class SimComboArg(SimFunctionArgument):
|
|
391
|
+
class SimComboArg(SimFunctionArgument, Generic[T]):
|
|
390
392
|
"""
|
|
391
393
|
An argument which spans multiple storage locations. Locations should be given least-significant first.
|
|
392
394
|
"""
|
|
393
395
|
|
|
394
|
-
def __init__(self, locations, is_fp=False):
|
|
396
|
+
def __init__(self, locations: list[T], is_fp=False):
|
|
395
397
|
super().__init__(sum(x.size for x in locations), is_fp=is_fp)
|
|
396
398
|
self.locations = locations
|
|
397
399
|
|
|
@@ -449,6 +451,45 @@ class SimStructArg(SimFunctionArgument):
|
|
|
449
451
|
|
|
450
452
|
return others
|
|
451
453
|
|
|
454
|
+
def get_single_footprint(self) -> SimStackArg | SimRegArg | SimComboArg:
|
|
455
|
+
if self.struct._arch is None:
|
|
456
|
+
raise TypeError("Can't tell the size of a struct without an arch")
|
|
457
|
+
stack_min = None
|
|
458
|
+
stack_max = None
|
|
459
|
+
regs = []
|
|
460
|
+
for field in self.struct.fields:
|
|
461
|
+
loc = self.locs[field]
|
|
462
|
+
if isinstance(loc, SimStackArg):
|
|
463
|
+
if stack_min is None or stack_max is None:
|
|
464
|
+
stack_min = loc.stack_offset
|
|
465
|
+
stack_max = loc.stack_offset
|
|
466
|
+
else:
|
|
467
|
+
# sanity check that arguments are laid out in order...
|
|
468
|
+
assert loc.stack_offset >= stack_max
|
|
469
|
+
stack_max = loc.stack_offset + loc.size
|
|
470
|
+
elif isinstance(loc, SimRegArg):
|
|
471
|
+
regs.append(loc)
|
|
472
|
+
else:
|
|
473
|
+
assert False, "Why would a struct have layout elements other than stack and reg?"
|
|
474
|
+
|
|
475
|
+
# things to consider...
|
|
476
|
+
# what happens if we return the concat of two registers but there's slack space missing?
|
|
477
|
+
# an example of this would be big-endian struct { long a; int b; }
|
|
478
|
+
# do any CCs do this??
|
|
479
|
+
# for now assume no
|
|
480
|
+
|
|
481
|
+
if stack_min is not None:
|
|
482
|
+
if regs:
|
|
483
|
+
assert (
|
|
484
|
+
False
|
|
485
|
+
), "Unknown CC argument passing structure - why are we passing both regs and stack at the same time?"
|
|
486
|
+
return SimStackArg(stack_min, self.struct.size // self.struct._arch.byte_width)
|
|
487
|
+
if not regs:
|
|
488
|
+
assert False, "huh??????"
|
|
489
|
+
if len(regs) == 1:
|
|
490
|
+
return regs[0]
|
|
491
|
+
return SimComboArg(regs)
|
|
492
|
+
|
|
452
493
|
def get_value(self, state, **kwargs):
|
|
453
494
|
return SimStructValue(
|
|
454
495
|
self.struct, {field: getter.get_value(state, **kwargs) for field, getter in self.locs.items()}
|
|
@@ -486,7 +527,7 @@ class SimReferenceArgument(SimFunctionArgument):
|
|
|
486
527
|
zero on the stack. It will be passed ``stack_base=ptr_loc.get_value(state)``
|
|
487
528
|
"""
|
|
488
529
|
|
|
489
|
-
def __init__(self, ptr_loc, main_loc):
|
|
530
|
+
def __init__(self, ptr_loc: SimFunctionArgument, main_loc: SimFunctionArgument):
|
|
490
531
|
super().__init__(ptr_loc.size) # ???
|
|
491
532
|
self.ptr_loc = ptr_loc
|
|
492
533
|
self.main_loc = main_loc
|
|
@@ -700,6 +741,7 @@ class SimCC:
|
|
|
700
741
|
)
|
|
701
742
|
if self.return_in_implicit_outparam(ty):
|
|
702
743
|
if perspective_returned:
|
|
744
|
+
assert self.RETURN_VAL is not None
|
|
703
745
|
ptr_loc = self.RETURN_VAL
|
|
704
746
|
else:
|
|
705
747
|
ptr_loc = self.next_arg(self.ArgSession(self), SimTypePointer(SimTypeBottom()))
|
|
@@ -713,6 +755,7 @@ class SimCC:
|
|
|
713
755
|
if self.RETURN_VAL is None or isinstance(ty, SimTypeBottom):
|
|
714
756
|
return None
|
|
715
757
|
if ty.size > self.RETURN_VAL.size * self.arch.byte_width:
|
|
758
|
+
assert self.OVERFLOW_RETURN_VAL is not None
|
|
716
759
|
return SimComboArg([self.RETURN_VAL, self.OVERFLOW_RETURN_VAL])
|
|
717
760
|
return self.RETURN_VAL.refine(size=ty.size // self.arch.byte_width, arch=self.arch, is_fp=False)
|
|
718
761
|
|
|
@@ -991,7 +1034,8 @@ class SimCC:
|
|
|
991
1034
|
else:
|
|
992
1035
|
raise TypeError("PointerWrapper(buffer=True) can only be used with a bitvector or a bytestring")
|
|
993
1036
|
else:
|
|
994
|
-
|
|
1037
|
+
sub = ty.pts_to if isinstance(ty, SimTypePointer) else ty.refs
|
|
1038
|
+
child_type = SimTypeArray(sub) if isinstance(arg.value, (str, bytes, list)) else sub
|
|
995
1039
|
try:
|
|
996
1040
|
real_value = SimCC._standardize_value(arg.value, child_type, state, alloc)
|
|
997
1041
|
except TypeError as e: # this is a dangerous catch...
|
|
@@ -1003,32 +1047,34 @@ class SimCC:
|
|
|
1003
1047
|
|
|
1004
1048
|
if isinstance(arg, (str, bytes)):
|
|
1005
1049
|
# sanitize the argument and request standardization again with SimTypeArray
|
|
1006
|
-
if
|
|
1050
|
+
if isinstance(arg, str):
|
|
1007
1051
|
arg = arg.encode()
|
|
1008
1052
|
arg += b"\0"
|
|
1009
1053
|
if isinstance(ty, SimTypePointer) and isinstance(ty.pts_to, SimTypeChar):
|
|
1010
1054
|
pass
|
|
1011
|
-
elif isinstance(ty, SimTypeFixedSizeArray) and isinstance(ty.elem_type, SimTypeChar)
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
arg = arg.ljust(ty.length, b"\0")
|
|
1015
|
-
elif isinstance(ty, SimTypeArray) and isinstance(ty.elem_type, SimTypeChar):
|
|
1055
|
+
elif (isinstance(ty, SimTypeFixedSizeArray) and isinstance(ty.elem_type, SimTypeChar)) or (
|
|
1056
|
+
isinstance(ty, SimTypeArray) and isinstance(ty.elem_type, SimTypeChar)
|
|
1057
|
+
):
|
|
1016
1058
|
if ty.length is not None:
|
|
1017
1059
|
if len(arg) > ty.length:
|
|
1018
1060
|
raise TypeError(f"String {arg!r} is too long for {ty}")
|
|
1019
1061
|
arg = arg.ljust(ty.length, b"\0")
|
|
1020
1062
|
elif isinstance(ty, SimTypeString):
|
|
1021
|
-
if
|
|
1022
|
-
|
|
1023
|
-
|
|
1063
|
+
if ty.length is not None:
|
|
1064
|
+
if len(arg) > ty.length + 1:
|
|
1065
|
+
raise TypeError(f"String {arg!r} is too long for {ty}")
|
|
1066
|
+
arg = arg.ljust(ty.length + 1, b"\0")
|
|
1024
1067
|
else:
|
|
1025
1068
|
raise TypeError(f"Type mismatch: Expected {ty}, got char*")
|
|
1026
1069
|
return SimCC._standardize_value(list(arg), SimTypeArray(SimTypeChar(), len(arg)), state, alloc)
|
|
1027
1070
|
|
|
1028
1071
|
if isinstance(arg, list):
|
|
1029
|
-
if isinstance(ty,
|
|
1072
|
+
if isinstance(ty, SimTypePointer):
|
|
1030
1073
|
ref = True
|
|
1031
1074
|
subty = ty.pts_to
|
|
1075
|
+
elif isinstance(ty, SimTypeReference):
|
|
1076
|
+
ref = True
|
|
1077
|
+
subty = ty.refs
|
|
1032
1078
|
elif isinstance(ty, SimTypeArray):
|
|
1033
1079
|
ref = True
|
|
1034
1080
|
subty = ty.elem_type
|
|
@@ -1045,7 +1091,7 @@ class SimCC:
|
|
|
1045
1091
|
if isinstance(arg, (tuple, dict, SimStructValue)):
|
|
1046
1092
|
if not isinstance(ty, SimStruct):
|
|
1047
1093
|
raise TypeError(f"Type mismatch: Expected {ty}, got {type(arg)} (i.e. struct)")
|
|
1048
|
-
if
|
|
1094
|
+
if not isinstance(arg, SimStructValue):
|
|
1049
1095
|
if len(arg) != len(ty.fields):
|
|
1050
1096
|
raise TypeError(f"Wrong number of fields in struct, expected {len(ty.fields)} got {len(arg)}")
|
|
1051
1097
|
arg = SimStructValue(ty, arg)
|
|
@@ -1075,14 +1121,16 @@ class SimCC:
|
|
|
1075
1121
|
raise TypeError(f"Type mismatch: expected {ty}, got {arg.sort}")
|
|
1076
1122
|
return arg
|
|
1077
1123
|
if isinstance(ty, (SimTypeReg, SimTypeNum)):
|
|
1078
|
-
return arg.val_to_bv(ty.size, ty.signed)
|
|
1124
|
+
return arg.val_to_bv(ty.size, ty.signed if isinstance(ty, SimTypeNum) else False)
|
|
1079
1125
|
raise TypeError(f"Type mismatch: expected {ty}, got {arg.sort}")
|
|
1080
1126
|
|
|
1081
1127
|
if isinstance(arg, claripy.ast.BV):
|
|
1082
1128
|
if isinstance(ty, (SimTypeReg, SimTypeNum)):
|
|
1083
1129
|
if len(arg) != ty.size:
|
|
1084
1130
|
if arg.concrete:
|
|
1085
|
-
|
|
1131
|
+
size = ty.size
|
|
1132
|
+
assert size is not None
|
|
1133
|
+
return claripy.BVV(arg.concrete_value, size)
|
|
1086
1134
|
raise TypeError(f"Type mismatch of symbolic data: expected {ty}, got {len(arg)} bits")
|
|
1087
1135
|
return arg
|
|
1088
1136
|
if isinstance(ty, (SimTypeFloat)):
|
|
@@ -1101,7 +1149,7 @@ class SimCC:
|
|
|
1101
1149
|
return isinstance(other, self.__class__)
|
|
1102
1150
|
|
|
1103
1151
|
@classmethod
|
|
1104
|
-
def _match(cls, arch, args: list, sp_delta):
|
|
1152
|
+
def _match(cls, arch, args: list[SimRegArg | SimStackArg], sp_delta):
|
|
1105
1153
|
if (
|
|
1106
1154
|
cls.arches() is not None and ":" not in arch.name and not isinstance(arch, cls.arches())
|
|
1107
1155
|
): # pylint:disable=isinstance-second-argument-not-valid-type
|
|
@@ -1139,13 +1187,16 @@ class SimCC:
|
|
|
1139
1187
|
@classmethod
|
|
1140
1188
|
def _guess_arg_count(cls, args, limit: int = 64) -> int:
|
|
1141
1189
|
# pylint:disable=not-callable
|
|
1190
|
+
assert cls.ARCH is not None
|
|
1142
1191
|
stack_args = [a for a in args if isinstance(a, SimStackArg)]
|
|
1143
|
-
stack_arg_count = (
|
|
1192
|
+
stack_arg_count = (
|
|
1193
|
+
(max(a.stack_offset for a in stack_args) // cls.ARCH(archinfo.Endness.LE).bytes + 1) if stack_args else 0
|
|
1194
|
+
)
|
|
1144
1195
|
return min(limit, max(len(args), stack_arg_count))
|
|
1145
1196
|
|
|
1146
1197
|
@staticmethod
|
|
1147
1198
|
def find_cc(
|
|
1148
|
-
arch: archinfo.Arch, args:
|
|
1199
|
+
arch: archinfo.Arch, args: list[SimRegArg | SimStackArg], sp_delta: int, platform: str = "Linux"
|
|
1149
1200
|
) -> SimCC | None:
|
|
1150
1201
|
"""
|
|
1151
1202
|
Pinpoint the best-fit calling convention and return the corresponding SimCC instance, or None if no fit is
|
|
@@ -1229,7 +1280,7 @@ class SimCCUsercall(SimCC):
|
|
|
1229
1280
|
def next_arg(self, session, arg_type):
|
|
1230
1281
|
return next(session.real_args)
|
|
1231
1282
|
|
|
1232
|
-
def return_val(self, ty, **kwargs):
|
|
1283
|
+
def return_val(self, ty, **kwargs): # pylint: disable=unused-argument
|
|
1233
1284
|
return self.ret_loc
|
|
1234
1285
|
|
|
1235
1286
|
|
|
@@ -23,7 +23,6 @@ from angr.procedures import SIM_LIBRARIES
|
|
|
23
23
|
from angr.procedures.definitions import SimSyscallLibrary
|
|
24
24
|
from angr.protos import function_pb2
|
|
25
25
|
from angr.calling_conventions import DEFAULT_CC, default_cc
|
|
26
|
-
from angr.misc.ux import deprecated
|
|
27
26
|
from angr.sim_type import SimTypeFunction, parse_defns
|
|
28
27
|
from angr.calling_conventions import SimCC
|
|
29
28
|
from angr.project import Project
|
|
@@ -92,6 +91,10 @@ class Function(Serializable):
|
|
|
92
91
|
is_plt: bool | None = None,
|
|
93
92
|
returning=None,
|
|
94
93
|
alignment=False,
|
|
94
|
+
calling_convention: SimCC | None = None,
|
|
95
|
+
prototype: SimTypeFunction | None = None,
|
|
96
|
+
prototype_libname: str | None = None,
|
|
97
|
+
is_prototype_guessed: bool = True,
|
|
95
98
|
):
|
|
96
99
|
"""
|
|
97
100
|
Function constructor. If the optional parameters are not provided, they will be automatically determined upon
|
|
@@ -139,11 +142,11 @@ class Function(Serializable):
|
|
|
139
142
|
self.retaddr_on_stack = False
|
|
140
143
|
self.sp_delta = 0
|
|
141
144
|
# Calling convention
|
|
142
|
-
self.calling_convention
|
|
145
|
+
self.calling_convention = calling_convention
|
|
143
146
|
# Function prototype
|
|
144
|
-
self.prototype
|
|
145
|
-
self.prototype_libname
|
|
146
|
-
self.is_prototype_guessed
|
|
147
|
+
self.prototype = prototype
|
|
148
|
+
self.prototype_libname = prototype_libname
|
|
149
|
+
self.is_prototype_guessed = is_prototype_guessed
|
|
147
150
|
# Whether this function returns or not. `None` means it's not determined yet
|
|
148
151
|
self._returning = None
|
|
149
152
|
|
|
@@ -239,15 +242,6 @@ class Function(Serializable):
|
|
|
239
242
|
|
|
240
243
|
self._init_prototype_and_calling_convention()
|
|
241
244
|
|
|
242
|
-
@property
|
|
243
|
-
@deprecated(".is_alignment")
|
|
244
|
-
def alignment(self):
|
|
245
|
-
return self.is_alignment
|
|
246
|
-
|
|
247
|
-
@alignment.setter
|
|
248
|
-
def alignment(self, value):
|
|
249
|
-
self.is_alignment = value
|
|
250
|
-
|
|
251
245
|
@property
|
|
252
246
|
def name(self):
|
|
253
247
|
return self._name
|
|
@@ -357,7 +351,8 @@ class Function(Serializable):
|
|
|
357
351
|
# we know the size
|
|
358
352
|
size = self._block_sizes[addr]
|
|
359
353
|
|
|
360
|
-
|
|
354
|
+
assert self.project is not None
|
|
355
|
+
block = self.project.factory.block(addr, size=size, byte_string=byte_string)
|
|
361
356
|
if size is None:
|
|
362
357
|
# update block_size dict
|
|
363
358
|
self._block_sizes[addr] = block.size
|
|
@@ -460,18 +455,19 @@ class Function(Serializable):
|
|
|
460
455
|
"""
|
|
461
456
|
constants = set()
|
|
462
457
|
|
|
463
|
-
|
|
458
|
+
assert self.project is not None
|
|
459
|
+
if not self.project.loader.main_object.contains_addr(self.addr):
|
|
464
460
|
return constants
|
|
465
461
|
|
|
466
462
|
# FIXME the old way was better for architectures like mips, but we need the initial irsb
|
|
467
463
|
# reanalyze function with a new initial state (use persistent registers)
|
|
468
464
|
# initial_state = self._function_manager._cfg.get_any_irsb(self.addr).initial_state
|
|
469
|
-
# fresh_state = self.
|
|
465
|
+
# fresh_state = self.project.factory.blank_state(mode="fastpath")
|
|
470
466
|
# for reg in initial_state.arch.persistent_regs + ['ip']:
|
|
471
467
|
# fresh_state.registers.store(reg, initial_state.registers.load(reg))
|
|
472
468
|
|
|
473
469
|
# reanalyze function with a new initial state
|
|
474
|
-
fresh_state = self.
|
|
470
|
+
fresh_state = self.project.factory.blank_state(mode="fastpath")
|
|
475
471
|
fresh_state.regs.ip = self.addr
|
|
476
472
|
|
|
477
473
|
graph_addrs = {x.addr for x in self.graph.nodes() if isinstance(x, BlockNode)}
|
|
@@ -486,10 +482,10 @@ class Function(Serializable):
|
|
|
486
482
|
if state.solver.eval(state.ip) not in graph_addrs:
|
|
487
483
|
continue
|
|
488
484
|
# don't trace into simprocedures
|
|
489
|
-
if self.
|
|
485
|
+
if self.project.is_hooked(state.solver.eval(state.ip)):
|
|
490
486
|
continue
|
|
491
487
|
# don't trace outside of the binary
|
|
492
|
-
if not self.
|
|
488
|
+
if not self.project.loader.main_object.contains_addr(state.solver.eval(state.ip)):
|
|
493
489
|
continue
|
|
494
490
|
# don't trace unreachable blocks
|
|
495
491
|
if state.history.jumpkind in {
|
|
@@ -506,7 +502,7 @@ class Function(Serializable):
|
|
|
506
502
|
curr_ip = state.solver.eval(state.ip)
|
|
507
503
|
|
|
508
504
|
# get runtime values from logs of successors
|
|
509
|
-
successors = self.
|
|
505
|
+
successors = self.project.factory.successors(state)
|
|
510
506
|
for succ in successors.flat_successors + successors.unsat_successors:
|
|
511
507
|
for a in succ.history.recent_actions:
|
|
512
508
|
for ao in a.all_objects:
|
|
@@ -562,7 +558,7 @@ class Function(Serializable):
|
|
|
562
558
|
f" SP difference: {self.sp_delta}\n"
|
|
563
559
|
f" Has return: {self.has_return}\n"
|
|
564
560
|
f" Returning: {'Unknown' if self.returning is None else self.returning}\n"
|
|
565
|
-
f" Alignment: {self.
|
|
561
|
+
f" Alignment: {self.is_alignment}\n"
|
|
566
562
|
f" Arguments: reg: {self._argument_registers}, stack: {self._argument_stack_variables}\n"
|
|
567
563
|
f" Blocks: [{', '.join(f'{i:#x}' for i in self.block_addrs)}]\n"
|
|
568
564
|
f" Cyclomatic Complexity: {self.cyclomatic_complexity}\n"
|
|
@@ -612,7 +608,7 @@ class Function(Serializable):
|
|
|
612
608
|
|
|
613
609
|
@property
|
|
614
610
|
def size(self):
|
|
615
|
-
return sum(self._block_sizes.
|
|
611
|
+
return sum(self._block_sizes[addr] for addr in self._local_blocks)
|
|
616
612
|
|
|
617
613
|
@property
|
|
618
614
|
def binary(self):
|
|
@@ -620,8 +616,8 @@ class Function(Serializable):
|
|
|
620
616
|
Get the object this function belongs to.
|
|
621
617
|
:return: The object this function belongs to.
|
|
622
618
|
"""
|
|
623
|
-
|
|
624
|
-
return self.
|
|
619
|
+
assert self.project is not None
|
|
620
|
+
return self.project.loader.find_object_containing(self.addr, membership_check=False)
|
|
625
621
|
|
|
626
622
|
@property
|
|
627
623
|
def offset(self) -> int:
|
|
@@ -698,10 +694,12 @@ class Function(Serializable):
|
|
|
698
694
|
project = self.project
|
|
699
695
|
if project.is_hooked(addr):
|
|
700
696
|
hooker = project.hooked_by(addr)
|
|
701
|
-
|
|
697
|
+
if hooker is not None:
|
|
698
|
+
name = hooker.display_name
|
|
702
699
|
elif project.simos.is_syscall_addr(addr):
|
|
703
700
|
syscall_inst = project.simos.syscall_from_addr(addr)
|
|
704
|
-
|
|
701
|
+
if syscall_inst is not None:
|
|
702
|
+
name = syscall_inst.display_name
|
|
705
703
|
|
|
706
704
|
# generate an IDA-style sub_X name
|
|
707
705
|
if name is None:
|
|
@@ -1338,7 +1336,8 @@ class Function(Serializable):
|
|
|
1338
1336
|
|
|
1339
1337
|
@property
|
|
1340
1338
|
def callable(self):
|
|
1341
|
-
|
|
1339
|
+
assert self.project is not None
|
|
1340
|
+
return self.project.factory.callable(self.addr)
|
|
1342
1341
|
|
|
1343
1342
|
def normalize(self):
|
|
1344
1343
|
"""
|
|
@@ -1349,6 +1348,7 @@ class Function(Serializable):
|
|
|
1349
1348
|
|
|
1350
1349
|
:return: None
|
|
1351
1350
|
"""
|
|
1351
|
+
assert self.project is not None
|
|
1352
1352
|
|
|
1353
1353
|
# let's put a check here
|
|
1354
1354
|
if self.startpoint is None:
|
|
@@ -1377,8 +1377,8 @@ class Function(Serializable):
|
|
|
1377
1377
|
|
|
1378
1378
|
# Break other nodes
|
|
1379
1379
|
for n in other_nodes:
|
|
1380
|
-
new_size = get_real_address_if_arm(self.
|
|
1381
|
-
self.
|
|
1380
|
+
new_size = get_real_address_if_arm(self.project.arch, smallest_node.addr) - get_real_address_if_arm(
|
|
1381
|
+
self.project.arch, n.addr
|
|
1382
1382
|
)
|
|
1383
1383
|
if new_size == 0:
|
|
1384
1384
|
# This is the node that has the same size as the smallest one
|
|
@@ -1511,20 +1511,21 @@ class Function(Serializable):
|
|
|
1511
1511
|
lib = SIM_LIBRARIES.get(binary_name, None)
|
|
1512
1512
|
libraries = set()
|
|
1513
1513
|
if lib is not None:
|
|
1514
|
-
libraries.
|
|
1514
|
+
libraries.update(lib)
|
|
1515
1515
|
|
|
1516
1516
|
else:
|
|
1517
1517
|
# try all libraries or all libraries that match the given library name hint
|
|
1518
1518
|
libraries = set()
|
|
1519
|
-
for lib_name,
|
|
1519
|
+
for lib_name, libs in SIM_LIBRARIES.items():
|
|
1520
1520
|
# TODO: Add support for syscall libraries. Note that syscall libraries have different function
|
|
1521
1521
|
# prototypes for .has_prototype() and .get_prototype()...
|
|
1522
|
-
|
|
1523
|
-
if
|
|
1524
|
-
if binary_name_hint
|
|
1522
|
+
for lib in libs:
|
|
1523
|
+
if not isinstance(lib, SimSyscallLibrary):
|
|
1524
|
+
if binary_name_hint:
|
|
1525
|
+
if binary_name_hint.lower() in lib_name.lower():
|
|
1526
|
+
libraries.add(lib)
|
|
1527
|
+
else:
|
|
1525
1528
|
libraries.add(lib)
|
|
1526
|
-
else:
|
|
1527
|
-
libraries.add(lib)
|
|
1528
1529
|
|
|
1529
1530
|
if not libraries:
|
|
1530
1531
|
return False
|
|
@@ -1597,6 +1598,7 @@ class Function(Serializable):
|
|
|
1597
1598
|
::<addr>::<name> when the function binary is an unnamed non-main object, or when multiple functions with
|
|
1598
1599
|
the same name are defined in the function binary.
|
|
1599
1600
|
"""
|
|
1601
|
+
assert self.project is not None
|
|
1600
1602
|
must_disambiguate_by_addr = self.binary is not self.project.loader.main_object and self.binary_name is None
|
|
1601
1603
|
|
|
1602
1604
|
# If there are multiple functions with the same name in the same object, disambiguate by address
|
|
@@ -1615,6 +1617,7 @@ class Function(Serializable):
|
|
|
1615
1617
|
return n + (display_name or self.name)
|
|
1616
1618
|
|
|
1617
1619
|
def apply_definition(self, definition: str, calling_convention: SimCC | type[SimCC] | None = None) -> None:
|
|
1620
|
+
assert self.project is not None
|
|
1618
1621
|
if not definition.endswith(";"):
|
|
1619
1622
|
definition += ";"
|
|
1620
1623
|
func_def = parse_defns(definition, arch=self.project.arch)
|
|
@@ -1677,7 +1680,7 @@ class Function(Serializable):
|
|
|
1677
1680
|
func.calling_convention = self.calling_convention
|
|
1678
1681
|
func.prototype = self.prototype
|
|
1679
1682
|
func._returning = self._returning
|
|
1680
|
-
func.
|
|
1683
|
+
func.is_alignment = self.is_alignment
|
|
1681
1684
|
func.startpoint = self.startpoint
|
|
1682
1685
|
func._addr_to_block_node = self._addr_to_block_node.copy()
|
|
1683
1686
|
func._block_sizes = self._block_sizes.copy()
|
|
@@ -505,6 +505,7 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
|
|
|
505
505
|
|
|
506
506
|
def rebuild_callgraph(self):
|
|
507
507
|
self.callgraph = networkx.MultiDiGraph()
|
|
508
|
+
cfg = self._kb.cfgs.get_most_accurate()
|
|
508
509
|
for func_addr in self._function_map:
|
|
509
510
|
self.callgraph.add_node(func_addr)
|
|
510
511
|
for func in self._function_map.values():
|
|
@@ -512,6 +513,14 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
|
|
|
512
513
|
for node in func.transition_graph.nodes():
|
|
513
514
|
if isinstance(node, Function):
|
|
514
515
|
self.callgraph.add_edge(func.addr, node.addr)
|
|
516
|
+
else:
|
|
517
|
+
cfgnode = cfg.get_any_node(node.addr)
|
|
518
|
+
if (
|
|
519
|
+
cfgnode is not None
|
|
520
|
+
and cfgnode.function_address is not None
|
|
521
|
+
and cfgnode.function_address != func.addr
|
|
522
|
+
):
|
|
523
|
+
self.callgraph.add_edge(func.addr, cfgnode.function_address)
|
|
515
524
|
|
|
516
525
|
|
|
517
526
|
KnowledgeBasePlugin.register_default("functions", FunctionManager)
|