angr 9.2.147__py3-none-manylinux2014_aarch64.whl → 9.2.149__py3-none-manylinux2014_aarch64.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/analysis.py +3 -11
- angr/analyses/calling_convention/calling_convention.py +42 -2
- angr/analyses/calling_convention/fact_collector.py +5 -4
- angr/analyses/calling_convention/utils.py +1 -0
- angr/analyses/cfg/cfg_base.py +3 -59
- angr/analyses/cfg/cfg_emulated.py +17 -14
- angr/analyses/cfg/cfg_fast.py +68 -63
- angr/analyses/cfg/cfg_fast_soot.py +3 -3
- angr/analyses/decompiler/ail_simplifier.py +65 -32
- angr/analyses/decompiler/block_simplifier.py +20 -6
- angr/analyses/decompiler/callsite_maker.py +28 -18
- angr/analyses/decompiler/clinic.py +84 -17
- angr/analyses/decompiler/condition_processor.py +0 -21
- angr/analyses/decompiler/counters/call_counter.py +3 -0
- angr/analyses/decompiler/dephication/rewriting_engine.py +24 -2
- angr/analyses/decompiler/optimization_passes/__init__.py +5 -0
- angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +15 -13
- angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +1 -1
- angr/analyses/decompiler/optimization_passes/determine_load_sizes.py +64 -0
- angr/analyses/decompiler/optimization_passes/eager_std_string_concatenation.py +165 -0
- angr/analyses/decompiler/optimization_passes/engine_base.py +11 -2
- angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +17 -2
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +10 -6
- angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +99 -30
- angr/analyses/decompiler/peephole_optimizations/__init__.py +6 -0
- angr/analyses/decompiler/peephole_optimizations/base.py +43 -3
- angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +3 -0
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +4 -1
- angr/analyses/decompiler/peephole_optimizations/remove_cxx_destructor_calls.py +32 -0
- angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +69 -2
- angr/analyses/decompiler/peephole_optimizations/remove_redundant_conversions.py +14 -0
- angr/analyses/decompiler/peephole_optimizations/rewrite_conv_mul.py +40 -0
- angr/analyses/decompiler/peephole_optimizations/rewrite_cxx_operator_calls.py +90 -0
- angr/analyses/decompiler/presets/fast.py +2 -0
- angr/analyses/decompiler/presets/full.py +2 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +51 -4
- angr/analyses/decompiler/ssailification/ssailification.py +23 -3
- angr/analyses/decompiler/ssailification/traversal_engine.py +15 -1
- angr/analyses/decompiler/structured_codegen/c.py +146 -15
- angr/analyses/decompiler/structuring/phoenix.py +11 -3
- angr/analyses/decompiler/utils.py +6 -1
- angr/analyses/deobfuscator/api_obf_finder.py +5 -1
- angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +1 -1
- angr/analyses/forward_analysis/visitors/graph.py +0 -8
- angr/analyses/identifier/runner.py +1 -1
- angr/analyses/reaching_definitions/function_handler.py +4 -4
- angr/analyses/reassembler.py +1 -1
- angr/analyses/s_reaching_definitions/s_rda_view.py +1 -0
- angr/analyses/stack_pointer_tracker.py +1 -1
- angr/analyses/static_hooker.py +11 -9
- angr/analyses/typehoon/lifter.py +20 -0
- angr/analyses/typehoon/simple_solver.py +42 -9
- angr/analyses/typehoon/translator.py +4 -1
- angr/analyses/typehoon/typeconsts.py +17 -6
- angr/analyses/typehoon/typehoon.py +21 -5
- angr/analyses/variable_recovery/engine_ail.py +52 -13
- angr/analyses/variable_recovery/engine_base.py +37 -12
- angr/analyses/variable_recovery/variable_recovery_fast.py +33 -2
- angr/calling_conventions.py +96 -27
- angr/engines/light/engine.py +7 -0
- angr/exploration_techniques/director.py +1 -1
- angr/knowledge_plugins/functions/function.py +109 -38
- angr/knowledge_plugins/functions/function_manager.py +9 -0
- angr/knowledge_plugins/functions/function_parser.py +9 -1
- angr/knowledge_plugins/functions/soot_function.py +1 -1
- angr/knowledge_plugins/key_definitions/key_definition_manager.py +1 -1
- angr/knowledge_plugins/propagations/states.py +5 -2
- angr/knowledge_plugins/variables/variable_manager.py +3 -3
- angr/procedures/definitions/__init__.py +15 -12
- angr/procedures/definitions/types_stl.py +22 -0
- angr/procedures/stubs/format_parser.py +1 -1
- angr/project.py +23 -29
- angr/protos/cfg_pb2.py +14 -25
- angr/protos/function_pb2.py +11 -22
- angr/protos/primitives_pb2.py +36 -47
- angr/protos/variables_pb2.py +28 -39
- angr/protos/xrefs_pb2.py +8 -19
- angr/sim_type.py +251 -146
- angr/simos/cgc.py +1 -1
- angr/simos/linux.py +5 -5
- angr/simos/windows.py +5 -5
- angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +1 -1
- {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/METADATA +9 -8
- {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/RECORD +90 -84
- {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/WHEEL +1 -1
- {angr-9.2.147.dist-info → angr-9.2.149.dist-info/licenses}/LICENSE +3 -0
- {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/entry_points.txt +0 -0
- {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/top_level.txt +0 -0
|
@@ -23,10 +23,10 @@ from angr.procedures import SIM_LIBRARIES
|
|
|
23
23
|
from angr.procedures.definitions import SimSyscallLibrary
|
|
24
24
|
from angr.protos import function_pb2
|
|
25
25
|
from angr.calling_conventions import DEFAULT_CC, default_cc
|
|
26
|
-
from angr.misc.ux import deprecated
|
|
27
26
|
from angr.sim_type import SimTypeFunction, parse_defns
|
|
28
27
|
from angr.calling_conventions import SimCC
|
|
29
28
|
from angr.project import Project
|
|
29
|
+
from angr.utils.library import get_cpp_function_name
|
|
30
30
|
from .function_parser import FunctionParser
|
|
31
31
|
|
|
32
32
|
l = logging.getLogger(name=__name__)
|
|
@@ -92,6 +92,10 @@ class Function(Serializable):
|
|
|
92
92
|
is_plt: bool | None = None,
|
|
93
93
|
returning=None,
|
|
94
94
|
alignment=False,
|
|
95
|
+
calling_convention: SimCC | None = None,
|
|
96
|
+
prototype: SimTypeFunction | None = None,
|
|
97
|
+
prototype_libname: str | None = None,
|
|
98
|
+
is_prototype_guessed: bool = True,
|
|
95
99
|
):
|
|
96
100
|
"""
|
|
97
101
|
Function constructor. If the optional parameters are not provided, they will be automatically determined upon
|
|
@@ -139,11 +143,11 @@ class Function(Serializable):
|
|
|
139
143
|
self.retaddr_on_stack = False
|
|
140
144
|
self.sp_delta = 0
|
|
141
145
|
# Calling convention
|
|
142
|
-
self.calling_convention
|
|
146
|
+
self.calling_convention = calling_convention
|
|
143
147
|
# Function prototype
|
|
144
|
-
self.prototype
|
|
145
|
-
self.prototype_libname
|
|
146
|
-
self.is_prototype_guessed
|
|
148
|
+
self.prototype = prototype
|
|
149
|
+
self.prototype_libname = prototype_libname
|
|
150
|
+
self.is_prototype_guessed = is_prototype_guessed
|
|
147
151
|
# Whether this function returns or not. `None` means it's not determined yet
|
|
148
152
|
self._returning = None
|
|
149
153
|
|
|
@@ -239,15 +243,6 @@ class Function(Serializable):
|
|
|
239
243
|
|
|
240
244
|
self._init_prototype_and_calling_convention()
|
|
241
245
|
|
|
242
|
-
@property
|
|
243
|
-
@deprecated(".is_alignment")
|
|
244
|
-
def alignment(self):
|
|
245
|
-
return self.is_alignment
|
|
246
|
-
|
|
247
|
-
@alignment.setter
|
|
248
|
-
def alignment(self, value):
|
|
249
|
-
self.is_alignment = value
|
|
250
|
-
|
|
251
246
|
@property
|
|
252
247
|
def name(self):
|
|
253
248
|
return self._name
|
|
@@ -357,7 +352,8 @@ class Function(Serializable):
|
|
|
357
352
|
# we know the size
|
|
358
353
|
size = self._block_sizes[addr]
|
|
359
354
|
|
|
360
|
-
|
|
355
|
+
assert self.project is not None
|
|
356
|
+
block = self.project.factory.block(addr, size=size, byte_string=byte_string)
|
|
361
357
|
if size is None:
|
|
362
358
|
# update block_size dict
|
|
363
359
|
self._block_sizes[addr] = block.size
|
|
@@ -460,18 +456,19 @@ class Function(Serializable):
|
|
|
460
456
|
"""
|
|
461
457
|
constants = set()
|
|
462
458
|
|
|
463
|
-
|
|
459
|
+
assert self.project is not None
|
|
460
|
+
if not self.project.loader.main_object.contains_addr(self.addr):
|
|
464
461
|
return constants
|
|
465
462
|
|
|
466
463
|
# FIXME the old way was better for architectures like mips, but we need the initial irsb
|
|
467
464
|
# reanalyze function with a new initial state (use persistent registers)
|
|
468
465
|
# initial_state = self._function_manager._cfg.get_any_irsb(self.addr).initial_state
|
|
469
|
-
# fresh_state = self.
|
|
466
|
+
# fresh_state = self.project.factory.blank_state(mode="fastpath")
|
|
470
467
|
# for reg in initial_state.arch.persistent_regs + ['ip']:
|
|
471
468
|
# fresh_state.registers.store(reg, initial_state.registers.load(reg))
|
|
472
469
|
|
|
473
470
|
# reanalyze function with a new initial state
|
|
474
|
-
fresh_state = self.
|
|
471
|
+
fresh_state = self.project.factory.blank_state(mode="fastpath")
|
|
475
472
|
fresh_state.regs.ip = self.addr
|
|
476
473
|
|
|
477
474
|
graph_addrs = {x.addr for x in self.graph.nodes() if isinstance(x, BlockNode)}
|
|
@@ -486,10 +483,10 @@ class Function(Serializable):
|
|
|
486
483
|
if state.solver.eval(state.ip) not in graph_addrs:
|
|
487
484
|
continue
|
|
488
485
|
# don't trace into simprocedures
|
|
489
|
-
if self.
|
|
486
|
+
if self.project.is_hooked(state.solver.eval(state.ip)):
|
|
490
487
|
continue
|
|
491
488
|
# don't trace outside of the binary
|
|
492
|
-
if not self.
|
|
489
|
+
if not self.project.loader.main_object.contains_addr(state.solver.eval(state.ip)):
|
|
493
490
|
continue
|
|
494
491
|
# don't trace unreachable blocks
|
|
495
492
|
if state.history.jumpkind in {
|
|
@@ -506,7 +503,7 @@ class Function(Serializable):
|
|
|
506
503
|
curr_ip = state.solver.eval(state.ip)
|
|
507
504
|
|
|
508
505
|
# get runtime values from logs of successors
|
|
509
|
-
successors = self.
|
|
506
|
+
successors = self.project.factory.successors(state)
|
|
510
507
|
for succ in successors.flat_successors + successors.unsat_successors:
|
|
511
508
|
for a in succ.history.recent_actions:
|
|
512
509
|
for ao in a.all_objects:
|
|
@@ -562,7 +559,7 @@ class Function(Serializable):
|
|
|
562
559
|
f" SP difference: {self.sp_delta}\n"
|
|
563
560
|
f" Has return: {self.has_return}\n"
|
|
564
561
|
f" Returning: {'Unknown' if self.returning is None else self.returning}\n"
|
|
565
|
-
f" Alignment: {self.
|
|
562
|
+
f" Alignment: {self.is_alignment}\n"
|
|
566
563
|
f" Arguments: reg: {self._argument_registers}, stack: {self._argument_stack_variables}\n"
|
|
567
564
|
f" Blocks: [{', '.join(f'{i:#x}' for i in self.block_addrs)}]\n"
|
|
568
565
|
f" Cyclomatic Complexity: {self.cyclomatic_complexity}\n"
|
|
@@ -612,7 +609,7 @@ class Function(Serializable):
|
|
|
612
609
|
|
|
613
610
|
@property
|
|
614
611
|
def size(self):
|
|
615
|
-
return sum(self._block_sizes.
|
|
612
|
+
return sum(self._block_sizes[addr] for addr in self._local_blocks)
|
|
616
613
|
|
|
617
614
|
@property
|
|
618
615
|
def binary(self):
|
|
@@ -620,8 +617,8 @@ class Function(Serializable):
|
|
|
620
617
|
Get the object this function belongs to.
|
|
621
618
|
:return: The object this function belongs to.
|
|
622
619
|
"""
|
|
623
|
-
|
|
624
|
-
return self.
|
|
620
|
+
assert self.project is not None
|
|
621
|
+
return self.project.loader.find_object_containing(self.addr, membership_check=False)
|
|
625
622
|
|
|
626
623
|
@property
|
|
627
624
|
def offset(self) -> int:
|
|
@@ -698,10 +695,12 @@ class Function(Serializable):
|
|
|
698
695
|
project = self.project
|
|
699
696
|
if project.is_hooked(addr):
|
|
700
697
|
hooker = project.hooked_by(addr)
|
|
701
|
-
|
|
698
|
+
if hooker is not None:
|
|
699
|
+
name = hooker.display_name
|
|
702
700
|
elif project.simos.is_syscall_addr(addr):
|
|
703
701
|
syscall_inst = project.simos.syscall_from_addr(addr)
|
|
704
|
-
|
|
702
|
+
if syscall_inst is not None:
|
|
703
|
+
name = syscall_inst.display_name
|
|
705
704
|
|
|
706
705
|
# generate an IDA-style sub_X name
|
|
707
706
|
if name is None:
|
|
@@ -1338,7 +1337,8 @@ class Function(Serializable):
|
|
|
1338
1337
|
|
|
1339
1338
|
@property
|
|
1340
1339
|
def callable(self):
|
|
1341
|
-
|
|
1340
|
+
assert self.project is not None
|
|
1341
|
+
return self.project.factory.callable(self.addr)
|
|
1342
1342
|
|
|
1343
1343
|
def normalize(self):
|
|
1344
1344
|
"""
|
|
@@ -1349,6 +1349,7 @@ class Function(Serializable):
|
|
|
1349
1349
|
|
|
1350
1350
|
:return: None
|
|
1351
1351
|
"""
|
|
1352
|
+
assert self.project is not None
|
|
1352
1353
|
|
|
1353
1354
|
# let's put a check here
|
|
1354
1355
|
if self.startpoint is None:
|
|
@@ -1377,8 +1378,8 @@ class Function(Serializable):
|
|
|
1377
1378
|
|
|
1378
1379
|
# Break other nodes
|
|
1379
1380
|
for n in other_nodes:
|
|
1380
|
-
new_size = get_real_address_if_arm(self.
|
|
1381
|
-
self.
|
|
1381
|
+
new_size = get_real_address_if_arm(self.project.arch, smallest_node.addr) - get_real_address_if_arm(
|
|
1382
|
+
self.project.arch, n.addr
|
|
1382
1383
|
)
|
|
1383
1384
|
if new_size == 0:
|
|
1384
1385
|
# This is the node that has the same size as the smallest one
|
|
@@ -1511,20 +1512,21 @@ class Function(Serializable):
|
|
|
1511
1512
|
lib = SIM_LIBRARIES.get(binary_name, None)
|
|
1512
1513
|
libraries = set()
|
|
1513
1514
|
if lib is not None:
|
|
1514
|
-
libraries.
|
|
1515
|
+
libraries.update(lib)
|
|
1515
1516
|
|
|
1516
1517
|
else:
|
|
1517
1518
|
# try all libraries or all libraries that match the given library name hint
|
|
1518
1519
|
libraries = set()
|
|
1519
|
-
for lib_name,
|
|
1520
|
+
for lib_name, libs in SIM_LIBRARIES.items():
|
|
1520
1521
|
# TODO: Add support for syscall libraries. Note that syscall libraries have different function
|
|
1521
1522
|
# prototypes for .has_prototype() and .get_prototype()...
|
|
1522
|
-
|
|
1523
|
-
if
|
|
1524
|
-
if binary_name_hint
|
|
1523
|
+
for lib in libs:
|
|
1524
|
+
if not isinstance(lib, SimSyscallLibrary):
|
|
1525
|
+
if binary_name_hint:
|
|
1526
|
+
if binary_name_hint.lower() in lib_name.lower():
|
|
1527
|
+
libraries.add(lib)
|
|
1528
|
+
else:
|
|
1525
1529
|
libraries.add(lib)
|
|
1526
|
-
else:
|
|
1527
|
-
libraries.add(lib)
|
|
1528
1530
|
|
|
1529
1531
|
if not libraries:
|
|
1530
1532
|
return False
|
|
@@ -1581,11 +1583,78 @@ class Function(Serializable):
|
|
|
1581
1583
|
# int, long
|
|
1582
1584
|
return addr
|
|
1583
1585
|
|
|
1586
|
+
def is_rust_function(self):
|
|
1587
|
+
ast = pydemumble.demangle(self.name)
|
|
1588
|
+
if ast:
|
|
1589
|
+
nodes = ast.split("::")
|
|
1590
|
+
if len(nodes) >= 2:
|
|
1591
|
+
last_node = nodes[-1]
|
|
1592
|
+
return (
|
|
1593
|
+
len(last_node) == 17
|
|
1594
|
+
and last_node.startswith("h")
|
|
1595
|
+
and all(c in "0123456789abcdef" for c in last_node[1:])
|
|
1596
|
+
)
|
|
1597
|
+
return False
|
|
1598
|
+
|
|
1599
|
+
@staticmethod
|
|
1600
|
+
def _rust_fmt_node(node):
|
|
1601
|
+
result = []
|
|
1602
|
+
rest = node
|
|
1603
|
+
if rest.startswith("_$"):
|
|
1604
|
+
rest = rest[1:]
|
|
1605
|
+
while True:
|
|
1606
|
+
if rest.startswith("."):
|
|
1607
|
+
if len(rest) > 1 and rest[1] == ".":
|
|
1608
|
+
result.append("::")
|
|
1609
|
+
rest = rest[2:]
|
|
1610
|
+
else:
|
|
1611
|
+
result.append(".")
|
|
1612
|
+
rest = rest[1:]
|
|
1613
|
+
elif rest.startswith("$"):
|
|
1614
|
+
if "$" in rest[1:]:
|
|
1615
|
+
escape, rest = rest[1:].split("$", 1)
|
|
1616
|
+
else:
|
|
1617
|
+
break
|
|
1618
|
+
|
|
1619
|
+
unescaped = {"SP": "@", "BP": "*", "RF": "&", "LT": "<", "GT": ">", "LP": "(", "RP": ")", "C": ","}.get(
|
|
1620
|
+
escape
|
|
1621
|
+
)
|
|
1622
|
+
|
|
1623
|
+
if unescaped is None and escape.startswith("u"):
|
|
1624
|
+
digits = escape[1:]
|
|
1625
|
+
if all(c in "0123456789abcdef" for c in digits):
|
|
1626
|
+
c = chr(int(digits, 16))
|
|
1627
|
+
if ord(c) >= 32 and ord(c) != 127:
|
|
1628
|
+
result.append(c)
|
|
1629
|
+
continue
|
|
1630
|
+
if unescaped:
|
|
1631
|
+
result.append(unescaped)
|
|
1632
|
+
else:
|
|
1633
|
+
break
|
|
1634
|
+
else:
|
|
1635
|
+
idx = min((rest.find(c) for c in "$." if c in rest), default=len(rest))
|
|
1636
|
+
result.append(rest[:idx])
|
|
1637
|
+
rest = rest[idx:]
|
|
1638
|
+
if not rest:
|
|
1639
|
+
break
|
|
1640
|
+
return "".join(result)
|
|
1641
|
+
|
|
1584
1642
|
@property
|
|
1585
1643
|
def demangled_name(self):
|
|
1586
1644
|
ast = pydemumble.demangle(self.name)
|
|
1645
|
+
if self.is_rust_function():
|
|
1646
|
+
nodes = ast.split("::")[:-1]
|
|
1647
|
+
ast = "::".join([Function._rust_fmt_node(node) for node in nodes])
|
|
1587
1648
|
return ast if ast else self.name
|
|
1588
1649
|
|
|
1650
|
+
@property
|
|
1651
|
+
def short_name(self):
|
|
1652
|
+
if self.is_rust_function():
|
|
1653
|
+
ast = pydemumble.demangle(self.name)
|
|
1654
|
+
return Function._rust_fmt_node(ast.split("::")[-2])
|
|
1655
|
+
func_name = get_cpp_function_name(self.demangled_name, specialized=False, qualified=True)
|
|
1656
|
+
return func_name.split("::")[-1]
|
|
1657
|
+
|
|
1589
1658
|
def get_unambiguous_name(self, display_name: str | None = None) -> str:
|
|
1590
1659
|
"""
|
|
1591
1660
|
Get a disambiguated function name.
|
|
@@ -1597,6 +1666,7 @@ class Function(Serializable):
|
|
|
1597
1666
|
::<addr>::<name> when the function binary is an unnamed non-main object, or when multiple functions with
|
|
1598
1667
|
the same name are defined in the function binary.
|
|
1599
1668
|
"""
|
|
1669
|
+
assert self.project is not None
|
|
1600
1670
|
must_disambiguate_by_addr = self.binary is not self.project.loader.main_object and self.binary_name is None
|
|
1601
1671
|
|
|
1602
1672
|
# If there are multiple functions with the same name in the same object, disambiguate by address
|
|
@@ -1615,6 +1685,7 @@ class Function(Serializable):
|
|
|
1615
1685
|
return n + (display_name or self.name)
|
|
1616
1686
|
|
|
1617
1687
|
def apply_definition(self, definition: str, calling_convention: SimCC | type[SimCC] | None = None) -> None:
|
|
1688
|
+
assert self.project is not None
|
|
1618
1689
|
if not definition.endswith(";"):
|
|
1619
1690
|
definition += ";"
|
|
1620
1691
|
func_def = parse_defns(definition, arch=self.project.arch)
|
|
@@ -1677,7 +1748,7 @@ class Function(Serializable):
|
|
|
1677
1748
|
func.calling_convention = self.calling_convention
|
|
1678
1749
|
func.prototype = self.prototype
|
|
1679
1750
|
func._returning = self._returning
|
|
1680
|
-
func.alignment = self.alignment
|
|
1751
|
+
func.is_alignment = self.is_alignment
|
|
1681
1752
|
func.startpoint = self.startpoint
|
|
1682
1753
|
func._addr_to_block_node = self._addr_to_block_node.copy()
|
|
1683
1754
|
func._block_sizes = self._block_sizes.copy()
|
|
@@ -505,6 +505,7 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
|
|
|
505
505
|
|
|
506
506
|
def rebuild_callgraph(self):
|
|
507
507
|
self.callgraph = networkx.MultiDiGraph()
|
|
508
|
+
cfg = self._kb.cfgs.get_most_accurate()
|
|
508
509
|
for func_addr in self._function_map:
|
|
509
510
|
self.callgraph.add_node(func_addr)
|
|
510
511
|
for func in self._function_map.values():
|
|
@@ -512,6 +513,14 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
|
|
|
512
513
|
for node in func.transition_graph.nodes():
|
|
513
514
|
if isinstance(node, Function):
|
|
514
515
|
self.callgraph.add_edge(func.addr, node.addr)
|
|
516
|
+
else:
|
|
517
|
+
cfgnode = cfg.get_any_node(node.addr)
|
|
518
|
+
if (
|
|
519
|
+
cfgnode is not None
|
|
520
|
+
and cfgnode.function_address is not None
|
|
521
|
+
and cfgnode.function_address != func.addr
|
|
522
|
+
):
|
|
523
|
+
self.callgraph.add_edge(func.addr, cfgnode.function_address)
|
|
515
524
|
|
|
516
525
|
|
|
517
526
|
KnowledgeBasePlugin.register_default("functions", FunctionManager)
|
|
@@ -36,6 +36,10 @@ class FunctionParser:
|
|
|
36
36
|
obj.alignment = function.is_alignment
|
|
37
37
|
obj.binary_name = function.binary_name or ""
|
|
38
38
|
obj.normalized = function.normalized
|
|
39
|
+
obj.calling_convention = pickle.dumps(function.calling_convention)
|
|
40
|
+
obj.prototype = pickle.dumps(function.prototype)
|
|
41
|
+
obj.prototype_libname = (function.prototype_libname or "").encode()
|
|
42
|
+
obj.is_prototype_guessed = function.is_prototype_guessed
|
|
39
43
|
|
|
40
44
|
# signature matched?
|
|
41
45
|
if not function.from_signature:
|
|
@@ -107,6 +111,10 @@ class FunctionParser:
|
|
|
107
111
|
returning=cmsg.returning,
|
|
108
112
|
alignment=cmsg.alignment,
|
|
109
113
|
binary_name=None if not cmsg.binary_name else cmsg.binary_name,
|
|
114
|
+
calling_convention=pickle.loads(cmsg.calling_convention),
|
|
115
|
+
prototype=pickle.loads(cmsg.prototype),
|
|
116
|
+
prototype_libname=cmsg.prototype_libname if cmsg.prototype_libname else None,
|
|
117
|
+
is_prototype_guessed=cmsg.is_prototype_guessed,
|
|
110
118
|
)
|
|
111
119
|
obj._project = project
|
|
112
120
|
obj.normalized = cmsg.normalized
|
|
@@ -209,7 +217,7 @@ class FunctionParser:
|
|
|
209
217
|
stmt_idx=stmt_idx,
|
|
210
218
|
is_exception=edge_type == "exception",
|
|
211
219
|
)
|
|
212
|
-
elif edge_type
|
|
220
|
+
elif edge_type in ("call", "syscall"):
|
|
213
221
|
# find the corresponding fake_ret edge
|
|
214
222
|
fake_ret_edge = next(
|
|
215
223
|
iter(edge_ for edge_ in fake_return_edges[src_addr] if edge_[1].addr == src.addr + src.size), None
|
|
@@ -83,7 +83,7 @@ class SootFunction(Function):
|
|
|
83
83
|
# Whether this function returns or not. `None` means it's not determined yet
|
|
84
84
|
self._returning = None
|
|
85
85
|
|
|
86
|
-
self.alignment = None
|
|
86
|
+
self.is_alignment = None
|
|
87
87
|
|
|
88
88
|
# Determine returning status for SimProcedures and Syscalls
|
|
89
89
|
hooker = None
|
|
@@ -51,7 +51,7 @@ class KeyDefinitionManager(KnowledgeBasePlugin):
|
|
|
51
51
|
if not self._kb.functions.contains_addr(func_addr):
|
|
52
52
|
return None
|
|
53
53
|
func = self._kb.functions[func_addr]
|
|
54
|
-
if func.is_simprocedure or func.is_plt or func.alignment:
|
|
54
|
+
if func.is_simprocedure or func.is_plt or func.is_alignment:
|
|
55
55
|
return None
|
|
56
56
|
callsites = list(func.get_call_sites())
|
|
57
57
|
if not callsites:
|
|
@@ -527,12 +527,14 @@ class Equivalence:
|
|
|
527
527
|
"atom0",
|
|
528
528
|
"atom1",
|
|
529
529
|
"codeloc",
|
|
530
|
+
"is_weakassignment",
|
|
530
531
|
)
|
|
531
532
|
|
|
532
|
-
def __init__(self, codeloc, atom0, atom1):
|
|
533
|
+
def __init__(self, codeloc, atom0, atom1, is_weakassignment: bool = False):
|
|
533
534
|
self.codeloc = codeloc
|
|
534
535
|
self.atom0 = atom0
|
|
535
536
|
self.atom1 = atom1
|
|
537
|
+
self.is_weakassignment = is_weakassignment
|
|
536
538
|
|
|
537
539
|
def __repr__(self):
|
|
538
540
|
return f"<Eq@{self.codeloc!r}: {self.atom0!r}=={self.atom1!r}>"
|
|
@@ -543,7 +545,8 @@ class Equivalence:
|
|
|
543
545
|
and other.codeloc == self.codeloc
|
|
544
546
|
and other.atom0 == self.atom0
|
|
545
547
|
and other.atom1 == self.atom1
|
|
548
|
+
and other.is_weakassignment == self.is_weakassignment
|
|
546
549
|
)
|
|
547
550
|
|
|
548
551
|
def __hash__(self):
|
|
549
|
-
return hash((Equivalence, self.codeloc, self.atom0, self.atom1))
|
|
552
|
+
return hash((Equivalence, self.codeloc, self.atom0, self.atom1, self.is_weakassignment))
|
|
@@ -934,7 +934,7 @@ class VariableManagerInternal(Serializable):
|
|
|
934
934
|
|
|
935
935
|
for var in chain(sorted_stack_variables, sorted_reg_variables, phi_only_vars):
|
|
936
936
|
idx = next(var_ctr)
|
|
937
|
-
if var.name is not None and not reset:
|
|
937
|
+
if var.name is not None and var.name != var.ident and not reset:
|
|
938
938
|
continue
|
|
939
939
|
if isinstance(var, (SimStackVariable, SimRegisterVariable)):
|
|
940
940
|
var.name = f"v{idx}"
|
|
@@ -946,7 +946,7 @@ class VariableManagerInternal(Serializable):
|
|
|
946
946
|
arg_vars = sorted(arg_vars, key=lambda v: _id_from_varident(v.ident))
|
|
947
947
|
for var in arg_vars:
|
|
948
948
|
idx = next(arg_ctr)
|
|
949
|
-
if var.name is not None and not reset:
|
|
949
|
+
if var.name is not None and var.name != var.ident and not reset:
|
|
950
950
|
continue
|
|
951
951
|
var.name = arg_names[idx] if arg_names else f"a{idx}"
|
|
952
952
|
var._hash = None
|
|
@@ -1040,7 +1040,7 @@ class VariableManagerInternal(Serializable):
|
|
|
1040
1040
|
reg_vars: set[SimRegisterVariable] = set()
|
|
1041
1041
|
|
|
1042
1042
|
# unify stack variables based on their locations
|
|
1043
|
-
for v in self.get_variables():
|
|
1043
|
+
for v in self.get_variables() + list(self._phi_variables):
|
|
1044
1044
|
if v in self._variables_to_unified_variables:
|
|
1045
1045
|
# do not unify twice
|
|
1046
1046
|
continue
|
|
@@ -25,7 +25,7 @@ if TYPE_CHECKING:
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
l = logging.getLogger(name=__name__)
|
|
28
|
-
SIM_LIBRARIES: dict[str, SimLibrary] = {}
|
|
28
|
+
SIM_LIBRARIES: dict[str, list[SimLibrary]] = {}
|
|
29
29
|
SIM_TYPE_COLLECTIONS: dict[str, SimTypeCollection] = {}
|
|
30
30
|
|
|
31
31
|
|
|
@@ -38,8 +38,8 @@ class SimTypeCollection:
|
|
|
38
38
|
self.names: list[str] | None = None
|
|
39
39
|
self.types: dict[str, SimType] = {}
|
|
40
40
|
|
|
41
|
-
def set_names(self, *names):
|
|
42
|
-
self.names = names
|
|
41
|
+
def set_names(self, *names: str):
|
|
42
|
+
self.names = list(names)
|
|
43
43
|
for name in names:
|
|
44
44
|
SIM_TYPE_COLLECTIONS[name] = self
|
|
45
45
|
|
|
@@ -121,7 +121,7 @@ class SimLibrary:
|
|
|
121
121
|
o.names = list(self.names)
|
|
122
122
|
return o
|
|
123
123
|
|
|
124
|
-
def update(self, other):
|
|
124
|
+
def update(self, other: SimLibrary):
|
|
125
125
|
"""
|
|
126
126
|
Augment this SimLibrary with the information from another SimLibrary
|
|
127
127
|
|
|
@@ -147,7 +147,10 @@ class SimLibrary:
|
|
|
147
147
|
"""
|
|
148
148
|
for name in names:
|
|
149
149
|
self.names.append(name)
|
|
150
|
-
|
|
150
|
+
if name in SIM_LIBRARIES:
|
|
151
|
+
SIM_LIBRARIES[name].append(self)
|
|
152
|
+
else:
|
|
153
|
+
SIM_LIBRARIES[name] = [self]
|
|
151
154
|
|
|
152
155
|
def set_default_cc(self, arch_name, cc_cls):
|
|
153
156
|
"""
|
|
@@ -252,7 +255,7 @@ class SimLibrary:
|
|
|
252
255
|
proc.guessed_prototype = False
|
|
253
256
|
if proc.prototype.arg_names is None:
|
|
254
257
|
# Use inspect to extract the parameters from the run python function
|
|
255
|
-
proc.prototype.arg_names = inspect.getfullargspec(proc.run).args[1:]
|
|
258
|
+
proc.prototype.arg_names = tuple(inspect.getfullargspec(proc.run).args[1:])
|
|
256
259
|
if not proc.ARGS_MISMATCH:
|
|
257
260
|
proc.num_args = len(proc.prototype.args)
|
|
258
261
|
if proc.display_name in self.non_returning:
|
|
@@ -394,13 +397,12 @@ class SimCppLibrary(SimLibrary):
|
|
|
394
397
|
stub = super().get_stub(demangled_name, arch)
|
|
395
398
|
# try to determine a prototype from the function name if possible
|
|
396
399
|
if demangled_name != name:
|
|
397
|
-
#
|
|
400
|
+
# mangled function name
|
|
398
401
|
stub.prototype = self._proto_from_demangled_name(demangled_name)
|
|
399
402
|
if stub.prototype is not None:
|
|
400
403
|
stub.prototype = stub.prototype.with_arch(arch)
|
|
401
404
|
stub.guessed_prototype = False
|
|
402
405
|
if not stub.ARGS_MISMATCH:
|
|
403
|
-
stub.cc.num_args = len(stub.prototype.args)
|
|
404
406
|
stub.num_args = len(stub.prototype.args)
|
|
405
407
|
return stub
|
|
406
408
|
|
|
@@ -482,9 +484,10 @@ class SimSyscallLibrary(SimLibrary):
|
|
|
482
484
|
|
|
483
485
|
def update(self, other):
|
|
484
486
|
super().update(other)
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
487
|
+
if isinstance(other, SimSyscallLibrary):
|
|
488
|
+
self.syscall_number_mapping.update(other.syscall_number_mapping)
|
|
489
|
+
self.syscall_name_mapping.update(other.syscall_name_mapping)
|
|
490
|
+
self.default_cc_mapping.update(other.default_cc_mapping)
|
|
488
491
|
|
|
489
492
|
def minimum_syscall_number(self, abi):
|
|
490
493
|
"""
|
|
@@ -523,7 +526,7 @@ class SimSyscallLibrary(SimLibrary):
|
|
|
523
526
|
:param mapping: A dict mapping syscall numbers to function names
|
|
524
527
|
"""
|
|
525
528
|
self.syscall_number_mapping[abi].update(mapping)
|
|
526
|
-
self.syscall_name_mapping[abi].update(
|
|
529
|
+
self.syscall_name_mapping[abi].update({b: a for a, b in mapping.items()})
|
|
527
530
|
|
|
528
531
|
def set_abi_cc(self, abi, cc_cls):
|
|
529
532
|
"""
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# pylint:disable=line-too-long
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from collections import OrderedDict
|
|
4
|
+
|
|
5
|
+
from angr.procedures.definitions import SimTypeCollection
|
|
6
|
+
from angr.sim_type import SimCppClass, SimTypePointer, SimTypeChar, SimTypeInt
|
|
7
|
+
|
|
8
|
+
typelib = SimTypeCollection()
|
|
9
|
+
typelib.set_names("cpp::std")
|
|
10
|
+
typelib.types = {
|
|
11
|
+
"class std::basic_string<char, struct std::char_traits<char>, class std::allocator<char>>": SimCppClass(
|
|
12
|
+
unique_name="class std::basic_string<char, struct std::char_traits<char>, class std::allocator<char>>",
|
|
13
|
+
name="std::string",
|
|
14
|
+
members=OrderedDict(
|
|
15
|
+
[
|
|
16
|
+
("m_data", SimTypePointer(SimTypeChar())),
|
|
17
|
+
("m_size", SimTypeInt(signed=False)),
|
|
18
|
+
("m_capacity", SimTypeInt(signed=False)),
|
|
19
|
+
]
|
|
20
|
+
),
|
|
21
|
+
),
|
|
22
|
+
}
|
|
@@ -164,7 +164,7 @@ class FormatString:
|
|
|
164
164
|
negative = claripy.SLT(target_variable, 0)
|
|
165
165
|
|
|
166
166
|
# how many digits does it take to represent this variable fully?
|
|
167
|
-
max_digits =
|
|
167
|
+
max_digits = math.ceil(math.log(2**bits, base))
|
|
168
168
|
|
|
169
169
|
# how many digits does the format specify?
|
|
170
170
|
spec_digits = component.length_spec
|
angr/project.py
CHANGED
|
@@ -14,7 +14,6 @@ from archinfo.arch_soot import SootAddressDescriptor, ArchSoot
|
|
|
14
14
|
import cle
|
|
15
15
|
from .sim_procedure import SimProcedure
|
|
16
16
|
|
|
17
|
-
from .misc.ux import deprecated
|
|
18
17
|
from .errors import AngrNoPluginError
|
|
19
18
|
|
|
20
19
|
l = logging.getLogger(name=__name__)
|
|
@@ -300,16 +299,17 @@ class Project:
|
|
|
300
299
|
missing_libs = []
|
|
301
300
|
for lib_name in self.loader.missing_dependencies:
|
|
302
301
|
try:
|
|
303
|
-
missing_libs.
|
|
302
|
+
missing_libs.extend(SIM_LIBRARIES[lib_name])
|
|
304
303
|
except KeyError:
|
|
305
304
|
l.info("There are no simprocedures for missing library %s :(", lib_name)
|
|
306
305
|
# additionally provide libraries we _have_ loaded as a fallback fallback
|
|
307
306
|
# this helps in the case that e.g. CLE picked up a linux arm libc to satisfy an android arm binary
|
|
308
307
|
for lib in self.loader.all_objects:
|
|
309
308
|
if lib.provides is not None and lib.provides in SIM_LIBRARIES:
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
missing_libs
|
|
309
|
+
simlibs = SIM_LIBRARIES[lib.provides]
|
|
310
|
+
for simlib in simlibs:
|
|
311
|
+
if simlib not in missing_libs:
|
|
312
|
+
missing_libs.append(simlib)
|
|
313
313
|
|
|
314
314
|
# Step 2: Categorize every "import" symbol in each object.
|
|
315
315
|
# If it's IGNORED, mark it for stubbing
|
|
@@ -362,11 +362,13 @@ class Project:
|
|
|
362
362
|
owner_name = owner_name.lower()
|
|
363
363
|
if owner_name not in SIM_LIBRARIES:
|
|
364
364
|
continue
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
365
|
+
sim_libs = SIM_LIBRARIES[owner_name]
|
|
366
|
+
for sim_lib in sim_libs:
|
|
367
|
+
if not sim_lib.has_implementation(export.name):
|
|
368
|
+
continue
|
|
369
|
+
l.info("Using builtin SimProcedure for %s from %s", export.name, sim_lib.name)
|
|
370
|
+
self.hook_symbol(export.rebased_addr, sim_lib.get(export.name, sim_proc_arch))
|
|
371
|
+
break
|
|
370
372
|
|
|
371
373
|
# Step 2.3: If 2.2 didn't work, check if the symbol wants to be resolved
|
|
372
374
|
# by a library we already know something about. Resolve it appropriately.
|
|
@@ -375,7 +377,7 @@ class Project:
|
|
|
375
377
|
# we still want to try as hard as we can to figure out where it comes from
|
|
376
378
|
# so we can get the calling convention as close to right as possible.
|
|
377
379
|
elif reloc.resolvewith is not None and reloc.resolvewith in SIM_LIBRARIES:
|
|
378
|
-
sim_lib = SIM_LIBRARIES[reloc.resolvewith]
|
|
380
|
+
sim_lib = sorted(SIM_LIBRARIES[reloc.resolvewith], key=lambda lib: lib.has_prototype(export.name))[-1]
|
|
379
381
|
if self._check_user_blacklists(export.name):
|
|
380
382
|
if not func.is_weak:
|
|
381
383
|
l.info("Using stub SimProcedure for unresolved %s from %s", func.name, sim_lib.name)
|
|
@@ -407,7 +409,7 @@ class Project:
|
|
|
407
409
|
if export.name and export.name.startswith("_Z"):
|
|
408
410
|
# GNU C++ name. Use a C++ library to create the stub
|
|
409
411
|
if "libstdc++.so" in SIM_LIBRARIES:
|
|
410
|
-
the_lib = SIM_LIBRARIES["libstdc++.so"]
|
|
412
|
+
the_lib = SIM_LIBRARIES["libstdc++.so"][0]
|
|
411
413
|
else:
|
|
412
414
|
l.critical(
|
|
413
415
|
"Does not find any C++ library in SIM_LIBRARIES. We may not correctly "
|
|
@@ -437,16 +439,17 @@ class Project:
|
|
|
437
439
|
"""
|
|
438
440
|
# First, filter the SIM_LIBRARIES to a reasonable subset based on the hint
|
|
439
441
|
if hint == "win":
|
|
440
|
-
hinted_libs =
|
|
442
|
+
hinted_libs = [lib for lib in SIM_LIBRARIES if lib.endswith(".dll")]
|
|
441
443
|
else:
|
|
442
|
-
hinted_libs =
|
|
444
|
+
hinted_libs = [lib for lib in SIM_LIBRARIES if ".so" in lib]
|
|
443
445
|
|
|
444
446
|
for lib in hinted_libs:
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
447
|
+
for simlib in SIM_LIBRARIES[lib]:
|
|
448
|
+
if simlib.has_implementation(f.name):
|
|
449
|
+
l.debug("Found implementation for %s in %s", f, lib)
|
|
450
|
+
hook_at = f.resolvedby.rebased_addr if f.resolvedby else f.relative_addr # ????
|
|
451
|
+
self.hook_symbol(hook_at, (simlib.get(f.name, self.arch)))
|
|
452
|
+
return True
|
|
450
453
|
|
|
451
454
|
l.debug("Could not find matching SimProcedure for %s, ignoring.", f.name)
|
|
452
455
|
return False
|
|
@@ -826,18 +829,9 @@ class Project:
|
|
|
826
829
|
def __repr__(self):
|
|
827
830
|
return "<Project %s>" % (self.filename if self.filename is not None else "loaded from stream")
|
|
828
831
|
|
|
829
|
-
#
|
|
830
|
-
# Compatibility
|
|
831
|
-
#
|
|
832
|
-
|
|
833
|
-
@property
|
|
834
|
-
@deprecated(replacement="simos")
|
|
835
|
-
def _simos(self):
|
|
836
|
-
return self.simos
|
|
837
|
-
|
|
838
832
|
|
|
839
833
|
from .factory import AngrObjectFactory
|
|
840
|
-
from
|
|
834
|
+
from .simos import SimOS, os_mapping
|
|
841
835
|
from .analyses.analysis import AnalysesHub, AnalysesHubWithDefault
|
|
842
836
|
from .knowledge_base import KnowledgeBase
|
|
843
837
|
from .procedures import SIM_PROCEDURES, SIM_LIBRARIES
|