angr 9.2.77__py3-none-win_amd64.whl → 9.2.79__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of angr might be problematic.
- angr/__init__.py +1 -1
- angr/__main__.py +34 -0
- angr/analyses/calling_convention.py +15 -12
- angr/analyses/cfg/cfg_fast.py +12 -0
- angr/analyses/complete_calling_conventions.py +5 -2
- angr/analyses/decompiler/ail_simplifier.py +2 -2
- angr/analyses/decompiler/block_simplifier.py +25 -5
- angr/analyses/decompiler/clinic.py +27 -17
- angr/analyses/decompiler/optimization_passes/__init__.py +2 -0
- angr/analyses/decompiler/optimization_passes/engine_base.py +2 -2
- angr/analyses/decompiler/optimization_passes/ite_region_converter.py +2 -2
- angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +105 -12
- angr/analyses/decompiler/peephole_optimizations/__init__.py +11 -2
- angr/analyses/decompiler/peephole_optimizations/base.py +29 -2
- angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +83 -0
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +103 -0
- angr/analyses/decompiler/structured_codegen/c.py +20 -4
- angr/analyses/decompiler/utils.py +128 -2
- angr/analyses/disassembly.py +8 -1
- angr/analyses/propagator/engine_ail.py +9 -2
- angr/analyses/proximity_graph.py +30 -0
- angr/analyses/variable_recovery/engine_ail.py +1 -1
- angr/analyses/variable_recovery/engine_vex.py +10 -1
- angr/blade.py +14 -2
- angr/block.py +4 -0
- angr/knowledge_plugins/__init__.py +1 -0
- angr/knowledge_plugins/custom_strings.py +40 -0
- angr/knowledge_plugins/functions/function.py +58 -38
- angr/knowledge_plugins/key_definitions/live_definitions.py +1 -1
- angr/knowledge_plugins/propagations/prop_value.py +6 -2
- angr/knowledge_plugins/variables/variable_manager.py +1 -1
- angr/lib/angr_native.dll +0 -0
- angr/sim_state.py +0 -2
- angr/sim_type.py +3 -0
- angr/storage/memory_mixins/__init__.pyi +49 -0
- angr/storage/memory_mixins/paged_memory/pages/multi_values.py +7 -1
- angr/utils/graph.py +20 -4
- {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/METADATA +6 -6
- {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/RECORD +46 -40
- {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/WHEEL +1 -1
- angr-9.2.79.dist-info/entry_points.txt +2 -0
- tests/analyses/cfg/test_cfgemulated.py +1 -1
- tests/storage/test_multivalues.py +18 -0
- {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/LICENSE +0 -0
- {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/top_level.txt +0 -0
angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py
ADDED
@@ -0,0 +1,103 @@
+# pylint:disable=arguments-differ
+from typing import List, Tuple, Optional
+
+from ailment.expression import Expression, BinaryOp, Const, Register, StackBaseOffset
+from ailment.statement import Call, Store
+
+from .base import PeepholeOptimizationMultiStmtBase
+from .inlined_strcpy import InlinedStrcpy
+
+
+class InlinedStrcpyConsolidation(PeepholeOptimizationMultiStmtBase):
+    """
+    Consolidate multiple inlined strcpy calls.
+    """
+
+    __slots__ = ()
+
+    NAME = "Consolidate multiple inlined strcpy calls"
+    stmt_classes = ((Call, Call), (Call, Store))
+
+    def optimize(self, stmts: List[Call], **kwargs):
+        last_stmt, stmt = stmts
+        if InlinedStrcpyConsolidation._is_inlined_strcpy(last_stmt):
+            s_last: bytes = self.kb.custom_strings[last_stmt.args[1].value]
+            addr_last = last_stmt.args[0]
+            new_str = None  # will be set if consolidation should happen
+
+            if isinstance(stmt, Call) and InlinedStrcpyConsolidation._is_inlined_strcpy(stmt):
+                # consolidating two calls
+                s_curr: bytes = self.kb.custom_strings[stmt.args[1].value]
+                addr_curr = stmt.args[0]
+                # determine if the two addresses are consecutive
+                delta = self._get_delta(addr_last, addr_curr)
+                if delta is not None and delta == len(s_last):
+                    # consolidate both calls!
+                    new_str = s_last + s_curr
+            elif isinstance(stmt, Store) and isinstance(stmt.data, Const):
+                # consolidating a call and a store, in case the store statement is storing the suffix of a string (but
+                # the suffix is too short to qualify an inlined strcpy optimization)
+                addr_curr = stmt.addr
+                delta = self._get_delta(addr_last, addr_curr)
+                if delta is not None and delta == len(s_last):
+                    if stmt.size == 1 and stmt.data.value == 0:
+                        # it's probably the terminating null byte
+                        r, s = True, "\x00"
+                    else:
+                        r, s = InlinedStrcpy.is_integer_likely_a_string(
+                            stmt.data.value, stmt.size, stmt.endness, min_length=1
+                        )
+                    if r:
+                        new_str = s_last + s.encode("ascii")
+
+            if new_str is not None:
+                if new_str.endswith(b"\x00"):
+                    call_name = "strcpy"
+                    new_str_idx = self.kb.custom_strings.allocate(new_str[:-1])
+                    args = [
+                        last_stmt.args[0],
+                        Const(None, None, new_str_idx, last_stmt.args[0].bits, custom_string=True),
+                    ]
+                else:
+                    call_name = "strncpy"
+                    new_str_idx = self.kb.custom_strings.allocate(new_str)
+                    args = [
+                        last_stmt.args[0],
+                        Const(None, None, new_str_idx, last_stmt.args[0].bits, custom_string=True),
+                        Const(None, None, len(new_str), self.project.arch.bits),
+                    ]
+
+                return [Call(stmt.idx, call_name, args=args, **stmt.tags)]
+
+        return None
+
+    @staticmethod
+    def _is_inlined_strcpy(stmt: Call):
+        if isinstance(stmt.target, str) and stmt.target == "strncpy":
+            if len(stmt.args) == 3 and isinstance(stmt.args[1], Const) and hasattr(stmt.args[1], "custom_string"):
+                return True
+        return False
+
+    @staticmethod
+    def _parse_addr(addr: Expression) -> Tuple[Expression, int]:
+        if isinstance(addr, Register):
+            return addr, 0
+        if isinstance(addr, StackBaseOffset):
+            return StackBaseOffset(None, addr.bits, 0), addr.offset
+        if isinstance(addr, BinaryOp):
+            if addr.op == "Add" and isinstance(addr.operands[1], Const):
+                base_0, offset_0 = InlinedStrcpyConsolidation._parse_addr(addr.operands[0])
+                return base_0, offset_0 + addr.operands[1].value
+            if addr.op == "Sub" and isinstance(addr.operands[1], Const):
+                base_0, offset_0 = InlinedStrcpyConsolidation._parse_addr(addr.operands[0])
+                return base_0, offset_0 - addr.operands[1].value
+
+        return addr, 0
+
+    @staticmethod
+    def _get_delta(addr_0: Expression, addr_1: Expression) -> Optional[int]:
+        base_0, offset_0 = InlinedStrcpyConsolidation._parse_addr(addr_0)
+        base_1, offset_1 = InlinedStrcpyConsolidation._parse_addr(addr_1)
+        if base_0.likes(base_1):
+            return offset_1 - offset_0
+        return None
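The merge condition above hinges on _get_delta: two writes are only consolidated when the second one starts exactly where the first string ends. A minimal illustrative sketch of that decision, using plain (base, offset) pairs instead of AIL address expressions (the helper names below are made up for illustration and are not part of the diff):

# Illustrative sketch only: mirrors the delta check in InlinedStrcpyConsolidation
# with (base_name, offset) tuples standing in for AIL address expressions.
def get_delta(addr_0, addr_1):
    base_0, off_0 = addr_0
    base_1, off_1 = addr_1
    return off_1 - off_0 if base_0 == base_1 else None

def can_consolidate(addr_last, s_last: bytes, addr_curr) -> bool:
    delta = get_delta(addr_last, addr_curr)
    return delta is not None and delta == len(s_last)

# strncpy(stack-0x40, "Hello ", 6) followed by strncpy(stack-0x3a, "World\x00", 6):
# the second write begins right where the first string ends, so the pair can be
# rewritten as a single strcpy(stack-0x40, "Hello World").
assert can_consolidate(("stack", -0x40), b"Hello ", ("stack", -0x3a))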
angr/analyses/decompiler/structured_codegen/c.py
CHANGED
@@ -2037,11 +2037,22 @@ class CConstant(CExpression):
                 return
             yield hex(self.reference_values[self._type]), self
         elif isinstance(self._type, SimTypePointer) and isinstance(self._type.pts_to, SimTypeChar):
-            refval = self.reference_values[self._type]
-
+            refval = self.reference_values[self._type]
+            if isinstance(refval, MemoryData):
+                v = refval.content.decode("utf-8")
+            else:
+                # it's a string
+                assert isinstance(v, str)
+                v = refval
+            yield CConstant.str_to_c_str(v), self
         elif isinstance(self._type, SimTypePointer) and isinstance(self._type.pts_to, SimTypeWideChar):
-            refval = self.reference_values[self._type]
-
+            refval = self.reference_values[self._type]
+            if isinstance(refval, MemoryData):
+                v = refval.content.decode("utf_16_le")
+            else:
+                # it's a string
+                v = refval
+            yield CConstant.str_to_c_str(v, prefix="L"), self
         else:
             if isinstance(self.reference_values[self._type], int):
                 yield self.fmt_int(self.reference_values[self._type]), self
@@ -3199,6 +3210,11 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
         inline_string = False
         function_pointer = False

+        if reference_values is None and hasattr(expr, "reference_values"):
+            reference_values = expr.reference_values.copy()
+            if reference_values:
+                type_ = next(iter(reference_values))
+
         if reference_values is None:
             reference_values = {}
         type_ = unpack_typeref(type_)
angr/analyses/decompiler/utils.py
CHANGED
@@ -1,9 +1,14 @@
-
-from typing import Optional, Tuple, Any, Union, List
+import pathlib
+from typing import Optional, Tuple, Any, Union, List, Iterable
+import logging

 import networkx
+from rich.progress import track

 import ailment
+import angr
+
+_l = logging.getLogger(__name__)


 def remove_last_statement(node):
@@ -533,6 +538,127 @@ def peephole_optimize_stmts(block, stmt_opts):
     return statements, any_update


+def match_stmt_classes(all_stmts: List, idx: int, stmt_class_seq: Iterable[type]) -> bool:
+    for i, cls in enumerate(stmt_class_seq):
+        if idx + i >= len(all_stmts):
+            return False
+        if not isinstance(all_stmts[idx + i], cls):
+            return False
+    return True
+
+
+def peephole_optimize_multistmts(block, stmt_opts):
+    any_update = False
+    statements = block.statements[::]
+
+    # run multi-statement optimizers
+    stmt_idx = 0
+    while stmt_idx < len(statements):
+        redo = True
+        while redo and stmt_idx < len(statements):
+            redo = False
+            for opt in stmt_opts:
+                matched = False
+                stmt_seq_len = None
+                for stmt_class_seq in opt.stmt_classes:
+                    if match_stmt_classes(statements, stmt_idx, stmt_class_seq):
+                        stmt_seq_len = len(stmt_class_seq)
+                        matched = True
+                        break
+
+                if matched:
+                    matched_stmts = statements[stmt_idx : stmt_idx + stmt_seq_len]
+                    r = opt.optimize(matched_stmts, stmt_idx=stmt_idx, block=block)
+                    if r is not None:
+                        # update statements
+                        statements = statements[:stmt_idx] + r + statements[stmt_idx + stmt_seq_len :]
+                        any_update = True
+                        redo = True
+                        break
+
+        # move on to the next statement
+        stmt_idx += 1
+
+    return statements, any_update
+
+
+def decompile_functions(path, functions=None, structurer=None, catch_errors=True) -> Optional[str]:
+    """
+    Decompile a binary into a set of functions.
+
+    :param path: The path to the binary to decompile.
+    :param functions: The functions to decompile. If None, all functions will be decompiled.
+    :param structurer: The structuring algorithms to use.
+    :param catch_errors: The structuring algorithms to use.
+    :return: The decompilation of all functions appended in order.
+    """
+    # delayed imports to avoid circular imports
+    from angr.analyses.decompiler.decompilation_options import PARAM_TO_OPTION
+
+    structurer = structurer or "phoenix"
+    path = pathlib.Path(path).resolve().absolute()
+    proj = angr.Project(path, auto_load_libs=False)
+    cfg = proj.analyses.CFG(normalize=True, data_references=True)
+    proj.analyses.CompleteCallingConventions(recover_variables=True, analyze_callsites=True)
+
+    # collect all functions when None are provided
+    if functions is None:
+        functions = cfg.functions.values()
+
+    # normalize the functions that could be ints as names
+    normalized_functions = []
+    for func in functions:
+        try:
+            normalized_name = int(func, 0)
+        except ValueError:
+            normalized_name = func
+        normalized_functions.append(normalized_name)
+    functions = normalized_functions
+
+    # verify that all functions exist
+    for func in functions:
+        if func not in cfg.functions:
+            raise ValueError(f"Function {func} does not exist in the CFG.")
+
+    # decompile all functions
+    decompilation = ""
+    dec_options = [
+        (PARAM_TO_OPTION["structurer_cls"], structurer),
+    ]
+    for func in track(functions, description="Decompiling functions", transient=True):
+        f = cfg.functions[func]
+        if f is None or f.is_plt:
+            continue
+
+        exception_string = ""
+        if not catch_errors:
+            dec = proj.analyses.Decompiler(f, cfg=cfg, options=dec_options)
+        else:
+            try:
+                # TODO: add a timeout
+                dec = proj.analyses.Decompiler(f, cfg=cfg, options=dec_options)
+            except Exception as e:
+                exception_string = str(e).replace("\n", " ")
+                dec = None
+
+        # do sanity checks on decompilation, skip checks if we already errored
+        if not exception_string:
+            if dec is None or not dec.codegen or not dec.codegen.text:
+                exception_string = "Decompilation had no code output (failed in Dec)"
+            elif "{\n}" in dec.codegen.text:
+                exception_string = "Decompilation outputted an empty function (failed in structuring)"
+            elif structurer in ["dream", "combing"] and "goto" in dec.codegen.text:
+                exception_string = "Decompilation outputted a goto for a Gotoless algorithm (failed in structuring)"
+
+        if exception_string:
+            _l.critical("Failed to decompile %s because %s", str(func), exception_string)
+            decompilation += f"// [error: {func} | {exception_string}]\n"
+        else:
+            decompilation += dec.codegen.text + "\n"
+
+    return decompilation
+
+
 # delayed import
 from .structuring.structurer_nodes import (
     MultiNode,
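For context, the new decompile_functions helper (which backs the console entry point added in angr/__main__.py and entry_points.txt) can also be driven directly from Python. A hedged usage sketch; the binary path and function names below are placeholders, not values from the diff:

# Usage sketch (not part of the diff): batch-decompile a few functions of a
# hypothetical binary with the helper added to angr/analyses/decompiler/utils.py.
from angr.analyses.decompiler.utils import decompile_functions

pseudocode = decompile_functions(
    "/tmp/example_bin",              # placeholder path
    functions=["main", "0x401000"],  # names are kept as-is, numeric strings are int()-normalized
    structurer="phoenix",            # the default structurer per the code above
    catch_errors=True,               # failures become "// [error: ...]" comments instead of exceptions
)
print(pseudocode)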
angr/analyses/disassembly.py
CHANGED
@@ -219,6 +219,14 @@ class Instruction(DisassemblyPiece):
         for operand in dummy_operands:
             opr_pieces = self.split_op_string(operand)
             cur_operand = []
+
+            if not (operand and opr_pieces):
+                # opr_pieces may contain empty string when invalid disasm
+                # result is generated by capstone
+                l.error(f'Failed to parse insn "{self.insn}". Please report.')
+                self.operands.clear()
+                break
+
             if opr_pieces[0][0].isalpha() and opr_pieces[0] in self.arch.registers:
                 cur_operand.append(Register(opr_pieces[0]))
                 # handle register's suffix (e.g. "sp!", "d0[1]", "v0.16b")
@@ -269,7 +277,6 @@ class Instruction(DisassemblyPiece):

         if len(self.operands) == 0 and len(self.insn.operands) != 0:
             l.error("Operand parsing failed for instruction %s at address %x", str(self.insn), self.insn.address)
-            self.operands = []
             return

     @staticmethod
angr/analyses/propagator/engine_ail.py
CHANGED
@@ -302,8 +302,15 @@ class SimEnginePropagatorAIL(
         if 0 in current_reg_value.offset_and_details:
             detail = current_reg_value.offset_and_details[0]
             if detail.def_at == def_at:
-
-                self.
+                outdated = False
+                outdated_, has_avoid_ = self.is_using_outdated_def(
+                    detail.expr, detail.def_at, self._codeloc(), avoid=expr
+                )
+                if outdated_ or has_avoid_:
+                    outdated = True
+                if not outdated:
+                    l.debug("Add a replacement: %s with %s", expr, reg_atom)
+                    self.state.add_replacement(self._codeloc(), expr, reg_atom)
         top = self.state.top(expr.size * self.arch.byte_width)
         return PropValue.from_value_and_details(top, expr.size, expr, self._codeloc())

angr/analyses/proximity_graph.py
CHANGED
@@ -180,6 +180,33 @@ class ProximityGraphAnalysis(Analysis):

         self._work()

+    def _condense_blank_nodes(self, graph: networkx.DiGraph) -> None:
+        nodes = list(graph.nodes)
+        blank_nodes: List[BaseProxiNode] = []
+
+        for node in nodes:
+            if isinstance(node, BaseProxiNode) and node.type_ == ProxiNodeTypes.Empty:
+                blank_nodes.append(node)
+            else:
+                if blank_nodes:
+                    self._merge_nodes(graph, blank_nodes)
+                    blank_nodes = []
+
+        if blank_nodes:
+            self._merge_nodes(graph, blank_nodes)
+
+    def _merge_nodes(self, graph: networkx.DiGraph, nodes: List[BaseProxiNode]) -> None:
+        for node in nodes:
+            predecessors = set(graph.predecessors(node))
+            successors = set(graph.successors(node))
+
+            for pred in predecessors:
+                for succ in successors:
+                    edge_data = graph.get_edge_data(pred, node) or {}
+                    graph.add_edge(pred, succ, **edge_data)
+
+            graph.remove_node(node)
+
     def _work(self):
         self.graph = networkx.DiGraph()

@@ -210,6 +237,9 @@ class ProximityGraphAnalysis(Analysis):
         self.graph.add_nodes_from(subgraph.nodes())
         self.graph.add_edges_from(subgraph.edges())

+        # condense blank nodes after the graph has been constructed
+        self._condense_blank_nodes(self.graph)
+
     def _endnode_connector(self, func: "Function", subgraph: networkx.DiGraph):
         """
         Properly connect expanded function call's to proximity graph.
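The condensation pass above is plain graph surgery: each empty proximity node is removed after its predecessors are wired straight to its successors. A small self-contained networkx sketch of the same edge-rewiring idea (the node names are made up for illustration):

# Self-contained sketch of the pred -> succ rewiring used by _merge_nodes.
import networkx

g = networkx.DiGraph()
g.add_edge("caller", "blank", kind="call")   # "blank" stands in for an Empty proxi node
g.add_edge("blank", "callee")

for node in ["blank"]:
    preds, succs = set(g.predecessors(node)), set(g.successors(node))
    for pred in preds:
        for succ in succs:
            g.add_edge(pred, succ, **(g.get_edge_data(pred, node) or {}))
    g.remove_node(node)

assert list(g.edges(data=True)) == [("caller", "callee", {"kind": "call"})]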
angr/analyses/variable_recovery/engine_ail.py
CHANGED
@@ -124,7 +124,7 @@ class SimEngineVRAIL(
             prototype = stmt.prototype
         elif isinstance(stmt.target, ailment.Expr.Const):
             func_addr = stmt.target.value
-            if func_addr in self.kb.functions:
+            if isinstance(func_addr, self.kb.functions.address_types) and func_addr in self.kb.functions:
                 func = self.kb.functions[func_addr]
                 prototype = func.prototype

angr/analyses/variable_recovery/engine_vex.py
CHANGED
@@ -179,7 +179,16 @@ class SimEngineVRVEX(
         if func.prototype is None or func.calling_convention is None:
             return

-
+        try:
+            arg_locs = func.calling_convention.arg_locs(func.prototype)
+        except (TypeError, ValueError):
+            func.prototype = None
+            return
+
+        if None in arg_locs:
+            return
+
+        for arg_loc in arg_locs:
             for loc in arg_loc.get_footprint():
                 if isinstance(loc, SimRegArg):
                     self._read_from_register(self.arch.registers[loc.reg_name][0] + loc.reg_offset, loc.size)
angr/blade.py
CHANGED
@@ -1,3 +1,4 @@
+# pylint:disable=unnecessary-dunder-call
 import itertools

 import networkx
@@ -10,6 +11,12 @@ from .utils.constants import DEFAULT_STATEMENT
 from .slicer import SimSlicer


+class BadJumpkindNotification(Exception):
+    """
+    Notifies the caller that the jumpkind is bad (e.g., Ijk_NoDecode)
+    """
+
+
 class Blade:
     """
     Blade is a light-weight program slicer that works with networkx DiGraph containing CFGNodes.
@@ -177,6 +184,8 @@ class Blade:
            irsb = self.project.factory.block(
                v, cross_insn_opt=self._cross_insn_opt, backup_state=self._base_state
            ).vex
+            if irsb.jumpkind == "Ijk_NoDecode":
+                raise BadJumpkindNotification()
            self._run_cache[v] = irsb
            return irsb
        else:
@@ -248,7 +257,7 @@ class Blade:
        # Retrieve the target: are we slicing from a register(IRStmt.Put), or a temp(IRStmt.WrTmp)?
        try:
            stmts = self._get_irsb(self._dst_run).statements
-       except SimTranslationError:
+       except (SimTranslationError, BadJumpkindNotification):
            return

        if self._dst_stmt_idx != -1:
@@ -337,7 +346,10 @@ class Blade:
        regs = regs.copy()

        irsb_addr = self._get_addr(run)
-
+       try:
+           stmts = self._get_irsb(run).statements
+       except (SimTranslationError, BadJumpkindNotification):
+           return

        if exit_stmt_idx is None or exit_stmt_idx == DEFAULT_STATEMENT:
            # Initialize the temps set with whatever in the `next` attribute of this irsb
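The new exception simply lets _get_irsb signal an undecodable block so both slicing entry points bail out instead of slicing garbage. A hedged sketch of the same guard expressed with public angr APIs (the binary path is a placeholder):

# Sketch only: the check Blade now performs internally, shown at the API surface.
import angr

proj = angr.Project("/tmp/example_bin", auto_load_libs=False)  # placeholder binary
irsb = proj.factory.block(proj.entry).vex
if irsb.jumpkind == "Ijk_NoDecode":
    stmts = []          # Blade raises BadJumpkindNotification here; its callers simply return
else:
    stmts = irsb.statements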
angr/block.py
CHANGED
@@ -427,6 +427,10 @@ class Block(Serializable):

     @property
     def instruction_addrs(self):
+        if self.size == 0:
+            # hooks and other pseudo-functions
+            return []
+
         if not self._instruction_addrs and self._vex is None:
             # initialize instruction addrs
             _ = self.vex
angr/knowledge_plugins/custom_strings.py
ADDED
@@ -0,0 +1,40 @@
+from typing import Dict
+
+from .plugin import KnowledgeBasePlugin
+
+
+class CustomStrings(KnowledgeBasePlugin):
+    """
+    Store new strings that are recovered during various analysis. Each string has a unique ID associated.
+    """
+
+    def __init__(self, kb):
+        super().__init__()
+        self._kb = kb
+
+        self.string_id = 0
+        self.strings: Dict[int, bytes] = {}
+
+    def allocate(self, s: bytes) -> int:
+        # de-duplication
+        # TODO: Use a reverse map if this becomes a bottle-neck in the future
+        for idx, string in self.strings.items():
+            if string == s:
+                return idx
+
+        string_id = self.string_id
+        self.strings[string_id] = s
+        self.string_id += 1
+        return string_id
+
+    def __getitem__(self, idx):
+        return self.strings[idx]
+
+    def copy(self):
+        o = CustomStrings(self._kb)
+        o.strings = self.strings.copy()
+        o.string_id = self.string_id
+        return o
+
+
+KnowledgeBasePlugin.register_default("custom_strings", CustomStrings)
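Since custom_strings registers itself as a default knowledge-base plugin, the indices stored in the Const(..., custom_string=True) arguments above resolve through the knowledge base. A hedged sketch of the round trip (the project path is a placeholder):

# Sketch: allocating and resolving a recovered string through the new plugin.
import angr

proj = angr.Project("/tmp/example_bin", auto_load_libs=False)  # placeholder binary
idx = proj.kb.custom_strings.allocate(b"Hello World")
assert proj.kb.custom_strings.allocate(b"Hello World") == idx  # de-duplicated
assert proj.kb.custom_strings[idx] == b"Hello World"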
angr/knowledge_plugins/functions/function.py
CHANGED
@@ -1,7 +1,6 @@
 import os
 import logging
 import networkx
-import string
 import itertools
 from collections import defaultdict
 from typing import Union, Optional, Iterable, Set
@@ -14,6 +13,7 @@ from archinfo.arch_arm import get_real_address_if_arm
 import claripy

 from angr.block import Block
+from angr.knowledge_plugins.cfg.memory_data import MemoryDataSort

 from ...codenode import CodeNode, BlockNode, HookNode, SyscallNode
 from ...serializable import Serializable
@@ -80,6 +80,7 @@ class Function(Serializable):
         "is_alignment",
         "is_prototype_guessed",
         "ran_cca",
+        "_cyclomatic_complexity",
     )

     def __init__(
@@ -161,6 +162,9 @@ class Function(Serializable):
         self.info = {}  # storing special information, like $gp values for MIPS32
         self.tags = ()  # store function tags. can be set manually by performing CodeTagging analysis.

+        # Initialize _cyclomatic_complexity to None
+        self._cyclomatic_complexity = None
+
         # TODO: Can we remove the following two members?
         # Register offsets of those arguments passed in registers
         self._argument_registers = []
@@ -302,6 +306,42 @@ class Function(Serializable):
         except (SimEngineError, SimMemoryError):
             pass

+    @property
+    def cyclomatic_complexity(self):
+        """
+        The cyclomatic complexity of the function.
+
+        Cyclomatic complexity is a software metric used to indicate the complexity of a program.
+        It is a quantitative measure of the number of linearly independent paths through a program's source code.
+        It is computed using the formula: M = E - N + 2P, where
+        E = the number of edges in the graph,
+        N = the number of nodes in the graph,
+        P = the number of connected components.
+
+        The cyclomatic complexity value is lazily computed and cached for future use.
+        Initially this value is None until it is computed for the first time
+
+        :return: The cyclomatic complexity of the function.
+        :rtype: int
+        """
+        if self._cyclomatic_complexity is None:
+            self._cyclomatic_complexity = (
+                self.transition_graph.number_of_edges() - self.transition_graph.number_of_nodes() + 2
+            )
+        return self._cyclomatic_complexity
+
+    @property
+    def xrefs(self):
+        """
+        An iterator of all xrefs of the current function.
+
+        :return: angr.knowledge_plugins.xrefs.xref.XRef instances.
+        """
+        for block in self.blocks:
+            yield from self._function_manager._kb.xrefs.get_xrefs_by_ins_addr_region(
+                block.addr, block.addr + block.size
+            )
+
     @property
     def block_addrs(self):
         """
@@ -413,49 +453,28 @@ class Function(Serializable):
         """
         return FunctionParser.parse_from_cmsg(cmsg, **kwargs)

-    def string_references(self, minimum_length=2
+    def string_references(self, minimum_length=2):
         """
         All of the constant string references used by this function.

         :param minimum_length: The minimum length of strings to find (default is 1)
-        :
-
-        memory.
+        :return: A generator yielding tuples of (address, string) where is address
+                 is the location of the string in memory.
         """
-        strings = []
-        memory = self._project.loader.memory

-        [... 13 deleted lines not captured in this diff view ...]
-            try:
-                possible_pointer = memory.unpack_word(addr)
-                if addr not in known_executable_addresses and possible_pointer not in known_executable_addresses:
-                    # build string
-                    stn = ""
-                    offset = 0
-                    current_char = chr(memory[addr + offset])
-                    while current_char in string.printable:
-                        stn += current_char
-                        offset += 1
-                        current_char = chr(memory[addr + offset])
-
-                    # check that the string was a null terminated string with minimum length
-                    if current_char == "\x00" and len(stn) >= minimum_length:
-                        strings.append((addr, stn))
-            except KeyError:
-                pass
-        return strings
+        cfg = self._function_manager._kb.cfgs.get_most_accurate()
+
+        for x in self.xrefs:
+            try:
+                md = cfg.memory_data[x.dst]
+            except KeyError:
+                continue
+            if md.sort not in {MemoryDataSort.String, MemoryDataSort.UnicodeString}:
+                continue
+            if len(md.content) < minimum_length:
+                continue
+
+            yield (md.addr, md.content)

     @property
     def local_runtime_values(self):
@@ -574,6 +593,7 @@ class Function(Serializable):
         s += " Alignment: %s\n" % (self.alignment)
         s += f" Arguments: reg: {self._argument_registers}, stack: {self._argument_stack_variables}\n"
         s += " Blocks: [%s]\n" % ", ".join(["%#x" % i for i in self.block_addrs])
+        s += " Cyclomatic Complexity: %s\n" % self.cyclomatic_complexity
         s += " Calling convention: %s" % self.calling_convention
         return s

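A quick sanity check of the formula used by the new property: for a single connected transition graph (P = 1), M = E - N + 2. A small networkx example (the graph below is made up for illustration; it is not an angr transition graph):

# Worked example of M = E - N + 2P with P = 1 (one connected component).
import networkx

g = networkx.DiGraph()
g.add_edges_from([
    ("entry", "then"), ("entry", "else"),   # one if/else split
    ("then", "exit"), ("else", "exit"),
])
M = g.number_of_edges() - g.number_of_nodes() + 2
assert M == 2  # two linearly independent paths through the if/else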
angr/knowledge_plugins/key_definitions/live_definitions.py
CHANGED
@@ -74,7 +74,7 @@ class DefinitionAnnotation(Annotation):
                 and self.eliminatable == other.eliminatable
             )
         else:
-
+            return False

     def __repr__(self):
         return f"<{self.__class__.__name__}({repr(self.definition)})"