angr 9.2.124__py3-none-manylinux2014_x86_64.whl → 9.2.126__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/__init__.py +13 -1
- angr/analyses/codecave.py +77 -0
- angr/analyses/decompiler/ail_simplifier.py +1 -0
- angr/analyses/decompiler/callsite_maker.py +9 -1
- angr/analyses/decompiler/clinic.py +32 -2
- angr/analyses/decompiler/condition_processor.py +104 -66
- angr/analyses/decompiler/decompiler.py +7 -0
- angr/analyses/decompiler/optimization_passes/__init__.py +18 -1
- angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +6 -0
- angr/analyses/decompiler/optimization_passes/tag_slicer.py +41 -0
- angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +2 -2
- angr/analyses/decompiler/return_maker.py +1 -0
- angr/analyses/decompiler/ssailification/rewriting.py +4 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +10 -3
- angr/analyses/decompiler/structured_codegen/c.py +18 -2
- angr/analyses/deobfuscator/__init__.py +18 -0
- angr/analyses/deobfuscator/api_obf_finder.py +313 -0
- angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +51 -0
- angr/analyses/deobfuscator/irsb_reg_collector.py +85 -0
- angr/analyses/deobfuscator/string_obf_finder.py +774 -0
- angr/analyses/deobfuscator/string_obf_opt_passes.py +133 -0
- angr/analyses/deobfuscator/string_obf_peephole_optimizer.py +47 -0
- angr/analyses/patchfinder.py +137 -0
- angr/analyses/pathfinder.py +282 -0
- angr/analyses/reaching_definitions/function_handler_library/stdio.py +8 -1
- angr/analyses/smc.py +159 -0
- angr/analyses/unpacker/__init__.py +6 -0
- angr/analyses/unpacker/obfuscation_detector.py +103 -0
- angr/analyses/unpacker/packing_detector.py +138 -0
- angr/angrdb/models.py +1 -2
- angr/calling_conventions.py +3 -1
- angr/engines/vex/claripy/irop.py +10 -5
- angr/engines/vex/heavy/heavy.py +2 -0
- angr/exploration_techniques/spiller_db.py +1 -2
- angr/knowledge_plugins/__init__.py +2 -0
- angr/knowledge_plugins/functions/function.py +4 -0
- angr/knowledge_plugins/functions/function_manager.py +18 -9
- angr/knowledge_plugins/functions/function_parser.py +1 -1
- angr/knowledge_plugins/functions/soot_function.py +1 -0
- angr/knowledge_plugins/obfuscations.py +36 -0
- angr/misc/ux.py +2 -2
- angr/project.py +17 -1
- angr/state_plugins/history.py +6 -4
- angr/utils/bits.py +4 -0
- angr/utils/tagged_interval_map.py +112 -0
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/METADATA +6 -6
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/RECORD +52 -35
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/WHEEL +1 -1
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/LICENSE +0 -0
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/entry_points.txt +0 -0
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/top_level.txt +0 -0
angr/analyses/smc.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import logging
|
|
3
|
+
import random
|
|
4
|
+
|
|
5
|
+
from enum import auto, IntFlag
|
|
6
|
+
from collections.abc import Generator
|
|
7
|
+
|
|
8
|
+
import angr
|
|
9
|
+
from angr.analyses import Analysis, AnalysesHub
|
|
10
|
+
from angr.knowledge_plugins import Function
|
|
11
|
+
from angr.sim_state import SimState
|
|
12
|
+
|
|
13
|
+
from angr.utils.tagged_interval_map import TaggedIntervalMap
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
log = logging.getLogger(__name__)
|
|
17
|
+
log.setLevel(logging.INFO)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class TraceActions(IntFlag):
    """
    Bit flags naming the kinds of memory access recorded while tracing.

    Being an IntFlag, members can be OR-ed together to describe a range that saw more than one kind of access.
    """

    # The range was the target of a memory write.
    WRITE = auto()
    # The range was executed as code.
    EXECUTE = auto()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TraceClassifier:
    """
    Collect the memory ranges a traced state writes to and executes, and report ranges that saw both.
    """

    def __init__(self, state: SimState | None = None):
        # Ranges are tagged with TraceActions flags by the SimInspect callbacks below.
        self.map = TaggedIntervalMap()
        if state:
            self.instrument(state)

    def act_mem_write(self, state) -> None:
        """
        SimInspect callback: tag the range being written with TraceActions.WRITE.
        """
        write_addr = state.solver.eval(state.inspect.mem_write_address)
        write_len = state.inspect.mem_write_length
        # The length may be a plain int already; anything else is concretized through the solver.
        if not isinstance(write_len, int):
            write_len = state.solver.eval(write_len)
        self.map.add(write_addr, write_len, TraceActions.WRITE)

    def act_instruction(self, state) -> None:
        """
        SimInspect callback: tag the current block with TraceActions.EXECUTE.
        """
        insn_addr = state.inspect.instruction
        if insn_addr is None:
            log.warning("Symbolic addr")
            return

        # FIXME: Ensure block size is correct
        self.map.add(insn_addr, state.block().size, TraceActions.EXECUTE)

    def instrument(self, state) -> None:
        """
        Register both callbacks on `state` so that they fire before each memory write and each instruction.
        """
        for event, callback in (("mem_write", self.act_mem_write), ("instruction", self.act_instruction)):
            state.inspect.b(event, when=angr.BP_BEFORE, action=callback)

    def get_smc_address_and_lengths(self) -> Generator[tuple[int, int]]:
        """
        Yield an (address, size) pair for every recorded range tagged with both WRITE and EXECUTE.
        """
        wanted = TraceActions.WRITE | TraceActions.EXECUTE
        yield from ((start, length) for start, length, tags in self.map.irange() if (tags & wanted) == wanted)

    def determine_smc(self) -> bool:
        """
        Return True if at least one recorded range was both written to and executed.
        """
        return next(self.get_smc_address_and_lengths(), None) is not None

    def pp(self):
        """
        Print every recorded range, one per line, as hex address, size and flags.
        """
        for start, length, tags in self.map.irange():
            print(f"{start:8x} {length} {tags}")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class SelfModifyingCodeAnalysis(Analysis):
    """
    Determine if some piece of code is self-modifying.

    This determination is made by simply executing. If an address is executed
    that is also written to, the code is determined to be self-modifying. The
    determination is stored in the `result` property. The `regions` property
    contains a list of (addr, length) regions that were both written to and
    executed.
    """

    result: bool
    regions: list[tuple[int, int]]

    def __init__(self, subject: None | int | str | Function, max_bytes: int = 0, state: SimState | None = None):
        """
        :param subject: Subject of analysis: an address, a label or function name, a Function, or None to start at
                        the project entry point.
        :param max_bytes: Maximum number of bytes from subject address. 0 for no limit (default).
        :param state: State to begin executing from. It is copied, and the copy's pc is set to the subject address.
        :raises ValueError: If `subject` is not one of the supported types.
        """
        assert self.project.selfmodifying_code

        if subject is None:
            subject = self.project.entry
        if isinstance(subject, str):
            # Prefer a label; fall back to a function of that name.
            try:
                addr = self.project.kb.labels.lookup(subject)
            except KeyError:
                addr = self.project.kb.functions[subject].addr
        elif isinstance(subject, Function):
            addr = subject.addr
        elif isinstance(subject, int):
            addr = subject
        else:
            raise ValueError("Not a supported subject")

        if state is None:
            init_state = self.project.factory.call_state(addr)
        else:
            init_state = state.copy()
            init_state.regs.pc = addr

        init_state.options -= angr.sim_options.simplification

        # Instrumenting the initial state records every write and executed block during stepping.
        self._trace_classifier = TraceClassifier(init_state)
        simgr = self.project.factory.simgr(init_state)

        step_kwargs = {}
        if max_bytes:
            # States whose pc leaves [addr, addr + max_bytes) are moved to the "oob" stash.
            step_kwargs["filter_func"] = lambda s: (
                "active" if s.solver.eval(addr <= s.regs.pc) and s.solver.eval(s.regs.pc < addr + max_bytes) else "oob"
            )

        # FIXME: Early out on SMC detect
        # FIXME: Configurable step threshold
        # FIXME: Loop analysis

        for n in range(100):
            self._update_progress(n)
            # The filter must be handed to step(); without it max_bytes has no effect and "oob" stays empty.
            simgr.step(n=3, **step_kwargs)
            # Keep at most 10 randomly chosen active states per round; the rest are dropped.
            random.shuffle(simgr.active)
            simgr.split(from_stash="active", to_stash=simgr.DROP, limit=10)

        # Classify any out of bound entrypoints
        for state_ in simgr.stashes["oob"]:
            self._trace_classifier.act_instruction(state_)

        self.regions = list(self._trace_classifier.get_smc_address_and_lengths())
        self.result = len(self.regions) > 0
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
AnalysesHub.register_default("SMC", SelfModifyingCodeAnalysis)
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
import networkx
|
|
5
|
+
|
|
6
|
+
from angr.analyses.analysis import Analysis, AnalysesHub
|
|
7
|
+
from angr.knowledge_plugins.cfg import CFGModel
|
|
8
|
+
|
|
9
|
+
_l = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ObfuscationDetector(Analysis):
    """
    This analysis detects, usually in ways that are more robust than section name matching or signature matching, the
    existence of obfuscation techniques in a binary.

    After construction, `obfuscated` tells whether any routine matched and `possible_obfuscators` lists the names
    the matching routines returned.
    """

    def __init__(self, cfg: CFGModel | None = None):
        """
        :param cfg: The CFG model to inspect. If None, the most accurate model in the knowledge base is used.
        """
        self.obfuscated: bool = False
        self.possible_obfuscators: list[str] = []

        if cfg is None:
            _l.warning(
                "ObfuscationDetector is using a most accurate CFG model in the knowledge base. We assume it is "
                "generated with force_smart_scan=False and force_complete_scan=False."
            )
            self._cfg = self.kb.cfgs.get_most_accurate()
        else:
            self._cfg = cfg

        self.analyze()

    def analyze(self):
        """
        Run every detection routine and record the names of the obfuscators that were recognized.
        """
        analysis_routines = [
            self._analyze_vmprotect,
        ]

        for routine in analysis_routines:
            tool = routine()
            if tool:
                self.obfuscated = True
                self.possible_obfuscators.append(tool)

    def _analyze_vmprotect(self) -> str | None:
        """
        We detect VMProtect v3 (with control-flow obfuscation) based on two main characteristics:

        - In amd64 binaries, there exists a strongly connected component in the call graph with over 1,000 nodes.
          Edge/node ratio is >= 1.3
        - There is a high number of pushf and popf instructions in the visible functions.

        :return: "vmprotect" if all checked characteristics hold, None otherwise.
        """

        high_scc_node_edge_ratio = False
        high_pushf = False
        high_popf = False
        high_clc = False  # pylint:disable=unused-variable

        if self.project.arch.name == "AMD64":
            cg = self.kb.functions.callgraph

            for scc in networkx.strongly_connected_components(cg):
                node_count = len(scc)
                if node_count > 1000:
                    # Only build the subgraph for components large enough to matter.
                    edge_count = len(networkx.subgraph(cg, scc).edges)

                    if edge_count / node_count >= 1.3:
                        high_scc_node_edge_ratio = True
                        break
        else:
            # The call-graph criterion is only evaluated on AMD64; elsewhere it is treated as satisfied.
            high_scc_node_edge_ratio = True

        pushf_ctr = 0
        popf_ctr = 0
        clc_ctr = 0  # only used for x86
        is_x86 = self.project.arch.name == "X86"
        cfg_node_count = len(self._cfg.graph)
        for node in self._cfg.nodes():
            if node.size > 0 and node.instruction_addrs:
                block = node.block
                for insn in block.capstone.insns:
                    if insn.mnemonic in {"pushf", "pushfd", "pushfq"}:
                        pushf_ctr += 1
                    elif insn.mnemonic in {"popf", "popfd", "popfq"}:
                        popf_ctr += 1
                    elif is_x86 and insn.mnemonic == "clc":
                        clc_ctr += 1

        # An instruction counts as "high" when it occurs more often than 0.2% of the number of CFG nodes.
        if pushf_ctr > cfg_node_count * 0.002:
            high_pushf = True
        if popf_ctr > cfg_node_count * 0.002:
            high_popf = True
        if not is_x86 or clc_ctr > cfg_node_count * 0.002:
            # Computed but not part of the verdict below.
            high_clc = True  # noqa: F841

        if high_scc_node_edge_ratio and high_pushf and high_popf:
            return "vmprotect"
        return None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
AnalysesHub.register_default("ObfuscationDetector", ObfuscationDetector)
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
3
|
+
import math
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
from angr.analyses.analysis import Analysis, AnalysesHub
|
|
7
|
+
from angr.knowledge_plugins.cfg import CFGModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from cle import Section
|
|
12
|
+
|
|
13
|
+
_l = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class PackingDetector(Analysis):
    """
    Decide whether a binary is likely packed.

    The verdict is stored in `packed`. It is based on the entropy of the bytes in readable and executable sections
    that the CFG does not cover. Identifying the specific packer may be added in the future.
    """

    # Minimum number of uncovered bytes required before the entropy is considered meaningful.
    PACKED_MIN_BYTES = 256
    # Minimum entropy (log base 256, so in the range 0..1) for the binary to be reported as packed.
    PACKED_ENTROPY_MIN_THRESHOLD = 0.88

    def __init__(self, cfg: CFGModel | None = None, region_size_threshold: int = 0x20):
        """
        :param cfg: The CFG model to inspect. If None, the most accurate model in the knowledge base is used.
        :param region_size_threshold: Uncovered gaps or sections no larger than this many bytes are ignored.
        """
        self.packed: bool = False
        self.region_size_threshold: int = region_size_threshold

        if cfg is not None:
            self._cfg = cfg
        else:
            _l.warning(
                "PackingDetector is using a most accurate CFG model in the knowledge base. We assume it is "
                "generated with force_smart_scan=False and force_complete_scan=False."
            )
            self._cfg = self.kb.cfgs.get_most_accurate()

        self.analyze()

    def analyze(self):
        """
        Collect the regions covered by CFG nodes, derive the uncovered regions, and set `packed` from their entropy.
        """
        # assume we already have a CFG with complete scanning disabled

        covered: list[tuple[int, int]] = []
        cached_section: Section | None = None

        for node in sorted(self._cfg.nodes(), key=lambda n: n.addr):
            # Reuse the last r+x section when it still contains the node; otherwise ask the loader.
            if cached_section is not None and cached_section.contains_addr(node.addr):
                section = cached_section
            else:
                section = self.project.loader.find_section_containing(node.addr)
                if section is None:
                    # this node does not belong to any known section - ignore it
                    continue
                if section.is_readable and section.is_executable:
                    cached_section = section

            if node.size == 0:
                # ignore empty nodes
                continue

            start, end = node.addr, node.addr + node.size
            if covered and covered[-1][0] <= start <= covered[-1][1] < end:
                # the node starts inside (or right at the end of) the previous region and extends it
                covered[-1] = (covered[-1][0], end)
            else:
                covered.append((start, end))

        gaps: list[tuple[int, int]] = self._get_uncovered_regions(covered)
        byte_total, entropy = self._compute_entropy(gaps)

        # packed only if there are enough uncovered bytes and they look sufficiently random
        self.packed = byte_total >= self.PACKED_MIN_BYTES and entropy >= self.PACKED_ENTROPY_MIN_THRESHOLD

    def _get_uncovered_regions(self, covered_regions: list[tuple[int, int]]) -> list[tuple[int, int]]:
        """
        Return the (start, end) gaps inside readable, executable, initialized sections of the main object that no
        covered region accounts for and that are larger than `region_size_threshold`.

        :param covered_regions: (start, end) pairs; expected in ascending address order.
        """
        # FIXME: We only support binaries with sections. Add support for segments in the future
        sections = sorted(
            (
                sec
                for sec in self.project.loader.main_object.sections
                if sec.is_executable and sec.is_readable and not sec.only_contains_uninitialized_data
            ),
            key=lambda sec: sec.vaddr,
        )

        gaps: list[tuple[int, int]] = []
        cursor = 0  # index of the next covered region to examine; carried across sections

        for section in sections:
            if cursor >= len(covered_regions):
                # no covered regions left: the whole section is uncovered
                if section.memsize > self.region_size_threshold:
                    gaps.append((section.vaddr, section.vaddr + section.memsize))
                continue

            last_end = section.vaddr
            while cursor < len(covered_regions):
                region_start, region_end = covered_regions[cursor]
                if region_end >= section.vaddr + section.memsize:
                    # move on to the next section
                    break
                if last_end < region_start and region_start - last_end > self.region_size_threshold:
                    gaps.append((last_end, region_start))
                cursor += 1
                last_end = max(last_end, region_end)
            # NOTE(review): the bytes between `last_end` and the end of the section are not recorded here -
            # confirm whether leaving out that tail is intended.

        return gaps

    def _compute_entropy(self, regions: list[tuple[int, int]]) -> tuple[int, float]:
        """
        Compute the byte entropy of the given regions, read from the loader's memory.

        :param regions: (start, end) pairs of addresses to read.
        :return: A tuple of (number of bytes read, entropy using log base 256). (0, 0.0) when nothing was read.
        """
        histogram = [0] * 256
        for start, end in regions:
            for value in self.project.loader.memory.load(start, end - start):
                histogram[value] += 1

        total = sum(histogram)
        if total == 0:
            return 0, 0.0

        entropy = 0.0
        for count in histogram:
            if count:
                p = count / total
                entropy -= p * math.log(p, 256)

        return total, entropy
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
AnalysesHub.register_default("PackingDetector", PackingDetector)
|
angr/angrdb/models.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
from sqlalchemy import Column, Integer, String, Boolean, BLOB, ForeignKey
|
|
3
|
-
from sqlalchemy.orm import relationship
|
|
4
|
-
from sqlalchemy.ext.declarative import declarative_base
|
|
3
|
+
from sqlalchemy.orm import declarative_base, relationship
|
|
5
4
|
|
|
6
5
|
Base = declarative_base()
|
|
7
6
|
|
angr/calling_conventions.py
CHANGED
|
@@ -1061,7 +1061,9 @@ class SimCC:
|
|
|
1061
1061
|
if isinstance(arg, claripy.ast.BV):
|
|
1062
1062
|
if isinstance(ty, (SimTypeReg, SimTypeNum)):
|
|
1063
1063
|
if len(arg) != ty.size:
|
|
1064
|
-
|
|
1064
|
+
if arg.concrete:
|
|
1065
|
+
return claripy.BVV(arg.concrete_value, ty.size)
|
|
1066
|
+
raise TypeError("Type mismatch of symbolic data: expected %s, got %d bits" % (ty, len(arg)))
|
|
1065
1067
|
return arg
|
|
1066
1068
|
if isinstance(ty, (SimTypeFloat)):
|
|
1067
1069
|
raise TypeError(
|
angr/engines/vex/claripy/irop.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
This module contains symbolic implementations of VEX operations.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
+
# pylint:disable=no-member
|
|
5
6
|
from __future__ import annotations
|
|
6
7
|
|
|
7
8
|
from functools import partial
|
|
@@ -10,14 +11,17 @@ import itertools
|
|
|
10
11
|
import operator
|
|
11
12
|
import math
|
|
12
13
|
import re
|
|
13
|
-
|
|
14
14
|
import logging
|
|
15
15
|
|
|
16
|
-
l = logging.getLogger(name=__name__)
|
|
17
|
-
|
|
18
16
|
import pyvex
|
|
19
17
|
import claripy
|
|
20
18
|
|
|
19
|
+
from angr.errors import UnsupportedIROpError, SimOperationError, SimValueError, SimZeroDivisionException
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
l = logging.getLogger(name=__name__)
|
|
23
|
+
|
|
24
|
+
|
|
21
25
|
#
|
|
22
26
|
# The more sane approach
|
|
23
27
|
#
|
|
@@ -1044,6 +1048,9 @@ class SimIROp:
|
|
|
1044
1048
|
exp_threshold = (2 ** (exp_bits - 1) - 1) + mantissa_bits
|
|
1045
1049
|
return claripy.If(exp_bv >= exp_threshold, args[1].raw_to_fp(), rounded_fp)
|
|
1046
1050
|
|
|
1051
|
+
def _op_fgeneric_RSqrtEst(self, arg): # pylint:disable=no-self-use
|
|
1052
|
+
return claripy.BVS("RSqrtEst", arg.size())
|
|
1053
|
+
|
|
1047
1054
|
def _generic_pack_saturation(self, args, src_size, dst_size, src_signed, dst_signed):
|
|
1048
1055
|
"""
|
|
1049
1056
|
Generic pack with saturation.
|
|
@@ -1255,6 +1262,4 @@ def vexop_to_simop(op, extended=True, fp=True):
|
|
|
1255
1262
|
return res
|
|
1256
1263
|
|
|
1257
1264
|
|
|
1258
|
-
from angr.errors import UnsupportedIROpError, SimOperationError, SimValueError, SimZeroDivisionException
|
|
1259
|
-
|
|
1260
1265
|
make_operations()
|
angr/engines/vex/heavy/heavy.py
CHANGED
|
@@ -90,6 +90,7 @@ class HeavyVEXMixin(SuccessorsMixin, ClaripyDataMixin, SimStateStorageMixin, VEX
|
|
|
90
90
|
num_inst=None,
|
|
91
91
|
extra_stop_points=None,
|
|
92
92
|
opt_level=None,
|
|
93
|
+
strict_block_end=None,
|
|
93
94
|
**kwargs,
|
|
94
95
|
):
|
|
95
96
|
if not pyvex.lifting.lifters[self.state.arch.name] or type(successors.addr) is not int:
|
|
@@ -144,6 +145,7 @@ class HeavyVEXMixin(SuccessorsMixin, ClaripyDataMixin, SimStateStorageMixin, VEX
|
|
|
144
145
|
num_inst=num_inst,
|
|
145
146
|
extra_stop_points=extra_stop_points,
|
|
146
147
|
opt_level=opt_level,
|
|
148
|
+
strict_block_end=strict_block_end,
|
|
147
149
|
)
|
|
148
150
|
|
|
149
151
|
if (
|
|
@@ -5,8 +5,7 @@ import datetime
|
|
|
5
5
|
try:
|
|
6
6
|
import sqlalchemy
|
|
7
7
|
from sqlalchemy import Column, Integer, String, Boolean, DateTime, create_engine
|
|
8
|
-
from sqlalchemy.orm import sessionmaker
|
|
9
|
-
from sqlalchemy.ext.declarative import declarative_base
|
|
8
|
+
from sqlalchemy.orm import declarative_base, sessionmaker
|
|
10
9
|
from sqlalchemy.exc import OperationalError
|
|
11
10
|
|
|
12
11
|
Base = declarative_base()
|
|
@@ -18,6 +18,7 @@ from .types import TypesStore
|
|
|
18
18
|
from .callsite_prototypes import CallsitePrototypes
|
|
19
19
|
from .custom_strings import CustomStrings
|
|
20
20
|
from .decompilation import DecompilationManager
|
|
21
|
+
from .obfuscations import Obfuscations
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
__all__ = (
|
|
@@ -40,4 +41,5 @@ __all__ = (
|
|
|
40
41
|
"CallsitePrototypes",
|
|
41
42
|
"CustomStrings",
|
|
42
43
|
"DecompilationManager",
|
|
44
|
+
"Obfuscations",
|
|
43
45
|
)
|
|
@@ -56,6 +56,7 @@ class Function(Serializable):
|
|
|
56
56
|
"addr",
|
|
57
57
|
"is_simprocedure",
|
|
58
58
|
"_name",
|
|
59
|
+
"previous_names",
|
|
59
60
|
"is_default_name",
|
|
60
61
|
"from_signature",
|
|
61
62
|
"binary_name",
|
|
@@ -224,6 +225,7 @@ class Function(Serializable):
|
|
|
224
225
|
else:
|
|
225
226
|
self.is_default_name = False
|
|
226
227
|
self._name = name
|
|
228
|
+
self.previous_names = []
|
|
227
229
|
self.from_signature = None
|
|
228
230
|
|
|
229
231
|
# Determine the name the binary where this function is.
|
|
@@ -274,6 +276,7 @@ class Function(Serializable):
|
|
|
274
276
|
|
|
275
277
|
@name.setter
|
|
276
278
|
def name(self, v):
|
|
279
|
+
self.previous_names.append(self._name)
|
|
277
280
|
self._name = v
|
|
278
281
|
self._function_manager._kb.labels[self.addr] = v
|
|
279
282
|
|
|
@@ -1667,6 +1670,7 @@ class Function(Serializable):
|
|
|
1667
1670
|
func._endpoints = self._endpoints.copy()
|
|
1668
1671
|
func._call_sites = self._call_sites.copy()
|
|
1669
1672
|
func._project = self._project
|
|
1673
|
+
func.previous_names = list(self.previous_names)
|
|
1670
1674
|
func.is_plt = self.is_plt
|
|
1671
1675
|
func.is_simprocedure = self.is_simprocedure
|
|
1672
1676
|
func.binary_name = self.binary_name
|
|
@@ -313,7 +313,7 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
|
|
|
313
313
|
if isinstance(k, self.function_address_types):
|
|
314
314
|
f = self.function(addr=k)
|
|
315
315
|
elif type(k) is str:
|
|
316
|
-
f = self.function(name=k)
|
|
316
|
+
f = self.function(name=k) or self.function(name=k, check_previous_names=True)
|
|
317
317
|
else:
|
|
318
318
|
raise ValueError(f"FunctionManager.__getitem__ does not support keys of type {type(k)}")
|
|
319
319
|
|
|
@@ -350,9 +350,9 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
|
|
|
350
350
|
def get_by_addr(self, addr) -> Function:
|
|
351
351
|
return self._function_map.get(addr)
|
|
352
352
|
|
|
353
|
-
def get_by_name(self, name: str) -> Generator[Function]:
|
|
353
|
+
def get_by_name(self, name: str, check_previous_names: bool = False) -> Generator[Function]:
|
|
354
354
|
for f in self._function_map.values():
|
|
355
|
-
if f.name == name:
|
|
355
|
+
if f.name == name or (check_previous_names and name in f.previous_names):
|
|
356
356
|
yield f
|
|
357
357
|
|
|
358
358
|
def _function_added(self, func: Function):
|
|
@@ -411,7 +411,7 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
|
|
|
411
411
|
except KeyError:
|
|
412
412
|
return None
|
|
413
413
|
|
|
414
|
-
def query(self, query: str) -> Function | None:
|
|
414
|
+
def query(self, query: str, check_previous_names: bool = False) -> Function | None:
|
|
415
415
|
"""
|
|
416
416
|
Query for a function using selectors to disambiguate. Supported variations:
|
|
417
417
|
|
|
@@ -430,19 +430,21 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
|
|
|
430
430
|
addr = int(matches.group(2), 0)
|
|
431
431
|
try:
|
|
432
432
|
func = self._function_map.get(addr)
|
|
433
|
-
if func.name == name:
|
|
433
|
+
if func.name == name or (check_previous_names and name in func.previous_names):
|
|
434
434
|
return func
|
|
435
435
|
except KeyError:
|
|
436
436
|
pass
|
|
437
437
|
|
|
438
438
|
obj_name = selector or self._kb._project.loader.main_object.binary_basename
|
|
439
|
-
for func in self.get_by_name(name):
|
|
439
|
+
for func in self.get_by_name(name, check_previous_names=check_previous_names):
|
|
440
440
|
if func.binary_name == obj_name:
|
|
441
441
|
return func
|
|
442
442
|
|
|
443
443
|
return None
|
|
444
444
|
|
|
445
|
-
def function(
|
|
445
|
+
def function(
|
|
446
|
+
self, addr=None, name=None, check_previous_names=False, create=False, syscall=False, plt=None
|
|
447
|
+
) -> Function | None:
|
|
446
448
|
"""
|
|
447
449
|
Get a function object from the function manager.
|
|
448
450
|
|
|
@@ -457,6 +459,13 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
|
|
|
457
459
|
:return: The Function instance, or None if the function is not found and create is False.
|
|
458
460
|
:rtype: Function or None
|
|
459
461
|
"""
|
|
462
|
+
if name is not None and name.startswith("sub_"):
|
|
463
|
+
try:
|
|
464
|
+
addr = int(name.split("_")[-1], 16)
|
|
465
|
+
name = None
|
|
466
|
+
except ValueError:
|
|
467
|
+
pass
|
|
468
|
+
|
|
460
469
|
if addr is not None:
|
|
461
470
|
try:
|
|
462
471
|
f = self._function_map.get(addr)
|
|
@@ -472,11 +481,11 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
|
|
|
472
481
|
f.is_syscall = True
|
|
473
482
|
return f
|
|
474
483
|
elif name is not None:
|
|
475
|
-
func = self.query(name)
|
|
484
|
+
func = self.query(name, check_previous_names=check_previous_names)
|
|
476
485
|
if func is not None:
|
|
477
486
|
return func
|
|
478
487
|
|
|
479
|
-
for func in self.get_by_name(name):
|
|
488
|
+
for func in self.get_by_name(name, check_previous_names=check_previous_names):
|
|
480
489
|
if plt is None or func.is_plt == plt:
|
|
481
490
|
return func
|
|
482
491
|
|
|
@@ -33,7 +33,7 @@ class FunctionParser:
|
|
|
33
33
|
obj.is_syscall = function.is_syscall
|
|
34
34
|
obj.is_simprocedure = function.is_simprocedure
|
|
35
35
|
obj.returning = function.returning
|
|
36
|
-
obj.alignment = function.
|
|
36
|
+
obj.alignment = function.is_alignment
|
|
37
37
|
obj.binary_name = function.binary_name or ""
|
|
38
38
|
obj.normalized = function.normalized
|
|
39
39
|
|
|
@@ -34,6 +34,7 @@ class SootFunction(Function):
|
|
|
34
34
|
# block nodes (basic block nodes) at whose ends the function terminates
|
|
35
35
|
# in theory, if everything works fine, endpoints == ret_sites | jumpout_sites | callout_sites
|
|
36
36
|
self._endpoints = defaultdict(set)
|
|
37
|
+
self.previous_names = []
|
|
37
38
|
|
|
38
39
|
self._call_sites = {}
|
|
39
40
|
self.addr = addr
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .plugin import KnowledgeBasePlugin
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Obfuscations(KnowledgeBasePlugin):
    """
    Store discovered information and artifacts about (string) obfuscation techniques in the project.
    """

    def __init__(self, kb):
        """
        :param kb: The knowledge base this plugin belongs to.
        """
        super().__init__(kb)

        # string obfuscation results
        self.obfuscated_strings_analyzed: bool = False
        self.type1_deobfuscated_strings = {}
        self.type1_string_loader_candidates = set()
        self.type2_deobfuscated_strings = {}
        self.type2_string_loader_candidates = set()
        self.type3_deobfuscated_strings = {}  # from the address of the call instruction to the actual string (in bytes)

        # API obfuscation results
        self.obfuscated_apis_analyzed: bool = False
        self.type1_deobfuscated_apis: dict[int, tuple[str, str]] = {}

    def copy(self):
        """
        Return a new Obfuscations plugin bound to the same knowledge base.

        Containers are shallow-copied. The "analyzed" flags are carried over so that the copy does not report
        itself as un-analyzed while already holding results.
        """
        o = Obfuscations(self._kb)

        o.obfuscated_strings_analyzed = self.obfuscated_strings_analyzed
        o.type1_deobfuscated_strings = dict(self.type1_deobfuscated_strings)
        o.type1_string_loader_candidates = self.type1_string_loader_candidates.copy()
        o.type2_deobfuscated_strings = dict(self.type2_deobfuscated_strings)
        o.type2_string_loader_candidates = self.type2_string_loader_candidates.copy()
        o.type3_deobfuscated_strings = self.type3_deobfuscated_strings.copy()

        o.obfuscated_apis_analyzed = self.obfuscated_apis_analyzed
        o.type1_deobfuscated_apis = self.type1_deobfuscated_apis.copy()
        return o
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
KnowledgeBasePlugin.register_default("obfuscations", Obfuscations)
|