angr 9.2.125__py3-none-macosx_11_0_arm64.whl → 9.2.127__py3-none-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/__init__.py +4 -0
- angr/analyses/analysis.py +8 -2
- angr/analyses/cfg/cfg_fast.py +12 -1
- angr/analyses/decompiler/ail_simplifier.py +1 -0
- angr/analyses/decompiler/callsite_maker.py +9 -1
- angr/analyses/decompiler/clinic.py +2 -1
- angr/analyses/decompiler/condition_processor.py +109 -73
- angr/analyses/decompiler/decompilation_cache.py +4 -0
- angr/analyses/decompiler/decompiler.py +21 -3
- angr/analyses/decompiler/dephication/graph_vvar_mapping.py +1 -2
- angr/analyses/decompiler/optimization_passes/__init__.py +15 -1
- angr/analyses/decompiler/return_maker.py +1 -0
- angr/analyses/decompiler/ssailification/rewriting.py +4 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +10 -3
- angr/analyses/decompiler/ssailification/traversal.py +1 -0
- angr/analyses/decompiler/ssailification/traversal_engine.py +15 -0
- angr/analyses/decompiler/structured_codegen/c.py +18 -5
- angr/analyses/decompiler/structured_codegen/dwarf_import.py +4 -1
- angr/analyses/deobfuscator/__init__.py +18 -0
- angr/analyses/deobfuscator/api_obf_finder.py +313 -0
- angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +51 -0
- angr/analyses/deobfuscator/irsb_reg_collector.py +85 -0
- angr/analyses/deobfuscator/string_obf_finder.py +774 -0
- angr/analyses/deobfuscator/string_obf_opt_passes.py +133 -0
- angr/analyses/deobfuscator/string_obf_peephole_optimizer.py +47 -0
- angr/analyses/reaching_definitions/function_handler_library/stdio.py +8 -1
- angr/analyses/reaching_definitions/function_handler_library/string.py +2 -2
- angr/analyses/s_liveness.py +3 -3
- angr/analyses/s_propagator.py +74 -3
- angr/analyses/unpacker/__init__.py +6 -0
- angr/analyses/unpacker/obfuscation_detector.py +103 -0
- angr/analyses/unpacker/packing_detector.py +138 -0
- angr/angrdb/models.py +2 -1
- angr/angrdb/serializers/kb.py +3 -3
- angr/angrdb/serializers/structured_code.py +5 -3
- angr/calling_conventions.py +4 -2
- angr/engines/vex/claripy/irop.py +10 -5
- angr/knowledge_base.py +1 -1
- angr/knowledge_plugins/__init__.py +2 -2
- angr/knowledge_plugins/obfuscations.py +36 -0
- angr/knowledge_plugins/structured_code.py +1 -1
- angr/lib/angr_native.dylib +0 -0
- angr/utils/ssa/__init__.py +8 -3
- {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/METADATA +6 -6
- {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/RECORD +50 -40
- {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/WHEEL +1 -1
- angr/knowledge_plugins/decompilation.py +0 -45
- {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/LICENSE +0 -0
- {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/entry_points.txt +0 -0
- {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/top_level.txt +0 -0
|
@@ -7,6 +7,7 @@ from ailment.expression import Register, BinaryOp, StackBaseOffset, ITE, VEXCCal
|
|
|
7
7
|
from angr.engines.light import SimEngineLight, SimEngineLightAILMixin
|
|
8
8
|
from angr.utils.ssa import get_reg_offset_base
|
|
9
9
|
from angr.utils.orderedset import OrderedSet
|
|
10
|
+
from angr.calling_conventions import default_cc
|
|
10
11
|
from .traversal_state import TraversalState
|
|
11
12
|
|
|
12
13
|
|
|
@@ -23,6 +24,7 @@ class SimEngineSSATraversal(
|
|
|
23
24
|
def __init__(
|
|
24
25
|
self,
|
|
25
26
|
arch,
|
|
27
|
+
simos,
|
|
26
28
|
sp_tracker=None,
|
|
27
29
|
bp_as_gpr: bool = False,
|
|
28
30
|
def_to_loc=None,
|
|
@@ -33,6 +35,7 @@ class SimEngineSSATraversal(
|
|
|
33
35
|
super().__init__()
|
|
34
36
|
|
|
35
37
|
self.arch = arch
|
|
38
|
+
self.simos = simos
|
|
36
39
|
self.sp_tracker = sp_tracker
|
|
37
40
|
self.bp_as_gpr = bp_as_gpr
|
|
38
41
|
self.stackvars = stackvars
|
|
@@ -75,6 +78,18 @@ class SimEngineSSATraversal(
|
|
|
75
78
|
self._expr(stmt.false_target)
|
|
76
79
|
|
|
77
80
|
def _handle_Call(self, stmt: Call):
|
|
81
|
+
|
|
82
|
+
# kill caller-saved registers
|
|
83
|
+
cc = (
|
|
84
|
+
default_cc(self.arch.name, platform=self.simos.name if self.simos is not None else None)
|
|
85
|
+
if stmt.calling_convention is None
|
|
86
|
+
else stmt.calling_convention
|
|
87
|
+
)
|
|
88
|
+
for reg_name in cc.CALLER_SAVED_REGS:
|
|
89
|
+
reg_offset = self.arch.registers[reg_name][0]
|
|
90
|
+
base_off = get_reg_offset_base(reg_offset, self.arch)
|
|
91
|
+
self.state.live_registers.discard(base_off)
|
|
92
|
+
|
|
78
93
|
if stmt.ret_expr is not None and isinstance(stmt.ret_expr, Register):
|
|
79
94
|
codeloc = self._codeloc()
|
|
80
95
|
self.def_to_loc.append((stmt.ret_expr, codeloc))
|
|
@@ -2148,6 +2148,12 @@ class CConstant(CExpression):
|
|
|
2148
2148
|
elif isinstance(v, Function):
|
|
2149
2149
|
yield get_cpp_function_name(v.demangled_name, specialized=False, qualified=True), self
|
|
2150
2150
|
return
|
|
2151
|
+
elif isinstance(v, str):
|
|
2152
|
+
yield CConstant.str_to_c_str(v), self
|
|
2153
|
+
return
|
|
2154
|
+
elif isinstance(v, bytes):
|
|
2155
|
+
yield CConstant.str_to_c_str(v.replace(b"\x00", b"").decode("utf-8")), self
|
|
2156
|
+
return
|
|
2151
2157
|
|
|
2152
2158
|
if self.reference_values is not None and self._type is not None and self._type in self.reference_values:
|
|
2153
2159
|
if isinstance(self._type, SimTypeInt):
|
|
@@ -2505,9 +2511,6 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2505
2511
|
|
|
2506
2512
|
self._analyze()
|
|
2507
2513
|
|
|
2508
|
-
if flavor is not None:
|
|
2509
|
-
self.kb.structured_code[(func.addr, flavor)] = self
|
|
2510
|
-
|
|
2511
2514
|
def reapply_options(self, options):
|
|
2512
2515
|
for option, value in options:
|
|
2513
2516
|
if option.param == "braces_on_own_lines":
|
|
@@ -3415,7 +3418,17 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3415
3418
|
if reference_values is None:
|
|
3416
3419
|
reference_values = {}
|
|
3417
3420
|
type_ = unpack_typeref(type_)
|
|
3418
|
-
if
|
|
3421
|
+
if expr.value in self.kb.obfuscations.type1_deobfuscated_strings:
|
|
3422
|
+
reference_values[SimTypePointer(SimTypeChar())] = self.kb.obfuscations.type1_deobfuscated_strings[
|
|
3423
|
+
expr.value
|
|
3424
|
+
]
|
|
3425
|
+
inline_string = True
|
|
3426
|
+
elif expr.value in self.kb.obfuscations.type2_deobfuscated_strings:
|
|
3427
|
+
reference_values[SimTypePointer(SimTypeChar())] = self.kb.obfuscations.type2_deobfuscated_strings[
|
|
3428
|
+
expr.value
|
|
3429
|
+
]
|
|
3430
|
+
inline_string = True
|
|
3431
|
+
elif isinstance(type_, SimTypePointer) and isinstance(type_.pts_to, SimTypeChar):
|
|
3419
3432
|
# char*
|
|
3420
3433
|
# Try to get a string
|
|
3421
3434
|
if (
|
|
@@ -3433,7 +3446,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3433
3446
|
# edge cases: (void*)"this is a constant string pointer". in this case, the type_ will be a void*
|
|
3434
3447
|
# (BOT*) instead of a char*.
|
|
3435
3448
|
|
|
3436
|
-
if isinstance(expr.value, int):
|
|
3449
|
+
if not reference_values and isinstance(expr.value, int):
|
|
3437
3450
|
if expr.value in self.project.kb.functions:
|
|
3438
3451
|
# It's a function pointer
|
|
3439
3452
|
# We don't care about the actual prototype here
|
|
@@ -5,6 +5,7 @@ import logging
|
|
|
5
5
|
from sortedcontainers import SortedList
|
|
6
6
|
|
|
7
7
|
from angr.analyses import Analysis, register_analysis
|
|
8
|
+
from angr.analyses.decompiler.decompilation_cache import DecompilationCache
|
|
8
9
|
from .base import BaseStructuredCodeGenerator, InstructionMapping, PositionMapping
|
|
9
10
|
from angr.knowledge_plugins.functions.function import Function
|
|
10
11
|
|
|
@@ -30,7 +31,9 @@ class ImportSourceCode(BaseStructuredCodeGenerator, Analysis):
|
|
|
30
31
|
self.regenerate_text()
|
|
31
32
|
|
|
32
33
|
if flavor is not None and self.text:
|
|
33
|
-
|
|
34
|
+
if (function.addr, flavor) not in self.kb.decompilations:
|
|
35
|
+
self.kb.decompilations[(function.addr, flavor)] = DecompilationCache(function.addr)
|
|
36
|
+
self.kb.decompilations[(function.addr, flavor)].codegen = self
|
|
34
37
|
|
|
35
38
|
def regenerate_text(self):
|
|
36
39
|
cache = {}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# deobfuscator is a collection of analyses that automatically identifies functions where obfuscation techniques are
|
|
2
|
+
# in use.
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .string_obf_finder import StringObfuscationFinder
|
|
6
|
+
from .string_obf_peephole_optimizer import StringObfType1PeepholeOptimizer
|
|
7
|
+
from .string_obf_opt_passes import StringObfType3Rewriter
|
|
8
|
+
from .api_obf_finder import APIObfuscationFinder
|
|
9
|
+
from .api_obf_peephole_optimizer import APIObfType1PeepholeOptimizer
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Names re-exported by this package: the two obfuscation finders and the optimizers/rewriters imported above.
__all__ = (
    "StringObfuscationFinder",
    "StringObfType1PeepholeOptimizer",
    "StringObfType3Rewriter",
    "APIObfuscationFinder",
    "APIObfType1PeepholeOptimizer",
)
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
# pylint:disable=missing-class-docstring,too-many-boolean-expressions
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from typing import Any
|
|
4
|
+
from enum import IntEnum
|
|
5
|
+
import string
|
|
6
|
+
import logging
|
|
7
|
+
|
|
8
|
+
import networkx
|
|
9
|
+
|
|
10
|
+
import claripy
|
|
11
|
+
|
|
12
|
+
from angr import SIM_LIBRARIES
|
|
13
|
+
from angr.calling_conventions import SimRegArg
|
|
14
|
+
from angr.errors import SimMemoryMissingError
|
|
15
|
+
from angr.knowledge_plugins.key_definitions.constants import ObservationPointType
|
|
16
|
+
from angr.sim_type import SimTypePointer, SimTypeChar
|
|
17
|
+
from angr.analyses import Analysis, AnalysesHub
|
|
18
|
+
from angr.procedures.definitions import SimSyscallLibrary
|
|
19
|
+
from angr.sim_variable import SimMemoryVariable
|
|
20
|
+
from angr.analyses.decompiler.structured_codegen.c import (
|
|
21
|
+
CStructuredCodeWalker,
|
|
22
|
+
CFunctionCall,
|
|
23
|
+
CConstant,
|
|
24
|
+
CAssignment,
|
|
25
|
+
CVariable,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# Module-level logger, named after this module.
_l = logging.getLogger(name=__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class APIObfuscationType(IntEnum):
    """
    Enumerates the API obfuscation styles that can be described by an APIDeobFuncDescriptor.
    """

    # the only style defined so far
    TYPE_1 = 0
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class APIDeobFuncDescriptor:
    """
    Describes one function that was identified as an API "deobfuscation" routine.

    :ivar type:             The obfuscation style of the described function.
    :ivar func_addr:        Address of the described function.
    :ivar libname_argidx:   Index into the call arguments of the argument that holds the library name.
    :ivar funcname_argidx:  Index into the call arguments of the argument that holds the API (function) name.
    """

    def __init__(self, type_: APIObfuscationType, func_addr=None, libname_argidx=None, funcname_argidx=None):
        self.type, self.func_addr = type_, func_addr
        self.libname_argidx, self.funcname_argidx = libname_argidx, funcname_argidx
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class Type1AssignmentFinder(CStructuredCodeWalker):
    """
    Walk decompiled C code and record assignments of the form ``global_var = deobf_func(..., "lib", "api", ...)``.

    Every match is stored in ``assignments``, which maps the address of the assigned memory variable to a
    ``(libname, funcname)`` tuple.

    :param func_addr:   Address of the function whose call sites are of interest.
    :param desc:        Descriptor that tells which call arguments hold the library name and the function name.
    """

    def __init__(self, func_addr: int, desc: APIDeobFuncDescriptor):
        self.func_addr = func_addr
        self.desc = desc
        self.assignments: dict[int, tuple[str, str]] = {}

    @staticmethod
    def _constant_to_str(arg: CConstant) -> str | None:
        """
        Return the string that a constant refers to, or None if no decodable byte string is attached to it.
        """
        ref_values = arg.reference_values
        # reference_values is None for constants that do not carry any referenced data
        if ref_values is None or arg.type not in ref_values:
            return None
        # not every reference value is guaranteed to expose a .content attribute
        content = getattr(ref_values[arg.type], "content", None)
        if not isinstance(content, bytes):
            return None
        try:
            return content.decode("utf-8")
        except UnicodeDecodeError:
            # arbitrary bytes from the binary; treat them as "not a name"
            return None

    def _record_assignment(self, var_addr: int, func_args) -> None:
        """
        Extract the library name and function name from the arguments of one call and remember them for the
        variable at ``var_addr``. Calls whose arguments cannot be resolved to two non-empty strings are ignored.
        """
        funcname_idx = self.desc.funcname_argidx
        libname_idx = self.desc.libname_argidx
        if funcname_idx is None or libname_idx is None:
            # the descriptor does not say where the names are
            return
        if funcname_idx >= len(func_args) or libname_idx >= len(func_args):
            return

        funcname_arg = func_args[funcname_idx]
        libname_arg = func_args[libname_idx]
        if not (isinstance(funcname_arg, CConstant) and isinstance(libname_arg, CConstant)):
            return

        # load two strings
        funcname = self._constant_to_str(funcname_arg)
        libname = self._constant_to_str(libname_arg)
        if not funcname or not libname:
            return

        if var_addr not in self.assignments:
            self.assignments[var_addr] = libname, funcname
        elif self.assignments[var_addr] != (libname, funcname):
            # the first observed assignment wins; only report the conflict
            _l.warning("Observed more than one assignment for variable at %#x.", var_addr)

    def handle_CAssignment(self, obj: CAssignment):
        """
        Record the assignment if it stores the result of a call to ``func_addr`` into a memory variable with a
        concrete address, then continue the walk through the parent class.
        """
        lhs, rhs = obj.lhs, obj.rhs
        if (
            isinstance(lhs, CVariable)
            and isinstance(lhs.variable, SimMemoryVariable)
            and isinstance(lhs.variable.addr, int)
            and isinstance(rhs, CFunctionCall)
            and isinstance(rhs.callee_target, CConstant)
            and rhs.callee_target.value == self.func_addr
        ):
            # found it!
            self._record_assignment(lhs.variable.addr, rhs.args)

        return super().handle_CAssignment(obj)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class APIObfuscationFinder(Analysis):
    """
    An analysis that automatically finds API "obfuscation" routines.

    Currently, we support the following API "obfuscation" styles:

    - sub_A("dll_name", "api_name") where sub_A ends up calling LoadLibrary.

    The descriptors of all identified routines are stored in ``type1_candidates``. The (libname, funcname) pairs
    that they resolve are stored in ``kb.obfuscations.type1_deobfuscated_apis``, keyed by the address of the
    variable that receives the resolved API.
    """

    # byte values that may appear in a library name or an API name
    _VALID_APINAME_CHARSET = frozenset((string.ascii_letters + string.digits + "._").encode("ascii"))

    def __init__(self):
        self.type1_candidates = []

        self.analyze()

    def analyze(self):
        """
        Find type-1 routines and collect the APIs that each of them resolves.
        """
        self.type1_candidates = self._find_type1()

        # _find_type1() returns None when there is no LoadLibrary* SimProcedure at all
        for desc in self.type1_candidates or ():
            type1_deobfuscated = self._analyze_type1(desc.func_addr, desc)
            self.kb.obfuscations.type1_deobfuscated_apis.update(type1_deobfuscated)

    def _find_type1(self):
        """
        Look for functions that (transitively, up to three call levels) call a LoadLibrary* SimProcedure and that
        are called with a library name and an API name.

        :return:    A list of APIDeobFuncDescriptor objects (each function appears at most once), or None if no
                    LoadLibrary* SimProcedure exists in the knowledge base.
        """
        cfg = self.kb.cfgs.get_most_accurate()

        load_library_funcs = []
        for name in ("LoadLibraryA", "LoadLibraryW", "LoadLibrary"):
            if name in self.kb.functions:
                load_library_funcs += [func for func in self.kb.functions.get_by_name(name) if func.is_simprocedure]

        if not load_library_funcs:
            return None

        # find callers of each load library func, up to three callers back
        callgraph = self.kb.functions.callgraph
        descs = []
        checked_addrs: set[int] = set()
        for load_library_func in load_library_funcs:
            subtree = self._build_caller_subtree(callgraph, load_library_func.addr, 3)
            if load_library_func.addr not in subtree:
                # nobody calls this function, so the subtree is empty and bfs_successors() would raise
                continue
            for _, succs in networkx.bfs_successors(subtree, load_library_func.addr):
                for succ_addr in succs:
                    if succ_addr in checked_addrs:
                        # already examined via another LoadLibrary variant; do not decompile it again
                        continue
                    checked_addrs.add(succ_addr)

                    func = self.kb.functions.get_by_addr(succ_addr)
                    likely, info = self._is_likely_type1_func(func, cfg)
                    if likely:
                        descs.append(
                            APIDeobFuncDescriptor(
                                APIObfuscationType.TYPE_1,
                                func_addr=func.addr,
                                libname_argidx=info["libname_arg_idx"],
                                funcname_argidx=info["funcname_arg_idx"],
                            )
                        )

        return descs

    def _load_callsite_args(self, observ, func) -> list[tuple[int, Any]]:
        """
        Read the values of all register-passed arguments of ``func`` from a reaching-definition observation.

        Arguments whose value is missing or not unique are represented by the placeholder constant 0xDEADBEEF.

        :return:    A list of (argument index, value) tuples.
        """
        arch = self.project.arch
        placeholder = claripy.BVV(0xDEADBEEF, arch.bits)

        args: list[tuple[int, Any]] = []
        for arg_idx, func_arg in enumerate(func.arguments):
            # FIXME: We are ignoring all non-register function arguments until we see a test case where
            # FIXME: stack-passing arguments are used
            if not isinstance(func_arg, SimRegArg):
                continue
            reg_offset, reg_size = arch.registers[func_arg.reg_name]
            try:
                mv = observ.registers.load(reg_offset, size=reg_size)
            except SimMemoryMissingError:
                args.append((arg_idx, placeholder))
                continue
            arg_value = mv.one_value()
            args.append((arg_idx, placeholder if arg_value is None else arg_value))
        return args

    def _load_arg_strings(self, args) -> list[tuple[int, str]] | None:
        """
        Load the NUL-terminated string (at most 64 bytes) behind every concrete argument that points into a section
        of a loaded object.

        :return:    A list of (argument index, string) tuples, or None if any pointed-to memory cannot be loaded or
                    contains characters that are not valid in a library or API name.
        """
        loader = self.project.loader

        arg_strs: list[tuple[int, str]] = []
        for idx, arg in args:
            if arg is None or not arg.concrete:
                continue
            v = arg.concrete_value
            section = loader.find_section_containing(v)
            if section is None:
                continue

            # what string is it?
            max_size = min(64, section.max_addr - v)
            try:
                value = loader.memory.load(v, max_size)
            except KeyError:
                return None
            if b"\x00" in value:
                value = value[: value.index(b"\x00")]
            if not all(ch in self._VALID_APINAME_CHARSET for ch in value):
                return None
            arg_strs.append((idx, value.decode("utf-8")))
        return arg_strs

    def _is_likely_type1_func(self, func, cfg):
        """
        Test if ``func`` looks like a type-1 API deobfuscation function: its prototype has exactly two char*
        arguments, and at least one call site passes a known library name and a known API name.

        :param func:    The function to test.
        :param cfg:     The CFG model used for decompilation and for locating call sites.
        :return:        (True, {"libname_arg_idx": ..., "funcname_arg_idx": ...}) on success, (False, None) otherwise.
        """
        if func.prototype is None:
            return False, None
        if len(func.prototype.args) < 2:
            return False, None

        # decompile the function to get a prototype with types
        try:
            _ = self.project.analyses.Decompiler(func, cfg=cfg)
        except Exception:  # pylint:disable=broad-exception-caught
            # same policy as in _analyze_type1(): a failed decompilation only disqualifies this function
            return False, None

        char_ptr_args = [
            idx
            for (idx, arg) in enumerate(func.prototype.args)
            if isinstance(arg, SimTypePointer) and isinstance(arg.pts_to, SimTypeChar)
        ]
        if len(char_ptr_args) != 2:
            return False, None

        func_node = cfg.get_any_node(func.addr)
        if func_node is None:
            return False, None

        # who's calling it?
        caller_addrs = sorted(set(self.kb.functions.callgraph.predecessors(func.addr)))
        for caller_addr in caller_addrs:
            # what arguments are used to call this function with?
            callsite_nodes = [
                pred
                for pred in cfg.get_predecessors(func_node)
                if pred.function_address == caller_addr and pred.instruction_addrs
            ]
            if not callsite_nodes:
                # nothing to observe in this caller
                continue

            observation_points = [
                ("insn", callsite_node.instruction_addrs[-1], ObservationPointType.OP_BEFORE)
                for callsite_node in callsite_nodes
            ]
            rda = self.project.analyses.ReachingDefinitions(
                self.kb.functions[caller_addr],
                observe_all=False,
                observation_points=observation_points,
            )
            for callsite_node in callsite_nodes:
                observ = rda.model.get_observation_by_insn(
                    callsite_node.instruction_addrs[-1],
                    ObservationPointType.OP_BEFORE,
                )
                if observ is None:
                    # defensive: no state was recorded for this call site
                    continue

                args = self._load_callsite_args(observ, func)
                arg_strs = self._load_arg_strings(args)
                # a usable call site passes exactly two loadable strings; anything else (including call sites where
                # the arguments could not be resolved) is skipped instead of aborting the whole analysis
                if arg_strs is None or len(arg_strs) != 2:
                    continue

                libname_arg_idx, funcname_arg_idx = None, None
                for arg_idx, name in arg_strs:
                    if self.is_libname(name):
                        libname_arg_idx = arg_idx
                    elif self.is_apiname(name):
                        funcname_arg_idx = arg_idx

                if libname_arg_idx is not None and funcname_arg_idx is not None:
                    # the first call site that yields both names decides
                    return True, {"libname_arg_idx": libname_arg_idx, "funcname_arg_idx": funcname_arg_idx}

        return False, None

    def _analyze_type1(self, func_addr, desc: APIDeobFuncDescriptor) -> dict[int, tuple[str, str]]:
        """
        Decompile every caller of the function at ``func_addr`` and collect the assignments of its return value.

        :param func_addr:   Address of the API deobfuscation function.
        :param desc:        The descriptor of that function.
        :return:            A dict that maps variable addresses to (libname, funcname) tuples.
        """
        cfg = self.kb.cfgs.get_most_accurate()

        assignments: dict[int, tuple[str, str]] = {}

        # get all call sites
        caller_addrs = sorted(set(self.kb.functions.callgraph.predecessors(func_addr)))
        for caller_addr in caller_addrs:
            # decompile the function and get all assignments of the return value of the func at func_addr
            try:
                dec = self.project.analyses.Decompiler(self.kb.functions.get_by_addr(caller_addr), cfg=cfg)
            except Exception:  # pylint:disable=broad-exception-caught
                continue
            if dec.codegen is None:
                continue

            finder = Type1AssignmentFinder(func_addr, desc)
            finder.handle(dec.codegen.cfunc)

            duplicate_addrs = assignments.keys() & finder.assignments.keys()
            if duplicate_addrs:
                # duplicate entries; the assignment found in the later caller overwrites the earlier one
                _l.warning(
                    "Observed duplicate assignments at the following addresses: %s.",
                    ", ".join(hex(addr) for addr in sorted(duplicate_addrs)),
                )

            assignments.update(finder.assignments)

        return assignments

    @staticmethod
    def _build_caller_subtree(callgraph: networkx.DiGraph, func_addr: int, max_level: int) -> networkx.DiGraph:
        """
        Build a tree of the callers of ``func_addr`` by a breadth-first walk over call graph predecessors.

        Edges point from callee to caller. Each caller is added only once, at the level it is first reached.

        :param callgraph:   The call graph.
        :param func_addr:   Address of the function at the root of the tree.
        :param max_level:   Maximum number of call levels to walk back.
        :return:            The tree; it has no nodes if ``func_addr`` is not in the call graph or has no callers.
        """
        from collections import deque  # pylint:disable=import-outside-toplevel

        tree = networkx.DiGraph()

        if func_addr not in callgraph:
            return tree

        queue = deque([(0, func_addr)])
        traversed = {func_addr}
        while queue:
            level, addr = queue.popleft()
            if level + 1 > max_level:
                # callers of this node would be too far away from the root
                continue
            for pred in callgraph.predecessors(addr):
                if pred not in traversed:
                    traversed.add(pred)
                    queue.append((level + 1, pred))
                    tree.add_edge(addr, pred)

        return tree

    @staticmethod
    def is_libname(name: str) -> bool:
        """
        Test if ``name`` (case-insensitive, with or without the ".dll"/".exe" extension) is a key of SIM_LIBRARIES.
        """
        name = name.lower()
        if name in SIM_LIBRARIES:
            return True
        if "." not in name:
            return name + ".dll" in SIM_LIBRARIES or name + ".exe" in SIM_LIBRARIES
        return False

    @staticmethod
    def is_apiname(name: str) -> bool:
        """
        Test if any library in SIM_LIBRARIES that is not a syscall library has a prototype for ``name``.
        """
        return any(not isinstance(lib, SimSyscallLibrary) and lib.has_prototype(name) for lib in SIM_LIBRARIES.values())
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
# Register the analysis class with AnalysesHub under the name "APIObfuscationFinder" when this module is imported.
AnalysesHub.register_default("APIObfuscationFinder", APIObfuscationFinder)
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from ailment.expression import Const, Load
|
|
3
|
+
|
|
4
|
+
from angr import SIM_LIBRARIES
|
|
5
|
+
from angr.calling_conventions import default_cc
|
|
6
|
+
from angr.analyses.decompiler.peephole_optimizations.base import PeepholeOptimizationExprBase
|
|
7
|
+
from angr.analyses.decompiler.peephole_optimizations import EXPR_OPTS
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class APIObfType1PeepholeOptimizer(PeepholeOptimizationExprBase):
    """
    Integrate type-1 deobfuscated API into decompilation output.

    A pointer-sized load from a constant address that is a key of ``kb.obfuscations.type1_deobfuscated_apis`` is
    replaced by a constant holding the address of the function named by that entry.
    """

    __slots__ = ()

    NAME = "Simplify Type 1 API obfuscation references"
    expr_classes = (Load,)

    def optimize(self, expr: Load, **kwargs):
        """
        :param expr:    The load expression to examine.
        :return:        The replacement Const, or None if ``expr`` does not load a deobfuscated API pointer.
        """
        addr_expr = expr.addr
        if not isinstance(addr_expr, Const):
            return None
        if addr_expr.value not in self.kb.obfuscations.type1_deobfuscated_apis:
            return None
        if expr.bits != self.project.arch.bits:
            return None

        # this is actually a function calling a known API
        # replace it with the actual API and the actual arguments
        project = self.project
        _, funcname = self.kb.obfuscations.type1_deobfuscated_apis[addr_expr.value]

        if funcname in self.kb.functions:
            func = self.kb.functions[funcname]
        else:
            # assign a new function on-demand
            symbol = project.loader.extern_object.make_extern(funcname)
            # NOTE(review): the stub always comes from the "linux" library, whatever the target platform is - confirm
            stub = SIM_LIBRARIES["linux"].get_stub(funcname, project.arch)
            hook_addr = project.hook_symbol(symbol.rebased_addr, stub)
            func = self.kb.functions.function(addr=hook_addr, name=funcname, create=True)
            func.is_simprocedure = True

            # only pass the platform along when the project has a SimOS
            platform_kwargs = {"platform": project.simos.name} if project.simos is not None else {}
            cc_cls = default_cc(project.arch.name, **platform_kwargs)
            if cc_cls is not None:
                func.calling_convention = cc_cls(project.arch)
            func.find_declaration(ignore_binary_name=True)

        return Const(expr.idx, None, func.addr, project.arch.bits, **expr.tags)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Append this optimizer to EXPR_OPTS when this module is imported.
EXPR_OPTS.append(APIObfType1PeepholeOptimizer)
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# pylint:disable=no-self-use,unused-argument,attribute-defined-outside-init
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import pyvex
|
|
5
|
+
|
|
6
|
+
from angr.engines.light import SimEngineLightVEXMixin
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class IRSBRegisterCollector(SimEngineLightVEXMixin):
    """
    Scan the VEX IRSB to collect all registers that are read.

    After process() has run, ``reg_reads`` holds one ``(offset, size)`` tuple for each Get expression that was
    handled, where size is what ``result_size()`` reports for the expression. All other handlers are no-ops.
    """

    def __init__(self, block, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.block = block
        self.reg_reads: set[tuple[int, int]] = set()

    def process(self):
        """Process the statements of ``self.block``, then clear the statement index and instruction address."""
        self.tmps = {}
        self.tyenv = self.block.vex.tyenv

        self._process_Stmt()

        self.stmt_idx = None
        self.ins_addr = None

    #
    # The only handler that does any work
    #

    def _handle_Get(self, expr: pyvex.IRExpr.Get):
        """Record the register read."""
        read = (expr.offset, expr.result_size(self.tyenv))
        self.reg_reads.add(read)

    #
    # Statement handlers: nothing to collect
    #

    def _handle_Put(self, stmt):
        """Ignored."""

    def _handle_Store(self, stmt):
        """Ignored."""

    def _handle_LoadG(self, stmt):
        """Ignored."""

    def _handle_LLSC(self, stmt: pyvex.IRStmt.LLSC):
        """Ignored."""

    def _handle_StoreG(self, stmt):
        """Ignored."""

    #
    # Expression handlers: nothing to collect
    #

    def _handle_Load(self, expr):
        """Ignored."""

    def _handle_RdTmp(self, expr):
        """Ignored."""

    def _handle_Conversion(self, expr: pyvex.IRExpr.Unop):
        """Ignored."""

    def _handle_16HLto32(self, expr):
        """Ignored."""

    def _handle_Cmp_v(self, expr, _vector_size, _vector_count):
        """Ignored."""

    _handle_CmpEQ_v = _handle_Cmp_v
    _handle_CmpNE_v = _handle_Cmp_v
    _handle_CmpLE_v = _handle_Cmp_v
    _handle_CmpLT_v = _handle_Cmp_v
    _handle_CmpGE_v = _handle_Cmp_v
    _handle_CmpGT_v = _handle_Cmp_v

    def _handle_ExpCmpNE64(self, expr):
        """Ignored."""

    def _handle_CCall(self, expr):
        """Ignored."""

    def _handle_function(self, func_addr):
        """Ignored."""

    def _handle_Unop(self, expr):
        """Ignored."""

    def _handle_Binop(self, expr: pyvex.IRExpr.Binop):
        """Ignored."""

    def _handle_Triop(self, expr: pyvex.IRExpr.Triop):
        """Ignored."""
|