angr 9.2.125__py3-none-manylinux2014_aarch64.whl → 9.2.126__py3-none-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/__init__.py +4 -0
- angr/analyses/decompiler/ail_simplifier.py +1 -0
- angr/analyses/decompiler/callsite_maker.py +9 -1
- angr/analyses/decompiler/clinic.py +1 -1
- angr/analyses/decompiler/condition_processor.py +104 -66
- angr/analyses/decompiler/decompiler.py +3 -0
- angr/analyses/decompiler/optimization_passes/__init__.py +15 -1
- angr/analyses/decompiler/return_maker.py +1 -0
- angr/analyses/decompiler/ssailification/rewriting.py +4 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +10 -3
- angr/analyses/decompiler/structured_codegen/c.py +18 -2
- angr/analyses/deobfuscator/__init__.py +18 -0
- angr/analyses/deobfuscator/api_obf_finder.py +313 -0
- angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +51 -0
- angr/analyses/deobfuscator/irsb_reg_collector.py +85 -0
- angr/analyses/deobfuscator/string_obf_finder.py +774 -0
- angr/analyses/deobfuscator/string_obf_opt_passes.py +133 -0
- angr/analyses/deobfuscator/string_obf_peephole_optimizer.py +47 -0
- angr/analyses/reaching_definitions/function_handler_library/stdio.py +8 -1
- angr/analyses/unpacker/__init__.py +6 -0
- angr/analyses/unpacker/obfuscation_detector.py +103 -0
- angr/analyses/unpacker/packing_detector.py +138 -0
- angr/calling_conventions.py +3 -1
- angr/engines/vex/claripy/irop.py +10 -5
- angr/knowledge_plugins/__init__.py +2 -0
- angr/knowledge_plugins/obfuscations.py +36 -0
- {angr-9.2.125.dist-info → angr-9.2.126.dist-info}/METADATA +6 -6
- {angr-9.2.125.dist-info → angr-9.2.126.dist-info}/RECORD +33 -22
- {angr-9.2.125.dist-info → angr-9.2.126.dist-info}/WHEEL +1 -1
- {angr-9.2.125.dist-info → angr-9.2.126.dist-info}/LICENSE +0 -0
- {angr-9.2.125.dist-info → angr-9.2.126.dist-info}/entry_points.txt +0 -0
- {angr-9.2.125.dist-info → angr-9.2.126.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,774 @@
|
|
|
1
|
+
# pylint:disable=missing-class-docstring,too-many-boolean-expressions
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from typing import Any
|
|
4
|
+
import string
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
import capstone
|
|
8
|
+
import networkx
|
|
9
|
+
|
|
10
|
+
import claripy
|
|
11
|
+
|
|
12
|
+
from angr import sim_options
|
|
13
|
+
from angr.analyses import Analysis, AnalysesHub
|
|
14
|
+
from angr.errors import SimMemoryMissingError, AngrCallableMultistateError, AngrCallableError
|
|
15
|
+
from angr.calling_conventions import SimRegArg, default_cc
|
|
16
|
+
from angr.state_plugins.sim_action import SimActionData
|
|
17
|
+
from angr.sim_type import SimTypeFunction, SimTypeBottom, SimTypePointer
|
|
18
|
+
from angr.analyses.reaching_definitions import ObservationPointType
|
|
19
|
+
from angr.utils.graph import GraphUtils
|
|
20
|
+
|
|
21
|
+
from .irsb_reg_collector import IRSBRegisterCollector
|
|
22
|
+
|
|
23
|
+
# Module-level logger, named after this module; used for the debug messages emitted below.
_l = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class StringDeobFuncDescriptor:
    """
    Describes how a string deobfuscation function receives its input and delivers its output.

    Every field starts out as None and is filled in by the code that identifies the function.
    """

    def __init__(self):
        # whether the output string ends at the first NUL byte
        self.string_null_terminating: bool | None = None
        # index of the function argument that carries the length of the string
        self.string_length_arg_idx: int | None = None
        # index of the function argument that points to the deobfuscated (output) string
        self.string_output_arg_idx: int | None = None
        # index of the function argument that points to the obfuscated (input) string
        self.string_input_arg_idx: int | None = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class StringObfuscationFinder(Analysis):
|
|
35
|
+
"""
|
|
36
|
+
An analysis that automatically finds string obfuscation routines.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
def __init__(self):
    """
    Set up empty candidate lists and run the analysis right away.
    """
    # (function address, descriptor) tuples
    self.type1_candidates: list[tuple[int, StringDeobFuncDescriptor]] = []
    # (function address, descriptor, list of (address, size, content) regions) tuples
    self.type2_candidates: list[tuple[int, StringDeobFuncDescriptor, list[tuple[int, int, bytes]]]] = []
    # (function address, descriptor) tuples
    self.type3_candidates: list[tuple[int, StringDeobFuncDescriptor]] = []

    self.analyze()
|
|
45
|
+
|
|
46
|
+
def analyze(self):
    """
    Find candidates of all three types of string deobfuscation functions, then analyze each candidate and store
    what was recovered in the `obfuscations` knowledge base plugin.
    """

    # Phase 1: candidate discovery
    _l.debug("Finding type 1 candidates.")
    self.type1_candidates = self._find_type1()
    _l.debug("Got %d type 1 candidates.", len(self.type1_candidates))

    _l.debug("Finding type 2 candidates.")
    self.type2_candidates = self._find_type2()
    _l.debug("Got %d type 2 candidates.", len(self.type2_candidates))

    _l.debug("Finding type 3 candidates.")
    self.type3_candidates = self._find_type3()
    _l.debug("Got %d type 3 candidates.", len(self.type3_candidates))
    _l.debug("Done.")

    # Phase 2: per-candidate analysis. Looping over an empty list is a no-op, so no emptiness checks are needed.
    for candidate_addr, descriptor in self.type1_candidates:
        _l.debug("Analyzing type 1 candidates.")
        strings, loaders = self._analyze_type1(candidate_addr, descriptor)
        obfuscations = self.kb.obfuscations
        obfuscations.type1_deobfuscated_strings.update(strings)
        obfuscations.type1_string_loader_candidates |= loaders

    for candidate_addr, descriptor, regions in self.type2_candidates:
        _l.debug("Analyzing type 2 candidates.")
        region_addrs = {region_addr for region_addr, _, _ in regions}
        loaders = self._analyze_type2(candidate_addr, descriptor, region_addrs)
        # the deobfuscated content of each region was already captured when the candidate was found
        strings = {region_addr: content for region_addr, _, content in regions}
        obfuscations = self.kb.obfuscations
        obfuscations.type2_deobfuscated_strings.update(strings)
        obfuscations.type2_string_loader_candidates |= loaders

    for candidate_addr, descriptor in self.type3_candidates:
        _l.debug("Analyzing type 3 candidates.")
        strings = self._analyze_type3(candidate_addr, descriptor)
        self.kb.obfuscations.type3_deobfuscated_strings.update(strings)
|
|
82
|
+
|
|
83
|
+
def _find_type1(self) -> list[tuple[int, StringDeobFuncDescriptor]]:
    """
    Find candidates of Type 1 string deobfuscation functions.

    Type 1 string deobfuscation functions
    - Take a constant string or local string as input
    - Output strings that are reasonable
    - Do not call other functions (i.e., these functions are leaf functions)

    Type 1 string deobfuscation functions will decrypt each string once and for good.

    For every function that passes the structural filters, the register arguments at each call site are recovered
    with a reaching-definition analysis, the function is executed with these arguments, and the function becomes a
    candidate if the memory that one of its arguments points to has changed into a reasonable-looking string.

    :return:    A list of (function address, descriptor) tuples, one for each candidate function.
    """

    cfg = self.kb.cfgs.get_most_accurate()
    arch = self.project.arch
    # stand-in for every argument whose value cannot be determined at a call site
    unknown_arg = claripy.BVV(0xDEADBEEF, arch.bits)

    type1_candidates: list[tuple[int, StringDeobFuncDescriptor]] = []

    for func in self.project.kb.functions.values():
        if func.is_simprocedure or func.is_plt or func.is_alignment:
            continue

        if func.prototype is None or len(func.prototype.args) < 1:
            continue

        # leaf functions only
        if self.project.kb.functions.callgraph.out_degree[func.addr] != 0:
            continue

        # find its callsites and arguments
        callers = [
            pred for pred in self.project.kb.functions.callgraph.predecessors(func.addr) if pred != func.addr
        ]
        if not callers:
            continue

        if len(func.block_addrs_set) <= 2:
            # function is too small...
            continue
        if len(func.block_addrs_set) >= 50:
            # function is too big...
            continue

        # without a CFG node for the function entry no call site can be located, so there is no point in paying
        # for a decompilation
        func_node = cfg.get_any_node(func.addr)
        if func_node is None:
            continue

        # decompile this function and see if it "looks like" a deobfuscation function
        try:
            dec = self.project.analyses.Decompiler(func, cfg=cfg)
        except Exception:  # pylint:disable=broad-exception-caught
            continue
        if dec.codegen is None or not self._like_type1_deobfuscation_function(dec.codegen.text):
            continue

        # the predecessors of the function entry do not depend on the caller; compute them once
        entry_preds = cfg.get_predecessors(func_node)

        args_list = []
        for caller in callers:
            call_insn_addrs = [
                pred.instruction_addrs[-1]
                for pred in entry_preds
                if pred.function_address == caller and pred.instruction_addrs
            ]
            if not call_insn_addrs:
                # nothing to observe in this caller; skip the reaching-definition analysis altogether
                continue

            rda = self.project.analyses.ReachingDefinitions(
                self.project.kb.functions[caller],
                observe_all=False,
                observation_points=[
                    ("insn", insn_addr, ObservationPointType.OP_BEFORE) for insn_addr in call_insn_addrs
                ],
            )
            for insn_addr in call_insn_addrs:
                observ = rda.model.get_observation_by_insn(insn_addr, ObservationPointType.OP_BEFORE)
                if observ is None:
                    # the analysis did not record a state right before this call instruction
                    continue

                # load values for each function argument
                args: list[tuple[int, Any]] = []
                for arg_idx, func_arg in enumerate(func.arguments):
                    # FIXME: We are ignoring all non-register function arguments until we see a test case where
                    # FIXME: stack-passing arguments are used
                    if not isinstance(func_arg, SimRegArg):
                        continue
                    reg_offset, reg_size = arch.registers[func_arg.reg_name]
                    try:
                        mv = observ.registers.load(reg_offset, size=reg_size)
                    except SimMemoryMissingError:
                        args.append((arg_idx, unknown_arg))
                        continue
                    arg_value = mv.one_value()
                    args.append((arg_idx, unknown_arg if arg_value is None else arg_value))

                # the args must have at least one concrete address that points to an initialized memory location
                if any(
                    arg.concrete and self.project.loader.find_section_containing(arg.concrete_value) is not None
                    for _, arg in args
                ):
                    args_list.append(args)

        if not args_list:
            continue

        is_candidate = False
        desc = StringDeobFuncDescriptor()
        # now that we have good arguments, let's test the function!
        for args in args_list:
            func_call = self.project.factory.callable(
                func.addr, concrete_only=True, cc=func.calling_convention, prototype=func.prototype
            )

            # before calling the function, let's record the crime scene: the first 100 bytes behind every
            # argument that points into a section of the binary
            values: list[tuple[int, int, bytes]] = []
            for arg_idx, arg in args:
                if not arg.concrete:
                    continue
                v = arg.concrete_value
                if self.project.loader.find_section_containing(v) is not None:
                    values.append((arg_idx, v, self.project.loader.memory.load(v, 100)))

            try:
                func_call(*[arg for _, arg in args])
            except (AngrCallableMultistateError, AngrCallableError):
                continue

            # let's see what this amazing function has done
            # TODO: Support cases where input and output are using different function arguments
            result_state = func_call.result_state
            for arg_idx, addr, old_value in values:
                out = result_state.solver.eval(result_state.memory.load(addr, size=len(old_value)), cast_to=bytes)
                if out == old_value:
                    # untouched memory
                    continue
                if self._is_string_reasonable(out):
                    # found it!
                    _l.debug("[+] Deobfuscated string by function %s: %s", repr(func), out)
                    is_candidate = True
                    desc.string_input_arg_idx = arg_idx
                    desc.string_output_arg_idx = arg_idx
                    desc.string_null_terminating = True  # FIXME
                    break

        if is_candidate:
            type1_candidates.append((func.addr, desc))

    return type1_candidates
|
|
226
|
+
|
|
227
|
+
def _analyze_type1(self, func_addr: int, desc: StringDeobFuncDescriptor) -> tuple[dict, set]:
    """
    Analyze Type 1 string deobfuscation functions, determine the following information:

    - Deobfuscated strings, lengths, and their addresses
    - Functions that load deobfuscated strings

    Every call site of the function is replayed: the register arguments are recovered with a reaching-definition
    analysis, the function is executed with them, and the output buffer is read back. A call site is skipped when
    the argument that holds the output pointer (or the length, if the descriptor names one) cannot be resolved to
    a concrete value, since the result could not be attributed to a real address.

    :param func_addr:   Address of the deobfuscation function.
    :param desc:        Descriptor that names the output (and optionally the length) argument of the function.
    :return:            A tuple of (dict mapping the address of each output buffer to the deobfuscated bytes,
                        set of addresses of the functions that are string loader candidates).
    """

    deobfuscated_strings = {}

    arch = self.project.arch
    cfg = self.kb.cfgs.get_most_accurate()
    func = self.kb.functions.get_by_addr(func_addr)
    func_node = cfg.get_any_node(func_addr)
    if func_node is None:
        # no CFG node means no call site can be found; report that nothing was recovered
        _l.debug("Function %#x does not have a CFG node. No call site to analyze.", func_addr)
        return deobfuscated_strings, set()

    # stand-in for every argument whose value cannot be determined at a call site
    unknown_arg = claripy.BVV(0xDEADBEEF, arch.bits)

    # Find all call sites for this function
    rda_cache = {}
    for callsite_node in cfg.get_predecessors(func_node):
        if not callsite_node.instruction_addrs:
            # a node without instructions has no call instruction to observe
            continue
        call_insn_addr = callsite_node.instruction_addrs[-1]

        # dump arguments; the reaching-definition model is computed once per calling function
        caller_addr = callsite_node.function_address
        if caller_addr in rda_cache:
            rda = rda_cache[caller_addr]
        else:
            rda = self.project.analyses.ReachingDefinitions(
                self.project.kb.functions[caller_addr],
                observe_all=True,
            ).model
            rda_cache[caller_addr] = rda
        observ = rda.get_observation_by_insn(call_insn_addr, ObservationPointType.OP_BEFORE)
        if observ is None:
            _l.debug("No state was observed before the call. Skip the call at %#x.", call_insn_addr)
            continue

        call_args = []
        # argument index (position in func.arguments) -> concrete value, for resolved arguments only; the
        # indices stored in the descriptor refer to positions in func.arguments as well
        resolved_args = {}
        for arg_idx, func_arg in enumerate(func.arguments):
            # FIXME: We are ignoring all non-register function arguments until we see a test case where
            # FIXME: stack-passing arguments are used
            if not isinstance(func_arg, SimRegArg):
                continue
            reg_offset, reg_size = arch.registers[func_arg.reg_name]
            try:
                v = observ.registers.load(reg_offset, size=reg_size).one_value()
            except SimMemoryMissingError:
                v = None
            if v is not None and v.concrete:
                resolved_args[arg_idx] = v
                call_args.append(v)
            else:
                call_args.append(unknown_arg)

        # the output pointer and the length must be real values, not stand-ins
        output_addr = resolved_args.get(desc.string_output_arg_idx)
        if output_addr is None:
            _l.debug("The output argument cannot be concretized. Skip the call at %#x.", call_insn_addr)
            continue
        if desc.string_length_arg_idx is None:
            length = 256
        else:
            length_arg = resolved_args.get(desc.string_length_arg_idx)
            if length_arg is None:
                _l.debug("The length argument cannot be concretized. Skip the call at %#x.", call_insn_addr)
                continue
            length = length_arg.concrete_value

        # call the function
        func_call = self.project.factory.callable(
            func.addr, concrete_only=True, cc=func.calling_convention, prototype=func.prototype
        )
        try:
            func_call(*call_args)
        except AngrCallableMultistateError:
            _l.debug(
                "State branching encountered during string deobfuscation. Skip the call at %#x.",
                call_insn_addr,
            )
            continue
        except AngrCallableError:
            _l.debug(
                "No path returned. Skip the call at %#x.",
                call_insn_addr,
            )
            continue

        # dump the decrypted string!
        result_state = func_call.result_state
        output_str = result_state.solver.eval(
            result_state.memory.load(output_addr, size=length),
            cast_to=bytes,
        )
        if desc.string_null_terminating and b"\x00" in output_str:
            output_str = output_str[: output_str.index(b"\x00")]
        deobfuscated_strings[output_addr.concrete_value] = output_str

    # for each deobfuscated string, we find its string loader function
    # an obvious candidate function is 0x140001ae4
    xrefs = self.kb.xrefs
    string_loader_candidates = set()
    for str_addr in deobfuscated_strings:
        block_addrs = {xref.block_addr for xref in xrefs.get_xrefs_by_dst(str_addr)}
        for block_addr in block_addrs:
            node = cfg.get_any_node(block_addr)
            if node is None:
                continue
            callees = list(self.kb.functions.callgraph.successors(node.function_address))
            if callees:
                # string loader function should not call anything else
                continue
            string_loader_candidates.add(node.function_address)

    return deobfuscated_strings, string_loader_candidates
|
|
334
|
+
|
|
335
|
+
def _find_type2(self) -> list[tuple[int, StringDeobFuncDescriptor, list[tuple[int, int, bytes]]]]:
    """
    Find candidates of Type 2 string deobfuscation functions.

    Type 2 string deobfuscation functions
    - Deobfuscates an entire table of encrypted strings
    - May or may not take any arguments. All arguments should be concrete.

    Type 2 string deobfuscation functions will decrypt each string once and for good.

    :return:    A list of (function address, descriptor, regions) tuples, where regions is a list of
                (start address, size, content after execution) tuples.
    """

    cfg = self.kb.cfgs.get_most_accurate()
    functions = self.project.kb.functions

    found: list[tuple[int, StringDeobFuncDescriptor, list[tuple[int, int, bytes]]]] = []

    for func in functions.values():
        if func.is_simprocedure or func.is_plt or func.is_alignment:
            continue

        # FIXME: Handle deobfuscation functions that take arguments. Find such a case first
        if func.prototype is None or len(func.prototype.args) > 1:
            continue

        # leaf functions only
        if functions.callgraph.out_degree[func.addr] != 0:
            continue

        # somebody other than the function itself must call it
        if not any(pred != func.addr for pred in functions.callgraph.predecessors(func.addr)):
            continue

        # neither too small nor too big
        block_count = len(func.block_addrs_set)
        if block_count <= 2 or block_count >= 50:
            continue

        # decompile this function and see if it "looks like" a deobfuscation function
        try:
            dec = self.project.analyses.Decompiler(func, cfg=cfg, expr_collapse_depth=64)
        except Exception:  # pylint:disable=broad-exception-caught
            continue
        if dec.codegen is None or not self._like_type2_deobfuscation_function(dec.codegen.text):
            continue

        desc = StringDeobFuncDescriptor()
        # execute the function without arguments and track every memory access it makes
        func_call = self.project.factory.callable(
            func.addr,
            concrete_only=True,
            cc=func.calling_convention,
            prototype=func.prototype,
            add_options={sim_options.TRACK_MEMORY_ACTIONS},
        )
        try:
            func_call()
        except (AngrCallableMultistateError, AngrCallableError):
            continue
        final_state = func_call.result_state

        # where are the reads and writes? collect the address of every byte that was read or written
        touched = {"read": set(), "write": set()}
        for action in final_state.history.actions:
            if not isinstance(action, SimActionData):
                continue
            if action.actual_addrs:
                actual_addrs = action.actual_addrs
            elif action.addr.ast.concrete:
                actual_addrs = [action.addr.ast.concrete_value]
            else:
                continue
            if action.type != "mem" or action.action not in touched:
                continue
            byte_count = action.size.ast // 8
            for base in actual_addrs:
                touched[action.action].update(base + offset for offset in range(byte_count))

        # find likely memory access regions
        read_addrs = sorted(touched["read"])
        written = touched["write"]
        # TODO: Handle cases where reads and writes are not going to the same place
        region_candidates: list[tuple[int, int, bytes]] = []
        read_count = len(read_addrs)
        idx = 0
        while idx < read_count:
            starting_offset = read_addrs[idx]
            if starting_offset not in written:
                idx += 1
                continue

            # extend the run as long as the next read byte is adjacent to the previous one and was written too
            run_end = idx + 1
            while (
                run_end < read_count
                and read_addrs[run_end] - read_addrs[run_end - 1] == 1
                and read_addrs[run_end] in written
            ):
                run_end += 1
            stride = run_end - idx - 1

            if stride < 5:
                idx += 1
                continue

            # got one region
            if self.project.loader.find_section_containing(starting_offset) is not None:
                initial_data = self.project.loader.memory.load(starting_offset, stride)
                end_data = final_state.solver.eval(final_state.memory.load(starting_offset, stride), cast_to=bytes)
                # the region counts only if the function turned it into something that looks like a string
                if initial_data != end_data and self._is_string_reasonable(end_data):
                    region_candidates.append((starting_offset, stride, end_data))
            idx += stride

        if region_candidates:
            found.append((func.addr, desc, region_candidates))

    return found
|
|
457
|
+
|
|
458
|
+
def _analyze_type2(
    self, func_addr: int, desc: StringDeobFuncDescriptor, table_addrs: set[int]  # pylint:disable=unused-argument
) -> set:
    """
    Analyze Type 2 string deobfuscation functions, determine the following information:

    - Functions that load deobfuscated strings

    A function is a string loader candidate if one of its blocks references a string table address and the
    function itself does not call anything.

    :param func_addr:   Address of the deobfuscation function (unused).
    :param desc:        Descriptor of the deobfuscation function (unused).
    :param table_addrs: Addresses of the deobfuscated string tables.
    :return:            Addresses of the functions that are string loader candidates.
    """

    cfg = self.kb.cfgs.get_most_accurate()

    # for each string table address, we find its string loader function
    # an obvious candidate function is 0x140001b20
    xrefs = self.kb.xrefs
    string_loader_candidates = set()
    for table_addr in table_addrs:
        referencing_blocks = {xref.block_addr for xref in xrefs.get_xrefs_by_dst(table_addr)}
        for block_addr in referencing_blocks:
            node = cfg.get_any_node(block_addr)
            if node is None:
                continue
            has_callee = any(True for _ in self.kb.functions.callgraph.successors(node.function_address))
            if not has_callee:
                # string loader function should not call anything else
                string_loader_candidates.add(node.function_address)

    return string_loader_candidates
|
|
490
|
+
|
|
491
|
+
def _find_type3(self) -> list[tuple[int, StringDeobFuncDescriptor]]:
    """
    Find candidates of Type 3 string deobfuscation functions.

    Type 3 string deobfuscation functions
    - Uses a buffer in the stack frame of its parent function
    - Before the call, the values in the buffer or the struct are initialized during runtime
    - The entire call can be simulated (it does not involve any other functions that angr does not support or do
      not have a SimProcedure for)

    :return:    A list of (function address, descriptor) tuples, one for each candidate function.
    """

    cfg = self.kb.cfgs.get_most_accurate()
    functions = self.kb.functions
    callgraph_digraph = networkx.DiGraph(functions.callgraph)

    # The quasi-topological order puts callers before their callees. Whether a function can be simulated depends
    # on its callees, so the order is walked backwards: every callee has been classified by the time one of its
    # callers is examined. (Walking it forwards would look up callees that have not been classified yet.)
    sorted_funcs = list(GraphUtils.quasi_topological_sort_nodes(callgraph_digraph))
    tree_has_unsupported_funcs = {}
    function_candidates = []
    for func_addr in reversed(sorted_funcs):
        func = functions.get_by_addr(func_addr)
        if func.is_simprocedure:
            # is this a stub SimProcedure?
            hooker = self.project.hooked_by(func_addr)
            if hooker is not None and hooker.is_stub:
                tree_has_unsupported_funcs[func_addr] = True
        elif any(
            tree_has_unsupported_funcs.get(callee, False) is True
            for callee in callgraph_digraph.successors(func_addr)
        ):
            # at least one callee (transitively) reaches a stub; this function cannot be fully simulated either
            tree_has_unsupported_funcs[func_addr] = True
        else:
            function_candidates.append(func)

    type3_functions = []

    for func in function_candidates:
        if not 8 <= len(func.block_addrs_set) < 14:
            continue

        # if it has a prototype recovered, it must have four arguments
        if func.prototype is not None and len(func.prototype.args) != 4:
            continue

        # the function must call some other functions
        if callgraph_digraph.out_degree[func.addr] == 0:
            continue

        # take a look at its call sites; only the first 100 are ever examined
        func_node = cfg.get_any_node(func.addr)
        if func_node is None:
            continue
        call_sites = cfg.get_predecessors(func_node, jumpkind="Ijk_Call")[:100]
        if not call_sites:
            continue

        # see if any of the call sites sets up enough constants
        if not any(
            self._is_block_setting_constants_to_stack(self.project.factory.block(call_site.addr))
            for call_site in call_sites
        ):
            continue

        # take a look at the content
        try:
            dec = self.project.analyses.Decompiler(func, cfg=cfg)
        except Exception:  # pylint:disable=broad-exception-caught
            # catch all exceptions
            continue
        if dec.codegen is None:
            continue
        if not self._like_type3_deobfuscation_function(dec.codegen.text):
            continue

        # see if any of the call sites returns a valid string
        valid = False
        for call_site in call_sites:
            call_site_block = self.project.factory.block(call_site.addr)
            if not self._is_block_setting_constants_to_stack(call_site_block):
                continue

            # simulate an execution to see if it really works
            data = self._type3_prepare_and_execute(func.addr, call_site.addr, call_site.function_address, cfg)
            if data is None:
                continue
            if len(data) > 3 and all(chr(x) in string.printable for x in data):
                valid = True
                break

        if valid:
            desc = StringDeobFuncDescriptor()
            desc.string_output_arg_idx = 0
            desc.string_length_arg_idx = 1
            desc.string_null_terminating = False
            type3_functions.append((func.addr, desc))

    return type3_functions
|
|
587
|
+
|
|
588
|
+
def _analyze_type3(
    self, func_addr: int, desc: StringDeobFuncDescriptor  # pylint:disable=unused-argument
) -> dict[int, bytes]:
    """
    Analyze Type 3 string deobfuscation functions, determine the following information:

    - The call sites
    - For each call site, the actual de-obfuscated content (in bytes)

    With this information, the decompiler will output the following code:

        *ptr = strdup("The deobfuscated string");
        *(ptr+8) = the string length;

    :param func_addr:   Address of the deobfuscation function.
    :param desc:        Descriptor of the deobfuscation function (unused).
    :return:            A dict mapping the address of each call instruction to the content that was recovered for
                        that call. Call sites for which nothing was recovered are left out.
    """

    cfg = self.kb.cfgs.get_most_accurate()

    call_sites = cfg.get_predecessors(cfg.get_any_node(func_addr))
    total = len(call_sites)
    callinsn2content = {}
    for position, call_site in enumerate(call_sites, start=1):
        _l.debug("Analyzing type 3 candidate call site %#x (%d/%d)...", call_site.addr, position, total)
        data = self._type3_prepare_and_execute(func_addr, call_site.addr, call_site.function_address, cfg)
        if not data:
            # the execution failed or produced nothing
            continue
        callinsn2content[call_site.instruction_addrs[-1]] = data

    return callinsn2content
|
|
619
|
+
|
|
620
|
+
#
|
|
621
|
+
# Type 1 helpers
|
|
622
|
+
#
|
|
623
|
+
|
|
624
|
+
@staticmethod
|
|
625
|
+
def _like_type1_deobfuscation_function(code: str) -> bool:
|
|
626
|
+
return bool("^" in code or ">>" in code or "<<" in code)
|
|
627
|
+
|
|
628
|
+
#
|
|
629
|
+
# Type 2 helpers
|
|
630
|
+
#
|
|
631
|
+
|
|
632
|
+
@staticmethod
|
|
633
|
+
def _like_type2_deobfuscation_function(code: str) -> bool:
|
|
634
|
+
return bool(
|
|
635
|
+
("^" in code or ">>" in code or "<<" in code) and ("do" in code or "while" in code or "for" in code)
|
|
636
|
+
)
|
|
637
|
+
|
|
638
|
+
#
|
|
639
|
+
# Type 3 helpers
|
|
640
|
+
#
|
|
641
|
+
|
|
642
|
+
@staticmethod
|
|
643
|
+
def _like_type3_deobfuscation_function(code: str) -> bool:
|
|
644
|
+
return bool(
|
|
645
|
+
("^" in code or ">>" in code or "<<" in code or "~" in code)
|
|
646
|
+
and ("do" in code or "while" in code or "for" in code)
|
|
647
|
+
)
|
|
648
|
+
|
|
649
|
+
def _type3_prepare_and_execute(self, func_addr: int, call_site_addr: int, call_site_func_addr: int, cfg):
    """
    Execute the blocks leading up to a call site, then execute the called function, and read back the data that
    the returned structure refers to.

    :param func_addr:           Address of the function to call.
    :param call_site_addr:      Address of the block that ends with the call.
    :param call_site_func_addr: Address of the function that the call-site block belongs to.
    :param cfg:                 The CFG model used for looking up nodes and their predecessors.
    :return:                    The data as bytes, or None if the execution did not get to the call, the call
                                failed, or the data is symbolic.
    """
    blocks_at_callsite = [call_site_addr]

    # backtrack from call site to include all previous consecutive blocks
    while True:
        pred_and_jumpkinds = cfg.get_predecessors_and_jumpkinds(
            cfg.get_any_node(call_site_addr), excluding_fakeret=False
        )
        if len(pred_and_jumpkinds) == 1:
            pred, jumpkind = pred_and_jumpkinds[0]
            # only follow a sole predecessor that has a single successor, ends exactly where the current block
            # starts, and gets here through an ordinary (Ijk_Boring) edge
            if (
                cfg.graph.out_degree[pred] == 1
                and pred.addr + pred.size == call_site_addr
                and jumpkind == "Ijk_Boring"
            ):
                blocks_at_callsite.insert(0, pred.addr)
                # from here on, call_site_addr is the address of the earliest block of the chain
                call_site_addr = pred.addr
                continue
        break

    # take a look at the call-site block to see what registers are used
    reg_reads = set()
    for block_addr in blocks_at_callsite:
        reg_collector = IRSBRegisterCollector(self.project.factory.block(block_addr))
        reg_collector.process()
        reg_reads |= set(reg_collector.reg_reads)

    # run constant propagation to track constant registers
    prop = self.project.analyses.Propagator(
        func=self.kb.functions.get_by_addr(call_site_func_addr),
        only_consts=True,
        do_binops=True,
        vex_cross_insn_opt=True,
        load_callback=None,
        cache_results=True,
        key_prefix="cfg_intermediate",
    )

    # execute the block at the call site
    # (the state starts at the earliest block of the chain; unconstrained registers and memory read as zero)
    state = self.project.factory.blank_state(
        addr=call_site_addr,
        add_options={sim_options.ZERO_FILL_UNCONSTRAINED_REGISTERS, sim_options.ZERO_FILL_UNCONSTRAINED_MEMORY},
    )
    # setup sp and bp, just in case
    state.regs._sp = 0x7FFF0000
    bp_set = False
    # seed the state with the constant values that the propagator found for the registers read by the blocks
    prop_state = prop.model.input_states.get(call_site_addr, None)
    if prop_state is not None:
        for reg_offset, reg_width in reg_reads:
            if reg_offset == state.arch.sp_offset:
                # sp was already set above
                continue
            if reg_width < 8:
                # at least a byte
                continue
            # reg_width is in bits; load_register is given the size in bytes
            con = prop_state.load_register(reg_offset, reg_width // 8)
            if isinstance(con, claripy.ast.Base) and con.op == "BVV":
                state.registers.store(reg_offset, claripy.BVV(con.concrete_value, reg_width))
                if reg_offset == state.arch.bp_offset:
                    bp_set = True
    if not bp_set:
        # no constant was found for bp; fall back to a fixed value
        state.regs._bp = 0x7FFF3000
    simgr = self.project.factory.simgr(state)

    # step until the call instruction
    for idx, block_addr in enumerate(blocks_at_callsite):
        if idx == len(blocks_at_callsite) - 1:
            # last block: execute every instruction but the final one
            inst = self.project.factory.block(block_addr).instructions
            simgr.step(num_inst=inst - 1)
        else:
            simgr.step()
        if not simgr.active:
            return None

    in_state = simgr.active[0]

    cc = default_cc(self.project.arch.name, self.project.simos.name)(self.project.arch)
    cc.STACKARG_SP_BUFF = 0  # disable shadow stack space because the binary code already sets it if needed
    cc.STACK_ALIGNMENT = 1  # disable stack address aligning because the binary code already sets it if needed
    # call the function with no explicit arguments (the state prepared above is used as the base state) and treat
    # its return value as a void pointer
    prototype_0 = SimTypeFunction([], SimTypePointer(pts_to=SimTypeBottom(label="void"))).with_arch(
        self.project.arch
    )
    callable_0 = self.project.factory.callable(
        func_addr, concrete_only=True, base_state=in_state, cc=cc, prototype=prototype_0
    )

    try:
        ret_value = callable_0()
    except (AngrCallableMultistateError, AngrCallableError):
        return None

    out_state = callable_0.result_state

    # figure out what was written
    # the returned pointer refers to a pointer-sized data pointer, and a 4-byte size is read at offset 8
    # NOTE(review): the fixed offset 8 presumably assumes 8-byte pointers -- confirm for 32-bit targets
    ptr = out_state.memory.load(ret_value, size=self.project.arch.bytes, endness=self.project.arch.memory_endness)
    size = out_state.memory.load(ret_value + 8, size=4, endness=self.project.arch.memory_endness)
    # TODO: Support lists with varied-length elements
    data = out_state.memory.load(ptr, size=size, endness="Iend_BE")
    if data.symbolic:
        return None

    return out_state.solver.eval(data, cast_to=bytes)
|
|
750
|
+
|
|
751
|
+
@staticmethod
def _is_block_setting_constants_to_stack(block, threshold: int = 5) -> bool:
    """
    Test if a block writes enough immediate values to memory that is addressed through rsp or rbp.

    :param block:       The block to examine. Its capstone instructions are inspected.
    :param threshold:   The minimum number of matching instructions.
    :return:            True if at least `threshold` instructions are two-operand mov* instructions that store an
                        immediate to rsp- or rbp-based memory, False otherwise.
    """

    def _stores_imm_to_stack(insn) -> bool:
        if not insn.mnemonic.startswith("mov") or len(insn.operands) != 2:
            return False
        dst, src = insn.operands
        if dst.type != capstone.x86.X86_OP_MEM:
            return False
        if dst.mem.base not in {capstone.x86.X86_REG_RSP, capstone.x86.X86_REG_RBP}:
            return False
        return src.type == capstone.x86.X86_OP_IMM

    matching = sum(1 for insn in block.capstone.insns if _stores_imm_to_stack(insn))
    return matching >= threshold
|
|
764
|
+
|
|
765
|
+
@staticmethod
|
|
766
|
+
def _is_string_reasonable(s: bytes) -> bool:
|
|
767
|
+
# test if the string is printable and is free of nonsense characters
|
|
768
|
+
|
|
769
|
+
# TODO: Ask a local LLM
|
|
770
|
+
s = s.replace(b"\x00", b"")
|
|
771
|
+
return all(chr(ch) in string.printable for ch in s)
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
# Register the analysis class with AnalysesHub under the name "StringObfuscationFinder".
AnalysesHub.register_default("StringObfuscationFinder", StringObfuscationFinder)
|