angr 9.2.124__py3-none-macosx_11_0_arm64.whl → 9.2.126__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (53) hide show
  1. angr/__init__.py +1 -1
  2. angr/analyses/__init__.py +13 -1
  3. angr/analyses/codecave.py +77 -0
  4. angr/analyses/decompiler/ail_simplifier.py +1 -0
  5. angr/analyses/decompiler/callsite_maker.py +9 -1
  6. angr/analyses/decompiler/clinic.py +32 -2
  7. angr/analyses/decompiler/condition_processor.py +104 -66
  8. angr/analyses/decompiler/decompiler.py +7 -0
  9. angr/analyses/decompiler/optimization_passes/__init__.py +18 -1
  10. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +6 -0
  11. angr/analyses/decompiler/optimization_passes/tag_slicer.py +41 -0
  12. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +2 -2
  13. angr/analyses/decompiler/return_maker.py +1 -0
  14. angr/analyses/decompiler/ssailification/rewriting.py +4 -0
  15. angr/analyses/decompiler/ssailification/rewriting_engine.py +10 -3
  16. angr/analyses/decompiler/structured_codegen/c.py +18 -2
  17. angr/analyses/deobfuscator/__init__.py +18 -0
  18. angr/analyses/deobfuscator/api_obf_finder.py +313 -0
  19. angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +51 -0
  20. angr/analyses/deobfuscator/irsb_reg_collector.py +85 -0
  21. angr/analyses/deobfuscator/string_obf_finder.py +774 -0
  22. angr/analyses/deobfuscator/string_obf_opt_passes.py +133 -0
  23. angr/analyses/deobfuscator/string_obf_peephole_optimizer.py +47 -0
  24. angr/analyses/patchfinder.py +137 -0
  25. angr/analyses/pathfinder.py +282 -0
  26. angr/analyses/reaching_definitions/function_handler_library/stdio.py +8 -1
  27. angr/analyses/smc.py +159 -0
  28. angr/analyses/unpacker/__init__.py +6 -0
  29. angr/analyses/unpacker/obfuscation_detector.py +103 -0
  30. angr/analyses/unpacker/packing_detector.py +138 -0
  31. angr/angrdb/models.py +1 -2
  32. angr/calling_conventions.py +3 -1
  33. angr/engines/vex/claripy/irop.py +10 -5
  34. angr/engines/vex/heavy/heavy.py +2 -0
  35. angr/exploration_techniques/spiller_db.py +1 -2
  36. angr/knowledge_plugins/__init__.py +2 -0
  37. angr/knowledge_plugins/functions/function.py +4 -0
  38. angr/knowledge_plugins/functions/function_manager.py +18 -9
  39. angr/knowledge_plugins/functions/function_parser.py +1 -1
  40. angr/knowledge_plugins/functions/soot_function.py +1 -0
  41. angr/knowledge_plugins/obfuscations.py +36 -0
  42. angr/lib/angr_native.dylib +0 -0
  43. angr/misc/ux.py +2 -2
  44. angr/project.py +17 -1
  45. angr/state_plugins/history.py +6 -4
  46. angr/utils/bits.py +4 -0
  47. angr/utils/tagged_interval_map.py +112 -0
  48. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/METADATA +6 -6
  49. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/RECORD +53 -36
  50. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/WHEEL +1 -1
  51. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/LICENSE +0 -0
  52. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/entry_points.txt +0 -0
  53. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,774 @@
1
+ # pylint:disable=missing-class-docstring,too-many-boolean-expressions
2
+ from __future__ import annotations
3
+ from typing import Any
4
+ import string
5
+ import logging
6
+
7
+ import capstone
8
+ import networkx
9
+
10
+ import claripy
11
+
12
+ from angr import sim_options
13
+ from angr.analyses import Analysis, AnalysesHub
14
+ from angr.errors import SimMemoryMissingError, AngrCallableMultistateError, AngrCallableError
15
+ from angr.calling_conventions import SimRegArg, default_cc
16
+ from angr.state_plugins.sim_action import SimActionData
17
+ from angr.sim_type import SimTypeFunction, SimTypeBottom, SimTypePointer
18
+ from angr.analyses.reaching_definitions import ObservationPointType
19
+ from angr.utils.graph import GraphUtils
20
+
21
+ from .irsb_reg_collector import IRSBRegisterCollector
22
+
23
+ _l = logging.getLogger(__name__)
24
+
25
+
26
class StringDeobFuncDescriptor:
    """
    Describes how a string deobfuscation function receives and produces its data.

    Every field starts out as None (unknown) and is filled in by whichever finder recognizes the function.
    """

    def __init__(self):
        # indices of the function arguments that carry the input buffer, the output buffer, and the length
        self.string_input_arg_idx = self.string_output_arg_idx = self.string_length_arg_idx = None
        # whether the produced string ends at the first NUL byte; None means undetermined
        self.string_null_terminating: bool | None = None
32
+
33
+
34
class StringObfuscationFinder(Analysis):
    """
    An analysis that automatically finds string obfuscation routines.

    The candidate lists are populated, and the analysis is run, as part of construction.
    """

    def __init__(self):
        # one candidate list per supported kind of deobfuscation routine
        self.type1_candidates, self.type2_candidates, self.type3_candidates = [], [], []

        self.analyze()
45
+
46
def analyze(self):
    """
    Find candidate deobfuscation functions of all three types, then analyze every candidate and record the
    deobfuscated strings (and, for types 1 and 2, the string loader candidates) in `self.kb.obfuscations`.
    """

    # Phase 1: discovery
    _l.debug("Finding type 1 candidates.")
    self.type1_candidates = self._find_type1()
    _l.debug("Got %d type 1 candidates.", len(self.type1_candidates))

    _l.debug("Finding type 2 candidates.")
    self.type2_candidates = self._find_type2()
    _l.debug("Got %d type 2 candidates.", len(self.type2_candidates))

    _l.debug("Finding type 3 candidates.")
    self.type3_candidates = self._find_type3()
    _l.debug("Got %d type 3 candidates.", len(self.type3_candidates))
    _l.debug("Done.")

    # Phase 2: per-candidate analysis. Iterating over an empty candidate list is a no-op.
    for func_addr, desc in self.type1_candidates:
        _l.debug("Analyzing type 1 candidates.")
        strings, loaders = self._analyze_type1(func_addr, desc)
        self.kb.obfuscations.type1_deobfuscated_strings.update(strings)
        self.kb.obfuscations.type1_string_loader_candidates |= loaders

    for func_addr, desc, regions in self.type2_candidates:
        _l.debug("Analyzing type 2 candidates.")
        # each region is (start address, length, deobfuscated bytes)
        loaders = self._analyze_type2(func_addr, desc, {start for start, _, _ in regions})
        strings = {start: content for start, _, content in regions}
        self.kb.obfuscations.type2_deobfuscated_strings.update(strings)
        self.kb.obfuscations.type2_string_loader_candidates |= loaders

    for func_addr, desc in self.type3_candidates:
        _l.debug("Analyzing type 3 candidates.")
        self.kb.obfuscations.type3_deobfuscated_strings.update(self._analyze_type3(func_addr, desc))
82
+
83
def _find_type1(self) -> list[tuple[int, StringDeobFuncDescriptor]]:
    """
    Find candidate Type 1 string deobfuscation functions.

    Type 1 string deobfuscation functions
    - take a constant string or local string as input,
    - output strings that are reasonable, and
    - do not call other functions (i.e., they are leaf functions).

    Type 1 string deobfuscation functions will decrypt each string once and for good.

    Every function that survives the structural filters is decompiled, its call-site arguments are recovered with
    a reaching-definition analysis, and it is then executed concretely on those arguments. A function becomes a
    candidate when a buffer passed to it changes into a reasonable-looking string.

    :return: A list of (function address, descriptor) tuples.
    """

    cfg = self.kb.cfgs.get_most_accurate()
    arch = self.project.arch
    functions = self.project.kb.functions
    loader = self.project.loader

    def _unknown_value():
        # placeholder for arguments whose value cannot be recovered at the call site
        return claripy.BVV(0xDEADBEEF, self.project.arch.bits)

    def _points_into_section(value) -> bool:
        # True if the value is a concrete address that lives inside a section of a loaded object
        return (
            value is not None
            and value.concrete
            and loader.find_section_containing(value.concrete_value) is not None
        )

    found: list[tuple[int, StringDeobFuncDescriptor]] = []

    for func in functions.values():
        if func.is_simprocedure or func.is_plt or func.is_alignment:
            continue

        if func.prototype is None or len(func.prototype.args) < 1:
            continue

        # leaf functions only
        if functions.callgraph.out_degree[func.addr] != 0:
            continue

        # it must be called by at least one function other than itself
        callers = [caller for caller in functions.callgraph.predecessors(func.addr) if caller != func.addr]
        if not callers:
            continue

        # skip functions that are too small (<= 2 blocks) or too big (>= 50 blocks)
        if not 2 < len(func.block_addrs_set) < 50:
            continue

        # decompile this function and see if it "looks like" a deobfuscation function
        try:
            dec = self.project.analyses.Decompiler(func, cfg=cfg)
        except Exception:  # pylint:disable=broad-exception-caught
            continue
        if dec.codegen is None or not self._like_type1_deobfuscation_function(dec.codegen.text):
            continue

        # recover the arguments that are passed in at each call site
        args_list = []
        for caller in callers:
            callsite_nodes = [
                node
                for node in cfg.get_predecessors(cfg.get_any_node(func.addr))
                if node.function_address == caller and node.instruction_addrs
            ]
            call_insn_addrs = [node.instruction_addrs[-1] for node in callsite_nodes]
            rda = self.project.analyses.ReachingDefinitions(
                functions[caller],
                observe_all=False,
                observation_points=[("insn", addr, ObservationPointType.OP_BEFORE) for addr in call_insn_addrs],
            )
            for call_insn_addr in call_insn_addrs:
                observ = rda.model.get_observation_by_insn(call_insn_addr, ObservationPointType.OP_BEFORE)

                # load values for each function argument
                args: list[tuple[int, Any]] = []
                for arg_idx, func_arg in enumerate(func.arguments):
                    # FIXME: We are ignoring all non-register function arguments until we see a test case where
                    # FIXME: stack-passing arguments are used
                    if not isinstance(func_arg, SimRegArg):
                        continue
                    reg_offset, reg_size = arch.registers[func_arg.reg_name]
                    try:
                        mv = observ.registers.load(reg_offset, size=reg_size)
                    except SimMemoryMissingError:
                        mv = None
                    arg_value = None if mv is None else mv.one_value()
                    if arg_value is None:
                        arg_value = _unknown_value()
                    args.append((arg_idx, arg_value))

                # the args must have at least one concrete address that points to an initialized memory location
                if any(_points_into_section(arg) for _, arg in args):
                    args_list.append(args)

        if not args_list:
            continue

        is_candidate = False
        desc = StringDeobFuncDescriptor()
        # now that we have good arguments, let's test the function!
        for args in args_list:
            func_call = self.project.factory.callable(
                func.addr, concrete_only=True, cc=func.calling_convention, prototype=func.prototype
            )

            # snapshot the first 100 bytes behind every pointer-like argument before the call
            snapshots: list[tuple[int, int, bytes]] = []
            for arg_idx, arg in args:
                if _points_into_section(arg):
                    ptr = arg.concrete_value
                    snapshots.append((arg_idx, ptr, loader.memory.load(ptr, 100)))

            try:
                func_call(*[arg for _, arg in args])
            except (AngrCallableMultistateError, AngrCallableError):
                continue

            # compare the snapshots against the memory after the call
            # TODO: Support cases where input and output are using different function arguments
            result_state = func_call.result_state
            for arg_idx, addr, before in snapshots:
                after = result_state.solver.eval(result_state.memory.load(addr, size=len(before)), cast_to=bytes)
                if after == before:
                    continue
                if self._is_string_reasonable(after):
                    # found it!
                    _l.debug("[+] Deobfuscated string by function %s: %s", repr(func), after)
                    is_candidate = True
                    desc.string_input_arg_idx = arg_idx
                    desc.string_output_arg_idx = arg_idx
                    desc.string_null_terminating = True  # FIXME
                    break

        if is_candidate:
            found.append((func.addr, desc))

    return found
226
+
227
def _analyze_type1(self, func_addr: int, desc: StringDeobFuncDescriptor) -> tuple[dict, set]:
    """
    Analyze Type 1 string deobfuscation functions, determine the following information:

    - Deobfuscated strings, lengths, and their addresses
    - Functions that load deobfuscated strings

    The function is executed concretely once per call site, using the register arguments that a
    reaching-definition analysis recovers for that call site.

    :param func_addr:   Address of the deobfuscation function.
    :param desc:        Descriptor of the deobfuscation function. `string_output_arg_idx` selects the argument
                        holding the output address; `string_length_arg_idx`, if set, selects the argument holding
                        the length (256 bytes are read otherwise).
    :return:            A tuple of (mapping from output address to deobfuscated bytes, set of addresses of
                        functions that may be string loaders).
    """

    deobfuscated_strings = {}

    arch = self.project.arch
    cfg = self.kb.cfgs.get_most_accurate()
    func = self.kb.functions.get_by_addr(func_addr)
    func_node = cfg.get_any_node(func_addr)
    assert func_node is not None

    # Reaching-definition models, cached per calling function
    rda_cache = {}
    for callsite_node in cfg.get_predecessors(func_node):
        # Predecessor nodes without any instruction cannot be a real call site. _find_type1 filters them
        # out the same way; without this guard the [-1] lookup below raises IndexError.
        if not callsite_node.instruction_addrs:
            continue
        call_insn_addr = callsite_node.instruction_addrs[-1]

        # dump arguments
        caller_addr = callsite_node.function_address
        if caller_addr in rda_cache:
            rda = rda_cache[caller_addr]
        else:
            rda = self.project.analyses.ReachingDefinitions(
                self.project.kb.functions[caller_addr],
                observe_all=True,
            ).model
            rda_cache[caller_addr] = rda
        observ = rda.get_observation_by_insn(call_insn_addr, ObservationPointType.OP_BEFORE)

        # NOTE(review): non-register arguments are skipped, so positions in `args` only line up with the
        # argument indices stored in `desc` when all preceding arguments are register arguments - confirm.
        args = []
        for func_arg in func.arguments:
            # FIXME: We are ignoring all non-register function arguments until we see a test case where
            # FIXME: stack-passing arguments are used
            if not isinstance(func_arg, SimRegArg):
                continue
            reg_offset, reg_size = arch.registers[func_arg.reg_name]
            try:
                mv = observ.registers.load(reg_offset, size=reg_size)
            except SimMemoryMissingError:
                mv = None
            v = None if mv is None else mv.one_value()
            if v is not None and v.concrete:
                args.append(v)
            else:
                # unknown or non-concrete values are replaced by a placeholder, so `args` never holds None
                args.append(claripy.BVV(0xDEADBEEF, self.project.arch.bits))

        # call the function
        func_call = self.project.factory.callable(
            func.addr, concrete_only=True, cc=func.calling_convention, prototype=func.prototype
        )
        try:
            func_call(*args)
        except AngrCallableMultistateError:
            _l.debug(
                "State branching encountered during string deobfuscation. Skip the call at %#x.",
                call_insn_addr,
            )
            continue
        except AngrCallableError:
            _l.debug(
                "No path returned. Skip the call at %#x.",
                call_insn_addr,
            )
            continue

        # dump the decrypted string!
        output_addr = args[desc.string_output_arg_idx]
        length = args[desc.string_length_arg_idx].concrete_value if desc.string_length_arg_idx is not None else 256
        output_str = func_call.result_state.solver.eval(
            func_call.result_state.memory.load(output_addr, size=length),
            cast_to=bytes,
        )
        if desc.string_null_terminating and b"\x00" in output_str:
            output_str = output_str[: output_str.index(b"\x00")]
        deobfuscated_strings[output_addr.concrete_value] = output_str

    # for each deobfuscated string, we find its string loader function
    xrefs = self.kb.xrefs
    string_loader_candidates = set()
    for str_addr in deobfuscated_strings:
        block_addrs = {xref.block_addr for xref in xrefs.get_xrefs_by_dst(str_addr)}
        for block_addr in block_addrs:
            node = cfg.get_any_node(block_addr)
            if node is None:
                continue
            if list(self.kb.functions.callgraph.successors(node.function_address)):
                # string loader function should not call anything else
                continue
            string_loader_candidates.add(node.function_address)

    return deobfuscated_strings, string_loader_candidates
334
+
335
def _find_type2(self) -> list[tuple[int, StringDeobFuncDescriptor, list[tuple[int, int, bytes]]]]:
    """
    Find candidate Type 2 string deobfuscation functions.

    Type 2 string deobfuscation functions
    - deobfuscate an entire table of encrypted strings, and
    - may or may not take any arguments. All arguments should be concrete.

    Type 2 string deobfuscation functions will decrypt each string once and for good.

    Every function that survives the structural filters is decompiled and then executed concretely without
    arguments while memory actions are tracked. Runs of consecutive byte addresses that are both read and written
    are compared before and after the execution.

    :return: A list of (function address, descriptor, regions) tuples, where each region is a tuple of
             (start address, length, content after the execution).
    """

    cfg = self.kb.cfgs.get_most_accurate()
    functions = self.project.kb.functions
    loader = self.project.loader

    found: list[tuple[int, StringDeobFuncDescriptor, list[tuple[int, int, bytes]]]] = []

    for func in functions.values():
        if func.is_simprocedure or func.is_plt or func.is_alignment:
            continue

        if func.prototype is None or len(func.prototype.args) > 1:
            # FIXME: Handle deobfuscation functions that take arguments. Find such a case first
            continue

        # leaf functions only
        if functions.callgraph.out_degree[func.addr] != 0:
            continue

        # it must be called by at least one function other than itself
        callers = [caller for caller in functions.callgraph.predecessors(func.addr) if caller != func.addr]
        if not callers:
            continue

        # skip functions that are too small (<= 2 blocks) or too big (>= 50 blocks)
        if not 2 < len(func.block_addrs_set) < 50:
            continue

        # decompile this function and see if it "looks like" a deobfuscation function
        try:
            dec = self.project.analyses.Decompiler(func, cfg=cfg, expr_collapse_depth=64)
        except Exception:  # pylint:disable=broad-exception-caught
            continue
        if dec.codegen is None or not self._like_type2_deobfuscation_function(dec.codegen.text):
            continue

        desc = StringDeobFuncDescriptor()
        # test the function by running it without arguments, recording every memory access
        func_call = self.project.factory.callable(
            func.addr,
            concrete_only=True,
            cc=func.calling_convention,
            prototype=func.prototype,
            add_options={sim_options.TRACK_MEMORY_ACTIONS},
        )

        try:
            func_call()
        except (AngrCallableMultistateError, AngrCallableError):
            continue

        end_state = func_call.result_state

        # where are the reads and writes? collect every byte address that was touched
        read_addrs: set[int] = set()
        written_addrs: set[int] = set()
        for action in end_state.history.actions:
            if not isinstance(action, SimActionData):
                continue
            if action.actual_addrs:
                actual_addrs = action.actual_addrs
            elif action.addr.ast.concrete:
                actual_addrs = [action.addr.ast.concrete_value]
            else:
                continue
            if action.type != "mem":
                continue
            if action.action == "read":
                touched = read_addrs
            elif action.action == "write":
                touched = written_addrs
            else:
                continue
            for base in actual_addrs:
                # action.size.ast is divided by 8 to get the number of bytes accessed
                touched.update(base + offset for offset in range(action.size.ast // 8))

        # find likely memory access regions
        # TODO: Handle cases where reads and writes are not going to the same place
        sorted_reads = sorted(read_addrs)
        total = len(sorted_reads)
        regions: list[tuple[int, int, bytes]] = []
        idx = 0
        while idx < total:
            start = sorted_reads[idx]
            if start not in written_addrs:
                idx += 1
                continue

            # count how many of the following read addresses are consecutive and also written
            nxt = idx + 1
            while (
                nxt < total
                and sorted_reads[nxt] - sorted_reads[nxt - 1] == 1
                and sorted_reads[nxt] in written_addrs
            ):
                nxt += 1
            stride = nxt - idx - 1

            if stride < 5:
                idx += 1
                continue

            # got one region
            if loader.find_section_containing(start) is not None:
                initial_data = loader.memory.load(start, stride)
                end_data = end_state.solver.eval(end_state.memory.load(start, stride), cast_to=bytes)
                if initial_data != end_data and self._is_string_reasonable(end_data):
                    regions.append((start, stride, end_data))
            idx += stride

        if regions:
            found.append((func.addr, desc, regions))

    return found
457
+
458
+ def _analyze_type2(
459
+ self, func_addr: int, desc: StringDeobFuncDescriptor, table_addrs: set[int] # pylint:disable=unused-argument
460
+ ) -> set:
461
+ """
462
+ Analyze Type 2 string deobfuscation functions, determine the following information:
463
+
464
+ - Functions that load deobfuscated strings
465
+
466
+ :param func_addr:
467
+ :param desc:
468
+ :return:
469
+ """
470
+
471
+ cfg = self.kb.cfgs.get_most_accurate()
472
+
473
+ # for each string table address, we find its string loader function
474
+ # an obvious candidate function is 0x140001b20
475
+ xrefs = self.kb.xrefs
476
+ string_loader_candidates = set()
477
+ for table_addr in table_addrs:
478
+ xref_set = xrefs.get_xrefs_by_dst(table_addr)
479
+ block_addrs = {xref.block_addr for xref in xref_set}
480
+ for block_addr in block_addrs:
481
+ node = cfg.get_any_node(block_addr)
482
+ if node is not None:
483
+ callees = list(self.kb.functions.callgraph.successors(node.function_address))
484
+ if callees:
485
+ # string loader function should not call anything else
486
+ continue
487
+ string_loader_candidates.add(node.function_address)
488
+
489
+ return string_loader_candidates
490
+
491
def _find_type3(self) -> list[tuple[int, StringDeobFuncDescriptor]]:
    """
    Find candidate Type 3 string deobfuscation functions.

    Type 3 string deobfuscation functions
    - use a buffer in the stack frame of their parent function,
    - have the values in the buffer or the struct initialized during runtime before the call, and
    - can be simulated entirely (they do not involve any other functions that angr does not support or does not
      have a SimProcedure for).

    :return: A list of (function address, descriptor) tuples.
    """

    cfg = self.kb.cfgs.get_most_accurate()
    functions = self.kb.functions
    callgraph = networkx.DiGraph(functions.callgraph)

    # Walk the call graph in quasi-topological order. Stub SimProcedures are marked as unsupported, and so is
    # every function that calls a function already marked; all other non-SimProcedure functions are kept.
    unsupported: dict = {}
    function_candidates = []
    for addr in GraphUtils.quasi_topological_sort_nodes(callgraph):
        if functions.get_by_addr(addr).is_simprocedure:
            # is this a stub SimProcedure?
            hooker = self.project.hooked_by(addr)
            if hooker is not None and hooker.is_stub:
                unsupported[addr] = True
            continue

        # which functions does it call?
        callees = list(callgraph.successors(addr))
        if any(unsupported.get(callee, False) is True for callee in callees):
            unsupported[addr] = True
        else:
            function_candidates.append(functions.get_by_addr(addr))

    found = []

    for func in function_candidates:
        if not 8 <= len(func.block_addrs_set) < 14:
            continue

        # if it has a prototype recovered, it must have four arguments
        if func.prototype is not None and len(func.prototype.args) != 4:
            continue

        # the function must call some other functions
        if callgraph.out_degree[func.addr] == 0:
            continue

        # take a look at its call sites
        func_node = cfg.get_any_node(func.addr)
        if func_node is None:
            continue
        call_sites = cfg.get_predecessors(func_node, jumpkind="Ijk_Call")
        if not call_sites:
            continue

        # only the first 100 call sites are ever examined
        examined_sites = call_sites[:100]

        # does any of them set up enough constants?
        if not any(
            self._is_block_setting_constants_to_stack(self.project.factory.block(site.addr))
            for site in examined_sites
        ):
            continue

        # take a look at the content
        try:
            dec = self.project.analyses.Decompiler(func, cfg=cfg)
        except Exception:  # pylint:disable=broad-exception-caught
            # catch all exceptions
            continue
        if dec.codegen is None:
            continue
        if not self._like_type3_deobfuscation_function(dec.codegen.text):
            continue

        # does any of them return a valid string?
        valid = False
        for site in examined_sites:
            if not self._is_block_setting_constants_to_stack(self.project.factory.block(site.addr)):
                continue

            # simulate an execution to see if it really works
            data = self._type3_prepare_and_execute(func.addr, site.addr, site.function_address, cfg)
            if data is None:
                continue
            if len(data) > 3 and all(chr(x) in string.printable for x in data):
                valid = True
                break

        if not valid:
            continue

        desc = StringDeobFuncDescriptor()
        desc.string_output_arg_idx = 0
        desc.string_length_arg_idx = 1
        desc.string_null_terminating = False
        found.append((func.addr, desc))

    return found
587
+
588
def _analyze_type3(
    self, func_addr: int, desc: StringDeobFuncDescriptor  # pylint:disable=unused-argument
) -> dict[int, bytes]:
    """
    Analyze Type 3 string deobfuscation functions, determine the following information:

    - The call sites
    - For each call site, the actual de-obfuscated content (in bytes)

    Decompiler will output the following code:

        *ptr = strdup("The deobfuscated string");
        *(ptr+8) = the string length;

    :param func_addr:   Address of the deobfuscation function.
    :param desc:        Descriptor of the deobfuscation function (unused).
    :return:            A mapping from the address of the last instruction of each call-site block to the
                        content produced at that call site. Call sites that yield no content are left out.
    """

    cfg = self.kb.cfgs.get_most_accurate()

    call_sites = cfg.get_predecessors(cfg.get_any_node(func_addr))
    total = len(call_sites)
    content_by_call_insn = {}
    for position, site in enumerate(call_sites, start=1):
        _l.debug("Analyzing type 3 candidate call site %#x (%d/%d)...", site.addr, position, total)
        content = self._type3_prepare_and_execute(func_addr, site.addr, site.function_address, cfg)
        if content:
            content_by_call_insn[site.instruction_addrs[-1]] = content

    return content_by_call_insn
619
+
620
+ #
621
+ # Type 1 helpers
622
+ #
623
+
624
+ @staticmethod
625
+ def _like_type1_deobfuscation_function(code: str) -> bool:
626
+ return bool("^" in code or ">>" in code or "<<" in code)
627
+
628
+ #
629
+ # Type 2 helpers
630
+ #
631
+
632
+ @staticmethod
633
+ def _like_type2_deobfuscation_function(code: str) -> bool:
634
+ return bool(
635
+ ("^" in code or ">>" in code or "<<" in code) and ("do" in code or "while" in code or "for" in code)
636
+ )
637
+
638
+ #
639
+ # Type 3 helpers
640
+ #
641
+
642
+ @staticmethod
643
+ def _like_type3_deobfuscation_function(code: str) -> bool:
644
+ return bool(
645
+ ("^" in code or ">>" in code or "<<" in code or "~" in code)
646
+ and ("do" in code or "while" in code or "for" in code)
647
+ )
648
+
649
def _type3_prepare_and_execute(self, func_addr: int, call_site_addr: int, call_site_func_addr: int, cfg):
    """
    Simulate the code leading up to a call site of a Type 3 candidate, call the candidate from the resulting
    state, and extract the data that it produced.

    :param func_addr:           Address of the candidate deobfuscation function.
    :param call_site_addr:      Address of the block that ends with the call.
    :param call_site_func_addr: Address of the function that the call-site block belongs to.
    :param cfg:                 The control flow graph to use.
    :return:                    The produced data as bytes, or None if the simulation does not reach the call,
                                the call fails, or the produced data is symbolic.
    """

    # backtrack from the call site to include all previous consecutive blocks: a block is prepended when it is
    # the only predecessor, has a single outgoing edge, ends exactly where the current first block starts, and
    # is connected through a boring jump
    first_addr = call_site_addr
    blocks_at_callsite = [first_addr]
    while True:
        preds = cfg.get_predecessors_and_jumpkinds(cfg.get_any_node(first_addr), excluding_fakeret=False)
        if len(preds) != 1:
            break
        pred, jumpkind = preds[0]
        if not (
            cfg.graph.out_degree[pred] == 1 and pred.addr + pred.size == first_addr and jumpkind == "Ijk_Boring"
        ):
            break
        blocks_at_callsite.insert(0, pred.addr)
        first_addr = pred.addr

    # take a look at the call-site blocks to see what registers are used
    reg_reads = set()
    for block_addr in blocks_at_callsite:
        collector = IRSBRegisterCollector(self.project.factory.block(block_addr))
        collector.process()
        reg_reads |= set(collector.reg_reads)

    # run constant propagation to track constant registers
    prop = self.project.analyses.Propagator(
        func=self.kb.functions.get_by_addr(call_site_func_addr),
        only_consts=True,
        do_binops=True,
        vex_cross_insn_opt=True,
        load_callback=None,
        cache_results=True,
        key_prefix="cfg_intermediate",
    )

    # execution starts at the earliest block collected above
    state = self.project.factory.blank_state(
        addr=first_addr,
        add_options={sim_options.ZERO_FILL_UNCONSTRAINED_REGISTERS, sim_options.ZERO_FILL_UNCONSTRAINED_MEMORY},
    )
    # setup sp and bp, just in case
    state.regs._sp = 0x7FFF0000
    bp_set = False
    prop_state = prop.model.input_states.get(first_addr, None)
    if prop_state is not None:
        # seed every register that is read with the constant that the propagator found for it, if any
        for reg_offset, reg_width in reg_reads:
            # the stack pointer is already set; reg_width is in bits and must cover at least a byte
            if reg_offset == state.arch.sp_offset or reg_width < 8:
                continue
            const = prop_state.load_register(reg_offset, reg_width // 8)
            if isinstance(const, claripy.ast.Base) and const.op == "BVV":
                state.registers.store(reg_offset, claripy.BVV(const.concrete_value, reg_width))
                if reg_offset == state.arch.bp_offset:
                    bp_set = True
    if not bp_set:
        state.regs._bp = 0x7FFF3000
    simgr = self.project.factory.simgr(state)

    # step through all blocks before the call-site block...
    for _ in blocks_at_callsite[:-1]:
        simgr.step()
        if not simgr.active:
            return None
    # ...then through the call-site block, stopping right before its last instruction (the call)
    insn_count = self.project.factory.block(blocks_at_callsite[-1]).instructions
    simgr.step(num_inst=insn_count - 1)
    if not simgr.active:
        return None

    in_state = simgr.active[0]

    cc = default_cc(self.project.arch.name, self.project.simos.name)(self.project.arch)
    cc.STACKARG_SP_BUFF = 0  # disable shadow stack space because the binary code already sets it if needed
    cc.STACK_ALIGNMENT = 1  # disable stack address aligning because the binary code already sets it if needed
    # no arguments are passed explicitly: they were all set up by the simulated code above
    void_ptr_prototype = SimTypeFunction([], SimTypePointer(pts_to=SimTypeBottom(label="void"))).with_arch(
        self.project.arch
    )
    deobfuscate = self.project.factory.callable(
        func_addr, concrete_only=True, base_state=in_state, cc=cc, prototype=void_ptr_prototype
    )

    try:
        ret_value = deobfuscate()
    except (AngrCallableMultistateError, AngrCallableError):
        return None

    out_state = deobfuscate.result_state
    endness = self.project.arch.memory_endness

    # figure out what was written: the returned address holds a data pointer, and a 4-byte size is read from
    # 8 bytes past it
    ptr = out_state.memory.load(ret_value, size=self.project.arch.bytes, endness=endness)
    size = out_state.memory.load(ret_value + 8, size=4, endness=endness)
    # TODO: Support lists with varied-length elements
    data = out_state.memory.load(ptr, size=size, endness="Iend_BE")
    if data.symbolic:
        return None

    return out_state.solver.eval(data, cast_to=bytes)
750
+
751
+ @staticmethod
752
+ def _is_block_setting_constants_to_stack(block, threshold: int = 5) -> bool:
753
+ insn_setting_consts = 0
754
+ for insn in block.capstone.insns:
755
+ if (
756
+ insn.mnemonic.startswith("mov")
757
+ and len(insn.operands) == 2
758
+ and insn.operands[0].type == capstone.x86.X86_OP_MEM
759
+ and insn.operands[0].mem.base in {capstone.x86.X86_REG_RSP, capstone.x86.X86_REG_RBP}
760
+ and insn.operands[1].type == capstone.x86.X86_OP_IMM
761
+ ):
762
+ insn_setting_consts += 1
763
+ return insn_setting_consts >= threshold
764
+
765
+ @staticmethod
766
+ def _is_string_reasonable(s: bytes) -> bool:
767
+ # test if the string is printable and is free of nonsense characters
768
+
769
+ # TODO: Ask a local LLM
770
+ s = s.replace(b"\x00", b"")
771
+ return all(chr(ch) in string.printable for ch in s)
772
+
773
+
774
# Register the analysis class with AnalysesHub under the name "StringObfuscationFinder".
+ AnalysesHub.register_default("StringObfuscationFinder", StringObfuscationFinder)