angr 9.2.124__py3-none-manylinux2014_aarch64.whl → 9.2.126__py3-none-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/__init__.py +13 -1
- angr/analyses/codecave.py +77 -0
- angr/analyses/decompiler/ail_simplifier.py +1 -0
- angr/analyses/decompiler/callsite_maker.py +9 -1
- angr/analyses/decompiler/clinic.py +32 -2
- angr/analyses/decompiler/condition_processor.py +104 -66
- angr/analyses/decompiler/decompiler.py +7 -0
- angr/analyses/decompiler/optimization_passes/__init__.py +18 -1
- angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +6 -0
- angr/analyses/decompiler/optimization_passes/tag_slicer.py +41 -0
- angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +2 -2
- angr/analyses/decompiler/return_maker.py +1 -0
- angr/analyses/decompiler/ssailification/rewriting.py +4 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +10 -3
- angr/analyses/decompiler/structured_codegen/c.py +18 -2
- angr/analyses/deobfuscator/__init__.py +18 -0
- angr/analyses/deobfuscator/api_obf_finder.py +313 -0
- angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +51 -0
- angr/analyses/deobfuscator/irsb_reg_collector.py +85 -0
- angr/analyses/deobfuscator/string_obf_finder.py +774 -0
- angr/analyses/deobfuscator/string_obf_opt_passes.py +133 -0
- angr/analyses/deobfuscator/string_obf_peephole_optimizer.py +47 -0
- angr/analyses/patchfinder.py +137 -0
- angr/analyses/pathfinder.py +282 -0
- angr/analyses/reaching_definitions/function_handler_library/stdio.py +8 -1
- angr/analyses/smc.py +159 -0
- angr/analyses/unpacker/__init__.py +6 -0
- angr/analyses/unpacker/obfuscation_detector.py +103 -0
- angr/analyses/unpacker/packing_detector.py +138 -0
- angr/angrdb/models.py +1 -2
- angr/calling_conventions.py +3 -1
- angr/engines/vex/claripy/irop.py +10 -5
- angr/engines/vex/heavy/heavy.py +2 -0
- angr/exploration_techniques/spiller_db.py +1 -2
- angr/knowledge_plugins/__init__.py +2 -0
- angr/knowledge_plugins/functions/function.py +4 -0
- angr/knowledge_plugins/functions/function_manager.py +18 -9
- angr/knowledge_plugins/functions/function_parser.py +1 -1
- angr/knowledge_plugins/functions/soot_function.py +1 -0
- angr/knowledge_plugins/obfuscations.py +36 -0
- angr/misc/ux.py +2 -2
- angr/project.py +17 -1
- angr/state_plugins/history.py +6 -4
- angr/utils/bits.py +4 -0
- angr/utils/tagged_interval_map.py +112 -0
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/METADATA +6 -6
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/RECORD +52 -35
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/WHEEL +1 -1
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/LICENSE +0 -0
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/entry_points.txt +0 -0
- {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
from ailment.expression import Load, Const
|
|
3
|
-
from cle.backends import Blob
|
|
3
|
+
from cle.backends import Blob, Hex
|
|
4
4
|
|
|
5
5
|
from .base import PeepholeOptimizationExprBase
|
|
6
6
|
|
|
@@ -32,7 +32,7 @@ class ConstantDereferences(PeepholeOptimizationExprBase):
|
|
|
32
32
|
|
|
33
33
|
# is it loading from a blob?
|
|
34
34
|
obj = self.project.loader.find_object_containing(expr.addr.value)
|
|
35
|
-
if obj is not None and isinstance(obj, Blob):
|
|
35
|
+
if obj is not None and isinstance(obj, (Blob, Hex)):
|
|
36
36
|
# do we know the value that it's reading?
|
|
37
37
|
try:
|
|
38
38
|
val = self.project.loader.memory.unpack_word(expr.addr.value, size=self.project.arch.bytes)
|
|
@@ -119,6 +119,7 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
|
|
|
119
119
|
self._ail_manager.next_atom(),
|
|
120
120
|
reg_bits,
|
|
121
121
|
src_and_vvars=[], # back patch later
|
|
122
|
+
ins_addr=node.addr,
|
|
122
123
|
)
|
|
123
124
|
phi_dst = VirtualVariable(
|
|
124
125
|
self._ail_manager.next_atom(),
|
|
@@ -126,6 +127,7 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
|
|
|
126
127
|
reg_bits,
|
|
127
128
|
VirtualVariableCategory.REGISTER,
|
|
128
129
|
oident=reg_offset,
|
|
130
|
+
ins_addr=node.addr,
|
|
129
131
|
)
|
|
130
132
|
|
|
131
133
|
case "stack":
|
|
@@ -135,6 +137,7 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
|
|
|
135
137
|
self._ail_manager.next_atom(),
|
|
136
138
|
stack_size * self.project.arch.byte_width,
|
|
137
139
|
src_and_vvars=[], # back patch later
|
|
140
|
+
ins_addr=node.addr,
|
|
138
141
|
)
|
|
139
142
|
phi_dst = VirtualVariable(
|
|
140
143
|
self._ail_manager.next_atom(),
|
|
@@ -142,6 +145,7 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
|
|
|
142
145
|
stack_size * self.project.arch.byte_width,
|
|
143
146
|
VirtualVariableCategory.STACK,
|
|
144
147
|
oident=stack_offset,
|
|
148
|
+
ins_addr=node.addr,
|
|
145
149
|
)
|
|
146
150
|
case _:
|
|
147
151
|
raise NotImplementedError
|
|
@@ -525,7 +525,7 @@ class SimEngineSSARewriting(
|
|
|
525
525
|
**expr.tags,
|
|
526
526
|
)
|
|
527
527
|
|
|
528
|
-
def _get_full_reg_vvar(self, reg_offset: int, size: int) -> VirtualVariable:
|
|
528
|
+
def _get_full_reg_vvar(self, reg_offset: int, size: int, ins_addr: int | None = None) -> VirtualVariable:
|
|
529
529
|
base_off, base_size = get_reg_offset_base_and_size(reg_offset, self.arch, size=size)
|
|
530
530
|
if (
|
|
531
531
|
base_off not in self.state.registers
|
|
@@ -534,13 +534,16 @@ class SimEngineSSARewriting(
|
|
|
534
534
|
):
|
|
535
535
|
# somehow it's never defined before...
|
|
536
536
|
_l.debug("Creating a new virtual variable for an undefined register (%d [%d]).", base_off, base_size)
|
|
537
|
+
tags = {}
|
|
538
|
+
if ins_addr is not None:
|
|
539
|
+
tags["ins_addr"] = ins_addr
|
|
537
540
|
vvar = VirtualVariable(
|
|
538
541
|
self.ail_manager.next_atom(),
|
|
539
542
|
self.next_vvar_id(),
|
|
540
543
|
base_size * self.arch.byte_width,
|
|
541
544
|
category=VirtualVariableCategory.REGISTER,
|
|
542
545
|
oident=base_off,
|
|
543
|
-
|
|
546
|
+
**tags,
|
|
544
547
|
)
|
|
545
548
|
self.state.registers[base_off][base_size] = vvar
|
|
546
549
|
return vvar
|
|
@@ -628,7 +631,11 @@ class SimEngineSSARewriting(
|
|
|
628
631
|
|
|
629
632
|
# no good size available
|
|
630
633
|
# get the full register, then extract from there
|
|
631
|
-
vvar = self._get_full_reg_vvar(
|
|
634
|
+
vvar = self._get_full_reg_vvar(
|
|
635
|
+
reg_expr.reg_offset,
|
|
636
|
+
reg_expr.size,
|
|
637
|
+
ins_addr=reg_expr.ins_addr,
|
|
638
|
+
)
|
|
632
639
|
# extract
|
|
633
640
|
shift_amount = Const(
|
|
634
641
|
self.ail_manager.next_atom(),
|
|
@@ -2148,6 +2148,12 @@ class CConstant(CExpression):
|
|
|
2148
2148
|
elif isinstance(v, Function):
|
|
2149
2149
|
yield get_cpp_function_name(v.demangled_name, specialized=False, qualified=True), self
|
|
2150
2150
|
return
|
|
2151
|
+
elif isinstance(v, str):
|
|
2152
|
+
yield CConstant.str_to_c_str(v), self
|
|
2153
|
+
return
|
|
2154
|
+
elif isinstance(v, bytes):
|
|
2155
|
+
yield CConstant.str_to_c_str(v.replace(b"\x00", b"").decode("utf-8")), self
|
|
2156
|
+
return
|
|
2151
2157
|
|
|
2152
2158
|
if self.reference_values is not None and self._type is not None and self._type in self.reference_values:
|
|
2153
2159
|
if isinstance(self._type, SimTypeInt):
|
|
@@ -3415,7 +3421,17 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3415
3421
|
if reference_values is None:
|
|
3416
3422
|
reference_values = {}
|
|
3417
3423
|
type_ = unpack_typeref(type_)
|
|
3418
|
-
if
|
|
3424
|
+
if expr.value in self.kb.obfuscations.type1_deobfuscated_strings:
|
|
3425
|
+
reference_values[SimTypePointer(SimTypeChar())] = self.kb.obfuscations.type1_deobfuscated_strings[
|
|
3426
|
+
expr.value
|
|
3427
|
+
]
|
|
3428
|
+
inline_string = True
|
|
3429
|
+
elif expr.value in self.kb.obfuscations.type2_deobfuscated_strings:
|
|
3430
|
+
reference_values[SimTypePointer(SimTypeChar())] = self.kb.obfuscations.type2_deobfuscated_strings[
|
|
3431
|
+
expr.value
|
|
3432
|
+
]
|
|
3433
|
+
inline_string = True
|
|
3434
|
+
elif isinstance(type_, SimTypePointer) and isinstance(type_.pts_to, SimTypeChar):
|
|
3419
3435
|
# char*
|
|
3420
3436
|
# Try to get a string
|
|
3421
3437
|
if (
|
|
@@ -3433,7 +3449,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3433
3449
|
# edge cases: (void*)"this is a constant string pointer". in this case, the type_ will be a void*
|
|
3434
3450
|
# (BOT*) instead of a char*.
|
|
3435
3451
|
|
|
3436
|
-
if isinstance(expr.value, int):
|
|
3452
|
+
if not reference_values and isinstance(expr.value, int):
|
|
3437
3453
|
if expr.value in self.project.kb.functions:
|
|
3438
3454
|
# It's a function pointer
|
|
3439
3455
|
# We don't care about the actual prototype here
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# deobfuscator is a collection of analyses that automatically identify functions where obfuscation techniques are
|
|
2
|
+
# in use.
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .string_obf_finder import StringObfuscationFinder
|
|
6
|
+
from .string_obf_peephole_optimizer import StringObfType1PeepholeOptimizer
|
|
7
|
+
from .string_obf_opt_passes import StringObfType3Rewriter
|
|
8
|
+
from .api_obf_finder import APIObfuscationFinder
|
|
9
|
+
from .api_obf_peephole_optimizer import APIObfType1PeepholeOptimizer
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
__all__ = (
|
|
13
|
+
"StringObfuscationFinder",
|
|
14
|
+
"StringObfType1PeepholeOptimizer",
|
|
15
|
+
"StringObfType3Rewriter",
|
|
16
|
+
"APIObfuscationFinder",
|
|
17
|
+
"APIObfType1PeepholeOptimizer",
|
|
18
|
+
)
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
# pylint:disable=missing-class-docstring,too-many-boolean-expressions
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from typing import Any
|
|
4
|
+
from enum import IntEnum
|
|
5
|
+
import string
|
|
6
|
+
import logging
|
|
7
|
+
|
|
8
|
+
import networkx
|
|
9
|
+
|
|
10
|
+
import claripy
|
|
11
|
+
|
|
12
|
+
from angr import SIM_LIBRARIES
|
|
13
|
+
from angr.calling_conventions import SimRegArg
|
|
14
|
+
from angr.errors import SimMemoryMissingError
|
|
15
|
+
from angr.knowledge_plugins.key_definitions.constants import ObservationPointType
|
|
16
|
+
from angr.sim_type import SimTypePointer, SimTypeChar
|
|
17
|
+
from angr.analyses import Analysis, AnalysesHub
|
|
18
|
+
from angr.procedures.definitions import SimSyscallLibrary
|
|
19
|
+
from angr.sim_variable import SimMemoryVariable
|
|
20
|
+
from angr.analyses.decompiler.structured_codegen.c import (
|
|
21
|
+
CStructuredCodeWalker,
|
|
22
|
+
CFunctionCall,
|
|
23
|
+
CConstant,
|
|
24
|
+
CAssignment,
|
|
25
|
+
CVariable,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
_l = logging.getLogger(name=__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class APIObfuscationType(IntEnum):
    """
    The kinds of API obfuscation that can be described by an APIDeobFuncDescriptor.
    """

    # a resolver routine that is called as sub_A("dll_name", "api_name") and ends up calling LoadLibrary
    TYPE_1 = 0
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class APIDeobFuncDescriptor:
    """
    Describe one routine that resolves obfuscated API references.

    :ivar type:             The obfuscation style that the routine implements.
    :ivar func_addr:        Address of the resolver routine, or None if unknown.
    :ivar libname_argidx:   Index of the argument that carries the library name, or None if unknown.
    :ivar funcname_argidx:  Index of the argument that carries the API (function) name, or None if unknown.
    """

    def __init__(self, type_: APIObfuscationType, func_addr=None, libname_argidx=None, funcname_argidx=None):
        self.type = type_
        self.func_addr = func_addr
        self.libname_argidx = libname_argidx
        self.funcname_argidx = funcname_argidx

    def __repr__(self) -> str:
        # func_addr defaults to None, so it cannot be formatted as hex unconditionally
        addr = hex(self.func_addr) if isinstance(self.func_addr, int) else repr(self.func_addr)
        return (
            f"<{type(self).__name__} type={self.type!r} func_addr={addr} "
            f"libname_argidx={self.libname_argidx!r} funcname_argidx={self.funcname_argidx!r}>"
        )
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class Type1AssignmentFinder(CStructuredCodeWalker):
    """
    Walk decompiled C code and collect every assignment of the form

        <memory variable> = <resolver>(..., "libname", ..., "funcname", ...)

    where <resolver> is the function at `func_addr`. The positions of the library-name and function-name arguments
    are taken from `desc`.

    :ivar assignments:  Maps the address of the assigned memory variable to the (libname, funcname) pair that was
                        passed to the resolver.
    """

    def __init__(self, func_addr: int, desc: APIDeobFuncDescriptor):
        self.func_addr = func_addr
        self.desc = desc
        self.assignments: dict[int, tuple[str, str]] = {}

    @staticmethod
    def _referenced_string(arg) -> str | None:
        """
        Return the string that a constant call argument refers to, or None if it does not refer to a byte string.
        """
        # reference_values may be None on a CConstant, in which case there is nothing to look up
        if not isinstance(arg, CConstant) or arg.reference_values is None:
            return None
        if arg.type not in arg.reference_values:
            return None
        # not every reference value has a `content` attribute (e.g., plain bytes); those are ignored here
        content = getattr(arg.reference_values[arg.type], "content", None)
        if isinstance(content, bytes):
            return content.decode("utf-8")
        return None

    def _record_assignment(self, obj: CAssignment) -> None:
        """
        Record `obj` in `self.assignments` if it stores the result of a call to the resolver into a memory variable
        with a concrete address and both name arguments are known strings.
        """
        is_resolver_call_assignment = (
            isinstance(obj.lhs, CVariable)
            and isinstance(obj.lhs.variable, SimMemoryVariable)
            and isinstance(obj.lhs.variable.addr, int)
            and isinstance(obj.rhs, CFunctionCall)
            and isinstance(obj.rhs.callee_target, CConstant)
            and obj.rhs.callee_target.value == self.func_addr
        )
        if not is_resolver_call_assignment:
            return

        # found it!
        funcname_argidx = self.desc.funcname_argidx
        libname_argidx = self.desc.libname_argidx
        if funcname_argidx is None or libname_argidx is None:
            # the descriptor does not say where the names are passed
            return

        func_args = obj.rhs.args
        if funcname_argidx >= len(func_args) or libname_argidx >= len(func_args):
            return

        # load two strings
        funcname = self._referenced_string(func_args[funcname_argidx])
        libname = self._referenced_string(func_args[libname_argidx])
        if not funcname or not libname:
            return

        var_addr = obj.lhs.variable.addr
        if var_addr not in self.assignments:
            self.assignments[var_addr] = libname, funcname
        elif self.assignments[var_addr] != (libname, funcname):
            # the first observed assignment wins; only report the conflict
            _l.warning("Observed more than one assignment for variable at %#x.", var_addr)

    def handle_CAssignment(self, obj: CAssignment):
        self._record_assignment(obj)
        # always continue the walk into the children of this assignment
        return super().handle_CAssignment(obj)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class APIObfuscationFinder(Analysis):
    """
    An analysis that automatically finds API "obfuscation" routines.

    Currently, we support the following API "obfuscation" styles:

    - sub_A("dll_name", "api_name") where sub_A ends up calling LoadLibrary.

    After the analysis has run, `type1_candidates` holds one APIDeobFuncDescriptor per detected resolver routine, and
    `kb.obfuscations.type1_deobfuscated_apis` has been updated with a mapping from the address of each memory
    variable that receives a resolver's return value to the (libname, funcname) pair passed to the resolver.
    """

    # byte values that may appear in a library name or an API name
    _VALID_APINAME_CHARSET = frozenset((string.ascii_letters + string.digits + "._").encode("ascii"))

    def __init__(self):
        self.type1_candidates = []

        self.analyze()

    def analyze(self):
        """
        Find type-1 resolver routines, then record every API that they resolve in the knowledge base.
        """
        self.type1_candidates = self._find_type1()

        for desc in self.type1_candidates:
            type1_deobfuscated = self._analyze_type1(desc.func_addr, desc)
            self.kb.obfuscations.type1_deobfuscated_apis.update(type1_deobfuscated)

    def _find_type1(self) -> list[APIDeobFuncDescriptor]:
        """
        Find functions that (transitively, up to three call levels) call a LoadLibrary SimProcedure and that look
        like type-1 resolver routines.

        :return:    A list of descriptors, one per detected routine. The list is empty if nothing is found.
        """
        cfg = self.kb.cfgs.get_most_accurate()

        load_library_funcs = []
        for name in ("LoadLibraryA", "LoadLibraryW", "LoadLibrary"):
            if name in self.kb.functions:
                load_library_funcs += [func for func in self.kb.functions.get_by_name(name) if func.is_simprocedure]

        if not load_library_funcs:
            return []

        # find callers of each load library func, up to three callers back
        callgraph = self.kb.functions.callgraph
        descs = []
        checked: set[int] = set()  # a caller may be reachable from several LoadLibrary variants; test it only once
        for load_library_func in load_library_funcs:
            subtree = self._build_caller_subtree(callgraph, load_library_func.addr, 3)
            if load_library_func.addr not in subtree:
                # the function has no callers, so the subtree is empty and cannot be traversed from this node
                continue
            for _, succs in networkx.bfs_successors(subtree, load_library_func.addr):
                for succ_addr in succs:
                    if succ_addr in checked:
                        continue
                    checked.add(succ_addr)

                    func = self.kb.functions.get_by_addr(succ_addr)
                    likely, info = self._is_likely_type1_func(func, cfg)
                    if likely:
                        descs.append(
                            APIDeobFuncDescriptor(
                                APIObfuscationType.TYPE_1,
                                func_addr=func.addr,
                                libname_argidx=info["libname_arg_idx"],
                                funcname_argidx=info["funcname_arg_idx"],
                            )
                        )

        return descs

    def _is_likely_type1_func(self, func, cfg):
        """
        Test if a function looks like a type-1 resolver routine: its prototype has exactly two char* arguments, and
        at some call site it is called with one string that is a known library name and one string that is a known
        API name.

        :param func:    The function to test.
        :param cfg:     The control flow graph model that is used for locating call sites.
        :return:        (True, {"libname_arg_idx": int, "funcname_arg_idx": int}) if the function is likely a
                        type-1 resolver routine, (False, None) otherwise.
        """
        if func.prototype is None:
            return False, None
        if len(func.prototype.args) < 2:
            return False, None

        # decompile the function to get a prototype with types
        _ = self.project.analyses.Decompiler(func, cfg=cfg)

        char_ptr_args = [
            idx
            for (idx, arg) in enumerate(func.prototype.args)
            if isinstance(arg, SimTypePointer) and isinstance(arg.pts_to, SimTypeChar)
        ]
        if len(char_ptr_args) != 2:
            return False, None

        # who's calling it?
        caller_addrs = sorted(set(self.kb.functions.callgraph.predecessors(func.addr)))
        if not caller_addrs:
            return False, None

        # the CFG predecessors of the callee do not depend on the caller; look them up once
        callee_preds = list(cfg.get_predecessors(cfg.get_any_node(func.addr)))

        for caller_addr in caller_addrs:
            # what arguments are used to call this function with?
            callsite_nodes = [
                pred for pred in callee_preds if pred.function_address == caller_addr and pred.instruction_addrs
            ]
            if not callsite_nodes:
                # nothing to observe in this caller
                continue

            observation_points = [
                ("insn", callsite_node.instruction_addrs[-1], ObservationPointType.OP_BEFORE)
                for callsite_node in callsite_nodes
            ]
            rda = self.project.analyses.ReachingDefinitions(
                self.kb.functions[caller_addr],
                observe_all=False,
                observation_points=observation_points,
            )
            for callsite_node in callsite_nodes:
                observ = rda.model.get_observation_by_insn(
                    callsite_node.instruction_addrs[-1],
                    ObservationPointType.OP_BEFORE,
                )
                args = self._load_callsite_reg_args(func, observ)

                # every concrete argument that points into a section must be a plausible name string, and there
                # must be exactly two of them: one for the library and one for the API
                arg_strs = self._read_name_strings(args)
                if arg_strs is None or len(arg_strs) != 2:
                    continue

                libname_arg_idx, funcname_arg_idx = None, None
                for arg_idx, name in arg_strs:
                    if self.is_libname(name):
                        libname_arg_idx = arg_idx
                    elif self.is_apiname(name):
                        funcname_arg_idx = arg_idx

                if libname_arg_idx is not None and funcname_arg_idx is not None:
                    return True, {"libname_arg_idx": libname_arg_idx, "funcname_arg_idx": funcname_arg_idx}

        return False, None

    def _load_callsite_reg_args(self, func, observ) -> list[tuple[int, Any]]:
        """
        Load the values of all register-passed arguments of `func` from a reaching-definition observation that was
        taken right before a call site.

        :return:    A list of (argument index, value). Unknown values are replaced by the constant 0xDEADBEEF.
        """
        arch = self.project.arch
        args: list[tuple[int, Any]] = []
        for arg_idx, func_arg in enumerate(func.arguments):
            # FIXME: We are ignoring all non-register function arguments until we see a test case where
            # FIXME: stack-passing arguments are used
            if not isinstance(func_arg, SimRegArg):
                continue
            reg_offset, reg_size = arch.registers[func_arg.reg_name]
            try:
                mv = observ.registers.load(reg_offset, size=reg_size)
            except SimMemoryMissingError:
                args.append((arg_idx, claripy.BVV(0xDEADBEEF, arch.bits)))
                continue
            arg_value = mv.one_value()
            if arg_value is None:
                arg_value = claripy.BVV(0xDEADBEEF, arch.bits)
            args.append((arg_idx, arg_value))
        return args

    def _read_name_strings(self, args: list[tuple[int, Any]]) -> list[tuple[int, str]] | None:
        """
        Read the NUL-terminated string (at most 64 bytes) behind every concrete argument that points into a section
        of a loaded object.

        :return:    A list of (argument index, string), or None if any such argument cannot be read from memory or
                    contains bytes that are not allowed in library or API names.
        """
        loader = self.project.loader
        arg_strs: list[tuple[int, str]] = []
        for idx, arg in args:
            if arg is None or not arg.concrete:
                continue
            v = arg.concrete_value
            section = loader.find_section_containing(v)
            if section is None:
                continue
            # what string is it?
            max_size = min(64, section.max_addr - v)
            try:
                value = loader.memory.load(v, max_size)
            except KeyError:
                return None
            if b"\x00" in value:
                value = value[: value.index(b"\x00")]
            if not all(ch in self._VALID_APINAME_CHARSET for ch in value):
                return None
            arg_strs.append((idx, value.decode("utf-8")))
        return arg_strs

    def _analyze_type1(self, func_addr, desc: APIDeobFuncDescriptor) -> dict[int, tuple[str, str]]:
        """
        Decompile every caller of the resolver routine at `func_addr` and collect the assignments of its return
        value to memory variables.

        :param func_addr:   Address of the resolver routine.
        :param desc:        The descriptor of the resolver routine.
        :return:            A dict that maps the address of each assigned memory variable to (libname, funcname).
        """
        cfg = self.kb.cfgs.get_most_accurate()

        assignments: dict[int, tuple[str, str]] = {}

        # get all call sites
        caller_addrs = sorted(set(self.kb.functions.callgraph.predecessors(func_addr)))
        for caller_addr in caller_addrs:
            # decompile the function and get all assignments of the return value of the func at func_addr
            try:
                dec = self.project.analyses.Decompiler(self.kb.functions.get_by_addr(caller_addr), cfg=cfg)
            except Exception:  # pylint:disable=broad-exception-caught
                # best effort: a caller that cannot be decompiled is skipped, but leave a trace of it
                _l.warning(
                    "Failed to decompile caller %#x of function %#x. Skipping it.",
                    caller_addr,
                    func_addr,
                    exc_info=True,
                )
                continue
            if dec.codegen is None:
                continue

            finder = Type1AssignmentFinder(func_addr, desc)
            finder.handle(dec.codegen.cfunc)

            duplicate_addrs = assignments.keys() & finder.assignments.keys()
            if duplicate_addrs:
                # duplicate entries
                _l.warning(
                    "Observed duplicate assignments at the following addresses: %s.",
                    ", ".join(hex(addr) for addr in sorted(duplicate_addrs)),
                )

            assignments.update(finder.assignments)

        return assignments

    @staticmethod
    def _build_caller_subtree(callgraph: networkx.DiGraph, func_addr: int, max_level: int) -> networkx.DiGraph:
        """
        Build a tree of the (transitive) callers of a function by a breadth-first traversal of the call graph.

        :param callgraph:   The call graph. Edges go from callers to callees.
        :param func_addr:   Address of the function whose callers are collected.
        :param max_level:   The maximum number of call levels to go back.
        :return:            A graph with an edge from each function to each of its callers that was first reached
                            through it. The graph is empty if the function is not in the call graph or has no
                            callers.
        """
        tree = networkx.DiGraph()

        if func_addr not in callgraph:
            return tree

        traversed = {func_addr}
        frontier = [func_addr]
        for _ in range(max_level):
            next_frontier = []
            for addr in frontier:
                for pred in callgraph.predecessors(addr):
                    if pred not in traversed:
                        traversed.add(pred)
                        next_frontier.append(pred)
                        tree.add_edge(addr, pred)
            if not next_frontier:
                break
            frontier = next_frontier

        return tree

    @staticmethod
    def is_libname(name: str) -> bool:
        """
        Test if a name is the name of a library in SIM_LIBRARIES. The test is case-insensitive, and a name without
        a dot is also tried with the ".dll" and ".exe" suffixes.
        """
        name = name.lower()
        if name in SIM_LIBRARIES:
            return True
        if "." not in name:
            return name + ".dll" in SIM_LIBRARIES or name + ".exe" in SIM_LIBRARIES
        return False

    @staticmethod
    def is_apiname(name: str) -> bool:
        """
        Test if any non-syscall library in SIM_LIBRARIES has a prototype for a function with the given name.
        """
        return any(not isinstance(lib, SimSyscallLibrary) and lib.has_prototype(name) for lib in SIM_LIBRARIES.values())
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
AnalysesHub.register_default("APIObfuscationFinder", APIObfuscationFinder)
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from ailment.expression import Const, Load
|
|
3
|
+
|
|
4
|
+
from angr import SIM_LIBRARIES
|
|
5
|
+
from angr.calling_conventions import default_cc
|
|
6
|
+
from angr.analyses.decompiler.peephole_optimizations.base import PeepholeOptimizationExprBase
|
|
7
|
+
from angr.analyses.decompiler.peephole_optimizations import EXPR_OPTS
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class APIObfType1PeepholeOptimizer(PeepholeOptimizationExprBase):
    """
    Integrate type-1 deobfuscated API into decompilation output.

    A pointer-sized load from a constant address that is a key of `kb.obfuscations.type1_deobfuscated_apis` is
    replaced by a constant: the address of the function with the recorded API name.
    """

    __slots__ = ()

    NAME = "Simplify Type 1 API obfuscation references"
    expr_classes = (Load,)

    def optimize(self, expr: Load, **kwargs):
        """
        :param expr:    The Load expression to optimize.
        :return:        A Const that holds the address of the resolved API function, or None if the expression does
                        not load a known deobfuscated API pointer.
        """
        deobfuscated_apis = self.kb.obfuscations.type1_deobfuscated_apis
        if not (
            isinstance(expr.addr, Const)
            and (expr.addr.value in deobfuscated_apis)
            and expr.bits == self.project.arch.bits
        ):
            return None

        # this is actually a function calling a known API
        # replace it with the actual API and the actual arguments
        _, funcname = deobfuscated_apis[expr.addr.value]
        if funcname in self.kb.functions:
            func = self.kb.functions[funcname]
        else:
            # assign a new function on-demand: create an extern symbol, hook it with a stub, and register a
            # SimProcedure function at the hooked address
            symbol = self.project.loader.extern_object.make_extern(funcname)
            hook_addr = self.project.hook_symbol(
                symbol.rebased_addr, SIM_LIBRARIES["linux"].get_stub(funcname, self.project.arch)
            )
            func = self.kb.functions.function(addr=hook_addr, name=funcname, create=True)
            func.is_simprocedure = True

            # give the new function the default calling convention of this architecture (and platform, if known)
            default_cc_kwargs = {}
            if self.project.simos is not None:
                default_cc_kwargs["platform"] = self.project.simos.name
            default_cc_cls = default_cc(self.project.arch.name, **default_cc_kwargs)
            if default_cc_cls is not None:
                func.calling_convention = default_cc_cls(self.project.arch)
            func.find_declaration(ignore_binary_name=True)

        return Const(expr.idx, None, func.addr, self.project.arch.bits, **expr.tags)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
EXPR_OPTS.append(APIObfType1PeepholeOptimizer)
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# pylint:disable=no-self-use,unused-argument,attribute-defined-outside-init
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import pyvex
|
|
5
|
+
|
|
6
|
+
from angr.engines.light import SimEngineLightVEXMixin
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class IRSBRegisterCollector(SimEngineLightVEXMixin):
    """
    Scan the VEX IRSB to collect all registers that are read.

    After `process()` has been called, `reg_reads` holds one (offset, size) tuple for every Get expression that was
    handled. The size is whatever `result_size()` of the expression reports.
    """

    def __init__(self, block, *args, **kwargs):
        """
        :param block:   The block to scan. Its `vex` attribute provides the IRSB.
        """
        super().__init__(*args, **kwargs)

        self.block = block
        self.reg_reads: set[tuple[int, int]] = set()

    def process(self):
        """
        Scan the block and fill `reg_reads`.
        """
        self.tmps = {}
        self.tyenv = self.block.vex.tyenv

        # NOTE(review): presumably the mixin dispatches to the _handle_* methods below from here - confirm
        self._process_Stmt()

        self.stmt_idx = None
        self.ins_addr = None

    #
    # Only register reads (Get expressions) are of interest. All other handlers below intentionally do nothing.
    #

    def _handle_Put(self, stmt):
        pass

    def _handle_Load(self, expr):
        pass

    def _handle_Store(self, stmt):
        pass

    def _handle_LoadG(self, stmt):
        pass

    def _handle_LLSC(self, stmt: pyvex.IRStmt.LLSC):
        pass

    def _handle_StoreG(self, stmt):
        pass

    def _handle_Get(self, expr: pyvex.IRExpr.Get):
        # record the register offset and the size of the read
        self.reg_reads.add((expr.offset, expr.result_size(self.tyenv)))

    def _handle_RdTmp(self, expr):
        pass

    def _handle_Conversion(self, expr: pyvex.IRExpr.Unop):
        pass

    def _handle_16HLto32(self, expr):
        pass

    def _handle_Cmp_v(self, expr, _vector_size, _vector_count):
        pass

    # all vector comparison handlers share the same no-op implementation
    _handle_CmpEQ_v = _handle_Cmp_v
    _handle_CmpNE_v = _handle_Cmp_v
    _handle_CmpLE_v = _handle_Cmp_v
    _handle_CmpLT_v = _handle_Cmp_v
    _handle_CmpGE_v = _handle_Cmp_v
    _handle_CmpGT_v = _handle_Cmp_v

    def _handle_ExpCmpNE64(self, expr):
        pass

    def _handle_CCall(self, expr):
        pass

    def _handle_function(self, func_addr):
        pass

    def _handle_Unop(self, expr):
        pass

    def _handle_Binop(self, expr: pyvex.IRExpr.Binop):
        pass

    def _handle_Triop(self, expr: pyvex.IRExpr.Triop):
        pass
|