angr 9.2.125__py3-none-macosx_11_0_arm64.whl → 9.2.127__py3-none-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/__init__.py +4 -0
- angr/analyses/analysis.py +8 -2
- angr/analyses/cfg/cfg_fast.py +12 -1
- angr/analyses/decompiler/ail_simplifier.py +1 -0
- angr/analyses/decompiler/callsite_maker.py +9 -1
- angr/analyses/decompiler/clinic.py +2 -1
- angr/analyses/decompiler/condition_processor.py +109 -73
- angr/analyses/decompiler/decompilation_cache.py +4 -0
- angr/analyses/decompiler/decompiler.py +21 -3
- angr/analyses/decompiler/dephication/graph_vvar_mapping.py +1 -2
- angr/analyses/decompiler/optimization_passes/__init__.py +15 -1
- angr/analyses/decompiler/return_maker.py +1 -0
- angr/analyses/decompiler/ssailification/rewriting.py +4 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +10 -3
- angr/analyses/decompiler/ssailification/traversal.py +1 -0
- angr/analyses/decompiler/ssailification/traversal_engine.py +15 -0
- angr/analyses/decompiler/structured_codegen/c.py +18 -5
- angr/analyses/decompiler/structured_codegen/dwarf_import.py +4 -1
- angr/analyses/deobfuscator/__init__.py +18 -0
- angr/analyses/deobfuscator/api_obf_finder.py +313 -0
- angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +51 -0
- angr/analyses/deobfuscator/irsb_reg_collector.py +85 -0
- angr/analyses/deobfuscator/string_obf_finder.py +774 -0
- angr/analyses/deobfuscator/string_obf_opt_passes.py +133 -0
- angr/analyses/deobfuscator/string_obf_peephole_optimizer.py +47 -0
- angr/analyses/reaching_definitions/function_handler_library/stdio.py +8 -1
- angr/analyses/reaching_definitions/function_handler_library/string.py +2 -2
- angr/analyses/s_liveness.py +3 -3
- angr/analyses/s_propagator.py +74 -3
- angr/analyses/unpacker/__init__.py +6 -0
- angr/analyses/unpacker/obfuscation_detector.py +103 -0
- angr/analyses/unpacker/packing_detector.py +138 -0
- angr/angrdb/models.py +2 -1
- angr/angrdb/serializers/kb.py +3 -3
- angr/angrdb/serializers/structured_code.py +5 -3
- angr/calling_conventions.py +4 -2
- angr/engines/vex/claripy/irop.py +10 -5
- angr/knowledge_base.py +1 -1
- angr/knowledge_plugins/__init__.py +2 -2
- angr/knowledge_plugins/obfuscations.py +36 -0
- angr/knowledge_plugins/structured_code.py +1 -1
- angr/lib/angr_native.dylib +0 -0
- angr/utils/ssa/__init__.py +8 -3
- {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/METADATA +6 -6
- {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/RECORD +50 -40
- {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/WHEEL +1 -1
- angr/knowledge_plugins/decompilation.py +0 -45
- {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/LICENSE +0 -0
- {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/entry_points.txt +0 -0
- {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# pylint:disable=too-many-boolean-expressions
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import archinfo
|
|
5
|
+
|
|
6
|
+
from ailment import Block
|
|
7
|
+
from ailment.statement import Statement, Call, Assignment
|
|
8
|
+
from ailment.expression import Const, Register, VirtualVariable
|
|
9
|
+
|
|
10
|
+
from angr.analyses.decompiler.optimization_passes.optimization_pass import OptimizationPass, OptimizationPassStage
|
|
11
|
+
from angr.analyses.decompiler.optimization_passes import register_optimization_pass
|
|
12
|
+
|
|
13
|
+
# Register-file offsets of rcx, rdx, r8 and r9 on AMD64. The rewriter below uses this set to recognise (and drop)
# assignments to these registers. The architecture object is built once instead of once per register.
_AMD64_REGISTERS = archinfo.ArchAMD64().registers
WIN64_REG_ARGS = {_AMD64_REGISTERS[reg_name][0] for reg_name in ("rcx", "rdx", "r8", "r9")}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class StringObfType3Rewriter(OptimizationPass):
    """
    Rewrite type-3 string deobfuscation call sites.

    For every block whose last statement is a call (or an assignment from a call) located at an address recorded in
    ``kb.obfuscations.type3_deobfuscated_strings``, the call is replaced by an ``init_str`` call that carries the
    deobfuscated bytes, and the statements that only prepared the original call's arguments are removed.
    """

    ARCHES = ["X86", "AMD64"]
    PLATFORMS = ["windows"]
    STAGE = OptimizationPassStage.AFTER_MAKING_CALLSITES

    NAME = "Simplify Type 3 string deobfuscation calls"
    DESCRIPTION = "Simplify Type 3 string deobfuscation calls"
    stmt_classes = ()

    def __init__(self, func, **kwargs):
        super().__init__(func, **kwargs)

        self.analyze()

    def _check(self):
        """
        Report whether this pass has anything to do.

        :return: ``(True, None)`` when the knowledge base holds at least one type-3 deobfuscated string, otherwise
                 ``(False, None)``.
        """
        return bool(self.kb.obfuscations.type3_deobfuscated_strings), None

    @staticmethod
    def is_call_or_call_assignment(stmt) -> bool:
        """
        Tell whether ``stmt`` is a ``Call`` statement or an ``Assignment`` whose source is a ``Call``.
        """
        if isinstance(stmt, Call):
            return True
        return isinstance(stmt, Assignment) and isinstance(stmt.src, Call)

    def _analyze(self, cache=None):
        """
        Walk a snapshot of the graph and rewrite every block that ends in a known type-3 deobfuscation call.
        """
        deobfuscated = self.kb.obfuscations.type3_deobfuscated_strings

        # iterate over a copy of the node list because blocks are replaced while we walk
        for block in list(self._graph):
            if not block.statements:
                continue

            tail = block.statements[-1]
            if not self.is_call_or_call_assignment(tail):
                continue
            if tail.ins_addr not in deobfuscated:
                continue

            rewritten = self._process_block(block, deobfuscated[tail.ins_addr])
            if rewritten is not None:
                self._update_block(block, rewritten)

    def _process_block(self, block: Block, deobf_content: bytes):
        """
        Build a copy of ``block`` in which the trailing deobfuscation call is replaced by ``init_str``.

        :param block:           The block whose last statement is the deobfuscation call.
        :param deobf_content:   The deobfuscated bytes associated with that call.
        :return:                A copy of ``block`` with the rewritten statement list.
        """
        # FIXME: This rewriter is very specific to the implementation of the deobfuscation scheme. we can make it more
        #  generic when there are more cases available in the wild.

        # TODO: Support multiple blocks

        # --- replace the call ---
        tail: Statement = block.statements[-1]
        tail_is_assignment = isinstance(tail, Assignment)
        str_id = self.kb.custom_strings.allocate(deobf_content)
        original_call: Call = tail.src if tail_is_assignment else tail

        call_args = [
            original_call.args[0],
            Const(None, None, str_id, self.project.arch.bits, custom_string=True),
            Const(None, None, len(deobf_content), self.project.arch.bits),
        ]
        replacement_call = Call(
            original_call.idx,
            "init_str",
            args=call_args,
            ret_expr=original_call.ret_expr,
            bits=original_call.bits,
            **original_call.tags,
        )
        if tail_is_assignment:
            replacement = Assignment(tail.idx, tail.dst, replacement_call, **tail.tags)
        else:
            replacement = replacement_call

        stmts = [*block.statements[:-1], replacement]

        # --- remove N-2 continuous stack assignment ---
        if len(deobf_content) > 2:
            # stack offset -> index of the (last) statement storing a constant <= 0xFF into that stack variable
            stmt_pos_by_offset: dict[int, int] = {}
            for pos, candidate in enumerate(stmts):
                if not isinstance(candidate, Assignment):
                    continue
                dst = candidate.dst
                if not (isinstance(dst, VirtualVariable) and dst.was_stack and isinstance(dst.stack_offset, int)):
                    continue
                if isinstance(candidate.src, Const) and candidate.src.value <= 0xFF:
                    stmt_pos_by_offset[dst.stack_offset] = pos

            offsets = sorted(stmt_pos_by_offset)
            if offsets:
                spacing = 8  # FIXME: Make it adjustable
                distance = min(len(deobf_content) - 2, len(offsets) - 1)
                # NOTE(review): with a single candidate offset, distance is 0 and that lone statement is removed -
                #  confirm this is intended.
                for first in range(len(offsets) - distance):
                    if offsets[first] + spacing * distance != offsets[first + distance]:
                        continue
                    # found an evenly spaced run; drop every statement that belongs to it
                    doomed = {stmt_pos_by_offset[off] for off in offsets[first : first + distance + 1]}
                    stmts = [stmt for pos, stmt in enumerate(stmts) if pos not in doomed]
                    break

        # --- remove writes to rdx, rcx, r8, and r9 ---
        if self.project.arch.name == "AMD64":
            stmts = [stmt for stmt in stmts if not self._stmt_sets_win64_reg_arg(stmt)]

        # return the new block
        return block.copy(statements=stmts)

    @staticmethod
    def _stmt_sets_win64_reg_arg(stmt) -> bool:
        """
        Tell whether ``stmt`` assigns to a register whose offset is listed in ``WIN64_REG_ARGS``.
        """
        if not isinstance(stmt, Assignment):
            return False
        return isinstance(stmt.dst, Register) and stmt.dst.reg_offset in WIN64_REG_ARGS
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
register_optimization_pass(StringObfType3Rewriter, presets=["fast", "full"])
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from ailment.statement import Call
|
|
3
|
+
from ailment.expression import Const
|
|
4
|
+
import claripy
|
|
5
|
+
|
|
6
|
+
from angr.analyses.decompiler.peephole_optimizations.base import PeepholeOptimizationExprBase
|
|
7
|
+
from angr.analyses.decompiler.peephole_optimizations import EXPR_OPTS
|
|
8
|
+
from angr.errors import AngrCallableMultistateError
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class StringObfType1PeepholeOptimizer(PeepholeOptimizationExprBase):
    """
    Integrate type-1 and type-2 deobfuscated strings into decompilation output.

    A call to a known string loader whose arguments are all constants is executed concretely; when the result is
    concrete, the call expression is replaced by a constant holding that result.
    """

    __slots__ = ()

    NAME = "Simplify Type 1/2 string deobfuscation references"
    expr_classes = (Call,)

    def optimize(self, expr: Call, **kwargs):
        """
        Try to fold a call to a type-1/type-2 string loader into a constant.

        :param expr:    The call expression under consideration.
        :return:        A ``Const`` carrying the concrete return value, or None when the call cannot be folded.
        """
        target = expr.target
        if not isinstance(target, Const):
            return None

        obfuscations = self.kb.obfuscations
        if (
            target.value not in obfuscations.type1_string_loader_candidates
            and target.value not in obfuscations.type2_string_loader_candidates
        ):
            # not a call to a type1 or a type2 string loader
            return None

        # the call can only be optimized away when every argument is a constant
        if not expr.args or any(not isinstance(arg, Const) for arg in expr.args):
            return None

        # execute the function with the given arguments
        loader = self.kb.functions[target.value]
        run_loader = self.project.factory.callable(
            target.value, concrete_only=True, cc=loader.calling_convention, prototype=loader.prototype
        )
        concrete_args = [claripy.BVV(arg.value, arg.bits) for arg in expr.args]
        try:
            result = run_loader(*concrete_args)
        except AngrCallableMultistateError:
            return None

        if not result.concrete:
            return None

        # FIXME: use out.bits when the function prototype recovery is more reliable
        return Const(None, None, result.concrete_value, self.project.arch.bits, **expr.tags)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
EXPR_OPTS.append(StringObfType1PeepholeOptimizer)
|
|
@@ -197,6 +197,11 @@ def handle_printf(
|
|
|
197
197
|
buf_data = state.get_values(buf_atoms)
|
|
198
198
|
if buf_data is not None:
|
|
199
199
|
buf_data = buf_data.extract(0, len(buf_data) // 8 - 1, archinfo.Endness.BE)
|
|
200
|
+
else:
|
|
201
|
+
top_val = state.top(state.arch.bits)
|
|
202
|
+
for defn in state.get_definitions(atom):
|
|
203
|
+
top_val = state.annotate_with_def(top_val, defn)
|
|
204
|
+
buf_data = MultiValues(top_val)
|
|
200
205
|
elif fmt == "%u":
|
|
201
206
|
buf_atoms = atom
|
|
202
207
|
buf_data = state.get_concrete_value(buf_atoms)
|
|
@@ -217,7 +222,9 @@ def handle_printf(
|
|
|
217
222
|
else:
|
|
218
223
|
_l.warning("Unimplemented printf format string %s", fmt)
|
|
219
224
|
buf_atoms = set()
|
|
220
|
-
|
|
225
|
+
top_val = state.top(state.arch.bits)
|
|
226
|
+
buf_data = MultiValues(top_val)
|
|
227
|
+
|
|
221
228
|
if result is not None and buf_data is not None:
|
|
222
229
|
result = result.concat(buf_data)
|
|
223
230
|
source_atoms.update(buf_atoms)
|
|
@@ -46,8 +46,8 @@ class LibcStringHandlers(FunctionHandler):
|
|
|
46
46
|
|
|
47
47
|
@FunctionCallDataUnwrapped.decorate
|
|
48
48
|
def handle_impl_strncpy(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
|
|
49
|
-
n = state.get_concrete_value(data.args_atoms[
|
|
50
|
-
src_atom = state.deref(data.args_atoms[
|
|
49
|
+
n = state.get_concrete_value(data.args_atoms[2])
|
|
50
|
+
src_atom = state.deref(data.args_atoms[1], DerefSize.NULL_TERMINATE if n is None else n)
|
|
51
51
|
src_str = state.get_values(src_atom)
|
|
52
52
|
if src_str is not None:
|
|
53
53
|
dst_atom = state.deref(data.args_atoms[0], len(src_str) // 8)
|
angr/analyses/s_liveness.py
CHANGED
|
@@ -5,7 +5,7 @@ from ailment.expression import VirtualVariable
|
|
|
5
5
|
from ailment.statement import Assignment
|
|
6
6
|
|
|
7
7
|
from angr.analyses import Analysis, register_analysis
|
|
8
|
-
from angr.utils.ssa import
|
|
8
|
+
from angr.utils.ssa import VVarUsesCollector, phi_assignment_get_src
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class SLivenessModel:
|
|
@@ -85,8 +85,8 @@ class SLivenessAnalysis(Analysis):
|
|
|
85
85
|
if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
|
|
86
86
|
live.discard(stmt.dst.varid)
|
|
87
87
|
|
|
88
|
-
|
|
89
|
-
if
|
|
88
|
+
phi_expr = phi_assignment_get_src(stmt)
|
|
89
|
+
if phi_expr is not None:
|
|
90
90
|
for src, vvar in phi_expr.src_and_vvars:
|
|
91
91
|
if src not in live_in_by_pred:
|
|
92
92
|
live_in_by_pred[src] = live.copy()
|
angr/analyses/s_propagator.py
CHANGED
|
@@ -4,7 +4,7 @@ import contextlib
|
|
|
4
4
|
from collections import defaultdict
|
|
5
5
|
|
|
6
6
|
from ailment.block import Block
|
|
7
|
-
from ailment.expression import Const, VirtualVariable, VirtualVariableCategory, StackBaseOffset
|
|
7
|
+
from ailment.expression import Const, VirtualVariable, VirtualVariableCategory, StackBaseOffset, Load, Convert
|
|
8
8
|
from ailment.statement import Assignment, Store, Return, Jump
|
|
9
9
|
|
|
10
10
|
from angr.knowledge_plugins.functions import Function
|
|
@@ -21,6 +21,7 @@ from angr.utils.ssa import (
|
|
|
21
21
|
is_const_vvar_tmp_assignment,
|
|
22
22
|
get_tmp_uselocs,
|
|
23
23
|
get_tmp_deflocs,
|
|
24
|
+
phi_assignment_get_src,
|
|
24
25
|
)
|
|
25
26
|
|
|
26
27
|
|
|
@@ -129,8 +130,8 @@ class SPropagatorAnalysis(Analysis):
|
|
|
129
130
|
replacements[useloc][vvar_at_use] = v
|
|
130
131
|
continue
|
|
131
132
|
|
|
132
|
-
|
|
133
|
-
if
|
|
133
|
+
v = phi_assignment_get_src(stmt)
|
|
134
|
+
if v is not None:
|
|
134
135
|
src_varids = {vvar.varid if vvar is not None else None for _, vvar in v.src_and_vvars}
|
|
135
136
|
if None not in src_varids and all(varid in const_vvars for varid in src_varids):
|
|
136
137
|
src_values = {
|
|
@@ -182,6 +183,31 @@ class SPropagatorAnalysis(Analysis):
|
|
|
182
183
|
# this vvar is used once if we exclude its uses at ret sites or jump sites. we can propagate it
|
|
183
184
|
for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
|
|
184
185
|
replacements[vvar_useloc][vvar_used] = stmt.src
|
|
186
|
+
continue
|
|
187
|
+
|
|
188
|
+
# special logic for global variables: if it's used once or multiple times, and the variable is never
|
|
189
|
+
# updated before it's used, we will propagate the load
|
|
190
|
+
if isinstance(stmt, Assignment):
|
|
191
|
+
stmt_src = stmt.src
|
|
192
|
+
# unpack conversions
|
|
193
|
+
while isinstance(stmt_src, Convert):
|
|
194
|
+
stmt_src = stmt_src.operand
|
|
195
|
+
if isinstance(stmt_src, Load) and isinstance(stmt_src.addr, Const):
|
|
196
|
+
gv_updated = False
|
|
197
|
+
for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
|
|
198
|
+
gv_updated |= self.is_global_variable_updated(
|
|
199
|
+
self.func_graph,
|
|
200
|
+
blocks,
|
|
201
|
+
vvar.varid,
|
|
202
|
+
stmt_src.addr.value,
|
|
203
|
+
stmt_src.size,
|
|
204
|
+
defloc,
|
|
205
|
+
vvar_useloc,
|
|
206
|
+
)
|
|
207
|
+
if not gv_updated:
|
|
208
|
+
for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
|
|
209
|
+
replacements[vvar_useloc][vvar_used] = stmt.src
|
|
210
|
+
continue
|
|
185
211
|
|
|
186
212
|
for vvar_id, uselocs in vvar_uselocs.items():
|
|
187
213
|
vvar = next(iter(uselocs))[0] if vvar_id not in vvarid_to_vvar else vvarid_to_vvar[vvar_id]
|
|
@@ -257,5 +283,50 @@ class SPropagatorAnalysis(Analysis):
|
|
|
257
283
|
|
|
258
284
|
self.model.replacements = replacements
|
|
259
285
|
|
|
286
|
+
@staticmethod
def is_global_variable_updated(
    func_graph, block_dict, varid: int, gv_addr: int, gv_size: int, defloc: CodeLocation, useloc: CodeLocation
) -> bool:
    """
    Check whether a global variable may be overwritten between a definition site and a use site.

    The function graph is traversed breadth-first from the defining block. Every visited statement that is a
    ``Store`` to a constant address is checked for overlap with ``[gv_addr, gv_addr + gv_size)``.

    :param func_graph:  Graph of blocks; only ``successors()`` is used.
    :param block_dict:  Mapping from ``(block_addr, block_idx)`` to the block object.
    :param varid:       ID of the virtual variable holding the loaded value. A path is abandoned at a successor
                        that contains a phi assignment taking this variable as a source.
    :param gv_addr:     Address of the global variable.
    :param gv_size:     Size of the global variable, in the same unit as ``Store.size``.
    :param defloc:      Location of the statement defining the virtual variable.
    :param useloc:      Location of the statement using the virtual variable.
    :return:            True if an overlapping constant-address store is found, False otherwise.
    """
    # function-scope import keeps this block self-contained; deque gives O(1) pops from the left
    from collections import deque  # pylint:disable=import-outside-toplevel

    defblock = block_dict[(defloc.block_addr, defloc.block_idx)]
    useblock = block_dict[(useloc.block_addr, useloc.block_idx)]

    # traverse a graph slice from the def block to the use block and check if the global variable is updated
    seen = {defblock}
    queue = deque([defblock])
    while queue:
        block = queue.popleft()

        start_stmt_idx = defloc.stmt_idx if block is defblock else 0  # inclusive
        end_stmt_idx = useloc.stmt_idx if block is useblock else len(block.statements)  # exclusive

        for idx in range(start_stmt_idx, end_stmt_idx):
            stmt = block.statements[idx]
            if isinstance(stmt, Store) and isinstance(stmt.addr, Const):
                store_addr = stmt.addr.value
                store_size = stmt.size
                # the two byte ranges overlap if either one starts inside the other
                if gv_addr <= store_addr < gv_addr + gv_size or store_addr <= gv_addr < store_addr + store_size:
                    return True

        if block is useblock:
            # do not walk past the use site
            continue

        for succ in func_graph.successors(block):
            if succ in seen:
                continue

            # the virtual variable is no longer live once it is consumed by a phi assignment
            consumed_by_phi = any(
                is_phi_assignment(stmt)
                and any(vvar.varid == varid for _, vvar in stmt.src.src_and_vvars if vvar is not None)
                for stmt in succ.statements
            )
            if consumed_by_phi:
                continue

            seen.add(succ)
            queue.append(succ)

    return False
|
|
330
|
+
|
|
260
331
|
|
|
261
332
|
register_analysis(SPropagatorAnalysis, "SPropagator")
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
import networkx
|
|
5
|
+
|
|
6
|
+
from angr.analyses.analysis import Analysis, AnalysesHub
|
|
7
|
+
from angr.knowledge_plugins.cfg import CFGModel
|
|
8
|
+
|
|
9
|
+
_l = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ObfuscationDetector(Analysis):
    """
    This analysis detects, usually in ways that are more robust than section name matching or signature matching, the
    existence of obfuscation techniques in a binary.

    After the analysis has run, ``obfuscated`` tells whether any detection routine fired and ``possible_obfuscators``
    lists the names reported by those routines.
    """

    def __init__(self, cfg: CFGModel | None = None):
        """
        :param cfg: The CFG model to inspect. When None, the most accurate CFG model in the knowledge base is used.
        """
        self.obfuscated: bool = False
        self.possible_obfuscators: list[str] = []

        if cfg is None:
            _l.warning(
                "ObfuscationDetector is using a most accurate CFG model in the knowledge base. We assume it is "
                "generated with force_smart_scan=False and force_complete_scan=False."
            )
            self._cfg = self.kb.cfgs.get_most_accurate()
        else:
            self._cfg = cfg

        self.analyze()

    def analyze(self):
        """
        Run every detection routine and record the obfuscators they report.
        """
        analysis_routines = [
            self._analyze_vmprotect,
        ]

        for routine in analysis_routines:
            tool = routine()
            if tool:
                self.obfuscated = True
                self.possible_obfuscators.append(tool)

    def _analyze_vmprotect(self) -> str | None:
        """
        We detect VMProtect v3 (with control-flow obfuscation) based on two main characteristics:

        - In amd64 binaries, there exists a strongly connected component in the call graph with over 1,000 nodes.
          Edge/node ratio is >= 1.3
        - There is a high number of pushf and popf instructions in the visible functions.

        :return: ``"vmprotect"`` when both characteristics are observed, None otherwise.
        """

        high_scc_node_edge_ratio = False
        high_pushf = False
        high_popf = False
        # computed below but not (yet) part of the final verdict
        high_clc = False  # pylint:disable=unused-variable

        if self.project.arch.name == "AMD64":
            cg = self.kb.functions.callgraph
            sccs = networkx.strongly_connected_components(cg)

            for scc in sccs:
                node_count = len(scc)
                if node_count > 1000:
                    # only build the subgraph for components that are large enough to matter
                    edge_count = len(networkx.subgraph(cg, scc).edges)

                    if edge_count / node_count >= 1.3:
                        high_scc_node_edge_ratio = True
                        break
        else:
            # the call-graph criterion is only applied to AMD64; treat it as satisfied elsewhere
            high_scc_node_edge_ratio = True

        pushf_ctr = 0
        popf_ctr = 0
        clc_ctr = 0  # only used for x86
        is_x86 = self.project.arch.name == "X86"
        cfg_node_count = len(self._cfg.graph)
        for node in self._cfg.nodes():
            if node.size > 0 and node.instruction_addrs:
                block = node.block
                for insn in block.capstone.insns:
                    if insn.mnemonic in {"pushf", "pushfd", "pushfq"}:
                        pushf_ctr += 1
                    elif insn.mnemonic in {"popf", "popfd", "popfq"}:
                        popf_ctr += 1
                    elif is_x86 and insn.mnemonic == "clc":
                        clc_ctr += 1

        # an instruction counts as "frequent" when it occurs more often than 0.2% of the number of CFG nodes
        if pushf_ctr > cfg_node_count * 0.002:
            high_pushf = True
        if popf_ctr > cfg_node_count * 0.002:
            high_popf = True
        if not is_x86 or clc_ctr > cfg_node_count * 0.002:
            high_clc = True  # noqa: F841

        if high_scc_node_edge_ratio and high_pushf and high_popf:
            return "vmprotect"
        return None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
AnalysesHub.register_default("ObfuscationDetector", ObfuscationDetector)
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
3
|
+
import math
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
from angr.analyses.analysis import Analysis, AnalysesHub
|
|
7
|
+
from angr.knowledge_plugins.cfg import CFGModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from cle import Section
|
|
12
|
+
|
|
13
|
+
_l = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class PackingDetector(Analysis):
    """
    This analysis detects if a binary is likely packed or not. We may extend it to identify which packer is in use in
    the future.

    The verdict is stored in ``packed``: it is True when the regions of readable and executable sections that are
    not covered by CFG nodes contain at least ``PACKED_MIN_BYTES`` bytes whose normalized entropy is at least
    ``PACKED_ENTROPY_MIN_THRESHOLD``.
    """

    PACKED_MIN_BYTES = 256
    PACKED_ENTROPY_MIN_THRESHOLD = 0.88

    def __init__(self, cfg: CFGModel | None = None, region_size_threshold: int = 0x20):
        """
        :param cfg:                     The CFG model to inspect. When None, the most accurate CFG model in the
                                        knowledge base is used.
        :param region_size_threshold:   Gaps (and uncovered sections) must be larger than this many bytes to be
                                        taken into account.
        """
        self.packed: bool = False
        self.region_size_threshold: int = region_size_threshold

        if cfg is None:
            _l.warning(
                "PackingDetector is using a most accurate CFG model in the knowledge base. We assume it is "
                "generated with force_smart_scan=False and force_complete_scan=False."
            )
            self._cfg = self.kb.cfgs.get_most_accurate()
        else:
            self._cfg = cfg

        self.analyze()

    def analyze(self):
        """
        Compute the entropy of the bytes that the CFG does not cover and set ``packed`` accordingly.
        """
        # assume we already have a CFG with complete scanning disabled
        # collect all regions that are not covered by the CFG in r+x sections, and then compute the entropy. we believe
        # the binary is packed if it is beyond a threshold

        covered_regions: list[tuple[int, int]] = []
        last_known_section: Section | None = None
        for node in sorted(self._cfg.nodes(), key=lambda n: n.addr):
            # reuse the last r+x section when it still contains this node to avoid a loader lookup per node
            if last_known_section is None or not last_known_section.contains_addr(node.addr):
                section = self.project.loader.find_section_containing(node.addr)
                if section is None:
                    # this node does not belong to any known section - ignore it
                    continue
                if section.is_readable and section.is_executable:
                    last_known_section = section

            if node.size == 0:
                # ignore empty nodes
                continue

            node_end = node.addr + node.size
            if covered_regions:
                last_start, last_end = covered_regions[-1]
                if last_start <= node.addr <= last_end < node_end:
                    # the node starts inside (or right at the end of) the last region and extends it
                    covered_regions[-1] = last_start, node_end
                    continue
            # first region, a disjoint node, or a node fully contained in the last region
            covered_regions.append((node.addr, node_end))

        # now we get the uncovered regions
        uncovered_regions: list[tuple[int, int]] = self._get_uncovered_regions(covered_regions)

        # compute entropy
        total_bytes, entropy = self._compute_entropy(uncovered_regions)

        self.packed = total_bytes >= self.PACKED_MIN_BYTES and entropy >= self.PACKED_ENTROPY_MIN_THRESHOLD

    def _get_uncovered_regions(self, covered_regions: list[tuple[int, int]]) -> list[tuple[int, int]]:
        """
        Return the gaps between covered regions inside initialized, readable and executable sections.

        :param covered_regions: ``(start, end)`` pairs produced by ``analyze``.
        :return:                ``(start, end)`` pairs of regions larger than ``region_size_threshold`` bytes.
        """
        # FIXME: We only support binaries with sections. Add support for segments in the future
        all_executable_sections = sorted(
            (
                sec
                for sec in self.project.loader.main_object.sections
                if sec.is_executable and sec.is_readable and not sec.only_contains_uninitialized_data
            ),
            key=lambda sec: sec.vaddr,
        )
        idx = 0

        uncovered_regions: list[tuple[int, int]] = []
        for section in all_executable_sections:
            section_end = section.vaddr + section.memsize
            if idx >= len(covered_regions):
                # no covered region is left: the entire section is uncovered
                if section.memsize > self.region_size_threshold:
                    uncovered_regions.append((section.vaddr, section_end))
                continue

            # NOTE(review): the gap between the last covered region in a section and the end of that section is
            #  not reported here - confirm whether that is intended.
            i = idx
            last_end = section.vaddr
            while i < len(covered_regions):
                region_start, region_end = covered_regions[i]
                if region_end >= section_end:
                    # move on to the next section
                    break
                if last_end < region_start and region_start - last_end > self.region_size_threshold:
                    uncovered_regions.append((last_end, region_start))
                i += 1
                last_end = max(last_end, region_end)
            idx = i

        return uncovered_regions

    def _compute_entropy(self, regions: list[tuple[int, int]]) -> tuple[int, float]:
        """
        Compute the entropy of all bytes in ``regions``, using log base 256 so the result lies in [0, 1].

        :param regions: ``(start, end)`` address pairs to read from the loader memory.
        :return:        A tuple of (number of bytes read, entropy). ``(0, 0.0)`` when no byte was read.
        """
        byte_counts = [0] * 256

        for start, end in regions:
            for b in self.project.loader.memory.load(start, end - start):
                byte_counts[b] += 1

        total = sum(byte_counts)
        if total == 0:
            return 0, 0.0

        entropy = 0.0
        for count in byte_counts:
            if count == 0:
                # a byte value that never occurs contributes nothing (and log(0) is undefined)
                continue
            p = count / total
            entropy -= p * math.log(p, 256)

        return total, entropy
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
AnalysesHub.register_default("PackingDetector", PackingDetector)
|
angr/angrdb/models.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
from sqlalchemy import Column, Integer, String, Boolean, BLOB, ForeignKey
|
|
2
|
+
from sqlalchemy import Column, Integer, String, Boolean, BLOB, TEXT, ForeignKey
|
|
3
3
|
from sqlalchemy.orm import declarative_base, relationship
|
|
4
4
|
|
|
5
5
|
Base = declarative_base()
|
|
@@ -127,6 +127,7 @@ class DbStructuredCode(Base):
|
|
|
127
127
|
configuration = Column(BLOB, nullable=True)
|
|
128
128
|
const_formats = Column(BLOB, nullable=True)
|
|
129
129
|
ite_exprs = Column(BLOB, nullable=True)
|
|
130
|
+
errors = Column(TEXT, nullable=True)
|
|
130
131
|
|
|
131
132
|
|
|
132
133
|
class DbXRefs(Base):
|
angr/angrdb/serializers/kb.py
CHANGED
|
@@ -16,7 +16,7 @@ class KnowledgeBaseSerializer:
|
|
|
16
16
|
"""
|
|
17
17
|
|
|
18
18
|
@staticmethod
|
|
19
|
-
def dump(session, kb):
|
|
19
|
+
def dump(session, kb: KnowledgeBase):
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
:param session: The database session object.
|
|
@@ -40,7 +40,7 @@ class KnowledgeBaseSerializer:
|
|
|
40
40
|
CommentsSerializer.dump(session, db_kb, kb.comments)
|
|
41
41
|
LabelsSerializer.dump(session, db_kb, kb.labels)
|
|
42
42
|
VariableManagerSerializer.dump(session, db_kb, kb.variables)
|
|
43
|
-
StructuredCodeManagerSerializer.dump(session, db_kb, kb.
|
|
43
|
+
StructuredCodeManagerSerializer.dump(session, db_kb, kb.decompilations)
|
|
44
44
|
|
|
45
45
|
@staticmethod
|
|
46
46
|
def load(session, project, name):
|
|
@@ -89,7 +89,7 @@ class KnowledgeBaseSerializer:
|
|
|
89
89
|
# Load structured code
|
|
90
90
|
structured_code = StructuredCodeManagerSerializer.load(session, db_kb, kb)
|
|
91
91
|
if structured_code is not None:
|
|
92
|
-
kb.
|
|
92
|
+
kb.decompilations = structured_code
|
|
93
93
|
|
|
94
94
|
if cfg_model is not None:
|
|
95
95
|
# CFG may not exist for all knowledge bases
|
|
@@ -37,15 +37,15 @@ class StructuredCodeManagerSerializer:
|
|
|
37
37
|
# TODO: Cache types
|
|
38
38
|
|
|
39
39
|
expr_comments = None
|
|
40
|
-
if cache.codegen.expr_comments:
|
|
40
|
+
if cache.codegen is not None and cache.codegen.expr_comments:
|
|
41
41
|
expr_comments = json.dumps(cache.codegen.expr_comments).encode("utf-8")
|
|
42
42
|
|
|
43
43
|
stmt_comments = None
|
|
44
|
-
if cache.codegen.stmt_comments:
|
|
44
|
+
if cache.codegen is not None and cache.codegen.stmt_comments:
|
|
45
45
|
stmt_comments = json.dumps(cache.codegen.stmt_comments).encode("utf-8")
|
|
46
46
|
|
|
47
47
|
const_formats = None
|
|
48
|
-
if cache.codegen.const_formats:
|
|
48
|
+
if cache.codegen is not None and cache.codegen.const_formats:
|
|
49
49
|
const_formats = pickle.dumps(cache.codegen.const_formats)
|
|
50
50
|
|
|
51
51
|
ite_exprs = None
|
|
@@ -60,6 +60,7 @@ class StructuredCodeManagerSerializer:
|
|
|
60
60
|
stmt_comments=stmt_comments,
|
|
61
61
|
const_formats=const_formats,
|
|
62
62
|
ite_exprs=ite_exprs,
|
|
63
|
+
errors="\n\n\n".join(cache.errors),
|
|
63
64
|
# configuration=configuration,
|
|
64
65
|
)
|
|
65
66
|
session.add(db_code)
|
|
@@ -118,6 +119,7 @@ class StructuredCodeManagerSerializer:
|
|
|
118
119
|
cache = DecompilationCache(db_code.func_addr)
|
|
119
120
|
cache.codegen = dummy_codegen
|
|
120
121
|
cache.ite_exprs = ite_exprs
|
|
122
|
+
cache.errors = db_code.errors.split("\n\n\n")
|
|
121
123
|
manager[(db_code.func_addr, db_code.flavor)] = cache
|
|
122
124
|
|
|
123
125
|
return manager
|