angr 9.2.125__py3-none-win_amd64.whl → 9.2.126__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (34) hide show
  1. angr/__init__.py +1 -1
  2. angr/analyses/__init__.py +4 -0
  3. angr/analyses/decompiler/ail_simplifier.py +1 -0
  4. angr/analyses/decompiler/callsite_maker.py +9 -1
  5. angr/analyses/decompiler/clinic.py +1 -1
  6. angr/analyses/decompiler/condition_processor.py +104 -66
  7. angr/analyses/decompiler/decompiler.py +3 -0
  8. angr/analyses/decompiler/optimization_passes/__init__.py +15 -1
  9. angr/analyses/decompiler/return_maker.py +1 -0
  10. angr/analyses/decompiler/ssailification/rewriting.py +4 -0
  11. angr/analyses/decompiler/ssailification/rewriting_engine.py +10 -3
  12. angr/analyses/decompiler/structured_codegen/c.py +18 -2
  13. angr/analyses/deobfuscator/__init__.py +18 -0
  14. angr/analyses/deobfuscator/api_obf_finder.py +313 -0
  15. angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +51 -0
  16. angr/analyses/deobfuscator/irsb_reg_collector.py +85 -0
  17. angr/analyses/deobfuscator/string_obf_finder.py +774 -0
  18. angr/analyses/deobfuscator/string_obf_opt_passes.py +133 -0
  19. angr/analyses/deobfuscator/string_obf_peephole_optimizer.py +47 -0
  20. angr/analyses/reaching_definitions/function_handler_library/stdio.py +8 -1
  21. angr/analyses/unpacker/__init__.py +6 -0
  22. angr/analyses/unpacker/obfuscation_detector.py +103 -0
  23. angr/analyses/unpacker/packing_detector.py +138 -0
  24. angr/calling_conventions.py +3 -1
  25. angr/engines/vex/claripy/irop.py +10 -5
  26. angr/knowledge_plugins/__init__.py +2 -0
  27. angr/knowledge_plugins/obfuscations.py +36 -0
  28. angr/lib/angr_native.dll +0 -0
  29. {angr-9.2.125.dist-info → angr-9.2.126.dist-info}/METADATA +6 -6
  30. {angr-9.2.125.dist-info → angr-9.2.126.dist-info}/RECORD +34 -23
  31. {angr-9.2.125.dist-info → angr-9.2.126.dist-info}/WHEEL +1 -1
  32. {angr-9.2.125.dist-info → angr-9.2.126.dist-info}/LICENSE +0 -0
  33. {angr-9.2.125.dist-info → angr-9.2.126.dist-info}/entry_points.txt +0 -0
  34. {angr-9.2.125.dist-info → angr-9.2.126.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,313 @@
1
+ # pylint:disable=missing-class-docstring,too-many-boolean-expressions
2
+ from __future__ import annotations
3
+ from typing import Any
4
+ from enum import IntEnum
5
+ import string
6
+ import logging
7
+
8
+ import networkx
9
+
10
+ import claripy
11
+
12
+ from angr import SIM_LIBRARIES
13
+ from angr.calling_conventions import SimRegArg
14
+ from angr.errors import SimMemoryMissingError
15
+ from angr.knowledge_plugins.key_definitions.constants import ObservationPointType
16
+ from angr.sim_type import SimTypePointer, SimTypeChar
17
+ from angr.analyses import Analysis, AnalysesHub
18
+ from angr.procedures.definitions import SimSyscallLibrary
19
+ from angr.sim_variable import SimMemoryVariable
20
+ from angr.analyses.decompiler.structured_codegen.c import (
21
+ CStructuredCodeWalker,
22
+ CFunctionCall,
23
+ CConstant,
24
+ CAssignment,
25
+ CVariable,
26
+ )
27
+
28
+ # Module-level logger; used below to warn about conflicting or duplicate assignments.
+ _l = logging.getLogger(name=__name__)
29
+
30
+
31
class APIObfuscationType(IntEnum):
    """
    Enumerates the API "obfuscation" styles known to this module.
    """

    # the only style defined so far
    TYPE_1 = 0
33
+
34
+
35
class APIDeobFuncDescriptor:
    """
    Describes one function that was identified as an API "deobfuscation" routine.

    :ivar type:             The obfuscation style of the routine.
    :ivar func_addr:        Address of the routine, or None when not given.
    :ivar libname_argidx:   Index of the argument that carries the library name, or None when not given.
    :ivar funcname_argidx:  Index of the argument that carries the API name, or None when not given.
    """

    def __init__(
        self,
        type_: APIObfuscationType,
        func_addr=None,
        libname_argidx=None,
        funcname_argidx=None,
    ):
        self.type = type_
        self.func_addr = func_addr
        # positions of the two string arguments in the routine's argument list
        self.libname_argidx = libname_argidx
        self.funcname_argidx = funcname_argidx
41
+
42
+
43
class Type1AssignmentFinder(CStructuredCodeWalker):
    """
    Walk decompiled C code and record every assignment of the form
    ``<memory variable at a constant address> = func(..., "libname", ..., "funcname", ...)``
    where ``func`` is the function at ``func_addr``.

    :ivar func_addr:    Address of the callee whose call sites are of interest.
    :ivar desc:         Descriptor that tells which arguments hold the library name and the API name.
    :ivar assignments:  Maps the address of each assigned variable to ``(libname, funcname)``.
    """

    def __init__(self, func_addr: int, desc: APIDeobFuncDescriptor):
        self.func_addr = func_addr
        self.desc = desc
        self.assignments: dict[int, tuple[str, str]] = {}

    @staticmethod
    def _constant_string(arg: CConstant) -> str | None:
        """
        Return the string referenced by a constant argument, or None if there is none.

        None is returned when the constant has no reference values, when there is no reference value for the
        constant's own type, when the referenced content is not ``bytes``, or when the bytes are not valid UTF-8.
        """
        ref_values = arg.reference_values
        # reference_values may be None for plain constants; `in None` would raise TypeError
        if not ref_values or arg.type not in ref_values:
            return None
        # not every reference value necessarily carries a `content` attribute
        content = getattr(ref_values[arg.type], "content", None)
        if not isinstance(content, bytes):
            return None
        try:
            return content.decode("utf-8")
        except UnicodeDecodeError:
            # the bytes come from the analyzed binary and are not guaranteed to be valid UTF-8
            return None

    def _resolved_names(self, obj: CAssignment) -> tuple[str, str] | None:
        """
        Return ``(libname, funcname)`` if ``obj`` assigns the result of a call to ``self.func_addr`` with two
        constant, non-empty string arguments to a memory variable at a constant address; otherwise None.
        """
        lhs, rhs = obj.lhs, obj.rhs
        if not (
            isinstance(lhs, CVariable)
            and isinstance(lhs.variable, SimMemoryVariable)
            and isinstance(lhs.variable.addr, int)
            and isinstance(rhs, CFunctionCall)
            and isinstance(rhs.callee_target, CConstant)
            and rhs.callee_target.value == self.func_addr
        ):
            return None

        funcname_idx = self.desc.funcname_argidx
        libname_idx = self.desc.libname_argidx
        # the descriptor defaults both indices to None; comparing None with an int would raise TypeError
        if funcname_idx is None or libname_idx is None:
            return None

        func_args = rhs.args
        if funcname_idx >= len(func_args) or libname_idx >= len(func_args):
            return None

        funcname_arg = func_args[funcname_idx]
        libname_arg = func_args[libname_idx]
        if not (isinstance(funcname_arg, CConstant) and isinstance(libname_arg, CConstant)):
            return None

        funcname = self._constant_string(funcname_arg)
        libname = self._constant_string(libname_arg)
        if funcname and libname:
            return libname, funcname
        return None

    def handle_CAssignment(self, obj: CAssignment):
        """
        Record the assignment if it matches the expected pattern, then continue walking.

        The first pair observed for a variable address is kept; a later, different pair only triggers a warning.
        """
        names = self._resolved_names(obj)
        if names is not None:
            var_addr = obj.lhs.variable.addr
            if self.assignments.setdefault(var_addr, names) != names:
                _l.warning("Observed more than one assignment for variable at %#x.", var_addr)

        return super().handle_CAssignment(obj)
85
+
86
+
87
class APIObfuscationFinder(Analysis):
    """
    An analysis that automatically finds API "obfuscation" routines.

    Currently, we support the following API "obfuscation" styles:

    - sub_A("dll_name", "api_name") where sub_A ends up calling LoadLibrary.

    Results are stored in ``self.kb.obfuscations.type1_deobfuscated_apis``.
    """

    # byte values that may appear in a library name or an API name
    _VALID_APINAME_CHARSET = frozenset(ord(ch) for ch in (string.ascii_letters + string.digits + "._"))

    def __init__(self):
        self.type1_candidates = []

        self.analyze()

    def analyze(self):
        """
        Find all type-1 candidates and record the APIs resolved at their call sites in the knowledge base.
        """
        self.type1_candidates = self._find_type1()

        for desc in self.type1_candidates:
            type1_deobfuscated = self._analyze_type1(desc.func_addr, desc)
            self.kb.obfuscations.type1_deobfuscated_apis.update(type1_deobfuscated)

    def _find_type1(self):
        """
        Find functions that are likely type-1 API deobfuscation routines.

        :return: A list of APIDeobFuncDescriptor objects; empty if no LoadLibrary SimProcedure is known.
        """
        cfg = self.kb.cfgs.get_most_accurate()

        load_library_funcs = []
        for name in ("LoadLibraryA", "LoadLibraryW", "LoadLibrary"):
            if name in self.kb.functions:
                load_library_funcs += list(self.kb.functions.get_by_name(name))
        load_library_funcs = [func for func in load_library_funcs if func.is_simprocedure]

        if not load_library_funcs:
            # always return a list so that the result matches the initial value of type1_candidates
            return []

        # find callers of each load library func, up to three callers back
        callgraph = self.kb.functions.callgraph
        descs = []
        checked: set[int] = set()
        for load_library_func in load_library_funcs:
            subtree = self._build_caller_subtree(callgraph, load_library_func.addr, 3)
            if load_library_func.addr not in subtree:
                # no callers: the subtree has no nodes and bfs_successors() would raise on the missing source
                continue
            for _, succs in networkx.bfs_successors(subtree, load_library_func.addr):
                for succ_addr in succs:
                    if succ_addr in checked:
                        # already examined through another LoadLibrary variant
                        continue
                    checked.add(succ_addr)

                    func = self.kb.functions.get_by_addr(succ_addr)
                    likely, info = self._is_likely_type1_func(func, cfg)
                    if likely:
                        descs.append(
                            APIDeobFuncDescriptor(
                                APIObfuscationType.TYPE_1,
                                func_addr=func.addr,
                                libname_argidx=info["libname_arg_idx"],
                                funcname_argidx=info["funcname_arg_idx"],
                            )
                        )

        return descs

    def _is_likely_type1_func(self, func, cfg):
        """
        Test whether ``func`` looks like a type-1 routine: it has exactly two char-pointer arguments, and at least
        one call site passes a known library name and a known API name through them.

        :param func:    The function to test.
        :param cfg:     The CFG model used for decompilation and for locating call sites.
        :return:        ``(True, {"libname_arg_idx": int, "funcname_arg_idx": int})`` or ``(False, None)``.
        """
        if func.prototype is None or len(func.prototype.args) < 2:
            return False, None

        # decompile the function to get a prototype with types
        try:
            _ = self.project.analyses.Decompiler(func, cfg=cfg)
        except Exception:  # pylint:disable=broad-exception-caught
            # same best-effort policy as _analyze_type1: a failing decompilation only disqualifies the function
            _l.debug("Failed to decompile function %#x.", func.addr, exc_info=True)
            return False, None

        char_ptr_args = [
            idx
            for (idx, arg) in enumerate(func.prototype.args)
            if isinstance(arg, SimTypePointer) and isinstance(arg.pts_to, SimTypeChar)
        ]
        if len(char_ptr_args) != 2:
            return False, None

        # who's calling it?
        caller_addrs = sorted(set(self.kb.functions.callgraph.predecessors(func.addr)))
        if not caller_addrs:
            return False, None

        func_node = cfg.get_any_node(func.addr)
        if func_node is None:
            return False, None
        # the predecessors of the callee do not depend on the caller; look them up once
        callee_preds = list(cfg.get_predecessors(func_node))

        for caller_addr in caller_addrs:
            # what arguments are used to call this function with?
            callsite_addrs = [
                pred.instruction_addrs[-1]
                for pred in callee_preds
                if pred.function_address == caller_addr and pred.instruction_addrs
            ]
            if not callsite_addrs:
                # nothing to observe in this caller; skip the reaching-definition analysis
                continue

            rda = self.project.analyses.ReachingDefinitions(
                self.kb.functions[caller_addr],
                observe_all=False,
                observation_points=[("insn", addr, ObservationPointType.OP_BEFORE) for addr in callsite_addrs],
            )
            for callsite_addr in callsite_addrs:
                observ = rda.model.get_observation_by_insn(callsite_addr, ObservationPointType.OP_BEFORE)
                if observ is None:
                    continue

                args = self._load_callsite_args(observ, func)
                arg_strs = self._load_candidate_strings(args)
                # exactly two strings are expected; anything else disqualifies this call site only
                if arg_strs is None or len(arg_strs) != 2:
                    continue

                libname_arg_idx, funcname_arg_idx = None, None
                for arg_idx, name in arg_strs:
                    if self.is_libname(name):
                        libname_arg_idx = arg_idx
                    elif self.is_apiname(name):
                        funcname_arg_idx = arg_idx

                if libname_arg_idx is not None and funcname_arg_idx is not None:
                    return True, {"libname_arg_idx": libname_arg_idx, "funcname_arg_idx": funcname_arg_idx}

        return False, None

    def _load_callsite_args(self, observ, func) -> list[tuple[int, Any]]:
        """
        Load the values of all register-passed arguments of ``func`` from an observed state.

        :param observ:  The reaching-definition state observed right before the call instruction.
        :param func:    The callee.
        :return:        A list of ``(argument index, value)``; unknown values are replaced with a placeholder.
        """
        arch = self.project.arch
        args: list[tuple[int, Any]] = []
        for arg_idx, func_arg in enumerate(func.arguments):
            # FIXME: We are ignoring all non-register function arguments until we see a test case where
            # FIXME: stack-passing arguments are used
            if not isinstance(func_arg, SimRegArg):
                continue
            reg_offset, reg_size = arch.registers[func_arg.reg_name]
            try:
                mv = observ.registers.load(reg_offset, size=reg_size)
            except SimMemoryMissingError:
                arg_value = None
            else:
                arg_value = mv.one_value()
            if arg_value is None:
                # placeholder for a missing or ambiguous value
                arg_value = claripy.BVV(0xDEADBEEF, arch.bits)
            args.append((arg_idx, arg_value))
        return args

    def _load_candidate_strings(self, args) -> list[tuple[int, str]] | None:
        """
        Read the strings that the concrete arguments point to.

        Arguments that are not concrete, or that do not point into any section, are skipped.

        :param args:    A list of ``(argument index, value)`` as returned by ``_load_callsite_args``.
        :return:        A list of ``(argument index, string)``, or None if any pointed-to memory cannot be loaded
                        or contains bytes that cannot be part of a library name or an API name.
        """
        loader = self.project.loader
        arg_strs: list[tuple[int, str]] = []
        for idx, arg in args:
            if arg is None or not arg.concrete:
                continue
            v = arg.concrete_value
            section = loader.find_section_containing(v)
            if section is None:
                continue
            # what string is it?
            max_size = min(64, section.max_addr - v)
            try:
                value = loader.memory.load(v, max_size)
            except KeyError:
                return None
            # keep only the part before the first NUL byte
            value = value.partition(b"\x00")[0]
            if not all(ch in self._VALID_APINAME_CHARSET for ch in value):
                return None
            arg_strs.append((idx, value.decode("utf-8")))
        return arg_strs

    def _analyze_type1(self, func_addr, desc: APIDeobFuncDescriptor) -> dict[int, tuple[str, str]]:
        """
        Decompile every caller of the routine at ``func_addr`` and collect the assignments of its return value.

        :param func_addr:   Address of the type-1 routine.
        :param desc:        The descriptor of the routine.
        :return:            A dict that maps variable addresses to ``(libname, funcname)``.
        """
        cfg = self.kb.cfgs.get_most_accurate()

        assignments: dict[int, tuple[str, str]] = {}

        # get all call sites
        caller_addrs = sorted(set(self.kb.functions.callgraph.predecessors(func_addr)))
        for caller_addr in caller_addrs:
            # decompile the function and get all assignments of the return value of the func at func_addr
            try:
                dec = self.project.analyses.Decompiler(self.kb.functions.get_by_addr(caller_addr), cfg=cfg)
            except Exception:  # pylint:disable=broad-exception-caught
                # best effort: a caller that cannot be decompiled is skipped, but leave a trace for debugging
                _l.debug("Failed to decompile caller %#x.", caller_addr, exc_info=True)
                continue
            if dec.codegen is None:
                continue

            finder = Type1AssignmentFinder(func_addr, desc)
            finder.handle(dec.codegen.cfunc)

            duplicate_addrs = assignments.keys() & finder.assignments.keys()
            if duplicate_addrs:
                # duplicate entries; format the addresses eagerly, since str() of a map object prints its repr
                _l.warning(
                    "Observed duplicate assignments at the following addresses: %s.",
                    ", ".join(hex(addr) for addr in sorted(duplicate_addrs)),
                )

            assignments.update(finder.assignments)

        return assignments

    @staticmethod
    def _build_caller_subtree(callgraph: networkx.DiGraph, func_addr: int, max_level: int) -> networkx.DiGraph:
        """
        Build a tree of the direct and indirect callers of a function.

        Edges point from a callee to its caller. Each caller appears once, at the first level it is reached.

        :param callgraph:   The call graph to traverse.
        :param func_addr:   Address of the function at the root of the tree.
        :param max_level:   Maximum number of caller levels to include.
        :return:            The tree; it has no nodes if the function is not in the call graph or has no callers.
        """
        tree = networkx.DiGraph()

        if func_addr not in callgraph:
            return tree

        traversed = {func_addr}
        frontier = [func_addr]
        # breadth-first, one caller level per iteration
        for _ in range(max_level):
            next_frontier = []
            for addr in frontier:
                for pred in callgraph.predecessors(addr):
                    if pred not in traversed:
                        traversed.add(pred)
                        next_frontier.append(pred)
                        tree.add_edge(addr, pred)
            if not next_frontier:
                break
            frontier = next_frontier

        return tree

    @staticmethod
    def is_libname(name: str) -> bool:
        """
        Test whether ``name`` is a known library name. The check is case-insensitive, and a name without any dot
        is also tried with ".dll" and ".exe" appended.
        """
        name = name.lower()
        if name in SIM_LIBRARIES:
            return True
        if "." not in name:
            return name + ".dll" in SIM_LIBRARIES or name + ".exe" in SIM_LIBRARIES
        return False

    @staticmethod
    def is_apiname(name: str) -> bool:
        """
        Test whether any non-syscall library in SIM_LIBRARIES has a prototype for ``name``.
        """
        return any(not isinstance(lib, SimSyscallLibrary) and lib.has_prototype(name) for lib in SIM_LIBRARIES.values())
311
+
312
+
313
+ AnalysesHub.register_default("APIObfuscationFinder", APIObfuscationFinder)
@@ -0,0 +1,51 @@
1
+ from __future__ import annotations
2
+ from ailment.expression import Const, Load
3
+
4
+ from angr import SIM_LIBRARIES
5
+ from angr.calling_conventions import default_cc
6
+ from angr.analyses.decompiler.peephole_optimizations.base import PeepholeOptimizationExprBase
7
+ from angr.analyses.decompiler.peephole_optimizations import EXPR_OPTS
8
+
9
+
10
class APIObfType1PeepholeOptimizer(PeepholeOptimizationExprBase):
    """
    Integrate type-1 deobfuscated API into decompilation output.

    A load from a constant address that is recorded in ``kb.obfuscations.type1_deobfuscated_apis`` is replaced
    with a constant holding the address of the function that carries the resolved API name.
    """

    __slots__ = ()

    NAME = "Simplify Type 1 API obfuscation references"
    expr_classes = (Load,)

    def optimize(self, expr: Load, **kwargs):
        """
        :param expr:    The load expression to inspect.
        :return:        A Const with the address of the resolved API function, or None if ``expr`` is not an
                        architecture-sized load from a recorded address.
        """
        addr_expr = expr.addr
        if not isinstance(addr_expr, Const):
            return None

        deobfuscated_apis = self.kb.obfuscations.type1_deobfuscated_apis
        if addr_expr.value not in deobfuscated_apis or expr.bits != self.project.arch.bits:
            return None

        # this is actually a function calling a known API
        # replace it with the actual API and the actual arguments
        _, funcname = deobfuscated_apis[addr_expr.value]

        if funcname in self.kb.functions:
            func = self.kb.functions[funcname]
        else:
            # assign a new function on-demand
            symbol = self.project.loader.extern_object.make_extern(funcname)
            target_addr = symbol.rebased_addr
            stub = SIM_LIBRARIES["linux"].get_stub(funcname, self.project.arch)
            hook_addr = self.project.hook_symbol(target_addr, stub)
            func = self.kb.functions.function(addr=hook_addr, name=funcname, create=True)
            func.is_simprocedure = True

            # pick the default calling convention for the architecture (and platform, when a SimOS is present)
            cc_kwargs = {}
            if self.project.simos is not None:
                cc_kwargs["platform"] = self.project.simos.name
            cc_cls = default_cc(self.project.arch.name, **cc_kwargs)
            if cc_cls is not None:
                func.calling_convention = cc_cls(self.project.arch)
            func.find_declaration(ignore_binary_name=True)

        return Const(expr.idx, None, func.addr, self.project.arch.bits, **expr.tags)
49
+
50
+
51
+ EXPR_OPTS.append(APIObfType1PeepholeOptimizer)
@@ -0,0 +1,85 @@
1
+ # pylint:disable=no-self-use,unused-argument,attribute-defined-outside-init
2
+ from __future__ import annotations
3
+
4
+ import pyvex
5
+
6
+ from angr.engines.light import SimEngineLightVEXMixin
7
+
8
+
9
class IRSBRegisterCollector(SimEngineLightVEXMixin):
    """
    Scan the VEX IRSB to collect all registers that are read.

    Register reads are stored in ``reg_reads`` as ``(offset, size)`` tuples. Every handler other than
    ``_handle_Get`` is overridden to do nothing.
    """

    def __init__(self, block, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.block = block
        self.reg_reads: set[tuple[int, int]] = set()

    def process(self):
        """
        Reset the temporaries, take the type environment from the block's VEX IRSB, run the inherited statement
        processing, and finally clear ``stmt_idx`` and ``ins_addr``.
        """
        self.tmps = {}
        self.tyenv = self.block.vex.tyenv

        self._process_Stmt()

        self.stmt_idx = self.ins_addr = None

    #
    # Statement and expression handlers. Only register reads are of interest.
    #

    def _handle_Put(self, stmt):
        """Ignored."""

    def _handle_Load(self, expr):
        """Ignored."""

    def _handle_Store(self, stmt):
        """Ignored."""

    def _handle_LoadG(self, stmt):
        """Ignored."""

    def _handle_LLSC(self, stmt: pyvex.IRStmt.LLSC):
        """Ignored."""

    def _handle_StoreG(self, stmt):
        """Ignored."""

    def _handle_Get(self, expr: pyvex.IRExpr.Get):
        """Record the register read as ``(offset, size)``, with the size taken from ``expr.result_size``."""
        reg_read = (expr.offset, expr.result_size(self.tyenv))
        self.reg_reads.add(reg_read)

    def _handle_RdTmp(self, expr):
        """Ignored."""

    def _handle_Conversion(self, expr: pyvex.IRExpr.Unop):
        """Ignored."""

    def _handle_16HLto32(self, expr):
        """Ignored."""

    def _handle_Cmp_v(self, expr, _vector_size, _vector_count):
        """Ignored."""

    # all vector comparisons share the same no-op handler
    _handle_CmpEQ_v = _handle_Cmp_v
    _handle_CmpNE_v = _handle_Cmp_v
    _handle_CmpLE_v = _handle_Cmp_v
    _handle_CmpLT_v = _handle_Cmp_v
    _handle_CmpGE_v = _handle_Cmp_v
    _handle_CmpGT_v = _handle_Cmp_v

    def _handle_ExpCmpNE64(self, expr):
        """Ignored."""

    def _handle_CCall(self, expr):
        """Ignored."""

    def _handle_function(self, func_addr):
        """Ignored."""

    def _handle_Unop(self, expr):
        """Ignored."""

    def _handle_Binop(self, expr: pyvex.IRExpr.Binop):
        """Ignored."""

    def _handle_Triop(self, expr: pyvex.IRExpr.Triop):
        """Ignored."""