angr 9.2.125__py3-none-win_amd64.whl → 9.2.127__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (51) hide show
  1. angr/__init__.py +1 -1
  2. angr/analyses/__init__.py +4 -0
  3. angr/analyses/analysis.py +8 -2
  4. angr/analyses/cfg/cfg_fast.py +12 -1
  5. angr/analyses/decompiler/ail_simplifier.py +1 -0
  6. angr/analyses/decompiler/callsite_maker.py +9 -1
  7. angr/analyses/decompiler/clinic.py +2 -1
  8. angr/analyses/decompiler/condition_processor.py +109 -73
  9. angr/analyses/decompiler/decompilation_cache.py +4 -0
  10. angr/analyses/decompiler/decompiler.py +21 -3
  11. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +1 -2
  12. angr/analyses/decompiler/optimization_passes/__init__.py +15 -1
  13. angr/analyses/decompiler/return_maker.py +1 -0
  14. angr/analyses/decompiler/ssailification/rewriting.py +4 -0
  15. angr/analyses/decompiler/ssailification/rewriting_engine.py +10 -3
  16. angr/analyses/decompiler/ssailification/traversal.py +1 -0
  17. angr/analyses/decompiler/ssailification/traversal_engine.py +15 -0
  18. angr/analyses/decompiler/structured_codegen/c.py +18 -5
  19. angr/analyses/decompiler/structured_codegen/dwarf_import.py +4 -1
  20. angr/analyses/deobfuscator/__init__.py +18 -0
  21. angr/analyses/deobfuscator/api_obf_finder.py +313 -0
  22. angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +51 -0
  23. angr/analyses/deobfuscator/irsb_reg_collector.py +85 -0
  24. angr/analyses/deobfuscator/string_obf_finder.py +774 -0
  25. angr/analyses/deobfuscator/string_obf_opt_passes.py +133 -0
  26. angr/analyses/deobfuscator/string_obf_peephole_optimizer.py +47 -0
  27. angr/analyses/reaching_definitions/function_handler_library/stdio.py +8 -1
  28. angr/analyses/reaching_definitions/function_handler_library/string.py +2 -2
  29. angr/analyses/s_liveness.py +3 -3
  30. angr/analyses/s_propagator.py +74 -3
  31. angr/analyses/unpacker/__init__.py +6 -0
  32. angr/analyses/unpacker/obfuscation_detector.py +103 -0
  33. angr/analyses/unpacker/packing_detector.py +138 -0
  34. angr/angrdb/models.py +2 -1
  35. angr/angrdb/serializers/kb.py +3 -3
  36. angr/angrdb/serializers/structured_code.py +5 -3
  37. angr/calling_conventions.py +4 -2
  38. angr/engines/vex/claripy/irop.py +10 -5
  39. angr/knowledge_base.py +1 -1
  40. angr/knowledge_plugins/__init__.py +2 -2
  41. angr/knowledge_plugins/obfuscations.py +36 -0
  42. angr/knowledge_plugins/structured_code.py +1 -1
  43. angr/lib/angr_native.dll +0 -0
  44. angr/utils/ssa/__init__.py +8 -3
  45. {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/METADATA +6 -6
  46. {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/RECORD +50 -40
  47. {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/WHEEL +1 -1
  48. angr/knowledge_plugins/decompilation.py +0 -45
  49. {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/LICENSE +0 -0
  50. {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/entry_points.txt +0 -0
  51. {angr-9.2.125.dist-info → angr-9.2.127.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ from ailment.expression import Register, BinaryOp, StackBaseOffset, ITE, VEXCCal
7
7
  from angr.engines.light import SimEngineLight, SimEngineLightAILMixin
8
8
  from angr.utils.ssa import get_reg_offset_base
9
9
  from angr.utils.orderedset import OrderedSet
10
+ from angr.calling_conventions import default_cc
10
11
  from .traversal_state import TraversalState
11
12
 
12
13
 
@@ -23,6 +24,7 @@ class SimEngineSSATraversal(
23
24
  def __init__(
24
25
  self,
25
26
  arch,
27
+ simos,
26
28
  sp_tracker=None,
27
29
  bp_as_gpr: bool = False,
28
30
  def_to_loc=None,
@@ -33,6 +35,7 @@ class SimEngineSSATraversal(
33
35
  super().__init__()
34
36
 
35
37
  self.arch = arch
38
+ self.simos = simos
36
39
  self.sp_tracker = sp_tracker
37
40
  self.bp_as_gpr = bp_as_gpr
38
41
  self.stackvars = stackvars
@@ -75,6 +78,18 @@ class SimEngineSSATraversal(
75
78
  self._expr(stmt.false_target)
76
79
 
77
80
  def _handle_Call(self, stmt: Call):
81
+
82
+ # kill caller-saved registers
83
+ cc = (
84
+ default_cc(self.arch.name, platform=self.simos.name if self.simos is not None else None)
85
+ if stmt.calling_convention is None
86
+ else stmt.calling_convention
87
+ )
88
+ for reg_name in cc.CALLER_SAVED_REGS:
89
+ reg_offset = self.arch.registers[reg_name][0]
90
+ base_off = get_reg_offset_base(reg_offset, self.arch)
91
+ self.state.live_registers.discard(base_off)
92
+
78
93
  if stmt.ret_expr is not None and isinstance(stmt.ret_expr, Register):
79
94
  codeloc = self._codeloc()
80
95
  self.def_to_loc.append((stmt.ret_expr, codeloc))
@@ -2148,6 +2148,12 @@ class CConstant(CExpression):
2148
2148
  elif isinstance(v, Function):
2149
2149
  yield get_cpp_function_name(v.demangled_name, specialized=False, qualified=True), self
2150
2150
  return
2151
+ elif isinstance(v, str):
2152
+ yield CConstant.str_to_c_str(v), self
2153
+ return
2154
+ elif isinstance(v, bytes):
2155
+ yield CConstant.str_to_c_str(v.replace(b"\x00", b"").decode("utf-8")), self
2156
+ return
2151
2157
 
2152
2158
  if self.reference_values is not None and self._type is not None and self._type in self.reference_values:
2153
2159
  if isinstance(self._type, SimTypeInt):
@@ -2505,9 +2511,6 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2505
2511
 
2506
2512
  self._analyze()
2507
2513
 
2508
- if flavor is not None:
2509
- self.kb.structured_code[(func.addr, flavor)] = self
2510
-
2511
2514
  def reapply_options(self, options):
2512
2515
  for option, value in options:
2513
2516
  if option.param == "braces_on_own_lines":
@@ -3415,7 +3418,17 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3415
3418
  if reference_values is None:
3416
3419
  reference_values = {}
3417
3420
  type_ = unpack_typeref(type_)
3418
- if isinstance(type_, SimTypePointer) and isinstance(type_.pts_to, SimTypeChar):
3421
+ if expr.value in self.kb.obfuscations.type1_deobfuscated_strings:
3422
+ reference_values[SimTypePointer(SimTypeChar())] = self.kb.obfuscations.type1_deobfuscated_strings[
3423
+ expr.value
3424
+ ]
3425
+ inline_string = True
3426
+ elif expr.value in self.kb.obfuscations.type2_deobfuscated_strings:
3427
+ reference_values[SimTypePointer(SimTypeChar())] = self.kb.obfuscations.type2_deobfuscated_strings[
3428
+ expr.value
3429
+ ]
3430
+ inline_string = True
3431
+ elif isinstance(type_, SimTypePointer) and isinstance(type_.pts_to, SimTypeChar):
3419
3432
  # char*
3420
3433
  # Try to get a string
3421
3434
  if (
@@ -3433,7 +3446,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3433
3446
  # edge cases: (void*)"this is a constant string pointer". in this case, the type_ will be a void*
3434
3447
  # (BOT*) instead of a char*.
3435
3448
 
3436
- if isinstance(expr.value, int):
3449
+ if not reference_values and isinstance(expr.value, int):
3437
3450
  if expr.value in self.project.kb.functions:
3438
3451
  # It's a function pointer
3439
3452
  # We don't care about the actual prototype here
@@ -5,6 +5,7 @@ import logging
5
5
  from sortedcontainers import SortedList
6
6
 
7
7
  from angr.analyses import Analysis, register_analysis
8
+ from angr.analyses.decompiler.decompilation_cache import DecompilationCache
8
9
  from .base import BaseStructuredCodeGenerator, InstructionMapping, PositionMapping
9
10
  from angr.knowledge_plugins.functions.function import Function
10
11
 
@@ -30,7 +31,9 @@ class ImportSourceCode(BaseStructuredCodeGenerator, Analysis):
30
31
  self.regenerate_text()
31
32
 
32
33
  if flavor is not None and self.text:
33
- self.kb.structured_code[(function.addr, flavor)] = self
34
+ if (function.addr, flavor) not in self.kb.decompilations:
35
+ self.kb.decompilations[(function.addr, flavor)] = DecompilationCache(function.addr)
36
+ self.kb.decompilations[(function.addr, flavor)].codegen = self
34
37
 
35
38
  def regenerate_text(self):
36
39
  cache = {}
@@ -0,0 +1,18 @@
1
+ # deobfuscator is a collection of analyses that automatically identifies functions where obfuscation techniques are
2
+ # in-use.
3
+ from __future__ import annotations
4
+
5
+ from .string_obf_finder import StringObfuscationFinder
6
+ from .string_obf_peephole_optimizer import StringObfType1PeepholeOptimizer
7
+ from .string_obf_opt_passes import StringObfType3Rewriter
8
+ from .api_obf_finder import APIObfuscationFinder
9
+ from .api_obf_peephole_optimizer import APIObfType1PeepholeOptimizer
10
+
11
+
12
+ __all__ = (
13
+ "StringObfuscationFinder",
14
+ "StringObfType1PeepholeOptimizer",
15
+ "StringObfType3Rewriter",
16
+ "APIObfuscationFinder",
17
+ "APIObfType1PeepholeOptimizer",
18
+ )
@@ -0,0 +1,313 @@
1
+ # pylint:disable=missing-class-docstring,too-many-boolean-expressions
2
+ from __future__ import annotations
3
+ from typing import Any
4
+ from enum import IntEnum
5
+ import string
6
+ import logging
7
+
8
+ import networkx
9
+
10
+ import claripy
11
+
12
+ from angr import SIM_LIBRARIES
13
+ from angr.calling_conventions import SimRegArg
14
+ from angr.errors import SimMemoryMissingError
15
+ from angr.knowledge_plugins.key_definitions.constants import ObservationPointType
16
+ from angr.sim_type import SimTypePointer, SimTypeChar
17
+ from angr.analyses import Analysis, AnalysesHub
18
+ from angr.procedures.definitions import SimSyscallLibrary
19
+ from angr.sim_variable import SimMemoryVariable
20
+ from angr.analyses.decompiler.structured_codegen.c import (
21
+ CStructuredCodeWalker,
22
+ CFunctionCall,
23
+ CConstant,
24
+ CAssignment,
25
+ CVariable,
26
+ )
27
+
28
+ _l = logging.getLogger(name=__name__)
29
+
30
+
31
class APIObfuscationType(IntEnum):
    """
    Enumerates the API obfuscation styles that can be attached to an APIDeobFuncDescriptor.
    """

    # The only style defined so far.
    TYPE_1 = 0
33
+
34
+
35
class APIDeobFuncDescriptor:
    """
    Describes one function that was identified as an API deobfuscation routine.

    :ivar type:             The obfuscation style of the routine.
    :ivar func_addr:        Address of the routine.
    :ivar libname_argidx:   Index of the call argument that carries the library name.
    :ivar funcname_argidx:  Index of the call argument that carries the API (function) name.
    """

    def __init__(self, type_: APIObfuscationType, func_addr=None, libname_argidx=None, funcname_argidx=None):
        self.type, self.func_addr = type_, func_addr
        self.libname_argidx, self.funcname_argidx = libname_argidx, funcname_argidx
41
+
42
+
43
class Type1AssignmentFinder(CStructuredCodeWalker):
    """
    Walk decompiled C code and collect assignments of the form ``global_var = deob_func(..., "lib", ..., "api", ...)``.

    The result is available in ``assignments``, which maps the address of the assigned global variable to a
    ``(libname, funcname)`` tuple.

    :param func_addr:   Address of the deobfuscation routine whose call sites are of interest.
    :param desc:        Descriptor telling which call arguments carry the library name and the API name.
    """

    def __init__(self, func_addr: int, desc: APIDeobFuncDescriptor):
        self.func_addr = func_addr
        self.desc = desc
        self.assignments: dict[int, tuple[str, str]] = {}

    @staticmethod
    def _constant_string(arg) -> str | None:
        """
        Get the string that a constant call argument refers to.

        :param arg: A call argument from the decompiled code.
        :return:    The decoded string, or None if the argument is not a constant, has no byte-string reference
                    value for its type, or the bytes are not valid UTF-8.
        """
        if not isinstance(arg, CConstant):
            return None
        ref_values = arg.reference_values
        # reference_values may be None for constants that do not reference anything
        if not ref_values or arg.type not in ref_values:
            return None
        content = getattr(ref_values[arg.type], "content", None)
        if not isinstance(content, bytes):
            return None
        try:
            return content.decode("utf-8")
        except UnicodeDecodeError:
            return None

    def _record_call(self, var_addr: int, func_args) -> None:
        """
        Extract the library name and the API name from the arguments of one call and record them for var_addr.

        :param var_addr:    Address of the global variable that receives the return value.
        :param func_args:   Arguments of the call to the deobfuscation routine.
        """
        funcname_idx, libname_idx = self.desc.funcname_argidx, self.desc.libname_argidx
        # the descriptor allows unset (None) indices; there is nothing to extract in that case
        if funcname_idx is None or libname_idx is None:
            return
        if funcname_idx >= len(func_args) or libname_idx >= len(func_args):
            return

        funcname_arg, libname_arg = func_args[funcname_idx], func_args[libname_idx]
        # both arguments must be constants, otherwise we cannot statically recover the strings
        if not (isinstance(funcname_arg, CConstant) and isinstance(libname_arg, CConstant)):
            return

        funcname = self._constant_string(funcname_arg)
        libname = self._constant_string(libname_arg)
        if not funcname or not libname:
            return

        if var_addr not in self.assignments:
            self.assignments[var_addr] = libname, funcname
        elif self.assignments[var_addr] != (libname, funcname):
            # keep the first observation and only report the conflict
            _l.warning("Observed more than one assignment for variable at %#x.", var_addr)

    def handle_CAssignment(self, obj: CAssignment):
        lhs, rhs = obj.lhs, obj.rhs
        if (
            isinstance(lhs, CVariable)
            and isinstance(lhs.variable, SimMemoryVariable)
            and isinstance(lhs.variable.addr, int)
            and isinstance(rhs, CFunctionCall)
            and isinstance(rhs.callee_target, CConstant)
            and rhs.callee_target.value == self.func_addr
        ):
            # found it!
            self._record_call(lhs.variable.addr, rhs.args)

        return super().handle_CAssignment(obj)
85
+
86
+
87
class APIObfuscationFinder(Analysis):
    """
    An analysis that automatically finds API "obfuscation" routines.

    Currently, we support the following API "obfuscation" styles:

    - sub_A("dll_name", "api_name") where sub_A ends up calling LoadLibrary.

    The descriptors of all discovered routines are stored in ``type1_candidates``. The recovered
    ``(library name, API name)`` pairs are merged into ``kb.obfuscations.type1_deobfuscated_apis``, keyed by the
    address of the global variable that receives the resolved API pointer.
    """

    # names under which LoadLibrary is looked up in the function manager
    _LOAD_LIBRARY_NAMES = ("LoadLibraryA", "LoadLibraryW", "LoadLibrary")
    # byte values that may appear in a library name or an API name
    _VALID_APINAME_CHARSET = frozenset(ord(ch) for ch in (string.ascii_letters + string.digits + "._"))
    # how many levels of callers above LoadLibrary are inspected
    _MAX_CALLER_LEVEL = 3
    # maximum number of bytes that are read when loading a candidate name string
    _MAX_NAME_SIZE = 64
    # placeholder for argument values that cannot be determined
    _UNKNOWN_ARG_VALUE = 0xDEADBEEF

    def __init__(self):
        self.type1_candidates = []

        self.analyze()

    def analyze(self):
        """
        Find all type-1 deobfuscation routines and store the APIs that they resolve in the knowledge base.
        """
        self.type1_candidates = self._find_type1()

        for desc in self.type1_candidates:
            type1_deobfuscated = self._analyze_type1(desc.func_addr, desc)
            self.kb.obfuscations.type1_deobfuscated_apis.update(type1_deobfuscated)

    def _find_type1(self):
        """
        Find functions that are likely type-1 API deobfuscation routines.

        :return:    A list of APIDeobFuncDescriptor objects; empty if nothing is found.
        """
        cfg = self.kb.cfgs.get_most_accurate()
        functions = self.kb.functions

        load_library_funcs = [
            func
            for name in self._LOAD_LIBRARY_NAMES
            if name in functions
            for func in functions.get_by_name(name)
            if func.is_simprocedure
        ]
        if not load_library_funcs:
            return []

        # find callers of each load library func, up to three callers back
        callgraph = functions.callgraph
        descs = []
        # a function may reach several LoadLibrary variants; analyzing it once is enough
        checked_addrs = set()
        for load_library_func in load_library_funcs:
            root = load_library_func.addr
            subtree = self._build_caller_subtree(callgraph, root, self._MAX_CALLER_LEVEL)
            if root not in subtree:
                # no callers at all: the subtree is empty and a BFS from root would fail
                continue
            for _, succs in networkx.bfs_successors(subtree, root):
                for succ_addr in succs:
                    if succ_addr in checked_addrs:
                        continue
                    checked_addrs.add(succ_addr)
                    func = functions.get_by_addr(succ_addr)
                    likely, info = self._is_likely_type1_func(func, cfg)
                    if likely:
                        descs.append(
                            APIDeobFuncDescriptor(
                                APIObfuscationType.TYPE_1,
                                func_addr=func.addr,
                                libname_argidx=info["libname_arg_idx"],
                                funcname_argidx=info["funcname_arg_idx"],
                            )
                        )

        return descs

    def _load_register_args(self, func, observ) -> list[tuple[int, Any]]:
        """
        Load the values of all register-passed arguments of func from a reaching-definition observation.

        :param func:    The callee.
        :param observ:  The state observed right before the call instruction.
        :return:        A list of (argument index, value) tuples. Unknown values are replaced by a placeholder
                        constant.
        """
        arch = self.project.arch
        args: list[tuple[int, Any]] = []
        for arg_idx, func_arg in enumerate(func.arguments):
            # FIXME: We are ignoring all non-register function arguments until we see a test case where
            # FIXME: stack-passing arguments are used
            if not isinstance(func_arg, SimRegArg):
                continue
            reg_offset, reg_size = arch.registers[func_arg.reg_name]
            try:
                mv = observ.registers.load(reg_offset, size=reg_size)
            except SimMemoryMissingError:
                arg_value = None
            else:
                arg_value = mv.one_value()
            if arg_value is None:
                arg_value = claripy.BVV(self._UNKNOWN_ARG_VALUE, arch.bits)
            args.append((arg_idx, arg_value))
        return args

    def _read_arg_strings(self, args) -> list[tuple[int, str]] | None:
        """
        Read the strings that concrete argument values point to.

        Arguments that are not concrete or that do not point into a section of a loaded object are ignored.

        :param args:    A list of (argument index, value) tuples.
        :return:        A list of (argument index, string) tuples, or None if a pointed-to location cannot be read
                        or holds bytes that are not allowed in a library or API name.
        """
        loader = self.project.loader
        arg_strs: list[tuple[int, str]] = []
        for idx, arg in args:
            if arg is None or not arg.concrete:
                continue
            v = arg.concrete_value
            section = loader.find_section_containing(v)
            if section is None:
                continue
            # what string is it?
            max_size = min(self._MAX_NAME_SIZE, section.max_addr - v)
            try:
                value = loader.memory.load(v, max_size)
            except KeyError:
                return None
            if b"\x00" in value:
                value = value[: value.index(b"\x00")]
            if not all(ch in self._VALID_APINAME_CHARSET for ch in value):
                return None
            arg_strs.append((idx, value.decode("utf-8")))
        return arg_strs

    def _is_likely_type1_func(self, func, cfg):
        """
        Test if func looks like a type-1 API deobfuscation routine, i.e., a function taking exactly two char*
        arguments that is called at least once with a known library name and a known API name.

        :param func:    The function to test.
        :param cfg:     The control flow graph model to use.
        :return:        (True, {"libname_arg_idx": ..., "funcname_arg_idx": ...}) if func is a likely candidate,
                        (False, None) otherwise.
        """
        if func.prototype is None or len(func.prototype.args) < 2:
            return False, None

        # decompile the function to get a prototype with types
        _ = self.project.analyses.Decompiler(func, cfg=cfg)

        char_ptr_arg_count = sum(
            1 for arg in func.prototype.args if isinstance(arg, SimTypePointer) and isinstance(arg.pts_to, SimTypeChar)
        )
        if char_ptr_arg_count != 2:
            return False, None

        # who's calling it?
        for caller_addr in sorted(set(self.kb.functions.callgraph.predecessors(func.addr))):
            # what arguments are used to call this function with?
            func_node = cfg.get_any_node(func.addr)
            callsite_nodes = [
                pred
                for pred in cfg.get_predecessors(func_node)
                if pred.function_address == caller_addr and pred.instruction_addrs
            ]
            observation_points = [
                ("insn", node.instruction_addrs[-1], ObservationPointType.OP_BEFORE) for node in callsite_nodes
            ]
            rda = self.project.analyses.ReachingDefinitions(
                self.kb.functions[caller_addr],
                observe_all=False,
                observation_points=observation_points,
            )
            for callsite_node in callsite_nodes:
                observ = rda.model.get_observation_by_insn(
                    callsite_node.instruction_addrs[-1],
                    ObservationPointType.OP_BEFORE,
                )
                args = self._load_register_args(func, observ)

                # the args must have at least one concrete address that points to an initialized memory location
                arg_strs = self._read_arg_strings(args)
                if arg_strs is None or len(arg_strs) != 2:
                    # this call site does not pass exactly two readable names; try the next one
                    continue

                libname_arg_idx, funcname_arg_idx = None, None
                for arg_idx, name in arg_strs:
                    if self.is_libname(name):
                        libname_arg_idx = arg_idx
                    elif self.is_apiname(name):
                        funcname_arg_idx = arg_idx

                if libname_arg_idx is not None and funcname_arg_idx is not None:
                    return True, {"libname_arg_idx": libname_arg_idx, "funcname_arg_idx": funcname_arg_idx}

        return False, None

    def _analyze_type1(self, func_addr, desc: APIDeobFuncDescriptor) -> dict[int, tuple[str, str]]:
        """
        Decompile every caller of the deobfuscation routine and collect the APIs that are resolved.

        :param func_addr:   Address of the deobfuscation routine.
        :param desc:        The descriptor of the deobfuscation routine.
        :return:            A dict mapping addresses of global variables to (libname, funcname) tuples.
        """
        cfg = self.kb.cfgs.get_most_accurate()

        assignments: dict[int, tuple[str, str]] = {}

        # get all call sites
        for caller_addr in sorted(set(self.kb.functions.callgraph.predecessors(func_addr))):
            # decompile the function and get all assignments of the return value of the func at func_addr
            try:
                dec = self.project.analyses.Decompiler(self.kb.functions.get_by_addr(caller_addr), cfg=cfg)
            except Exception:  # pylint:disable=broad-exception-caught
                # best effort: a caller that cannot be decompiled is skipped, but leave a trace for debugging
                _l.debug("Failed to decompile the caller at %#x.", caller_addr, exc_info=True)
                continue
            if dec.codegen is None:
                continue

            finder = Type1AssignmentFinder(func_addr, desc)
            finder.handle(dec.codegen.cfunc)

            duplicate_addrs = assignments.keys() & finder.assignments.keys()
            if duplicate_addrs:
                # duplicate entries
                _l.warning(
                    "Observed duplicate assignments at the following addresses: %s.",
                    ", ".join(hex(addr) for addr in sorted(duplicate_addrs)),
                )

            assignments.update(finder.assignments)

        return assignments

    @staticmethod
    def _build_caller_subtree(callgraph: networkx.DiGraph, func_addr: int, max_level: int) -> networkx.DiGraph:
        """
        Build a tree of the transitive callers of a function.

        :param callgraph:   The call graph; edges go from callers to callees.
        :param func_addr:   Address of the function at the root of the tree.
        :param max_level:   Maximum distance (in calls) between the root and a caller in the tree.
        :return:            A graph with edges going from each function to its callers. It is empty if func_addr is
                            not in the call graph or if it has no callers.
        """
        from collections import deque  # pylint:disable=import-outside-toplevel

        tree = networkx.DiGraph()

        if func_addr not in callgraph:
            return tree

        queue = deque([(0, func_addr)])
        traversed = {func_addr}
        while queue:
            level, addr = queue.popleft()
            if level >= max_level:
                # callers of this node would exceed the level limit
                continue
            for pred in callgraph.predecessors(addr):
                if pred not in traversed:
                    traversed.add(pred)
                    queue.append((level + 1, pred))
                    tree.add_edge(addr, pred)

        return tree

    @staticmethod
    def is_libname(name: str) -> bool:
        """
        Test if name (case-insensitive, with or without a file extension) is a key of SIM_LIBRARIES.
        """
        name = name.lower()
        if name in SIM_LIBRARIES:
            return True
        if "." not in name:
            return name + ".dll" in SIM_LIBRARIES or name + ".exe" in SIM_LIBRARIES
        return False

    @staticmethod
    def is_apiname(name: str) -> bool:
        """
        Test if any non-syscall library in SIM_LIBRARIES has a prototype for name.
        """
        return any(not isinstance(lib, SimSyscallLibrary) and lib.has_prototype(name) for lib in SIM_LIBRARIES.values())
311
+
312
+
313
+ AnalysesHub.register_default("APIObfuscationFinder", APIObfuscationFinder)
@@ -0,0 +1,51 @@
1
+ from __future__ import annotations
2
+ from ailment.expression import Const, Load
3
+
4
+ from angr import SIM_LIBRARIES
5
+ from angr.calling_conventions import default_cc
6
+ from angr.analyses.decompiler.peephole_optimizations.base import PeepholeOptimizationExprBase
7
+ from angr.analyses.decompiler.peephole_optimizations import EXPR_OPTS
8
+
9
+
10
class APIObfType1PeepholeOptimizer(PeepholeOptimizationExprBase):
    """
    Integrate type-1 deobfuscated API into decompilation output.

    A pointer-sized load from a constant address that is listed in ``kb.obfuscations.type1_deobfuscated_apis`` is
    replaced by a constant holding the address of the corresponding API function.
    """

    __slots__ = ()

    NAME = "Simplify Type 1 API obfuscation references"
    expr_classes = (Load,)

    def _create_api_function(self, funcname):
        """
        Create an extern symbol, a hook, and a function object for an API that is not in the function manager yet.

        :param funcname:    Name of the API.
        :return:            The newly created function.
        """
        project = self.project
        symbol = project.loader.extern_object.make_extern(funcname)
        stub = SIM_LIBRARIES["linux"].get_stub(funcname, project.arch)
        hook_addr = project.hook_symbol(symbol.rebased_addr, stub)
        new_func = self.kb.functions.function(addr=hook_addr, name=funcname, create=True)
        new_func.is_simprocedure = True

        # only pass the platform along when a SimOS is available
        cc_kwargs = {"platform": project.simos.name} if project.simos is not None else {}
        cc_cls = default_cc(project.arch.name, **cc_kwargs)
        if cc_cls is not None:
            new_func.calling_convention = cc_cls(project.arch)
        new_func.find_declaration(ignore_binary_name=True)
        return new_func

    def optimize(self, expr: Load, **kwargs):
        addr = expr.addr
        if not isinstance(addr, Const):
            return None
        if addr.value not in self.kb.obfuscations.type1_deobfuscated_apis:
            return None
        if expr.bits != self.project.arch.bits:
            return None

        # this is actually a function calling a known API
        # replace it with the actual API and the actual arguments
        _, funcname = self.kb.obfuscations.type1_deobfuscated_apis[addr.value]
        if funcname in self.kb.functions:
            func = self.kb.functions[funcname]
        else:
            # assign a new function on-demand
            func = self._create_api_function(funcname)
        return Const(expr.idx, None, func.addr, self.project.arch.bits, **expr.tags)
49
+
50
+
51
+ EXPR_OPTS.append(APIObfType1PeepholeOptimizer)
@@ -0,0 +1,85 @@
1
+ # pylint:disable=no-self-use,unused-argument,attribute-defined-outside-init
2
+ from __future__ import annotations
3
+
4
+ import pyvex
5
+
6
+ from angr.engines.light import SimEngineLightVEXMixin
7
+
8
+
9
class IRSBRegisterCollector(SimEngineLightVEXMixin):
    """
    Scan the VEX IRSB to collect all registers that are read.

    After calling process(), ``reg_reads`` holds one ``(offset, size)`` tuple for every Get expression that was
    handled, where size is what ``result_size()`` of the expression reports. Every other statement and expression
    handler defined here is a no-op.

    :param block:   The block whose VEX IRSB (``block.vex``) is scanned.
    """

    def __init__(self, block, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.reg_reads: set[tuple[int, int]] = set()
        self.block = block

    def process(self):
        """
        Process all statements of the block and reset the per-statement state afterwards.
        """
        self.tmps, self.tyenv = {}, self.block.vex.tyenv

        self._process_Stmt()

        self.stmt_idx = self.ins_addr = None

    #
    # The only handler that records anything
    #

    def _handle_Get(self, expr: pyvex.IRExpr.Get):
        read = (expr.offset, expr.result_size(self.tyenv))
        self.reg_reads.add(read)

    #
    # Statement handlers: nothing to collect
    #

    def _handle_Put(self, stmt):
        return None

    def _handle_Store(self, stmt):
        return None

    def _handle_LoadG(self, stmt):
        return None

    def _handle_LLSC(self, stmt: pyvex.IRStmt.LLSC):
        return None

    def _handle_StoreG(self, stmt):
        return None

    #
    # Expression handlers: nothing to collect
    #

    def _handle_Load(self, expr):
        return None

    def _handle_RdTmp(self, expr):
        return None

    def _handle_Conversion(self, expr: pyvex.IRExpr.Unop):
        return None

    def _handle_16HLto32(self, expr):
        return None

    def _handle_Cmp_v(self, expr, _vector_size, _vector_count):
        return None

    # all vector comparisons share the same no-op handler
    _handle_CmpEQ_v = _handle_CmpNE_v = _handle_CmpLE_v = _handle_Cmp_v
    _handle_CmpLT_v = _handle_CmpGE_v = _handle_CmpGT_v = _handle_Cmp_v

    def _handle_ExpCmpNE64(self, expr):
        return None

    def _handle_CCall(self, expr):
        return None

    def _handle_function(self, func_addr):
        return None

    def _handle_Unop(self, expr):
        return None

    def _handle_Binop(self, expr: pyvex.IRExpr.Binop):
        return None

    def _handle_Triop(self, expr: pyvex.IRExpr.Triop):
        return None