angr 9.2.124__py3-none-macosx_11_0_arm64.whl → 9.2.126__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (53) hide show
  1. angr/__init__.py +1 -1
  2. angr/analyses/__init__.py +13 -1
  3. angr/analyses/codecave.py +77 -0
  4. angr/analyses/decompiler/ail_simplifier.py +1 -0
  5. angr/analyses/decompiler/callsite_maker.py +9 -1
  6. angr/analyses/decompiler/clinic.py +32 -2
  7. angr/analyses/decompiler/condition_processor.py +104 -66
  8. angr/analyses/decompiler/decompiler.py +7 -0
  9. angr/analyses/decompiler/optimization_passes/__init__.py +18 -1
  10. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +6 -0
  11. angr/analyses/decompiler/optimization_passes/tag_slicer.py +41 -0
  12. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +2 -2
  13. angr/analyses/decompiler/return_maker.py +1 -0
  14. angr/analyses/decompiler/ssailification/rewriting.py +4 -0
  15. angr/analyses/decompiler/ssailification/rewriting_engine.py +10 -3
  16. angr/analyses/decompiler/structured_codegen/c.py +18 -2
  17. angr/analyses/deobfuscator/__init__.py +18 -0
  18. angr/analyses/deobfuscator/api_obf_finder.py +313 -0
  19. angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +51 -0
  20. angr/analyses/deobfuscator/irsb_reg_collector.py +85 -0
  21. angr/analyses/deobfuscator/string_obf_finder.py +774 -0
  22. angr/analyses/deobfuscator/string_obf_opt_passes.py +133 -0
  23. angr/analyses/deobfuscator/string_obf_peephole_optimizer.py +47 -0
  24. angr/analyses/patchfinder.py +137 -0
  25. angr/analyses/pathfinder.py +282 -0
  26. angr/analyses/reaching_definitions/function_handler_library/stdio.py +8 -1
  27. angr/analyses/smc.py +159 -0
  28. angr/analyses/unpacker/__init__.py +6 -0
  29. angr/analyses/unpacker/obfuscation_detector.py +103 -0
  30. angr/analyses/unpacker/packing_detector.py +138 -0
  31. angr/angrdb/models.py +1 -2
  32. angr/calling_conventions.py +3 -1
  33. angr/engines/vex/claripy/irop.py +10 -5
  34. angr/engines/vex/heavy/heavy.py +2 -0
  35. angr/exploration_techniques/spiller_db.py +1 -2
  36. angr/knowledge_plugins/__init__.py +2 -0
  37. angr/knowledge_plugins/functions/function.py +4 -0
  38. angr/knowledge_plugins/functions/function_manager.py +18 -9
  39. angr/knowledge_plugins/functions/function_parser.py +1 -1
  40. angr/knowledge_plugins/functions/soot_function.py +1 -0
  41. angr/knowledge_plugins/obfuscations.py +36 -0
  42. angr/lib/angr_native.dylib +0 -0
  43. angr/misc/ux.py +2 -2
  44. angr/project.py +17 -1
  45. angr/state_plugins/history.py +6 -4
  46. angr/utils/bits.py +4 -0
  47. angr/utils/tagged_interval_map.py +112 -0
  48. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/METADATA +6 -6
  49. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/RECORD +53 -36
  50. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/WHEEL +1 -1
  51. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/LICENSE +0 -0
  52. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/entry_points.txt +0 -0
  53. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
  from ailment.expression import Load, Const
3
- from cle.backends import Blob
3
+ from cle.backends import Blob, Hex
4
4
 
5
5
  from .base import PeepholeOptimizationExprBase
6
6
 
@@ -32,7 +32,7 @@ class ConstantDereferences(PeepholeOptimizationExprBase):
32
32
 
33
33
  # is it loading from a blob?
34
34
  obj = self.project.loader.find_object_containing(expr.addr.value)
35
- if obj is not None and isinstance(obj, Blob):
35
+ if obj is not None and isinstance(obj, (Blob, Hex)):
36
36
  # do we know the value that it's reading?
37
37
  try:
38
38
  val = self.project.loader.memory.unpack_word(expr.addr.value, size=self.project.arch.bytes)
@@ -48,6 +48,7 @@ class ReturnMaker(AILGraphWalker):
48
48
  reg[0],
49
49
  ret_val.size * self.arch.byte_width,
50
50
  reg_name=self.arch.translate_register_name(reg[0], ret_val.size),
51
+ ins_addr=stmt.ins_addr,
51
52
  )
52
53
  )
53
54
  else:
@@ -119,6 +119,7 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
119
119
  self._ail_manager.next_atom(),
120
120
  reg_bits,
121
121
  src_and_vvars=[], # back patch later
122
+ ins_addr=node.addr,
122
123
  )
123
124
  phi_dst = VirtualVariable(
124
125
  self._ail_manager.next_atom(),
@@ -126,6 +127,7 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
126
127
  reg_bits,
127
128
  VirtualVariableCategory.REGISTER,
128
129
  oident=reg_offset,
130
+ ins_addr=node.addr,
129
131
  )
130
132
 
131
133
  case "stack":
@@ -135,6 +137,7 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
135
137
  self._ail_manager.next_atom(),
136
138
  stack_size * self.project.arch.byte_width,
137
139
  src_and_vvars=[], # back patch later
140
+ ins_addr=node.addr,
138
141
  )
139
142
  phi_dst = VirtualVariable(
140
143
  self._ail_manager.next_atom(),
@@ -142,6 +145,7 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, NodeType, object, object
142
145
  stack_size * self.project.arch.byte_width,
143
146
  VirtualVariableCategory.STACK,
144
147
  oident=stack_offset,
148
+ ins_addr=node.addr,
145
149
  )
146
150
  case _:
147
151
  raise NotImplementedError
@@ -525,7 +525,7 @@ class SimEngineSSARewriting(
525
525
  **expr.tags,
526
526
  )
527
527
 
528
- def _get_full_reg_vvar(self, reg_offset: int, size: int) -> VirtualVariable:
528
+ def _get_full_reg_vvar(self, reg_offset: int, size: int, ins_addr: int | None = None) -> VirtualVariable:
529
529
  base_off, base_size = get_reg_offset_base_and_size(reg_offset, self.arch, size=size)
530
530
  if (
531
531
  base_off not in self.state.registers
@@ -534,13 +534,16 @@ class SimEngineSSARewriting(
534
534
  ):
535
535
  # somehow it's never defined before...
536
536
  _l.debug("Creating a new virtual variable for an undefined register (%d [%d]).", base_off, base_size)
537
+ tags = {}
538
+ if ins_addr is not None:
539
+ tags["ins_addr"] = ins_addr
537
540
  vvar = VirtualVariable(
538
541
  self.ail_manager.next_atom(),
539
542
  self.next_vvar_id(),
540
543
  base_size * self.arch.byte_width,
541
544
  category=VirtualVariableCategory.REGISTER,
542
545
  oident=base_off,
543
- # FIXME: tags
546
+ **tags,
544
547
  )
545
548
  self.state.registers[base_off][base_size] = vvar
546
549
  return vvar
@@ -628,7 +631,11 @@ class SimEngineSSARewriting(
628
631
 
629
632
  # no good size available
630
633
  # get the full register, then extract from there
631
- vvar = self._get_full_reg_vvar(reg_expr.reg_offset, reg_expr.size)
634
+ vvar = self._get_full_reg_vvar(
635
+ reg_expr.reg_offset,
636
+ reg_expr.size,
637
+ ins_addr=reg_expr.ins_addr,
638
+ )
632
639
  # extract
633
640
  shift_amount = Const(
634
641
  self.ail_manager.next_atom(),
@@ -2148,6 +2148,12 @@ class CConstant(CExpression):
2148
2148
  elif isinstance(v, Function):
2149
2149
  yield get_cpp_function_name(v.demangled_name, specialized=False, qualified=True), self
2150
2150
  return
2151
+ elif isinstance(v, str):
2152
+ yield CConstant.str_to_c_str(v), self
2153
+ return
2154
+ elif isinstance(v, bytes):
2155
+ yield CConstant.str_to_c_str(v.replace(b"\x00", b"").decode("utf-8")), self
2156
+ return
2151
2157
 
2152
2158
  if self.reference_values is not None and self._type is not None and self._type in self.reference_values:
2153
2159
  if isinstance(self._type, SimTypeInt):
@@ -3415,7 +3421,17 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3415
3421
  if reference_values is None:
3416
3422
  reference_values = {}
3417
3423
  type_ = unpack_typeref(type_)
3418
- if isinstance(type_, SimTypePointer) and isinstance(type_.pts_to, SimTypeChar):
3424
+ if expr.value in self.kb.obfuscations.type1_deobfuscated_strings:
3425
+ reference_values[SimTypePointer(SimTypeChar())] = self.kb.obfuscations.type1_deobfuscated_strings[
3426
+ expr.value
3427
+ ]
3428
+ inline_string = True
3429
+ elif expr.value in self.kb.obfuscations.type2_deobfuscated_strings:
3430
+ reference_values[SimTypePointer(SimTypeChar())] = self.kb.obfuscations.type2_deobfuscated_strings[
3431
+ expr.value
3432
+ ]
3433
+ inline_string = True
3434
+ elif isinstance(type_, SimTypePointer) and isinstance(type_.pts_to, SimTypeChar):
3419
3435
  # char*
3420
3436
  # Try to get a string
3421
3437
  if (
@@ -3433,7 +3449,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3433
3449
  # edge cases: (void*)"this is a constant string pointer". in this case, the type_ will be a void*
3434
3450
  # (BOT*) instead of a char*.
3435
3451
 
3436
- if isinstance(expr.value, int):
3452
+ if not reference_values and isinstance(expr.value, int):
3437
3453
  if expr.value in self.project.kb.functions:
3438
3454
  # It's a function pointer
3439
3455
  # We don't care about the actual prototype here
@@ -0,0 +1,18 @@
1
+ # deobfuscator is a collection of analyses that automatically identify functions where obfuscation techniques are
2
+ # in use.
3
+ from __future__ import annotations
4
+
5
+ from .string_obf_finder import StringObfuscationFinder
6
+ from .string_obf_peephole_optimizer import StringObfType1PeepholeOptimizer
7
+ from .string_obf_opt_passes import StringObfType3Rewriter
8
+ from .api_obf_finder import APIObfuscationFinder
9
+ from .api_obf_peephole_optimizer import APIObfType1PeepholeOptimizer
10
+
11
+
12
+ __all__ = (
13
+ "StringObfuscationFinder",
14
+ "StringObfType1PeepholeOptimizer",
15
+ "StringObfType3Rewriter",
16
+ "APIObfuscationFinder",
17
+ "APIObfType1PeepholeOptimizer",
18
+ )
@@ -0,0 +1,313 @@
1
+ # pylint:disable=missing-class-docstring,too-many-boolean-expressions
2
+ from __future__ import annotations
3
+ from typing import Any
4
+ from enum import IntEnum
5
+ import string
6
+ import logging
7
+
8
+ import networkx
9
+
10
+ import claripy
11
+
12
+ from angr import SIM_LIBRARIES
13
+ from angr.calling_conventions import SimRegArg
14
+ from angr.errors import SimMemoryMissingError
15
+ from angr.knowledge_plugins.key_definitions.constants import ObservationPointType
16
+ from angr.sim_type import SimTypePointer, SimTypeChar
17
+ from angr.analyses import Analysis, AnalysesHub
18
+ from angr.procedures.definitions import SimSyscallLibrary
19
+ from angr.sim_variable import SimMemoryVariable
20
+ from angr.analyses.decompiler.structured_codegen.c import (
21
+ CStructuredCodeWalker,
22
+ CFunctionCall,
23
+ CConstant,
24
+ CAssignment,
25
+ CVariable,
26
+ )
27
+
28
+ _l = logging.getLogger(name=__name__)
29
+
30
+
31
class APIObfuscationType(IntEnum):
    """
    The API obfuscation schemes that can be described by an APIDeobFuncDescriptor.
    """

    # the only scheme defined so far
    TYPE_1 = 0
33
+
34
+
35
class APIDeobFuncDescriptor:
    """
    Describes a function that was identified as an API deobfuscation routine.

    :ivar type:             The obfuscation scheme that the function belongs to.
    :ivar func_addr:        Address of the function.
    :ivar libname_argidx:   Index of the call argument that holds the library name.
    :ivar funcname_argidx:  Index of the call argument that holds the API (function) name.
    """

    def __init__(
        self,
        type_: APIObfuscationType,
        func_addr=None,
        libname_argidx=None,
        funcname_argidx=None,
    ):
        self.type, self.func_addr = type_, func_addr
        self.libname_argidx, self.funcname_argidx = libname_argidx, funcname_argidx
41
+
42
+
43
class Type1AssignmentFinder(CStructuredCodeWalker):
    """
    Walk decompiled C code and collect assignments of the form ``memory_var = deobf_func(...)``.

    An assignment is recorded when its left-hand side is a memory variable with an integer address, its right-hand
    side is a call whose target is the constant ``func_addr``, and both the library-name argument and the API-name
    argument are constants with a byte-string reference value. Results are stored in ``assignments``, keyed by the
    address of the assigned variable.
    """

    def __init__(self, func_addr: int, desc: APIDeobFuncDescriptor):
        """
        :param func_addr:   Address of the API deobfuscation routine whose call sites are searched for.
        :param desc:        Descriptor that tells which call arguments hold the library name and the API name.
        """
        self.func_addr = func_addr
        self.desc = desc
        # address of the assigned variable -> (library name, API name)
        self.assignments: dict[int, tuple[str, str]] = {}

    @staticmethod
    def _constant_to_str(arg) -> str | None:
        """
        Return the string referenced by a constant call argument, or None if there is no usable string.

        :param arg: A call argument; anything that is not a CConstant yields None.
        :return:    The decoded string, or None.
        """
        if not isinstance(arg, CConstant):
            return None
        refs = arg.reference_values
        # reference_values may be None for constants that do not reference anything
        if refs is None or arg.type not in refs:
            return None
        # reference values are not guaranteed to be objects with a `content` attribute (raw str/bytes are possible)
        content = getattr(refs[arg.type], "content", None)
        if not isinstance(content, bytes):
            return None
        try:
            return content.decode("utf-8")
        except UnicodeDecodeError:
            # not a valid name; treat it the same as "no string available"
            return None

    def _extract_names(self, call: CFunctionCall) -> tuple[str, str] | None:
        """
        Extract ``(libname, funcname)`` from a call to the deobfuscation routine.

        :param call:    The call expression on the right-hand side of the assignment.
        :return:        The pair of names, or None if either name cannot be determined.
        """
        funcname_idx = self.desc.funcname_argidx
        libname_idx = self.desc.libname_argidx
        if funcname_idx is None or libname_idx is None:
            return None

        func_args = call.args
        if funcname_idx >= len(func_args) or libname_idx >= len(func_args):
            return None

        funcname = self._constant_to_str(func_args[funcname_idx])
        libname = self._constant_to_str(func_args[libname_idx])
        if not funcname or not libname:
            return None
        return libname, funcname

    def handle_CAssignment(self, obj: CAssignment):
        """
        Record the assignment if it stores the result of the deobfuscation routine, then continue the walk.

        The first (libname, funcname) pair seen for a variable address is kept; a later, different pair for the same
        address only triggers a warning.
        """
        lhs, rhs = obj.lhs, obj.rhs
        if (
            isinstance(lhs, CVariable)
            and isinstance(lhs.variable, SimMemoryVariable)
            and isinstance(lhs.variable.addr, int)
            and isinstance(rhs, CFunctionCall)
            and isinstance(rhs.callee_target, CConstant)
            and rhs.callee_target.value == self.func_addr
        ):
            # found it!
            names = self._extract_names(rhs)
            if names is not None:
                var_addr = lhs.variable.addr
                existing = self.assignments.setdefault(var_addr, names)
                if existing != names:
                    _l.warning("Observed more than one assignment for variable at %#x.", var_addr)

        return super().handle_CAssignment(obj)
85
+
86
+
87
class APIObfuscationFinder(Analysis):
    """
    An analysis that automatically finds API "obfuscation" routines.

    Currently, we support the following API "obfuscation" styles:

    - sub_A("dll_name", "api_name") where sub_A ends up calling LoadLibrary.

    Identified routines are stored in ``type1_candidates``; the (library name, API name) pairs recovered from their
    call sites are merged into ``kb.obfuscations.type1_deobfuscated_apis``.
    """

    # names of the library-loading functions whose callers are examined
    _LOAD_LIBRARY_NAMES = ("LoadLibraryA", "LoadLibraryW", "LoadLibrary")
    # how many levels of callers above a library-loading function are examined
    _MAX_CALLER_DEPTH = 3
    # placeholder for an argument value that cannot be resolved at a call site
    _UNKNOWN_ARG_VALUE = 0xDEADBEEF
    # maximum number of bytes loaded from memory when reading a candidate name
    _MAX_NAME_SIZE = 64
    # byte values that may appear in a library name or an API name
    _VALID_APINAME_CHARSET = frozenset((string.ascii_letters + string.digits + "._").encode("ascii"))

    def __init__(self):
        self.type1_candidates = []

        self.analyze()

    def analyze(self):
        """
        Find all type-1 deobfuscation routines and record the APIs that are resolved through them.
        """
        self.type1_candidates = self._find_type1()

        for desc in self.type1_candidates:
            type1_deobfuscated = self._analyze_type1(desc.func_addr, desc)
            self.kb.obfuscations.type1_deobfuscated_apis.update(type1_deobfuscated)

    def _find_type1(self) -> list[APIDeobFuncDescriptor]:
        """
        Find functions that look like type-1 API deobfuscation routines.

        :return:    One descriptor per identified function; an empty list if there is none.
        """
        cfg = self.kb.cfgs.get_most_accurate()

        load_library_funcs = [
            func
            for name in self._LOAD_LIBRARY_NAMES
            if name in self.kb.functions
            for func in self.kb.functions.get_by_name(name)
            if func.is_simprocedure
        ]
        if not load_library_funcs:
            return []

        # find callers of each load library func, up to three callers back
        callgraph = self.kb.functions.callgraph
        descs: list[APIDeobFuncDescriptor] = []
        examined: set[int] = set()
        for load_library_func in load_library_funcs:
            subtree = self._build_caller_subtree(callgraph, load_library_func.addr, self._MAX_CALLER_DEPTH)
            if load_library_func.addr not in subtree:
                # the function has no callers; bfs_successors() raises when the source node is not in the graph
                continue
            for _, succs in networkx.bfs_successors(subtree, load_library_func.addr):
                for succ_addr in succs:
                    if succ_addr in examined:
                        # already examined as a caller of another load library func; do not decompile it again
                        continue
                    examined.add(succ_addr)

                    func = self.kb.functions.get_by_addr(succ_addr)
                    likely, info = self._is_likely_type1_func(func, cfg)
                    if likely:
                        descs.append(
                            APIDeobFuncDescriptor(
                                APIObfuscationType.TYPE_1,
                                func_addr=func.addr,
                                libname_argidx=info["libname_arg_idx"],
                                funcname_argidx=info["funcname_arg_idx"],
                            )
                        )

        return descs

    def _is_likely_type1_func(self, func, cfg):
        """
        Determine if a function is likely a type-1 API deobfuscation routine.

        The function must take exactly two ``char*`` arguments (according to its prototype after decompilation), and
        at least one call site must pass one string that names a known library and one string that names a known API.

        :param func:    The function to test.
        :param cfg:     The CFG model used for decompilation and for locating call sites.
        :return:        ``(True, {"libname_arg_idx": ..., "funcname_arg_idx": ...})`` on success,
                        ``(False, None)`` otherwise.
        """
        if func.prototype is None or len(func.prototype.args) < 2:
            return False, None

        # decompile the function to get a prototype with types
        _ = self.project.analyses.Decompiler(func, cfg=cfg)

        char_ptr_args = [
            idx
            for (idx, arg) in enumerate(func.prototype.args)
            if isinstance(arg, SimTypePointer) and isinstance(arg.pts_to, SimTypeChar)
        ]
        if len(char_ptr_args) != 2:
            return False, None

        # who's calling it?
        caller_addrs = sorted(set(self.kb.functions.callgraph.predecessors(func.addr)))
        if not caller_addrs:
            return False, None

        # CFG nodes that transfer control to the function; this does not depend on the caller, so look it up once
        func_preds = list(cfg.get_predecessors(cfg.get_any_node(func.addr)))

        for caller_addr in caller_addrs:
            # what arguments are used to call this function with?
            callsite_insn_addrs = [
                pred.instruction_addrs[-1]
                for pred in func_preds
                if pred.function_address == caller_addr and pred.instruction_addrs
            ]
            rda = self.project.analyses.ReachingDefinitions(
                self.kb.functions[caller_addr],
                observe_all=False,
                observation_points=[
                    ("insn", insn_addr, ObservationPointType.OP_BEFORE) for insn_addr in callsite_insn_addrs
                ],
            )
            for insn_addr in callsite_insn_addrs:
                observ = rda.model.get_observation_by_insn(insn_addr, ObservationPointType.OP_BEFORE)
                args = self._callsite_arg_values(func, observ)
                arg_strs = self._load_arg_strings(args)
                if arg_strs is None or len(arg_strs) != 2:
                    # this call site does not pass exactly two usable strings (e.g., unresolved arguments); it tells
                    # us nothing, so move on to the next one instead of failing the whole analysis
                    continue
                info = self._classify_arg_strings(arg_strs)
                if info is not None:
                    return True, info

        return False, None

    def _callsite_arg_values(self, func, observ) -> list[tuple[int, Any]]:
        """
        Get the values of the register-passed arguments of ``func`` from the state observed before a call site.

        :param func:    The callee.
        :param observ:  The reaching-definition state observed right before the call instruction.
        :return:        A list of (argument index, value). Values that cannot be resolved to one single value are
                        replaced by a placeholder constant.
        """
        arch = self.project.arch
        unknown = claripy.BVV(self._UNKNOWN_ARG_VALUE, arch.bits)

        args: list[tuple[int, Any]] = []
        for arg_idx, func_arg in enumerate(func.arguments):
            # FIXME: We are ignoring all non-register function arguments until we see a test case where
            # FIXME: stack-passing arguments are used
            if not isinstance(func_arg, SimRegArg):
                continue
            reg_offset, reg_size = arch.registers[func_arg.reg_name]
            try:
                mv = observ.registers.load(reg_offset, size=reg_size)
            except SimMemoryMissingError:
                args.append((arg_idx, unknown))
                continue
            arg_value = mv.one_value()
            args.append((arg_idx, unknown if arg_value is None else arg_value))
        return args

    def _load_arg_strings(self, args: list[tuple[int, Any]]) -> list[tuple[int, str]] | None:
        """
        Load the strings that concrete argument values point to.

        Arguments that are not concrete, or that do not point into any section, are skipped.

        :param args:    A list of (argument index, value).
        :return:        A list of (argument index, string), or None if any string cannot be loaded or contains a
                        character that is not allowed in a library or API name.
        """
        loader = self.project.loader
        arg_strs: list[tuple[int, str]] = []
        for idx, arg in args:
            if arg is None or not arg.concrete:
                continue
            v = arg.concrete_value
            section = loader.find_section_containing(v)
            if section is None:
                continue

            # what string is it?
            max_size = min(self._MAX_NAME_SIZE, section.max_addr - v)
            try:
                value = loader.memory.load(v, max_size)
            except KeyError:
                return None
            # keep everything before the first NUL byte (or all bytes if there is no NUL byte)
            value = value.split(b"\x00", 1)[0]
            if not all(ch in self._VALID_APINAME_CHARSET for ch in value):
                return None
            arg_strs.append((idx, value.decode("utf-8")))
        return arg_strs

    def _classify_arg_strings(self, arg_strs: list[tuple[int, str]]) -> dict[str, int] | None:
        """
        Decide which argument is the library name and which one is the API name.

        :param arg_strs:    A list of (argument index, string).
        :return:            A dict with keys "libname_arg_idx" and "funcname_arg_idx", or None if either a library
                            name or an API name is missing.
        """
        libname_arg_idx = funcname_arg_idx = None
        for arg_idx, name in arg_strs:
            if self.is_libname(name):
                libname_arg_idx = arg_idx
            elif self.is_apiname(name):
                funcname_arg_idx = arg_idx

        if libname_arg_idx is None or funcname_arg_idx is None:
            return None
        return {"libname_arg_idx": libname_arg_idx, "funcname_arg_idx": funcname_arg_idx}

    def _analyze_type1(self, func_addr, desc: APIDeobFuncDescriptor) -> dict[int, tuple[str, str]]:
        """
        Decompile every caller of a deobfuscation routine and collect the APIs that are resolved through it.

        :param func_addr:   Address of the deobfuscation routine.
        :param desc:        The descriptor of the deobfuscation routine.
        :return:            A dict mapping the address of each assigned variable to (library name, API name).
        """
        cfg = self.kb.cfgs.get_most_accurate()

        assignments: dict[int, tuple[str, str]] = {}

        # get all call sites
        caller_addrs = sorted(set(self.kb.functions.callgraph.predecessors(func_addr)))
        for caller_addr in caller_addrs:
            # decompile the function and get all assignments of the return value of the func at func_addr
            try:
                dec = self.project.analyses.Decompiler(self.kb.functions.get_by_addr(caller_addr), cfg=cfg)
            except Exception:  # pylint:disable=broad-exception-caught
                # best effort: a caller that cannot be decompiled is skipped, but leave a trace for debugging
                _l.debug("Failed to decompile caller %#x of function %#x.", caller_addr, func_addr, exc_info=True)
                continue
            if dec.codegen is None:
                continue

            finder = Type1AssignmentFinder(func_addr, desc)
            finder.handle(dec.codegen.cfunc)

            duplicate_addrs = assignments.keys() & finder.assignments.keys()
            if duplicate_addrs:
                # duplicate entries
                _l.warning(
                    "Observed duplicate assignments at the following addresses: %s.",
                    ", ".join(hex(addr) for addr in sorted(duplicate_addrs)),
                )

            assignments.update(finder.assignments)

        return assignments

    @staticmethod
    def _build_caller_subtree(callgraph: networkx.DiGraph, func_addr: int, max_level: int) -> networkx.DiGraph:
        """
        Build a tree of the direct and indirect callers of a function.

        Edges point from a callee to its caller. Each caller appears only once, at the level where it is first seen.

        :param callgraph:   The call graph.
        :param func_addr:   Address of the function at the root of the tree.
        :param max_level:   The maximum number of caller levels to include.
        :return:            The tree. It is empty (it does not even contain the root) if the function is not in the
                            call graph or has no callers within ``max_level`` levels.
        """
        from collections import deque  # pylint:disable=import-outside-toplevel

        tree = networkx.DiGraph()

        if func_addr not in callgraph:
            return tree

        queue = deque([(0, func_addr)])
        traversed = {func_addr}
        while queue:
            level, addr = queue.popleft()
            if level + 1 > max_level:
                continue
            for pred in callgraph.predecessors(addr):
                if pred not in traversed:
                    traversed.add(pred)
                    queue.append((level + 1, pred))
                    tree.add_edge(addr, pred)

        return tree

    @staticmethod
    def is_libname(name: str) -> bool:
        """
        Test if a string is the name of a library in SIM_LIBRARIES. The comparison is done in lower case; for a name
        without a dot, the name with ".dll" or ".exe" appended is tried.
        """
        name = name.lower()
        if name in SIM_LIBRARIES:
            return True
        if "." not in name:
            return name + ".dll" in SIM_LIBRARIES or name + ".exe" in SIM_LIBRARIES
        return False

    @staticmethod
    def is_apiname(name: str) -> bool:
        """
        Test if any non-syscall library in SIM_LIBRARIES has a prototype for a function with the given name.
        """
        return any(not isinstance(lib, SimSyscallLibrary) and lib.has_prototype(name) for lib in SIM_LIBRARIES.values())
311
+
312
+
313
+ AnalysesHub.register_default("APIObfuscationFinder", APIObfuscationFinder)
@@ -0,0 +1,51 @@
1
+ from __future__ import annotations
2
+ from ailment.expression import Const, Load
3
+
4
+ from angr import SIM_LIBRARIES
5
+ from angr.calling_conventions import default_cc
6
+ from angr.analyses.decompiler.peephole_optimizations.base import PeepholeOptimizationExprBase
7
+ from angr.analyses.decompiler.peephole_optimizations import EXPR_OPTS
8
+
9
+
10
class APIObfType1PeepholeOptimizer(PeepholeOptimizationExprBase):
    """
    Replace a load from a known type-1 deobfuscated API variable with a constant holding the address of the API
    function.
    """

    __slots__ = ()

    NAME = "Simplify Type 1 API obfuscation references"
    expr_classes = (Load,)

    def optimize(self, expr: Load, **kwargs):
        """
        :param expr:    The Load expression to optimize.
        :return:        A Const with the address of the API function, or None if the load does not read a
                        pointer-sized value from the address of a known deobfuscated API variable.
        """
        load_addr = expr.addr
        if not isinstance(load_addr, Const):
            return None
        if load_addr.value not in self.kb.obfuscations.type1_deobfuscated_apis:
            return None
        if expr.bits != self.project.arch.bits:
            return None

        # this is actually a function calling a known API
        # replace it with the actual API and the actual arguments
        _, funcname = self.kb.obfuscations.type1_deobfuscated_apis[load_addr.value]
        if funcname in self.kb.functions:
            func = self.kb.functions[funcname]
        else:
            # assign a new function on-demand
            symbol = self.project.loader.extern_object.make_extern(funcname)
            stub = SIM_LIBRARIES["linux"].get_stub(funcname, self.project.arch)
            hook_addr = self.project.hook_symbol(symbol.rebased_addr, stub)
            func = self.kb.functions.function(addr=hook_addr, name=funcname, create=True)
            func.is_simprocedure = True

            # the platform is only passed along when the project has a SimOS
            simos = self.project.simos
            default_cc_kwargs = {} if simos is None else {"platform": simos.name}
            default_cc_cls = default_cc(self.project.arch.name, **default_cc_kwargs)
            if default_cc_cls is not None:
                func.calling_convention = default_cc_cls(self.project.arch)
            func.find_declaration(ignore_binary_name=True)

        return Const(expr.idx, None, func.addr, self.project.arch.bits, **expr.tags)
49
+
50
+
51
+ EXPR_OPTS.append(APIObfType1PeepholeOptimizer)
@@ -0,0 +1,85 @@
1
+ # pylint:disable=no-self-use,unused-argument,attribute-defined-outside-init
2
+ from __future__ import annotations
3
+
4
+ import pyvex
5
+
6
+ from angr.engines.light import SimEngineLightVEXMixin
7
+
8
+
9
class IRSBRegisterCollector(SimEngineLightVEXMixin):
    """
    Scan the VEX IRSB of a block and collect the registers that are read.

    Every Get expression that is handled adds an ``(offset, result size)`` pair to ``reg_reads``. The other handlers
    that are overridden in this class do nothing.
    """

    def __init__(self, block, *args, **kwargs):
        """
        :param block:   The block whose VEX IRSB will be scanned; ``process()`` reads ``block.vex.tyenv``.
        """
        super().__init__(*args, **kwargs)

        self.block = block
        # (register offset, result size of the Get expression)
        self.reg_reads: set[tuple[int, int]] = set()

    def process(self):
        """
        Process the statements of the block, then clear the statement index and the instruction address.
        """
        self.tmps = {}
        self.tyenv = self.block.vex.tyenv

        self._process_Stmt()

        self.stmt_idx = self.ins_addr = None

    #
    # The only handler that records anything
    #

    def _handle_Get(self, expr: pyvex.IRExpr.Get):
        """Record the register read."""
        self.reg_reads.add((expr.offset, expr.result_size(self.tyenv)))

    #
    # Statement handlers: nothing to collect
    #

    def _handle_Put(self, stmt):
        """Ignored."""

    def _handle_Store(self, stmt):
        """Ignored."""

    def _handle_StoreG(self, stmt):
        """Ignored."""

    def _handle_LoadG(self, stmt):
        """Ignored."""

    def _handle_LLSC(self, stmt: pyvex.IRStmt.LLSC):
        """Ignored."""

    #
    # Expression handlers: nothing to collect
    #

    def _handle_Load(self, expr):
        """Ignored."""

    def _handle_RdTmp(self, expr):
        """Ignored."""

    def _handle_Conversion(self, expr: pyvex.IRExpr.Unop):
        """Ignored."""

    def _handle_16HLto32(self, expr):
        """Ignored."""

    def _handle_Cmp_v(self, expr, _vector_size, _vector_count):
        """Ignored."""

    # all vector comparisons share the same no-op handler
    _handle_CmpEQ_v = _handle_CmpNE_v = _handle_CmpLE_v = _handle_Cmp_v
    _handle_CmpLT_v = _handle_CmpGE_v = _handle_CmpGT_v = _handle_Cmp_v

    def _handle_ExpCmpNE64(self, expr):
        """Ignored."""

    def _handle_CCall(self, expr):
        """Ignored."""

    def _handle_function(self, func_addr):
        """Ignored."""

    def _handle_Unop(self, expr):
        """Ignored."""

    def _handle_Binop(self, expr: pyvex.IRExpr.Binop):
        """Ignored."""

    def _handle_Triop(self, expr: pyvex.IRExpr.Triop):
        """Ignored."""