angr 9.2.124__py3-none-macosx_11_0_arm64.whl → 9.2.126__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (53) hide show
  1. angr/__init__.py +1 -1
  2. angr/analyses/__init__.py +13 -1
  3. angr/analyses/codecave.py +77 -0
  4. angr/analyses/decompiler/ail_simplifier.py +1 -0
  5. angr/analyses/decompiler/callsite_maker.py +9 -1
  6. angr/analyses/decompiler/clinic.py +32 -2
  7. angr/analyses/decompiler/condition_processor.py +104 -66
  8. angr/analyses/decompiler/decompiler.py +7 -0
  9. angr/analyses/decompiler/optimization_passes/__init__.py +18 -1
  10. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +6 -0
  11. angr/analyses/decompiler/optimization_passes/tag_slicer.py +41 -0
  12. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +2 -2
  13. angr/analyses/decompiler/return_maker.py +1 -0
  14. angr/analyses/decompiler/ssailification/rewriting.py +4 -0
  15. angr/analyses/decompiler/ssailification/rewriting_engine.py +10 -3
  16. angr/analyses/decompiler/structured_codegen/c.py +18 -2
  17. angr/analyses/deobfuscator/__init__.py +18 -0
  18. angr/analyses/deobfuscator/api_obf_finder.py +313 -0
  19. angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +51 -0
  20. angr/analyses/deobfuscator/irsb_reg_collector.py +85 -0
  21. angr/analyses/deobfuscator/string_obf_finder.py +774 -0
  22. angr/analyses/deobfuscator/string_obf_opt_passes.py +133 -0
  23. angr/analyses/deobfuscator/string_obf_peephole_optimizer.py +47 -0
  24. angr/analyses/patchfinder.py +137 -0
  25. angr/analyses/pathfinder.py +282 -0
  26. angr/analyses/reaching_definitions/function_handler_library/stdio.py +8 -1
  27. angr/analyses/smc.py +159 -0
  28. angr/analyses/unpacker/__init__.py +6 -0
  29. angr/analyses/unpacker/obfuscation_detector.py +103 -0
  30. angr/analyses/unpacker/packing_detector.py +138 -0
  31. angr/angrdb/models.py +1 -2
  32. angr/calling_conventions.py +3 -1
  33. angr/engines/vex/claripy/irop.py +10 -5
  34. angr/engines/vex/heavy/heavy.py +2 -0
  35. angr/exploration_techniques/spiller_db.py +1 -2
  36. angr/knowledge_plugins/__init__.py +2 -0
  37. angr/knowledge_plugins/functions/function.py +4 -0
  38. angr/knowledge_plugins/functions/function_manager.py +18 -9
  39. angr/knowledge_plugins/functions/function_parser.py +1 -1
  40. angr/knowledge_plugins/functions/soot_function.py +1 -0
  41. angr/knowledge_plugins/obfuscations.py +36 -0
  42. angr/lib/angr_native.dylib +0 -0
  43. angr/misc/ux.py +2 -2
  44. angr/project.py +17 -1
  45. angr/state_plugins/history.py +6 -4
  46. angr/utils/bits.py +4 -0
  47. angr/utils/tagged_interval_map.py +112 -0
  48. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/METADATA +6 -6
  49. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/RECORD +53 -36
  50. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/WHEEL +1 -1
  51. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/LICENSE +0 -0
  52. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/entry_points.txt +0 -0
  53. {angr-9.2.124.dist-info → angr-9.2.126.dist-info}/top_level.txt +0 -0
angr/analyses/smc.py ADDED
@@ -0,0 +1,159 @@
1
+ from __future__ import annotations
2
+ import logging
3
+ import random
4
+
5
+ from enum import auto, IntFlag
6
+ from collections.abc import Generator
7
+
8
+ import angr
9
+ from angr.analyses import Analysis, AnalysesHub
10
+ from angr.knowledge_plugins import Function
11
+ from angr.sim_state import SimState
12
+
13
+ from angr.utils.tagged_interval_map import TaggedIntervalMap
14
+
15
+
16
+ log = logging.getLogger(__name__)
17
+ log.setLevel(logging.INFO)
18
+
19
+
20
class TraceActions(IntFlag):
    """
    Bit flags naming the kinds of memory access recorded in a trace.

    The members are flags so that a single range can carry several of them at once.
    """

    # The range was the target of a memory write.
    WRITE = auto()
    # The range was executed as code.
    EXECUTE = auto()
27
+
28
+
29
class TraceClassifier:
    """
    Record which memory ranges a state writes to and which it executes, so both can be compared afterwards.
    """

    def __init__(self, state: SimState | None = None):
        """
        :param state: Optional state to instrument right away.
        """
        # Every observed access is stored as an (address, length) range tagged with TraceActions flags.
        self.map = TaggedIntervalMap()
        if state:
            self.instrument(state)

    def act_mem_write(self, state) -> None:
        """
        SimInspect callback for memory writes: tag the written range with WRITE.
        """
        inspect = state.inspect
        write_addr = state.solver.eval(inspect.mem_write_address)
        write_len = inspect.mem_write_length
        # The length is only handed to the solver when it is not already a plain int.
        write_len = write_len if isinstance(write_len, int) else state.solver.eval(write_len)
        self.map.add(write_addr, write_len, TraceActions.WRITE)

    def act_instruction(self, state) -> None:
        """
        SimInspect callback for instruction execution: tag the current block with EXECUTE.
        """
        insn_addr = state.inspect.instruction
        if insn_addr is not None:
            # FIXME: Ensure block size is correct
            self.map.add(insn_addr, state.block().size, TraceActions.EXECUTE)
            return

        log.warning("Symbolic addr")

    def instrument(self, state) -> None:
        """
        Install the tracing breakpoints on `state`.
        """
        hooks = (
            ("mem_write", self.act_mem_write),
            ("instruction", self.act_instruction),
        )
        for event, callback in hooks:
            state.inspect.b(event, when=angr.BP_BEFORE, action=callback)

    def get_smc_address_and_lengths(self) -> Generator[tuple[int, int]]:
        """
        Yield (address, length) for every recorded range that was both written to and executed.
        """
        written_and_executed = TraceActions.WRITE | TraceActions.EXECUTE
        yield from (
            (start, length)
            for start, length, tags in self.map.irange()
            if (tags & written_and_executed) == written_and_executed
        )

    def determine_smc(self) -> bool:
        """
        Tell whether at least one recorded range was both written to and executed.
        """
        # Every yielded item is a non-empty tuple, so the first hit already settles the answer.
        for _ in self.get_smc_address_and_lengths():
            return True
        return False

    def pp(self):
        """
        Print every recorded range as "<hex address> <length> <flags>".
        """
        for start, length, tags in self.map.irange():
            print(f"{start:8x} {length} {tags}")
86
+
87
+
88
class SelfModifyingCodeAnalysis(Analysis):
    """
    Determine if some piece of code is self-modifying.

    This determination is made by simply executing. If an address is executed
    that is also written to, the code is determined to be self-modifying. The
    determination is stored in the `result` property. The `regions` property
    contains a list of (addr, length) regions that were both written to and
    executed.
    """

    result: bool
    regions: list[tuple[int, int]]

    def __init__(self, subject: None | int | str | Function, max_bytes: int = 0, state: SimState | None = None):
        """
        :param subject: Subject of analysis: an address, a label or function name, a Function, or None to use the
                        project entry point.
        :param max_bytes: Maximum number of bytes from subject address. 0 for no limit (default). States whose
                          program counter leaves [addr, addr + max_bytes) are moved to the "oob" stash.
        :param state: State to begin executing from. It is copied and its program counter is set to the subject
                      address; when None, a call state at the subject address is created.
        :raises ValueError: If `subject` is of an unsupported type.
        """
        # The analysis relies on the project having been created with self-modifying code support.
        assert self.project.selfmodifying_code, "SMC analysis requires a project with selfmodifying_code enabled"

        if subject is None:
            subject = self.project.entry
        if isinstance(subject, str):
            # Prefer a label with that name; fall back to a function lookup by name.
            try:
                addr = self.project.kb.labels.lookup(subject)
            except KeyError:
                addr = self.project.kb.functions[subject].addr
        elif isinstance(subject, Function):
            addr = subject.addr
        elif isinstance(subject, int):
            addr = subject
        else:
            raise ValueError("Not a supported subject")

        if state is None:
            init_state = self.project.factory.call_state(addr)
        else:
            init_state = state.copy()
            init_state.regs.pc = addr

        init_state.options -= angr.sim_options.simplification

        self._trace_classifier = TraceClassifier(init_state)
        simgr = self.project.factory.simgr(init_state)

        step_kwargs = {}
        if max_bytes:
            # Keep states inside the window in "active"; everything else is parked in "oob".
            step_kwargs["filter_func"] = lambda s: (
                "active" if s.solver.eval(addr <= s.regs.pc) and s.solver.eval(s.regs.pc < addr + max_bytes) else "oob"
            )

        # FIXME: Early out on SMC detect
        # FIXME: Configurable step threshold
        # FIXME: Loop analysis

        for n in range(100):
            self._update_progress(n)
            # The filter must be handed to step(); otherwise max_bytes would have no effect at all.
            simgr.step(n=3, **step_kwargs)
            # Randomly keep at most 10 active states per round to bound the exploration.
            random.shuffle(simgr.active)
            simgr.split(from_stash="active", to_stash=simgr.DROP, limit=10)

        # Classify any out of bound entrypoints
        for state_ in simgr.stashes["oob"]:
            self._trace_classifier.act_instruction(state_)

        self.regions = list(self._trace_classifier.get_smc_address_and_lengths())
        self.result = len(self.regions) > 0
157
+
158
+
159
+ AnalysesHub.register_default("SMC", SelfModifyingCodeAnalysis)
angr/analyses/unpacker/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+ from .packing_detector import PackingDetector
3
+ from .obfuscation_detector import ObfuscationDetector
4
+
5
+
6
+ __all__ = ("PackingDetector", "ObfuscationDetector")
angr/analyses/unpacker/obfuscation_detector.py ADDED
@@ -0,0 +1,103 @@
1
+ from __future__ import annotations
2
+ import logging
3
+
4
+ import networkx
5
+
6
+ from angr.analyses.analysis import Analysis, AnalysesHub
7
+ from angr.knowledge_plugins.cfg import CFGModel
8
+
9
+ _l = logging.getLogger(__name__)
10
+
11
+
12
class ObfuscationDetector(Analysis):
    """
    This analysis detects, usually in ways that are more robust than section name matching or signature matching, the
    existence of obfuscation techniques in a binary.

    After construction, `obfuscated` tells whether any technique was recognized and `possible_obfuscators` lists the
    names of the tools whose detection routine matched.
    """

    def __init__(self, cfg: CFGModel | None = None):
        """
        :param cfg: The CFG model to inspect. When None, the most accurate CFG model in the knowledge base is used.
        """
        self.obfuscated: bool = False
        self.possible_obfuscators: list[str] = []

        if cfg is None:
            _l.warning(
                "ObfuscationDetector is using a most accurate CFG model in the knowledge base. We assume it is "
                "generated with force_smart_scan=False and force_complete_scan=False."
            )
            self._cfg = self.kb.cfgs.get_most_accurate()
        else:
            self._cfg = cfg

        self.analyze()

    def analyze(self):
        """
        Run every detection routine and record the names of the obfuscators that were recognized.
        """

        analysis_routines = [
            self._analyze_vmprotect,
        ]

        for routine in analysis_routines:
            tool = routine()
            if tool:
                self.obfuscated = True
                self.possible_obfuscators.append(tool)

    def _analyze_vmprotect(self) -> str | None:
        """
        We detect VMProtect v3 (with control-flow obfuscation) based on two main characteristics:

        - In amd64 binaries, there exists a strongly connected component in the call graph with over 1,000 nodes.
          Edge/node ratio is >= 1.3
        - There is a high number of pushf and popf instructions in the visible functions.

        :return: "vmprotect" when both characteristics are present, None otherwise.
        """

        high_scc_node_edge_ratio = False
        high_pushf = False
        high_popf = False
        high_clc = False  # pylint:disable=unused-variable

        if self.project.arch.name == "AMD64":
            cg = self.kb.functions.callgraph
            sccs = networkx.strongly_connected_components(cg)

            for scc in sccs:
                node_count = len(scc)
                if node_count > 1000:
                    # Only build the subgraph for components large enough to matter.
                    edge_count = len(networkx.subgraph(cg, scc).edges)

                    if edge_count / node_count >= 1.3:
                        high_scc_node_edge_ratio = True
                        break
        else:
            # The call-graph criterion is only checked on AMD64; other architectures pass it unconditionally.
            high_scc_node_edge_ratio = True

        pushf_ctr = 0
        popf_ctr = 0
        clc_ctr = 0  # only used for x86
        is_x86 = self.project.arch.name == "X86"
        cfg_node_count = len(self._cfg.graph)
        for node in self._cfg.nodes():
            if node.size > 0 and node.instruction_addrs:
                block = node.block
                for insn in block.capstone.insns:
                    if insn.mnemonic in {"pushf", "pushfd", "pushfq"}:
                        pushf_ctr += 1
                    elif insn.mnemonic in {"popf", "popfd", "popfq"}:
                        popf_ctr += 1
                    elif is_x86 and insn.mnemonic == "clc":
                        clc_ctr += 1

        # An instruction counts as "high" when it occurs more often than 0.2% of the number of CFG nodes.
        if pushf_ctr > cfg_node_count * 0.002:
            high_pushf = True
        if popf_ctr > cfg_node_count * 0.002:
            high_popf = True
        if not is_x86 or clc_ctr > cfg_node_count * 0.002:
            # Computed but not part of the verdict below.
            high_clc = True  # noqa: F841

        if high_scc_node_edge_ratio and high_pushf and high_popf:
            return "vmprotect"
        return None
101
+
102
+
103
+ AnalysesHub.register_default("ObfuscationDetector", ObfuscationDetector)
angr/analyses/unpacker/packing_detector.py ADDED
@@ -0,0 +1,138 @@
1
+ from __future__ import annotations
2
+ from typing import TYPE_CHECKING
3
+ import math
4
+ import logging
5
+
6
+ from angr.analyses.analysis import Analysis, AnalysesHub
7
+ from angr.knowledge_plugins.cfg import CFGModel
8
+
9
+
10
+ if TYPE_CHECKING:
11
+ from cle import Section
12
+
13
+ _l = logging.getLogger(__name__)
14
+
15
+
16
class PackingDetector(Analysis):
    """
    This analysis detects if a binary is likely packed or not. We may extend it to identify which packer is in use in
    the future.

    The verdict is stored in `packed` once the constructor returns.
    """

    # Minimum number of uncovered bytes required before the entropy is taken into account.
    PACKED_MIN_BYTES = 256
    # Minimum entropy (base-256 logarithm, so the value lies in [0, 1]) for the binary to count as packed.
    PACKED_ENTROPY_MIN_THRESHOLD = 0.88

    def __init__(self, cfg: CFGModel | None = None, region_size_threshold: int = 0x20):
        """
        :param cfg: The CFG model to inspect. When None, the most accurate CFG model in the knowledge base is used.
        :param region_size_threshold: Uncovered regions must be larger than this many bytes to be considered.
        """
        self.packed: bool = False
        self.region_size_threshold: int = region_size_threshold

        if cfg is None:
            _l.warning(
                "PackingDetector is using a most accurate CFG model in the knowledge base. We assume it is "
                "generated with force_smart_scan=False and force_complete_scan=False."
            )
            self._cfg = self.kb.cfgs.get_most_accurate()
        else:
            self._cfg = cfg

        self.analyze()

    def analyze(self):
        """
        Decide whether the binary looks packed and store the verdict in `packed`.
        """
        # assume we already have a CFG with complete scanning disabled
        # collect all regions that are not covered by the CFG in r+x sections, and then compute the entropy. we believe
        # the binary is packed if it is beyond a threshold

        covered_regions: list[tuple[int, int]] = []
        last_known_section: Section | None = None
        for node in sorted(self._cfg.nodes(), key=lambda n: n.addr):
            section = None
            if last_known_section is not None and last_known_section.contains_addr(node.addr):
                # fast path: the node lives in the same section as a previous node
                section = last_known_section
            if section is None:
                section = self.project.loader.find_section_containing(node.addr)
                if section is None:
                    # this node does not belong to any known section - ignore it
                    continue
                if section.is_readable and section.is_executable:
                    last_known_section = section

            if node.size == 0:
                # ignore empty nodes
                continue

            node_end = node.addr + node.size
            if not covered_regions:
                covered_regions.append((node.addr, node_end))
            else:
                last_item = covered_regions[-1]
                if last_item[0] <= node.addr <= last_item[1] < node_end:
                    # the node starts inside (or right at the end of) the last region and extends it
                    covered_regions[-1] = last_item[0], node_end
                else:
                    # add a new item
                    covered_regions.append((node.addr, node_end))

        # now we get the uncovered regions
        uncovered_regions: list[tuple[int, int]] = self._get_uncovered_regions(covered_regions)

        # compute entropy
        total_bytes, entropy = self._compute_entropy(uncovered_regions)

        self.packed = total_bytes >= self.PACKED_MIN_BYTES and entropy >= self.PACKED_ENTROPY_MIN_THRESHOLD

    def _get_uncovered_regions(self, covered_regions: list[tuple[int, int]]) -> list[tuple[int, int]]:
        """
        Collect the gaps between covered regions inside readable and executable sections of the main object.

        :param covered_regions: (start, end) address pairs covered by CFG nodes, ordered by start address.
        :return: (start, end) address pairs of gaps larger than `region_size_threshold`.
        """
        # FIXME: We only support binaries with sections. Add support for segments in the future
        all_executable_sections = sorted(
            (
                sec
                for sec in self.project.loader.main_object.sections
                if sec.is_executable and sec.is_readable and not sec.only_contains_uninitialized_data
            ),
            key=lambda sec: sec.vaddr,
        )
        idx = 0

        uncovered_regions: list[tuple[int, int]] = []
        for section in all_executable_sections:
            section_end = section.vaddr + section.memsize
            if idx >= len(covered_regions):
                # no covered region is left: the whole section is uncovered
                if section.memsize > self.region_size_threshold:
                    uncovered_regions.append((section.vaddr, section_end))
            else:
                i = idx
                last_end = section.vaddr
                while i < len(covered_regions):
                    region_start, region_end = covered_regions[i]
                    if region_end >= section_end:
                        # move on to the next section
                        break
                    if last_end < region_start and region_start - last_end > self.region_size_threshold:
                        uncovered_regions.append((last_end, region_start))
                    i += 1
                    last_end = max(last_end, region_end)
                # NOTE(review): the gap between last_end and the end of the section is never reported here -
                # confirm whether that is intended before changing the heuristic.
                idx = i

        return uncovered_regions

    def _compute_entropy(self, regions: list[tuple[int, int]]) -> tuple[int, float]:
        """
        Compute the byte entropy over the given memory regions.

        :param regions: (start, end) address pairs to load from the loader memory.
        :return: The total number of bytes inspected and their entropy, using a base-256 logarithm.
        """
        byte_counts = [0] * 256

        for start, end in regions:
            for b in self.project.loader.memory.load(start, end - start):
                byte_counts[b] += 1

        total = sum(byte_counts)
        if total == 0:
            return 0, 0.0

        entropy = 0.0
        for count in byte_counts:
            if count == 0:
                # a byte value that never occurs contributes nothing
                continue
            p = count / total
            entropy -= p * math.log(p, 256)

        return total, entropy
136
+
137
+
138
+ AnalysesHub.register_default("PackingDetector", PackingDetector)
angr/angrdb/models.py CHANGED
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
  from sqlalchemy import Column, Integer, String, Boolean, BLOB, ForeignKey
3
- from sqlalchemy.orm import relationship
4
- from sqlalchemy.ext.declarative import declarative_base
3
+ from sqlalchemy.orm import declarative_base, relationship
5
4
 
6
5
  Base = declarative_base()
7
6
 
angr/calling_conventions.py CHANGED
@@ -1061,7 +1061,9 @@ class SimCC:
1061
1061
  if isinstance(arg, claripy.ast.BV):
1062
1062
  if isinstance(ty, (SimTypeReg, SimTypeNum)):
1063
1063
  if len(arg) != ty.size:
1064
- raise TypeError("Type mismatch: expected %s, got %d bits" % (ty, len(arg)))
1064
+ if arg.concrete:
1065
+ return claripy.BVV(arg.concrete_value, ty.size)
1066
+ raise TypeError("Type mismatch of symbolic data: expected %s, got %d bits" % (ty, len(arg)))
1065
1067
  return arg
1066
1068
  if isinstance(ty, (SimTypeFloat)):
1067
1069
  raise TypeError(
angr/engines/vex/claripy/irop.py CHANGED
@@ -2,6 +2,7 @@
2
2
  This module contains symbolic implementations of VEX operations.
3
3
  """
4
4
 
5
+ # pylint:disable=no-member
5
6
  from __future__ import annotations
6
7
 
7
8
  from functools import partial
@@ -10,14 +11,17 @@ import itertools
10
11
  import operator
11
12
  import math
12
13
  import re
13
-
14
14
  import logging
15
15
 
16
- l = logging.getLogger(name=__name__)
17
-
18
16
  import pyvex
19
17
  import claripy
20
18
 
19
+ from angr.errors import UnsupportedIROpError, SimOperationError, SimValueError, SimZeroDivisionException
20
+
21
+
22
+ l = logging.getLogger(name=__name__)
23
+
24
+
21
25
  #
22
26
  # The more sane approach
23
27
  #
@@ -1044,6 +1048,9 @@ class SimIROp:
1044
1048
  exp_threshold = (2 ** (exp_bits - 1) - 1) + mantissa_bits
1045
1049
  return claripy.If(exp_bv >= exp_threshold, args[1].raw_to_fp(), rounded_fp)
1046
1050
 
1051
+ def _op_fgeneric_RSqrtEst(self, arg): # pylint:disable=no-self-use
1052
+ return claripy.BVS("RSqrtEst", arg.size())
1053
+
1047
1054
  def _generic_pack_saturation(self, args, src_size, dst_size, src_signed, dst_signed):
1048
1055
  """
1049
1056
  Generic pack with saturation.
@@ -1255,6 +1262,4 @@ def vexop_to_simop(op, extended=True, fp=True):
1255
1262
  return res
1256
1263
 
1257
1264
 
1258
- from angr.errors import UnsupportedIROpError, SimOperationError, SimValueError, SimZeroDivisionException
1259
-
1260
1265
  make_operations()
angr/engines/vex/heavy/heavy.py CHANGED
@@ -90,6 +90,7 @@ class HeavyVEXMixin(SuccessorsMixin, ClaripyDataMixin, SimStateStorageMixin, VEX
90
90
  num_inst=None,
91
91
  extra_stop_points=None,
92
92
  opt_level=None,
93
+ strict_block_end=None,
93
94
  **kwargs,
94
95
  ):
95
96
  if not pyvex.lifting.lifters[self.state.arch.name] or type(successors.addr) is not int:
@@ -144,6 +145,7 @@ class HeavyVEXMixin(SuccessorsMixin, ClaripyDataMixin, SimStateStorageMixin, VEX
144
145
  num_inst=num_inst,
145
146
  extra_stop_points=extra_stop_points,
146
147
  opt_level=opt_level,
148
+ strict_block_end=strict_block_end,
147
149
  )
148
150
 
149
151
  if (
angr/exploration_techniques/spiller_db.py CHANGED
@@ -5,8 +5,7 @@ import datetime
5
5
  try:
6
6
  import sqlalchemy
7
7
  from sqlalchemy import Column, Integer, String, Boolean, DateTime, create_engine
8
- from sqlalchemy.orm import sessionmaker
9
- from sqlalchemy.ext.declarative import declarative_base
8
+ from sqlalchemy.orm import declarative_base, sessionmaker
10
9
  from sqlalchemy.exc import OperationalError
11
10
 
12
11
  Base = declarative_base()
angr/knowledge_plugins/__init__.py CHANGED
@@ -18,6 +18,7 @@ from .types import TypesStore
18
18
  from .callsite_prototypes import CallsitePrototypes
19
19
  from .custom_strings import CustomStrings
20
20
  from .decompilation import DecompilationManager
21
+ from .obfuscations import Obfuscations
21
22
 
22
23
 
23
24
  __all__ = (
@@ -40,4 +41,5 @@ __all__ = (
40
41
  "CallsitePrototypes",
41
42
  "CustomStrings",
42
43
  "DecompilationManager",
44
+ "Obfuscations",
43
45
  )
angr/knowledge_plugins/functions/function.py CHANGED
@@ -56,6 +56,7 @@ class Function(Serializable):
56
56
  "addr",
57
57
  "is_simprocedure",
58
58
  "_name",
59
+ "previous_names",
59
60
  "is_default_name",
60
61
  "from_signature",
61
62
  "binary_name",
@@ -224,6 +225,7 @@ class Function(Serializable):
224
225
  else:
225
226
  self.is_default_name = False
226
227
  self._name = name
228
+ self.previous_names = []
227
229
  self.from_signature = None
228
230
 
229
231
  # Determine the name the binary where this function is.
@@ -274,6 +276,7 @@ class Function(Serializable):
274
276
 
275
277
  @name.setter
276
278
  def name(self, v):
279
+ self.previous_names.append(self._name)
277
280
  self._name = v
278
281
  self._function_manager._kb.labels[self.addr] = v
279
282
 
@@ -1667,6 +1670,7 @@ class Function(Serializable):
1667
1670
  func._endpoints = self._endpoints.copy()
1668
1671
  func._call_sites = self._call_sites.copy()
1669
1672
  func._project = self._project
1673
+ func.previous_names = list(self.previous_names)
1670
1674
  func.is_plt = self.is_plt
1671
1675
  func.is_simprocedure = self.is_simprocedure
1672
1676
  func.binary_name = self.binary_name
angr/knowledge_plugins/functions/function_manager.py CHANGED
@@ -313,7 +313,7 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
313
313
  if isinstance(k, self.function_address_types):
314
314
  f = self.function(addr=k)
315
315
  elif type(k) is str:
316
- f = self.function(name=k)
316
+ f = self.function(name=k) or self.function(name=k, check_previous_names=True)
317
317
  else:
318
318
  raise ValueError(f"FunctionManager.__getitem__ does not support keys of type {type(k)}")
319
319
 
@@ -350,9 +350,9 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
350
350
  def get_by_addr(self, addr) -> Function:
351
351
  return self._function_map.get(addr)
352
352
 
353
- def get_by_name(self, name: str) -> Generator[Function]:
353
+ def get_by_name(self, name: str, check_previous_names: bool = False) -> Generator[Function]:
354
354
  for f in self._function_map.values():
355
- if f.name == name:
355
+ if f.name == name or (check_previous_names and name in f.previous_names):
356
356
  yield f
357
357
 
358
358
  def _function_added(self, func: Function):
@@ -411,7 +411,7 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
411
411
  except KeyError:
412
412
  return None
413
413
 
414
- def query(self, query: str) -> Function | None:
414
+ def query(self, query: str, check_previous_names: bool = False) -> Function | None:
415
415
  """
416
416
  Query for a function using selectors to disambiguate. Supported variations:
417
417
 
@@ -430,19 +430,21 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
430
430
  addr = int(matches.group(2), 0)
431
431
  try:
432
432
  func = self._function_map.get(addr)
433
- if func.name == name:
433
+ if func.name == name or (check_previous_names and name in func.previous_names):
434
434
  return func
435
435
  except KeyError:
436
436
  pass
437
437
 
438
438
  obj_name = selector or self._kb._project.loader.main_object.binary_basename
439
- for func in self.get_by_name(name):
439
+ for func in self.get_by_name(name, check_previous_names=check_previous_names):
440
440
  if func.binary_name == obj_name:
441
441
  return func
442
442
 
443
443
  return None
444
444
 
445
- def function(self, addr=None, name=None, create=False, syscall=False, plt=None) -> Function | None:
445
+ def function(
446
+ self, addr=None, name=None, check_previous_names=False, create=False, syscall=False, plt=None
447
+ ) -> Function | None:
446
448
  """
447
449
  Get a function object from the function manager.
448
450
 
@@ -457,6 +459,13 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
457
459
  :return: The Function instance, or None if the function is not found and create is False.
458
460
  :rtype: Function or None
459
461
  """
462
+ if name is not None and name.startswith("sub_"):
463
+ try:
464
+ addr = int(name.split("_")[-1], 16)
465
+ name = None
466
+ except ValueError:
467
+ pass
468
+
460
469
  if addr is not None:
461
470
  try:
462
471
  f = self._function_map.get(addr)
@@ -472,11 +481,11 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
472
481
  f.is_syscall = True
473
482
  return f
474
483
  elif name is not None:
475
- func = self.query(name)
484
+ func = self.query(name, check_previous_names=check_previous_names)
476
485
  if func is not None:
477
486
  return func
478
487
 
479
- for func in self.get_by_name(name):
488
+ for func in self.get_by_name(name, check_previous_names=check_previous_names):
480
489
  if plt is None or func.is_plt == plt:
481
490
  return func
482
491
 
angr/knowledge_plugins/functions/function_parser.py CHANGED
@@ -33,7 +33,7 @@ class FunctionParser:
33
33
  obj.is_syscall = function.is_syscall
34
34
  obj.is_simprocedure = function.is_simprocedure
35
35
  obj.returning = function.returning
36
- obj.alignment = function.alignment
36
+ obj.alignment = function.is_alignment
37
37
  obj.binary_name = function.binary_name or ""
38
38
  obj.normalized = function.normalized
39
39
 
angr/knowledge_plugins/functions/soot_function.py CHANGED
@@ -34,6 +34,7 @@ class SootFunction(Function):
34
34
  # block nodes (basic block nodes) at whose ends the function terminates
35
35
  # in theory, if everything works fine, endpoints == ret_sites | jumpout_sites | callout_sites
36
36
  self._endpoints = defaultdict(set)
37
+ self.previous_names = []
37
38
 
38
39
  self._call_sites = {}
39
40
  self.addr = addr
angr/knowledge_plugins/obfuscations.py ADDED
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ from .plugin import KnowledgeBasePlugin
4
+
5
+
6
class Obfuscations(KnowledgeBasePlugin):
    """
    Store discovered information and artifacts about (string) obfuscation techniques in the project.
    """

    def __init__(self, kb):
        """
        :param kb: The knowledge base this plugin belongs to.
        """
        super().__init__(kb)

        # string obfuscation: set once the analysis has run, followed by its per-type results
        self.obfuscated_strings_analyzed: bool = False
        self.type1_deobfuscated_strings = {}
        self.type1_string_loader_candidates = set()
        self.type2_deobfuscated_strings = {}
        self.type2_string_loader_candidates = set()
        self.type3_deobfuscated_strings = {}  # from the address of the call instruction to the actual string (in bytes)

        # API obfuscation: set once the analysis has run, followed by its results
        self.obfuscated_apis_analyzed: bool = False
        self.type1_deobfuscated_apis: dict[int, tuple[str, str]] = {}

    def copy(self):
        """
        Create a copy of this plugin whose containers are independent (shallow) copies.

        :return: A new Obfuscations instance attached to the same knowledge base.
        """
        o = Obfuscations(self._kb)
        # Carry the "analyzed" flags over as well, so the copy does not claim to be unanalyzed while holding results.
        o.obfuscated_strings_analyzed = self.obfuscated_strings_analyzed
        o.type1_deobfuscated_strings = dict(self.type1_deobfuscated_strings)
        o.type1_string_loader_candidates = self.type1_string_loader_candidates.copy()
        o.type2_deobfuscated_strings = dict(self.type2_deobfuscated_strings)
        o.type2_string_loader_candidates = self.type2_string_loader_candidates.copy()
        o.type3_deobfuscated_strings = self.type3_deobfuscated_strings.copy()

        o.obfuscated_apis_analyzed = self.obfuscated_apis_analyzed
        o.type1_deobfuscated_apis = self.type1_deobfuscated_apis.copy()
        return o
34
+
35
+
36
+ KnowledgeBasePlugin.register_default("obfuscations", Obfuscations)