angr 9.2.156__cp310-cp310-macosx_11_0_arm64.whl → 9.2.157__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (35)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_base.py +87 -71
  3. angr/analyses/cfg/cfg_fast.py +5 -0
  4. angr/analyses/decompiler/clinic.py +182 -104
  5. angr/analyses/decompiler/decompiler.py +11 -0
  6. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +1 -1
  7. angr/analyses/decompiler/structured_codegen/c.py +18 -5
  8. angr/analyses/disassembly.py +5 -11
  9. angr/analyses/s_propagator.py +2 -4
  10. angr/analyses/stack_pointer_tracker.py +3 -7
  11. angr/analyses/typehoon/simple_solver.py +3 -3
  12. angr/analyses/variable_recovery/engine_base.py +2 -8
  13. angr/analyses/variable_recovery/variable_recovery.py +4 -3
  14. angr/calling_conventions.py +3 -3
  15. angr/engines/hook.py +1 -1
  16. angr/engines/icicle.py +229 -0
  17. angr/engines/pcode/behavior.py +1 -4
  18. angr/engines/pcode/emulate.py +1 -4
  19. angr/engines/pcode/lifter.py +2 -10
  20. angr/engines/vex/claripy/irop.py +2 -2
  21. angr/knowledge_plugins/functions/function.py +18 -10
  22. angr/knowledge_plugins/functions/function_manager.py +68 -5
  23. angr/knowledge_plugins/variables/variable_manager.py +15 -3
  24. angr/lib/angr_native.dylib +0 -0
  25. angr/rustylib.cpython-310-darwin.so +0 -0
  26. angr/sim_variable.py +31 -0
  27. angr/storage/memory_mixins/address_concretization_mixin.py +2 -2
  28. angr/storage/memory_mixins/convenient_mappings_mixin.py +1 -1
  29. {angr-9.2.156.dist-info → angr-9.2.157.dist-info}/METADATA +7 -8
  30. {angr-9.2.156.dist-info → angr-9.2.157.dist-info}/RECORD +34 -34
  31. {angr-9.2.156.dist-info → angr-9.2.157.dist-info}/WHEEL +1 -1
  32. angr/rustylib.pyi +0 -165
  33. {angr-9.2.156.dist-info → angr-9.2.157.dist-info}/entry_points.txt +0 -0
  34. {angr-9.2.156.dist-info → angr-9.2.157.dist-info}/licenses/LICENSE +0 -0
  35. {angr-9.2.156.dist-info → angr-9.2.157.dist-info}/top_level.txt +0 -0
@@ -676,7 +676,7 @@ class SimpleSolver:
676
676
 
677
677
  @staticmethod
678
678
  def _get_all_paths(
679
- graph: networkx.DiGraph,
679
+ graph: networkx.DiGraph[TypeVariable | DerivedTypeVariable],
680
680
  sketch: Sketch,
681
681
  node: DerivedTypeVariable,
682
682
  visited: dict[TypeVariable | DerivedTypeVariable, SketchNode],
@@ -684,7 +684,7 @@ class SimpleSolver:
684
684
  if node not in graph:
685
685
  return
686
686
  curr_node = visited[node]
687
- for _, succ, data in graph.out_edges(node, data=True):
687
+ for _, succ, data in sorted(graph.out_edges(node, data=True), key=lambda x: str(x[1])):
688
688
  label = data["label"]
689
689
  if succ not in visited:
690
690
  if isinstance(curr_node.typevar, DerivedTypeVariable):
@@ -1408,7 +1408,7 @@ class SimpleSolver:
1408
1408
  visited.add(curr_node)
1409
1409
 
1410
1410
  out_edges = sketch.graph.out_edges(curr_node, data=True)
1411
- for _, succ, data in out_edges:
1411
+ for _, succ, data in sorted(out_edges, key=lambda x: str(x[1])):
1412
1412
  if isinstance(succ, RecursiveRefNode):
1413
1413
  ref = succ
1414
1414
  succ: SketchNode | None = sketch.lookup(succ.target) # type: ignore
@@ -784,20 +784,14 @@ class SimEngineVRBase(
784
784
 
785
785
  all_vars = {(0, variable) for variable in variables}
786
786
 
787
- all_vars_list = list(all_vars)
787
+ all_vars_list = sorted(all_vars, key=lambda val: (val[0], val[1].key), reverse=True)
788
788
 
789
789
  if len(all_vars_list) > 1:
790
- # sort by some value so that the outcome here isn't random
791
- cast(list[tuple[int, SimStackVariable]], all_vars_list).sort(
792
- reverse=True,
793
- key=lambda val: (val[0], val[1].offset, val[1].base, val[1].base_addr, val[1].size),
794
- )
795
-
796
790
  l.warning(
797
791
  "Reading memory with overlapping variables: %s. Ignoring all but the first one.", all_vars_list
798
792
  )
799
793
 
800
- var_offset, var = next(iter(all_vars_list)) # won't fail
794
+ var_offset, var = all_vars_list[0] # won't fail
801
795
  # calculate variable_offset
802
796
  if dynamic_offset is None:
803
797
  offset_into_variable = var_offset
@@ -324,14 +324,15 @@ class VariableRecoveryState(VariableRecoveryStateBase):
324
324
  except SimMemoryMissingError:
325
325
  pass
326
326
 
327
- if len(existing_variables) > 1:
327
+ existing_vars_list = sorted(existing_variables, key=lambda val: (val[0], val[1].key), reverse=True)
328
+ if len(existing_vars_list) > 1:
328
329
  # create a phi node for all other variables
329
330
  l.warning(
330
331
  "Reading memory with overlapping variables: %s. Ignoring all but the first one.", existing_variables
331
332
  )
332
333
 
333
- if existing_variables:
334
- offset, variable = next(iter(existing_variables))
334
+ if existing_vars_list:
335
+ offset, variable = existing_vars_list[0]
335
336
  self.variable_manager[self.func_addr].read_from(variable, offset, self._codeloc_from_state(state))
336
337
 
337
338
  def _hook_memory_write(self, state):
@@ -725,7 +725,7 @@ class SimCC:
725
725
  """
726
726
  session = self.ArgSession(self)
727
727
  if self.return_in_implicit_outparam(ret_ty):
728
- self.next_arg(session, SimTypePointer(SimTypeBottom()))
728
+ self.next_arg(session, SimTypePointer(SimTypeBottom()).with_arch(self.arch))
729
729
  return session
730
730
 
731
731
  def return_in_implicit_outparam(self, ty) -> bool: # pylint:disable=unused-argument
@@ -762,7 +762,7 @@ class SimCC:
762
762
  assert self.RETURN_VAL is not None
763
763
  ptr_loc = self.RETURN_VAL
764
764
  else:
765
- ptr_loc = self.next_arg(self.ArgSession(self), SimTypePointer(SimTypeBottom()))
765
+ ptr_loc = self.next_arg(self.ArgSession(self), SimTypePointer(SimTypeBottom()).with_arch(self.arch))
766
766
  return SimReferenceArgument(
767
767
  ptr_loc, SimStackArg(0, ty.size // self.arch.byte_width, is_fp=isinstance(ty, SimTypeFloat))
768
768
  )
@@ -1445,7 +1445,7 @@ class SimCCMicrosoftAMD64(SimCC):
1445
1445
  size = subty.size
1446
1446
  if chosen is None:
1447
1447
  # fallback to void*
1448
- chosen = SimTypePointer(SimTypeBottom())
1448
+ chosen = SimTypePointer(SimTypeBottom()).with_arch(self.arch)
1449
1449
  return self.return_val(chosen, perspective_returned=perspective_returned)
1450
1450
 
1451
1451
  if not isinstance(ty, SimStruct):
angr/engines/hook.py CHANGED
@@ -49,7 +49,7 @@ class HooksMixin(SuccessorsEngine, ProcedureMixin):
49
49
 
50
50
  return None
51
51
 
52
- def process_successors(self, successors, procedure=None, **kwargs):
52
+ def process_successors(self, successors, *, procedure=None, **kwargs):
53
53
  state = self.state
54
54
  if procedure is None:
55
55
  procedure = self._lookup_hook(state, procedure)
angr/engines/icicle.py ADDED
@@ -0,0 +1,229 @@
1
+ """icicle.py: An angr engine that uses Icicle to execute code."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ from dataclasses import dataclass
8
+
9
+ import claripy
10
+ import pypcode
11
+ from archinfo import Arch, Endness
12
+
13
+ from angr.engines.failure import SimEngineFailure
14
+ from angr.engines.hook import HooksMixin
15
+ from angr.engines.successors import SuccessorsEngine
16
+ from angr.engines.syscall import SimEngineSyscall
17
+ from angr.rustylib.icicle import Icicle, VmExit, ExceptionCode
18
+ from angr.sim_state import SimState
19
+
20
+ log = logging.getLogger(__name__)
21
+
22
+
23
+ PROCESSORS_DIR = os.path.join(os.path.dirname(pypcode.__file__), "processors")
24
+
25
+
26
+ @dataclass
27
+ class IcicleStateTranslationData:
28
+ """
29
+ Represents the saved information needed to convert an Icicle state back
30
+ to an angr state.
31
+ """
32
+
33
+ base_state: SimState
34
+ registers: set[str]
35
+ writable_pages: set[int]
36
+
37
+
38
+ class IcicleEngine(SuccessorsEngine):
39
+ """
40
+ An angr engine that uses Icicle to execute concrete states. The purpose of
41
+ this implementation is to provide a high-performance concrete execution
42
+ engine in angr. While historically, angr has focused on symbolic execution,
43
+ better support for concrete execution enables new use cases such as fuzzing
44
+ in angr. This is ideal for testing bespoke binary targets, such as
45
+ microcontroller firmware, which may be difficult to correctly harness for
46
+ use with traditional fuzzing engines.
47
+
48
+ This class is the base class for the Icicle engine. It implements execution
49
+ by creating an Icicle instance, copying the state from angr to Icicle, and then
50
+ running the Icicle instance. The results are then copied back to the angr
51
+ state. It is likely the case that this can be improved by re-using the Icicle
52
+ instance across multiple runs and only copying the state when necessary.
53
+
54
+ For a more complete implementation, use the UberIcicleEngine class, which
55
+ intends to provide a more complete set of features, such as hooks and syscalls.
56
+ """
57
+
58
+ @staticmethod
59
+ def __make_icicle_arch(arch: Arch) -> str | None:
60
+ """
61
+ Convert an angr architecture to an Icicle architecture. Not particularly
62
+ accurate, just a set of heuristics to get the right architecture. When
63
+ adding a new architecture, this function may need to be updated.
64
+ """
65
+ if arch.linux_name == "arm":
66
+ return "armv7a" if arch.memory_endness == Endness.LE else "armeb"
67
+ return arch.linux_name
68
+
69
+ @staticmethod
70
+ def __is_arm(icicle_arch: str) -> bool:
71
+ """
72
+ Check if the architecture is arm based on the address.
73
+ """
74
+ return icicle_arch.startswith(("arm", "thumb"))
75
+
76
+ @staticmethod
77
+ def __is_thumb(icicle_arch: str, addr: int) -> bool:
78
+ """
79
+ Check if the architecture is thumb based on the address.
80
+ """
81
+ return IcicleEngine.__is_arm(icicle_arch) and addr & 1 == 1
82
+
83
+ @staticmethod
84
+ def __get_pages(state: SimState) -> set[int]:
85
+ """
86
+ Unfortunately, the memory model doesn't have a way to get all pages.
87
+ Instead, we can get all of the backers from the loader, then all of the
88
+ pages from the PagedMemoryMixin and then do some math.
89
+ """
90
+ pages = set()
91
+ page_size = state.memory.page_size
92
+
93
+ # pages from loader segments
94
+ proj = state.project
95
+ if proj is not None:
96
+ for addr, backer in proj.loader.memory.backers():
97
+ start = addr // page_size
98
+ end = (addr + len(backer) - 1) // page_size
99
+ pages.update(range(start, end + 1))
100
+
101
+ # pages from the memory model
102
+ pages.update(state.memory._pages)
103
+
104
+ return pages
105
+
106
+ @staticmethod
107
+ def __convert_angr_state_to_icicle(state: SimState) -> tuple[Icicle, IcicleStateTranslationData]:
108
+ icicle_arch = IcicleEngine.__make_icicle_arch(state.arch)
109
+ if icicle_arch is None:
110
+ raise ValueError("Unsupported architecture")
111
+
112
+ proj = state.project
113
+ if proj is None:
114
+ raise ValueError("IcicleEngine requires a project to be set")
115
+
116
+ emu = Icicle(icicle_arch, PROCESSORS_DIR)
117
+
118
+ copied_registers = set()
119
+
120
+ # To create a state in Icicle, we need to do the following:
121
+ # 1. Copy the register values
122
+ for register in state.arch.register_list:
123
+ register = register.vex_name.lower() if register.vex_name is not None else register.name
124
+ try:
125
+ emu.reg_write(register, state.solver.eval(state.registers.load(register), cast_to=int))
126
+ copied_registers.add(register)
127
+ except KeyError:
128
+ log.debug("Register %s not found in icicle", register)
129
+
130
+ # Unset the thumb bit if necessary
131
+ if IcicleEngine.__is_thumb(icicle_arch, state.addr):
132
+ emu.pc = state.addr & ~1
133
+ emu.isa_mode = 1
134
+ elif "arm" in icicle_arch: # Hack to work around us calling it r15t
135
+ emu.pc = state.addr
136
+
137
+ # Special case for x86 gs register
138
+ if state.arch.name == "X86":
139
+ emu.reg_write("GS_OFFSET", state.registers.load("gs").concrete_value << 16)
140
+
141
+ # 2. Copy the memory contents
142
+
143
+ mapped_pages = IcicleEngine.__get_pages(state)
144
+ writable_pages = set()
145
+ for page_num in mapped_pages:
146
+ addr = page_num * state.memory.page_size
147
+ size = state.memory.page_size
148
+ perm_bits = state.memory.permissions(addr).concrete_value
149
+ emu.mem_map(addr, size, perm_bits)
150
+ memory = state.memory.concrete_load(addr, size)
151
+ emu.mem_write(addr, memory)
152
+
153
+ if perm_bits & 2:
154
+ writable_pages.add(page_num)
155
+
156
+ # Add breakpoints for simprocedures
157
+ for addr in proj._sim_procedures:
158
+ emu.add_breakpoint(addr)
159
+
160
+ translation_data = IcicleStateTranslationData(
161
+ base_state=state,
162
+ registers=copied_registers,
163
+ writable_pages=writable_pages,
164
+ )
165
+
166
+ return (emu, translation_data)
167
+
168
+ @staticmethod
169
+ def __convert_icicle_state_to_angr(emu: Icicle, translation_data: IcicleStateTranslationData) -> SimState:
170
+ state = translation_data.base_state.copy()
171
+
172
+ # 1. Copy the register values
173
+ for register in translation_data.registers:
174
+ state.registers.store(register, emu.reg_read(register))
175
+
176
+ if IcicleEngine.__is_arm(emu.architecture): # Hack to work around us calling it r15t
177
+ state.registers.store("pc", (emu.pc | 1) if emu.isa_mode == 1 else emu.pc)
178
+
179
+ # 2. Copy the memory contents
180
+ for page_num in translation_data.writable_pages:
181
+ addr = page_num * state.memory.page_size
182
+ state.memory.store(addr, emu.mem_read(addr, state.memory.page_size))
183
+
184
+ return state
185
+
186
+ def process_successors(self, successors, *, num_inst=0, **kwargs):
187
+ if len(kwargs) > 0:
188
+ log.warning("IcicleEngine.process_successors received unknown kwargs: %s", kwargs)
189
+
190
+ emu, translation_data = self.__convert_angr_state_to_icicle(self.state)
191
+
192
+ if num_inst > 0:
193
+ emu.icount_limit = num_inst
194
+
195
+ status = emu.run() # pylint: ignore=assignment-from-no-return (pylint bug)
196
+ exc = emu.exception_code
197
+
198
+ if status == VmExit.UnhandledException:
199
+ if exc in (
200
+ ExceptionCode.ReadUnmapped,
201
+ ExceptionCode.ReadPerm,
202
+ ExceptionCode.WriteUnmapped,
203
+ ExceptionCode.WritePerm,
204
+ ExceptionCode.ExecViolation,
205
+ ):
206
+ jumpkind = "Ijk_SigSEGV"
207
+ elif exc == ExceptionCode.Syscall:
208
+ jumpkind = "Ijk_Syscall"
209
+ elif exc == ExceptionCode.Halt:
210
+ jumpkind = "Ijk_Exit"
211
+ elif exc == ExceptionCode.InvalidInstruction:
212
+ jumpkind = "Ijk_NoDecode"
213
+ else:
214
+ jumpkind = "Ijk_EmFail"
215
+ else:
216
+ jumpkind = "Ijk_Boring"
217
+
218
+ successor_state = IcicleEngine.__convert_icicle_state_to_angr(emu, translation_data)
219
+ successors.add_successor(successor_state, successor_state.ip, claripy.true(), jumpkind, add_guard=False)
220
+
221
+ successors.processed = True
222
+
223
+
224
+ class UberIcicleEngine(SimEngineFailure, SimEngineSyscall, HooksMixin, IcicleEngine):
225
+ """
226
+ An extension of the IcicleEngine that uses mixins to add support for
227
+ syscalls and hooks. Most users will prefer to use this engine instead of the
228
+ IcicleEngine directly.
229
+ """
@@ -4,13 +4,10 @@ from collections.abc import Callable, Iterable
4
4
 
5
5
  import claripy
6
6
  from claripy.ast.bv import BV
7
+ from pypcode import OpCode
7
8
 
8
9
  from angr.errors import AngrError
9
10
 
10
- try:
11
- from pypcode import OpCode
12
- except ImportError:
13
- OpCode = None
14
11
 
15
12
  # pylint:disable=abstract-method
16
13
 
@@ -3,6 +3,7 @@ import logging
3
3
 
4
4
  import claripy
5
5
  from claripy.ast.bv import BV
6
+ from pypcode import OpCode, Varnode, PcodeOp
6
7
 
7
8
  from angr.engines.engine import SimEngine
8
9
  from angr.utils.constants import DEFAULT_STATEMENT
@@ -10,10 +11,6 @@ from .lifter import IRSB
10
11
  from .behavior import OpBehavior
11
12
  from angr.errors import AngrError
12
13
  from angr.state_plugins.inspect import BP_BEFORE, BP_AFTER
13
- import contextlib
14
-
15
- with contextlib.suppress(ImportError):
16
- from pypcode import OpCode, Varnode, PcodeOp
17
14
 
18
15
 
19
16
  l = logging.getLogger(__name__)
@@ -11,8 +11,9 @@ from typing import Any, TYPE_CHECKING
11
11
  from collections.abc import Iterable, Sequence
12
12
 
13
13
  import archinfo
14
- from archinfo import ArchARM, ArchPcode
15
14
  import cle
15
+ import pypcode
16
+ from archinfo import ArchARM, ArchPcode
16
17
  from cachetools import LRUCache
17
18
 
18
19
  # FIXME: Reusing these errors from pyvex for compatibility. Eventually these
@@ -28,16 +29,7 @@ from angr.errors import SimEngineError, SimTranslationError, SimError
28
29
  from angr import sim_options as o
29
30
  from angr.block import DisassemblerBlock, DisassemblerInsn
30
31
 
31
-
32
- try:
33
- import pypcode
34
- except ImportError:
35
- pypcode = None
36
-
37
-
38
32
  if TYPE_CHECKING:
39
- # this is to make pyright happy; otherwise it believes pypcode is None
40
- import pypcode
41
33
  from pypcode import PcodeOp, Context
42
34
 
43
35
 
@@ -982,10 +982,10 @@ class SimIROp:
982
982
  return self._fp_vector_comparison(claripy.fpEQ, a0, a1)
983
983
 
984
984
  def _op_fgeneric_CmpLE(self, a0, a1):
985
- return self._fp_vector_comparison(claripy.fpLT, a0, a1)
985
+ return self._fp_vector_comparison(claripy.fpLEQ, a0, a1)
986
986
 
987
987
  def _op_fgeneric_CmpLT(self, a0, a1):
988
- return self._fp_vector_comparison(claripy.fpLEQ, a0, a1)
988
+ return self._fp_vector_comparison(claripy.fpLT, a0, a1)
989
989
 
990
990
  def _auto_vectorize(self, f, args, rm=None, rm_passed=False):
991
991
  if rm is not None:
@@ -206,14 +206,17 @@ class Function(Serializable):
206
206
  if is_plt is not None:
207
207
  self.is_plt = is_plt
208
208
  else:
209
- # Whether this function is a PLT entry or not is primarily relying on the PLT detection in CLE; it may also
210
- # be updated (to True) during CFG recovery.
211
- if self.project is None:
212
- raise ValueError(
213
- "'is_plt' must be specified if you do not specify a function manager for this new function."
214
- )
215
-
216
- self.is_plt = self.project.loader.find_plt_stub_name(addr) is not None
209
+ if self._function_manager is not None:
210
+ # use the faster cached version
211
+ self.is_plt = self._function_manager.is_plt_cached(addr)
212
+ else:
213
+ # Whether this function is a PLT entry or not is primarily relying on the PLT detection in CLE; it may
214
+ # also be updated (to True) during CFG recovery.
215
+ if self.project is None:
216
+ raise ValueError(
217
+ "'is_plt' must be specified if you do not specify a function manager for this new function."
218
+ )
219
+ self.is_plt = self.project.loader.find_plt_stub_name(addr) is not None
217
220
 
218
221
  # Determine the name of this function
219
222
  if name is None:
@@ -726,8 +729,13 @@ class Function(Serializable):
726
729
  if hooker is not None:
727
730
  binary_name = hooker.library_name
728
731
 
729
- if binary_name is None and self.binary is not None and self.binary.binary:
730
- binary_name = os.path.basename(self.binary.binary)
732
+ if binary_name is None:
733
+ if self._function_manager is not None:
734
+ # use the faster cached version
735
+ binary_name = self._function_manager.get_binary_name_cached(self.addr)
736
+ else:
737
+ if self.binary is not None and self.binary.binary:
738
+ binary_name = os.path.basename(self.binary.binary)
731
739
 
732
740
  return binary_name
733
741
 
@@ -7,11 +7,14 @@ import logging
7
7
  import collections.abc
8
8
  import re
9
9
  import weakref
10
+ import bisect
11
+ import os
10
12
  from sortedcontainers import SortedDict
11
13
 
12
14
  import networkx
13
15
 
14
16
  from archinfo.arch_soot import SootMethodDescriptor
17
+ import cle
15
18
 
16
19
  from angr.errors import SimEngineError
17
20
  from angr.knowledge_plugins.plugin import KnowledgeBasePlugin
@@ -49,7 +52,8 @@ class FunctionDict(SortedDict):
49
52
  t = Function(self._backref, addr)
50
53
  with contextlib.suppress(Exception):
51
54
  self[addr] = t
52
- self._backref._function_added(t)
55
+ if self._backref is not None:
56
+ self._backref._function_added(t)
53
57
  return t
54
58
 
55
59
  def get(self, addr):
@@ -85,7 +89,7 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
85
89
  super().__init__(kb=kb)
86
90
  self.function_address_types = self._kb._project.arch.function_address_types
87
91
  self.address_types = self._kb._project.arch.address_types
88
- self._function_map: dict[int, Function] = FunctionDict(self, key_types=self.function_address_types)
92
+ self._function_map: FunctionDict[int, Function] = FunctionDict(self, key_types=self.function_address_types)
89
93
  self.function_addrs_set: set = set()
90
94
  self.callgraph = networkx.MultiDiGraph()
91
95
  self.block_map = {}
@@ -93,6 +97,12 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
93
97
  # Registers used for passing arguments around
94
98
  self._arg_registers = kb._project.arch.argument_registers
95
99
 
100
+ # local PLT dictionary cache
101
+ self._rplt_cache_ranges: None | list[tuple[int, int]] = None
102
+ self._rplt_cache: None | set[int] = None
103
+ # local binary name cache: min_addr -> (max_addr, binary_name)
104
+ self._binname_cache: None | SortedDict[int, tuple[int, str | None]] = None
105
+
96
106
  def __setstate__(self, state):
97
107
  self._kb = state["_kb"]
98
108
  self.function_address_types = state["function_address_types"]
@@ -101,7 +111,7 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
101
111
  self.callgraph = state["callgraph"]
102
112
  self.block_map = state["block_map"]
103
113
 
104
- self._function_map._backref = self
114
+ self._function_map._backref = weakref.proxy(self)
105
115
  for func in self._function_map.values():
106
116
  func._function_manager = self
107
117
 
@@ -131,18 +141,71 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
131
141
  self.callgraph = networkx.MultiDiGraph()
132
142
  self.block_map.clear()
133
143
  self.function_addrs_set = set()
144
+ # cache
145
+ self._rplt_cache = None
146
+ self._rplt_cache_ranges = None
147
+ self._binname_cache = None
134
148
 
135
- def _genenare_callmap_sif(self, filepath):
149
+ def _genenate_callmap_sif(self, filepath):
136
150
  """
137
151
  Generate a sif file from the call map.
138
152
 
139
153
  :param filepath: Path of the sif file
140
154
  :return: None
141
155
  """
142
- with open(filepath, "wb") as f:
156
+ with open(filepath, "w", encoding="utf-8") as f:
143
157
  for src, dst in self.callgraph.edges():
144
158
  f.write(f"{src:#x}\tDirectEdge\t{dst:#x}\n")
145
159
 
160
+ def _addr_in_plt_cached_ranges(self, addr: int) -> bool:
161
+ if self._rplt_cache_ranges is None:
162
+ return False
163
+ pos = bisect.bisect_left(self._rplt_cache_ranges, addr, key=lambda x: x[0])
164
+ return pos > 0 and self._rplt_cache_ranges[pos - 1][0] <= addr < self._rplt_cache_ranges[pos - 1][1]
165
+
166
+ def is_plt_cached(self, addr: int) -> bool:
167
+ # check if the addr is in the cache range
168
+ if not self._addr_in_plt_cached_ranges(addr):
169
+ # find the object containing this addr
170
+ obj = self._kb._project.loader.find_object_containing(addr, membership_check=False)
171
+ if obj is None:
172
+ return False
173
+ if self._rplt_cache_ranges is None:
174
+ self._rplt_cache_ranges = []
175
+ obj_range = obj.min_addr, obj.max_addr
176
+ idx = bisect.bisect_left(self._rplt_cache_ranges, obj_range)
177
+ if not (idx < len(self._rplt_cache_ranges) and self._rplt_cache_ranges[idx] == obj_range):
178
+ self._rplt_cache_ranges.insert(idx, obj_range)
179
+ if isinstance(obj, cle.MetaELF):
180
+ if self._rplt_cache is None:
181
+ self._rplt_cache = set()
182
+ self._rplt_cache |= set(obj.reverse_plt)
183
+
184
+ return addr in self._rplt_cache if self._rplt_cache is not None else False
185
+
186
+ def _binname_cache_get_addr_base(self, addr: int) -> int | None:
187
+ if self._binname_cache is None:
188
+ return None
189
+ try:
190
+ base_addr = next(self._binname_cache.irange(maximum=addr, reverse=True))
191
+ except StopIteration:
192
+ return None
193
+ return base_addr if base_addr <= addr < self._binname_cache[base_addr][0] else None
194
+
195
+ def get_binary_name_cached(self, addr: int) -> str | None:
196
+ base_addr = self._binname_cache_get_addr_base(addr)
197
+ if base_addr is None:
198
+ # not cached; cache it first
199
+ obj = self._kb._project.loader.find_object_containing(addr, membership_check=False)
200
+ if obj is None:
201
+ return None
202
+ if self._binname_cache is None:
203
+ self._binname_cache = SortedDict()
204
+ binary_basename = os.path.basename(obj.binary) if obj.binary else None
205
+ self._binname_cache[obj.min_addr] = obj.max_addr, binary_basename
206
+ base_addr = obj.min_addr
207
+ return self._binname_cache[base_addr][1] if self._binname_cache is not None else None
208
+
146
209
  def _add_node(self, function_addr, node, syscall=None, size=None):
147
210
  if isinstance(node, self.address_types):
148
211
  node = self._kb._project.factory.snippet(node, size=size)
@@ -582,7 +582,7 @@ class VariableManagerInternal(Serializable):
582
582
  return phi
583
583
 
584
584
  # allocate a new phi variable
585
- repre = next(iter(variables))
585
+ repre = sorted(variables, key=lambda val: val.key)[0]
586
586
  repre_type = type(repre)
587
587
  repre_size = max(var.size for var in variables)
588
588
  if repre_type is SimRegisterVariable:
@@ -635,7 +635,13 @@ class VariableManagerInternal(Serializable):
635
635
  return loc in self._variable_to_stmt[variable]
636
636
 
637
637
  def find_variable_by_stmt(self, block_addr, stmt_idx, sort, block_idx: int | None = None):
638
- return next(iter(self.find_variables_by_stmt(block_addr, stmt_idx, sort, block_idx=block_idx)), None)
638
+ variables = sorted(
639
+ self.find_variables_by_stmt(block_addr, stmt_idx, sort, block_idx=block_idx),
640
+ key=lambda var: (var[1], var[0].key),
641
+ )
642
+ if variables:
643
+ return variables[0]
644
+ return None
639
645
 
640
646
  def find_variables_by_stmt(
641
647
  self, block_addr: int, stmt_idx: int, sort: str, block_idx: int | None = None
@@ -667,7 +673,13 @@ class VariableManagerInternal(Serializable):
667
673
  return var_and_offsets
668
674
 
669
675
  def find_variable_by_atom(self, block_addr, stmt_idx, atom, block_idx: int | None = None):
670
- return next(iter(self.find_variables_by_atom(block_addr, stmt_idx, atom, block_idx=block_idx)), None)
676
+ variables = sorted(
677
+ self.find_variables_by_atom(block_addr, stmt_idx, atom, block_idx=block_idx),
678
+ key=lambda val: (val[1], val[0].key),
679
+ )
680
+ if variables:
681
+ return variables[0]
682
+ return None
671
683
 
672
684
  def find_variables_by_atom(
673
685
  self, block_addr, stmt_idx, atom, block_idx: int | None = None
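Binary file (angr/lib/angr_native.dylib) — contents not shown
Binary file (angr/rustylib.cpython-310-darwin.so) — contents not shown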