angr 9.2.147__py3-none-macosx_11_0_arm64.whl → 9.2.148__py3-none-macosx_11_0_arm64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (53)
  1. angr/__init__.py +1 -1
  2. angr/analyses/analysis.py +3 -11
  3. angr/analyses/calling_convention/fact_collector.py +5 -4
  4. angr/analyses/calling_convention/utils.py +1 -0
  5. angr/analyses/cfg/cfg_base.py +3 -59
  6. angr/analyses/cfg/cfg_emulated.py +12 -12
  7. angr/analyses/cfg/cfg_fast.py +20 -17
  8. angr/analyses/cfg/cfg_fast_soot.py +3 -3
  9. angr/analyses/decompiler/callsite_maker.py +28 -18
  10. angr/analyses/decompiler/clinic.py +4 -4
  11. angr/analyses/decompiler/condition_processor.py +0 -21
  12. angr/analyses/decompiler/counters/call_counter.py +3 -0
  13. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +1 -1
  14. angr/analyses/decompiler/peephole_optimizations/remove_redundant_conversions.py +14 -0
  15. angr/analyses/decompiler/structured_codegen/c.py +5 -5
  16. angr/analyses/decompiler/structuring/phoenix.py +11 -3
  17. angr/analyses/deobfuscator/api_obf_finder.py +5 -1
  18. angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +1 -1
  19. angr/analyses/forward_analysis/visitors/graph.py +0 -8
  20. angr/analyses/identifier/runner.py +1 -1
  21. angr/analyses/reaching_definitions/function_handler.py +4 -4
  22. angr/analyses/reassembler.py +1 -1
  23. angr/analyses/stack_pointer_tracker.py +1 -1
  24. angr/analyses/static_hooker.py +11 -9
  25. angr/analyses/variable_recovery/engine_ail.py +8 -8
  26. angr/analyses/variable_recovery/engine_base.py +2 -0
  27. angr/calling_conventions.py +74 -23
  28. angr/exploration_techniques/director.py +1 -1
  29. angr/knowledge_plugins/functions/function.py +41 -38
  30. angr/knowledge_plugins/functions/function_manager.py +9 -0
  31. angr/knowledge_plugins/functions/function_parser.py +9 -1
  32. angr/knowledge_plugins/functions/soot_function.py +1 -1
  33. angr/knowledge_plugins/key_definitions/key_definition_manager.py +1 -1
  34. angr/lib/angr_native.dylib +0 -0
  35. angr/procedures/definitions/__init__.py +14 -11
  36. angr/procedures/stubs/format_parser.py +1 -1
  37. angr/project.py +23 -29
  38. angr/protos/cfg_pb2.py +14 -25
  39. angr/protos/function_pb2.py +11 -22
  40. angr/protos/primitives_pb2.py +36 -47
  41. angr/protos/variables_pb2.py +28 -39
  42. angr/protos/xrefs_pb2.py +8 -19
  43. angr/sim_type.py +0 -16
  44. angr/simos/cgc.py +1 -1
  45. angr/simos/linux.py +5 -5
  46. angr/simos/windows.py +5 -5
  47. angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +1 -1
  48. {angr-9.2.147.dist-info → angr-9.2.148.dist-info}/METADATA +8 -7
  49. {angr-9.2.147.dist-info → angr-9.2.148.dist-info}/RECORD +53 -53
  50. {angr-9.2.147.dist-info → angr-9.2.148.dist-info}/WHEEL +1 -1
  51. {angr-9.2.147.dist-info → angr-9.2.148.dist-info}/entry_points.txt +0 -0
  52. {angr-9.2.147.dist-info → angr-9.2.148.dist-info/licenses}/LICENSE +0 -0
  53. {angr-9.2.147.dist-info → angr-9.2.148.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,6 @@ from typing import TypeVar, Generic
3
3
  from collections.abc import Collection, Iterator
4
4
  from collections import defaultdict
5
5
 
6
- from angr.misc.ux import deprecated
7
6
  from angr.utils.algo import binary_insert
8
7
 
9
8
  NodeType = TypeVar("NodeType")
@@ -94,13 +93,6 @@ class GraphVisitor(Generic[NodeType]):
94
93
 
95
94
  return iter(self.sort_nodes())
96
95
 
97
- @deprecated(replacement="nodes")
98
- def nodes_iter(self):
99
- """
100
- (Deprecated) Return an iterator of nodes following an optimal traversal order. Will be removed in the future.
101
- """
102
- return self.nodes()
103
-
104
96
  # Traversal
105
97
 
106
98
  def reset(self):
@@ -29,7 +29,7 @@ assert len(FLAG_DATA) == 0x1000
29
29
  class Runner:
30
30
  def __init__(self, project, cfg):
31
31
  # this is kind of fucked up
32
- project.simos.syscall_library.update(SIM_LIBRARIES["cgcabi_tracer"])
32
+ project.simos.syscall_library.update(SIM_LIBRARIES["cgcabi_tracer"][0])
33
33
 
34
34
  self.project = project
35
35
  self.cfg = cfg
@@ -401,10 +401,10 @@ class FunctionHandler:
401
401
  )
402
402
  type_collections = []
403
403
  if prototype_libname is not None and prototype_libname in SIM_LIBRARIES:
404
- prototype_lib = SIM_LIBRARIES[prototype_libname]
405
- if prototype_lib.type_collection_names:
406
- for typelib_name in prototype_lib.type_collection_names:
407
- type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
404
+ for prototype_lib in SIM_LIBRARIES[prototype_libname]:
405
+ if prototype_lib.type_collection_names:
406
+ for typelib_name in prototype_lib.type_collection_names:
407
+ type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
408
408
  if type_collections:
409
409
  prototype = dereference_simtype(data.prototype, type_collections).with_arch(state.arch)
410
410
  data.prototype = cast(SimTypeFunction, prototype)
@@ -2410,7 +2410,7 @@ class Reassembler(Analysis):
2410
2410
 
2411
2411
  # collect address of all instructions
2412
2412
  l.debug("Collecting instruction addresses...")
2413
- for cfg_node in self.cfg.nodes():
2413
+ for cfg_node in self.cfg.model.nodes():
2414
2414
  self.all_insn_addrs |= set(cfg_node.instruction_addrs)
2415
2415
 
2416
2416
  # Functions
@@ -520,7 +520,7 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
520
520
  # Setting register values to fresh ones will cause problems down the line when merging with normal
521
521
  # register values happen. therefore, we set their values to BOTTOM. these BOTTOMs will be replaced once
522
522
  # a merge with normal blocks happen.
523
- initial_regs = {r: BOTTOM for r in self.reg_offsets}
523
+ initial_regs = dict.fromkeys(self.reg_offsets, BOTTOM)
524
524
 
525
525
  return StackPointerTrackerState(
526
526
  regs=initial_regs, memory={}, is_tracking_memory=self.track_mem, resilient=self._resilient
@@ -21,7 +21,7 @@ class StaticHooker(Analysis):
21
21
  def __init__(self, library, binary=None):
22
22
  self.results = {}
23
23
  try:
24
- lib = SIM_LIBRARIES[library]
24
+ libs = SIM_LIBRARIES[library]
25
25
  except KeyError as err:
26
26
  raise AngrValueError(f"No such library {library}") from err
27
27
 
@@ -36,14 +36,16 @@ class StaticHooker(Analysis):
36
36
  l.debug("Skipping %s at %#x, already hooked", func.name, func.rebased_addr)
37
37
  continue
38
38
 
39
- if lib.has_implementation(func.name):
40
- proc = lib.get(func.name, self.project.arch)
41
- self.results[func.rebased_addr] = proc
42
- if self.project.is_hooked(func.rebased_addr):
43
- l.debug("Skipping %s at %#x, already hooked", func.name, func.rebased_addr)
44
- else:
45
- self.project.hook(func.rebased_addr, proc)
46
- l.info("Hooked %s at %#x", func.name, func.rebased_addr)
39
+ for lib in libs:
40
+ if lib.has_implementation(func.name):
41
+ proc = lib.get(func.name, self.project.arch)
42
+ self.results[func.rebased_addr] = proc
43
+ if self.project.is_hooked(func.rebased_addr):
44
+ l.debug("Skipping %s at %#x, already hooked", func.name, func.rebased_addr)
45
+ else:
46
+ self.project.hook(func.rebased_addr, proc)
47
+ l.info("Hooked %s at %#x", func.name, func.rebased_addr)
48
+ break
47
49
  else:
48
50
  l.debug("Failed to hook %s at %#x", func.name, func.rebased_addr)
49
51
 
@@ -169,10 +169,10 @@ class SimEngineVRAIL(
169
169
 
170
170
  type_collections = []
171
171
  if prototype_libname is not None:
172
- prototype_lib = SIM_LIBRARIES[prototype_libname]
173
- if prototype_lib.type_collection_names:
174
- for typelib_name in prototype_lib.type_collection_names:
175
- type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
172
+ for prototype_lib in SIM_LIBRARIES[prototype_libname]:
173
+ if prototype_lib.type_collection_names:
174
+ for typelib_name in prototype_lib.type_collection_names:
175
+ type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
176
176
 
177
177
  for arg, arg_type in zip(args, prototype.args):
178
178
  if arg.typevar is not None:
@@ -262,10 +262,10 @@ class SimEngineVRAIL(
262
262
 
263
263
  type_collections = []
264
264
  if prototype_libname is not None:
265
- prototype_lib = SIM_LIBRARIES[prototype_libname]
266
- if prototype_lib.type_collection_names:
267
- for typelib_name in prototype_lib.type_collection_names:
268
- type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
265
+ for prototype_lib in SIM_LIBRARIES[prototype_libname]:
266
+ if prototype_lib.type_collection_names:
267
+ for typelib_name in prototype_lib.type_collection_names:
268
+ type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
269
269
 
270
270
  for arg, arg_type in zip(args, prototype.args):
271
271
  if arg.typevar is not None:
@@ -417,6 +417,7 @@ class SimEngineVRBase(
417
417
  vvar.size,
418
418
  ident=self.state.variable_manager[self.func_addr].next_variable_ident("stack"),
419
419
  region=self.func_addr,
420
+ base="bp",
420
421
  )
421
422
  self.state.variable_manager[self.func_addr].add_variable("stack", vvar.stack_offset, variable)
422
423
  elif vvar.was_parameter:
@@ -1079,6 +1080,7 @@ class SimEngineVRBase(
1079
1080
  vvar.size,
1080
1081
  ident=self.state.variable_manager[self.func_addr].next_variable_ident("stack"),
1081
1082
  region=self.func_addr,
1083
+ base="bp",
1082
1084
  )
1083
1085
  value = self.state.annotate_with_variables(value, [(0, variable)])
1084
1086
  self.state.variable_manager[self.func_addr].add_variable("stack", vvar.stack_offset, variable)
@@ -1,9 +1,9 @@
1
1
  # pylint:disable=line-too-long,missing-class-docstring,no-self-use
2
2
  from __future__ import annotations
3
3
  import logging
4
- from typing import cast
4
+ from typing import Generic, cast, TypeVar
5
5
 
6
- from collections.abc import Iterable, Sequence
6
+ from collections.abc import Iterable
7
7
  from collections import defaultdict
8
8
  import contextlib
9
9
 
@@ -39,6 +39,8 @@ from .state_plugins.sim_action_object import SimActionObject
39
39
  l = logging.getLogger(name=__name__)
40
40
  l.addFilter(UniqueLogFilter())
41
41
 
42
+ T = TypeVar("T", bound="SimFunctionArgument")
43
+
42
44
 
43
45
  class PointerWrapper:
44
46
  def __init__(self, value, buffer=False):
@@ -386,12 +388,12 @@ class SimStackArg(SimFunctionArgument):
386
388
  return SimStackArg(self.stack_offset + offset, size, is_fp)
387
389
 
388
390
 
389
- class SimComboArg(SimFunctionArgument):
391
+ class SimComboArg(SimFunctionArgument, Generic[T]):
390
392
  """
391
393
  An argument which spans multiple storage locations. Locations should be given least-significant first.
392
394
  """
393
395
 
394
- def __init__(self, locations, is_fp=False):
396
+ def __init__(self, locations: list[T], is_fp=False):
395
397
  super().__init__(sum(x.size for x in locations), is_fp=is_fp)
396
398
  self.locations = locations
397
399
 
@@ -449,6 +451,45 @@ class SimStructArg(SimFunctionArgument):
449
451
 
450
452
  return others
451
453
 
454
+ def get_single_footprint(self) -> SimStackArg | SimRegArg | SimComboArg:
455
+ if self.struct._arch is None:
456
+ raise TypeError("Can't tell the size of a struct without an arch")
457
+ stack_min = None
458
+ stack_max = None
459
+ regs = []
460
+ for field in self.struct.fields:
461
+ loc = self.locs[field]
462
+ if isinstance(loc, SimStackArg):
463
+ if stack_min is None or stack_max is None:
464
+ stack_min = loc.stack_offset
465
+ stack_max = loc.stack_offset
466
+ else:
467
+ # sanity check that arguments are laid out in order...
468
+ assert loc.stack_offset >= stack_max
469
+ stack_max = loc.stack_offset + loc.size
470
+ elif isinstance(loc, SimRegArg):
471
+ regs.append(loc)
472
+ else:
473
+ assert False, "Why would a struct have layout elements other than stack and reg?"
474
+
475
+ # things to consider...
476
+ # what happens if we return the concat of two registers but there's slack space missing?
477
+ # an example of this would be big-endian struct { long a; int b; }
478
+ # do any CCs do this??
479
+ # for now assume no
480
+
481
+ if stack_min is not None:
482
+ if regs:
483
+ assert (
484
+ False
485
+ ), "Unknown CC argument passing structure - why are we passing both regs and stack at the same time?"
486
+ return SimStackArg(stack_min, self.struct.size // self.struct._arch.byte_width)
487
+ if not regs:
488
+ assert False, "huh??????"
489
+ if len(regs) == 1:
490
+ return regs[0]
491
+ return SimComboArg(regs)
492
+
452
493
  def get_value(self, state, **kwargs):
453
494
  return SimStructValue(
454
495
  self.struct, {field: getter.get_value(state, **kwargs) for field, getter in self.locs.items()}
@@ -486,7 +527,7 @@ class SimReferenceArgument(SimFunctionArgument):
486
527
  zero on the stack. It will be passed ``stack_base=ptr_loc.get_value(state)``
487
528
  """
488
529
 
489
- def __init__(self, ptr_loc, main_loc):
530
+ def __init__(self, ptr_loc: SimFunctionArgument, main_loc: SimFunctionArgument):
490
531
  super().__init__(ptr_loc.size) # ???
491
532
  self.ptr_loc = ptr_loc
492
533
  self.main_loc = main_loc
@@ -700,6 +741,7 @@ class SimCC:
700
741
  )
701
742
  if self.return_in_implicit_outparam(ty):
702
743
  if perspective_returned:
744
+ assert self.RETURN_VAL is not None
703
745
  ptr_loc = self.RETURN_VAL
704
746
  else:
705
747
  ptr_loc = self.next_arg(self.ArgSession(self), SimTypePointer(SimTypeBottom()))
@@ -713,6 +755,7 @@ class SimCC:
713
755
  if self.RETURN_VAL is None or isinstance(ty, SimTypeBottom):
714
756
  return None
715
757
  if ty.size > self.RETURN_VAL.size * self.arch.byte_width:
758
+ assert self.OVERFLOW_RETURN_VAL is not None
716
759
  return SimComboArg([self.RETURN_VAL, self.OVERFLOW_RETURN_VAL])
717
760
  return self.RETURN_VAL.refine(size=ty.size // self.arch.byte_width, arch=self.arch, is_fp=False)
718
761
 
@@ -991,7 +1034,8 @@ class SimCC:
991
1034
  else:
992
1035
  raise TypeError("PointerWrapper(buffer=True) can only be used with a bitvector or a bytestring")
993
1036
  else:
994
- child_type = SimTypeArray(ty.pts_to) if type(arg.value) in (str, bytes, list) else ty.pts_to
1037
+ sub = ty.pts_to if isinstance(ty, SimTypePointer) else ty.refs
1038
+ child_type = SimTypeArray(sub) if isinstance(arg.value, (str, bytes, list)) else sub
995
1039
  try:
996
1040
  real_value = SimCC._standardize_value(arg.value, child_type, state, alloc)
997
1041
  except TypeError as e: # this is a dangerous catch...
@@ -1003,32 +1047,34 @@ class SimCC:
1003
1047
 
1004
1048
  if isinstance(arg, (str, bytes)):
1005
1049
  # sanitize the argument and request standardization again with SimTypeArray
1006
- if type(arg) is str:
1050
+ if isinstance(arg, str):
1007
1051
  arg = arg.encode()
1008
1052
  arg += b"\0"
1009
1053
  if isinstance(ty, SimTypePointer) and isinstance(ty.pts_to, SimTypeChar):
1010
1054
  pass
1011
- elif isinstance(ty, SimTypeFixedSizeArray) and isinstance(ty.elem_type, SimTypeChar):
1012
- if len(arg) > ty.length:
1013
- raise TypeError(f"String {arg!r} is too long for {ty}")
1014
- arg = arg.ljust(ty.length, b"\0")
1015
- elif isinstance(ty, SimTypeArray) and isinstance(ty.elem_type, SimTypeChar):
1055
+ elif (isinstance(ty, SimTypeFixedSizeArray) and isinstance(ty.elem_type, SimTypeChar)) or (
1056
+ isinstance(ty, SimTypeArray) and isinstance(ty.elem_type, SimTypeChar)
1057
+ ):
1016
1058
  if ty.length is not None:
1017
1059
  if len(arg) > ty.length:
1018
1060
  raise TypeError(f"String {arg!r} is too long for {ty}")
1019
1061
  arg = arg.ljust(ty.length, b"\0")
1020
1062
  elif isinstance(ty, SimTypeString):
1021
- if len(arg) > ty.length + 1:
1022
- raise TypeError(f"String {arg!r} is too long for {ty}")
1023
- arg = arg.ljust(ty.length + 1, b"\0")
1063
+ if ty.length is not None:
1064
+ if len(arg) > ty.length + 1:
1065
+ raise TypeError(f"String {arg!r} is too long for {ty}")
1066
+ arg = arg.ljust(ty.length + 1, b"\0")
1024
1067
  else:
1025
1068
  raise TypeError(f"Type mismatch: Expected {ty}, got char*")
1026
1069
  return SimCC._standardize_value(list(arg), SimTypeArray(SimTypeChar(), len(arg)), state, alloc)
1027
1070
 
1028
1071
  if isinstance(arg, list):
1029
- if isinstance(ty, (SimTypePointer, SimTypeReference)):
1072
+ if isinstance(ty, SimTypePointer):
1030
1073
  ref = True
1031
1074
  subty = ty.pts_to
1075
+ elif isinstance(ty, SimTypeReference):
1076
+ ref = True
1077
+ subty = ty.refs
1032
1078
  elif isinstance(ty, SimTypeArray):
1033
1079
  ref = True
1034
1080
  subty = ty.elem_type
@@ -1045,7 +1091,7 @@ class SimCC:
1045
1091
  if isinstance(arg, (tuple, dict, SimStructValue)):
1046
1092
  if not isinstance(ty, SimStruct):
1047
1093
  raise TypeError(f"Type mismatch: Expected {ty}, got {type(arg)} (i.e. struct)")
1048
- if type(arg) is not SimStructValue:
1094
+ if not isinstance(arg, SimStructValue):
1049
1095
  if len(arg) != len(ty.fields):
1050
1096
  raise TypeError(f"Wrong number of fields in struct, expected {len(ty.fields)} got {len(arg)}")
1051
1097
  arg = SimStructValue(ty, arg)
@@ -1075,14 +1121,16 @@ class SimCC:
1075
1121
  raise TypeError(f"Type mismatch: expected {ty}, got {arg.sort}")
1076
1122
  return arg
1077
1123
  if isinstance(ty, (SimTypeReg, SimTypeNum)):
1078
- return arg.val_to_bv(ty.size, ty.signed)
1124
+ return arg.val_to_bv(ty.size, ty.signed if isinstance(ty, SimTypeNum) else False)
1079
1125
  raise TypeError(f"Type mismatch: expected {ty}, got {arg.sort}")
1080
1126
 
1081
1127
  if isinstance(arg, claripy.ast.BV):
1082
1128
  if isinstance(ty, (SimTypeReg, SimTypeNum)):
1083
1129
  if len(arg) != ty.size:
1084
1130
  if arg.concrete:
1085
- return claripy.BVV(arg.concrete_value, ty.size)
1131
+ size = ty.size
1132
+ assert size is not None
1133
+ return claripy.BVV(arg.concrete_value, size)
1086
1134
  raise TypeError(f"Type mismatch of symbolic data: expected {ty}, got {len(arg)} bits")
1087
1135
  return arg
1088
1136
  if isinstance(ty, (SimTypeFloat)):
@@ -1101,7 +1149,7 @@ class SimCC:
1101
1149
  return isinstance(other, self.__class__)
1102
1150
 
1103
1151
  @classmethod
1104
- def _match(cls, arch, args: list, sp_delta):
1152
+ def _match(cls, arch, args: list[SimRegArg | SimStackArg], sp_delta):
1105
1153
  if (
1106
1154
  cls.arches() is not None and ":" not in arch.name and not isinstance(arch, cls.arches())
1107
1155
  ): # pylint:disable=isinstance-second-argument-not-valid-type
@@ -1139,13 +1187,16 @@ class SimCC:
1139
1187
  @classmethod
1140
1188
  def _guess_arg_count(cls, args, limit: int = 64) -> int:
1141
1189
  # pylint:disable=not-callable
1190
+ assert cls.ARCH is not None
1142
1191
  stack_args = [a for a in args if isinstance(a, SimStackArg)]
1143
- stack_arg_count = (max(a.stack_offset for a in stack_args) // cls.ARCH().bytes + 1) if stack_args else 0
1192
+ stack_arg_count = (
1193
+ (max(a.stack_offset for a in stack_args) // cls.ARCH(archinfo.Endness.LE).bytes + 1) if stack_args else 0
1194
+ )
1144
1195
  return min(limit, max(len(args), stack_arg_count))
1145
1196
 
1146
1197
  @staticmethod
1147
1198
  def find_cc(
1148
- arch: archinfo.Arch, args: Sequence[SimFunctionArgument], sp_delta: int, platform: str = "Linux"
1199
+ arch: archinfo.Arch, args: list[SimRegArg | SimStackArg], sp_delta: int, platform: str = "Linux"
1149
1200
  ) -> SimCC | None:
1150
1201
  """
1151
1202
  Pinpoint the best-fit calling convention and return the corresponding SimCC instance, or None if no fit is
@@ -1229,7 +1280,7 @@ class SimCCUsercall(SimCC):
1229
1280
  def next_arg(self, session, arg_type):
1230
1281
  return next(session.real_args)
1231
1282
 
1232
- def return_val(self, ty, **kwargs):
1283
+ def return_val(self, ty, **kwargs): # pylint: disable=unused-argument
1233
1284
  return self.ret_loc
1234
1285
 
1235
1286
 
@@ -71,7 +71,7 @@ class BaseGoal:
71
71
 
72
72
  block_id = cfg._generate_block_id(call_stack_suffix, state.addr, is_syscall)
73
73
 
74
- return cfg.get_node(block_id)
74
+ return cfg.model.get_node(block_id)
75
75
 
76
76
  @staticmethod
77
77
  def _dfs_edges(graph, source, max_steps=None):
@@ -23,7 +23,6 @@ from angr.procedures import SIM_LIBRARIES
23
23
  from angr.procedures.definitions import SimSyscallLibrary
24
24
  from angr.protos import function_pb2
25
25
  from angr.calling_conventions import DEFAULT_CC, default_cc
26
- from angr.misc.ux import deprecated
27
26
  from angr.sim_type import SimTypeFunction, parse_defns
28
27
  from angr.calling_conventions import SimCC
29
28
  from angr.project import Project
@@ -92,6 +91,10 @@ class Function(Serializable):
92
91
  is_plt: bool | None = None,
93
92
  returning=None,
94
93
  alignment=False,
94
+ calling_convention: SimCC | None = None,
95
+ prototype: SimTypeFunction | None = None,
96
+ prototype_libname: str | None = None,
97
+ is_prototype_guessed: bool = True,
95
98
  ):
96
99
  """
97
100
  Function constructor. If the optional parameters are not provided, they will be automatically determined upon
@@ -139,11 +142,11 @@ class Function(Serializable):
139
142
  self.retaddr_on_stack = False
140
143
  self.sp_delta = 0
141
144
  # Calling convention
142
- self.calling_convention: SimCC | None = None
145
+ self.calling_convention = calling_convention
143
146
  # Function prototype
144
- self.prototype: SimTypeFunction | None = None
145
- self.prototype_libname: str | None = None
146
- self.is_prototype_guessed: bool = True
147
+ self.prototype = prototype
148
+ self.prototype_libname = prototype_libname
149
+ self.is_prototype_guessed = is_prototype_guessed
147
150
  # Whether this function returns or not. `None` means it's not determined yet
148
151
  self._returning = None
149
152
 
@@ -239,15 +242,6 @@ class Function(Serializable):
239
242
 
240
243
  self._init_prototype_and_calling_convention()
241
244
 
242
- @property
243
- @deprecated(".is_alignment")
244
- def alignment(self):
245
- return self.is_alignment
246
-
247
- @alignment.setter
248
- def alignment(self, value):
249
- self.is_alignment = value
250
-
251
245
  @property
252
246
  def name(self):
253
247
  return self._name
@@ -357,7 +351,8 @@ class Function(Serializable):
357
351
  # we know the size
358
352
  size = self._block_sizes[addr]
359
353
 
360
- block = self._project.factory.block(addr, size=size, byte_string=byte_string)
354
+ assert self.project is not None
355
+ block = self.project.factory.block(addr, size=size, byte_string=byte_string)
361
356
  if size is None:
362
357
  # update block_size dict
363
358
  self._block_sizes[addr] = block.size
@@ -460,18 +455,19 @@ class Function(Serializable):
460
455
  """
461
456
  constants = set()
462
457
 
463
- if not self._project.loader.main_object.contains_addr(self.addr):
458
+ assert self.project is not None
459
+ if not self.project.loader.main_object.contains_addr(self.addr):
464
460
  return constants
465
461
 
466
462
  # FIXME the old way was better for architectures like mips, but we need the initial irsb
467
463
  # reanalyze function with a new initial state (use persistent registers)
468
464
  # initial_state = self._function_manager._cfg.get_any_irsb(self.addr).initial_state
469
- # fresh_state = self._project.factory.blank_state(mode="fastpath")
465
+ # fresh_state = self.project.factory.blank_state(mode="fastpath")
470
466
  # for reg in initial_state.arch.persistent_regs + ['ip']:
471
467
  # fresh_state.registers.store(reg, initial_state.registers.load(reg))
472
468
 
473
469
  # reanalyze function with a new initial state
474
- fresh_state = self._project.factory.blank_state(mode="fastpath")
470
+ fresh_state = self.project.factory.blank_state(mode="fastpath")
475
471
  fresh_state.regs.ip = self.addr
476
472
 
477
473
  graph_addrs = {x.addr for x in self.graph.nodes() if isinstance(x, BlockNode)}
@@ -486,10 +482,10 @@ class Function(Serializable):
486
482
  if state.solver.eval(state.ip) not in graph_addrs:
487
483
  continue
488
484
  # don't trace into simprocedures
489
- if self._project.is_hooked(state.solver.eval(state.ip)):
485
+ if self.project.is_hooked(state.solver.eval(state.ip)):
490
486
  continue
491
487
  # don't trace outside of the binary
492
- if not self._project.loader.main_object.contains_addr(state.solver.eval(state.ip)):
488
+ if not self.project.loader.main_object.contains_addr(state.solver.eval(state.ip)):
493
489
  continue
494
490
  # don't trace unreachable blocks
495
491
  if state.history.jumpkind in {
@@ -506,7 +502,7 @@ class Function(Serializable):
506
502
  curr_ip = state.solver.eval(state.ip)
507
503
 
508
504
  # get runtime values from logs of successors
509
- successors = self._project.factory.successors(state)
505
+ successors = self.project.factory.successors(state)
510
506
  for succ in successors.flat_successors + successors.unsat_successors:
511
507
  for a in succ.history.recent_actions:
512
508
  for ao in a.all_objects:
@@ -562,7 +558,7 @@ class Function(Serializable):
562
558
  f" SP difference: {self.sp_delta}\n"
563
559
  f" Has return: {self.has_return}\n"
564
560
  f" Returning: {'Unknown' if self.returning is None else self.returning}\n"
565
- f" Alignment: {self.alignment}\n"
561
+ f" Alignment: {self.is_alignment}\n"
566
562
  f" Arguments: reg: {self._argument_registers}, stack: {self._argument_stack_variables}\n"
567
563
  f" Blocks: [{', '.join(f'{i:#x}' for i in self.block_addrs)}]\n"
568
564
  f" Cyclomatic Complexity: {self.cyclomatic_complexity}\n"
@@ -612,7 +608,7 @@ class Function(Serializable):
612
608
 
613
609
  @property
614
610
  def size(self):
615
- return sum(self._block_sizes.values())
611
+ return sum(self._block_sizes[addr] for addr in self._local_blocks)
616
612
 
617
613
  @property
618
614
  def binary(self):
@@ -620,8 +616,8 @@ class Function(Serializable):
620
616
  Get the object this function belongs to.
621
617
  :return: The object this function belongs to.
622
618
  """
623
-
624
- return self._project.loader.find_object_containing(self.addr, membership_check=False)
619
+ assert self.project is not None
620
+ return self.project.loader.find_object_containing(self.addr, membership_check=False)
625
621
 
626
622
  @property
627
623
  def offset(self) -> int:
@@ -698,10 +694,12 @@ class Function(Serializable):
698
694
  project = self.project
699
695
  if project.is_hooked(addr):
700
696
  hooker = project.hooked_by(addr)
701
- name = hooker.display_name
697
+ if hooker is not None:
698
+ name = hooker.display_name
702
699
  elif project.simos.is_syscall_addr(addr):
703
700
  syscall_inst = project.simos.syscall_from_addr(addr)
704
- name = syscall_inst.display_name
701
+ if syscall_inst is not None:
702
+ name = syscall_inst.display_name
705
703
 
706
704
  # generate an IDA-style sub_X name
707
705
  if name is None:
@@ -1338,7 +1336,8 @@ class Function(Serializable):
1338
1336
 
1339
1337
  @property
1340
1338
  def callable(self):
1341
- return self._project.factory.callable(self.addr)
1339
+ assert self.project is not None
1340
+ return self.project.factory.callable(self.addr)
1342
1341
 
1343
1342
  def normalize(self):
1344
1343
  """
@@ -1349,6 +1348,7 @@ class Function(Serializable):
1349
1348
 
1350
1349
  :return: None
1351
1350
  """
1351
+ assert self.project is not None
1352
1352
 
1353
1353
  # let's put a check here
1354
1354
  if self.startpoint is None:
@@ -1377,8 +1377,8 @@ class Function(Serializable):
1377
1377
 
1378
1378
  # Break other nodes
1379
1379
  for n in other_nodes:
1380
- new_size = get_real_address_if_arm(self._project.arch, smallest_node.addr) - get_real_address_if_arm(
1381
- self._project.arch, n.addr
1380
+ new_size = get_real_address_if_arm(self.project.arch, smallest_node.addr) - get_real_address_if_arm(
1381
+ self.project.arch, n.addr
1382
1382
  )
1383
1383
  if new_size == 0:
1384
1384
  # This is the node that has the same size as the smallest one
@@ -1511,20 +1511,21 @@ class Function(Serializable):
1511
1511
  lib = SIM_LIBRARIES.get(binary_name, None)
1512
1512
  libraries = set()
1513
1513
  if lib is not None:
1514
- libraries.add(lib)
1514
+ libraries.update(lib)
1515
1515
 
1516
1516
  else:
1517
1517
  # try all libraries or all libraries that match the given library name hint
1518
1518
  libraries = set()
1519
- for lib_name, lib in SIM_LIBRARIES.items():
1519
+ for lib_name, libs in SIM_LIBRARIES.items():
1520
1520
  # TODO: Add support for syscall libraries. Note that syscall libraries have different function
1521
1521
  # prototypes for .has_prototype() and .get_prototype()...
1522
- if not isinstance(lib, SimSyscallLibrary):
1523
- if binary_name_hint:
1524
- if binary_name_hint.lower() in lib_name.lower():
1522
+ for lib in libs:
1523
+ if not isinstance(lib, SimSyscallLibrary):
1524
+ if binary_name_hint:
1525
+ if binary_name_hint.lower() in lib_name.lower():
1526
+ libraries.add(lib)
1527
+ else:
1525
1528
  libraries.add(lib)
1526
- else:
1527
- libraries.add(lib)
1528
1529
 
1529
1530
  if not libraries:
1530
1531
  return False
@@ -1597,6 +1598,7 @@ class Function(Serializable):
1597
1598
  ::<addr>::<name> when the function binary is an unnamed non-main object, or when multiple functions with
1598
1599
  the same name are defined in the function binary.
1599
1600
  """
1601
+ assert self.project is not None
1600
1602
  must_disambiguate_by_addr = self.binary is not self.project.loader.main_object and self.binary_name is None
1601
1603
 
1602
1604
  # If there are multiple functions with the same name in the same object, disambiguate by address
@@ -1615,6 +1617,7 @@ class Function(Serializable):
1615
1617
  return n + (display_name or self.name)
1616
1618
 
1617
1619
  def apply_definition(self, definition: str, calling_convention: SimCC | type[SimCC] | None = None) -> None:
1620
+ assert self.project is not None
1618
1621
  if not definition.endswith(";"):
1619
1622
  definition += ";"
1620
1623
  func_def = parse_defns(definition, arch=self.project.arch)
@@ -1677,7 +1680,7 @@ class Function(Serializable):
1677
1680
  func.calling_convention = self.calling_convention
1678
1681
  func.prototype = self.prototype
1679
1682
  func._returning = self._returning
1680
- func.alignment = self.is_alignment
1683
+ func.is_alignment = self.is_alignment
1681
1684
  func.startpoint = self.startpoint
1682
1685
  func._addr_to_block_node = self._addr_to_block_node.copy()
1683
1686
  func._block_sizes = self._block_sizes.copy()
@@ -505,6 +505,7 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
505
505
 
506
506
  def rebuild_callgraph(self):
507
507
  self.callgraph = networkx.MultiDiGraph()
508
+ cfg = self._kb.cfgs.get_most_accurate()
508
509
  for func_addr in self._function_map:
509
510
  self.callgraph.add_node(func_addr)
510
511
  for func in self._function_map.values():
@@ -512,6 +513,14 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
512
513
  for node in func.transition_graph.nodes():
513
514
  if isinstance(node, Function):
514
515
  self.callgraph.add_edge(func.addr, node.addr)
516
+ else:
517
+ cfgnode = cfg.get_any_node(node.addr)
518
+ if (
519
+ cfgnode is not None
520
+ and cfgnode.function_address is not None
521
+ and cfgnode.function_address != func.addr
522
+ ):
523
+ self.callgraph.add_edge(func.addr, cfgnode.function_address)
515
524
 
516
525
 
517
526
  KnowledgeBasePlugin.register_default("functions", FunctionManager)