angr 9.2.146__py3-none-manylinux2014_aarch64.whl → 9.2.148__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (66)
  1. angr/__init__.py +1 -1
  2. angr/analyses/analysis.py +3 -11
  3. angr/analyses/bindiff.py +343 -68
  4. angr/analyses/calling_convention/fact_collector.py +5 -4
  5. angr/analyses/calling_convention/utils.py +1 -0
  6. angr/analyses/cfg/cfg_arch_options.py +10 -0
  7. angr/analyses/cfg/cfg_base.py +42 -74
  8. angr/analyses/cfg/cfg_emulated.py +12 -12
  9. angr/analyses/cfg/cfg_fast.py +39 -20
  10. angr/analyses/cfg/cfg_fast_soot.py +3 -3
  11. angr/analyses/decompiler/callsite_maker.py +28 -18
  12. angr/analyses/decompiler/clinic.py +4 -4
  13. angr/analyses/decompiler/condition_processor.py +0 -21
  14. angr/analyses/decompiler/counters/call_counter.py +3 -0
  15. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +1 -1
  16. angr/analyses/decompiler/peephole_optimizations/remove_redundant_conversions.py +14 -0
  17. angr/analyses/decompiler/structured_codegen/c.py +5 -5
  18. angr/analyses/decompiler/structuring/phoenix.py +11 -3
  19. angr/analyses/deobfuscator/api_obf_finder.py +5 -1
  20. angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +1 -1
  21. angr/analyses/flirt/__init__.py +47 -0
  22. angr/analyses/flirt/consts.py +160 -0
  23. angr/analyses/{flirt.py → flirt/flirt.py} +99 -38
  24. angr/analyses/flirt/flirt_function.py +20 -0
  25. angr/analyses/flirt/flirt_matcher.py +351 -0
  26. angr/analyses/flirt/flirt_module.py +32 -0
  27. angr/analyses/flirt/flirt_node.py +23 -0
  28. angr/analyses/flirt/flirt_sig.py +356 -0
  29. angr/analyses/flirt/flirt_utils.py +31 -0
  30. angr/analyses/forward_analysis/visitors/graph.py +0 -8
  31. angr/analyses/identifier/runner.py +1 -1
  32. angr/analyses/reaching_definitions/function_handler.py +4 -4
  33. angr/analyses/reassembler.py +1 -1
  34. angr/analyses/stack_pointer_tracker.py +35 -1
  35. angr/analyses/static_hooker.py +11 -9
  36. angr/analyses/variable_recovery/engine_ail.py +8 -8
  37. angr/analyses/variable_recovery/engine_base.py +2 -0
  38. angr/block.py +6 -6
  39. angr/calling_conventions.py +74 -23
  40. angr/engines/vex/heavy/concretizers.py +10 -0
  41. angr/exploration_techniques/director.py +1 -1
  42. angr/flirt/__init__.py +15 -44
  43. angr/knowledge_plugins/functions/function.py +42 -39
  44. angr/knowledge_plugins/functions/function_manager.py +9 -0
  45. angr/knowledge_plugins/functions/function_parser.py +9 -1
  46. angr/knowledge_plugins/functions/soot_function.py +1 -1
  47. angr/knowledge_plugins/key_definitions/key_definition_manager.py +1 -1
  48. angr/procedures/definitions/__init__.py +14 -11
  49. angr/procedures/stubs/format_parser.py +1 -1
  50. angr/project.py +23 -29
  51. angr/protos/cfg_pb2.py +14 -25
  52. angr/protos/function_pb2.py +11 -22
  53. angr/protos/primitives_pb2.py +36 -47
  54. angr/protos/variables_pb2.py +28 -39
  55. angr/protos/xrefs_pb2.py +8 -19
  56. angr/sim_type.py +0 -16
  57. angr/simos/cgc.py +1 -1
  58. angr/simos/linux.py +5 -5
  59. angr/simos/windows.py +5 -5
  60. angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +1 -1
  61. {angr-9.2.146.dist-info → angr-9.2.148.dist-info}/METADATA +8 -8
  62. {angr-9.2.146.dist-info → angr-9.2.148.dist-info}/RECORD +66 -58
  63. {angr-9.2.146.dist-info → angr-9.2.148.dist-info}/WHEEL +1 -1
  64. {angr-9.2.146.dist-info → angr-9.2.148.dist-info}/entry_points.txt +0 -0
  65. {angr-9.2.146.dist-info → angr-9.2.148.dist-info/licenses}/LICENSE +0 -0
  66. {angr-9.2.146.dist-info → angr-9.2.148.dist-info}/top_level.txt +0 -0
angr/calling_conventions.py CHANGED
@@ -1,9 +1,9 @@
1
1
  # pylint:disable=line-too-long,missing-class-docstring,no-self-use
2
2
  from __future__ import annotations
3
3
  import logging
4
- from typing import cast
4
+ from typing import Generic, cast, TypeVar
5
5
 
6
- from collections.abc import Iterable, Sequence
6
+ from collections.abc import Iterable
7
7
  from collections import defaultdict
8
8
  import contextlib
9
9
 
@@ -39,6 +39,8 @@ from .state_plugins.sim_action_object import SimActionObject
39
39
  l = logging.getLogger(name=__name__)
40
40
  l.addFilter(UniqueLogFilter())
41
41
 
42
+ T = TypeVar("T", bound="SimFunctionArgument")
43
+
42
44
 
43
45
  class PointerWrapper:
44
46
  def __init__(self, value, buffer=False):
@@ -386,12 +388,12 @@ class SimStackArg(SimFunctionArgument):
386
388
  return SimStackArg(self.stack_offset + offset, size, is_fp)
387
389
 
388
390
 
389
- class SimComboArg(SimFunctionArgument):
391
+ class SimComboArg(SimFunctionArgument, Generic[T]):
390
392
  """
391
393
  An argument which spans multiple storage locations. Locations should be given least-significant first.
392
394
  """
393
395
 
394
- def __init__(self, locations, is_fp=False):
396
+ def __init__(self, locations: list[T], is_fp=False):
395
397
  super().__init__(sum(x.size for x in locations), is_fp=is_fp)
396
398
  self.locations = locations
397
399
 
@@ -449,6 +451,45 @@ class SimStructArg(SimFunctionArgument):
449
451
 
450
452
  return others
451
453
 
454
+ def get_single_footprint(self) -> SimStackArg | SimRegArg | SimComboArg:
455
+ if self.struct._arch is None:
456
+ raise TypeError("Can't tell the size of a struct without an arch")
457
+ stack_min = None
458
+ stack_max = None
459
+ regs = []
460
+ for field in self.struct.fields:
461
+ loc = self.locs[field]
462
+ if isinstance(loc, SimStackArg):
463
+ if stack_min is None or stack_max is None:
464
+ stack_min = loc.stack_offset
465
+ stack_max = loc.stack_offset
466
+ else:
467
+ # sanity check that arguments are laid out in order...
468
+ assert loc.stack_offset >= stack_max
469
+ stack_max = loc.stack_offset + loc.size
470
+ elif isinstance(loc, SimRegArg):
471
+ regs.append(loc)
472
+ else:
473
+ assert False, "Why would a struct have layout elements other than stack and reg?"
474
+
475
+ # things to consider...
476
+ # what happens if we return the concat of two registers but there's slack space missing?
477
+ # an example of this would be big-endian struct { long a; int b; }
478
+ # do any CCs do this??
479
+ # for now assume no
480
+
481
+ if stack_min is not None:
482
+ if regs:
483
+ assert (
484
+ False
485
+ ), "Unknown CC argument passing structure - why are we passing both regs and stack at the same time?"
486
+ return SimStackArg(stack_min, self.struct.size // self.struct._arch.byte_width)
487
+ if not regs:
488
+ assert False, "huh??????"
489
+ if len(regs) == 1:
490
+ return regs[0]
491
+ return SimComboArg(regs)
492
+
452
493
  def get_value(self, state, **kwargs):
453
494
  return SimStructValue(
454
495
  self.struct, {field: getter.get_value(state, **kwargs) for field, getter in self.locs.items()}
@@ -486,7 +527,7 @@ class SimReferenceArgument(SimFunctionArgument):
486
527
  zero on the stack. It will be passed ``stack_base=ptr_loc.get_value(state)``
487
528
  """
488
529
 
489
- def __init__(self, ptr_loc, main_loc):
530
+ def __init__(self, ptr_loc: SimFunctionArgument, main_loc: SimFunctionArgument):
490
531
  super().__init__(ptr_loc.size) # ???
491
532
  self.ptr_loc = ptr_loc
492
533
  self.main_loc = main_loc
@@ -700,6 +741,7 @@ class SimCC:
700
741
  )
701
742
  if self.return_in_implicit_outparam(ty):
702
743
  if perspective_returned:
744
+ assert self.RETURN_VAL is not None
703
745
  ptr_loc = self.RETURN_VAL
704
746
  else:
705
747
  ptr_loc = self.next_arg(self.ArgSession(self), SimTypePointer(SimTypeBottom()))
@@ -713,6 +755,7 @@ class SimCC:
713
755
  if self.RETURN_VAL is None or isinstance(ty, SimTypeBottom):
714
756
  return None
715
757
  if ty.size > self.RETURN_VAL.size * self.arch.byte_width:
758
+ assert self.OVERFLOW_RETURN_VAL is not None
716
759
  return SimComboArg([self.RETURN_VAL, self.OVERFLOW_RETURN_VAL])
717
760
  return self.RETURN_VAL.refine(size=ty.size // self.arch.byte_width, arch=self.arch, is_fp=False)
718
761
 
@@ -991,7 +1034,8 @@ class SimCC:
991
1034
  else:
992
1035
  raise TypeError("PointerWrapper(buffer=True) can only be used with a bitvector or a bytestring")
993
1036
  else:
994
- child_type = SimTypeArray(ty.pts_to) if type(arg.value) in (str, bytes, list) else ty.pts_to
1037
+ sub = ty.pts_to if isinstance(ty, SimTypePointer) else ty.refs
1038
+ child_type = SimTypeArray(sub) if isinstance(arg.value, (str, bytes, list)) else sub
995
1039
  try:
996
1040
  real_value = SimCC._standardize_value(arg.value, child_type, state, alloc)
997
1041
  except TypeError as e: # this is a dangerous catch...
@@ -1003,32 +1047,34 @@ class SimCC:
1003
1047
 
1004
1048
  if isinstance(arg, (str, bytes)):
1005
1049
  # sanitize the argument and request standardization again with SimTypeArray
1006
- if type(arg) is str:
1050
+ if isinstance(arg, str):
1007
1051
  arg = arg.encode()
1008
1052
  arg += b"\0"
1009
1053
  if isinstance(ty, SimTypePointer) and isinstance(ty.pts_to, SimTypeChar):
1010
1054
  pass
1011
- elif isinstance(ty, SimTypeFixedSizeArray) and isinstance(ty.elem_type, SimTypeChar):
1012
- if len(arg) > ty.length:
1013
- raise TypeError(f"String {arg!r} is too long for {ty}")
1014
- arg = arg.ljust(ty.length, b"\0")
1015
- elif isinstance(ty, SimTypeArray) and isinstance(ty.elem_type, SimTypeChar):
1055
+ elif (isinstance(ty, SimTypeFixedSizeArray) and isinstance(ty.elem_type, SimTypeChar)) or (
1056
+ isinstance(ty, SimTypeArray) and isinstance(ty.elem_type, SimTypeChar)
1057
+ ):
1016
1058
  if ty.length is not None:
1017
1059
  if len(arg) > ty.length:
1018
1060
  raise TypeError(f"String {arg!r} is too long for {ty}")
1019
1061
  arg = arg.ljust(ty.length, b"\0")
1020
1062
  elif isinstance(ty, SimTypeString):
1021
- if len(arg) > ty.length + 1:
1022
- raise TypeError(f"String {arg!r} is too long for {ty}")
1023
- arg = arg.ljust(ty.length + 1, b"\0")
1063
+ if ty.length is not None:
1064
+ if len(arg) > ty.length + 1:
1065
+ raise TypeError(f"String {arg!r} is too long for {ty}")
1066
+ arg = arg.ljust(ty.length + 1, b"\0")
1024
1067
  else:
1025
1068
  raise TypeError(f"Type mismatch: Expected {ty}, got char*")
1026
1069
  return SimCC._standardize_value(list(arg), SimTypeArray(SimTypeChar(), len(arg)), state, alloc)
1027
1070
 
1028
1071
  if isinstance(arg, list):
1029
- if isinstance(ty, (SimTypePointer, SimTypeReference)):
1072
+ if isinstance(ty, SimTypePointer):
1030
1073
  ref = True
1031
1074
  subty = ty.pts_to
1075
+ elif isinstance(ty, SimTypeReference):
1076
+ ref = True
1077
+ subty = ty.refs
1032
1078
  elif isinstance(ty, SimTypeArray):
1033
1079
  ref = True
1034
1080
  subty = ty.elem_type
@@ -1045,7 +1091,7 @@ class SimCC:
1045
1091
  if isinstance(arg, (tuple, dict, SimStructValue)):
1046
1092
  if not isinstance(ty, SimStruct):
1047
1093
  raise TypeError(f"Type mismatch: Expected {ty}, got {type(arg)} (i.e. struct)")
1048
- if type(arg) is not SimStructValue:
1094
+ if not isinstance(arg, SimStructValue):
1049
1095
  if len(arg) != len(ty.fields):
1050
1096
  raise TypeError(f"Wrong number of fields in struct, expected {len(ty.fields)} got {len(arg)}")
1051
1097
  arg = SimStructValue(ty, arg)
@@ -1075,14 +1121,16 @@ class SimCC:
1075
1121
  raise TypeError(f"Type mismatch: expected {ty}, got {arg.sort}")
1076
1122
  return arg
1077
1123
  if isinstance(ty, (SimTypeReg, SimTypeNum)):
1078
- return arg.val_to_bv(ty.size, ty.signed)
1124
+ return arg.val_to_bv(ty.size, ty.signed if isinstance(ty, SimTypeNum) else False)
1079
1125
  raise TypeError(f"Type mismatch: expected {ty}, got {arg.sort}")
1080
1126
 
1081
1127
  if isinstance(arg, claripy.ast.BV):
1082
1128
  if isinstance(ty, (SimTypeReg, SimTypeNum)):
1083
1129
  if len(arg) != ty.size:
1084
1130
  if arg.concrete:
1085
- return claripy.BVV(arg.concrete_value, ty.size)
1131
+ size = ty.size
1132
+ assert size is not None
1133
+ return claripy.BVV(arg.concrete_value, size)
1086
1134
  raise TypeError(f"Type mismatch of symbolic data: expected {ty}, got {len(arg)} bits")
1087
1135
  return arg
1088
1136
  if isinstance(ty, (SimTypeFloat)):
@@ -1101,7 +1149,7 @@ class SimCC:
1101
1149
  return isinstance(other, self.__class__)
1102
1150
 
1103
1151
  @classmethod
1104
- def _match(cls, arch, args: list, sp_delta):
1152
+ def _match(cls, arch, args: list[SimRegArg | SimStackArg], sp_delta):
1105
1153
  if (
1106
1154
  cls.arches() is not None and ":" not in arch.name and not isinstance(arch, cls.arches())
1107
1155
  ): # pylint:disable=isinstance-second-argument-not-valid-type
@@ -1139,13 +1187,16 @@ class SimCC:
1139
1187
  @classmethod
1140
1188
  def _guess_arg_count(cls, args, limit: int = 64) -> int:
1141
1189
  # pylint:disable=not-callable
1190
+ assert cls.ARCH is not None
1142
1191
  stack_args = [a for a in args if isinstance(a, SimStackArg)]
1143
- stack_arg_count = (max(a.stack_offset for a in stack_args) // cls.ARCH().bytes + 1) if stack_args else 0
1192
+ stack_arg_count = (
1193
+ (max(a.stack_offset for a in stack_args) // cls.ARCH(archinfo.Endness.LE).bytes + 1) if stack_args else 0
1194
+ )
1144
1195
  return min(limit, max(len(args), stack_arg_count))
1145
1196
 
1146
1197
  @staticmethod
1147
1198
  def find_cc(
1148
- arch: archinfo.Arch, args: Sequence[SimFunctionArgument], sp_delta: int, platform: str = "Linux"
1199
+ arch: archinfo.Arch, args: list[SimRegArg | SimStackArg], sp_delta: int, platform: str = "Linux"
1149
1200
  ) -> SimCC | None:
1150
1201
  """
1151
1202
  Pinpoint the best-fit calling convention and return the corresponding SimCC instance, or None if no fit is
@@ -1229,7 +1280,7 @@ class SimCCUsercall(SimCC):
1229
1280
  def next_arg(self, session, arg_type):
1230
1281
  return next(session.real_args)
1231
1282
 
1232
- def return_val(self, ty, **kwargs):
1283
+ def return_val(self, ty, **kwargs): # pylint: disable=unused-argument
1233
1284
  return self.ret_loc
1234
1285
 
1235
1286
 
angr/engines/vex/heavy/concretizers.py CHANGED
@@ -160,6 +160,15 @@ def concretize_fscale(state, args):
160
160
  return claripy.FPV(arg_x * math.pow(2, arg_y), claripy.FSORT_DOUBLE)
161
161
 
162
162
 
163
+ def concretize_fsqrt32(state, args):
164
+ # Concretize floating point square root. Z3 does support square root but unsure if that includes floating point
165
+ arg_1 = state.solver.eval(args[1])
166
+ if arg_1 < 0 or math.isnan(arg_1):
167
+ return claripy.FPV(math.nan, claripy.FSORT_FLOAT)
168
+
169
+ return claripy.FPV(math.sqrt(arg_1), claripy.FSORT_FLOAT)
170
+
171
+
163
172
  def concretize_fsqrt(state, args):
164
173
  # Concretize floating point square root. Z3 does support square root but unsure if that includes floating point
165
174
  arg_1 = state.solver.eval(args[1])
@@ -365,6 +374,7 @@ concretizers = {
365
374
  "Iop_Yl2xF64": concretize_yl2x,
366
375
  "Iop_ScaleF64": concretize_fscale,
367
376
  "Iop_2xm1F64": concretize_2xm1,
377
+ "Iop_SqrtF32": concretize_fsqrt32,
368
378
  "Iop_SqrtF64": concretize_fsqrt,
369
379
  "Iop_CosF64": concretize_trig_cos,
370
380
  "Iop_SinF64": concretize_trig_sin,
angr/exploration_techniques/director.py CHANGED
@@ -71,7 +71,7 @@ class BaseGoal:
71
71
 
72
72
  block_id = cfg._generate_block_id(call_stack_suffix, state.addr, is_syscall)
73
73
 
74
- return cfg.get_node(block_id)
74
+ return cfg.model.get_node(block_id)
75
75
 
76
76
  @staticmethod
77
77
  def _dfs_edges(graph, source, max_steps=None):
angr/flirt/__init__.py CHANGED
@@ -6,44 +6,16 @@ import json
6
6
  from collections import defaultdict
7
7
  import logging
8
8
 
9
- import nampa
9
+ from angr.analyses.flirt import (
10
+ FlirtSignature,
11
+ FlirtSignatureParsed,
12
+ FlirtSignatureError,
13
+ flirt_arch_to_arch_name,
14
+ flirt_os_type_to_os_name,
15
+ )
10
16
 
11
- _l = logging.getLogger(__name__)
12
-
13
-
14
- class FlirtSignature:
15
- """
16
- This class describes a FLIRT signature.
17
- """
18
17
 
19
- def __init__(
20
- self,
21
- arch: str,
22
- platform: str,
23
- sig_name: str,
24
- sig_path: str,
25
- unique_strings: set[str] | None = None,
26
- compiler: str | None = None,
27
- compiler_version: str | None = None,
28
- os_name: str | None = None,
29
- os_version: str | None = None,
30
- ):
31
- self.arch = arch
32
- self.platform = platform
33
- self.sig_name = sig_name
34
- self.sig_path = sig_path
35
- self.unique_strings = unique_strings
36
- self.compiler = compiler
37
- self.compiler_version = compiler_version
38
- self.os_name = os_name
39
- self.os_version = os_version
40
-
41
- def __repr__(self):
42
- if self.os_name:
43
- if self.os_version:
44
- return f"<{self.sig_name}@{self.arch}-{self.os_name}-{self.os_version}>"
45
- return f"<{self.sig_name}@{self.arch}-{self.os_name}>"
46
- return f"<{self.sig_name}@{self.arch}-{self.platform}>"
18
+ _l = logging.getLogger(__name__)
47
19
 
48
20
 
49
21
  FS = FlirtSignature
@@ -72,8 +44,8 @@ def load_signatures(path: str) -> None:
72
44
  sig_path = os.path.join(root, filename)
73
45
  try:
74
46
  with open(sig_path, "rb") as f:
75
- flirt_header = nampa.flirt.parse_header(f)
76
- except nampa.flirt.FlirtException:
47
+ sig_parsed = FlirtSignatureParsed.parse(f)
48
+ except FlirtSignatureError:
77
49
  _l.warning("Failed to load FLIRT signature file %s.", sig_path)
78
50
  continue
79
51
 
@@ -84,8 +56,8 @@ def load_signatures(path: str) -> None:
84
56
  with open(meta_path) as f:
85
57
  meta = json.load(f)
86
58
 
87
- arch = meta.get("arch", None)
88
- platform = meta.get("platform", None)
59
+ arch = str(meta.get("arch", "Unknown"))
60
+ platform = str(meta.get("platform", "UnknownOS"))
89
61
  os_name = meta.get("os", None)
90
62
  os_version = meta.get("os_version", None)
91
63
  compiler = meta.get("compiler", None)
@@ -94,9 +66,8 @@ def load_signatures(path: str) -> None:
94
66
 
95
67
  else:
96
68
  # nope... we need to extract information from the signature file
97
- # TODO: Convert them to angr-specific strings
98
- arch = flirt_header.arch
99
- platform = flirt_header.os_types
69
+ arch = flirt_arch_to_arch_name(sig_parsed.arch, sig_parsed.app_types)
70
+ platform = flirt_os_type_to_os_name(sig_parsed.os_types)
100
71
  os_name = None
101
72
  os_version = None
102
73
  unique_strings = None
@@ -106,7 +77,7 @@ def load_signatures(path: str) -> None:
106
77
  signature = FlirtSignature(
107
78
  arch,
108
79
  platform,
109
- flirt_header.library_name.decode("utf-8"),
80
+ sig_parsed.libname,
110
81
  sig_path,
111
82
  unique_strings=unique_strings,
112
83
  compiler=compiler,
angr/knowledge_plugins/functions/function.py CHANGED
@@ -23,7 +23,6 @@ from angr.procedures import SIM_LIBRARIES
23
23
  from angr.procedures.definitions import SimSyscallLibrary
24
24
  from angr.protos import function_pb2
25
25
  from angr.calling_conventions import DEFAULT_CC, default_cc
26
- from angr.misc.ux import deprecated
27
26
  from angr.sim_type import SimTypeFunction, parse_defns
28
27
  from angr.calling_conventions import SimCC
29
28
  from angr.project import Project
@@ -92,6 +91,10 @@ class Function(Serializable):
92
91
  is_plt: bool | None = None,
93
92
  returning=None,
94
93
  alignment=False,
94
+ calling_convention: SimCC | None = None,
95
+ prototype: SimTypeFunction | None = None,
96
+ prototype_libname: str | None = None,
97
+ is_prototype_guessed: bool = True,
95
98
  ):
96
99
  """
97
100
  Function constructor. If the optional parameters are not provided, they will be automatically determined upon
@@ -139,11 +142,11 @@ class Function(Serializable):
139
142
  self.retaddr_on_stack = False
140
143
  self.sp_delta = 0
141
144
  # Calling convention
142
- self.calling_convention: SimCC | None = None
145
+ self.calling_convention = calling_convention
143
146
  # Function prototype
144
- self.prototype: SimTypeFunction | None = None
145
- self.prototype_libname: str | None = None
146
- self.is_prototype_guessed: bool = True
147
+ self.prototype = prototype
148
+ self.prototype_libname = prototype_libname
149
+ self.is_prototype_guessed = is_prototype_guessed
147
150
  # Whether this function returns or not. `None` means it's not determined yet
148
151
  self._returning = None
149
152
 
@@ -218,7 +221,7 @@ class Function(Serializable):
218
221
  self.is_default_name = False
219
222
  self._name = name
220
223
  self.previous_names = []
221
- self.from_signature = None
224
+ self.from_signature: str | None = None
222
225
 
223
226
  # Determine the name the binary where this function is.
224
227
  if binary_name is not None:
@@ -239,15 +242,6 @@ class Function(Serializable):
239
242
 
240
243
  self._init_prototype_and_calling_convention()
241
244
 
242
- @property
243
- @deprecated(".is_alignment")
244
- def alignment(self):
245
- return self.is_alignment
246
-
247
- @alignment.setter
248
- def alignment(self, value):
249
- self.is_alignment = value
250
-
251
245
  @property
252
246
  def name(self):
253
247
  return self._name
@@ -357,7 +351,8 @@ class Function(Serializable):
357
351
  # we know the size
358
352
  size = self._block_sizes[addr]
359
353
 
360
- block = self._project.factory.block(addr, size=size, byte_string=byte_string)
354
+ assert self.project is not None
355
+ block = self.project.factory.block(addr, size=size, byte_string=byte_string)
361
356
  if size is None:
362
357
  # update block_size dict
363
358
  self._block_sizes[addr] = block.size
@@ -460,18 +455,19 @@ class Function(Serializable):
460
455
  """
461
456
  constants = set()
462
457
 
463
- if not self._project.loader.main_object.contains_addr(self.addr):
458
+ assert self.project is not None
459
+ if not self.project.loader.main_object.contains_addr(self.addr):
464
460
  return constants
465
461
 
466
462
  # FIXME the old way was better for architectures like mips, but we need the initial irsb
467
463
  # reanalyze function with a new initial state (use persistent registers)
468
464
  # initial_state = self._function_manager._cfg.get_any_irsb(self.addr).initial_state
469
- # fresh_state = self._project.factory.blank_state(mode="fastpath")
465
+ # fresh_state = self.project.factory.blank_state(mode="fastpath")
470
466
  # for reg in initial_state.arch.persistent_regs + ['ip']:
471
467
  # fresh_state.registers.store(reg, initial_state.registers.load(reg))
472
468
 
473
469
  # reanalyze function with a new initial state
474
- fresh_state = self._project.factory.blank_state(mode="fastpath")
470
+ fresh_state = self.project.factory.blank_state(mode="fastpath")
475
471
  fresh_state.regs.ip = self.addr
476
472
 
477
473
  graph_addrs = {x.addr for x in self.graph.nodes() if isinstance(x, BlockNode)}
@@ -486,10 +482,10 @@ class Function(Serializable):
486
482
  if state.solver.eval(state.ip) not in graph_addrs:
487
483
  continue
488
484
  # don't trace into simprocedures
489
- if self._project.is_hooked(state.solver.eval(state.ip)):
485
+ if self.project.is_hooked(state.solver.eval(state.ip)):
490
486
  continue
491
487
  # don't trace outside of the binary
492
- if not self._project.loader.main_object.contains_addr(state.solver.eval(state.ip)):
488
+ if not self.project.loader.main_object.contains_addr(state.solver.eval(state.ip)):
493
489
  continue
494
490
  # don't trace unreachable blocks
495
491
  if state.history.jumpkind in {
@@ -506,7 +502,7 @@ class Function(Serializable):
506
502
  curr_ip = state.solver.eval(state.ip)
507
503
 
508
504
  # get runtime values from logs of successors
509
- successors = self._project.factory.successors(state)
505
+ successors = self.project.factory.successors(state)
510
506
  for succ in successors.flat_successors + successors.unsat_successors:
511
507
  for a in succ.history.recent_actions:
512
508
  for ao in a.all_objects:
@@ -562,7 +558,7 @@ class Function(Serializable):
562
558
  f" SP difference: {self.sp_delta}\n"
563
559
  f" Has return: {self.has_return}\n"
564
560
  f" Returning: {'Unknown' if self.returning is None else self.returning}\n"
565
- f" Alignment: {self.alignment}\n"
561
+ f" Alignment: {self.is_alignment}\n"
566
562
  f" Arguments: reg: {self._argument_registers}, stack: {self._argument_stack_variables}\n"
567
563
  f" Blocks: [{', '.join(f'{i:#x}' for i in self.block_addrs)}]\n"
568
564
  f" Cyclomatic Complexity: {self.cyclomatic_complexity}\n"
@@ -612,7 +608,7 @@ class Function(Serializable):
612
608
 
613
609
  @property
614
610
  def size(self):
615
- return sum(b.size for b in self.blocks)
611
+ return sum(self._block_sizes[addr] for addr in self._local_blocks)
616
612
 
617
613
  @property
618
614
  def binary(self):
@@ -620,8 +616,8 @@ class Function(Serializable):
620
616
  Get the object this function belongs to.
621
617
  :return: The object this function belongs to.
622
618
  """
623
-
624
- return self._project.loader.find_object_containing(self.addr, membership_check=False)
619
+ assert self.project is not None
620
+ return self.project.loader.find_object_containing(self.addr, membership_check=False)
625
621
 
626
622
  @property
627
623
  def offset(self) -> int:
@@ -698,10 +694,12 @@ class Function(Serializable):
698
694
  project = self.project
699
695
  if project.is_hooked(addr):
700
696
  hooker = project.hooked_by(addr)
701
- name = hooker.display_name
697
+ if hooker is not None:
698
+ name = hooker.display_name
702
699
  elif project.simos.is_syscall_addr(addr):
703
700
  syscall_inst = project.simos.syscall_from_addr(addr)
704
- name = syscall_inst.display_name
701
+ if syscall_inst is not None:
702
+ name = syscall_inst.display_name
705
703
 
706
704
  # generate an IDA-style sub_X name
707
705
  if name is None:
@@ -1338,7 +1336,8 @@ class Function(Serializable):
1338
1336
 
1339
1337
  @property
1340
1338
  def callable(self):
1341
- return self._project.factory.callable(self.addr)
1339
+ assert self.project is not None
1340
+ return self.project.factory.callable(self.addr)
1342
1341
 
1343
1342
  def normalize(self):
1344
1343
  """
@@ -1349,6 +1348,7 @@ class Function(Serializable):
1349
1348
 
1350
1349
  :return: None
1351
1350
  """
1351
+ assert self.project is not None
1352
1352
 
1353
1353
  # let's put a check here
1354
1354
  if self.startpoint is None:
@@ -1377,8 +1377,8 @@ class Function(Serializable):
1377
1377
 
1378
1378
  # Break other nodes
1379
1379
  for n in other_nodes:
1380
- new_size = get_real_address_if_arm(self._project.arch, smallest_node.addr) - get_real_address_if_arm(
1381
- self._project.arch, n.addr
1380
+ new_size = get_real_address_if_arm(self.project.arch, smallest_node.addr) - get_real_address_if_arm(
1381
+ self.project.arch, n.addr
1382
1382
  )
1383
1383
  if new_size == 0:
1384
1384
  # This is the node that has the same size as the smallest one
@@ -1511,20 +1511,21 @@ class Function(Serializable):
1511
1511
  lib = SIM_LIBRARIES.get(binary_name, None)
1512
1512
  libraries = set()
1513
1513
  if lib is not None:
1514
- libraries.add(lib)
1514
+ libraries.update(lib)
1515
1515
 
1516
1516
  else:
1517
1517
  # try all libraries or all libraries that match the given library name hint
1518
1518
  libraries = set()
1519
- for lib_name, lib in SIM_LIBRARIES.items():
1519
+ for lib_name, libs in SIM_LIBRARIES.items():
1520
1520
  # TODO: Add support for syscall libraries. Note that syscall libraries have different function
1521
1521
  # prototypes for .has_prototype() and .get_prototype()...
1522
- if not isinstance(lib, SimSyscallLibrary):
1523
- if binary_name_hint:
1524
- if binary_name_hint.lower() in lib_name.lower():
1522
+ for lib in libs:
1523
+ if not isinstance(lib, SimSyscallLibrary):
1524
+ if binary_name_hint:
1525
+ if binary_name_hint.lower() in lib_name.lower():
1526
+ libraries.add(lib)
1527
+ else:
1525
1528
  libraries.add(lib)
1526
- else:
1527
- libraries.add(lib)
1528
1529
 
1529
1530
  if not libraries:
1530
1531
  return False
@@ -1597,6 +1598,7 @@ class Function(Serializable):
1597
1598
  ::<addr>::<name> when the function binary is an unnamed non-main object, or when multiple functions with
1598
1599
  the same name are defined in the function binary.
1599
1600
  """
1601
+ assert self.project is not None
1600
1602
  must_disambiguate_by_addr = self.binary is not self.project.loader.main_object and self.binary_name is None
1601
1603
 
1602
1604
  # If there are multiple functions with the same name in the same object, disambiguate by address
@@ -1615,6 +1617,7 @@ class Function(Serializable):
1615
1617
  return n + (display_name or self.name)
1616
1618
 
1617
1619
  def apply_definition(self, definition: str, calling_convention: SimCC | type[SimCC] | None = None) -> None:
1620
+ assert self.project is not None
1618
1621
  if not definition.endswith(";"):
1619
1622
  definition += ";"
1620
1623
  func_def = parse_defns(definition, arch=self.project.arch)
@@ -1677,7 +1680,7 @@ class Function(Serializable):
1677
1680
  func.calling_convention = self.calling_convention
1678
1681
  func.prototype = self.prototype
1679
1682
  func._returning = self._returning
1680
- func.alignment = self.is_alignment
1683
+ func.is_alignment = self.is_alignment
1681
1684
  func.startpoint = self.startpoint
1682
1685
  func._addr_to_block_node = self._addr_to_block_node.copy()
1683
1686
  func._block_sizes = self._block_sizes.copy()
angr/knowledge_plugins/functions/function_manager.py CHANGED
@@ -505,6 +505,7 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
505
505
 
506
506
  def rebuild_callgraph(self):
507
507
  self.callgraph = networkx.MultiDiGraph()
508
+ cfg = self._kb.cfgs.get_most_accurate()
508
509
  for func_addr in self._function_map:
509
510
  self.callgraph.add_node(func_addr)
510
511
  for func in self._function_map.values():
@@ -512,6 +513,14 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
512
513
  for node in func.transition_graph.nodes():
513
514
  if isinstance(node, Function):
514
515
  self.callgraph.add_edge(func.addr, node.addr)
516
+ else:
517
+ cfgnode = cfg.get_any_node(node.addr)
518
+ if (
519
+ cfgnode is not None
520
+ and cfgnode.function_address is not None
521
+ and cfgnode.function_address != func.addr
522
+ ):
523
+ self.callgraph.add_edge(func.addr, cfgnode.function_address)
515
524
 
516
525
 
517
526
  KnowledgeBasePlugin.register_default("functions", FunctionManager)
angr/knowledge_plugins/functions/function_parser.py CHANGED
@@ -36,6 +36,10 @@ class FunctionParser:
36
36
  obj.alignment = function.is_alignment
37
37
  obj.binary_name = function.binary_name or ""
38
38
  obj.normalized = function.normalized
39
+ obj.calling_convention = pickle.dumps(function.calling_convention)
40
+ obj.prototype = pickle.dumps(function.prototype)
41
+ obj.prototype_libname = (function.prototype_libname or "").encode()
42
+ obj.is_prototype_guessed = function.is_prototype_guessed
39
43
 
40
44
  # signature matched?
41
45
  if not function.from_signature:
@@ -107,6 +111,10 @@ class FunctionParser:
107
111
  returning=cmsg.returning,
108
112
  alignment=cmsg.alignment,
109
113
  binary_name=None if not cmsg.binary_name else cmsg.binary_name,
114
+ calling_convention=pickle.loads(cmsg.calling_convention),
115
+ prototype=pickle.loads(cmsg.prototype),
116
+ prototype_libname=cmsg.prototype_libname if cmsg.prototype_libname else None,
117
+ is_prototype_guessed=cmsg.is_prototype_guessed,
110
118
  )
111
119
  obj._project = project
112
120
  obj.normalized = cmsg.normalized
@@ -209,7 +217,7 @@ class FunctionParser:
209
217
  stmt_idx=stmt_idx,
210
218
  is_exception=edge_type == "exception",
211
219
  )
212
- elif edge_type == "call":
220
+ elif edge_type in ("call", "syscall"):
213
221
  # find the corresponding fake_ret edge
214
222
  fake_ret_edge = next(
215
223
  iter(edge_ for edge_ in fake_return_edges[src_addr] if edge_[1].addr == src.addr + src.size), None
angr/knowledge_plugins/functions/soot_function.py CHANGED
@@ -83,7 +83,7 @@ class SootFunction(Function):
83
83
  # Whether this function returns or not. `None` means it's not determined yet
84
84
  self._returning = None
85
85
 
86
- self.alignment = None
86
+ self.is_alignment = None
87
87
 
88
88
  # Determine returning status for SimProcedures and Syscalls
89
89
  hooker = None
angr/knowledge_plugins/key_definitions/key_definition_manager.py CHANGED
@@ -51,7 +51,7 @@ class KeyDefinitionManager(KnowledgeBasePlugin):
51
51
  if not self._kb.functions.contains_addr(func_addr):
52
52
  return None
53
53
  func = self._kb.functions[func_addr]
54
- if func.is_simprocedure or func.is_plt or func.alignment:
54
+ if func.is_simprocedure or func.is_plt or func.is_alignment:
55
55
  return None
56
56
  callsites = list(func.get_call_sites())
57
57
  if not callsites: