angr 9.2.147__py3-none-manylinux2014_aarch64.whl → 9.2.149__py3-none-manylinux2014_aarch64.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (90)
  1. angr/__init__.py +1 -1
  2. angr/analyses/analysis.py +3 -11
  3. angr/analyses/calling_convention/calling_convention.py +42 -2
  4. angr/analyses/calling_convention/fact_collector.py +5 -4
  5. angr/analyses/calling_convention/utils.py +1 -0
  6. angr/analyses/cfg/cfg_base.py +3 -59
  7. angr/analyses/cfg/cfg_emulated.py +17 -14
  8. angr/analyses/cfg/cfg_fast.py +68 -63
  9. angr/analyses/cfg/cfg_fast_soot.py +3 -3
  10. angr/analyses/decompiler/ail_simplifier.py +65 -32
  11. angr/analyses/decompiler/block_simplifier.py +20 -6
  12. angr/analyses/decompiler/callsite_maker.py +28 -18
  13. angr/analyses/decompiler/clinic.py +84 -17
  14. angr/analyses/decompiler/condition_processor.py +0 -21
  15. angr/analyses/decompiler/counters/call_counter.py +3 -0
  16. angr/analyses/decompiler/dephication/rewriting_engine.py +24 -2
  17. angr/analyses/decompiler/optimization_passes/__init__.py +5 -0
  18. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +15 -13
  19. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +1 -1
  20. angr/analyses/decompiler/optimization_passes/determine_load_sizes.py +64 -0
  21. angr/analyses/decompiler/optimization_passes/eager_std_string_concatenation.py +165 -0
  22. angr/analyses/decompiler/optimization_passes/engine_base.py +11 -2
  23. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +17 -2
  24. angr/analyses/decompiler/optimization_passes/optimization_pass.py +10 -6
  25. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +99 -30
  26. angr/analyses/decompiler/peephole_optimizations/__init__.py +6 -0
  27. angr/analyses/decompiler/peephole_optimizations/base.py +43 -3
  28. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
  29. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +3 -0
  30. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +4 -1
  31. angr/analyses/decompiler/peephole_optimizations/remove_cxx_destructor_calls.py +32 -0
  32. angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +69 -2
  33. angr/analyses/decompiler/peephole_optimizations/remove_redundant_conversions.py +14 -0
  34. angr/analyses/decompiler/peephole_optimizations/rewrite_conv_mul.py +40 -0
  35. angr/analyses/decompiler/peephole_optimizations/rewrite_cxx_operator_calls.py +90 -0
  36. angr/analyses/decompiler/presets/fast.py +2 -0
  37. angr/analyses/decompiler/presets/full.py +2 -0
  38. angr/analyses/decompiler/ssailification/rewriting_engine.py +51 -4
  39. angr/analyses/decompiler/ssailification/ssailification.py +23 -3
  40. angr/analyses/decompiler/ssailification/traversal_engine.py +15 -1
  41. angr/analyses/decompiler/structured_codegen/c.py +146 -15
  42. angr/analyses/decompiler/structuring/phoenix.py +11 -3
  43. angr/analyses/decompiler/utils.py +6 -1
  44. angr/analyses/deobfuscator/api_obf_finder.py +5 -1
  45. angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +1 -1
  46. angr/analyses/forward_analysis/visitors/graph.py +0 -8
  47. angr/analyses/identifier/runner.py +1 -1
  48. angr/analyses/reaching_definitions/function_handler.py +4 -4
  49. angr/analyses/reassembler.py +1 -1
  50. angr/analyses/s_reaching_definitions/s_rda_view.py +1 -0
  51. angr/analyses/stack_pointer_tracker.py +1 -1
  52. angr/analyses/static_hooker.py +11 -9
  53. angr/analyses/typehoon/lifter.py +20 -0
  54. angr/analyses/typehoon/simple_solver.py +42 -9
  55. angr/analyses/typehoon/translator.py +4 -1
  56. angr/analyses/typehoon/typeconsts.py +17 -6
  57. angr/analyses/typehoon/typehoon.py +21 -5
  58. angr/analyses/variable_recovery/engine_ail.py +52 -13
  59. angr/analyses/variable_recovery/engine_base.py +37 -12
  60. angr/analyses/variable_recovery/variable_recovery_fast.py +33 -2
  61. angr/calling_conventions.py +96 -27
  62. angr/engines/light/engine.py +7 -0
  63. angr/exploration_techniques/director.py +1 -1
  64. angr/knowledge_plugins/functions/function.py +109 -38
  65. angr/knowledge_plugins/functions/function_manager.py +9 -0
  66. angr/knowledge_plugins/functions/function_parser.py +9 -1
  67. angr/knowledge_plugins/functions/soot_function.py +1 -1
  68. angr/knowledge_plugins/key_definitions/key_definition_manager.py +1 -1
  69. angr/knowledge_plugins/propagations/states.py +5 -2
  70. angr/knowledge_plugins/variables/variable_manager.py +3 -3
  71. angr/procedures/definitions/__init__.py +15 -12
  72. angr/procedures/definitions/types_stl.py +22 -0
  73. angr/procedures/stubs/format_parser.py +1 -1
  74. angr/project.py +23 -29
  75. angr/protos/cfg_pb2.py +14 -25
  76. angr/protos/function_pb2.py +11 -22
  77. angr/protos/primitives_pb2.py +36 -47
  78. angr/protos/variables_pb2.py +28 -39
  79. angr/protos/xrefs_pb2.py +8 -19
  80. angr/sim_type.py +251 -146
  81. angr/simos/cgc.py +1 -1
  82. angr/simos/linux.py +5 -5
  83. angr/simos/windows.py +5 -5
  84. angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +1 -1
  85. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/METADATA +9 -8
  86. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/RECORD +90 -84
  87. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/WHEEL +1 -1
  88. {angr-9.2.147.dist-info → angr-9.2.149.dist-info/licenses}/LICENSE +3 -0
  89. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/entry_points.txt +0 -0
  90. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/top_level.txt +0 -0
angr/sim_type.py CHANGED
@@ -11,12 +11,14 @@ from typing import Literal, Any, TYPE_CHECKING, cast, overload
11
11
 
12
12
  from archinfo import Endness, Arch
13
13
  import claripy
14
- import CppHeaderParser
14
+ import cxxheaderparser.simple
15
+ import cxxheaderparser.errors
16
+ import cxxheaderparser.types
15
17
  import pycparser
18
+ from pycparser import c_ast
16
19
 
17
20
  from angr.errors import AngrMissingTypeError, AngrTypeError
18
21
  from angr.sim_state import SimState
19
- from .misc.ux import deprecated
20
22
 
21
23
  if TYPE_CHECKING:
22
24
  from angr.procedures.definitions import SimTypeCollection
@@ -269,7 +271,7 @@ class SimTypeBottom(SimType):
269
271
  self, name=None, full=0, memo=None, indent=0, name_parens: bool = True
270
272
  ): # pylint: disable=unused-argument
271
273
  if name is None:
272
- return "int"
274
+ return "int" if self.label is None else self.label
273
275
  return f'{"int" if self.label is None else self.label} {name}'
274
276
 
275
277
  def _init_str(self):
@@ -502,7 +504,9 @@ class SimTypeFixedSizeInt(SimTypeInt):
502
504
  _base_name: str = "int"
503
505
  _fixed_size: int = 32
504
506
 
505
- def c_repr(self, name=None, full=0, memo=None, indent=0):
507
+ def c_repr(
508
+ self, name=None, full=0, memo=None, indent=0, name_parens: bool = True # pylint:disable=unused-argument
509
+ ):
506
510
  out = self._base_name
507
511
  if not self.signed:
508
512
  out = "u" + out
@@ -1216,10 +1220,12 @@ class SimTypeCppFunction(SimTypeFunction):
1216
1220
  arg_names: Iterable[str] | None = None,
1217
1221
  ctor: bool = False,
1218
1222
  dtor: bool = False,
1223
+ convention: str | None = None,
1219
1224
  ):
1220
1225
  super().__init__(args, returnty, label=label, arg_names=arg_names, variadic=False)
1221
1226
  self.ctor = ctor
1222
1227
  self.dtor = dtor
1228
+ self.convention = convention
1223
1229
 
1224
1230
  def __repr__(self):
1225
1231
  argstrs = [str(a) for a in self.args]
@@ -1237,6 +1243,19 @@ class SimTypeCppFunction(SimTypeFunction):
1237
1243
  ", variadic=True" if self.variadic else "",
1238
1244
  )
1239
1245
 
1246
+ def _with_arch(self, arch):
1247
+ out = SimTypeCppFunction(
1248
+ [a.with_arch(arch) for a in self.args],
1249
+ self.returnty.with_arch(arch) if self.returnty is not None else None,
1250
+ label=self.label,
1251
+ arg_names=self.arg_names,
1252
+ ctor=self.ctor,
1253
+ dtor=self.dtor,
1254
+ convention=self.convention,
1255
+ )
1256
+ out._arch = arch
1257
+ return out
1258
+
1240
1259
  def copy(self):
1241
1260
  return SimTypeCppFunction(
1242
1261
  self.args,
@@ -1245,6 +1264,7 @@ class SimTypeCppFunction(SimTypeFunction):
1245
1264
  arg_names=self.arg_names,
1246
1265
  ctor=self.ctor,
1247
1266
  dtor=self.dtor,
1267
+ convention=self.convention,
1248
1268
  )
1249
1269
 
1250
1270
 
@@ -1505,6 +1525,7 @@ class SimStruct(NamedTypeMixin, SimType):
1505
1525
  else:
1506
1526
  raise TypeError(f"Can't store struct of type {type(value)}")
1507
1527
 
1528
+ assert isinstance(value, dict)
1508
1529
  if len(value) != len(self.fields):
1509
1530
  raise ValueError(f"Passed bad values for {self}; expected {len(self.offsets)}, got {len(value)}")
1510
1531
 
@@ -1750,14 +1771,17 @@ class SimUnionValue:
1750
1771
  class SimCppClass(SimStruct):
1751
1772
  def __init__(
1752
1773
  self,
1774
+ *,
1775
+ unique_name: str | None = None,
1776
+ name: str | None = None,
1753
1777
  members: dict[str, SimType] | None = None,
1754
1778
  function_members: dict[str, SimTypeCppFunction] | None = None,
1755
1779
  vtable_ptrs=None,
1756
- name: str | None = None,
1757
1780
  pack: bool = False,
1758
1781
  align=None,
1759
1782
  ):
1760
1783
  super().__init__(members or {}, name=name, pack=pack, align=align)
1784
+ self.unique_name = unique_name
1761
1785
  # these are actually addresses in the binary
1762
1786
  self.function_members = function_members
1763
1787
  # this should also be added to the fields once we know the offsets of the members of this object
@@ -1767,8 +1791,12 @@ class SimCppClass(SimStruct):
1767
1791
  def members(self):
1768
1792
  return self.fields
1769
1793
 
1794
+ @members.setter
1795
+ def members(self, value):
1796
+ self.fields = value
1797
+
1770
1798
  def __repr__(self):
1771
- return f"class {self.name}"
1799
+ return f"class {self.name}" if not self.name.startswith("class") else self.name
1772
1800
 
1773
1801
  def extract(self, state, addr, concrete=False) -> SimCppClassValue:
1774
1802
  values = {}
@@ -1790,6 +1818,7 @@ class SimCppClass(SimStruct):
1790
1818
  else:
1791
1819
  raise TypeError(f"Can't store struct of type {type(value)}")
1792
1820
 
1821
+ assert isinstance(value, dict)
1793
1822
  if len(value) != len(self.fields):
1794
1823
  raise ValueError(f"Passed bad values for {self}; expected {len(self.offsets)}, got {len(value)}")
1795
1824
 
@@ -1797,10 +1826,43 @@ class SimCppClass(SimStruct):
1797
1826
  ty = self.fields[field]
1798
1827
  ty.store(state, addr + offset, value[field])
1799
1828
 
1829
+ def _with_arch(self, arch) -> SimCppClass:
1830
+ if arch.name in self._arch_memo:
1831
+ return self._arch_memo[arch.name]
1832
+
1833
+ out = SimCppClass(
1834
+ unique_name=self.unique_name,
1835
+ name=self.name,
1836
+ members={},
1837
+ function_members={},
1838
+ vtable_ptrs=self.vtable_ptrs,
1839
+ pack=self._pack,
1840
+ align=self._align,
1841
+ )
1842
+ out._arch = arch
1843
+ self._arch_memo[arch.name] = out
1844
+
1845
+ out.members = OrderedDict((k, v.with_arch(arch)) for k, v in self.members.items())
1846
+ out.function_members = (
1847
+ OrderedDict((k, v.with_arch(arch)) for k, v in self.function_members.items())
1848
+ if self.function_members is not None
1849
+ else None
1850
+ )
1851
+
1852
+ # Fixup the offsets to byte aligned addresses for all SimTypeNumOffset types
1853
+ offset_so_far = 0
1854
+ for _, ty in out.members.items():
1855
+ if isinstance(ty, SimTypeNumOffset):
1856
+ out._pack = True
1857
+ ty.offset = offset_so_far % arch.byte_width
1858
+ offset_so_far += ty.size
1859
+ return out
1860
+
1800
1861
  def copy(self):
1801
1862
  return SimCppClass(
1802
- dict(self.fields),
1863
+ unique_name=self.unique_name,
1803
1864
  name=self.name,
1865
+ members=dict(self.fields),
1804
1866
  pack=self._pack,
1805
1867
  align=self._align,
1806
1868
  function_members=self.function_members,
@@ -2042,10 +2104,11 @@ GLIBC_EXTERNAL_BASIC_TYPES = {
2042
2104
  }
2043
2105
  ALL_TYPES.update(GLIBC_EXTERNAL_BASIC_TYPES)
2044
2106
 
2045
-
2107
+ # TODO: switch to stl types declared in types_stl
2046
2108
  CXX_TYPES = {
2047
2109
  "string": SimTypeString(),
2048
2110
  "wstring": SimTypeWString(),
2111
+ "std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>": SimTypeString(),
2049
2112
  "basic_string": SimTypeString(),
2050
2113
  "CharT": SimTypeChar(),
2051
2114
  }
@@ -2997,21 +3060,6 @@ def _make_scope(predefined_types=None):
2997
3060
  return [scope]
2998
3061
 
2999
3062
 
3000
- @deprecated(replacement="register_types(parse_type(struct_expr))")
3001
- def define_struct(defn):
3002
- """
3003
- Register a struct definition globally
3004
-
3005
- >>> define_struct('struct abcd {int x; int y;}')
3006
- """
3007
- struct = parse_type(defn)
3008
- if not isinstance(struct, SimStruct):
3009
- raise AngrTypeError("Passed a non-struct type to define_struct")
3010
- ALL_TYPES[struct.name] = struct
3011
- ALL_TYPES["struct " + struct.name] = struct
3012
- return struct
3013
-
3014
-
3015
3063
  def register_types(types):
3016
3064
  """
3017
3065
  Pass in some types and they will be registered to the global type store.
@@ -3093,7 +3141,7 @@ def parse_file(defn, preprocess=True, predefined_types: dict[Any, SimType] | Non
3093
3141
 
3094
3142
  # pylint: disable=unexpected-keyword-arg
3095
3143
  node = pycparser.c_parser.CParser().parse(defn, scope_stack=_make_scope(predefined_types))
3096
- if not isinstance(node, pycparser.c_ast.FileAST):
3144
+ if not isinstance(node, c_ast.FileAST):
3097
3145
  raise ValueError("Something went horribly wrong using pycparser")
3098
3146
  out = {}
3099
3147
  extra_types = {}
@@ -3103,9 +3151,9 @@ def parse_file(defn, preprocess=True, predefined_types: dict[Any, SimType] | Non
3103
3151
  extra_types = dict(predefined_types)
3104
3152
 
3105
3153
  for piece in node.ext:
3106
- if isinstance(piece, pycparser.c_ast.FuncDef):
3154
+ if isinstance(piece, c_ast.FuncDef):
3107
3155
  out[piece.decl.name] = _decl_to_type(piece.decl.type, extra_types, arch=arch)
3108
- elif isinstance(piece, pycparser.c_ast.Decl):
3156
+ elif isinstance(piece, c_ast.Decl):
3109
3157
  ty = _decl_to_type(piece.type, extra_types, arch=arch)
3110
3158
  if piece.name is not None:
3111
3159
  out[piece.name] = ty
@@ -3121,7 +3169,7 @@ def parse_file(defn, preprocess=True, predefined_types: dict[Any, SimType] | Non
3121
3169
  assert isinstance(i, SimUnion)
3122
3170
  i.members = ty.members
3123
3171
 
3124
- elif isinstance(piece, pycparser.c_ast.Typedef):
3172
+ elif isinstance(piece, c_ast.Typedef):
3125
3173
  extra_types[piece.name] = copy.copy(_decl_to_type(piece.type, extra_types, arch=arch))
3126
3174
  extra_types[piece.name].label = piece.name
3127
3175
 
@@ -3142,6 +3190,7 @@ def type_parser_singleton() -> pycparser.CParser:
3142
3190
  optimize=False,
3143
3191
  errorlog=errorlog,
3144
3192
  )
3193
+ assert _type_parser_singleton is not None
3145
3194
  return _type_parser_singleton
3146
3195
 
3147
3196
 
@@ -3171,7 +3220,7 @@ def parse_type_with_name(
3171
3220
 
3172
3221
  # pylint: disable=unexpected-keyword-arg
3173
3222
  node = type_parser_singleton().parse(text=defn, scope_stack=_make_scope(predefined_types))
3174
- if not isinstance(node, pycparser.c_ast.Typename) and not isinstance(node, pycparser.c_ast.Decl):
3223
+ if not isinstance(node, c_ast.Typename) and not isinstance(node, c_ast.Decl):
3175
3224
  raise pycparser.c_parser.ParseError("Got an unexpected type out of pycparser")
3176
3225
 
3177
3226
  decl = node.type
@@ -3200,17 +3249,17 @@ def _decl_to_type(
3200
3249
  if extra_types is None:
3201
3250
  extra_types = {}
3202
3251
 
3203
- if isinstance(decl, pycparser.c_ast.FuncDecl):
3252
+ if isinstance(decl, c_ast.FuncDecl):
3204
3253
  argtyps = (
3205
3254
  ()
3206
3255
  if decl.args is None
3207
3256
  else [
3208
3257
  (
3209
3258
  ...
3210
- if type(x) is pycparser.c_ast.EllipsisParam
3259
+ if type(x) is c_ast.EllipsisParam
3211
3260
  else (
3212
3261
  SimTypeBottom().with_arch(arch)
3213
- if type(x) is pycparser.c_ast.ID
3262
+ if type(x) is c_ast.ID
3214
3263
  else _decl_to_type(x.type, extra_types, arch=arch)
3215
3264
  )
3216
3265
  )
@@ -3218,9 +3267,7 @@ def _decl_to_type(
3218
3267
  ]
3219
3268
  )
3220
3269
  arg_names = (
3221
- [arg.name for arg in decl.args.params if type(arg) is not pycparser.c_ast.EllipsisParam]
3222
- if decl.args
3223
- else None
3270
+ [arg.name for arg in decl.args.params if type(arg) is not c_ast.EllipsisParam] if decl.args else None
3224
3271
  )
3225
3272
  # special handling: func(void) is func()
3226
3273
  if (
@@ -3245,20 +3292,20 @@ def _decl_to_type(
3245
3292
  r._arch = arch
3246
3293
  return r
3247
3294
 
3248
- if isinstance(decl, pycparser.c_ast.TypeDecl):
3295
+ if isinstance(decl, c_ast.TypeDecl):
3249
3296
  if decl.declname == "TOP":
3250
3297
  r = SimTypeTop()
3251
3298
  r._arch = arch
3252
3299
  return r
3253
3300
  return _decl_to_type(decl.type, extra_types, bitsize=bitsize, arch=arch)
3254
3301
 
3255
- if isinstance(decl, pycparser.c_ast.PtrDecl):
3302
+ if isinstance(decl, c_ast.PtrDecl):
3256
3303
  pts_to = _decl_to_type(decl.type, extra_types, arch=arch)
3257
3304
  r = SimTypePointer(pts_to)
3258
3305
  r._arch = arch
3259
3306
  return r
3260
3307
 
3261
- if isinstance(decl, pycparser.c_ast.ArrayDecl):
3308
+ if isinstance(decl, c_ast.ArrayDecl):
3262
3309
  elem_type = _decl_to_type(decl.type, extra_types, arch=arch)
3263
3310
 
3264
3311
  if decl.dim is None:
@@ -3274,7 +3321,7 @@ def _decl_to_type(
3274
3321
  r._arch = arch
3275
3322
  return r
3276
3323
 
3277
- if isinstance(decl, pycparser.c_ast.Struct):
3324
+ if isinstance(decl, c_ast.Struct):
3278
3325
  if decl.decls is not None:
3279
3326
  fields = OrderedDict(
3280
3327
  (field.name, _decl_to_type(field.type, extra_types, bitsize=field.bitsize, arch=arch))
@@ -3313,7 +3360,7 @@ def _decl_to_type(
3313
3360
  struct._arch = arch
3314
3361
  return struct
3315
3362
 
3316
- if isinstance(decl, pycparser.c_ast.Union):
3363
+ if isinstance(decl, c_ast.Union):
3317
3364
  if decl.decls is not None:
3318
3365
  fields = {field.name: _decl_to_type(field.type, extra_types, arch=arch) for field in decl.decls}
3319
3366
  else:
@@ -3347,7 +3394,7 @@ def _decl_to_type(
3347
3394
  union._arch = arch
3348
3395
  return union
3349
3396
 
3350
- if isinstance(decl, pycparser.c_ast.IdentifierType):
3397
+ if isinstance(decl, c_ast.IdentifierType):
3351
3398
  key = " ".join(decl.names)
3352
3399
  if bitsize is not None:
3353
3400
  return SimTypeNumOffset(int(bitsize.value), signed=False)
@@ -3357,7 +3404,7 @@ def _decl_to_type(
3357
3404
  return ALL_TYPES[key].with_arch(arch)
3358
3405
  raise TypeError(f"Unknown type '{key}'")
3359
3406
 
3360
- if isinstance(decl, pycparser.c_ast.Enum):
3407
+ if isinstance(decl, c_ast.Enum):
3361
3408
  # See C99 at 6.7.2.2
3362
3409
  return ALL_TYPES["int"].with_arch(arch)
3363
3410
 
@@ -3365,9 +3412,9 @@ def _decl_to_type(
3365
3412
 
3366
3413
 
3367
3414
  def _parse_const(c, arch=None, extra_types=None):
3368
- if type(c) is pycparser.c_ast.Constant:
3415
+ if type(c) is c_ast.Constant:
3369
3416
  return int(c.value, base=0)
3370
- if type(c) is pycparser.c_ast.BinaryOp:
3417
+ if type(c) is c_ast.BinaryOp:
3371
3418
  if c.op == "+":
3372
3419
  return _parse_const(c.children()[0][1], arch, extra_types) + _parse_const(
3373
3420
  c.children()[1][1], arch, extra_types
@@ -3393,156 +3440,214 @@ def _parse_const(c, arch=None, extra_types=None):
3393
3440
  c.children()[1][1], arch, extra_types
3394
3441
  )
3395
3442
  raise ValueError(f"Binary op {c.op}")
3396
- if type(c) is pycparser.c_ast.UnaryOp:
3443
+ if type(c) is c_ast.UnaryOp:
3397
3444
  if c.op == "sizeof":
3398
3445
  return _decl_to_type(c.expr.type, extra_types=extra_types, arch=arch).size
3399
3446
  raise ValueError(f"Unary op {c.op}")
3400
- if type(c) is pycparser.c_ast.Cast:
3447
+ if type(c) is c_ast.Cast:
3401
3448
  return _parse_const(c.expr, arch, extra_types)
3402
3449
  raise ValueError(c)
3403
3450
 
3404
3451
 
3405
- def _cpp_decl_to_type(decl: Any, extra_types: dict[str, SimType], opaque_classes=True):
3406
- if CppHeaderParser is None:
3407
- raise ImportError("Please install CppHeaderParser to parse C++ definitions")
3408
- if isinstance(decl, CppHeaderParser.CppMethod):
3452
+ CPP_DECL_TYPES = (
3453
+ cxxheaderparser.types.Method
3454
+ | cxxheaderparser.types.Array
3455
+ | cxxheaderparser.types.Pointer
3456
+ | cxxheaderparser.types.MoveReference
3457
+ | cxxheaderparser.types.Reference
3458
+ | cxxheaderparser.types.FunctionType
3459
+ | cxxheaderparser.types.Function
3460
+ | cxxheaderparser.types.Type
3461
+ )
3462
+
3463
+
3464
+ def _cpp_decl_to_type(
3465
+ decl: CPP_DECL_TYPES, extra_types: dict[str, SimType], opaque_classes: bool = True
3466
+ ) -> (
3467
+ SimTypeCppFunction
3468
+ | SimTypeFunction
3469
+ | SimCppClass
3470
+ | SimTypeReference
3471
+ | SimTypePointer
3472
+ | SimTypeArray
3473
+ | SimTypeBottom
3474
+ ):
3475
+ if cxxheaderparser is None:
3476
+ raise ImportError("Please install cxxheaderparser to parse C++ definitions")
3477
+ if isinstance(decl, cxxheaderparser.types.Method):
3409
3478
  the_func = decl
3410
- func_name = the_func["name"]
3411
- if "__deleting_dtor__" in func_name or "__base_dtor__" in func_name or "__dtor__" in func_name:
3412
- the_func["destructor"] = True
3479
+ func_name = the_func.name.format()
3413
3480
  # translate parameters
3414
3481
  args = []
3415
3482
  arg_names: list[str] = []
3416
- for param in the_func["parameters"]:
3417
- arg_type = param["type"]
3483
+ for idx, param in enumerate(the_func.parameters):
3484
+ arg_type = param.type
3418
3485
  args.append(_cpp_decl_to_type(arg_type, extra_types, opaque_classes=opaque_classes))
3419
- arg_name = param["name"]
3486
+ arg_name = param.name if param.name is not None else f"unknown_{idx}"
3420
3487
  arg_names.append(arg_name)
3421
3488
 
3422
3489
  args = tuple(args)
3423
3490
  arg_names_tuple: tuple[str, ...] = tuple(arg_names)
3491
+
3492
+ # note that the constructor and destructor handling in cxxheaderparser is a bit weird and I could not get it to
3493
+ # work, hence the following hack
3494
+ ctor = dtor = False
3495
+ convention = the_func.msvc_convention
3496
+ if len(the_func.name.segments) >= 2:
3497
+ seg1, seg0 = the_func.name.segments[-2:]
3498
+ seg1 = seg1.format()
3499
+ seg0 = seg0.format()
3500
+ if seg0 == seg1:
3501
+ ctor = True
3502
+ if the_func.return_type is not None:
3503
+ convention = the_func.return_type.format() # it's usually just "__thiscall"
3504
+ elif seg0 == "~" + seg1:
3505
+ dtor = True
3506
+ if the_func.return_type is not None:
3507
+ convention = the_func.return_type.format() # it's usually just "__thiscall"
3424
3508
  # returns
3425
- if not the_func["returns"].strip():
3509
+ if the_func.return_type is None or ctor or dtor:
3426
3510
  returnty = SimTypeBottom()
3427
3511
  else:
3428
- returnty = _cpp_decl_to_type(the_func["returns"].strip(), extra_types, opaque_classes=opaque_classes)
3429
- # other properties
3430
- ctor = the_func["constructor"]
3431
- dtor = the_func["destructor"]
3432
- return SimTypeCppFunction(args, returnty, arg_names=arg_names_tuple, ctor=ctor, dtor=dtor)
3433
-
3434
- if isinstance(decl, str):
3435
- # a string that represents type
3436
- if decl.endswith("&"):
3437
- # reference
3438
- subdecl = decl.rstrip("&").strip()
3439
- subt = _cpp_decl_to_type(subdecl, extra_types, opaque_classes=opaque_classes)
3440
- return SimTypeReference(subt)
3441
-
3442
- if decl.endswith("*"):
3443
- # pointer
3444
- subdecl = decl.rstrip("*").strip()
3445
- subt = _cpp_decl_to_type(subdecl, extra_types, opaque_classes=opaque_classes)
3446
- return SimTypePointer(subt)
3447
-
3448
- if decl.endswith(" const"):
3449
- # drop const
3450
- return _cpp_decl_to_type(decl[:-6].strip(), extra_types, opaque_classes=opaque_classes)
3451
-
3452
- unqualified_name = decl.split("::")[-1] if "::" in decl else decl
3453
-
3454
- key = unqualified_name
3455
- if key in extra_types:
3456
- t = extra_types[key]
3457
- elif key in ALL_TYPES:
3458
- t = ALL_TYPES[key]
3512
+ returnty = _cpp_decl_to_type(the_func.return_type, extra_types, opaque_classes=opaque_classes)
3513
+ return SimTypeCppFunction(
3514
+ args,
3515
+ returnty,
3516
+ label=func_name,
3517
+ arg_names=arg_names_tuple,
3518
+ ctor=ctor,
3519
+ dtor=dtor,
3520
+ convention=convention,
3521
+ )
3522
+
3523
+ if isinstance(decl, cxxheaderparser.types.Function):
3524
+ # a function declaration
3525
+ the_func = decl
3526
+ func_name = the_func.name.format()
3527
+ # translate parameters
3528
+ args = []
3529
+ arg_names: list[str] = []
3530
+ for idx, param in enumerate(the_func.parameters):
3531
+ arg_type = param.type
3532
+ args.append(_cpp_decl_to_type(arg_type, extra_types, opaque_classes=opaque_classes))
3533
+ arg_name = param.name if param.name is not None else f"unknown_{idx}"
3534
+ arg_names.append(arg_name)
3535
+
3536
+ args = tuple(args)
3537
+ arg_names_tuple: tuple[str, ...] = tuple(arg_names)
3538
+ # returns
3539
+ if the_func.return_type is None:
3540
+ returnty = SimTypeBottom()
3541
+ else:
3542
+ returnty = _cpp_decl_to_type(the_func.return_type, extra_types, opaque_classes=opaque_classes)
3543
+
3544
+ return SimTypeFunction(args, returnty, label=func_name, arg_names=arg_names_tuple)
3545
+
3546
+ if isinstance(decl, cxxheaderparser.types.Type):
3547
+ # attempt to parse it as one of the existing types
3548
+ lbl = decl.format()
3549
+ lbl = lbl.removeprefix("const ")
3550
+ if lbl in extra_types:
3551
+ t = extra_types[lbl]
3552
+ elif lbl in ALL_TYPES:
3553
+ t = ALL_TYPES[lbl]
3459
3554
  elif opaque_classes is True:
3460
3555
  # create a class without knowing the internal members
3461
- t = SimCppClass({}, name=decl)
3556
+ t = SimCppClass(unique_name=lbl, name=lbl, members={})
3462
3557
  else:
3463
- raise TypeError("Unknown type '{}'".format(" ".join(key)))
3558
+ raise TypeError(f'Unknown type "{lbl}"')
3464
3559
 
3465
- if unqualified_name != decl and isinstance(t, NamedTypeMixin):
3560
+ if isinstance(t, NamedTypeMixin):
3466
3561
  t = t.copy()
3467
- t.name = decl # pylint:disable=attribute-defined-outside-init
3468
- return t
3562
+ t.name = lbl # pylint:disable=attribute-defined-outside-init
3563
+ return t # type:ignore
3564
+
3565
+ if isinstance(decl, cxxheaderparser.types.Array):
3566
+ subt = _cpp_decl_to_type(decl.array_of, extra_types, opaque_classes=opaque_classes)
3567
+ return SimTypeArray(subt, length=decl.size)
3568
+
3569
+ if isinstance(decl, cxxheaderparser.types.MoveReference):
3570
+ subt = _cpp_decl_to_type(decl.moveref_to, extra_types, opaque_classes=opaque_classes)
3571
+ return SimTypeReference(subt) # FIXME: Move reference vs reference
3572
+
3573
+ if isinstance(decl, cxxheaderparser.types.Reference):
3574
+ subt = _cpp_decl_to_type(decl.ref_to, extra_types, opaque_classes=opaque_classes)
3575
+ return SimTypeReference(subt)
3576
+
3577
+ if isinstance(decl, cxxheaderparser.types.Pointer):
3578
+ subt = _cpp_decl_to_type(decl.ptr_to, extra_types, opaque_classes=opaque_classes)
3579
+ return SimTypePointer(subt)
3580
+
3581
+ if isinstance(decl, cxxheaderparser.types.FunctionType):
3582
+ params = tuple(
3583
+ _cpp_decl_to_type(param.type, extra_types, opaque_classes=opaque_classes) for param in decl.parameters
3584
+ )
3585
+ param_names = (
3586
+ tuple(param.name.format() for param in decl.parameters) # type:ignore
3587
+ if all(param.name is not None for param in decl.parameters)
3588
+ else None
3589
+ )
3590
+ returnty = _cpp_decl_to_type(decl.return_type, extra_types, opaque_classes=opaque_classes)
3591
+ return SimTypeCppFunction(params, returnty, arg_names=param_names, convention=decl.msvc_convention)
3469
3592
 
3470
3593
  raise NotImplementedError
3471
3594
 
3472
3595
 
3473
3596
  def normalize_cpp_function_name(name: str) -> str:
3474
- _s = name
3475
- s = None
3476
- while s != _s:
3477
- _s = s if s is not None else _s
3478
- s = re.sub(r"<[^<>]+>", "", _s)
3479
- assert s is not None
3597
+ # strip access specifiers
3598
+ prefixes = ["public:", "protected:", "private:"]
3599
+ for pre in prefixes:
3600
+ name = name.removeprefix(pre)
3601
+
3602
+ if name.startswith("operator"):
3603
+ # the return type is missing; give it a default type
3604
+ name = "int " + name
3480
3605
 
3481
- m = re.search(r"{([a-z\s]+)}", s)
3482
- if m is not None:
3483
- s = s[: m.start()] + "__" + m.group(1).replace(" ", "_") + "__" + s[m.end() :]
3484
- return s
3606
+ return name.removesuffix(";")
3485
3607
 
3486
3608
 
3487
3609
  def parse_cpp_file(cpp_decl, with_param_names: bool = False):
3488
3610
  #
3489
- # A series of hacks to make CppHeaderParser happy with whatever C++ function prototypes we feed in
3611
+ # A series of hacks to make cxxheaderparser happy with whatever C++ function prototypes we feed in
3490
3612
  #
3491
3613
 
3492
- if CppHeaderParser is None:
3493
- raise ImportError("Please install CppHeaderParser to parse C++ definitions")
3614
+ if cxxheaderparser is None:
3615
+ raise ImportError("Please install cxxheaderparser to parse C++ definitions")
3494
3616
 
3495
3617
  # CppHeaderParser does not support specialization
3496
3618
  s = normalize_cpp_function_name(cpp_decl)
3497
3619
 
3498
- # CppHeaderParser does not like missing parameter names
3499
- # FIXME: The following logic is only dealing with *one* C++ function declaration. Support multiple declarations
3500
- # FIXME: when needed in the future.
3501
- if not with_param_names:
3502
- last_pos = 0
3503
- i = 0
3504
- while True:
3505
- idx = s.find(",", last_pos)
3506
- if idx == -1:
3507
- break
3508
- arg_name = f"a{i}"
3509
- i += 1
3510
- s = s[:idx] + " " + arg_name + s[idx:]
3511
- last_pos = idx + len(arg_name) + 1 + 1
3512
-
3513
- # the last parameter
3514
- idx = s.find(")", last_pos)
3515
- # TODO: consider the case where there are one or multiple spaces between ( and )
3516
- if idx != -1 and s[idx - 1] != "(":
3517
- arg_name = f"a{i}"
3518
- s = s[:idx] + " " + arg_name + s[idx:]
3519
-
3520
3620
  # CppHeaderParser does not like missing function body
3521
3621
  s += "\n\n{}"
3522
3622
 
3523
3623
  try:
3524
- h = CppHeaderParser.CppHeader(s, argType="string")
3525
- except CppHeaderParser.CppParseError:
3526
- return None, None
3527
- if not h.functions:
3624
+ h = cxxheaderparser.simple.parse_string(s)
3625
+ except cxxheaderparser.errors.CxxParseError:
3626
+ # GCC-mangled (and thus, demangled) function names do not have return types encoded; let's try to prefix s with
3627
+ # "void" and try again
3628
+ s = "void " + s
3629
+ try:
3630
+ h = cxxheaderparser.simple.parse_string(s)
3631
+ except cxxheaderparser.errors.CxxParseError:
3632
+ # if it still fails, we give up
3633
+ return None, None
3634
+
3635
+ if not h.namespace:
3528
3636
  return None, None
3529
3637
 
3530
- func_decls: dict[str, SimTypeCppFunction] = {}
3531
- for the_func in h.functions:
3638
+ func_decls: dict[str, SimTypeCppFunction | SimTypeFunction] = {}
3639
+ for the_func in h.namespace.functions + h.namespace.method_impls:
3532
3640
  # FIXME: We always assume that there is a "this" pointer but it is not the case for static methods.
3533
- proto = cast(SimTypeCppFunction | None, _cpp_decl_to_type(the_func, {}, opaque_classes=True))
3534
- if proto is not None and the_func["class"]:
3535
- func_name = cast(str, the_func["class"] + "::" + the_func["name"])
3536
- proto.args = (
3537
- SimTypePointer(pts_to=SimTypeBottom(label="void")),
3538
- *proto.args,
3539
- ) # pylint:disable=attribute-defined-outside-init
3540
- proto.arg_names = ("this", *proto.arg_names) # pylint:disable=attribute-defined-outside-init
3541
- elif proto is None:
3542
- raise ValueError("proto is None but class is also None... not sure what this edge case means")
3543
- else:
3544
- func_name = cast(str, the_func["name"])
3545
- func_decls[func_name] = proto
3641
+ proto = cast(SimTypeCppFunction | SimTypeFunction | None, _cpp_decl_to_type(the_func, {}, opaque_classes=True))
3642
+ if proto is not None:
3643
+ func_name = the_func.name.format()
3644
+ if isinstance(proto, SimTypeCppFunction):
3645
+ proto.args = (
3646
+ SimTypePointer(pts_to=SimTypeBottom(label="void")),
3647
+ *proto.args,
3648
+ ) # pylint:disable=attribute-defined-outside-init
3649
+ proto.arg_names = ("this", *proto.arg_names) # pylint:disable=attribute-defined-outside-init
3650
+ func_decls[func_name] = proto
3546
3651
 
3547
3652
  return func_decls, {}
3548
3653
 
angr/simos/cgc.py CHANGED
@@ -18,7 +18,7 @@ class SimCGC(SimUserland):
18
18
  """
19
19
 
20
20
  def __init__(self, project, **kwargs):
21
- super().__init__(project, syscall_library=L["cgcabi"], syscall_addr_alignment=1, name="CGC", **kwargs)
21
+ super().__init__(project, syscall_library=L["cgcabi"][0], syscall_addr_alignment=1, name="CGC", **kwargs)
22
22
 
23
23
  # pylint: disable=arguments-differ
24
24
  def state_blank(self, flag_page=None, allocate_stack_page_count=0x100, **kwargs):