angr 9.2.147__py3-none-manylinux2014_x86_64.whl → 9.2.149__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (90):
  1. angr/__init__.py +1 -1
  2. angr/analyses/analysis.py +3 -11
  3. angr/analyses/calling_convention/calling_convention.py +42 -2
  4. angr/analyses/calling_convention/fact_collector.py +5 -4
  5. angr/analyses/calling_convention/utils.py +1 -0
  6. angr/analyses/cfg/cfg_base.py +3 -59
  7. angr/analyses/cfg/cfg_emulated.py +17 -14
  8. angr/analyses/cfg/cfg_fast.py +68 -63
  9. angr/analyses/cfg/cfg_fast_soot.py +3 -3
  10. angr/analyses/decompiler/ail_simplifier.py +65 -32
  11. angr/analyses/decompiler/block_simplifier.py +20 -6
  12. angr/analyses/decompiler/callsite_maker.py +28 -18
  13. angr/analyses/decompiler/clinic.py +84 -17
  14. angr/analyses/decompiler/condition_processor.py +0 -21
  15. angr/analyses/decompiler/counters/call_counter.py +3 -0
  16. angr/analyses/decompiler/dephication/rewriting_engine.py +24 -2
  17. angr/analyses/decompiler/optimization_passes/__init__.py +5 -0
  18. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +15 -13
  19. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +1 -1
  20. angr/analyses/decompiler/optimization_passes/determine_load_sizes.py +64 -0
  21. angr/analyses/decompiler/optimization_passes/eager_std_string_concatenation.py +165 -0
  22. angr/analyses/decompiler/optimization_passes/engine_base.py +11 -2
  23. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +17 -2
  24. angr/analyses/decompiler/optimization_passes/optimization_pass.py +10 -6
  25. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +99 -30
  26. angr/analyses/decompiler/peephole_optimizations/__init__.py +6 -0
  27. angr/analyses/decompiler/peephole_optimizations/base.py +43 -3
  28. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
  29. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +3 -0
  30. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +4 -1
  31. angr/analyses/decompiler/peephole_optimizations/remove_cxx_destructor_calls.py +32 -0
  32. angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +69 -2
  33. angr/analyses/decompiler/peephole_optimizations/remove_redundant_conversions.py +14 -0
  34. angr/analyses/decompiler/peephole_optimizations/rewrite_conv_mul.py +40 -0
  35. angr/analyses/decompiler/peephole_optimizations/rewrite_cxx_operator_calls.py +90 -0
  36. angr/analyses/decompiler/presets/fast.py +2 -0
  37. angr/analyses/decompiler/presets/full.py +2 -0
  38. angr/analyses/decompiler/ssailification/rewriting_engine.py +51 -4
  39. angr/analyses/decompiler/ssailification/ssailification.py +23 -3
  40. angr/analyses/decompiler/ssailification/traversal_engine.py +15 -1
  41. angr/analyses/decompiler/structured_codegen/c.py +146 -15
  42. angr/analyses/decompiler/structuring/phoenix.py +11 -3
  43. angr/analyses/decompiler/utils.py +6 -1
  44. angr/analyses/deobfuscator/api_obf_finder.py +5 -1
  45. angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +1 -1
  46. angr/analyses/forward_analysis/visitors/graph.py +0 -8
  47. angr/analyses/identifier/runner.py +1 -1
  48. angr/analyses/reaching_definitions/function_handler.py +4 -4
  49. angr/analyses/reassembler.py +1 -1
  50. angr/analyses/s_reaching_definitions/s_rda_view.py +1 -0
  51. angr/analyses/stack_pointer_tracker.py +1 -1
  52. angr/analyses/static_hooker.py +11 -9
  53. angr/analyses/typehoon/lifter.py +20 -0
  54. angr/analyses/typehoon/simple_solver.py +42 -9
  55. angr/analyses/typehoon/translator.py +4 -1
  56. angr/analyses/typehoon/typeconsts.py +17 -6
  57. angr/analyses/typehoon/typehoon.py +21 -5
  58. angr/analyses/variable_recovery/engine_ail.py +52 -13
  59. angr/analyses/variable_recovery/engine_base.py +37 -12
  60. angr/analyses/variable_recovery/variable_recovery_fast.py +33 -2
  61. angr/calling_conventions.py +96 -27
  62. angr/engines/light/engine.py +7 -0
  63. angr/exploration_techniques/director.py +1 -1
  64. angr/knowledge_plugins/functions/function.py +109 -38
  65. angr/knowledge_plugins/functions/function_manager.py +9 -0
  66. angr/knowledge_plugins/functions/function_parser.py +9 -1
  67. angr/knowledge_plugins/functions/soot_function.py +1 -1
  68. angr/knowledge_plugins/key_definitions/key_definition_manager.py +1 -1
  69. angr/knowledge_plugins/propagations/states.py +5 -2
  70. angr/knowledge_plugins/variables/variable_manager.py +3 -3
  71. angr/procedures/definitions/__init__.py +15 -12
  72. angr/procedures/definitions/types_stl.py +22 -0
  73. angr/procedures/stubs/format_parser.py +1 -1
  74. angr/project.py +23 -29
  75. angr/protos/cfg_pb2.py +14 -25
  76. angr/protos/function_pb2.py +11 -22
  77. angr/protos/primitives_pb2.py +36 -47
  78. angr/protos/variables_pb2.py +28 -39
  79. angr/protos/xrefs_pb2.py +8 -19
  80. angr/sim_type.py +251 -146
  81. angr/simos/cgc.py +1 -1
  82. angr/simos/linux.py +5 -5
  83. angr/simos/windows.py +5 -5
  84. angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +1 -1
  85. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/METADATA +9 -8
  86. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/RECORD +90 -84
  87. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/WHEEL +1 -1
  88. {angr-9.2.147.dist-info → angr-9.2.149.dist-info/licenses}/LICENSE +3 -0
  89. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/entry_points.txt +0 -0
  90. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/top_level.txt +0 -0
@@ -23,10 +23,10 @@ from angr.procedures import SIM_LIBRARIES
23
23
  from angr.procedures.definitions import SimSyscallLibrary
24
24
  from angr.protos import function_pb2
25
25
  from angr.calling_conventions import DEFAULT_CC, default_cc
26
- from angr.misc.ux import deprecated
27
26
  from angr.sim_type import SimTypeFunction, parse_defns
28
27
  from angr.calling_conventions import SimCC
29
28
  from angr.project import Project
29
+ from angr.utils.library import get_cpp_function_name
30
30
  from .function_parser import FunctionParser
31
31
 
32
32
  l = logging.getLogger(name=__name__)
@@ -92,6 +92,10 @@ class Function(Serializable):
92
92
  is_plt: bool | None = None,
93
93
  returning=None,
94
94
  alignment=False,
95
+ calling_convention: SimCC | None = None,
96
+ prototype: SimTypeFunction | None = None,
97
+ prototype_libname: str | None = None,
98
+ is_prototype_guessed: bool = True,
95
99
  ):
96
100
  """
97
101
  Function constructor. If the optional parameters are not provided, they will be automatically determined upon
@@ -139,11 +143,11 @@ class Function(Serializable):
139
143
  self.retaddr_on_stack = False
140
144
  self.sp_delta = 0
141
145
  # Calling convention
142
- self.calling_convention: SimCC | None = None
146
+ self.calling_convention = calling_convention
143
147
  # Function prototype
144
- self.prototype: SimTypeFunction | None = None
145
- self.prototype_libname: str | None = None
146
- self.is_prototype_guessed: bool = True
148
+ self.prototype = prototype
149
+ self.prototype_libname = prototype_libname
150
+ self.is_prototype_guessed = is_prototype_guessed
147
151
  # Whether this function returns or not. `None` means it's not determined yet
148
152
  self._returning = None
149
153
 
@@ -239,15 +243,6 @@ class Function(Serializable):
239
243
 
240
244
  self._init_prototype_and_calling_convention()
241
245
 
242
- @property
243
- @deprecated(".is_alignment")
244
- def alignment(self):
245
- return self.is_alignment
246
-
247
- @alignment.setter
248
- def alignment(self, value):
249
- self.is_alignment = value
250
-
251
246
  @property
252
247
  def name(self):
253
248
  return self._name
@@ -357,7 +352,8 @@ class Function(Serializable):
357
352
  # we know the size
358
353
  size = self._block_sizes[addr]
359
354
 
360
- block = self._project.factory.block(addr, size=size, byte_string=byte_string)
355
+ assert self.project is not None
356
+ block = self.project.factory.block(addr, size=size, byte_string=byte_string)
361
357
  if size is None:
362
358
  # update block_size dict
363
359
  self._block_sizes[addr] = block.size
@@ -460,18 +456,19 @@ class Function(Serializable):
460
456
  """
461
457
  constants = set()
462
458
 
463
- if not self._project.loader.main_object.contains_addr(self.addr):
459
+ assert self.project is not None
460
+ if not self.project.loader.main_object.contains_addr(self.addr):
464
461
  return constants
465
462
 
466
463
  # FIXME the old way was better for architectures like mips, but we need the initial irsb
467
464
  # reanalyze function with a new initial state (use persistent registers)
468
465
  # initial_state = self._function_manager._cfg.get_any_irsb(self.addr).initial_state
469
- # fresh_state = self._project.factory.blank_state(mode="fastpath")
466
+ # fresh_state = self.project.factory.blank_state(mode="fastpath")
470
467
  # for reg in initial_state.arch.persistent_regs + ['ip']:
471
468
  # fresh_state.registers.store(reg, initial_state.registers.load(reg))
472
469
 
473
470
  # reanalyze function with a new initial state
474
- fresh_state = self._project.factory.blank_state(mode="fastpath")
471
+ fresh_state = self.project.factory.blank_state(mode="fastpath")
475
472
  fresh_state.regs.ip = self.addr
476
473
 
477
474
  graph_addrs = {x.addr for x in self.graph.nodes() if isinstance(x, BlockNode)}
@@ -486,10 +483,10 @@ class Function(Serializable):
486
483
  if state.solver.eval(state.ip) not in graph_addrs:
487
484
  continue
488
485
  # don't trace into simprocedures
489
- if self._project.is_hooked(state.solver.eval(state.ip)):
486
+ if self.project.is_hooked(state.solver.eval(state.ip)):
490
487
  continue
491
488
  # don't trace outside of the binary
492
- if not self._project.loader.main_object.contains_addr(state.solver.eval(state.ip)):
489
+ if not self.project.loader.main_object.contains_addr(state.solver.eval(state.ip)):
493
490
  continue
494
491
  # don't trace unreachable blocks
495
492
  if state.history.jumpkind in {
@@ -506,7 +503,7 @@ class Function(Serializable):
506
503
  curr_ip = state.solver.eval(state.ip)
507
504
 
508
505
  # get runtime values from logs of successors
509
- successors = self._project.factory.successors(state)
506
+ successors = self.project.factory.successors(state)
510
507
  for succ in successors.flat_successors + successors.unsat_successors:
511
508
  for a in succ.history.recent_actions:
512
509
  for ao in a.all_objects:
@@ -562,7 +559,7 @@ class Function(Serializable):
562
559
  f" SP difference: {self.sp_delta}\n"
563
560
  f" Has return: {self.has_return}\n"
564
561
  f" Returning: {'Unknown' if self.returning is None else self.returning}\n"
565
- f" Alignment: {self.alignment}\n"
562
+ f" Alignment: {self.is_alignment}\n"
566
563
  f" Arguments: reg: {self._argument_registers}, stack: {self._argument_stack_variables}\n"
567
564
  f" Blocks: [{', '.join(f'{i:#x}' for i in self.block_addrs)}]\n"
568
565
  f" Cyclomatic Complexity: {self.cyclomatic_complexity}\n"
@@ -612,7 +609,7 @@ class Function(Serializable):
612
609
 
613
610
  @property
614
611
  def size(self):
615
- return sum(self._block_sizes.values())
612
+ return sum(self._block_sizes[addr] for addr in self._local_blocks)
616
613
 
617
614
  @property
618
615
  def binary(self):
@@ -620,8 +617,8 @@ class Function(Serializable):
620
617
  Get the object this function belongs to.
621
618
  :return: The object this function belongs to.
622
619
  """
623
-
624
- return self._project.loader.find_object_containing(self.addr, membership_check=False)
620
+ assert self.project is not None
621
+ return self.project.loader.find_object_containing(self.addr, membership_check=False)
625
622
 
626
623
  @property
627
624
  def offset(self) -> int:
@@ -698,10 +695,12 @@ class Function(Serializable):
698
695
  project = self.project
699
696
  if project.is_hooked(addr):
700
697
  hooker = project.hooked_by(addr)
701
- name = hooker.display_name
698
+ if hooker is not None:
699
+ name = hooker.display_name
702
700
  elif project.simos.is_syscall_addr(addr):
703
701
  syscall_inst = project.simos.syscall_from_addr(addr)
704
- name = syscall_inst.display_name
702
+ if syscall_inst is not None:
703
+ name = syscall_inst.display_name
705
704
 
706
705
  # generate an IDA-style sub_X name
707
706
  if name is None:
@@ -1338,7 +1337,8 @@ class Function(Serializable):
1338
1337
 
1339
1338
  @property
1340
1339
  def callable(self):
1341
- return self._project.factory.callable(self.addr)
1340
+ assert self.project is not None
1341
+ return self.project.factory.callable(self.addr)
1342
1342
 
1343
1343
  def normalize(self):
1344
1344
  """
@@ -1349,6 +1349,7 @@ class Function(Serializable):
1349
1349
 
1350
1350
  :return: None
1351
1351
  """
1352
+ assert self.project is not None
1352
1353
 
1353
1354
  # let's put a check here
1354
1355
  if self.startpoint is None:
@@ -1377,8 +1378,8 @@ class Function(Serializable):
1377
1378
 
1378
1379
  # Break other nodes
1379
1380
  for n in other_nodes:
1380
- new_size = get_real_address_if_arm(self._project.arch, smallest_node.addr) - get_real_address_if_arm(
1381
- self._project.arch, n.addr
1381
+ new_size = get_real_address_if_arm(self.project.arch, smallest_node.addr) - get_real_address_if_arm(
1382
+ self.project.arch, n.addr
1382
1383
  )
1383
1384
  if new_size == 0:
1384
1385
  # This is the node that has the same size as the smallest one
@@ -1511,20 +1512,21 @@ class Function(Serializable):
1511
1512
  lib = SIM_LIBRARIES.get(binary_name, None)
1512
1513
  libraries = set()
1513
1514
  if lib is not None:
1514
- libraries.add(lib)
1515
+ libraries.update(lib)
1515
1516
 
1516
1517
  else:
1517
1518
  # try all libraries or all libraries that match the given library name hint
1518
1519
  libraries = set()
1519
- for lib_name, lib in SIM_LIBRARIES.items():
1520
+ for lib_name, libs in SIM_LIBRARIES.items():
1520
1521
  # TODO: Add support for syscall libraries. Note that syscall libraries have different function
1521
1522
  # prototypes for .has_prototype() and .get_prototype()...
1522
- if not isinstance(lib, SimSyscallLibrary):
1523
- if binary_name_hint:
1524
- if binary_name_hint.lower() in lib_name.lower():
1523
+ for lib in libs:
1524
+ if not isinstance(lib, SimSyscallLibrary):
1525
+ if binary_name_hint:
1526
+ if binary_name_hint.lower() in lib_name.lower():
1527
+ libraries.add(lib)
1528
+ else:
1525
1529
  libraries.add(lib)
1526
- else:
1527
- libraries.add(lib)
1528
1530
 
1529
1531
  if not libraries:
1530
1532
  return False
@@ -1581,11 +1583,78 @@ class Function(Serializable):
1581
1583
  # int, long
1582
1584
  return addr
1583
1585
 
1586
+ def is_rust_function(self):
1587
+ ast = pydemumble.demangle(self.name)
1588
+ if ast:
1589
+ nodes = ast.split("::")
1590
+ if len(nodes) >= 2:
1591
+ last_node = nodes[-1]
1592
+ return (
1593
+ len(last_node) == 17
1594
+ and last_node.startswith("h")
1595
+ and all(c in "0123456789abcdef" for c in last_node[1:])
1596
+ )
1597
+ return False
1598
+
1599
+ @staticmethod
1600
+ def _rust_fmt_node(node):
1601
+ result = []
1602
+ rest = node
1603
+ if rest.startswith("_$"):
1604
+ rest = rest[1:]
1605
+ while True:
1606
+ if rest.startswith("."):
1607
+ if len(rest) > 1 and rest[1] == ".":
1608
+ result.append("::")
1609
+ rest = rest[2:]
1610
+ else:
1611
+ result.append(".")
1612
+ rest = rest[1:]
1613
+ elif rest.startswith("$"):
1614
+ if "$" in rest[1:]:
1615
+ escape, rest = rest[1:].split("$", 1)
1616
+ else:
1617
+ break
1618
+
1619
+ unescaped = {"SP": "@", "BP": "*", "RF": "&", "LT": "<", "GT": ">", "LP": "(", "RP": ")", "C": ","}.get(
1620
+ escape
1621
+ )
1622
+
1623
+ if unescaped is None and escape.startswith("u"):
1624
+ digits = escape[1:]
1625
+ if all(c in "0123456789abcdef" for c in digits):
1626
+ c = chr(int(digits, 16))
1627
+ if ord(c) >= 32 and ord(c) != 127:
1628
+ result.append(c)
1629
+ continue
1630
+ if unescaped:
1631
+ result.append(unescaped)
1632
+ else:
1633
+ break
1634
+ else:
1635
+ idx = min((rest.find(c) for c in "$." if c in rest), default=len(rest))
1636
+ result.append(rest[:idx])
1637
+ rest = rest[idx:]
1638
+ if not rest:
1639
+ break
1640
+ return "".join(result)
1641
+
1584
1642
  @property
1585
1643
  def demangled_name(self):
1586
1644
  ast = pydemumble.demangle(self.name)
1645
+ if self.is_rust_function():
1646
+ nodes = ast.split("::")[:-1]
1647
+ ast = "::".join([Function._rust_fmt_node(node) for node in nodes])
1587
1648
  return ast if ast else self.name
1588
1649
 
1650
+ @property
1651
+ def short_name(self):
1652
+ if self.is_rust_function():
1653
+ ast = pydemumble.demangle(self.name)
1654
+ return Function._rust_fmt_node(ast.split("::")[-2])
1655
+ func_name = get_cpp_function_name(self.demangled_name, specialized=False, qualified=True)
1656
+ return func_name.split("::")[-1]
1657
+
1589
1658
  def get_unambiguous_name(self, display_name: str | None = None) -> str:
1590
1659
  """
1591
1660
  Get a disambiguated function name.
@@ -1597,6 +1666,7 @@ class Function(Serializable):
1597
1666
  ::<addr>::<name> when the function binary is an unnamed non-main object, or when multiple functions with
1598
1667
  the same name are defined in the function binary.
1599
1668
  """
1669
+ assert self.project is not None
1600
1670
  must_disambiguate_by_addr = self.binary is not self.project.loader.main_object and self.binary_name is None
1601
1671
 
1602
1672
  # If there are multiple functions with the same name in the same object, disambiguate by address
@@ -1615,6 +1685,7 @@ class Function(Serializable):
1615
1685
  return n + (display_name or self.name)
1616
1686
 
1617
1687
  def apply_definition(self, definition: str, calling_convention: SimCC | type[SimCC] | None = None) -> None:
1688
+ assert self.project is not None
1618
1689
  if not definition.endswith(";"):
1619
1690
  definition += ";"
1620
1691
  func_def = parse_defns(definition, arch=self.project.arch)
@@ -1677,7 +1748,7 @@ class Function(Serializable):
1677
1748
  func.calling_convention = self.calling_convention
1678
1749
  func.prototype = self.prototype
1679
1750
  func._returning = self._returning
1680
- func.alignment = self.is_alignment
1751
+ func.is_alignment = self.is_alignment
1681
1752
  func.startpoint = self.startpoint
1682
1753
  func._addr_to_block_node = self._addr_to_block_node.copy()
1683
1754
  func._block_sizes = self._block_sizes.copy()
@@ -505,6 +505,7 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
505
505
 
506
506
  def rebuild_callgraph(self):
507
507
  self.callgraph = networkx.MultiDiGraph()
508
+ cfg = self._kb.cfgs.get_most_accurate()
508
509
  for func_addr in self._function_map:
509
510
  self.callgraph.add_node(func_addr)
510
511
  for func in self._function_map.values():
@@ -512,6 +513,14 @@ class FunctionManager(KnowledgeBasePlugin, collections.abc.Mapping):
512
513
  for node in func.transition_graph.nodes():
513
514
  if isinstance(node, Function):
514
515
  self.callgraph.add_edge(func.addr, node.addr)
516
+ else:
517
+ cfgnode = cfg.get_any_node(node.addr)
518
+ if (
519
+ cfgnode is not None
520
+ and cfgnode.function_address is not None
521
+ and cfgnode.function_address != func.addr
522
+ ):
523
+ self.callgraph.add_edge(func.addr, cfgnode.function_address)
515
524
 
516
525
 
517
526
  KnowledgeBasePlugin.register_default("functions", FunctionManager)
@@ -36,6 +36,10 @@ class FunctionParser:
36
36
  obj.alignment = function.is_alignment
37
37
  obj.binary_name = function.binary_name or ""
38
38
  obj.normalized = function.normalized
39
+ obj.calling_convention = pickle.dumps(function.calling_convention)
40
+ obj.prototype = pickle.dumps(function.prototype)
41
+ obj.prototype_libname = (function.prototype_libname or "").encode()
42
+ obj.is_prototype_guessed = function.is_prototype_guessed
39
43
 
40
44
  # signature matched?
41
45
  if not function.from_signature:
@@ -107,6 +111,10 @@ class FunctionParser:
107
111
  returning=cmsg.returning,
108
112
  alignment=cmsg.alignment,
109
113
  binary_name=None if not cmsg.binary_name else cmsg.binary_name,
114
+ calling_convention=pickle.loads(cmsg.calling_convention),
115
+ prototype=pickle.loads(cmsg.prototype),
116
+ prototype_libname=cmsg.prototype_libname if cmsg.prototype_libname else None,
117
+ is_prototype_guessed=cmsg.is_prototype_guessed,
110
118
  )
111
119
  obj._project = project
112
120
  obj.normalized = cmsg.normalized
@@ -209,7 +217,7 @@ class FunctionParser:
209
217
  stmt_idx=stmt_idx,
210
218
  is_exception=edge_type == "exception",
211
219
  )
212
- elif edge_type == "call":
220
+ elif edge_type in ("call", "syscall"):
213
221
  # find the corresponding fake_ret edge
214
222
  fake_ret_edge = next(
215
223
  iter(edge_ for edge_ in fake_return_edges[src_addr] if edge_[1].addr == src.addr + src.size), None
@@ -83,7 +83,7 @@ class SootFunction(Function):
83
83
  # Whether this function returns or not. `None` means it's not determined yet
84
84
  self._returning = None
85
85
 
86
- self.alignment = None
86
+ self.is_alignment = None
87
87
 
88
88
  # Determine returning status for SimProcedures and Syscalls
89
89
  hooker = None
@@ -51,7 +51,7 @@ class KeyDefinitionManager(KnowledgeBasePlugin):
51
51
  if not self._kb.functions.contains_addr(func_addr):
52
52
  return None
53
53
  func = self._kb.functions[func_addr]
54
- if func.is_simprocedure or func.is_plt or func.alignment:
54
+ if func.is_simprocedure or func.is_plt or func.is_alignment:
55
55
  return None
56
56
  callsites = list(func.get_call_sites())
57
57
  if not callsites:
@@ -527,12 +527,14 @@ class Equivalence:
527
527
  "atom0",
528
528
  "atom1",
529
529
  "codeloc",
530
+ "is_weakassignment",
530
531
  )
531
532
 
532
- def __init__(self, codeloc, atom0, atom1):
533
+ def __init__(self, codeloc, atom0, atom1, is_weakassignment: bool = False):
533
534
  self.codeloc = codeloc
534
535
  self.atom0 = atom0
535
536
  self.atom1 = atom1
537
+ self.is_weakassignment = is_weakassignment
536
538
 
537
539
  def __repr__(self):
538
540
  return f"<Eq@{self.codeloc!r}: {self.atom0!r}=={self.atom1!r}>"
@@ -543,7 +545,8 @@ class Equivalence:
543
545
  and other.codeloc == self.codeloc
544
546
  and other.atom0 == self.atom0
545
547
  and other.atom1 == self.atom1
548
+ and other.is_weakassignment == self.is_weakassignment
546
549
  )
547
550
 
548
551
  def __hash__(self):
549
- return hash((Equivalence, self.codeloc, self.atom0, self.atom1))
552
+ return hash((Equivalence, self.codeloc, self.atom0, self.atom1, self.is_weakassignment))
@@ -934,7 +934,7 @@ class VariableManagerInternal(Serializable):
934
934
 
935
935
  for var in chain(sorted_stack_variables, sorted_reg_variables, phi_only_vars):
936
936
  idx = next(var_ctr)
937
- if var.name is not None and not reset:
937
+ if var.name is not None and var.name != var.ident and not reset:
938
938
  continue
939
939
  if isinstance(var, (SimStackVariable, SimRegisterVariable)):
940
940
  var.name = f"v{idx}"
@@ -946,7 +946,7 @@ class VariableManagerInternal(Serializable):
946
946
  arg_vars = sorted(arg_vars, key=lambda v: _id_from_varident(v.ident))
947
947
  for var in arg_vars:
948
948
  idx = next(arg_ctr)
949
- if var.name is not None and not reset:
949
+ if var.name is not None and var.name != var.ident and not reset:
950
950
  continue
951
951
  var.name = arg_names[idx] if arg_names else f"a{idx}"
952
952
  var._hash = None
@@ -1040,7 +1040,7 @@ class VariableManagerInternal(Serializable):
1040
1040
  reg_vars: set[SimRegisterVariable] = set()
1041
1041
 
1042
1042
  # unify stack variables based on their locations
1043
- for v in self.get_variables():
1043
+ for v in self.get_variables() + list(self._phi_variables):
1044
1044
  if v in self._variables_to_unified_variables:
1045
1045
  # do not unify twice
1046
1046
  continue
@@ -25,7 +25,7 @@ if TYPE_CHECKING:
25
25
 
26
26
 
27
27
  l = logging.getLogger(name=__name__)
28
- SIM_LIBRARIES: dict[str, SimLibrary] = {}
28
+ SIM_LIBRARIES: dict[str, list[SimLibrary]] = {}
29
29
  SIM_TYPE_COLLECTIONS: dict[str, SimTypeCollection] = {}
30
30
 
31
31
 
@@ -38,8 +38,8 @@ class SimTypeCollection:
38
38
  self.names: list[str] | None = None
39
39
  self.types: dict[str, SimType] = {}
40
40
 
41
- def set_names(self, *names):
42
- self.names = names
41
+ def set_names(self, *names: str):
42
+ self.names = list(names)
43
43
  for name in names:
44
44
  SIM_TYPE_COLLECTIONS[name] = self
45
45
 
@@ -121,7 +121,7 @@ class SimLibrary:
121
121
  o.names = list(self.names)
122
122
  return o
123
123
 
124
- def update(self, other):
124
+ def update(self, other: SimLibrary):
125
125
  """
126
126
  Augment this SimLibrary with the information from another SimLibrary
127
127
 
@@ -147,7 +147,10 @@ class SimLibrary:
147
147
  """
148
148
  for name in names:
149
149
  self.names.append(name)
150
- SIM_LIBRARIES[name] = self
150
+ if name in SIM_LIBRARIES:
151
+ SIM_LIBRARIES[name].append(self)
152
+ else:
153
+ SIM_LIBRARIES[name] = [self]
151
154
 
152
155
  def set_default_cc(self, arch_name, cc_cls):
153
156
  """
@@ -252,7 +255,7 @@ class SimLibrary:
252
255
  proc.guessed_prototype = False
253
256
  if proc.prototype.arg_names is None:
254
257
  # Use inspect to extract the parameters from the run python function
255
- proc.prototype.arg_names = inspect.getfullargspec(proc.run).args[1:]
258
+ proc.prototype.arg_names = tuple(inspect.getfullargspec(proc.run).args[1:])
256
259
  if not proc.ARGS_MISMATCH:
257
260
  proc.num_args = len(proc.prototype.args)
258
261
  if proc.display_name in self.non_returning:
@@ -394,13 +397,12 @@ class SimCppLibrary(SimLibrary):
394
397
  stub = super().get_stub(demangled_name, arch)
395
398
  # try to determine a prototype from the function name if possible
396
399
  if demangled_name != name:
397
- # itanium-mangled function name
400
+ # mangled function name
398
401
  stub.prototype = self._proto_from_demangled_name(demangled_name)
399
402
  if stub.prototype is not None:
400
403
  stub.prototype = stub.prototype.with_arch(arch)
401
404
  stub.guessed_prototype = False
402
405
  if not stub.ARGS_MISMATCH:
403
- stub.cc.num_args = len(stub.prototype.args)
404
406
  stub.num_args = len(stub.prototype.args)
405
407
  return stub
406
408
 
@@ -482,9 +484,10 @@ class SimSyscallLibrary(SimLibrary):
482
484
 
483
485
  def update(self, other):
484
486
  super().update(other)
485
- self.syscall_number_mapping.update(other.syscall_number_mapping)
486
- self.syscall_name_mapping.update(other.syscall_name_mapping)
487
- self.default_cc_mapping.update(other.default_cc_mapping)
487
+ if isinstance(other, SimSyscallLibrary):
488
+ self.syscall_number_mapping.update(other.syscall_number_mapping)
489
+ self.syscall_name_mapping.update(other.syscall_name_mapping)
490
+ self.default_cc_mapping.update(other.default_cc_mapping)
488
491
 
489
492
  def minimum_syscall_number(self, abi):
490
493
  """
@@ -523,7 +526,7 @@ class SimSyscallLibrary(SimLibrary):
523
526
  :param mapping: A dict mapping syscall numbers to function names
524
527
  """
525
528
  self.syscall_number_mapping[abi].update(mapping)
526
- self.syscall_name_mapping[abi].update(dict(reversed(i) for i in mapping.items()))
529
+ self.syscall_name_mapping[abi].update({b: a for a, b in mapping.items()})
527
530
 
528
531
  def set_abi_cc(self, abi, cc_cls):
529
532
  """
@@ -0,0 +1,22 @@
1
+ # pylint:disable=line-too-long
2
+ from __future__ import annotations
3
+ from collections import OrderedDict
4
+
5
+ from angr.procedures.definitions import SimTypeCollection
6
+ from angr.sim_type import SimCppClass, SimTypePointer, SimTypeChar, SimTypeInt
7
+
8
+ typelib = SimTypeCollection()
9
+ typelib.set_names("cpp::std")
10
+ typelib.types = {
11
+ "class std::basic_string<char, struct std::char_traits<char>, class std::allocator<char>>": SimCppClass(
12
+ unique_name="class std::basic_string<char, struct std::char_traits<char>, class std::allocator<char>>",
13
+ name="std::string",
14
+ members=OrderedDict(
15
+ [
16
+ ("m_data", SimTypePointer(SimTypeChar())),
17
+ ("m_size", SimTypeInt(signed=False)),
18
+ ("m_capacity", SimTypeInt(signed=False)),
19
+ ]
20
+ ),
21
+ ),
22
+ }
@@ -164,7 +164,7 @@ class FormatString:
164
164
  negative = claripy.SLT(target_variable, 0)
165
165
 
166
166
  # how many digits does it take to represent this variable fully?
167
- max_digits = int(math.ceil(math.log(2**bits, base)))
167
+ max_digits = math.ceil(math.log(2**bits, base))
168
168
 
169
169
  # how many digits does the format specify?
170
170
  spec_digits = component.length_spec
angr/project.py CHANGED
@@ -14,7 +14,6 @@ from archinfo.arch_soot import SootAddressDescriptor, ArchSoot
14
14
  import cle
15
15
  from .sim_procedure import SimProcedure
16
16
 
17
- from .misc.ux import deprecated
18
17
  from .errors import AngrNoPluginError
19
18
 
20
19
  l = logging.getLogger(name=__name__)
@@ -300,16 +299,17 @@ class Project:
300
299
  missing_libs = []
301
300
  for lib_name in self.loader.missing_dependencies:
302
301
  try:
303
- missing_libs.append(SIM_LIBRARIES[lib_name])
302
+ missing_libs.extend(SIM_LIBRARIES[lib_name])
304
303
  except KeyError:
305
304
  l.info("There are no simprocedures for missing library %s :(", lib_name)
306
305
  # additionally provide libraries we _have_ loaded as a fallback fallback
307
306
  # this helps in the case that e.g. CLE picked up a linux arm libc to satisfy an android arm binary
308
307
  for lib in self.loader.all_objects:
309
308
  if lib.provides is not None and lib.provides in SIM_LIBRARIES:
310
- simlib = SIM_LIBRARIES[lib.provides]
311
- if simlib not in missing_libs:
312
- missing_libs.append(simlib)
309
+ simlibs = SIM_LIBRARIES[lib.provides]
310
+ for simlib in simlibs:
311
+ if simlib not in missing_libs:
312
+ missing_libs.append(simlib)
313
313
 
314
314
  # Step 2: Categorize every "import" symbol in each object.
315
315
  # If it's IGNORED, mark it for stubbing
@@ -362,11 +362,13 @@ class Project:
362
362
  owner_name = owner_name.lower()
363
363
  if owner_name not in SIM_LIBRARIES:
364
364
  continue
365
- sim_lib = SIM_LIBRARIES[owner_name]
366
- if not sim_lib.has_implementation(export.name):
367
- continue
368
- l.info("Using builtin SimProcedure for %s from %s", export.name, sim_lib.name)
369
- self.hook_symbol(export.rebased_addr, sim_lib.get(export.name, sim_proc_arch))
365
+ sim_libs = SIM_LIBRARIES[owner_name]
366
+ for sim_lib in sim_libs:
367
+ if not sim_lib.has_implementation(export.name):
368
+ continue
369
+ l.info("Using builtin SimProcedure for %s from %s", export.name, sim_lib.name)
370
+ self.hook_symbol(export.rebased_addr, sim_lib.get(export.name, sim_proc_arch))
371
+ break
370
372
 
371
373
  # Step 2.3: If 2.2 didn't work, check if the symbol wants to be resolved
372
374
  # by a library we already know something about. Resolve it appropriately.
@@ -375,7 +377,7 @@ class Project:
375
377
  # we still want to try as hard as we can to figure out where it comes from
376
378
  # so we can get the calling convention as close to right as possible.
377
379
  elif reloc.resolvewith is not None and reloc.resolvewith in SIM_LIBRARIES:
378
- sim_lib = SIM_LIBRARIES[reloc.resolvewith]
380
+ sim_lib = sorted(SIM_LIBRARIES[reloc.resolvewith], key=lambda lib: lib.has_prototype(export.name))[-1]
379
381
  if self._check_user_blacklists(export.name):
380
382
  if not func.is_weak:
381
383
  l.info("Using stub SimProcedure for unresolved %s from %s", func.name, sim_lib.name)
@@ -407,7 +409,7 @@ class Project:
407
409
  if export.name and export.name.startswith("_Z"):
408
410
  # GNU C++ name. Use a C++ library to create the stub
409
411
  if "libstdc++.so" in SIM_LIBRARIES:
410
- the_lib = SIM_LIBRARIES["libstdc++.so"]
412
+ the_lib = SIM_LIBRARIES["libstdc++.so"][0]
411
413
  else:
412
414
  l.critical(
413
415
  "Does not find any C++ library in SIM_LIBRARIES. We may not correctly "
@@ -437,16 +439,17 @@ class Project:
437
439
  """
438
440
  # First, filter the SIM_LIBRARIES to a reasonable subset based on the hint
439
441
  if hint == "win":
440
- hinted_libs = filter(lambda lib: lib if lib.endswith(".dll") else None, SIM_LIBRARIES)
442
+ hinted_libs = [lib for lib in SIM_LIBRARIES if lib.endswith(".dll")]
441
443
  else:
442
- hinted_libs = filter(lambda lib: lib if ".so" in lib else None, SIM_LIBRARIES)
444
+ hinted_libs = [lib for lib in SIM_LIBRARIES if ".so" in lib]
443
445
 
444
446
  for lib in hinted_libs:
445
- if SIM_LIBRARIES[lib].has_implementation(f.name):
446
- l.debug("Found implementation for %s in %s", f, lib)
447
- hook_at = f.resolvedby.rebased_addr if f.resolvedby else f.relative_addr # ????
448
- self.hook_symbol(hook_at, (SIM_LIBRARIES[lib].get(f.name, self.arch)))
449
- return True
447
+ for simlib in SIM_LIBRARIES[lib]:
448
+ if simlib.has_implementation(f.name):
449
+ l.debug("Found implementation for %s in %s", f, lib)
450
+ hook_at = f.resolvedby.rebased_addr if f.resolvedby else f.relative_addr # ????
451
+ self.hook_symbol(hook_at, (simlib.get(f.name, self.arch)))
452
+ return True
450
453
 
451
454
  l.debug("Could not find matching SimProcedure for %s, ignoring.", f.name)
452
455
  return False
@@ -826,18 +829,9 @@ class Project:
826
829
  def __repr__(self):
827
830
  return "<Project %s>" % (self.filename if self.filename is not None else "loaded from stream")
828
831
 
829
- #
830
- # Compatibility
831
- #
832
-
833
- @property
834
- @deprecated(replacement="simos")
835
- def _simos(self):
836
- return self.simos
837
-
838
832
 
839
833
  from .factory import AngrObjectFactory
840
- from angr.simos import SimOS, os_mapping
834
+ from .simos import SimOS, os_mapping
841
835
  from .analyses.analysis import AnalysesHub, AnalysesHubWithDefault
842
836
  from .knowledge_base import KnowledgeBase
843
837
  from .procedures import SIM_PROCEDURES, SIM_LIBRARIES