angr 9.2.112__py3-none-macosx_11_0_arm64.whl → 9.2.114__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (34)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_base.py +3 -0
  3. angr/analyses/decompiler/condition_processor.py +9 -2
  4. angr/analyses/decompiler/optimization_passes/__init__.py +3 -1
  5. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +364 -0
  6. angr/analyses/decompiler/optimization_passes/deadblock_remover.py +1 -1
  7. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +99 -12
  8. angr/analyses/decompiler/optimization_passes/optimization_pass.py +79 -9
  9. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +21 -0
  10. angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +111 -9
  11. angr/analyses/decompiler/redundant_label_remover.py +17 -0
  12. angr/analyses/decompiler/region_simplifiers/switch_cluster_simplifier.py +5 -0
  13. angr/analyses/decompiler/seq_cf_structure_counter.py +37 -0
  14. angr/analyses/decompiler/structured_codegen/c.py +4 -5
  15. angr/analyses/decompiler/structuring/phoenix.py +86 -6
  16. angr/analyses/decompiler/utils.py +6 -1
  17. angr/analyses/reaching_definitions/rd_state.py +2 -0
  18. angr/analyses/reaching_definitions/reaching_definitions.py +7 -0
  19. angr/angrdb/serializers/loader.py +91 -7
  20. angr/calling_conventions.py +21 -13
  21. angr/knowledge_plugins/key_definitions/live_definitions.py +5 -0
  22. angr/knowledge_plugins/propagations/states.py +3 -2
  23. angr/lib/angr_native.dylib +0 -0
  24. angr/procedures/stubs/ReturnUnconstrained.py +1 -2
  25. angr/procedures/stubs/syscall_stub.py +1 -2
  26. angr/sim_type.py +354 -136
  27. angr/state_plugins/debug_variables.py +2 -2
  28. angr/storage/memory_mixins/multi_value_merger_mixin.py +12 -2
  29. {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/METADATA +26 -26
  30. {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/RECORD +34 -32
  31. {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/WHEEL +1 -1
  32. {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/LICENSE +0 -0
  33. {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/entry_points.txt +0 -0
  34. {angr-9.2.112.dist-info → angr-9.2.114.dist-info}/top_level.txt +0 -0
@@ -76,6 +76,7 @@ class ReachingDefinitionsAnalysis(
76
76
  track_liveness: bool = True,
77
77
  func_addr: int | None = None,
78
78
  element_limit: int = 5,
79
+ merge_into_tops: bool = True,
79
80
  ):
80
81
  """
81
82
  :param subject: The subject of the analysis: a function, or a single basic block
@@ -110,6 +111,10 @@ class ReachingDefinitionsAnalysis(
110
111
  :param track_liveness: Whether to track liveness information. This can consume
111
112
  sizeable amounts of RAM on large functions. (e.g. ~15GB for a function
112
113
  with 4k nodes)
114
+ :param merge_into_tops: Merge known values into TOP if TOP is present.
115
+ If True: {TOP} V {0xabc} = {TOP}
116
+ If False: {TOP} V {0xabc} = {TOP, 0xabc}
117
+
113
118
 
114
119
  """
115
120
 
@@ -134,6 +139,7 @@ class ReachingDefinitionsAnalysis(
134
139
  self._use_callee_saved_regs_at_return = use_callee_saved_regs_at_return
135
140
  self._func_addr = func_addr
136
141
  self._element_limit = element_limit
142
+ self._merge_into_tops = merge_into_tops
137
143
 
138
144
  if dep_graph is None or dep_graph is False:
139
145
  self._dep_graph = None
@@ -473,6 +479,7 @@ class ReachingDefinitionsAnalysis(
473
479
  canonical_size=self._canonical_size,
474
480
  initializer=self._state_initializer,
475
481
  element_limit=self._element_limit,
482
+ merge_into_tops=self._merge_into_tops,
476
483
  )
477
484
 
478
485
  # pylint: disable=no-self-use,arguments-differ
@@ -1,4 +1,9 @@
1
+ from __future__ import annotations
2
+ from typing import Any
1
3
  from io import BytesIO
4
+ import json
5
+ import binascii
6
+ import logging
2
7
 
3
8
  import cle
4
9
 
@@ -6,15 +11,85 @@ from ...errors import AngrCorruptDBError, AngrDBError
6
11
  from ..models import DbObject
7
12
 
8
13
 
14
+ _l = logging.getLogger(__name__)
15
+
16
+
17
+ class LoadArgsJSONEncoder(json.JSONEncoder):
18
+ """
19
+ A JSON encoder that supports serializing bytes.
20
+ """
21
+
22
+ def default(self, o):
23
+ if isinstance(o, bytes):
24
+ return {
25
+ "__custom_type__": "bytes",
26
+ "__v__": binascii.hexlify(o).decode("ascii"),
27
+ }
28
+ return super().default(o)
29
+
30
+
31
+ class LoadArgsJSONDecoder(json.JSONDecoder):
32
+ """
33
+ A JSON decoder that supports unserializing into bytes.
34
+ """
35
+
36
+ def __init__(self):
37
+ super().__init__(object_hook=self._objhook)
38
+
39
+ def _objhook(self, d: dict): # pylint:disable=no-self-use
40
+ if "__custom_type__" in d:
41
+ match d["__custom_type__"]:
42
+ case "bytes":
43
+ if "__v__" in d:
44
+ return binascii.unhexlify(d["__v__"])
45
+ return d
46
+
47
+
9
48
  class LoaderSerializer:
10
49
  """
11
50
  Serialize/unserialize a CLE Loader object into/from an angr DB.
51
+
52
+ Corner cases:
53
+ - For certain backends (e.g., CART), we do not store the data of the main object. angr will unpack the CART file
54
+ again after loading the database.
12
55
  """
13
56
 
57
+ NO_MAINBIN_BACKENDS = [cle.backends.CARTFile]
58
+ LOAD_ARG_BLACKLIST = {"loader", "is_main_bin"}
59
+
14
60
  backend2name = {v: k for k, v in cle.ALL_BACKENDS.items()}
15
61
 
62
+ @staticmethod
63
+ def json_serialize_load_args(load_args: dict[str, Any]) -> str:
64
+ serializable_keys = []
65
+ encoder = LoadArgsJSONEncoder()
66
+ for key, argv in load_args.items():
67
+ if key in LoaderSerializer.LOAD_ARG_BLACKLIST:
68
+ continue
69
+ try:
70
+ encoder.encode(argv)
71
+ except TypeError:
72
+ _l.warning("Cannot serialize %s: %s", key, argv)
73
+ else:
74
+ serializable_keys.append(key)
75
+
76
+ return encoder.encode({k: load_args[k] for k in serializable_keys})
77
+
78
+ @staticmethod
79
+ def should_skip_main_binary(loader) -> tuple[bool, cle.backends.Backend | None]:
80
+ for obj in loader.all_objects:
81
+ for cls in LoaderSerializer.NO_MAINBIN_BACKENDS:
82
+ if isinstance(obj, cls):
83
+ return True, obj
84
+ return False, None
85
+
16
86
  @staticmethod
17
87
  def dump(session, loader):
88
+ main_object_in_db = loader.main_object
89
+ skip_mainbin, new_main_obj = LoaderSerializer.should_skip_main_binary(loader)
90
+ if skip_mainbin and new_main_obj is not None:
91
+ main_object_in_db = new_main_obj
92
+
18
93
  for obj in loader.all_objects:
19
94
  if isinstance(
20
95
  obj,
@@ -27,6 +102,10 @@ class LoaderSerializer:
27
102
  # skip dynamically created objects
28
103
  continue
29
104
 
105
+ # should we skip the main object?
106
+ if skip_mainbin and loader.main_object is obj:
107
+ continue
108
+
30
109
  # does the object exist?
31
110
  exists = session.query(DbObject.id).filter_by(path=obj.binary).scalar() is not None
32
111
  if exists:
@@ -44,11 +123,11 @@ class LoaderSerializer:
44
123
 
45
124
  # save the object
46
125
  o = DbObject(
47
- main_object=loader.main_object is obj,
126
+ main_object=main_object_in_db is obj,
48
127
  path=obj.binary,
49
128
  content=content,
50
129
  backend=LoaderSerializer.backend2name.get(obj.__class__),
51
- backend_args="", # TODO: We will need support from CLE to store loader arguments
130
+ backend_args=LoaderSerializer.json_serialize_load_args(obj.load_args),
52
131
  )
53
132
  session.add(o)
54
133
 
@@ -58,11 +137,15 @@ class LoaderSerializer:
58
137
  main_object = None
59
138
 
60
139
  db_objects: list[DbObject] = session.query(DbObject)
140
+ load_args = {}
141
+
142
+ decoder = LoadArgsJSONDecoder()
61
143
 
62
144
  for db_o in db_objects:
63
145
  all_objects[db_o.path] = db_o
64
146
  if db_o.main_object:
65
147
  main_object = db_o
148
+ load_args[db_o] = decoder.decode(db_o.backend_args) if db_o.backend_args else {}
66
149
 
67
150
  if main_object is None:
68
151
  raise AngrCorruptDBError("Corrupt database: No main object.")
@@ -70,12 +153,13 @@ class LoaderSerializer:
70
153
  # build params
71
154
  # FIXME: Load other objects
72
155
 
73
- loader = cle.Loader(
74
- BytesIO(main_object.content),
75
- )
156
+ loader = cle.Loader(BytesIO(main_object.content), main_opts=load_args[main_object])
157
+
158
+ skip_mainbin, _ = LoaderSerializer.should_skip_main_binary(loader)
76
159
 
77
- # fix the binary name of the main binary
78
160
  loader._main_binary_path = main_object.path
79
- loader.main_object.binary = main_object.path
161
+ if not skip_mainbin:
162
+ # fix the binary name of the main binary
163
+ loader.main_object.binary = main_object.path
80
164
 
81
165
  return loader
@@ -1,6 +1,6 @@
1
1
  # pylint:disable=line-too-long,missing-class-docstring,no-self-use
2
2
  import logging
3
- from typing import Optional, Union
3
+ from typing import Optional, Union, cast
4
4
  from collections import defaultdict
5
5
 
6
6
  import claripy
@@ -768,16 +768,16 @@ class SimCC:
768
768
  result = prototype if prototype is not None else SimTypeFunction([], charp)
769
769
  for arg in args[len(result.args) :]:
770
770
  if type(arg) in (int, bytes, PointerWrapper):
771
- result.args.append(charp)
771
+ result.args += (charp,)
772
772
  elif type(arg) is float:
773
- result.args.append(SimTypeDouble())
773
+ result.args += (SimTypeDouble(),)
774
774
  elif isinstance(arg, claripy.ast.BV):
775
- result.args.append(SimTypeNum(len(arg), False))
775
+ result.args += (SimTypeNum(len(arg), False),)
776
776
  elif isinstance(arg, claripy.ast.FP):
777
777
  if arg.sort == claripy.FSORT_FLOAT:
778
- result.args.append(SimTypeFloat())
778
+ result.args += (SimTypeFloat(),)
779
779
  elif arg.sort == claripy.FSORT_DOUBLE:
780
- result.args.append(SimTypeDouble())
780
+ result.args += (SimTypeDouble(),)
781
781
  else:
782
782
  raise TypeError("WHAT kind of floating point is this")
783
783
  else:
@@ -797,6 +797,8 @@ class SimCC:
797
797
 
798
798
  def set_return_val(self, state, val, ty, stack_base=None, perspective_returned=False):
799
799
  loc = self.return_val(ty, perspective_returned=perspective_returned)
800
+ if loc is None:
801
+ raise ValueError("Cannot set return value - there is no return value location")
800
802
  loc.set_value(state, val, stack_base=stack_base)
801
803
 
802
804
  def setup_callsite(self, state, ret_addr, args, prototype, stack_base=None, alloc_base=None, grow_like_stack=True):
@@ -919,7 +921,7 @@ class SimCC:
919
921
  TODO: support the stack_base parameter from setup_callsite...? Does that make sense in this context?
920
922
  Maybe it could make sense by saying that you pass it in as something like the "saved base pointer" value?
921
923
  """
922
- if return_val is not None and not isinstance(prototype.returnty, SimTypeBottom):
924
+ if return_val is not None and prototype is not None and not isinstance(prototype.returnty, SimTypeBottom):
923
925
  self.set_return_val(state, return_val, prototype.returnty)
924
926
  # ummmmmmmm hack
925
927
  loc = self.return_val(prototype.returnty)
@@ -928,11 +930,11 @@ class SimCC:
928
930
 
929
931
  ret_addr = self.return_addr.get_value(state)
930
932
 
931
- if state.arch.sp_offset is not None:
933
+ if state.arch.sp_offset is not None and prototype is not None:
932
934
  if force_callee_cleanup or self.CALLEE_CLEANUP:
933
935
  session = self.arg_session(prototype.returnty)
934
936
  if self.return_in_implicit_outparam(prototype.returnty):
935
- extra = [self.return_val(prototype.returnty).ptr_loc]
937
+ extra = [cast(SimReferenceArgument, self.return_val(prototype.returnty)).ptr_loc]
936
938
  else:
937
939
  extra = []
938
940
  state.regs.sp += self.stack_space(extra + [self.next_arg(session, x) for x in prototype.args])
@@ -2245,7 +2247,7 @@ ARCH_NAME_ALIASES = {
2245
2247
  "ARMEL": [],
2246
2248
  "ARMHF": [],
2247
2249
  "ARMCortexM": [],
2248
- "AARCH64": ["arm64"],
2250
+ "AARCH64": ["arm64", "aarch64"],
2249
2251
  "MIPS32": [],
2250
2252
  "MIPS64": [],
2251
2253
  "PPC32": ["powerpc32"],
@@ -2313,10 +2315,16 @@ def unify_arch_name(arch: str) -> str:
2313
2315
  # Sleigh architecture names
2314
2316
  chunks = arch.lower().split(":")
2315
2317
  if len(chunks) >= 3:
2316
- arch_base, endianness, bits = chunks[:3] # pylint:disable=unused-variable
2317
- arch = f"{arch_base}{bits}"
2318
+ arch_base, _, bits = chunks[:3]
2319
+
2320
+ if arch_base in ALIAS_TO_ARCH_NAME:
2321
+ return ALIAS_TO_ARCH_NAME[arch_base]
2322
+
2323
+ base_with_bits = f"{arch_base}{bits}"
2324
+ if base_with_bits in ALIAS_TO_ARCH_NAME:
2325
+ return ALIAS_TO_ARCH_NAME[base_with_bits]
2318
2326
 
2319
- return ALIAS_TO_ARCH_NAME.get(arch, arch)
2327
+ return arch
2320
2328
 
2321
2329
 
2322
2330
  SYSCALL_CC: dict[str, dict[str, type[SimCCSyscall]]] = {
@@ -128,6 +128,7 @@ class LiveDefinitions:
128
128
  tmp_uses=None,
129
129
  other_uses=None,
130
130
  element_limit=5,
131
+ merge_into_tops: bool = True,
131
132
  ):
132
133
  self.project: Optional["Project"] = None
133
134
  self.arch = arch
@@ -143,6 +144,7 @@ class LiveDefinitions:
143
144
  page_kwargs={"mo_cmp": self._mo_cmp},
144
145
  endness=self.arch.register_endness,
145
146
  element_limit=element_limit,
147
+ merge_into_top=merge_into_tops,
146
148
  )
147
149
  if registers is None
148
150
  else registers
@@ -155,6 +157,7 @@ class LiveDefinitions:
155
157
  skip_missing_values_during_merging=False,
156
158
  page_kwargs={"mo_cmp": self._mo_cmp},
157
159
  element_limit=element_limit,
160
+ merge_into_top=merge_into_tops,
158
161
  )
159
162
  if stack is None
160
163
  else stack
@@ -167,6 +170,7 @@ class LiveDefinitions:
167
170
  skip_missing_values_during_merging=False,
168
171
  page_kwargs={"mo_cmp": self._mo_cmp},
169
172
  element_limit=element_limit,
173
+ merge_into_top=merge_into_tops,
170
174
  )
171
175
  if memory is None
172
176
  else memory
@@ -179,6 +183,7 @@ class LiveDefinitions:
179
183
  skip_missing_values_during_merging=False,
180
184
  page_kwargs={"mo_cmp": self._mo_cmp},
181
185
  element_limit=element_limit,
186
+ merge_into_top=merge_into_tops,
182
187
  )
183
188
  if heap is None
184
189
  else heap
@@ -519,8 +519,9 @@ class PropagatorVEXState(PropagatorState):
519
519
 
520
520
  def load_register(self, offset, size):
521
521
  # TODO: Fix me
522
- if size != self.gpr_size:
523
- return self.top(size * self.arch.byte_width).annotate(RegisterAnnotation(offset, size))
522
+ # load register even if size != self.gpr_size
523
+ # if size != self.gpr_size:
524
+ # return self.top(size * self.arch.byte_width).annotate(RegisterAnnotation(offset, size))
524
525
 
525
526
  try:
526
527
  v = self._registers.load(offset, size=size)
Binary file: angr/lib/angr_native.dylib (contents not shown)
@@ -11,8 +11,7 @@ class ReturnUnconstrained(angr.SimProcedure):
11
11
  if return_val is None:
12
12
  # code duplicated to syscall_stub
13
13
  size = self.prototype.returnty.size
14
- # ummmmm do we really want to rely on this behavior?
15
- if size is NotImplemented:
14
+ if size is None:
16
15
  o = None
17
16
  else:
18
17
  o = self.state.solver.Unconstrained(
@@ -11,8 +11,7 @@ class syscall(angr.SimProcedure):
11
11
 
12
12
  # code duplicated from ReturnUnconstrained
13
13
  size = self.prototype.returnty.size
14
- # ummmmm do we really want to rely on this behavior?
15
- if size is NotImplemented:
14
+ if size is None:
16
15
  return None
17
16
  else:
18
17
  return self.state.solver.Unconstrained(