angr 9.2.118__py3-none-macosx_11_0_arm64.whl → 9.2.119__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (77)
  1. angr/__init__.py +1 -1
  2. angr/analyses/analysis.py +43 -1
  3. angr/analyses/cfg/cfg_fast.py +135 -23
  4. angr/analyses/decompiler/ail_simplifier.py +1 -1
  5. angr/analyses/decompiler/clinic.py +23 -12
  6. angr/analyses/decompiler/condition_processor.py +41 -16
  7. angr/analyses/decompiler/decompiler.py +3 -0
  8. angr/analyses/decompiler/jumptable_entry_condition_rewriter.py +1 -1
  9. angr/analyses/decompiler/optimization_passes/duplication_reverter/ail_merge_graph.py +7 -4
  10. angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +6 -2
  11. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +19 -19
  12. angr/analyses/decompiler/structured_codegen/c.py +9 -2
  13. angr/analyses/decompiler/structuring/dream.py +8 -7
  14. angr/analyses/decompiler/structuring/phoenix.py +3 -3
  15. angr/analyses/propagator/engine_ail.py +2 -1
  16. angr/analyses/reaching_definitions/function_handler.py +6 -2
  17. angr/analyses/stack_pointer_tracker.py +29 -11
  18. angr/analyses/typehoon/translator.py +19 -2
  19. angr/analyses/typehoon/typeconsts.py +8 -0
  20. angr/analyses/variable_recovery/engine_vex.py +7 -10
  21. angr/calling_conventions.py +69 -24
  22. angr/concretization_strategies/norepeats.py +3 -3
  23. angr/engines/concrete.py +1 -1
  24. angr/engines/light/engine.py +6 -11
  25. angr/engines/pcode/engine.py +2 -2
  26. angr/engines/soot/engine.py +5 -5
  27. angr/engines/soot/expressions/condition.py +1 -1
  28. angr/engines/soot/statements/goto.py +1 -1
  29. angr/engines/soot/statements/if_.py +1 -1
  30. angr/engines/soot/statements/throw.py +1 -1
  31. angr/engines/successors.py +1 -1
  32. angr/engines/unicorn.py +2 -2
  33. angr/engines/vex/heavy/heavy.py +2 -2
  34. angr/errors.py +4 -0
  35. angr/exploration_techniques/driller_core.py +2 -3
  36. angr/exploration_techniques/suggestions.py +2 -2
  37. angr/knowledge_plugins/cfg/cfg_model.py +2 -1
  38. angr/knowledge_plugins/cfg/memory_data.py +1 -0
  39. angr/lib/angr_native.dylib +0 -0
  40. angr/misc/telemetry.py +54 -0
  41. angr/procedures/java/unconstrained.py +1 -1
  42. angr/procedures/java_jni/__init__.py +21 -13
  43. angr/procedures/java_jni/string_operations.py +1 -1
  44. angr/procedures/java_lang/double.py +1 -1
  45. angr/procedures/java_lang/string.py +1 -1
  46. angr/procedures/java_util/scanner_nextline.py +1 -1
  47. angr/procedures/linux_kernel/vsyscall.py +1 -1
  48. angr/procedures/stubs/Redirect.py +1 -1
  49. angr/procedures/stubs/UserHook.py +1 -1
  50. angr/procedures/stubs/format_parser.py +1 -1
  51. angr/sim_procedure.py +5 -5
  52. angr/sim_state.py +21 -34
  53. angr/sim_type.py +42 -0
  54. angr/simos/javavm.py +7 -12
  55. angr/simos/linux.py +1 -1
  56. angr/simos/simos.py +1 -1
  57. angr/simos/windows.py +1 -1
  58. angr/state_hierarchy.py +1 -1
  59. angr/state_plugins/preconstrainer.py +2 -2
  60. angr/state_plugins/scratch.py +1 -1
  61. angr/state_plugins/solver.py +1 -1
  62. angr/state_plugins/trace_additions.py +8 -8
  63. angr/storage/file.py +12 -12
  64. angr/storage/memory_mixins/actions_mixin.py +1 -1
  65. angr/storage/memory_mixins/convenient_mappings_mixin.py +6 -8
  66. angr/storage/memory_mixins/multi_value_merger_mixin.py +5 -5
  67. angr/storage/memory_mixins/paged_memory/pages/ultra_page.py +1 -1
  68. angr/storage/memory_mixins/size_resolution_mixin.py +1 -1
  69. angr/storage/memory_mixins/smart_find_mixin.py +2 -2
  70. angr/storage/memory_object.py +7 -9
  71. angr/utils/timing.py +30 -18
  72. {angr-9.2.118.dist-info → angr-9.2.119.dist-info}/METADATA +8 -6
  73. {angr-9.2.118.dist-info → angr-9.2.119.dist-info}/RECORD +77 -76
  74. {angr-9.2.118.dist-info → angr-9.2.119.dist-info}/LICENSE +0 -0
  75. {angr-9.2.118.dist-info → angr-9.2.119.dist-info}/WHEEL +0 -0
  76. {angr-9.2.118.dist-info → angr-9.2.119.dist-info}/entry_points.txt +0 -0
  77. {angr-9.2.118.dist-info → angr-9.2.119.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  # pylint: disable=wrong-import-position
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "9.2.118"
5
+ __version__ = "9.2.119"
6
6
 
7
7
  if bytes is str:
8
8
  raise Exception(
angr/analyses/analysis.py CHANGED
@@ -5,8 +5,10 @@ import sys
5
5
  import contextlib
6
6
  from collections import defaultdict
7
7
  from inspect import Signature
8
- from typing import TYPE_CHECKING, TypeVar, Type, Generic, Optional
8
+ from typing import TYPE_CHECKING, TypeVar, Generic, cast
9
9
  from collections.abc import Callable
10
+ from types import NoneType
11
+ from itertools import chain
10
12
 
11
13
  import logging
12
14
  import time
@@ -16,6 +18,7 @@ from rich import progress
16
18
 
17
19
  from ..misc.plugins import PluginVendor, VendorPreset
18
20
  from ..misc.ux import deprecated
21
+ from ..misc import telemetry
19
22
 
20
23
  if TYPE_CHECKING:
21
24
  from ..knowledge_base import KnowledgeBase
@@ -55,6 +58,7 @@ if TYPE_CHECKING:
55
58
  AnalysisParams = ParamSpec("AnalysisParams")
56
59
 
57
60
  l = logging.getLogger(name=__name__)
61
+ t = telemetry.get_tracer(name=__name__)
58
62
 
59
63
 
60
64
  class AnalysisLogEntry:
@@ -186,7 +190,45 @@ class AnalysisFactory(Generic[A]):
186
190
  show_progressbar: bool = False,
187
191
  ) -> type[A]:
188
192
  @functools.wraps(self._analysis_cls.__init__)
193
+ @t.start_as_current_span(self._analysis_cls.__name__)
189
194
  def wrapper(*args, **kwargs):
195
+ span = telemetry.get_current_span()
196
+ sig = cast(Signature, self.__call__.__func__.__signature__)
197
+ bound = sig.bind(None, *args, **kwargs)
198
+ for name, val in chain(bound.arguments.items(), bound.arguments.get("kwargs", {}).items()):
199
+ if name in ("kwargs", "self"):
200
+ continue
201
+ if isinstance(val, (str, bytes, bool, int, float, NoneType)):
202
+ if val is None:
203
+ span.set_attribute(f"arg.{name}.is_none", True)
204
+ else:
205
+ span.set_attribute(f"arg.{name}", val)
206
+ elif isinstance(val, (list, tuple, set, frozenset)):
207
+ listval = list(val)
208
+ if not listval or (
209
+ isinstance(listval[0], (str, bytes, bool, int, float))
210
+ and all(type(sval) == type(listval[0]) for sval in listval)
211
+ ):
212
+ span.set_attribute(f"arg.{name}", listval)
213
+ elif isinstance(val, dict):
214
+ listval_keys = list(val)
215
+ listval_values = list(val.values())
216
+ if not listval_keys or (
217
+ isinstance(listval_keys[0], (str, bytes, bool, int, float))
218
+ and all(type(sval) == type(listval_keys[0]) for sval in listval_keys)
219
+ ):
220
+ span.set_attribute(f"arg.{name}.keys", listval_keys)
221
+ if not listval_values or (
222
+ isinstance(listval_values[0], (str, bytes, bool, int, float))
223
+ and all(type(sval) == type(listval_values[0]) for sval in listval_values)
224
+ ):
225
+ span.set_attribute(f"arg.{name}.values", listval_values)
226
+ else:
227
+ span.set_attribute(f"arg.{name}.unrepresentable", True)
228
+ if self._project.filename is not None:
229
+ span.set_attribute("project.binary_name", self._project.filename)
230
+ span.set_attribute("project.arch_name", self._project.arch.name)
231
+
190
232
  oself = object.__new__(self._analysis_cls)
191
233
  oself.named_errors = defaultdict(list)
192
234
  oself.errors = []
angr/analyses/cfg/cfg_fast.py CHANGED
@@ -1049,15 +1049,14 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1049
1049
  # no wide string is found
1050
1050
  return 0
1051
1051
 
1052
- def _scan_for_repeating_bytes(self, start_addr, repeating_byte, threshold=2):
1052
+ def _scan_for_repeating_bytes(self, start_addr: int, repeating_byte: int, threshold: int = 2) -> int:
1053
1053
  """
1054
1054
  Scan from a given address and determine the occurrences of a given byte.
1055
1055
 
1056
- :param int start_addr: The address in memory to start scanning.
1057
- :param int repeating_byte: The repeating byte to scan for.
1058
- :param int threshold: The minimum occurrences.
1059
- :return: The occurrences of a given byte.
1060
- :rtype: int
1056
+ :param start_addr: The address in memory to start scanning.
1057
+ :param repeating_byte: The repeating byte to scan for.
1058
+ :param threshold: The minimum occurrences.
1059
+ :return: The occurrences of a given byte.
1061
1060
  """
1062
1061
 
1063
1062
  addr = start_addr
@@ -1078,6 +1077,70 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1078
1077
  return repeating_length
1079
1078
  return 0
1080
1079
 
1080
+ def _scan_for_consecutive_pointers(self, start_addr: int, threshold: int = 2) -> int:
1081
+ """
1082
+ Scan from a given address and determine if there are at least `threshold` of pointers.
1083
+
1084
+ This function will yield high numbers of false positives if the mapped memory regions are too low (for example,
1085
+ <= 0x100000). It is recommended to set `threshold` to a higher value in such cases.
1086
+
1087
+ :param start_addr: The address to start scanning from.
1088
+ :param threshold: The minimum number of pointers to be found.
1089
+ :return: The number of pointers found.
1090
+ """
1091
+
1092
+ current_object = self.project.loader.find_object_containing(start_addr)
1093
+ addr = start_addr
1094
+ pointer_count = 0
1095
+ pointer_size = self.project.arch.bytes
1096
+
1097
+ while self._inside_regions(addr):
1098
+ val = self._fast_memory_load_pointer(addr)
1099
+ if val is None:
1100
+ break
1101
+ obj = self.project.loader.find_object_containing(val)
1102
+ if obj is not None and obj is current_object:
1103
+ pointer_count += 1
1104
+ else:
1105
+ break
1106
+ addr += pointer_size
1107
+
1108
+ if pointer_count >= threshold:
1109
+ return pointer_count
1110
+ return 0
1111
+
1112
+ def _scan_for_mixed_pointers(self, start_addr: int, threshold: int = 3, window: int = 6) -> int:
1113
+ """
1114
+ Scan from a given address and determine if there are at least `threshold` of pointers within a given window of pointers.
1115
+
1116
+ This function will yield high numbers of false positives if the mapped memory regions are too low (for example,
1117
+ <= 0x100000). It is recommended to set `threshold` to a higher value in such cases.
1118
+
1119
+ :param start_addr: The address to start scanning from.
1120
+ :param threshold: The minimum number of pointers to be found.
1121
+ :return: The number of pointers found.
1122
+ """
1123
+
1124
+ current_object = self.project.loader.find_object_containing(start_addr)
1125
+ addr = start_addr
1126
+ ctr = 0
1127
+ pointer_count = 0
1128
+ pointer_size = self.project.arch.bytes
1129
+
1130
+ while self._inside_regions(addr) and ctr < window:
1131
+ ctr += 1
1132
+ val = self._fast_memory_load_pointer(addr)
1133
+ if val is None:
1134
+ break
1135
+ obj = self.project.loader.find_object_containing(val)
1136
+ if obj is not None and obj is current_object:
1137
+ pointer_count += 1
1138
+ addr += pointer_size
1139
+
1140
+ if pointer_count >= threshold:
1141
+ return ctr
1142
+ return 0
1143
+
1081
1144
  def _next_code_addr_core(self):
1082
1145
  """
1083
1146
  Call _next_unscanned_addr() first to get the next address that is not scanned. Then check if data locates at
@@ -1091,35 +1154,83 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1091
1154
  start_addr = next_addr
1092
1155
 
1093
1156
  while True:
1094
- string_length = self._scan_for_printable_strings(start_addr)
1095
- if string_length == 0:
1096
- string_length = self._scan_for_printable_widestrings(start_addr)
1097
-
1098
- if string_length:
1099
- self._seg_list.occupy(start_addr, string_length, "string")
1100
- start_addr += string_length
1157
+ pointer_length, string_length, cc_length = 0, 0, 0
1158
+ matched_something = False
1159
+
1160
+ if start_addr % self.project.arch.bytes == 0:
1161
+ # find potential pointer array
1162
+ threshold = 6 if start_addr <= 0x100000 else 1
1163
+ pointer_count = self._scan_for_consecutive_pointers(start_addr, threshold=threshold)
1164
+ pointer_length = pointer_count * self.project.arch.bytes
1165
+
1166
+ if pointer_length:
1167
+ matched_something = True
1168
+ self._seg_list.occupy(start_addr, pointer_length, "pointer-array")
1169
+ self.model.memory_data[start_addr] = MemoryData(
1170
+ start_addr, pointer_length, MemoryDataSort.PointerArray
1171
+ )
1172
+ start_addr += pointer_length
1173
+
1174
+ elif start_addr <= 0x100000:
1175
+ # for high addresses, all pointers have been found in _scan_for_consecutive_pointers() because we
1176
+ # set threshold there to 1
1177
+ threshold = 4
1178
+ pointer_count = self._scan_for_mixed_pointers(start_addr, threshold=threshold, window=6)
1179
+ pointer_length = pointer_count * self.project.arch.bytes
1180
+
1181
+ if pointer_length:
1182
+ matched_something = True
1183
+ self._seg_list.occupy(start_addr, pointer_length, "pointer-array")
1184
+ self.model.memory_data[start_addr] = MemoryData(
1185
+ start_addr, pointer_length, MemoryDataSort.PointerArray
1186
+ )
1187
+ start_addr += pointer_length
1188
+
1189
+ if not matched_something:
1190
+ # find strings
1191
+ is_widestring = False
1192
+ string_length = self._scan_for_printable_strings(start_addr)
1193
+ if string_length == 0:
1194
+ is_widestring = True
1195
+ string_length = self._scan_for_printable_widestrings(start_addr)
1196
+
1197
+ if string_length:
1198
+ matched_something = True
1199
+ self._seg_list.occupy(start_addr, string_length, "string")
1200
+ md = MemoryData(
1201
+ start_addr,
1202
+ string_length,
1203
+ MemoryDataSort.String if not is_widestring else MemoryDataSort.UnicodeString,
1204
+ )
1205
+ md.fill_content(self.project.loader)
1206
+ self.model.memory_data[start_addr] = md
1207
+ start_addr += string_length
1101
1208
 
1102
- if self.project.arch.name in ("X86", "AMD64"):
1209
+ if not matched_something and self.project.arch.name in {"X86", "AMD64"}:
1103
1210
  cc_length = self._scan_for_repeating_bytes(start_addr, 0xCC, threshold=1)
1104
1211
  if cc_length:
1212
+ matched_something = True
1105
1213
  self._seg_list.occupy(start_addr, cc_length, "alignment")
1214
+ self.model.memory_data[start_addr] = MemoryData(start_addr, cc_length, MemoryDataSort.Alignment)
1106
1215
  start_addr += cc_length
1107
- else:
1108
- cc_length = 0
1109
1216
 
1110
1217
  zeros_length = self._scan_for_repeating_bytes(start_addr, 0x00)
1111
1218
  if zeros_length:
1219
+ matched_something = True
1112
1220
  self._seg_list.occupy(start_addr, zeros_length, "alignment")
1221
+ self.model.memory_data[start_addr] = MemoryData(start_addr, zeros_length, MemoryDataSort.Alignment)
1113
1222
  start_addr += zeros_length
1114
1223
 
1115
- if string_length == 0 and cc_length == 0 and zeros_length == 0:
1224
+ if not matched_something:
1116
1225
  # umm now it's probably code
1117
1226
  break
1118
1227
 
1119
1228
  instr_alignment = self._initial_state.arch.instruction_alignment
1120
1229
  if start_addr % instr_alignment > 0:
1121
1230
  # occupy those few bytes
1122
- self._seg_list.occupy(start_addr, instr_alignment - (start_addr % instr_alignment), "alignment")
1231
+ size = instr_alignment - (start_addr % instr_alignment)
1232
+ self._seg_list.occupy(start_addr, size, "alignment")
1233
+ self.model.memory_data[start_addr] = MemoryData(start_addr, size, MemoryDataSort.Unknown)
1123
1234
  start_addr = start_addr - start_addr % instr_alignment + instr_alignment
1124
1235
  # trickiness: aligning the start_addr may create a new address that is outside any mapped region.
1125
1236
  if not self._inside_regions(start_addr):
@@ -4272,7 +4383,6 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
4272
4383
  # Let's try to create the pyvex IRSB directly, since it's much faster
4273
4384
  nodecode = False
4274
4385
  irsb = None
4275
- irsb_string = None
4276
4386
  lifted_block = None
4277
4387
  try:
4278
4388
  lifted_block = self._lift(
@@ -4283,11 +4393,12 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
4283
4393
  load_from_ro_regions=True,
4284
4394
  initial_regs=initial_regs,
4285
4395
  )
4286
- irsb = lifted_block.vex_nostmt
4287
- irsb_string = lifted_block.bytes[: irsb.size]
4396
+ irsb = lifted_block.vex_nostmt # may raise SimTranslationError
4288
4397
  except SimTranslationError:
4289
4398
  nodecode = True
4290
4399
 
4400
+ irsb_string: bytes = lifted_block.bytes[: irsb.size] if irsb is not None else lifted_block.bytes
4401
+
4291
4402
  # special logic during the complete scanning phase
4292
4403
  if cfg_job.job_type == CFGJobType.COMPLETE_SCANNING and is_arm_arch(self.project.arch):
4293
4404
  # it's way too easy to incorrectly disassemble THUMB code contains 0x4f as ARM code svc?? #????
@@ -4324,10 +4435,11 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
4324
4435
  initial_regs=initial_regs,
4325
4436
  )
4326
4437
  irsb = lifted_block.vex_nostmt
4327
- irsb_string = lifted_block.bytes[: irsb.size]
4328
4438
  except SimTranslationError:
4329
4439
  nodecode = True
4330
4440
 
4441
+ irsb_string: bytes = lifted_block.bytes[: irsb.size] if irsb is not None else lifted_block.bytes
4442
+
4331
4443
  if not (nodecode or irsb.size == 0 or irsb.jumpkind == "Ijk_NoDecode"):
4332
4444
  # it is decodeable
4333
4445
  if current_function_addr == addr:
@@ -4397,7 +4509,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
4397
4509
  nodecode_size = 1
4398
4510
 
4399
4511
  # special handling for ud, ud1, and ud2 on x86 and x86-64
4400
- if irsb_string[-2:] == b"\x0f\x0b" and self.project.arch.name == "AMD64":
4512
+ if self.project.arch.name == "AMD64" and irsb_string[-2:] == b"\x0f\x0b":
4401
4513
  # VEX supports ud2 and make it part of the block size, only in AMD64.
4402
4514
  valid_ins = True
4403
4515
  nodecode_size = 0
angr/analyses/decompiler/ail_simplifier.py CHANGED
@@ -475,7 +475,7 @@ class AILSimplifier(Analysis):
475
475
  assert is_phi_assignment(stmt)
476
476
 
477
477
  for _, vvar in stmt.src.src_and_vvars:
478
- if vvar.varid == def_.atom.varid:
478
+ if vvar is not None and vvar.varid == def_.atom.varid:
479
479
  use_exprs.append((vvar, loc, ("phi-src-expr", (vvar,))))
480
480
 
481
481
  # replace all uses if necessary
angr/analyses/decompiler/clinic.py CHANGED
@@ -12,6 +12,7 @@ import capstone
12
12
 
13
13
  import ailment
14
14
 
15
+ from angr.errors import AngrDecompilationError
15
16
  from ...knowledge_base import KnowledgeBase
16
17
  from ...knowledge_plugins.functions import Function
17
18
  from ...knowledge_plugins.cfg.memory_data import MemoryDataSort
@@ -1210,6 +1211,7 @@ class Clinic(Analysis):
1210
1211
  # of the graph is applied
1211
1212
  self.unoptimized_graph = self._copy_graph(ail_graph)
1212
1213
 
1214
+ pass_ = timethis(pass_)
1213
1215
  a = pass_(
1214
1216
  self.function,
1215
1217
  blocks_by_addr=addr_to_blocks,
@@ -1794,21 +1796,30 @@ class Clinic(Analysis):
1794
1796
  if blocks_by_addr_and_size is None:
1795
1797
  blocks_by_addr_and_size = self._blocks_by_addr_and_size
1796
1798
 
1797
- node_to_block_mapping = {}
1798
1799
  graph = networkx.DiGraph()
1799
1800
 
1800
- for node in func_graph.nodes():
1801
- ail_block = blocks_by_addr_and_size.get((node.addr, node.size), node)
1802
- node_to_block_mapping[node] = ail_block
1803
-
1804
- if ail_block is not None:
1805
- graph.add_node(ail_block)
1806
-
1807
- for src_node, dst_node, data in func_graph.edges(data=True):
1808
- src = node_to_block_mapping[src_node]
1809
- dst = node_to_block_mapping[dst_node]
1801
+ entry_node = next(iter(node for node in func_graph if node.addr == self._entry_node_addr[0]), None)
1802
+ if entry_node is None:
1803
+ raise AngrDecompilationError(
1804
+ f"Entry node with address {self._entry_node_addr[0]:#x} not found in the function graph"
1805
+ )
1810
1806
 
1811
- if dst is not None:
1807
+ # add the entry node into the graph
1808
+ ail_block = blocks_by_addr_and_size.get((entry_node.addr, entry_node.size))
1809
+ if ail_block is None:
1810
+ raise AngrDecompilationError(f"AIL block at address {entry_node.addr:#x} not found")
1811
+ graph.add_node(ail_block)
1812
+
1813
+ # get all descendants and only include them in the AIL graph.
1814
+ # this way all unreachable blocks will be excluded from the AIL graph.
1815
+ descendants = networkx.descendants(func_graph, entry_node) | {entry_node}
1816
+ for src_node, dst_node, data in networkx.subgraph_view(
1817
+ func_graph, filter_node=lambda n: n in descendants
1818
+ ).edges(data=True):
1819
+ src = blocks_by_addr_and_size.get((src_node.addr, src_node.size))
1820
+ dst = blocks_by_addr_and_size.get((dst_node.addr, dst_node.size))
1821
+
1822
+ if src is not None and dst is not None:
1812
1823
  graph.add_edge(src, dst, **data)
1813
1824
 
1814
1825
  return graph
angr/analyses/decompiler/condition_processor.py CHANGED
@@ -56,6 +56,25 @@ _UNIFIABLE_COMPARISONS = {
56
56
  "SGE",
57
57
  }
58
58
 
59
+
60
+ _INVERSE_OPERATIONS = {
61
+ "__eq__": "__ne__",
62
+ "__ne__": "__eq__",
63
+ "__gt__": "__le__",
64
+ "__lt__": "__ge__",
65
+ "__ge__": "__lt__",
66
+ "__le__": "__gt__",
67
+ "ULT": "UGE",
68
+ "UGE": "ULT",
69
+ "UGT": "ULE",
70
+ "ULE": "UGT",
71
+ "SLT": "SGE",
72
+ "SGE": "SLT",
73
+ "SLE": "SGT",
74
+ "SGT": "SLE",
75
+ }
76
+
77
+
59
78
  #
60
79
  # Util methods and mapping used during AIL AST to claripy AST conversion
61
80
  #
@@ -138,6 +157,7 @@ _ail2claripy_op_mapping = {
138
157
  "SBorrow": lambda expr, _, m: _dummy_bvs(expr, m),
139
158
  "ExpCmpNE": lambda expr, _, m: _dummy_bools(expr, m),
140
159
  "CmpORD": lambda expr, _, m: _dummy_bvs(expr, m), # in case CmpORDRewriter fails
160
+ "GetMSBs": lambda expr, _, m: _dummy_bvs(expr, m),
141
161
  }
142
162
 
143
163
  #
@@ -178,7 +198,7 @@ class ConditionProcessor:
178
198
  predicate = self._extract_predicate(src, dst, edge_type)
179
199
  except EmptyBlockNotice:
180
200
  # catch empty block notice - although this should not really happen
181
- predicate = claripy.true
201
+ predicate = claripy.true()
182
202
  return predicate
183
203
 
184
204
  def recover_edge_conditions(self, region, graph=None) -> dict:
@@ -254,15 +274,15 @@ class ConditionProcessor:
254
274
 
255
275
  if node is head:
256
276
  # the head is always reachable
257
- reaching_condition = claripy.true
277
+ reaching_condition = claripy.true()
258
278
  elif idoms is not None and _strictly_postdominates(idoms, node, head):
259
279
  # the node that post dominates the head is always reachable
260
- reaching_conditions[node] = claripy.true
280
+ reaching_conditions[node] = claripy.true()
261
281
  else:
262
282
  for pred in preds:
263
283
  edge = (pred, node)
264
- pred_condition = reaching_conditions.get(pred, claripy.true)
265
- edge_condition = edge_conditions.get(edge, claripy.true)
284
+ pred_condition = reaching_conditions.get(pred, claripy.true())
285
+ edge_condition = edge_conditions.get(edge, claripy.true())
266
286
 
267
287
  if reaching_condition is None:
268
288
  reaching_condition = claripy.And(pred_condition, edge_condition)
@@ -596,7 +616,7 @@ class ConditionProcessor:
596
616
  return claripy.Not(bool_var)
597
617
 
598
618
  if type(src_block) is GraphRegion:
599
- return claripy.true
619
+ return claripy.true()
600
620
 
601
621
  # sometimes the last statement is the conditional jump. sometimes it's the first statement of the block
602
622
  if (
@@ -609,10 +629,10 @@ class ConditionProcessor:
609
629
  last_stmt = self.get_last_statement(src_block)
610
630
 
611
631
  if last_stmt is None:
612
- return claripy.true
632
+ return claripy.true()
613
633
  if type(last_stmt) is ailment.Stmt.Jump:
614
634
  if isinstance(last_stmt.target, ailment.Expr.Const):
615
- return claripy.true
635
+ return claripy.true()
616
636
  # indirect jump
617
637
  target_ast = self.claripy_ast_from_ail_condition(last_stmt.target)
618
638
  return target_ast == dst_block.addr
@@ -622,7 +642,7 @@ class ConditionProcessor:
622
642
  return bool_var
623
643
  return claripy.Not(bool_var)
624
644
 
625
- return claripy.true
645
+ return claripy.true()
626
646
 
627
647
  #
628
648
  # Expression conversion
@@ -727,6 +747,7 @@ class ConditionProcessor:
727
747
  "ZeroExt": lambda cond_, tags: _binary_op_reduce(
728
748
  "Concat", [claripy.BVV(0, cond_.args[0]), cond_.args[1]], tags
729
749
  ),
750
+ "Concat": lambda cond_, tags: _binary_op_reduce("Concat", cond_.args, tags),
730
751
  }
731
752
 
732
753
  if cond.op in _mapping:
@@ -780,8 +801,8 @@ class ConditionProcessor:
780
801
  var = claripy.BoolV(condition.value)
781
802
  else:
782
803
  var = claripy.BVV(condition.value, condition.bits)
783
- if isinstance(var, claripy.Bits) and var.size() == 1:
784
- var = claripy.true if var.concrete_value == 1 else claripy.false
804
+ if isinstance(var, claripy.ast.Bits) and var.size() == 1:
805
+ var = claripy.true() if var.concrete_value == 1 else claripy.false()
785
806
  return var
786
807
  if isinstance(condition, ailment.Expr.Tmp):
787
808
  l.warning("Left-over ailment.Tmp variable %s.", condition)
@@ -839,7 +860,7 @@ class ConditionProcessor:
839
860
 
840
861
  if ast.op in _UNIFIABLE_COMPARISONS:
841
862
  # unify comparisons to enable more simplification opportunities without going "deep" in sympy
842
- inverse_op = getattr(ast.args[0], claripy.operations.inverse_operations[ast.op])
863
+ inverse_op = getattr(ast.args[0], _INVERSE_OPERATIONS[ast.op])
843
864
  return sympy.Not(ConditionProcessor.claripy_ast_to_sympy_expr(inverse_op(ast.args[1]), memo=memo))
844
865
 
845
866
  if memo is not None and ast in memo:
@@ -860,9 +881,9 @@ class ConditionProcessor:
860
881
  if isinstance(expr, sympy.Not):
861
882
  return claripy.Not(ConditionProcessor.sympy_expr_to_claripy_ast(expr.args[0], memo))
862
883
  if isinstance(expr, sympy.logic.boolalg.BooleanTrue):
863
- return claripy.true
884
+ return claripy.true()
864
885
  if isinstance(expr, sympy.logic.boolalg.BooleanFalse):
865
- return claripy.false
886
+ return claripy.false()
866
887
  raise AngrRuntimeError("Unreachable reached")
867
888
 
868
889
  @staticmethod
@@ -1092,7 +1113,9 @@ class ConditionProcessor:
1092
1113
  for term in all_terms_without_negs:
1093
1114
  neg = negations.get(term)
1094
1115
 
1095
- replaced_with_true = ConditionProcessor._replace_term_in_ast(cond, term, claripy.true, neg, claripy.false)
1116
+ replaced_with_true = ConditionProcessor._replace_term_in_ast(
1117
+ cond, term, claripy.true(), neg, claripy.false()
1118
+ )
1096
1119
  sat0 = solver.satisfiable(
1097
1120
  extra_constraints=(
1098
1121
  cond,
@@ -1108,7 +1131,9 @@ class ConditionProcessor:
1108
1131
  if sat0 or sat1:
1109
1132
  continue
1110
1133
 
1111
- replaced_with_false = ConditionProcessor._replace_term_in_ast(cond, term, claripy.false, neg, claripy.true)
1134
+ replaced_with_false = ConditionProcessor._replace_term_in_ast(
1135
+ cond, term, claripy.false(), neg, claripy.true()
1136
+ )
1112
1137
  sat0 = solver.satisfiable(
1113
1138
  extra_constraints=(
1114
1139
  cond,
angr/analyses/decompiler/decompiler.py CHANGED
@@ -334,6 +334,7 @@ class Decompiler(Analysis):
334
334
  )
335
335
  continue
336
336
 
337
+ pass_ = timethis(pass_)
337
338
  a = pass_(
338
339
  self.func,
339
340
  blocks_by_addr=addr_to_blocks,
@@ -389,6 +390,7 @@ class Decompiler(Analysis):
389
390
  )
390
391
  continue
391
392
 
393
+ pass_ = timethis(pass_)
392
394
  a = pass_(
393
395
  self.func,
394
396
  blocks_by_addr=addr_to_blocks,
@@ -425,6 +427,7 @@ class Decompiler(Analysis):
425
427
  if pass_.STAGE != OptimizationPassStage.AFTER_STRUCTURING:
426
428
  continue
427
429
 
430
+ pass_ = timethis(pass_)
428
431
  a = pass_(self.func, seq=seq_node, **kwargs)
429
432
  if a.out_seq:
430
433
  seq_node = a.out_seq
angr/analyses/decompiler/jumptable_entry_condition_rewriter.py CHANGED
@@ -16,7 +16,7 @@ class JumpTableEntryConditionRewriter(SequenceWalker):
16
16
 
17
17
  def _process_expr(self, expr):
18
18
  if expr in self._jumptable_entry_conds:
19
- return claripy.true
19
+ return claripy.true()
20
20
 
21
21
  new_args = []
22
22
  replaced = False
angr/analyses/decompiler/optimization_passes/duplication_reverter/ail_merge_graph.py CHANGED
@@ -140,7 +140,7 @@ class AILMergeGraph:
140
140
  self.starts = []
141
141
  self.original_ends = []
142
142
 
143
- def create_conditionless_graph(self, starting_blocks: list[Block], graph_lcs):
143
+ def create_conditionless_graph(self, starting_blocks: list[Block], graph_lcs) -> dict[Block, Block] | None:
144
144
  # get all the original blocks (reverted from the LCS) and their split blocks.
145
145
  # split-blocks are blocks that need to be split at some stmt index to make the two blocks
146
146
  # equal across both graphs. At a highlevel, the first block in both matching graphs either need
@@ -180,9 +180,12 @@ class AILMergeGraph:
180
180
  # we create a new graph, full of the original blocks of the base, with blocks
181
181
  # that should be split replaced.
182
182
  # this graph is only the initial merge_graph needed, where only the blocks
183
- self.graph, update_blocks = self.clone_graph_replace_splits(
184
- nx.subgraph(self.original_graph, self.original_blocks[merge_base]), base_to_split
185
- )
183
+ subgraph = nx.subgraph(self.original_graph, self.original_blocks[merge_base])
184
+ # ensure all base blocks are within the subgraph
185
+ for block in base_to_split:
186
+ if block not in subgraph:
187
+ return None
188
+ self.graph, update_blocks = self.clone_graph_replace_splits(subgraph, base_to_split)
186
189
  self._update_all_split_refs(update_blocks)
187
190
  for update_block, new_block in update_blocks.items():
188
191
  if update_block in starting_blocks:
angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py CHANGED
@@ -51,7 +51,7 @@ class DuplicationReverter(StructuringOptimizationPass):
51
51
  strictly_less_gotos=False,
52
52
  recover_structure_fails=True,
53
53
  must_improve_rel_quality=True,
54
- max_opt_iters=30,
54
+ max_opt_iters=5,
55
55
  simplify_ail=True,
56
56
  require_gotos=True,
57
57
  readd_labels=True,
@@ -679,6 +679,10 @@ class DuplicationReverter(StructuringOptimizationPass):
679
679
  ail_merge_graph = AILMergeGraph(original_graph=graph)
680
680
  # some blocks in originals may update during this time (if-statements can change)
681
681
  update_blocks = ail_merge_graph.create_conditionless_graph(blocks, graph_lcs)
682
+ if update_blocks is None:
683
+ # failed to create the condition-less graph
684
+ self.candidate_blacklist.add(tuple(blocks))
685
+ raise SAILRSemanticError("Failed to create a condition-less graph, this analysis must skip it")
682
686
 
683
687
  #
684
688
  # SPECIAL CASE: the merged graph contains only 1 node and no splits
@@ -1170,9 +1174,9 @@ class DuplicationReverter(StructuringOptimizationPass):
1170
1174
  entry_blocks = [node for node in graph.nodes if graph.in_degree(node) == 0]
1171
1175
  entry_block = None if len(entry_blocks) != 1 else entry_blocks[0]
1172
1176
 
1173
- self._entry_node_cache[graph] = entry_block
1174
1177
  if entry_block is None:
1175
1178
  return None
1179
+ self._entry_node_cache[graph] = entry_block
1176
1180
 
1177
1181
  entry_blk = self._entry_node_cache[graph]
1178
1182