angr 9.2.118__py3-none-manylinux2014_x86_64.whl → 9.2.119__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/analysis.py +43 -1
- angr/analyses/cfg/cfg_fast.py +135 -23
- angr/analyses/decompiler/ail_simplifier.py +1 -1
- angr/analyses/decompiler/clinic.py +23 -12
- angr/analyses/decompiler/condition_processor.py +41 -16
- angr/analyses/decompiler/decompiler.py +3 -0
- angr/analyses/decompiler/jumptable_entry_condition_rewriter.py +1 -1
- angr/analyses/decompiler/optimization_passes/duplication_reverter/ail_merge_graph.py +7 -4
- angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +6 -2
- angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +19 -19
- angr/analyses/decompiler/structured_codegen/c.py +9 -2
- angr/analyses/decompiler/structuring/dream.py +8 -7
- angr/analyses/decompiler/structuring/phoenix.py +3 -3
- angr/analyses/propagator/engine_ail.py +2 -1
- angr/analyses/reaching_definitions/function_handler.py +6 -2
- angr/analyses/stack_pointer_tracker.py +29 -11
- angr/analyses/typehoon/translator.py +19 -2
- angr/analyses/typehoon/typeconsts.py +8 -0
- angr/analyses/variable_recovery/engine_vex.py +7 -10
- angr/calling_conventions.py +69 -24
- angr/concretization_strategies/norepeats.py +3 -3
- angr/engines/concrete.py +1 -1
- angr/engines/light/engine.py +6 -11
- angr/engines/pcode/engine.py +2 -2
- angr/engines/soot/engine.py +5 -5
- angr/engines/soot/expressions/condition.py +1 -1
- angr/engines/soot/statements/goto.py +1 -1
- angr/engines/soot/statements/if_.py +1 -1
- angr/engines/soot/statements/throw.py +1 -1
- angr/engines/successors.py +1 -1
- angr/engines/unicorn.py +2 -2
- angr/engines/vex/heavy/heavy.py +2 -2
- angr/errors.py +4 -0
- angr/exploration_techniques/driller_core.py +2 -3
- angr/exploration_techniques/suggestions.py +2 -2
- angr/knowledge_plugins/cfg/cfg_model.py +2 -1
- angr/knowledge_plugins/cfg/memory_data.py +1 -0
- angr/misc/telemetry.py +54 -0
- angr/procedures/java/unconstrained.py +1 -1
- angr/procedures/java_jni/__init__.py +21 -13
- angr/procedures/java_jni/string_operations.py +1 -1
- angr/procedures/java_lang/double.py +1 -1
- angr/procedures/java_lang/string.py +1 -1
- angr/procedures/java_util/scanner_nextline.py +1 -1
- angr/procedures/linux_kernel/vsyscall.py +1 -1
- angr/procedures/stubs/Redirect.py +1 -1
- angr/procedures/stubs/UserHook.py +1 -1
- angr/procedures/stubs/format_parser.py +1 -1
- angr/sim_procedure.py +5 -5
- angr/sim_state.py +21 -34
- angr/sim_type.py +42 -0
- angr/simos/javavm.py +7 -12
- angr/simos/linux.py +1 -1
- angr/simos/simos.py +1 -1
- angr/simos/windows.py +1 -1
- angr/state_hierarchy.py +1 -1
- angr/state_plugins/preconstrainer.py +2 -2
- angr/state_plugins/scratch.py +1 -1
- angr/state_plugins/solver.py +1 -1
- angr/state_plugins/trace_additions.py +8 -8
- angr/storage/file.py +12 -12
- angr/storage/memory_mixins/actions_mixin.py +1 -1
- angr/storage/memory_mixins/convenient_mappings_mixin.py +6 -8
- angr/storage/memory_mixins/multi_value_merger_mixin.py +5 -5
- angr/storage/memory_mixins/paged_memory/pages/ultra_page.py +1 -1
- angr/storage/memory_mixins/size_resolution_mixin.py +1 -1
- angr/storage/memory_mixins/smart_find_mixin.py +2 -2
- angr/storage/memory_object.py +7 -9
- angr/utils/timing.py +30 -18
- {angr-9.2.118.dist-info → angr-9.2.119.dist-info}/METADATA +8 -6
- {angr-9.2.118.dist-info → angr-9.2.119.dist-info}/RECORD +76 -75
- {angr-9.2.118.dist-info → angr-9.2.119.dist-info}/LICENSE +0 -0
- {angr-9.2.118.dist-info → angr-9.2.119.dist-info}/WHEEL +0 -0
- {angr-9.2.118.dist-info → angr-9.2.119.dist-info}/entry_points.txt +0 -0
- {angr-9.2.118.dist-info → angr-9.2.119.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
angr/analyses/analysis.py
CHANGED
|
@@ -5,8 +5,10 @@ import sys
|
|
|
5
5
|
import contextlib
|
|
6
6
|
from collections import defaultdict
|
|
7
7
|
from inspect import Signature
|
|
8
|
-
from typing import TYPE_CHECKING, TypeVar,
|
|
8
|
+
from typing import TYPE_CHECKING, TypeVar, Generic, cast
|
|
9
9
|
from collections.abc import Callable
|
|
10
|
+
from types import NoneType
|
|
11
|
+
from itertools import chain
|
|
10
12
|
|
|
11
13
|
import logging
|
|
12
14
|
import time
|
|
@@ -16,6 +18,7 @@ from rich import progress
|
|
|
16
18
|
|
|
17
19
|
from ..misc.plugins import PluginVendor, VendorPreset
|
|
18
20
|
from ..misc.ux import deprecated
|
|
21
|
+
from ..misc import telemetry
|
|
19
22
|
|
|
20
23
|
if TYPE_CHECKING:
|
|
21
24
|
from ..knowledge_base import KnowledgeBase
|
|
@@ -55,6 +58,7 @@ if TYPE_CHECKING:
|
|
|
55
58
|
AnalysisParams = ParamSpec("AnalysisParams")
|
|
56
59
|
|
|
57
60
|
l = logging.getLogger(name=__name__)
|
|
61
|
+
t = telemetry.get_tracer(name=__name__)
|
|
58
62
|
|
|
59
63
|
|
|
60
64
|
class AnalysisLogEntry:
|
|
@@ -186,7 +190,45 @@ class AnalysisFactory(Generic[A]):
|
|
|
186
190
|
show_progressbar: bool = False,
|
|
187
191
|
) -> type[A]:
|
|
188
192
|
@functools.wraps(self._analysis_cls.__init__)
|
|
193
|
+
@t.start_as_current_span(self._analysis_cls.__name__)
|
|
189
194
|
def wrapper(*args, **kwargs):
|
|
195
|
+
span = telemetry.get_current_span()
|
|
196
|
+
sig = cast(Signature, self.__call__.__func__.__signature__)
|
|
197
|
+
bound = sig.bind(None, *args, **kwargs)
|
|
198
|
+
for name, val in chain(bound.arguments.items(), bound.arguments.get("kwargs", {}).items()):
|
|
199
|
+
if name in ("kwargs", "self"):
|
|
200
|
+
continue
|
|
201
|
+
if isinstance(val, (str, bytes, bool, int, float, NoneType)):
|
|
202
|
+
if val is None:
|
|
203
|
+
span.set_attribute(f"arg.{name}.is_none", True)
|
|
204
|
+
else:
|
|
205
|
+
span.set_attribute(f"arg.{name}", val)
|
|
206
|
+
elif isinstance(val, (list, tuple, set, frozenset)):
|
|
207
|
+
listval = list(val)
|
|
208
|
+
if not listval or (
|
|
209
|
+
isinstance(listval[0], (str, bytes, bool, int, float))
|
|
210
|
+
and all(type(sval) == type(listval[0]) for sval in listval)
|
|
211
|
+
):
|
|
212
|
+
span.set_attribute(f"arg.{name}", listval)
|
|
213
|
+
elif isinstance(val, dict):
|
|
214
|
+
listval_keys = list(val)
|
|
215
|
+
listval_values = list(val.values())
|
|
216
|
+
if not listval_keys or (
|
|
217
|
+
isinstance(listval_keys[0], (str, bytes, bool, int, float))
|
|
218
|
+
and all(type(sval) == type(listval_keys[0]) for sval in listval_keys)
|
|
219
|
+
):
|
|
220
|
+
span.set_attribute(f"arg.{name}.keys", listval_keys)
|
|
221
|
+
if not listval_values or (
|
|
222
|
+
isinstance(listval_values[0], (str, bytes, bool, int, float))
|
|
223
|
+
and all(type(sval) == type(listval_values[0]) for sval in listval_values)
|
|
224
|
+
):
|
|
225
|
+
span.set_attribute(f"arg.{name}.values", listval_values)
|
|
226
|
+
else:
|
|
227
|
+
span.set_attribute(f"arg.{name}.unrepresentable", True)
|
|
228
|
+
if self._project.filename is not None:
|
|
229
|
+
span.set_attribute("project.binary_name", self._project.filename)
|
|
230
|
+
span.set_attribute("project.arch_name", self._project.arch.name)
|
|
231
|
+
|
|
190
232
|
oself = object.__new__(self._analysis_cls)
|
|
191
233
|
oself.named_errors = defaultdict(list)
|
|
192
234
|
oself.errors = []
|
angr/analyses/cfg/cfg_fast.py
CHANGED
|
@@ -1049,15 +1049,14 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
1049
1049
|
# no wide string is found
|
|
1050
1050
|
return 0
|
|
1051
1051
|
|
|
1052
|
-
def _scan_for_repeating_bytes(self, start_addr, repeating_byte, threshold=2):
|
|
1052
|
+
def _scan_for_repeating_bytes(self, start_addr: int, repeating_byte: int, threshold: int = 2) -> int:
|
|
1053
1053
|
"""
|
|
1054
1054
|
Scan from a given address and determine the occurrences of a given byte.
|
|
1055
1055
|
|
|
1056
|
-
:param
|
|
1057
|
-
:param
|
|
1058
|
-
:param
|
|
1059
|
-
:return:
|
|
1060
|
-
:rtype: int
|
|
1056
|
+
:param start_addr: The address in memory to start scanning.
|
|
1057
|
+
:param repeating_byte: The repeating byte to scan for.
|
|
1058
|
+
:param threshold: The minimum occurrences.
|
|
1059
|
+
:return: The occurrences of a given byte.
|
|
1061
1060
|
"""
|
|
1062
1061
|
|
|
1063
1062
|
addr = start_addr
|
|
@@ -1078,6 +1077,70 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
1078
1077
|
return repeating_length
|
|
1079
1078
|
return 0
|
|
1080
1079
|
|
|
1080
|
+
def _scan_for_consecutive_pointers(self, start_addr: int, threshold: int = 2) -> int:
|
|
1081
|
+
"""
|
|
1082
|
+
Scan from a given address and determine if there are at least `threshold` of pointers.
|
|
1083
|
+
|
|
1084
|
+
This function will yield high numbers of false positives if the mapped memory regions are too low (for example,
|
|
1085
|
+
<= 0x100000). It is recommended to set `threshold` to a higher value in such cases.
|
|
1086
|
+
|
|
1087
|
+
:param start_addr: The address to start scanning from.
|
|
1088
|
+
:param threshold: The minimum number of pointers to be found.
|
|
1089
|
+
:return: The number of pointers found.
|
|
1090
|
+
"""
|
|
1091
|
+
|
|
1092
|
+
current_object = self.project.loader.find_object_containing(start_addr)
|
|
1093
|
+
addr = start_addr
|
|
1094
|
+
pointer_count = 0
|
|
1095
|
+
pointer_size = self.project.arch.bytes
|
|
1096
|
+
|
|
1097
|
+
while self._inside_regions(addr):
|
|
1098
|
+
val = self._fast_memory_load_pointer(addr)
|
|
1099
|
+
if val is None:
|
|
1100
|
+
break
|
|
1101
|
+
obj = self.project.loader.find_object_containing(val)
|
|
1102
|
+
if obj is not None and obj is current_object:
|
|
1103
|
+
pointer_count += 1
|
|
1104
|
+
else:
|
|
1105
|
+
break
|
|
1106
|
+
addr += pointer_size
|
|
1107
|
+
|
|
1108
|
+
if pointer_count >= threshold:
|
|
1109
|
+
return pointer_count
|
|
1110
|
+
return 0
|
|
1111
|
+
|
|
1112
|
+
def _scan_for_mixed_pointers(self, start_addr: int, threshold: int = 3, window: int = 6) -> int:
|
|
1113
|
+
"""
|
|
1114
|
+
Scan from a given address and determine if there are at least `threshold` of pointers within a given window of pointers.
|
|
1115
|
+
|
|
1116
|
+
This function will yield high numbers of false positives if the mapped memory regions are too low (for example,
|
|
1117
|
+
<= 0x100000). It is recommended to set `threshold` to a higher value in such cases.
|
|
1118
|
+
|
|
1119
|
+
:param start_addr: The address to start scanning from.
|
|
1120
|
+
:param threshold: The minimum number of pointers to be found.
|
|
1121
|
+
:return: The number of pointers found.
|
|
1122
|
+
"""
|
|
1123
|
+
|
|
1124
|
+
current_object = self.project.loader.find_object_containing(start_addr)
|
|
1125
|
+
addr = start_addr
|
|
1126
|
+
ctr = 0
|
|
1127
|
+
pointer_count = 0
|
|
1128
|
+
pointer_size = self.project.arch.bytes
|
|
1129
|
+
|
|
1130
|
+
while self._inside_regions(addr) and ctr < window:
|
|
1131
|
+
ctr += 1
|
|
1132
|
+
val = self._fast_memory_load_pointer(addr)
|
|
1133
|
+
if val is None:
|
|
1134
|
+
break
|
|
1135
|
+
obj = self.project.loader.find_object_containing(val)
|
|
1136
|
+
if obj is not None and obj is current_object:
|
|
1137
|
+
pointer_count += 1
|
|
1138
|
+
addr += pointer_size
|
|
1139
|
+
|
|
1140
|
+
if pointer_count >= threshold:
|
|
1141
|
+
return ctr
|
|
1142
|
+
return 0
|
|
1143
|
+
|
|
1081
1144
|
def _next_code_addr_core(self):
|
|
1082
1145
|
"""
|
|
1083
1146
|
Call _next_unscanned_addr() first to get the next address that is not scanned. Then check if data locates at
|
|
@@ -1091,35 +1154,83 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
1091
1154
|
start_addr = next_addr
|
|
1092
1155
|
|
|
1093
1156
|
while True:
|
|
1094
|
-
string_length =
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
start_addr
|
|
1157
|
+
pointer_length, string_length, cc_length = 0, 0, 0
|
|
1158
|
+
matched_something = False
|
|
1159
|
+
|
|
1160
|
+
if start_addr % self.project.arch.bytes == 0:
|
|
1161
|
+
# find potential pointer array
|
|
1162
|
+
threshold = 6 if start_addr <= 0x100000 else 1
|
|
1163
|
+
pointer_count = self._scan_for_consecutive_pointers(start_addr, threshold=threshold)
|
|
1164
|
+
pointer_length = pointer_count * self.project.arch.bytes
|
|
1165
|
+
|
|
1166
|
+
if pointer_length:
|
|
1167
|
+
matched_something = True
|
|
1168
|
+
self._seg_list.occupy(start_addr, pointer_length, "pointer-array")
|
|
1169
|
+
self.model.memory_data[start_addr] = MemoryData(
|
|
1170
|
+
start_addr, pointer_length, MemoryDataSort.PointerArray
|
|
1171
|
+
)
|
|
1172
|
+
start_addr += pointer_length
|
|
1173
|
+
|
|
1174
|
+
elif start_addr <= 0x100000:
|
|
1175
|
+
# for high addresses, all pointers have been found in _scan_for_consecutive_pointers() because we
|
|
1176
|
+
# set threshold there to 1
|
|
1177
|
+
threshold = 4
|
|
1178
|
+
pointer_count = self._scan_for_mixed_pointers(start_addr, threshold=threshold, window=6)
|
|
1179
|
+
pointer_length = pointer_count * self.project.arch.bytes
|
|
1180
|
+
|
|
1181
|
+
if pointer_length:
|
|
1182
|
+
matched_something = True
|
|
1183
|
+
self._seg_list.occupy(start_addr, pointer_length, "pointer-array")
|
|
1184
|
+
self.model.memory_data[start_addr] = MemoryData(
|
|
1185
|
+
start_addr, pointer_length, MemoryDataSort.PointerArray
|
|
1186
|
+
)
|
|
1187
|
+
start_addr += pointer_length
|
|
1188
|
+
|
|
1189
|
+
if not matched_something:
|
|
1190
|
+
# find strings
|
|
1191
|
+
is_widestring = False
|
|
1192
|
+
string_length = self._scan_for_printable_strings(start_addr)
|
|
1193
|
+
if string_length == 0:
|
|
1194
|
+
is_widestring = True
|
|
1195
|
+
string_length = self._scan_for_printable_widestrings(start_addr)
|
|
1196
|
+
|
|
1197
|
+
if string_length:
|
|
1198
|
+
matched_something = True
|
|
1199
|
+
self._seg_list.occupy(start_addr, string_length, "string")
|
|
1200
|
+
md = MemoryData(
|
|
1201
|
+
start_addr,
|
|
1202
|
+
string_length,
|
|
1203
|
+
MemoryDataSort.String if not is_widestring else MemoryDataSort.UnicodeString,
|
|
1204
|
+
)
|
|
1205
|
+
md.fill_content(self.project.loader)
|
|
1206
|
+
self.model.memory_data[start_addr] = md
|
|
1207
|
+
start_addr += string_length
|
|
1101
1208
|
|
|
1102
|
-
if self.project.arch.name in
|
|
1209
|
+
if not matched_something and self.project.arch.name in {"X86", "AMD64"}:
|
|
1103
1210
|
cc_length = self._scan_for_repeating_bytes(start_addr, 0xCC, threshold=1)
|
|
1104
1211
|
if cc_length:
|
|
1212
|
+
matched_something = True
|
|
1105
1213
|
self._seg_list.occupy(start_addr, cc_length, "alignment")
|
|
1214
|
+
self.model.memory_data[start_addr] = MemoryData(start_addr, cc_length, MemoryDataSort.Alignment)
|
|
1106
1215
|
start_addr += cc_length
|
|
1107
|
-
else:
|
|
1108
|
-
cc_length = 0
|
|
1109
1216
|
|
|
1110
1217
|
zeros_length = self._scan_for_repeating_bytes(start_addr, 0x00)
|
|
1111
1218
|
if zeros_length:
|
|
1219
|
+
matched_something = True
|
|
1112
1220
|
self._seg_list.occupy(start_addr, zeros_length, "alignment")
|
|
1221
|
+
self.model.memory_data[start_addr] = MemoryData(start_addr, zeros_length, MemoryDataSort.Alignment)
|
|
1113
1222
|
start_addr += zeros_length
|
|
1114
1223
|
|
|
1115
|
-
if
|
|
1224
|
+
if not matched_something:
|
|
1116
1225
|
# umm now it's probably code
|
|
1117
1226
|
break
|
|
1118
1227
|
|
|
1119
1228
|
instr_alignment = self._initial_state.arch.instruction_alignment
|
|
1120
1229
|
if start_addr % instr_alignment > 0:
|
|
1121
1230
|
# occupy those few bytes
|
|
1122
|
-
|
|
1231
|
+
size = instr_alignment - (start_addr % instr_alignment)
|
|
1232
|
+
self._seg_list.occupy(start_addr, size, "alignment")
|
|
1233
|
+
self.model.memory_data[start_addr] = MemoryData(start_addr, size, MemoryDataSort.Unknown)
|
|
1123
1234
|
start_addr = start_addr - start_addr % instr_alignment + instr_alignment
|
|
1124
1235
|
# trickiness: aligning the start_addr may create a new address that is outside any mapped region.
|
|
1125
1236
|
if not self._inside_regions(start_addr):
|
|
@@ -4272,7 +4383,6 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
4272
4383
|
# Let's try to create the pyvex IRSB directly, since it's much faster
|
|
4273
4384
|
nodecode = False
|
|
4274
4385
|
irsb = None
|
|
4275
|
-
irsb_string = None
|
|
4276
4386
|
lifted_block = None
|
|
4277
4387
|
try:
|
|
4278
4388
|
lifted_block = self._lift(
|
|
@@ -4283,11 +4393,12 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
4283
4393
|
load_from_ro_regions=True,
|
|
4284
4394
|
initial_regs=initial_regs,
|
|
4285
4395
|
)
|
|
4286
|
-
irsb = lifted_block.vex_nostmt
|
|
4287
|
-
irsb_string = lifted_block.bytes[: irsb.size]
|
|
4396
|
+
irsb = lifted_block.vex_nostmt # may raise SimTranslationError
|
|
4288
4397
|
except SimTranslationError:
|
|
4289
4398
|
nodecode = True
|
|
4290
4399
|
|
|
4400
|
+
irsb_string: bytes = lifted_block.bytes[: irsb.size] if irsb is not None else lifted_block.bytes
|
|
4401
|
+
|
|
4291
4402
|
# special logic during the complete scanning phase
|
|
4292
4403
|
if cfg_job.job_type == CFGJobType.COMPLETE_SCANNING and is_arm_arch(self.project.arch):
|
|
4293
4404
|
# it's way too easy to incorrectly disassemble THUMB code contains 0x4f as ARM code svc?? #????
|
|
@@ -4324,10 +4435,11 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
4324
4435
|
initial_regs=initial_regs,
|
|
4325
4436
|
)
|
|
4326
4437
|
irsb = lifted_block.vex_nostmt
|
|
4327
|
-
irsb_string = lifted_block.bytes[: irsb.size]
|
|
4328
4438
|
except SimTranslationError:
|
|
4329
4439
|
nodecode = True
|
|
4330
4440
|
|
|
4441
|
+
irsb_string: bytes = lifted_block.bytes[: irsb.size] if irsb is not None else lifted_block.bytes
|
|
4442
|
+
|
|
4331
4443
|
if not (nodecode or irsb.size == 0 or irsb.jumpkind == "Ijk_NoDecode"):
|
|
4332
4444
|
# it is decodeable
|
|
4333
4445
|
if current_function_addr == addr:
|
|
@@ -4397,7 +4509,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
4397
4509
|
nodecode_size = 1
|
|
4398
4510
|
|
|
4399
4511
|
# special handling for ud, ud1, and ud2 on x86 and x86-64
|
|
4400
|
-
if irsb_string[-2:] == b"\x0f\x0b"
|
|
4512
|
+
if self.project.arch.name == "AMD64" and irsb_string[-2:] == b"\x0f\x0b":
|
|
4401
4513
|
# VEX supports ud2 and make it part of the block size, only in AMD64.
|
|
4402
4514
|
valid_ins = True
|
|
4403
4515
|
nodecode_size = 0
|
|
@@ -475,7 +475,7 @@ class AILSimplifier(Analysis):
|
|
|
475
475
|
assert is_phi_assignment(stmt)
|
|
476
476
|
|
|
477
477
|
for _, vvar in stmt.src.src_and_vvars:
|
|
478
|
-
if vvar.varid == def_.atom.varid:
|
|
478
|
+
if vvar is not None and vvar.varid == def_.atom.varid:
|
|
479
479
|
use_exprs.append((vvar, loc, ("phi-src-expr", (vvar,))))
|
|
480
480
|
|
|
481
481
|
# replace all uses if necessary
|
|
@@ -12,6 +12,7 @@ import capstone
|
|
|
12
12
|
|
|
13
13
|
import ailment
|
|
14
14
|
|
|
15
|
+
from angr.errors import AngrDecompilationError
|
|
15
16
|
from ...knowledge_base import KnowledgeBase
|
|
16
17
|
from ...knowledge_plugins.functions import Function
|
|
17
18
|
from ...knowledge_plugins.cfg.memory_data import MemoryDataSort
|
|
@@ -1210,6 +1211,7 @@ class Clinic(Analysis):
|
|
|
1210
1211
|
# of the graph is applied
|
|
1211
1212
|
self.unoptimized_graph = self._copy_graph(ail_graph)
|
|
1212
1213
|
|
|
1214
|
+
pass_ = timethis(pass_)
|
|
1213
1215
|
a = pass_(
|
|
1214
1216
|
self.function,
|
|
1215
1217
|
blocks_by_addr=addr_to_blocks,
|
|
@@ -1794,21 +1796,30 @@ class Clinic(Analysis):
|
|
|
1794
1796
|
if blocks_by_addr_and_size is None:
|
|
1795
1797
|
blocks_by_addr_and_size = self._blocks_by_addr_and_size
|
|
1796
1798
|
|
|
1797
|
-
node_to_block_mapping = {}
|
|
1798
1799
|
graph = networkx.DiGraph()
|
|
1799
1800
|
|
|
1800
|
-
for node in func_graph.
|
|
1801
|
-
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
graph.add_node(ail_block)
|
|
1806
|
-
|
|
1807
|
-
for src_node, dst_node, data in func_graph.edges(data=True):
|
|
1808
|
-
src = node_to_block_mapping[src_node]
|
|
1809
|
-
dst = node_to_block_mapping[dst_node]
|
|
1801
|
+
entry_node = next(iter(node for node in func_graph if node.addr == self._entry_node_addr[0]), None)
|
|
1802
|
+
if entry_node is None:
|
|
1803
|
+
raise AngrDecompilationError(
|
|
1804
|
+
f"Entry node with address {self._entry_node_addr[0]:#x} not found in the function graph"
|
|
1805
|
+
)
|
|
1810
1806
|
|
|
1811
|
-
|
|
1807
|
+
# add the entry node into the graph
|
|
1808
|
+
ail_block = blocks_by_addr_and_size.get((entry_node.addr, entry_node.size))
|
|
1809
|
+
if ail_block is None:
|
|
1810
|
+
raise AngrDecompilationError(f"AIL block at address {entry_node.addr:#x} not found")
|
|
1811
|
+
graph.add_node(ail_block)
|
|
1812
|
+
|
|
1813
|
+
# get all descendants and only include them in the AIL graph.
|
|
1814
|
+
# this way all unreachable blocks will be excluded from the AIL graph.
|
|
1815
|
+
descendants = networkx.descendants(func_graph, entry_node) | {entry_node}
|
|
1816
|
+
for src_node, dst_node, data in networkx.subgraph_view(
|
|
1817
|
+
func_graph, filter_node=lambda n: n in descendants
|
|
1818
|
+
).edges(data=True):
|
|
1819
|
+
src = blocks_by_addr_and_size.get((src_node.addr, src_node.size))
|
|
1820
|
+
dst = blocks_by_addr_and_size.get((dst_node.addr, dst_node.size))
|
|
1821
|
+
|
|
1822
|
+
if src is not None and dst is not None:
|
|
1812
1823
|
graph.add_edge(src, dst, **data)
|
|
1813
1824
|
|
|
1814
1825
|
return graph
|
|
@@ -56,6 +56,25 @@ _UNIFIABLE_COMPARISONS = {
|
|
|
56
56
|
"SGE",
|
|
57
57
|
}
|
|
58
58
|
|
|
59
|
+
|
|
60
|
+
_INVERSE_OPERATIONS = {
|
|
61
|
+
"__eq__": "__ne__",
|
|
62
|
+
"__ne__": "__eq__",
|
|
63
|
+
"__gt__": "__le__",
|
|
64
|
+
"__lt__": "__ge__",
|
|
65
|
+
"__ge__": "__lt__",
|
|
66
|
+
"__le__": "__gt__",
|
|
67
|
+
"ULT": "UGE",
|
|
68
|
+
"UGE": "ULT",
|
|
69
|
+
"UGT": "ULE",
|
|
70
|
+
"ULE": "UGT",
|
|
71
|
+
"SLT": "SGE",
|
|
72
|
+
"SGE": "SLT",
|
|
73
|
+
"SLE": "SGT",
|
|
74
|
+
"SGT": "SLE",
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
|
|
59
78
|
#
|
|
60
79
|
# Util methods and mapping used during AIL AST to claripy AST conversion
|
|
61
80
|
#
|
|
@@ -138,6 +157,7 @@ _ail2claripy_op_mapping = {
|
|
|
138
157
|
"SBorrow": lambda expr, _, m: _dummy_bvs(expr, m),
|
|
139
158
|
"ExpCmpNE": lambda expr, _, m: _dummy_bools(expr, m),
|
|
140
159
|
"CmpORD": lambda expr, _, m: _dummy_bvs(expr, m), # in case CmpORDRewriter fails
|
|
160
|
+
"GetMSBs": lambda expr, _, m: _dummy_bvs(expr, m),
|
|
141
161
|
}
|
|
142
162
|
|
|
143
163
|
#
|
|
@@ -178,7 +198,7 @@ class ConditionProcessor:
|
|
|
178
198
|
predicate = self._extract_predicate(src, dst, edge_type)
|
|
179
199
|
except EmptyBlockNotice:
|
|
180
200
|
# catch empty block notice - although this should not really happen
|
|
181
|
-
predicate = claripy.true
|
|
201
|
+
predicate = claripy.true()
|
|
182
202
|
return predicate
|
|
183
203
|
|
|
184
204
|
def recover_edge_conditions(self, region, graph=None) -> dict:
|
|
@@ -254,15 +274,15 @@ class ConditionProcessor:
|
|
|
254
274
|
|
|
255
275
|
if node is head:
|
|
256
276
|
# the head is always reachable
|
|
257
|
-
reaching_condition = claripy.true
|
|
277
|
+
reaching_condition = claripy.true()
|
|
258
278
|
elif idoms is not None and _strictly_postdominates(idoms, node, head):
|
|
259
279
|
# the node that post dominates the head is always reachable
|
|
260
|
-
reaching_conditions[node] = claripy.true
|
|
280
|
+
reaching_conditions[node] = claripy.true()
|
|
261
281
|
else:
|
|
262
282
|
for pred in preds:
|
|
263
283
|
edge = (pred, node)
|
|
264
|
-
pred_condition = reaching_conditions.get(pred, claripy.true)
|
|
265
|
-
edge_condition = edge_conditions.get(edge, claripy.true)
|
|
284
|
+
pred_condition = reaching_conditions.get(pred, claripy.true())
|
|
285
|
+
edge_condition = edge_conditions.get(edge, claripy.true())
|
|
266
286
|
|
|
267
287
|
if reaching_condition is None:
|
|
268
288
|
reaching_condition = claripy.And(pred_condition, edge_condition)
|
|
@@ -596,7 +616,7 @@ class ConditionProcessor:
|
|
|
596
616
|
return claripy.Not(bool_var)
|
|
597
617
|
|
|
598
618
|
if type(src_block) is GraphRegion:
|
|
599
|
-
return claripy.true
|
|
619
|
+
return claripy.true()
|
|
600
620
|
|
|
601
621
|
# sometimes the last statement is the conditional jump. sometimes it's the first statement of the block
|
|
602
622
|
if (
|
|
@@ -609,10 +629,10 @@ class ConditionProcessor:
|
|
|
609
629
|
last_stmt = self.get_last_statement(src_block)
|
|
610
630
|
|
|
611
631
|
if last_stmt is None:
|
|
612
|
-
return claripy.true
|
|
632
|
+
return claripy.true()
|
|
613
633
|
if type(last_stmt) is ailment.Stmt.Jump:
|
|
614
634
|
if isinstance(last_stmt.target, ailment.Expr.Const):
|
|
615
|
-
return claripy.true
|
|
635
|
+
return claripy.true()
|
|
616
636
|
# indirect jump
|
|
617
637
|
target_ast = self.claripy_ast_from_ail_condition(last_stmt.target)
|
|
618
638
|
return target_ast == dst_block.addr
|
|
@@ -622,7 +642,7 @@ class ConditionProcessor:
|
|
|
622
642
|
return bool_var
|
|
623
643
|
return claripy.Not(bool_var)
|
|
624
644
|
|
|
625
|
-
return claripy.true
|
|
645
|
+
return claripy.true()
|
|
626
646
|
|
|
627
647
|
#
|
|
628
648
|
# Expression conversion
|
|
@@ -727,6 +747,7 @@ class ConditionProcessor:
|
|
|
727
747
|
"ZeroExt": lambda cond_, tags: _binary_op_reduce(
|
|
728
748
|
"Concat", [claripy.BVV(0, cond_.args[0]), cond_.args[1]], tags
|
|
729
749
|
),
|
|
750
|
+
"Concat": lambda cond_, tags: _binary_op_reduce("Concat", cond_.args, tags),
|
|
730
751
|
}
|
|
731
752
|
|
|
732
753
|
if cond.op in _mapping:
|
|
@@ -780,8 +801,8 @@ class ConditionProcessor:
|
|
|
780
801
|
var = claripy.BoolV(condition.value)
|
|
781
802
|
else:
|
|
782
803
|
var = claripy.BVV(condition.value, condition.bits)
|
|
783
|
-
if isinstance(var, claripy.Bits) and var.size() == 1:
|
|
784
|
-
var = claripy.true if var.concrete_value == 1 else claripy.false
|
|
804
|
+
if isinstance(var, claripy.ast.Bits) and var.size() == 1:
|
|
805
|
+
var = claripy.true() if var.concrete_value == 1 else claripy.false()
|
|
785
806
|
return var
|
|
786
807
|
if isinstance(condition, ailment.Expr.Tmp):
|
|
787
808
|
l.warning("Left-over ailment.Tmp variable %s.", condition)
|
|
@@ -839,7 +860,7 @@ class ConditionProcessor:
|
|
|
839
860
|
|
|
840
861
|
if ast.op in _UNIFIABLE_COMPARISONS:
|
|
841
862
|
# unify comparisons to enable more simplification opportunities without going "deep" in sympy
|
|
842
|
-
inverse_op = getattr(ast.args[0],
|
|
863
|
+
inverse_op = getattr(ast.args[0], _INVERSE_OPERATIONS[ast.op])
|
|
843
864
|
return sympy.Not(ConditionProcessor.claripy_ast_to_sympy_expr(inverse_op(ast.args[1]), memo=memo))
|
|
844
865
|
|
|
845
866
|
if memo is not None and ast in memo:
|
|
@@ -860,9 +881,9 @@ class ConditionProcessor:
|
|
|
860
881
|
if isinstance(expr, sympy.Not):
|
|
861
882
|
return claripy.Not(ConditionProcessor.sympy_expr_to_claripy_ast(expr.args[0], memo))
|
|
862
883
|
if isinstance(expr, sympy.logic.boolalg.BooleanTrue):
|
|
863
|
-
return claripy.true
|
|
884
|
+
return claripy.true()
|
|
864
885
|
if isinstance(expr, sympy.logic.boolalg.BooleanFalse):
|
|
865
|
-
return claripy.false
|
|
886
|
+
return claripy.false()
|
|
866
887
|
raise AngrRuntimeError("Unreachable reached")
|
|
867
888
|
|
|
868
889
|
@staticmethod
|
|
@@ -1092,7 +1113,9 @@ class ConditionProcessor:
|
|
|
1092
1113
|
for term in all_terms_without_negs:
|
|
1093
1114
|
neg = negations.get(term)
|
|
1094
1115
|
|
|
1095
|
-
replaced_with_true = ConditionProcessor._replace_term_in_ast(
|
|
1116
|
+
replaced_with_true = ConditionProcessor._replace_term_in_ast(
|
|
1117
|
+
cond, term, claripy.true(), neg, claripy.false()
|
|
1118
|
+
)
|
|
1096
1119
|
sat0 = solver.satisfiable(
|
|
1097
1120
|
extra_constraints=(
|
|
1098
1121
|
cond,
|
|
@@ -1108,7 +1131,9 @@ class ConditionProcessor:
|
|
|
1108
1131
|
if sat0 or sat1:
|
|
1109
1132
|
continue
|
|
1110
1133
|
|
|
1111
|
-
replaced_with_false = ConditionProcessor._replace_term_in_ast(
|
|
1134
|
+
replaced_with_false = ConditionProcessor._replace_term_in_ast(
|
|
1135
|
+
cond, term, claripy.false(), neg, claripy.true()
|
|
1136
|
+
)
|
|
1112
1137
|
sat0 = solver.satisfiable(
|
|
1113
1138
|
extra_constraints=(
|
|
1114
1139
|
cond,
|
|
@@ -334,6 +334,7 @@ class Decompiler(Analysis):
|
|
|
334
334
|
)
|
|
335
335
|
continue
|
|
336
336
|
|
|
337
|
+
pass_ = timethis(pass_)
|
|
337
338
|
a = pass_(
|
|
338
339
|
self.func,
|
|
339
340
|
blocks_by_addr=addr_to_blocks,
|
|
@@ -389,6 +390,7 @@ class Decompiler(Analysis):
|
|
|
389
390
|
)
|
|
390
391
|
continue
|
|
391
392
|
|
|
393
|
+
pass_ = timethis(pass_)
|
|
392
394
|
a = pass_(
|
|
393
395
|
self.func,
|
|
394
396
|
blocks_by_addr=addr_to_blocks,
|
|
@@ -425,6 +427,7 @@ class Decompiler(Analysis):
|
|
|
425
427
|
if pass_.STAGE != OptimizationPassStage.AFTER_STRUCTURING:
|
|
426
428
|
continue
|
|
427
429
|
|
|
430
|
+
pass_ = timethis(pass_)
|
|
428
431
|
a = pass_(self.func, seq=seq_node, **kwargs)
|
|
429
432
|
if a.out_seq:
|
|
430
433
|
seq_node = a.out_seq
|
|
@@ -140,7 +140,7 @@ class AILMergeGraph:
|
|
|
140
140
|
self.starts = []
|
|
141
141
|
self.original_ends = []
|
|
142
142
|
|
|
143
|
-
def create_conditionless_graph(self, starting_blocks: list[Block], graph_lcs):
|
|
143
|
+
def create_conditionless_graph(self, starting_blocks: list[Block], graph_lcs) -> dict[Block, Block] | None:
|
|
144
144
|
# get all the original blocks (reverted from the LCS) and their split blocks.
|
|
145
145
|
# split-blocks are blocks that need to be split at some stmt index to make the two blocks
|
|
146
146
|
# equal across both graphs. At a highlevel, the first block in both matching graphs either need
|
|
@@ -180,9 +180,12 @@ class AILMergeGraph:
|
|
|
180
180
|
# we create a new graph, full of the original blocks of the base, with blocks
|
|
181
181
|
# that should be split replaced.
|
|
182
182
|
# this graph is only the initial merge_graph needed, where only the blocks
|
|
183
|
-
self.
|
|
184
|
-
|
|
185
|
-
|
|
183
|
+
subgraph = nx.subgraph(self.original_graph, self.original_blocks[merge_base])
|
|
184
|
+
# ensure all base blocks are within the subgraph
|
|
185
|
+
for block in base_to_split:
|
|
186
|
+
if block not in subgraph:
|
|
187
|
+
return None
|
|
188
|
+
self.graph, update_blocks = self.clone_graph_replace_splits(subgraph, base_to_split)
|
|
186
189
|
self._update_all_split_refs(update_blocks)
|
|
187
190
|
for update_block, new_block in update_blocks.items():
|
|
188
191
|
if update_block in starting_blocks:
|
|
@@ -51,7 +51,7 @@ class DuplicationReverter(StructuringOptimizationPass):
|
|
|
51
51
|
strictly_less_gotos=False,
|
|
52
52
|
recover_structure_fails=True,
|
|
53
53
|
must_improve_rel_quality=True,
|
|
54
|
-
max_opt_iters=
|
|
54
|
+
max_opt_iters=5,
|
|
55
55
|
simplify_ail=True,
|
|
56
56
|
require_gotos=True,
|
|
57
57
|
readd_labels=True,
|
|
@@ -679,6 +679,10 @@ class DuplicationReverter(StructuringOptimizationPass):
|
|
|
679
679
|
ail_merge_graph = AILMergeGraph(original_graph=graph)
|
|
680
680
|
# some blocks in originals may update during this time (if-statements can change)
|
|
681
681
|
update_blocks = ail_merge_graph.create_conditionless_graph(blocks, graph_lcs)
|
|
682
|
+
if update_blocks is None:
|
|
683
|
+
# failed to create the condition-less graph
|
|
684
|
+
self.candidate_blacklist.add(tuple(blocks))
|
|
685
|
+
raise SAILRSemanticError("Failed to create a condition-less graph, this analysis must skip it")
|
|
682
686
|
|
|
683
687
|
#
|
|
684
688
|
# SPECIAL CASE: the merged graph contains only 1 node and no splits
|
|
@@ -1170,9 +1174,9 @@ class DuplicationReverter(StructuringOptimizationPass):
|
|
|
1170
1174
|
entry_blocks = [node for node in graph.nodes if graph.in_degree(node) == 0]
|
|
1171
1175
|
entry_block = None if len(entry_blocks) != 1 else entry_blocks[0]
|
|
1172
1176
|
|
|
1173
|
-
self._entry_node_cache[graph] = entry_block
|
|
1174
1177
|
if entry_block is None:
|
|
1175
1178
|
return None
|
|
1179
|
+
self._entry_node_cache[graph] = entry_block
|
|
1176
1180
|
|
|
1177
1181
|
entry_blk = self._entry_node_cache[graph]
|
|
1178
1182
|
|