angr 9.2.175__cp310-abi3-macosx_11_0_arm64.whl → 9.2.177__cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/calling_convention/calling_convention.py +12 -0
- angr/analyses/complete_calling_conventions.py +39 -26
- angr/analyses/decompiler/ail_simplifier.py +14 -12
- angr/analyses/decompiler/ccall_rewriters/rewriter_base.py +5 -1
- angr/analyses/decompiler/clinic.py +54 -40
- angr/analyses/decompiler/optimization_passes/ite_region_converter.py +3 -3
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +2 -2
- angr/analyses/decompiler/peephole_optimizations/__init__.py +4 -4
- angr/analyses/decompiler/peephole_optimizations/cas_intrinsics.py +69 -12
- angr/analyses/decompiler/peephole_optimizations/{inlined_wstrcpy.py → inlined_wcscpy.py} +16 -8
- angr/analyses/decompiler/peephole_optimizations/inlined_wcscpy_consolidation.py +296 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +14 -1
- angr/analyses/decompiler/structured_codegen/c.py +6 -5
- angr/analyses/decompiler/structuring/dream.py +2 -2
- angr/analyses/decompiler/structuring/phoenix.py +101 -23
- angr/analyses/decompiler/utils.py +10 -3
- angr/analyses/flirt/flirt.py +5 -4
- angr/analyses/stack_pointer_tracker.py +4 -3
- angr/analyses/typehoon/lifter.py +29 -18
- angr/analyses/typehoon/simple_solver.py +157 -50
- angr/analyses/typehoon/translator.py +34 -34
- angr/analyses/typehoon/typeconsts.py +33 -15
- angr/analyses/typehoon/typevars.py +9 -2
- angr/analyses/variable_recovery/engine_ail.py +43 -2
- angr/analyses/variable_recovery/engine_base.py +4 -1
- angr/analyses/variable_recovery/variable_recovery_fast.py +3 -1
- angr/emulator.py +2 -1
- angr/engines/hook.py +1 -1
- angr/engines/icicle.py +21 -5
- angr/engines/vex/claripy/ccall.py +3 -3
- angr/knowledge_plugins/functions/function.py +19 -2
- angr/procedures/definitions/__init__.py +9 -0
- angr/procedures/definitions/parse_win32json.py +11 -0
- angr/procedures/definitions/wdk/ntoskrnl.json +4 -0
- angr/procedures/posix/pthread.py +4 -4
- angr/procedures/stubs/format_parser.py +3 -3
- angr/rustylib.abi3.so +0 -0
- angr/sim_type.py +11 -6
- angr/simos/windows.py +1 -1
- angr/storage/memory_mixins/paged_memory/page_backer_mixins.py +1 -1
- angr/unicornlib.dylib +0 -0
- angr/utils/constants.py +1 -1
- angr/utils/strings.py +20 -0
- {angr-9.2.175.dist-info → angr-9.2.177.dist-info}/METADATA +5 -5
- {angr-9.2.175.dist-info → angr-9.2.177.dist-info}/RECORD +50 -49
- angr/analyses/decompiler/peephole_optimizations/inlined_wstrcpy_consolidation.py +0 -113
- {angr-9.2.175.dist-info → angr-9.2.177.dist-info}/WHEEL +0 -0
- {angr-9.2.175.dist-info → angr-9.2.177.dist-info}/entry_points.txt +0 -0
- {angr-9.2.175.dist-info → angr-9.2.177.dist-info}/licenses/LICENSE +0 -0
- {angr-9.2.175.dist-info → angr-9.2.177.dist-info}/top_level.txt +0 -0
|
@@ -17,17 +17,20 @@ ASCII_PRINTABLES = {ord(x) for x in string.printable}
|
|
|
17
17
|
ASCII_DIGITS = {ord(x) for x in string.digits}
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
class InlinedWstrcpy(PeepholeOptimizationStmtBase):
|
|
20
|
+
class InlinedWcscpy(PeepholeOptimizationStmtBase):
|
|
21
21
|
"""
|
|
22
|
-
Simplifies inlined wide string copying logic into calls to wstrcpy.
|
|
22
|
+
Simplifies inlined wide string copying logic into calls to wcscpy.
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
25
|
__slots__ = ()
|
|
26
26
|
|
|
27
|
-
NAME = "Simplifying inlined wstrcpy"
|
|
27
|
+
NAME = "Simplifying inlined wcscpy"
|
|
28
28
|
stmt_classes = (Assignment, Store)
|
|
29
29
|
|
|
30
30
|
def optimize(self, stmt: Assignment | Store, stmt_idx: int | None = None, block=None, **kwargs):
|
|
31
|
+
assert self.project is not None
|
|
32
|
+
assert self.kb is not None
|
|
33
|
+
|
|
31
34
|
if (
|
|
32
35
|
isinstance(stmt, Assignment)
|
|
33
36
|
and isinstance(stmt.dst, VirtualVariable)
|
|
@@ -48,11 +51,12 @@ class InlinedWstrcpy(PeepholeOptimizationStmtBase):
|
|
|
48
51
|
r, s = self.is_integer_likely_a_wide_string(value, value_size, self.project.arch.memory_endness)
|
|
49
52
|
if r:
|
|
50
53
|
# replace it with a call to strncpy
|
|
54
|
+
assert s is not None
|
|
51
55
|
str_id = self.kb.custom_strings.allocate(s)
|
|
52
56
|
wstr_type = SimTypePointer(SimTypeWideChar()).with_arch(self.project.arch)
|
|
53
57
|
return Call(
|
|
54
58
|
stmt.idx,
|
|
55
|
-
"
|
|
59
|
+
"wcsncpy",
|
|
56
60
|
args=[
|
|
57
61
|
dst,
|
|
58
62
|
Const(None, None, str_id, self.project.arch.bits, custom_string=True, type=wstr_type),
|
|
@@ -83,6 +87,7 @@ class InlinedWstrcpy(PeepholeOptimizationStmtBase):
|
|
|
83
87
|
integer, size = self.stride_to_int(stride)
|
|
84
88
|
r, s = self.is_integer_likely_a_wide_string(integer, size, Endness.BE, min_length=3)
|
|
85
89
|
if r:
|
|
90
|
+
assert s is not None
|
|
86
91
|
# we remove all involved statements whose statement IDs are greater than the current one
|
|
87
92
|
for _, stmt_idx_, _ in reversed(stride):
|
|
88
93
|
if stmt_idx_ <= stmt_idx:
|
|
@@ -94,7 +99,7 @@ class InlinedWstrcpy(PeepholeOptimizationStmtBase):
|
|
|
94
99
|
wstr_type = SimTypePointer(SimTypeWideChar()).with_arch(self.project.arch)
|
|
95
100
|
return Call(
|
|
96
101
|
stmt.idx,
|
|
97
|
-
"
|
|
102
|
+
"wcsncpy",
|
|
98
103
|
args=[
|
|
99
104
|
dst,
|
|
100
105
|
Const(None, None, str_id, self.project.arch.bits, custom_string=True, type=wstr_type),
|
|
@@ -112,11 +117,14 @@ class InlinedWstrcpy(PeepholeOptimizationStmtBase):
|
|
|
112
117
|
size = 0
|
|
113
118
|
for _, _, v in stride:
|
|
114
119
|
size += v.size
|
|
120
|
+
assert isinstance(v.value, int)
|
|
115
121
|
n <<= v.bits
|
|
116
122
|
n |= v.value
|
|
117
123
|
return n, size
|
|
118
124
|
|
|
119
125
|
def collect_constant_stores(self, block, starting_stmt_idx: int) -> dict[int, tuple[int, Const | None]]:
|
|
126
|
+
assert self.project is not None
|
|
127
|
+
|
|
120
128
|
r = {}
|
|
121
129
|
expected_store_varid: int | None = None
|
|
122
130
|
starting_stmt = block.statements[starting_stmt_idx]
|
|
@@ -224,7 +232,7 @@ class InlinedWstrcpy(PeepholeOptimizationStmtBase):
|
|
|
224
232
|
# unsupported endness
|
|
225
233
|
return False, None
|
|
226
234
|
|
|
227
|
-
if not (InlinedWstrcpy.even_offsets_are_zero(chars) or InlinedWstrcpy.odd_offsets_are_zero(chars)):
|
|
235
|
+
if not (InlinedWcscpy.even_offsets_are_zero(chars) or InlinedWcscpy.odd_offsets_are_zero(chars)):
|
|
228
236
|
return False, None
|
|
229
237
|
|
|
230
238
|
if chars and len(chars) >= 2 and chars[-1] == 0 and chars[-2] == 0:
|
|
@@ -236,11 +244,11 @@ class InlinedWstrcpy(PeepholeOptimizationStmtBase):
|
|
|
236
244
|
return False, None
|
|
237
245
|
|
|
238
246
|
@staticmethod
|
|
239
|
-
def is_inlined_wstrncpy(stmt: Statement) -> bool:
|
|
247
|
+
def is_inlined_wcsncpy(stmt: Statement) -> bool:
|
|
240
248
|
return (
|
|
241
249
|
isinstance(stmt, Call)
|
|
242
250
|
and isinstance(stmt.target, str)
|
|
243
|
-
and stmt.target == "wstrncpy"
|
|
251
|
+
and stmt.target == "wcsncpy"
|
|
244
252
|
and stmt.args is not None
|
|
245
253
|
and len(stmt.args) == 3
|
|
246
254
|
and isinstance(stmt.args[1], Const)
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
# pylint:disable=arguments-differ
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from angr.ailment.expression import Expression, BinaryOp, Const, Register, StackBaseOffset, UnaryOp, VirtualVariable
|
|
6
|
+
from angr.ailment.statement import Call, Store, Assignment
|
|
7
|
+
|
|
8
|
+
from angr.sim_type import SimTypePointer, SimTypeWideChar
|
|
9
|
+
from .base import PeepholeOptimizationMultiStmtBase
|
|
10
|
+
from .inlined_wcscpy import InlinedWcscpy
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from angr.ailment.statement import Statement
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def match_statements(stmts: list[Statement], index: int) -> int:
|
|
17
|
+
ending = index
|
|
18
|
+
has_wcsncpy = False
|
|
19
|
+
for i in range(index, len(stmts)):
|
|
20
|
+
stmt = stmts[i]
|
|
21
|
+
if isinstance(stmt, Call):
|
|
22
|
+
if InlinedWcscpy.is_inlined_wcsncpy(stmt):
|
|
23
|
+
has_wcsncpy = True
|
|
24
|
+
else:
|
|
25
|
+
break
|
|
26
|
+
elif isinstance(stmt, Store):
|
|
27
|
+
if not isinstance(stmt.data, Const):
|
|
28
|
+
break
|
|
29
|
+
_, off = InlinedWcscpyConsolidation._parse_addr(stmt.addr)
|
|
30
|
+
if off is None:
|
|
31
|
+
# unsupported offset - bail
|
|
32
|
+
break
|
|
33
|
+
elif (
|
|
34
|
+
isinstance(stmt, Assignment)
|
|
35
|
+
and isinstance(stmt.dst, VirtualVariable)
|
|
36
|
+
and stmt.dst.was_stack
|
|
37
|
+
and isinstance(stmt.src, Const)
|
|
38
|
+
):
|
|
39
|
+
pass
|
|
40
|
+
else:
|
|
41
|
+
break
|
|
42
|
+
ending = i + 1
|
|
43
|
+
return ending - index if has_wcsncpy and ending - index >= 2 else 0
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class InlinedWcscpyConsolidation(PeepholeOptimizationMultiStmtBase):
|
|
47
|
+
"""
|
|
48
|
+
Consolidate multiple inlined wcscpy/wcsncpy calls.
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
__slots__ = ()
|
|
52
|
+
|
|
53
|
+
NAME = "Consolidate multiple inlined wcsncpy calls"
|
|
54
|
+
stmt_classes = (match_statements,)
|
|
55
|
+
|
|
56
|
+
def optimize( # type:ignore
|
|
57
|
+
self, stmts: list[Call | Store | Assignment], stmt_idx: int | None = None, block=None, **kwargs
|
|
58
|
+
): # pylint:disable=unused-argument
|
|
59
|
+
reordered_stmts = self._reorder_stmts(stmts)
|
|
60
|
+
if not reordered_stmts or len(reordered_stmts) <= 1:
|
|
61
|
+
return None
|
|
62
|
+
|
|
63
|
+
new_stmts = []
|
|
64
|
+
optimized = False
|
|
65
|
+
stop = False
|
|
66
|
+
while not stop:
|
|
67
|
+
new_stmts = []
|
|
68
|
+
stop = True
|
|
69
|
+
for i, stmt0 in enumerate(reordered_stmts):
|
|
70
|
+
if i == len(reordered_stmts) - 1:
|
|
71
|
+
new_stmts.append(reordered_stmts[i])
|
|
72
|
+
break
|
|
73
|
+
stmt1 = reordered_stmts[i + 1]
|
|
74
|
+
opt_stmts = self._optimize_pair(stmt0, stmt1)
|
|
75
|
+
if opt_stmts is None:
|
|
76
|
+
new_stmts.append(stmt0)
|
|
77
|
+
else:
|
|
78
|
+
new_stmts += opt_stmts
|
|
79
|
+
# start again from the beginning
|
|
80
|
+
optimized = True
|
|
81
|
+
stop = False
|
|
82
|
+
reordered_stmts = new_stmts + reordered_stmts[i + 2 :]
|
|
83
|
+
break
|
|
84
|
+
|
|
85
|
+
return new_stmts if optimized and new_stmts else None
|
|
86
|
+
|
|
87
|
+
def _reorder_stmts(self, stmts: list[Call | Store | Assignment]) -> list[Call | Store] | None:
|
|
88
|
+
"""
|
|
89
|
+
Order a list of statements based on ascending addresses of their destination buffers.
|
|
90
|
+
"""
|
|
91
|
+
|
|
92
|
+
if not all(
|
|
93
|
+
(
|
|
94
|
+
InlinedWcscpy.is_inlined_wcsncpy(s)
|
|
95
|
+
or (isinstance(s, Store) and isinstance(s.data, Const))
|
|
96
|
+
or (
|
|
97
|
+
isinstance(s, Assignment)
|
|
98
|
+
and isinstance(s.dst, VirtualVariable)
|
|
99
|
+
and s.dst.was_stack
|
|
100
|
+
and isinstance(s.src, Const)
|
|
101
|
+
)
|
|
102
|
+
)
|
|
103
|
+
for s in stmts
|
|
104
|
+
):
|
|
105
|
+
return None
|
|
106
|
+
offset_to_stmt = {}
|
|
107
|
+
updated_offsets: set[int] = set()
|
|
108
|
+
known_base = None
|
|
109
|
+
for stmt in stmts:
|
|
110
|
+
if isinstance(stmt, Call):
|
|
111
|
+
assert (
|
|
112
|
+
stmt.args is not None
|
|
113
|
+
and len(stmt.args) == 3
|
|
114
|
+
and stmt.args[0] is not None
|
|
115
|
+
and stmt.args[2] is not None
|
|
116
|
+
)
|
|
117
|
+
base, off = self._parse_addr(stmt.args[0])
|
|
118
|
+
store_size = stmt.args[2].value * 2 if isinstance(stmt.args[2], Const) else None
|
|
119
|
+
elif isinstance(stmt, Store):
|
|
120
|
+
base, off = self._parse_addr(stmt.addr)
|
|
121
|
+
store_size = stmt.size
|
|
122
|
+
elif isinstance(stmt, Assignment):
|
|
123
|
+
base, off = self._parse_addr(stmt.dst)
|
|
124
|
+
store_size = stmt.dst.size
|
|
125
|
+
else:
|
|
126
|
+
# unexpected!
|
|
127
|
+
return None
|
|
128
|
+
if off is None or store_size is None:
|
|
129
|
+
# bad offset or size - bail
|
|
130
|
+
return None
|
|
131
|
+
if known_base is None:
|
|
132
|
+
known_base = base
|
|
133
|
+
elif not base.likes(known_base):
|
|
134
|
+
# bail
|
|
135
|
+
return None
|
|
136
|
+
if off in offset_to_stmt:
|
|
137
|
+
# duplicate offset - bail
|
|
138
|
+
return None
|
|
139
|
+
assert isinstance(store_size, int)
|
|
140
|
+
for i in range(store_size):
|
|
141
|
+
if off + i in updated_offsets:
|
|
142
|
+
# overlapping store - bail
|
|
143
|
+
return None
|
|
144
|
+
updated_offsets.add(off + i)
|
|
145
|
+
|
|
146
|
+
offset_to_stmt[off] = stmt
|
|
147
|
+
|
|
148
|
+
return [offset_to_stmt[k] for k in sorted(offset_to_stmt)]
|
|
149
|
+
|
|
150
|
+
def _optimize_pair(
|
|
151
|
+
self, last_stmt: Call | Store | Assignment, stmt: Call | Store | Assignment
|
|
152
|
+
) -> list[Call] | None:
|
|
153
|
+
# convert (store, wcsncpy()) to (wcsncpy(), store) if they do not overlap
|
|
154
|
+
if (
|
|
155
|
+
isinstance(stmt, Call)
|
|
156
|
+
and InlinedWcscpy.is_inlined_wcsncpy(stmt)
|
|
157
|
+
and stmt.args is not None
|
|
158
|
+
and len(stmt.args) == 3
|
|
159
|
+
and isinstance(stmt.args[2], Const)
|
|
160
|
+
and isinstance(stmt.args[2].value, int)
|
|
161
|
+
and isinstance(last_stmt, (Store, Assignment))
|
|
162
|
+
):
|
|
163
|
+
if isinstance(last_stmt, Store) and isinstance(last_stmt.data, Const):
|
|
164
|
+
store_addr = last_stmt.addr
|
|
165
|
+
store_size = last_stmt.size
|
|
166
|
+
elif isinstance(last_stmt, Assignment):
|
|
167
|
+
store_addr = last_stmt.dst
|
|
168
|
+
store_size = last_stmt.dst.size
|
|
169
|
+
else:
|
|
170
|
+
return None
|
|
171
|
+
# check if they overlap
|
|
172
|
+
wcsncpy_addr = stmt.args[0]
|
|
173
|
+
wcsncpy_size = stmt.args[2].value * 2
|
|
174
|
+
delta = self._get_delta(store_addr, wcsncpy_addr)
|
|
175
|
+
if delta is not None:
|
|
176
|
+
if (0 <= delta <= store_size) or (delta < 0 and -delta <= wcsncpy_size):
|
|
177
|
+
# they overlap, do not switch
|
|
178
|
+
pass
|
|
179
|
+
else:
|
|
180
|
+
last_stmt, stmt = stmt, last_stmt
|
|
181
|
+
|
|
182
|
+
# swap two statements if they are out of order
|
|
183
|
+
if InlinedWcscpy.is_inlined_wcsncpy(last_stmt) and InlinedWcscpy.is_inlined_wcsncpy(stmt):
|
|
184
|
+
assert last_stmt.args is not None and stmt.args is not None
|
|
185
|
+
delta = self._get_delta(last_stmt.args[0], stmt.args[0])
|
|
186
|
+
if delta is not None and delta < 0:
|
|
187
|
+
last_stmt, stmt = stmt, last_stmt
|
|
188
|
+
|
|
189
|
+
if InlinedWcscpy.is_inlined_wcsncpy(last_stmt):
|
|
190
|
+
assert last_stmt.args is not None
|
|
191
|
+
assert self.kb is not None
|
|
192
|
+
s_last: bytes = self.kb.custom_strings[last_stmt.args[1].value]
|
|
193
|
+
addr_last = last_stmt.args[0]
|
|
194
|
+
new_str = None # will be set if consolidation should happen
|
|
195
|
+
|
|
196
|
+
if isinstance(stmt, Call) and InlinedWcscpy.is_inlined_wcsncpy(stmt):
|
|
197
|
+
assert stmt.args is not None
|
|
198
|
+
# consolidating two calls
|
|
199
|
+
s_curr: bytes = self.kb.custom_strings[stmt.args[1].value]
|
|
200
|
+
addr_curr = stmt.args[0]
|
|
201
|
+
# determine if the two addresses are consecutive
|
|
202
|
+
delta = self._get_delta(addr_last, addr_curr)
|
|
203
|
+
if delta is not None and delta == len(s_last):
|
|
204
|
+
# consolidate both calls!
|
|
205
|
+
new_str = s_last + s_curr
|
|
206
|
+
elif isinstance(stmt, Store) and isinstance(stmt.data, Const) and isinstance(stmt.data.value, int):
|
|
207
|
+
# consolidating a call and a store, in case the store statement is storing the suffix of a string (but
|
|
208
|
+
# the suffix is too short to qualify an inlined strcpy optimization)
|
|
209
|
+
addr_curr = stmt.addr
|
|
210
|
+
delta = self._get_delta(addr_last, addr_curr)
|
|
211
|
+
if delta is not None and delta == len(s_last):
|
|
212
|
+
if stmt.size == 2 and stmt.data.value == 0:
|
|
213
|
+
# it's probably the terminating null byte
|
|
214
|
+
r, s = True, b"\x00\x00"
|
|
215
|
+
else:
|
|
216
|
+
r, s = InlinedWcscpy.is_integer_likely_a_wide_string(
|
|
217
|
+
stmt.data.value, stmt.size, stmt.endness, min_length=1 # type:ignore
|
|
218
|
+
)
|
|
219
|
+
if r and s is not None:
|
|
220
|
+
new_str = s_last + s
|
|
221
|
+
elif (
|
|
222
|
+
isinstance(stmt, Assignment)
|
|
223
|
+
and isinstance(stmt.dst, VirtualVariable)
|
|
224
|
+
and isinstance(stmt.src, Const)
|
|
225
|
+
and isinstance(stmt.src.value, int)
|
|
226
|
+
):
|
|
227
|
+
# consolidating a call and an assignment, in case the assignment statement is storing the suffix of a
|
|
228
|
+
# string (but the suffix is too short to qualify an inlined strcpy optimization)
|
|
229
|
+
addr_curr = stmt.dst
|
|
230
|
+
delta = self._get_delta(addr_last, addr_curr)
|
|
231
|
+
if delta is not None and delta == len(s_last):
|
|
232
|
+
r, s = InlinedWcscpy.is_integer_likely_a_wide_string(
|
|
233
|
+
stmt.src.value, stmt.dst.size, self.project.arch.memory_endness, min_length=1 # type:ignore
|
|
234
|
+
)
|
|
235
|
+
if r and s is not None:
|
|
236
|
+
new_str = s_last + s
|
|
237
|
+
|
|
238
|
+
if new_str is not None:
|
|
239
|
+
assert self.project is not None
|
|
240
|
+
wstr_type = SimTypePointer(SimTypeWideChar()).with_arch(self.project.arch)
|
|
241
|
+
if new_str.endswith(b"\x00\x00"):
|
|
242
|
+
call_name = "wcsncpy"
|
|
243
|
+
new_str_idx = self.kb.custom_strings.allocate(new_str[:-2])
|
|
244
|
+
args = [
|
|
245
|
+
last_stmt.args[0],
|
|
246
|
+
Const(None, None, new_str_idx, last_stmt.args[0].bits, custom_string=True, type=wstr_type),
|
|
247
|
+
]
|
|
248
|
+
prototype = None
|
|
249
|
+
else:
|
|
250
|
+
call_name = "wcsncpy"
|
|
251
|
+
new_str_idx = self.kb.custom_strings.allocate(new_str)
|
|
252
|
+
args = [
|
|
253
|
+
last_stmt.args[0],
|
|
254
|
+
Const(None, None, new_str_idx, last_stmt.args[0].bits, custom_string=True, type=wstr_type),
|
|
255
|
+
Const(None, None, len(new_str) // 2, self.project.arch.bits),
|
|
256
|
+
]
|
|
257
|
+
prototype = None
|
|
258
|
+
|
|
259
|
+
return [Call(stmt.idx, call_name, args=args, prototype=prototype, **stmt.tags)]
|
|
260
|
+
|
|
261
|
+
return None
|
|
262
|
+
|
|
263
|
+
@staticmethod
|
|
264
|
+
def _parse_addr(addr: Expression) -> tuple[Expression, int]:
|
|
265
|
+
# we force the base to 64-bit because it does not really matter when we use it
|
|
266
|
+
|
|
267
|
+
if isinstance(addr, VirtualVariable) and addr.was_stack:
|
|
268
|
+
return StackBaseOffset(None, 64, 0), addr.stack_offset
|
|
269
|
+
if isinstance(addr, Register):
|
|
270
|
+
return addr, 0
|
|
271
|
+
if isinstance(addr, StackBaseOffset):
|
|
272
|
+
return StackBaseOffset(None, 64, 0), addr.offset
|
|
273
|
+
if (
|
|
274
|
+
isinstance(addr, UnaryOp)
|
|
275
|
+
and addr.op == "Reference"
|
|
276
|
+
and isinstance(addr.operand, VirtualVariable)
|
|
277
|
+
and addr.operand.was_stack
|
|
278
|
+
):
|
|
279
|
+
return StackBaseOffset(None, 64, 0), addr.operand.stack_offset
|
|
280
|
+
if isinstance(addr, BinaryOp):
|
|
281
|
+
if addr.op == "Add" and isinstance(addr.operands[1], Const) and isinstance(addr.operands[1].value, int):
|
|
282
|
+
base_0, offset_0 = InlinedWcscpyConsolidation._parse_addr(addr.operands[0])
|
|
283
|
+
return base_0, offset_0 + addr.operands[1].value
|
|
284
|
+
if addr.op == "Sub" and isinstance(addr.operands[1], Const) and isinstance(addr.operands[1].value, int):
|
|
285
|
+
base_0, offset_0 = InlinedWcscpyConsolidation._parse_addr(addr.operands[0])
|
|
286
|
+
return base_0, offset_0 - addr.operands[1].value
|
|
287
|
+
|
|
288
|
+
return addr, 0
|
|
289
|
+
|
|
290
|
+
@staticmethod
|
|
291
|
+
def _get_delta(addr_0: Expression, addr_1: Expression) -> int | None:
|
|
292
|
+
base_0, offset_0 = InlinedWcscpyConsolidation._parse_addr(addr_0)
|
|
293
|
+
base_1, offset_1 = InlinedWcscpyConsolidation._parse_addr(addr_1)
|
|
294
|
+
if base_0.likes(base_1):
|
|
295
|
+
return offset_1 - offset_0
|
|
296
|
+
return None
|
|
@@ -97,6 +97,19 @@ class SimEngineSSARewriting(
|
|
|
97
97
|
self._current_vvar_id += 1
|
|
98
98
|
return self._current_vvar_id
|
|
99
99
|
|
|
100
|
+
#
|
|
101
|
+
# Util functions
|
|
102
|
+
#
|
|
103
|
+
|
|
104
|
+
@staticmethod
|
|
105
|
+
def _is_head_controlled_loop_jump(block, jump_stmt: ConditionalJump) -> bool:
|
|
106
|
+
concrete_targets = []
|
|
107
|
+
if isinstance(jump_stmt.true_target, Const):
|
|
108
|
+
concrete_targets.append(jump_stmt.true_target.value)
|
|
109
|
+
if isinstance(jump_stmt.false_target, Const):
|
|
110
|
+
concrete_targets.append(jump_stmt.false_target.value)
|
|
111
|
+
return not all(block.addr <= t < block.addr + block.original_size for t in concrete_targets)
|
|
112
|
+
|
|
100
113
|
#
|
|
101
114
|
# Handlers
|
|
102
115
|
#
|
|
@@ -303,7 +316,7 @@ class SimEngineSSARewriting(
|
|
|
303
316
|
new_true_target = self._expr(stmt.true_target) if stmt.true_target is not None else None
|
|
304
317
|
new_false_target = self._expr(stmt.false_target) if stmt.false_target is not None else None
|
|
305
318
|
|
|
306
|
-
if self.stmt_idx != len(self.block.statements) - 1:
|
|
319
|
+
if self.stmt_idx != len(self.block.statements) - 1 and self._is_head_controlled_loop_jump(self.block, stmt):
|
|
307
320
|
# the conditional jump is in the middle of the block (e.g., the block generated from lifting rep stosq).
|
|
308
321
|
# we need to make a copy of the state and use the state of this point in its successor
|
|
309
322
|
self.head_controlled_loop_outstate = self.state.copy()
|
|
@@ -42,6 +42,7 @@ from angr.utils.constants import is_alignment_mask
|
|
|
42
42
|
from angr.utils.library import get_cpp_function_name
|
|
43
43
|
from angr.utils.loader import is_in_readonly_segment, is_in_readonly_section
|
|
44
44
|
from angr.utils.types import unpack_typeref, unpack_pointer_and_array, dereference_simtype_by_lib
|
|
45
|
+
from angr.utils.strings import decode_utf16_string
|
|
45
46
|
from angr.analyses.decompiler.utils import structured_node_is_simple_return
|
|
46
47
|
from angr.analyses.decompiler.notes.deobfuscated_strings import DeobfuscatedStringsNote
|
|
47
48
|
from angr.errors import UnsupportedNodeTypeError, AngrRuntimeError
|
|
@@ -2269,9 +2270,9 @@ class CConstant(CExpression):
|
|
|
2269
2270
|
elif isinstance(self._type, SimTypePointer) and isinstance(self._type.pts_to, SimTypeWideChar):
|
|
2270
2271
|
refval = self.reference_values[self._type]
|
|
2271
2272
|
v = (
|
|
2272
|
-
refval.content
|
|
2273
|
+
decode_utf16_string(refval.content)
|
|
2273
2274
|
if isinstance(refval, MemoryData)
|
|
2274
|
-
else refval
|
|
2275
|
+
else decode_utf16_string(refval)
|
|
2275
2276
|
) # it's a string
|
|
2276
2277
|
yield CConstant.str_to_c_str(v, prefix="L"), self
|
|
2277
2278
|
return
|
|
@@ -4076,9 +4077,9 @@ class MakeTypecastsImplicit(CStructuredCodeWalker):
|
|
|
4076
4077
|
class FieldReferenceCleanup(CStructuredCodeWalker):
|
|
4077
4078
|
def handle_CTypeCast(self, obj):
|
|
4078
4079
|
if isinstance(obj.dst_type, SimTypePointer) and not isinstance(obj.dst_type.pts_to, SimTypeBottom):
|
|
4079
|
-
|
|
4080
|
-
if not isinstance(
|
|
4081
|
-
return self.handle(
|
|
4080
|
+
new_obj = obj.codegen._access_reference(obj.expr, obj.dst_type.pts_to)
|
|
4081
|
+
if not isinstance(new_obj, CTypeCast):
|
|
4082
|
+
return self.handle(new_obj)
|
|
4082
4083
|
return super().handle_CTypeCast(obj)
|
|
4083
4084
|
|
|
4084
4085
|
|
|
@@ -548,7 +548,7 @@ class DreamStructurer(StructurerBase):
|
|
|
548
548
|
cmp = switch_extract_cmp_bounds(last_stmt)
|
|
549
549
|
if not cmp:
|
|
550
550
|
return False
|
|
551
|
-
cmp_expr, cmp_lb,
|
|
551
|
+
cmp_expr, cmp_lb, _cmp_ub = cmp # pylint:disable=unused-variable
|
|
552
552
|
|
|
553
553
|
# the real indirect jump
|
|
554
554
|
if len(addr2nodes[target]) != 1:
|
|
@@ -619,7 +619,7 @@ class DreamStructurer(StructurerBase):
|
|
|
619
619
|
cmp = switch_extract_cmp_bounds(last_stmt)
|
|
620
620
|
if not cmp:
|
|
621
621
|
return False
|
|
622
|
-
cmp_expr, cmp_lb,
|
|
622
|
+
cmp_expr, cmp_lb, _cmp_ub = cmp # pylint:disable=unused-variable
|
|
623
623
|
|
|
624
624
|
jumptable_entries = jump_table.jumptable_entries
|
|
625
625
|
assert jumptable_entries is not None
|
|
@@ -929,23 +929,55 @@ class PhoenixStructurer(StructurerBase):
|
|
|
929
929
|
)
|
|
930
930
|
break_node = Block(last_src_stmt.ins_addr, None, statements=[break_stmt])
|
|
931
931
|
else:
|
|
932
|
-
|
|
933
|
-
None
|
|
934
|
-
Const(None, None, successor.addr, self.project.arch.bits),
|
|
935
|
-
target_idx=successor.idx if isinstance(successor, Block) else None,
|
|
936
|
-
ins_addr=last_src_stmt.ins_addr,
|
|
937
|
-
)
|
|
938
|
-
break_node_inner = Block(last_src_stmt.ins_addr, None, statements=[break_stmt])
|
|
939
|
-
fallthrough_node = next(iter(succ for succ in fullgraph.successors(src) if succ is not dst))
|
|
940
|
-
fallthrough_stmt = Jump(
|
|
941
|
-
None,
|
|
942
|
-
Const(None, None, fallthrough_node.addr, self.project.arch.bits),
|
|
943
|
-
target_idx=successor.idx if isinstance(successor, Block) else None,
|
|
944
|
-
ins_addr=last_src_stmt.ins_addr,
|
|
945
|
-
)
|
|
946
|
-
break_node_inner_fallthrough = Block(
|
|
947
|
-
last_src_stmt.ins_addr, None, statements=[fallthrough_stmt]
|
|
932
|
+
fallthrough_node = next(
|
|
933
|
+
iter(succ for succ in fullgraph.successors(src) if succ is not dst), None
|
|
948
934
|
)
|
|
935
|
+
if fallthrough_node is not None:
|
|
936
|
+
# we create a conditional jump that will be converted to a conditional break later
|
|
937
|
+
break_stmt = Jump(
|
|
938
|
+
None,
|
|
939
|
+
Const(None, None, successor.addr, self.project.arch.bits),
|
|
940
|
+
target_idx=successor.idx if isinstance(successor, Block) else None,
|
|
941
|
+
ins_addr=last_src_stmt.ins_addr,
|
|
942
|
+
)
|
|
943
|
+
break_node_inner = Block(last_src_stmt.ins_addr, None, statements=[break_stmt])
|
|
944
|
+
fallthrough_stmt = Jump(
|
|
945
|
+
None,
|
|
946
|
+
Const(None, None, fallthrough_node.addr, self.project.arch.bits),
|
|
947
|
+
target_idx=successor.idx if isinstance(successor, Block) else None,
|
|
948
|
+
ins_addr=last_src_stmt.ins_addr,
|
|
949
|
+
)
|
|
950
|
+
break_node_inner_fallthrough = Block(
|
|
951
|
+
last_src_stmt.ins_addr, None, statements=[fallthrough_stmt]
|
|
952
|
+
)
|
|
953
|
+
else:
|
|
954
|
+
# the fallthrough node does not exist in the graph. we create a conditional jump that
|
|
955
|
+
# jumps to an address
|
|
956
|
+
if not isinstance(last_src_stmt, ConditionalJump):
|
|
957
|
+
raise TypeError(f"Unexpected last_src_stmt type {type(last_src_stmt)}")
|
|
958
|
+
other_target = (
|
|
959
|
+
last_src_stmt.true_target
|
|
960
|
+
if isinstance(last_src_stmt.false_target, Const)
|
|
961
|
+
and last_src_stmt.false_target.value == successor.addr
|
|
962
|
+
else last_src_stmt.false_target
|
|
963
|
+
)
|
|
964
|
+
assert other_target is not None
|
|
965
|
+
break_stmt = Jump(
|
|
966
|
+
None,
|
|
967
|
+
Const(None, None, successor.addr, self.project.arch.bits),
|
|
968
|
+
target_idx=successor.idx if isinstance(successor, Block) else None,
|
|
969
|
+
ins_addr=last_src_stmt.ins_addr,
|
|
970
|
+
)
|
|
971
|
+
break_node_inner = Block(last_src_stmt.ins_addr, None, statements=[break_stmt])
|
|
972
|
+
fallthrough_stmt = Jump(
|
|
973
|
+
None,
|
|
974
|
+
other_target,
|
|
975
|
+
target_idx=successor.idx if isinstance(successor, Block) else None,
|
|
976
|
+
ins_addr=last_src_stmt.ins_addr,
|
|
977
|
+
)
|
|
978
|
+
break_node_inner_fallthrough = Block(
|
|
979
|
+
last_src_stmt.ins_addr, None, statements=[fallthrough_stmt]
|
|
980
|
+
)
|
|
949
981
|
break_node = ConditionNode(
|
|
950
982
|
last_src_stmt.ins_addr,
|
|
951
983
|
None,
|
|
@@ -1454,7 +1486,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
1454
1486
|
)
|
|
1455
1487
|
if not cmp:
|
|
1456
1488
|
return False
|
|
1457
|
-
cmp_expr, cmp_lb,
|
|
1489
|
+
cmp_expr, cmp_lb, _cmp_ub = cmp
|
|
1458
1490
|
|
|
1459
1491
|
assert cond_node.addr is not None
|
|
1460
1492
|
switch_head_addr = cond_node.addr
|
|
@@ -1477,7 +1509,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
1477
1509
|
cmp = switch_extract_cmp_bounds(last_stmt)
|
|
1478
1510
|
if not cmp:
|
|
1479
1511
|
return False
|
|
1480
|
-
cmp_expr, cmp_lb,
|
|
1512
|
+
cmp_expr, cmp_lb, _cmp_ub = cmp # pylint:disable=unused-variable
|
|
1481
1513
|
|
|
1482
1514
|
switch_head_addr = last_stmt.ins_addr
|
|
1483
1515
|
|
|
@@ -1849,7 +1881,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
1849
1881
|
cmp = switch_extract_cmp_bounds(last_stmt)
|
|
1850
1882
|
if not cmp:
|
|
1851
1883
|
return False
|
|
1852
|
-
cmp_expr, cmp_lb,
|
|
1884
|
+
cmp_expr, cmp_lb, _cmp_ub = cmp # pylint:disable=unused-variable
|
|
1853
1885
|
|
|
1854
1886
|
if isinstance(last_stmt.false_target, Const):
|
|
1855
1887
|
default_addr = last_stmt.false_target.value
|
|
@@ -2148,19 +2180,54 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2148
2180
|
jump_node = Block(out_src.addr, 0, statements=[jump_stmt])
|
|
2149
2181
|
case_node.nodes.append(jump_node)
|
|
2150
2182
|
|
|
2151
|
-
|
|
2183
|
+
# out_dst_succ is the successor within the current region
|
|
2184
|
+
# out_dst_succ_fullgraph is the successor outside the current region
|
|
2185
|
+
if out_edges_to_head:
|
|
2152
2186
|
# add an edge from SwitchCaseNode to head so that a loop will be structured later
|
|
2153
2187
|
out_dst_succ = head
|
|
2188
|
+
out_dst_succ_fullgraph = None
|
|
2154
2189
|
else:
|
|
2155
2190
|
# add an edge from SwitchCaseNode to its most immediate successor (if there is one)
|
|
2156
|
-
|
|
2191
|
+
# there might be an in-region successor and an out-of-region successor (especially due to the
|
|
2192
|
+
# introduction of self.dowhile_known_tail_nodes)!
|
|
2193
|
+
# example: 7995a0325b446c462bdb6ae10b692eee2ecadd8e888e9d7729befe4412007afb, function 1400EF820
|
|
2194
|
+
out_dst_succs = []
|
|
2195
|
+
out_dst_succs_fullgraph = []
|
|
2196
|
+
for _, o in other_out_edges:
|
|
2197
|
+
if o in graph:
|
|
2198
|
+
out_dst_succs.append(o)
|
|
2199
|
+
elif o in full_graph:
|
|
2200
|
+
out_dst_succs_fullgraph.append(o)
|
|
2201
|
+
out_dst_succ = sorted(out_dst_succs, key=lambda o: o.addr)[0] if out_dst_succs else None
|
|
2202
|
+
out_dst_succ_fullgraph = (
|
|
2203
|
+
sorted(out_dst_succs_fullgraph, key=lambda o: o.addr)[0] if out_dst_succs_fullgraph else None
|
|
2204
|
+
)
|
|
2205
|
+
if len(out_dst_succs) > 1:
|
|
2206
|
+
assert out_dst_succ is not None
|
|
2207
|
+
l.warning(
|
|
2208
|
+
"Multiple in-region successors detected for switch-case node at %#x. Picking %#x as the "
|
|
2209
|
+
"successor and dropping others.",
|
|
2210
|
+
scnode.addr,
|
|
2211
|
+
out_dst_succ.addr,
|
|
2212
|
+
)
|
|
2213
|
+
if len(out_dst_succs_fullgraph) > 1:
|
|
2214
|
+
assert out_dst_succ_fullgraph is not None
|
|
2215
|
+
l.warning(
|
|
2216
|
+
"Multiple out-of-region successors detected for switch-case node at %#x. Picking %#x as the "
|
|
2217
|
+
"successor and dropping others.",
|
|
2218
|
+
scnode.addr,
|
|
2219
|
+
out_dst_succ_fullgraph.addr,
|
|
2220
|
+
)
|
|
2157
2221
|
|
|
2158
2222
|
if out_dst_succ is not None:
|
|
2159
|
-
|
|
2160
|
-
graph.add_edge(scnode, out_dst_succ)
|
|
2223
|
+
graph.add_edge(scnode, out_dst_succ)
|
|
2161
2224
|
full_graph.add_edge(scnode, out_dst_succ)
|
|
2162
2225
|
if full_graph.has_edge(head, out_dst_succ):
|
|
2163
2226
|
full_graph.remove_edge(head, out_dst_succ)
|
|
2227
|
+
if out_dst_succ_fullgraph is not None:
|
|
2228
|
+
full_graph.add_edge(scnode, out_dst_succ_fullgraph)
|
|
2229
|
+
if full_graph.has_edge(head, out_dst_succ_fullgraph):
|
|
2230
|
+
full_graph.remove_edge(head, out_dst_succ_fullgraph)
|
|
2164
2231
|
|
|
2165
2232
|
# fix full_graph if needed: remove successors that are no longer needed
|
|
2166
2233
|
for _out_src, out_dst in other_out_edges:
|
|
@@ -2925,6 +2992,17 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2925
2992
|
ordered_nodes.remove(postorder_head)
|
|
2926
2993
|
acyclic_graph.remove_node(postorder_head)
|
|
2927
2994
|
node_seq = {nn: (len(ordered_nodes) - idx) for (idx, nn) in enumerate(ordered_nodes)} # post-order
|
|
2995
|
+
if len(node_seq) < len(acyclic_graph):
|
|
2996
|
+
# some nodes are not reachable from head - add them to node_seq as well
|
|
2997
|
+
# but this is usually the result of incorrect structuring, so we may still fail at a later point
|
|
2998
|
+
l.warning("Adding %d unreachable nodes to node_seq", len(acyclic_graph) - len(node_seq))
|
|
2999
|
+
unreachable_nodes = sorted(
|
|
3000
|
+
(nn for nn in acyclic_graph if nn not in node_seq),
|
|
3001
|
+
key=lambda n: (n.addr, (-1 if n.idx is None else n.idx) if hasattr(n, "idx") else 0),
|
|
3002
|
+
)
|
|
3003
|
+
max_seq = max(node_seq.values(), default=0)
|
|
3004
|
+
for i, nn in enumerate(unreachable_nodes):
|
|
3005
|
+
node_seq[nn] = max_seq + i
|
|
2928
3006
|
|
|
2929
3007
|
if all_edges_wo_dominance:
|
|
2930
3008
|
all_edges_wo_dominance = self._order_virtualizable_edges(full_graph, all_edges_wo_dominance, node_seq)
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
import pathlib
|
|
4
4
|
import copy
|
|
5
|
+
from types import FunctionType
|
|
5
6
|
from typing import Any
|
|
6
7
|
from collections.abc import Iterable
|
|
7
8
|
import logging
|
|
@@ -958,9 +959,15 @@ def peephole_optimize_multistmts(block, stmt_opts):
|
|
|
958
959
|
for opt in stmt_opts:
|
|
959
960
|
matched = False
|
|
960
961
|
stmt_seq_len = None
|
|
961
|
-
for
|
|
962
|
-
if
|
|
963
|
-
|
|
962
|
+
for stmt_class_seq_or_method in opt.stmt_classes:
|
|
963
|
+
if isinstance(stmt_class_seq_or_method, FunctionType):
|
|
964
|
+
r = stmt_class_seq_or_method(statements, stmt_idx)
|
|
965
|
+
if r > 0:
|
|
966
|
+
stmt_seq_len = r
|
|
967
|
+
matched = True
|
|
968
|
+
break
|
|
969
|
+
elif match_stmt_classes(statements, stmt_idx, stmt_class_seq_or_method):
|
|
970
|
+
stmt_seq_len = len(stmt_class_seq_or_method)
|
|
964
971
|
matched = True
|
|
965
972
|
break
|
|
966
973
|
|