angr 9.2.175__cp310-abi3-win_amd64.whl → 9.2.177__cp310-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. More details are linked from the original page (the link is not preserved in this text export).

Files changed (51)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +12 -0
  3. angr/analyses/complete_calling_conventions.py +39 -26
  4. angr/analyses/decompiler/ail_simplifier.py +14 -12
  5. angr/analyses/decompiler/ccall_rewriters/rewriter_base.py +5 -1
  6. angr/analyses/decompiler/clinic.py +54 -40
  7. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +3 -3
  8. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +2 -2
  9. angr/analyses/decompiler/peephole_optimizations/__init__.py +4 -4
  10. angr/analyses/decompiler/peephole_optimizations/cas_intrinsics.py +69 -12
  11. angr/analyses/decompiler/peephole_optimizations/{inlined_wstrcpy.py → inlined_wcscpy.py} +16 -8
  12. angr/analyses/decompiler/peephole_optimizations/inlined_wcscpy_consolidation.py +296 -0
  13. angr/analyses/decompiler/ssailification/rewriting_engine.py +14 -1
  14. angr/analyses/decompiler/structured_codegen/c.py +6 -5
  15. angr/analyses/decompiler/structuring/dream.py +2 -2
  16. angr/analyses/decompiler/structuring/phoenix.py +101 -23
  17. angr/analyses/decompiler/utils.py +10 -3
  18. angr/analyses/flirt/flirt.py +5 -4
  19. angr/analyses/stack_pointer_tracker.py +4 -3
  20. angr/analyses/typehoon/lifter.py +29 -18
  21. angr/analyses/typehoon/simple_solver.py +157 -50
  22. angr/analyses/typehoon/translator.py +34 -34
  23. angr/analyses/typehoon/typeconsts.py +33 -15
  24. angr/analyses/typehoon/typevars.py +9 -2
  25. angr/analyses/variable_recovery/engine_ail.py +43 -2
  26. angr/analyses/variable_recovery/engine_base.py +4 -1
  27. angr/analyses/variable_recovery/variable_recovery_fast.py +3 -1
  28. angr/emulator.py +2 -1
  29. angr/engines/hook.py +1 -1
  30. angr/engines/icicle.py +21 -5
  31. angr/engines/vex/claripy/ccall.py +3 -3
  32. angr/knowledge_plugins/functions/function.py +19 -2
  33. angr/procedures/definitions/__init__.py +9 -0
  34. angr/procedures/definitions/parse_win32json.py +11 -0
  35. angr/procedures/definitions/wdk/ntoskrnl.json +4 -0
  36. angr/procedures/posix/pthread.py +4 -4
  37. angr/procedures/stubs/format_parser.py +3 -3
  38. angr/rustylib.pyd +0 -0
  39. angr/sim_type.py +11 -6
  40. angr/simos/windows.py +1 -1
  41. angr/storage/memory_mixins/paged_memory/page_backer_mixins.py +1 -1
  42. angr/unicornlib.dll +0 -0
  43. angr/utils/constants.py +1 -1
  44. angr/utils/strings.py +20 -0
  45. {angr-9.2.175.dist-info → angr-9.2.177.dist-info}/METADATA +5 -5
  46. {angr-9.2.175.dist-info → angr-9.2.177.dist-info}/RECORD +50 -49
  47. angr/analyses/decompiler/peephole_optimizations/inlined_wstrcpy_consolidation.py +0 -113
  48. {angr-9.2.175.dist-info → angr-9.2.177.dist-info}/WHEEL +0 -0
  49. {angr-9.2.175.dist-info → angr-9.2.177.dist-info}/entry_points.txt +0 -0
  50. {angr-9.2.175.dist-info → angr-9.2.177.dist-info}/licenses/LICENSE +0 -0
  51. {angr-9.2.175.dist-info → angr-9.2.177.dist-info}/top_level.txt +0 -0
@@ -17,17 +17,20 @@ ASCII_PRINTABLES = {ord(x) for x in string.printable}
17
17
  ASCII_DIGITS = {ord(x) for x in string.digits}
18
18
 
19
19
 
20
- class InlinedWstrcpy(PeepholeOptimizationStmtBase):
20
+ class InlinedWcscpy(PeepholeOptimizationStmtBase):
21
21
  """
22
- Simplifies inlined wide string copying logic into calls to wstrcpy.
22
+ Simplifies inlined wide string copying logic into calls to wcscpy.
23
23
  """
24
24
 
25
25
  __slots__ = ()
26
26
 
27
- NAME = "Simplifying inlined wstrcpy"
27
+ NAME = "Simplifying inlined wcscpy"
28
28
  stmt_classes = (Assignment, Store)
29
29
 
30
30
  def optimize(self, stmt: Assignment | Store, stmt_idx: int | None = None, block=None, **kwargs):
31
+ assert self.project is not None
32
+ assert self.kb is not None
33
+
31
34
  if (
32
35
  isinstance(stmt, Assignment)
33
36
  and isinstance(stmt.dst, VirtualVariable)
@@ -48,11 +51,12 @@ class InlinedWstrcpy(PeepholeOptimizationStmtBase):
48
51
  r, s = self.is_integer_likely_a_wide_string(value, value_size, self.project.arch.memory_endness)
49
52
  if r:
50
53
  # replace it with a call to strncpy
54
+ assert s is not None
51
55
  str_id = self.kb.custom_strings.allocate(s)
52
56
  wstr_type = SimTypePointer(SimTypeWideChar()).with_arch(self.project.arch)
53
57
  return Call(
54
58
  stmt.idx,
55
- "wstrncpy",
59
+ "wcsncpy",
56
60
  args=[
57
61
  dst,
58
62
  Const(None, None, str_id, self.project.arch.bits, custom_string=True, type=wstr_type),
@@ -83,6 +87,7 @@ class InlinedWstrcpy(PeepholeOptimizationStmtBase):
83
87
  integer, size = self.stride_to_int(stride)
84
88
  r, s = self.is_integer_likely_a_wide_string(integer, size, Endness.BE, min_length=3)
85
89
  if r:
90
+ assert s is not None
86
91
  # we remove all involved statements whose statement IDs are greater than the current one
87
92
  for _, stmt_idx_, _ in reversed(stride):
88
93
  if stmt_idx_ <= stmt_idx:
@@ -94,7 +99,7 @@ class InlinedWstrcpy(PeepholeOptimizationStmtBase):
94
99
  wstr_type = SimTypePointer(SimTypeWideChar()).with_arch(self.project.arch)
95
100
  return Call(
96
101
  stmt.idx,
97
- "wstrncpy",
102
+ "wcsncpy",
98
103
  args=[
99
104
  dst,
100
105
  Const(None, None, str_id, self.project.arch.bits, custom_string=True, type=wstr_type),
@@ -112,11 +117,14 @@ class InlinedWstrcpy(PeepholeOptimizationStmtBase):
112
117
  size = 0
113
118
  for _, _, v in stride:
114
119
  size += v.size
120
+ assert isinstance(v.value, int)
115
121
  n <<= v.bits
116
122
  n |= v.value
117
123
  return n, size
118
124
 
119
125
  def collect_constant_stores(self, block, starting_stmt_idx: int) -> dict[int, tuple[int, Const | None]]:
126
+ assert self.project is not None
127
+
120
128
  r = {}
121
129
  expected_store_varid: int | None = None
122
130
  starting_stmt = block.statements[starting_stmt_idx]
@@ -224,7 +232,7 @@ class InlinedWstrcpy(PeepholeOptimizationStmtBase):
224
232
  # unsupported endness
225
233
  return False, None
226
234
 
227
- if not (InlinedWstrcpy.even_offsets_are_zero(chars) or InlinedWstrcpy.odd_offsets_are_zero(chars)):
235
+ if not (InlinedWcscpy.even_offsets_are_zero(chars) or InlinedWcscpy.odd_offsets_are_zero(chars)):
228
236
  return False, None
229
237
 
230
238
  if chars and len(chars) >= 2 and chars[-1] == 0 and chars[-2] == 0:
@@ -236,11 +244,11 @@ class InlinedWstrcpy(PeepholeOptimizationStmtBase):
236
244
  return False, None
237
245
 
238
246
  @staticmethod
239
- def is_inlined_wstrncpy(stmt: Statement) -> bool:
247
+ def is_inlined_wcsncpy(stmt: Statement) -> bool:
240
248
  return (
241
249
  isinstance(stmt, Call)
242
250
  and isinstance(stmt.target, str)
243
- and stmt.target == "wstrncpy"
251
+ and stmt.target == "wcsncpy"
244
252
  and stmt.args is not None
245
253
  and len(stmt.args) == 3
246
254
  and isinstance(stmt.args[1], Const)
@@ -0,0 +1,296 @@
1
+ # pylint:disable=arguments-differ
2
+ from __future__ import annotations
3
+ from typing import TYPE_CHECKING
4
+
5
+ from angr.ailment.expression import Expression, BinaryOp, Const, Register, StackBaseOffset, UnaryOp, VirtualVariable
6
+ from angr.ailment.statement import Call, Store, Assignment
7
+
8
+ from angr.sim_type import SimTypePointer, SimTypeWideChar
9
+ from .base import PeepholeOptimizationMultiStmtBase
10
+ from .inlined_wcscpy import InlinedWcscpy
11
+
12
+ if TYPE_CHECKING:
13
+ from angr.ailment.statement import Statement
14
+
15
+
16
+ def match_statements(stmts: list[Statement], index: int) -> int:
17
+ ending = index
18
+ has_wcsncpy = False
19
+ for i in range(index, len(stmts)):
20
+ stmt = stmts[i]
21
+ if isinstance(stmt, Call):
22
+ if InlinedWcscpy.is_inlined_wcsncpy(stmt):
23
+ has_wcsncpy = True
24
+ else:
25
+ break
26
+ elif isinstance(stmt, Store):
27
+ if not isinstance(stmt.data, Const):
28
+ break
29
+ _, off = InlinedWcscpyConsolidation._parse_addr(stmt.addr)
30
+ if off is None:
31
+ # unsupported offset - bail
32
+ break
33
+ elif (
34
+ isinstance(stmt, Assignment)
35
+ and isinstance(stmt.dst, VirtualVariable)
36
+ and stmt.dst.was_stack
37
+ and isinstance(stmt.src, Const)
38
+ ):
39
+ pass
40
+ else:
41
+ break
42
+ ending = i + 1
43
+ return ending - index if has_wcsncpy and ending - index >= 2 else 0
44
+
45
+
46
+ class InlinedWcscpyConsolidation(PeepholeOptimizationMultiStmtBase):
47
+ """
48
+ Consolidate multiple inlined wcscpy/wcsncpy calls.
49
+ """
50
+
51
+ __slots__ = ()
52
+
53
+ NAME = "Consolidate multiple inlined wcsncpy calls"
54
+ stmt_classes = (match_statements,)
55
+
56
+ def optimize( # type:ignore
57
+ self, stmts: list[Call | Store | Assignment], stmt_idx: int | None = None, block=None, **kwargs
58
+ ): # pylint:disable=unused-argument
59
+ reordered_stmts = self._reorder_stmts(stmts)
60
+ if not reordered_stmts or len(reordered_stmts) <= 1:
61
+ return None
62
+
63
+ new_stmts = []
64
+ optimized = False
65
+ stop = False
66
+ while not stop:
67
+ new_stmts = []
68
+ stop = True
69
+ for i, stmt0 in enumerate(reordered_stmts):
70
+ if i == len(reordered_stmts) - 1:
71
+ new_stmts.append(reordered_stmts[i])
72
+ break
73
+ stmt1 = reordered_stmts[i + 1]
74
+ opt_stmts = self._optimize_pair(stmt0, stmt1)
75
+ if opt_stmts is None:
76
+ new_stmts.append(stmt0)
77
+ else:
78
+ new_stmts += opt_stmts
79
+ # start again from the beginning
80
+ optimized = True
81
+ stop = False
82
+ reordered_stmts = new_stmts + reordered_stmts[i + 2 :]
83
+ break
84
+
85
+ return new_stmts if optimized and new_stmts else None
86
+
87
+ def _reorder_stmts(self, stmts: list[Call | Store | Assignment]) -> list[Call | Store] | None:
88
+ """
89
+ Order a list of statements based on ascending addresses of their destination buffers.
90
+ """
91
+
92
+ if not all(
93
+ (
94
+ InlinedWcscpy.is_inlined_wcsncpy(s)
95
+ or (isinstance(s, Store) and isinstance(s.data, Const))
96
+ or (
97
+ isinstance(s, Assignment)
98
+ and isinstance(s.dst, VirtualVariable)
99
+ and s.dst.was_stack
100
+ and isinstance(s.src, Const)
101
+ )
102
+ )
103
+ for s in stmts
104
+ ):
105
+ return None
106
+ offset_to_stmt = {}
107
+ updated_offsets: set[int] = set()
108
+ known_base = None
109
+ for stmt in stmts:
110
+ if isinstance(stmt, Call):
111
+ assert (
112
+ stmt.args is not None
113
+ and len(stmt.args) == 3
114
+ and stmt.args[0] is not None
115
+ and stmt.args[2] is not None
116
+ )
117
+ base, off = self._parse_addr(stmt.args[0])
118
+ store_size = stmt.args[2].value * 2 if isinstance(stmt.args[2], Const) else None
119
+ elif isinstance(stmt, Store):
120
+ base, off = self._parse_addr(stmt.addr)
121
+ store_size = stmt.size
122
+ elif isinstance(stmt, Assignment):
123
+ base, off = self._parse_addr(stmt.dst)
124
+ store_size = stmt.dst.size
125
+ else:
126
+ # unexpected!
127
+ return None
128
+ if off is None or store_size is None:
129
+ # bad offset or size - bail
130
+ return None
131
+ if known_base is None:
132
+ known_base = base
133
+ elif not base.likes(known_base):
134
+ # bail
135
+ return None
136
+ if off in offset_to_stmt:
137
+ # duplicate offset - bail
138
+ return None
139
+ assert isinstance(store_size, int)
140
+ for i in range(store_size):
141
+ if off + i in updated_offsets:
142
+ # overlapping store - bail
143
+ return None
144
+ updated_offsets.add(off + i)
145
+
146
+ offset_to_stmt[off] = stmt
147
+
148
+ return [offset_to_stmt[k] for k in sorted(offset_to_stmt)]
149
+
150
+ def _optimize_pair(
151
+ self, last_stmt: Call | Store | Assignment, stmt: Call | Store | Assignment
152
+ ) -> list[Call] | None:
153
+ # convert (store, wcsncpy()) to (wcsncpy(), store) if they do not overlap
154
+ if (
155
+ isinstance(stmt, Call)
156
+ and InlinedWcscpy.is_inlined_wcsncpy(stmt)
157
+ and stmt.args is not None
158
+ and len(stmt.args) == 3
159
+ and isinstance(stmt.args[2], Const)
160
+ and isinstance(stmt.args[2].value, int)
161
+ and isinstance(last_stmt, (Store, Assignment))
162
+ ):
163
+ if isinstance(last_stmt, Store) and isinstance(last_stmt.data, Const):
164
+ store_addr = last_stmt.addr
165
+ store_size = last_stmt.size
166
+ elif isinstance(last_stmt, Assignment):
167
+ store_addr = last_stmt.dst
168
+ store_size = last_stmt.dst.size
169
+ else:
170
+ return None
171
+ # check if they overlap
172
+ wcsncpy_addr = stmt.args[0]
173
+ wcsncpy_size = stmt.args[2].value * 2
174
+ delta = self._get_delta(store_addr, wcsncpy_addr)
175
+ if delta is not None:
176
+ if (0 <= delta <= store_size) or (delta < 0 and -delta <= wcsncpy_size):
177
+ # they overlap, do not switch
178
+ pass
179
+ else:
180
+ last_stmt, stmt = stmt, last_stmt
181
+
182
+ # swap two statements if they are out of order
183
+ if InlinedWcscpy.is_inlined_wcsncpy(last_stmt) and InlinedWcscpy.is_inlined_wcsncpy(stmt):
184
+ assert last_stmt.args is not None and stmt.args is not None
185
+ delta = self._get_delta(last_stmt.args[0], stmt.args[0])
186
+ if delta is not None and delta < 0:
187
+ last_stmt, stmt = stmt, last_stmt
188
+
189
+ if InlinedWcscpy.is_inlined_wcsncpy(last_stmt):
190
+ assert last_stmt.args is not None
191
+ assert self.kb is not None
192
+ s_last: bytes = self.kb.custom_strings[last_stmt.args[1].value]
193
+ addr_last = last_stmt.args[0]
194
+ new_str = None # will be set if consolidation should happen
195
+
196
+ if isinstance(stmt, Call) and InlinedWcscpy.is_inlined_wcsncpy(stmt):
197
+ assert stmt.args is not None
198
+ # consolidating two calls
199
+ s_curr: bytes = self.kb.custom_strings[stmt.args[1].value]
200
+ addr_curr = stmt.args[0]
201
+ # determine if the two addresses are consecutive
202
+ delta = self._get_delta(addr_last, addr_curr)
203
+ if delta is not None and delta == len(s_last):
204
+ # consolidate both calls!
205
+ new_str = s_last + s_curr
206
+ elif isinstance(stmt, Store) and isinstance(stmt.data, Const) and isinstance(stmt.data.value, int):
207
+ # consolidating a call and a store, in case the store statement is storing the suffix of a string (but
208
+ # the suffix is too short to qualify an inlined strcpy optimization)
209
+ addr_curr = stmt.addr
210
+ delta = self._get_delta(addr_last, addr_curr)
211
+ if delta is not None and delta == len(s_last):
212
+ if stmt.size == 2 and stmt.data.value == 0:
213
+ # it's probably the terminating null byte
214
+ r, s = True, b"\x00\x00"
215
+ else:
216
+ r, s = InlinedWcscpy.is_integer_likely_a_wide_string(
217
+ stmt.data.value, stmt.size, stmt.endness, min_length=1 # type:ignore
218
+ )
219
+ if r and s is not None:
220
+ new_str = s_last + s
221
+ elif (
222
+ isinstance(stmt, Assignment)
223
+ and isinstance(stmt.dst, VirtualVariable)
224
+ and isinstance(stmt.src, Const)
225
+ and isinstance(stmt.src.value, int)
226
+ ):
227
+ # consolidating a call and an assignment, in case the assignment statement is storing the suffix of a
228
+ # string (but the suffix is too short to qualify an inlined strcpy optimization)
229
+ addr_curr = stmt.dst
230
+ delta = self._get_delta(addr_last, addr_curr)
231
+ if delta is not None and delta == len(s_last):
232
+ r, s = InlinedWcscpy.is_integer_likely_a_wide_string(
233
+ stmt.src.value, stmt.dst.size, self.project.arch.memory_endness, min_length=1 # type:ignore
234
+ )
235
+ if r and s is not None:
236
+ new_str = s_last + s
237
+
238
+ if new_str is not None:
239
+ assert self.project is not None
240
+ wstr_type = SimTypePointer(SimTypeWideChar()).with_arch(self.project.arch)
241
+ if new_str.endswith(b"\x00\x00"):
242
+ call_name = "wcsncpy"
243
+ new_str_idx = self.kb.custom_strings.allocate(new_str[:-2])
244
+ args = [
245
+ last_stmt.args[0],
246
+ Const(None, None, new_str_idx, last_stmt.args[0].bits, custom_string=True, type=wstr_type),
247
+ ]
248
+ prototype = None
249
+ else:
250
+ call_name = "wcsncpy"
251
+ new_str_idx = self.kb.custom_strings.allocate(new_str)
252
+ args = [
253
+ last_stmt.args[0],
254
+ Const(None, None, new_str_idx, last_stmt.args[0].bits, custom_string=True, type=wstr_type),
255
+ Const(None, None, len(new_str) // 2, self.project.arch.bits),
256
+ ]
257
+ prototype = None
258
+
259
+ return [Call(stmt.idx, call_name, args=args, prototype=prototype, **stmt.tags)]
260
+
261
+ return None
262
+
263
+ @staticmethod
264
+ def _parse_addr(addr: Expression) -> tuple[Expression, int]:
265
+ # we force the base to 64-bit because it does not really matter when we use it
266
+
267
+ if isinstance(addr, VirtualVariable) and addr.was_stack:
268
+ return StackBaseOffset(None, 64, 0), addr.stack_offset
269
+ if isinstance(addr, Register):
270
+ return addr, 0
271
+ if isinstance(addr, StackBaseOffset):
272
+ return StackBaseOffset(None, 64, 0), addr.offset
273
+ if (
274
+ isinstance(addr, UnaryOp)
275
+ and addr.op == "Reference"
276
+ and isinstance(addr.operand, VirtualVariable)
277
+ and addr.operand.was_stack
278
+ ):
279
+ return StackBaseOffset(None, 64, 0), addr.operand.stack_offset
280
+ if isinstance(addr, BinaryOp):
281
+ if addr.op == "Add" and isinstance(addr.operands[1], Const) and isinstance(addr.operands[1].value, int):
282
+ base_0, offset_0 = InlinedWcscpyConsolidation._parse_addr(addr.operands[0])
283
+ return base_0, offset_0 + addr.operands[1].value
284
+ if addr.op == "Sub" and isinstance(addr.operands[1], Const) and isinstance(addr.operands[1].value, int):
285
+ base_0, offset_0 = InlinedWcscpyConsolidation._parse_addr(addr.operands[0])
286
+ return base_0, offset_0 - addr.operands[1].value
287
+
288
+ return addr, 0
289
+
290
+ @staticmethod
291
+ def _get_delta(addr_0: Expression, addr_1: Expression) -> int | None:
292
+ base_0, offset_0 = InlinedWcscpyConsolidation._parse_addr(addr_0)
293
+ base_1, offset_1 = InlinedWcscpyConsolidation._parse_addr(addr_1)
294
+ if base_0.likes(base_1):
295
+ return offset_1 - offset_0
296
+ return None
@@ -97,6 +97,19 @@ class SimEngineSSARewriting(
97
97
  self._current_vvar_id += 1
98
98
  return self._current_vvar_id
99
99
 
100
+ #
101
+ # Util functions
102
+ #
103
+
104
+ @staticmethod
105
+ def _is_head_controlled_loop_jump(block, jump_stmt: ConditionalJump) -> bool:
106
+ concrete_targets = []
107
+ if isinstance(jump_stmt.true_target, Const):
108
+ concrete_targets.append(jump_stmt.true_target.value)
109
+ if isinstance(jump_stmt.false_target, Const):
110
+ concrete_targets.append(jump_stmt.false_target.value)
111
+ return not all(block.addr <= t < block.addr + block.original_size for t in concrete_targets)
112
+
100
113
  #
101
114
  # Handlers
102
115
  #
@@ -303,7 +316,7 @@ class SimEngineSSARewriting(
303
316
  new_true_target = self._expr(stmt.true_target) if stmt.true_target is not None else None
304
317
  new_false_target = self._expr(stmt.false_target) if stmt.false_target is not None else None
305
318
 
306
- if self.stmt_idx != len(self.block.statements) - 1:
319
+ if self.stmt_idx != len(self.block.statements) - 1 and self._is_head_controlled_loop_jump(self.block, stmt):
307
320
  # the conditional jump is in the middle of the block (e.g., the block generated from lifting rep stosq).
308
321
  # we need to make a copy of the state and use the state of this point in its successor
309
322
  self.head_controlled_loop_outstate = self.state.copy()
@@ -42,6 +42,7 @@ from angr.utils.constants import is_alignment_mask
42
42
  from angr.utils.library import get_cpp_function_name
43
43
  from angr.utils.loader import is_in_readonly_segment, is_in_readonly_section
44
44
  from angr.utils.types import unpack_typeref, unpack_pointer_and_array, dereference_simtype_by_lib
45
+ from angr.utils.strings import decode_utf16_string
45
46
  from angr.analyses.decompiler.utils import structured_node_is_simple_return
46
47
  from angr.analyses.decompiler.notes.deobfuscated_strings import DeobfuscatedStringsNote
47
48
  from angr.errors import UnsupportedNodeTypeError, AngrRuntimeError
@@ -2269,9 +2270,9 @@ class CConstant(CExpression):
2269
2270
  elif isinstance(self._type, SimTypePointer) and isinstance(self._type.pts_to, SimTypeWideChar):
2270
2271
  refval = self.reference_values[self._type]
2271
2272
  v = (
2272
- refval.content.decode("utf_16_le")
2273
+ decode_utf16_string(refval.content)
2273
2274
  if isinstance(refval, MemoryData)
2274
- else refval.decode("utf_16_le")
2275
+ else decode_utf16_string(refval)
2275
2276
  ) # it's a string
2276
2277
  yield CConstant.str_to_c_str(v, prefix="L"), self
2277
2278
  return
@@ -4076,9 +4077,9 @@ class MakeTypecastsImplicit(CStructuredCodeWalker):
4076
4077
  class FieldReferenceCleanup(CStructuredCodeWalker):
4077
4078
  def handle_CTypeCast(self, obj):
4078
4079
  if isinstance(obj.dst_type, SimTypePointer) and not isinstance(obj.dst_type.pts_to, SimTypeBottom):
4079
- obj = obj.codegen._access_reference(obj.expr, obj.dst_type.pts_to)
4080
- if not isinstance(obj, CTypeCast):
4081
- return self.handle(obj)
4080
+ new_obj = obj.codegen._access_reference(obj.expr, obj.dst_type.pts_to)
4081
+ if not isinstance(new_obj, CTypeCast):
4082
+ return self.handle(new_obj)
4082
4083
  return super().handle_CTypeCast(obj)
4083
4084
 
4084
4085
 
@@ -548,7 +548,7 @@ class DreamStructurer(StructurerBase):
548
548
  cmp = switch_extract_cmp_bounds(last_stmt)
549
549
  if not cmp:
550
550
  return False
551
- cmp_expr, cmp_lb, cmp_ub = cmp # pylint:disable=unused-variable
551
+ cmp_expr, cmp_lb, _cmp_ub = cmp # pylint:disable=unused-variable
552
552
 
553
553
  # the real indirect jump
554
554
  if len(addr2nodes[target]) != 1:
@@ -619,7 +619,7 @@ class DreamStructurer(StructurerBase):
619
619
  cmp = switch_extract_cmp_bounds(last_stmt)
620
620
  if not cmp:
621
621
  return False
622
- cmp_expr, cmp_lb, cmp_ub = cmp # pylint:disable=unused-variable
622
+ cmp_expr, cmp_lb, _cmp_ub = cmp # pylint:disable=unused-variable
623
623
 
624
624
  jumptable_entries = jump_table.jumptable_entries
625
625
  assert jumptable_entries is not None
@@ -929,23 +929,55 @@ class PhoenixStructurer(StructurerBase):
929
929
  )
930
930
  break_node = Block(last_src_stmt.ins_addr, None, statements=[break_stmt])
931
931
  else:
932
- break_stmt = Jump(
933
- None,
934
- Const(None, None, successor.addr, self.project.arch.bits),
935
- target_idx=successor.idx if isinstance(successor, Block) else None,
936
- ins_addr=last_src_stmt.ins_addr,
937
- )
938
- break_node_inner = Block(last_src_stmt.ins_addr, None, statements=[break_stmt])
939
- fallthrough_node = next(iter(succ for succ in fullgraph.successors(src) if succ is not dst))
940
- fallthrough_stmt = Jump(
941
- None,
942
- Const(None, None, fallthrough_node.addr, self.project.arch.bits),
943
- target_idx=successor.idx if isinstance(successor, Block) else None,
944
- ins_addr=last_src_stmt.ins_addr,
945
- )
946
- break_node_inner_fallthrough = Block(
947
- last_src_stmt.ins_addr, None, statements=[fallthrough_stmt]
932
+ fallthrough_node = next(
933
+ iter(succ for succ in fullgraph.successors(src) if succ is not dst), None
948
934
  )
935
+ if fallthrough_node is not None:
936
+ # we create a conditional jump that will be converted to a conditional break later
937
+ break_stmt = Jump(
938
+ None,
939
+ Const(None, None, successor.addr, self.project.arch.bits),
940
+ target_idx=successor.idx if isinstance(successor, Block) else None,
941
+ ins_addr=last_src_stmt.ins_addr,
942
+ )
943
+ break_node_inner = Block(last_src_stmt.ins_addr, None, statements=[break_stmt])
944
+ fallthrough_stmt = Jump(
945
+ None,
946
+ Const(None, None, fallthrough_node.addr, self.project.arch.bits),
947
+ target_idx=successor.idx if isinstance(successor, Block) else None,
948
+ ins_addr=last_src_stmt.ins_addr,
949
+ )
950
+ break_node_inner_fallthrough = Block(
951
+ last_src_stmt.ins_addr, None, statements=[fallthrough_stmt]
952
+ )
953
+ else:
954
+ # the fallthrough node does not exist in the graph. we create a conditional jump that
955
+ # jumps to an address
956
+ if not isinstance(last_src_stmt, ConditionalJump):
957
+ raise TypeError(f"Unexpected last_src_stmt type {type(last_src_stmt)}")
958
+ other_target = (
959
+ last_src_stmt.true_target
960
+ if isinstance(last_src_stmt.false_target, Const)
961
+ and last_src_stmt.false_target.value == successor.addr
962
+ else last_src_stmt.false_target
963
+ )
964
+ assert other_target is not None
965
+ break_stmt = Jump(
966
+ None,
967
+ Const(None, None, successor.addr, self.project.arch.bits),
968
+ target_idx=successor.idx if isinstance(successor, Block) else None,
969
+ ins_addr=last_src_stmt.ins_addr,
970
+ )
971
+ break_node_inner = Block(last_src_stmt.ins_addr, None, statements=[break_stmt])
972
+ fallthrough_stmt = Jump(
973
+ None,
974
+ other_target,
975
+ target_idx=successor.idx if isinstance(successor, Block) else None,
976
+ ins_addr=last_src_stmt.ins_addr,
977
+ )
978
+ break_node_inner_fallthrough = Block(
979
+ last_src_stmt.ins_addr, None, statements=[fallthrough_stmt]
980
+ )
949
981
  break_node = ConditionNode(
950
982
  last_src_stmt.ins_addr,
951
983
  None,
@@ -1454,7 +1486,7 @@ class PhoenixStructurer(StructurerBase):
1454
1486
  )
1455
1487
  if not cmp:
1456
1488
  return False
1457
- cmp_expr, cmp_lb, cmp_ub = cmp # pylint:disable=unused-variable
1489
+ cmp_expr, cmp_lb, _cmp_ub = cmp
1458
1490
 
1459
1491
  assert cond_node.addr is not None
1460
1492
  switch_head_addr = cond_node.addr
@@ -1477,7 +1509,7 @@ class PhoenixStructurer(StructurerBase):
1477
1509
  cmp = switch_extract_cmp_bounds(last_stmt)
1478
1510
  if not cmp:
1479
1511
  return False
1480
- cmp_expr, cmp_lb, cmp_ub = cmp # pylint:disable=unused-variable
1512
+ cmp_expr, cmp_lb, _cmp_ub = cmp # pylint:disable=unused-variable
1481
1513
 
1482
1514
  switch_head_addr = last_stmt.ins_addr
1483
1515
 
@@ -1849,7 +1881,7 @@ class PhoenixStructurer(StructurerBase):
1849
1881
  cmp = switch_extract_cmp_bounds(last_stmt)
1850
1882
  if not cmp:
1851
1883
  return False
1852
- cmp_expr, cmp_lb, cmp_ub = cmp # pylint:disable=unused-variable
1884
+ cmp_expr, cmp_lb, _cmp_ub = cmp # pylint:disable=unused-variable
1853
1885
 
1854
1886
  if isinstance(last_stmt.false_target, Const):
1855
1887
  default_addr = last_stmt.false_target.value
@@ -2148,19 +2180,54 @@ class PhoenixStructurer(StructurerBase):
2148
2180
  jump_node = Block(out_src.addr, 0, statements=[jump_stmt])
2149
2181
  case_node.nodes.append(jump_node)
2150
2182
 
2151
- if out_edges_to_head: # noqa:SIM108
2183
+ # out_dst_succ is the successor within the current region
2184
+ # out_dst_succ_fullgraph is the successor outside the current region
2185
+ if out_edges_to_head:
2152
2186
  # add an edge from SwitchCaseNode to head so that a loop will be structured later
2153
2187
  out_dst_succ = head
2188
+ out_dst_succ_fullgraph = None
2154
2189
  else:
2155
2190
  # add an edge from SwitchCaseNode to its most immediate successor (if there is one)
2156
- out_dst_succ = other_out_edges[0][1] if other_out_edges else None
2191
+ # there might be an in-region successor and an out-of-region successor (especially due to the
2192
+ # introduction of self.dowhile_known_tail_nodes)!
2193
+ # example: 7995a0325b446c462bdb6ae10b692eee2ecadd8e888e9d7729befe4412007afb, function 1400EF820
2194
+ out_dst_succs = []
2195
+ out_dst_succs_fullgraph = []
2196
+ for _, o in other_out_edges:
2197
+ if o in graph:
2198
+ out_dst_succs.append(o)
2199
+ elif o in full_graph:
2200
+ out_dst_succs_fullgraph.append(o)
2201
+ out_dst_succ = sorted(out_dst_succs, key=lambda o: o.addr)[0] if out_dst_succs else None
2202
+ out_dst_succ_fullgraph = (
2203
+ sorted(out_dst_succs_fullgraph, key=lambda o: o.addr)[0] if out_dst_succs_fullgraph else None
2204
+ )
2205
+ if len(out_dst_succs) > 1:
2206
+ assert out_dst_succ is not None
2207
+ l.warning(
2208
+ "Multiple in-region successors detected for switch-case node at %#x. Picking %#x as the "
2209
+ "successor and dropping others.",
2210
+ scnode.addr,
2211
+ out_dst_succ.addr,
2212
+ )
2213
+ if len(out_dst_succs_fullgraph) > 1:
2214
+ assert out_dst_succ_fullgraph is not None
2215
+ l.warning(
2216
+ "Multiple out-of-region successors detected for switch-case node at %#x. Picking %#x as the "
2217
+ "successor and dropping others.",
2218
+ scnode.addr,
2219
+ out_dst_succ_fullgraph.addr,
2220
+ )
2157
2221
 
2158
2222
  if out_dst_succ is not None:
2159
- if out_dst_succ in graph:
2160
- graph.add_edge(scnode, out_dst_succ)
2223
+ graph.add_edge(scnode, out_dst_succ)
2161
2224
  full_graph.add_edge(scnode, out_dst_succ)
2162
2225
  if full_graph.has_edge(head, out_dst_succ):
2163
2226
  full_graph.remove_edge(head, out_dst_succ)
2227
+ if out_dst_succ_fullgraph is not None:
2228
+ full_graph.add_edge(scnode, out_dst_succ_fullgraph)
2229
+ if full_graph.has_edge(head, out_dst_succ_fullgraph):
2230
+ full_graph.remove_edge(head, out_dst_succ_fullgraph)
2164
2231
 
2165
2232
  # fix full_graph if needed: remove successors that are no longer needed
2166
2233
  for _out_src, out_dst in other_out_edges:
@@ -2925,6 +2992,17 @@ class PhoenixStructurer(StructurerBase):
2925
2992
  ordered_nodes.remove(postorder_head)
2926
2993
  acyclic_graph.remove_node(postorder_head)
2927
2994
  node_seq = {nn: (len(ordered_nodes) - idx) for (idx, nn) in enumerate(ordered_nodes)} # post-order
2995
+ if len(node_seq) < len(acyclic_graph):
2996
+ # some nodes are not reachable from head - add them to node_seq as well
2997
+ # but this is usually the result of incorrect structuring, so we may still fail at a later point
2998
+ l.warning("Adding %d unreachable nodes to node_seq", len(acyclic_graph) - len(node_seq))
2999
+ unreachable_nodes = sorted(
3000
+ (nn for nn in acyclic_graph if nn not in node_seq),
3001
+ key=lambda n: (n.addr, (-1 if n.idx is None else n.idx) if hasattr(n, "idx") else 0),
3002
+ )
3003
+ max_seq = max(node_seq.values(), default=0)
3004
+ for i, nn in enumerate(unreachable_nodes):
3005
+ node_seq[nn] = max_seq + i
2928
3006
 
2929
3007
  if all_edges_wo_dominance:
2930
3008
  all_edges_wo_dominance = self._order_virtualizable_edges(full_graph, all_edges_wo_dominance, node_seq)
@@ -2,6 +2,7 @@
2
2
  from __future__ import annotations
3
3
  import pathlib
4
4
  import copy
5
+ from types import FunctionType
5
6
  from typing import Any
6
7
  from collections.abc import Iterable
7
8
  import logging
@@ -958,9 +959,15 @@ def peephole_optimize_multistmts(block, stmt_opts):
958
959
  for opt in stmt_opts:
959
960
  matched = False
960
961
  stmt_seq_len = None
961
- for stmt_class_seq in opt.stmt_classes:
962
- if match_stmt_classes(statements, stmt_idx, stmt_class_seq):
963
- stmt_seq_len = len(stmt_class_seq)
962
+ for stmt_class_seq_or_method in opt.stmt_classes:
963
+ if isinstance(stmt_class_seq_or_method, FunctionType):
964
+ r = stmt_class_seq_or_method(statements, stmt_idx)
965
+ if r > 0:
966
+ stmt_seq_len = r
967
+ matched = True
968
+ break
969
+ elif match_stmt_classes(statements, stmt_idx, stmt_class_seq_or_method):
970
+ stmt_seq_len = len(stmt_class_seq_or_method)
964
971
  matched = True
965
972
  break
966
973