pytecode 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pytecode/__init__.py +22 -0
- pytecode/analysis.py +2402 -0
- pytecode/attributes.py +868 -0
- pytecode/bytes_utils.py +208 -0
- pytecode/class_reader.py +810 -0
- pytecode/class_writer.py +630 -0
- pytecode/constant_pool.py +196 -0
- pytecode/constant_pool_builder.py +844 -0
- pytecode/constants.py +208 -0
- pytecode/debug_info.py +319 -0
- pytecode/descriptors.py +791 -0
- pytecode/hierarchy.py +561 -0
- pytecode/info.py +123 -0
- pytecode/instructions.py +495 -0
- pytecode/jar.py +271 -0
- pytecode/labels.py +1041 -0
- pytecode/model.py +929 -0
- pytecode/modified_utf8.py +145 -0
- pytecode/operands.py +683 -0
- pytecode/py.typed +0 -0
- pytecode/transforms.py +954 -0
- pytecode/verify.py +1386 -0
- pytecode-0.0.1.dist-info/METADATA +218 -0
- pytecode-0.0.1.dist-info/RECORD +27 -0
- pytecode-0.0.1.dist-info/WHEEL +5 -0
- pytecode-0.0.1.dist-info/licenses/LICENSE +21 -0
- pytecode-0.0.1.dist-info/top_level.txt +1 -0
pytecode/analysis.py
ADDED
|
@@ -0,0 +1,2402 @@
|
|
|
1
|
+
"""Control-flow graph construction and stack/local simulation.
|
|
2
|
+
|
|
3
|
+
Provides analysis infrastructure for JVM bytecode in the editing model:
|
|
4
|
+
|
|
5
|
+
- **Verification type system** (``VType``) mirroring JVM spec §4.10.1.2
|
|
6
|
+
- **Control-flow graph** construction from ``CodeModel`` instructions
|
|
7
|
+
- **Stack and local variable simulation** with forward dataflow analysis
|
|
8
|
+
- **Frame recomputation** for ``max_stack``, ``max_locals``, and ``StackMapTable``
|
|
9
|
+
- **Type merging** at control-flow join points using the class hierarchy
|
|
10
|
+
|
|
11
|
+
All result types are frozen dataclasses — safe to share across threads.
|
|
12
|
+
The module operates on the symbolic editing model (``CodeModel``) so it
|
|
13
|
+
benefits from label-based branch targets, symbolic operands, and
|
|
14
|
+
exception handlers already bound to labels.
|
|
15
|
+
|
|
16
|
+
References:
|
|
17
|
+
JVM spec §4.7.4 — StackMapTable attribute format.
|
|
18
|
+
JVM spec §4.10.1 — Verification by type checking.
|
|
19
|
+
JVM spec §4.10.1.2 — Verification type system and type merging rules.
|
|
20
|
+
JVM spec §6.5 — Individual opcode definitions (stack effects).
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
from collections import deque
|
|
26
|
+
from collections.abc import Sequence
|
|
27
|
+
from dataclasses import dataclass
|
|
28
|
+
from typing import TYPE_CHECKING
|
|
29
|
+
|
|
30
|
+
from .attributes import (
|
|
31
|
+
AppendFrameInfo,
|
|
32
|
+
ChopFrameInfo,
|
|
33
|
+
DoubleVariableInfo,
|
|
34
|
+
FloatVariableInfo,
|
|
35
|
+
FullFrameInfo,
|
|
36
|
+
IntegerVariableInfo,
|
|
37
|
+
LongVariableInfo,
|
|
38
|
+
NullVariableInfo,
|
|
39
|
+
ObjectVariableInfo,
|
|
40
|
+
SameFrameExtendedInfo,
|
|
41
|
+
SameFrameInfo,
|
|
42
|
+
SameLocals1StackItemFrameExtendedInfo,
|
|
43
|
+
SameLocals1StackItemFrameInfo,
|
|
44
|
+
StackMapFrameInfo,
|
|
45
|
+
StackMapTableAttr,
|
|
46
|
+
TopVariableInfo,
|
|
47
|
+
UninitializedThisVariableInfo,
|
|
48
|
+
UninitializedVariableInfo,
|
|
49
|
+
VerificationTypeInfo,
|
|
50
|
+
)
|
|
51
|
+
from .constants import VerificationType
|
|
52
|
+
from .descriptors import (
|
|
53
|
+
ArrayType as DescArrayType,
|
|
54
|
+
)
|
|
55
|
+
from .descriptors import (
|
|
56
|
+
BaseType,
|
|
57
|
+
FieldDescriptor,
|
|
58
|
+
ObjectType,
|
|
59
|
+
VoidType,
|
|
60
|
+
parse_field_descriptor,
|
|
61
|
+
parse_method_descriptor,
|
|
62
|
+
)
|
|
63
|
+
from .hierarchy import JAVA_LANG_OBJECT, common_superclass
|
|
64
|
+
from .instructions import (
|
|
65
|
+
ArrayType as InsnArrayType,
|
|
66
|
+
)
|
|
67
|
+
from .instructions import (
|
|
68
|
+
InsnInfo,
|
|
69
|
+
InsnInfoType,
|
|
70
|
+
)
|
|
71
|
+
from .labels import (
|
|
72
|
+
BranchInsn,
|
|
73
|
+
CodeItem,
|
|
74
|
+
ExceptionHandler,
|
|
75
|
+
Label,
|
|
76
|
+
LookupSwitchInsn,
|
|
77
|
+
TableSwitchInsn,
|
|
78
|
+
)
|
|
79
|
+
from .operands import (
|
|
80
|
+
FieldInsn,
|
|
81
|
+
IIncInsn,
|
|
82
|
+
InterfaceMethodInsn,
|
|
83
|
+
InvokeDynamicInsn,
|
|
84
|
+
LdcClass,
|
|
85
|
+
LdcDouble,
|
|
86
|
+
LdcFloat,
|
|
87
|
+
LdcInsn,
|
|
88
|
+
LdcInt,
|
|
89
|
+
LdcLong,
|
|
90
|
+
LdcMethodHandle,
|
|
91
|
+
LdcMethodType,
|
|
92
|
+
LdcString,
|
|
93
|
+
MethodInsn,
|
|
94
|
+
MultiANewArrayInsn,
|
|
95
|
+
TypeInsn,
|
|
96
|
+
VarInsn,
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
if TYPE_CHECKING:
|
|
100
|
+
from .constant_pool_builder import ConstantPoolBuilder
|
|
101
|
+
from .hierarchy import ClassResolver
|
|
102
|
+
from .model import CodeModel, MethodModel
|
|
103
|
+
|
|
104
|
+
# ===================================================================
|
|
105
|
+
# Analysis errors
|
|
106
|
+
# ===================================================================
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class AnalysisError(Exception):
    """Root of the exception hierarchy raised by CFG construction and simulation."""


class StackUnderflowError(AnalysisError):
    """The operand stack did not hold as many slots as an operation required."""


class InvalidLocalError(AnalysisError):
    """A local variable slot was out of bounds or read before being initialized."""


class TypeMergeError(AnalysisError):
    """Two frames reaching the same control-flow join point could not be merged."""
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# ===================================================================
|
|
126
|
+
# Verification type system (JVM spec §4.10.1.2)
|
|
127
|
+
# ===================================================================
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@dataclass(frozen=True, slots=True)
|
|
131
|
+
class VTop:
|
|
132
|
+
"""Top type — undefined or second slot of a category-2 value."""
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@dataclass(frozen=True, slots=True)
|
|
136
|
+
class VInteger:
|
|
137
|
+
"""Verification type for int, short, byte, char, boolean."""
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@dataclass(frozen=True, slots=True)
|
|
141
|
+
class VFloat:
|
|
142
|
+
"""Verification type for float."""
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@dataclass(frozen=True, slots=True)
|
|
146
|
+
class VLong:
|
|
147
|
+
"""Verification type for long (occupies 2 slots; second is TOP)."""
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@dataclass(frozen=True, slots=True)
|
|
151
|
+
class VDouble:
|
|
152
|
+
"""Verification type for double (occupies 2 slots; second is TOP)."""
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@dataclass(frozen=True, slots=True)
|
|
156
|
+
class VNull:
|
|
157
|
+
"""Verification type for null — assignable to any reference type."""
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
@dataclass(frozen=True, slots=True)
|
|
161
|
+
class VObject:
|
|
162
|
+
"""Verification type for a reference to a class, interface, or array.
|
|
163
|
+
|
|
164
|
+
Attributes:
|
|
165
|
+
class_name: JVM internal name (e.g. ``"java/lang/String"`` or ``"[I"``).
|
|
166
|
+
"""
|
|
167
|
+
|
|
168
|
+
class_name: str
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
@dataclass(frozen=True, slots=True)
|
|
172
|
+
class VUninitializedThis:
|
|
173
|
+
"""Verification type for ``this`` before the super/this ``<init>`` call."""
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@dataclass(frozen=True, slots=True)
|
|
177
|
+
class VUninitialized:
|
|
178
|
+
"""Verification type for an object created by NEW before ``<init>``.
|
|
179
|
+
|
|
180
|
+
Analysis inserts synthetic labels for unlabeled ``NEW`` instructions so
|
|
181
|
+
edited code can still refer to allocation sites precisely.
|
|
182
|
+
|
|
183
|
+
Attributes:
|
|
184
|
+
new_label: Label identifying the NEW instruction that created this value.
|
|
185
|
+
"""
|
|
186
|
+
|
|
187
|
+
new_label: Label
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# Union of every verification type that may occupy a stack or local slot.
type VType = VTop | VInteger | VFloat | VLong | VDouble | VNull | VObject | VUninitializedThis | VUninitialized


# --- Singletons for stateless types ---
# The field-less types are frozen dataclasses, so all instances compare equal;
# one shared instance of each is created here and reused by the helpers below.

_TOP = VTop()
_INTEGER = VInteger()
_FLOAT = VFloat()
_LONG = VLong()
_DOUBLE = VDouble()
_NULL = VNull()
_UNINIT_THIS = VUninitializedThis()
# Pre-built object types for frequently needed JDK classes.
_OBJECT_OBJECT = VObject(JAVA_LANG_OBJECT)
_OBJECT_STRING = VObject("java/lang/String")
_OBJECT_CLASS = VObject("java/lang/Class")
_OBJECT_METHOD_TYPE = VObject("java/lang/invoke/MethodType")
_OBJECT_METHOD_HANDLE = VObject("java/lang/invoke/MethodHandle")
_OBJECT_THROWABLE = VObject("java/lang/Throwable")
|
|
208
|
+
|
|
209
|
+
# ---------------------------------------------------------------------------
|
|
210
|
+
# VType helpers
|
|
211
|
+
# ---------------------------------------------------------------------------
|
|
212
|
+
|
|
213
|
+
# Map from NEWARRAY atype codes to the name of the primitive *array* type that
# the instruction creates (e.g. INT -> "[I"), not the bare element descriptor.
_NEWARRAY_TYPE_MAP: dict[InsnArrayType, str] = {
    InsnArrayType.BOOLEAN: "[Z",
    InsnArrayType.CHAR: "[C",
    InsnArrayType.FLOAT: "[F",
    InsnArrayType.DOUBLE: "[D",
    InsnArrayType.BYTE: "[B",
    InsnArrayType.SHORT: "[S",
    InsnArrayType.INT: "[I",
    InsnArrayType.LONG: "[J",
}
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def vtype_from_descriptor(fd: FieldDescriptor) -> VType:
    """Map a parsed field descriptor onto its verification type.

    The int-like primitives (int, short, byte, char, boolean) all collapse to
    the integer verification type; float, long and double map to their own
    types; class and array descriptors become ``VObject`` values.

    Args:
        fd: Parsed field descriptor.

    Returns:
        The corresponding verification type.

    Raises:
        ValueError: If *fd* is not a recognised descriptor kind.
    """
    if isinstance(fd, BaseType):
        int_like = {BaseType.INT, BaseType.SHORT, BaseType.BYTE, BaseType.CHAR, BaseType.BOOLEAN}
        if fd in int_like:
            return _INTEGER
        if fd is BaseType.FLOAT:
            return _FLOAT
        if fd is BaseType.LONG:
            return _LONG
        if fd is BaseType.DOUBLE:
            return _DOUBLE
        # Any other base type falls through to the error below.

    if isinstance(fd, ObjectType):
        return VObject(fd.class_name)

    if isinstance(fd, DescArrayType):
        array_name = _descriptor_to_internal(fd)
        return VObject(array_name)

    raise ValueError(f"Unexpected descriptor type: {fd!r}")  # pragma: no cover
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def vtype_from_field_descriptor_str(desc: str) -> VType:
    """Parse a raw field descriptor string and return its verification type.

    Args:
        desc: Field descriptor text, handed to ``parse_field_descriptor``.

    Returns:
        The verification type produced by ``vtype_from_descriptor``.
    """
    parsed = parse_field_descriptor(desc)
    return vtype_from_descriptor(parsed)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _descriptor_to_internal(fd: FieldDescriptor) -> str:
    """Return the JVM internal name stored in ``VObject.class_name`` for *fd*.

    A class type yields its bare internal name (no ``L...;`` wrapper).  Base
    types and arrays have an internal form identical to their descriptor
    string, so those cases are delegated to ``_descriptor_to_component_string``.
    """
    # Base types are tested first so the precedence of the checks is unchanged.
    if not isinstance(fd, BaseType) and isinstance(fd, ObjectType):
        return fd.class_name
    return _descriptor_to_component_string(fd)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _descriptor_to_component_string(fd: FieldDescriptor) -> str:
    """Render *fd* as a JVM descriptor string suitable for an array component.

    Base types render as their single-letter code, class types as
    ``L<name>;``, and each array dimension contributes one leading ``[``.
    """
    dims = ""
    current = fd
    # Peel array dimensions until a base or class type is reached; anything
    # that is neither is treated as an array type.
    while not isinstance(current, (BaseType, ObjectType)):
        dims += "["
        current = current.component_type

    if isinstance(current, BaseType):
        return dims + current.value
    return f"{dims}L{current.class_name};"
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def is_category2(vt: VType) -> bool:
    """Tell whether *vt* is a two-slot (category-2) type, i.e. long or double."""
    return isinstance(vt, (VLong, VDouble))
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def is_reference(vt: VType) -> bool:
    """Tell whether *vt* is a reference type: null, object, or either uninitialized kind."""
    reference_kinds = (VNull, VObject, VUninitializedThis, VUninitialized)
    return isinstance(vt, reference_kinds)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def merge_vtypes(a: VType, b: VType, resolver: ClassResolver | None = None) -> VType:
    """Compute the verification type that results when *a* and *b* meet at a join.

    Rules, applied in order:

    1. Equal types merge to themselves.
    2. ``VNull`` merged with any reference type yields that reference type.
    3. Two ``VObject`` types merge to their common superclass when a
       *resolver* is supplied, and to ``java/lang/Object`` when there is no
       resolver or the hierarchy lookup fails.
    4. Every other combination is incompatible and yields ``VTop``.

    Args:
        a: First verification type.
        b: Second verification type.
        resolver: Optional class hierarchy resolver used for rule 3.

    Returns:
        The merged verification type.
    """
    if a == b:
        return a

    # Rule 2, tried with the null on either side.
    for maybe_null, other in ((a, b), (b, a)):
        if isinstance(maybe_null, VNull) and is_reference(other):
            return other

    both_objects = isinstance(a, VObject) and isinstance(b, VObject)
    if not both_objects:
        return _TOP

    if resolver is None:
        return _OBJECT_OBJECT

    try:
        return VObject(common_superclass(resolver, a.class_name, b.class_name))
    except Exception:
        # Deliberate best effort: any lookup failure degrades to Object.
        return _OBJECT_OBJECT
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
# ===================================================================
|
|
320
|
+
# Frame state
|
|
321
|
+
# ===================================================================
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
@dataclass(frozen=True, slots=True)
|
|
325
|
+
class FrameState:
|
|
326
|
+
"""Immutable snapshot of the operand stack and local variable slots.
|
|
327
|
+
|
|
328
|
+
Category-2 values (long, double) occupy two consecutive slots — the
|
|
329
|
+
value itself followed by ``VTop``.
|
|
330
|
+
|
|
331
|
+
Attributes:
|
|
332
|
+
stack: Operand stack, ordered bottom-to-top.
|
|
333
|
+
locals: Local variable slots indexed by slot number; unset slots
|
|
334
|
+
are ``VTop``.
|
|
335
|
+
"""
|
|
336
|
+
|
|
337
|
+
stack: tuple[VType, ...]
|
|
338
|
+
locals: tuple[VType, ...]
|
|
339
|
+
|
|
340
|
+
# -- Stack operations --
|
|
341
|
+
|
|
342
|
+
def push(self, *types: VType) -> FrameState:
|
|
343
|
+
"""Push one or more types onto the stack (category-2 aware)."""
|
|
344
|
+
new_stack = list(self.stack)
|
|
345
|
+
for vt in types:
|
|
346
|
+
new_stack.append(vt)
|
|
347
|
+
if is_category2(vt):
|
|
348
|
+
new_stack.append(_TOP)
|
|
349
|
+
return FrameState(tuple(new_stack), self.locals)
|
|
350
|
+
|
|
351
|
+
def pop(self, n: int = 1) -> tuple[FrameState, tuple[VType, ...]]:
|
|
352
|
+
"""Pop *n* stack slots and return ``(new_state, popped_values)``.
|
|
353
|
+
|
|
354
|
+
Args:
|
|
355
|
+
n: Number of stack slots to pop.
|
|
356
|
+
|
|
357
|
+
Returns:
|
|
358
|
+
A ``(new_state, popped_values)`` tuple where *popped_values* is
|
|
359
|
+
ordered from topmost to deepest.
|
|
360
|
+
|
|
361
|
+
Raises:
|
|
362
|
+
StackUnderflowError: If the stack has fewer than *n* slots.
|
|
363
|
+
"""
|
|
364
|
+
if len(self.stack) < n:
|
|
365
|
+
raise StackUnderflowError(f"Need {n} slots but stack has {len(self.stack)}")
|
|
366
|
+
if n == 0:
|
|
367
|
+
return self, ()
|
|
368
|
+
remaining = self.stack[:-n]
|
|
369
|
+
popped = tuple(reversed(self.stack[-n:]))
|
|
370
|
+
return FrameState(remaining, self.locals), popped
|
|
371
|
+
|
|
372
|
+
def peek(self, depth: int = 0) -> VType:
|
|
373
|
+
"""Return the type at *depth* slots from the top (0 = top).
|
|
374
|
+
|
|
375
|
+
Raises:
|
|
376
|
+
StackUnderflowError: If *depth* exceeds the current stack size.
|
|
377
|
+
"""
|
|
378
|
+
idx = len(self.stack) - 1 - depth
|
|
379
|
+
if idx < 0:
|
|
380
|
+
raise StackUnderflowError(f"Cannot peek at depth {depth} with stack size {len(self.stack)}")
|
|
381
|
+
return self.stack[idx]
|
|
382
|
+
|
|
383
|
+
# -- Local operations --
|
|
384
|
+
|
|
385
|
+
def set_local(self, index: int, vtype: VType) -> FrameState:
|
|
386
|
+
"""Set a local variable slot (category-2 aware)."""
|
|
387
|
+
needed = index + (2 if is_category2(vtype) else 1)
|
|
388
|
+
locals_list = list(self.locals)
|
|
389
|
+
while len(locals_list) < needed:
|
|
390
|
+
locals_list.append(_TOP)
|
|
391
|
+
locals_list[index] = vtype
|
|
392
|
+
if is_category2(vtype):
|
|
393
|
+
locals_list[index + 1] = _TOP
|
|
394
|
+
return FrameState(self.stack, tuple(locals_list))
|
|
395
|
+
|
|
396
|
+
def get_local(self, index: int) -> VType:
|
|
397
|
+
"""Read a local variable slot.
|
|
398
|
+
|
|
399
|
+
Raises:
|
|
400
|
+
InvalidLocalError: If *index* is out of range or the slot is
|
|
401
|
+
uninitialized.
|
|
402
|
+
"""
|
|
403
|
+
if index < 0 or index >= len(self.locals):
|
|
404
|
+
raise InvalidLocalError(f"Local variable slot {index} is out of range (max {len(self.locals) - 1})")
|
|
405
|
+
vt = self.locals[index]
|
|
406
|
+
if isinstance(vt, VTop):
|
|
407
|
+
raise InvalidLocalError(f"Local variable slot {index} is not initialized")
|
|
408
|
+
return vt
|
|
409
|
+
|
|
410
|
+
@property
|
|
411
|
+
def stack_depth(self) -> int:
|
|
412
|
+
"""Number of stack slots currently occupied."""
|
|
413
|
+
return len(self.stack)
|
|
414
|
+
|
|
415
|
+
@property
|
|
416
|
+
def max_local_index(self) -> int:
|
|
417
|
+
"""Highest local slot index in use (or -1 if no locals)."""
|
|
418
|
+
return len(self.locals) - 1
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
# Shared frame with an empty operand stack and no local variable slots.
_EMPTY_FRAME = FrameState((), ())
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def initial_frame(method: MethodModel, class_name: str) -> FrameState:
    """Construct the ``FrameState`` in effect at a method's first instruction.

    For non-static methods slot 0 holds the receiver: ``VUninitializedThis``
    when the method is ``<init>``, otherwise ``VObject(class_name)``.  The
    declared parameters follow in order, each category-2 parameter taking a
    second ``VTop`` slot.  The operand stack starts empty.

    Args:
        method: Method whose entry frame is wanted.
        class_name: JVM internal name of the class declaring *method*.

    Returns:
        The entry ``FrameState`` for *method*.
    """
    from .constants import MethodAccessFlag

    descriptor = parse_method_descriptor(method.descriptor)
    slots: list[VType] = []

    has_receiver = not (method.access_flags & MethodAccessFlag.STATIC)
    if has_receiver:
        receiver = _UNINIT_THIS if method.name == "<init>" else VObject(class_name)
        slots.append(receiver)

    for param in descriptor.parameter_types:
        param_type = vtype_from_descriptor(param)
        # Wide parameters reserve a trailing TOP slot.
        slots.extend((param_type, _TOP) if is_category2(param_type) else (param_type,))

    return FrameState((), tuple(slots))
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
# ===================================================================
|
|
459
|
+
# Merging frame states
|
|
460
|
+
# ===================================================================
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def _merge_frames(a: FrameState, b: FrameState, resolver: ClassResolver | None) -> FrameState:
    """Combine two frames that reach the same control-flow join point.

    Stack slots are merged pairwise with ``merge_vtypes`` and must line up
    exactly.  Local slots are merged pairwise as well, after the shorter
    locals tuple has been padded with ``VTop`` to the length of the longer.

    Raises:
        TypeMergeError: If the two operand stacks have different depths.
    """
    depth_a, depth_b = len(a.stack), len(b.stack)
    if depth_a != depth_b:
        raise TypeMergeError(f"Stack depths differ at join point: {depth_a} vs {depth_b}")

    stack = tuple(merge_vtypes(left, right, resolver) for left, right in zip(a.stack, b.stack))

    # Pad both sides to a common width so the locals can be zipped slot by slot.
    width = max(len(a.locals), len(b.locals))
    padded_a = tuple(a.locals) + (_TOP,) * (width - len(a.locals))
    padded_b = tuple(b.locals) + (_TOP,) * (width - len(b.locals))
    merged = tuple(merge_vtypes(left, right, resolver) for left, right in zip(padded_a, padded_b))

    return FrameState(stack, merged)
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
# ===================================================================
|
|
485
|
+
# Opcode metadata
|
|
486
|
+
# ===================================================================
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
@dataclass(frozen=True, slots=True)
|
|
490
|
+
class OpcodeEffect:
|
|
491
|
+
"""Static stack effect and control-flow metadata for an opcode.
|
|
492
|
+
|
|
493
|
+
``pops`` and ``pushes`` are ``-1`` for opcodes whose stack effects depend on
|
|
494
|
+
the operand (invoke, field access, LDC, multianewarray). Those are
|
|
495
|
+
computed dynamically during simulation from the instruction's symbolic
|
|
496
|
+
operand metadata.
|
|
497
|
+
|
|
498
|
+
Attributes:
|
|
499
|
+
pops: Number of stack slots consumed (``-1`` if variable).
|
|
500
|
+
pushes: Number of stack slots produced (``-1`` if variable).
|
|
501
|
+
is_branch: ``True`` for branch instructions.
|
|
502
|
+
is_unconditional: ``True`` for unconditional transfers (goto, switch,
|
|
503
|
+
athrow).
|
|
504
|
+
is_switch: ``True`` for tableswitch/lookupswitch.
|
|
505
|
+
is_return: ``True`` for return instructions.
|
|
506
|
+
"""
|
|
507
|
+
|
|
508
|
+
pops: int
|
|
509
|
+
pushes: int
|
|
510
|
+
is_branch: bool = False
|
|
511
|
+
is_unconditional: bool = False
|
|
512
|
+
is_switch: bool = False
|
|
513
|
+
is_return: bool = False
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
# Short alias for the opcode enum, used to keep table entries compact.
_T = InsnInfoType

# Shorthand constructors
# ``_simple(pops, pushes)`` is OpcodeEffect itself: every control-flow flag
# keeps its default of False.
_simple = OpcodeEffect
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def _branch(p: int, u: bool) -> OpcodeEffect:
    """Build the effect of a branch that pops *p* slots and pushes nothing.

    *u* marks the branch as unconditional (no fall-through).
    """
    return OpcodeEffect(pops=p, pushes=0, is_branch=True, is_unconditional=u)
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
def _ret(p: int) -> OpcodeEffect:
    """Build the effect of a return-like instruction that pops *p* slots and pushes nothing."""
    return OpcodeEffect(pops=p, pushes=0, is_return=True)
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
# Shared effect for both switch opcodes: pops the int key, never falls through.
_switch = OpcodeEffect(1, 0, is_branch=True, is_unconditional=True, is_switch=True)
_var = OpcodeEffect(-1, -1)  # variable — resolved during simulation

# Static effect of every opcode, keyed by instruction type.  Pop and push
# counts are in stack slots, so long/double operands count as two.
OPCODE_EFFECTS: dict[InsnInfoType, OpcodeEffect] = {
    # --- Constants ---
    _T.NOP: _simple(0, 0),
    _T.ACONST_NULL: _simple(0, 1),
    _T.ICONST_M1: _simple(0, 1),
    _T.ICONST_0: _simple(0, 1),
    _T.ICONST_1: _simple(0, 1),
    _T.ICONST_2: _simple(0, 1),
    _T.ICONST_3: _simple(0, 1),
    _T.ICONST_4: _simple(0, 1),
    _T.ICONST_5: _simple(0, 1),
    _T.LCONST_0: _simple(0, 2),
    _T.LCONST_1: _simple(0, 2),
    _T.FCONST_0: _simple(0, 1),
    _T.FCONST_1: _simple(0, 1),
    _T.FCONST_2: _simple(0, 1),
    _T.DCONST_0: _simple(0, 2),
    _T.DCONST_1: _simple(0, 2),
    _T.BIPUSH: _simple(0, 1),
    _T.SIPUSH: _simple(0, 1),
    _T.LDC: _var,
    _T.LDC_W: _var,
    _T.LDC2_W: _var,
    # --- Loads (raw forms — in editing model these are VarInsn) ---
    _T.ILOAD: _simple(0, 1),
    _T.LLOAD: _simple(0, 2),
    _T.FLOAD: _simple(0, 1),
    _T.DLOAD: _simple(0, 2),
    _T.ALOAD: _simple(0, 1),
    _T.ILOAD_0: _simple(0, 1),
    _T.ILOAD_1: _simple(0, 1),
    _T.ILOAD_2: _simple(0, 1),
    _T.ILOAD_3: _simple(0, 1),
    _T.LLOAD_0: _simple(0, 2),
    _T.LLOAD_1: _simple(0, 2),
    _T.LLOAD_2: _simple(0, 2),
    _T.LLOAD_3: _simple(0, 2),
    _T.FLOAD_0: _simple(0, 1),
    _T.FLOAD_1: _simple(0, 1),
    _T.FLOAD_2: _simple(0, 1),
    _T.FLOAD_3: _simple(0, 1),
    _T.DLOAD_0: _simple(0, 2),
    _T.DLOAD_1: _simple(0, 2),
    _T.DLOAD_2: _simple(0, 2),
    _T.DLOAD_3: _simple(0, 2),
    _T.ALOAD_0: _simple(0, 1),
    _T.ALOAD_1: _simple(0, 1),
    _T.ALOAD_2: _simple(0, 1),
    _T.ALOAD_3: _simple(0, 1),
    # --- Array loads ---
    _T.IALOAD: _simple(2, 1),
    _T.LALOAD: _simple(2, 2),
    _T.FALOAD: _simple(2, 1),
    _T.DALOAD: _simple(2, 2),
    _T.AALOAD: _simple(2, 1),
    _T.BALOAD: _simple(2, 1),
    _T.CALOAD: _simple(2, 1),
    _T.SALOAD: _simple(2, 1),
    # --- Stores (raw forms) ---
    _T.ISTORE: _simple(1, 0),
    _T.LSTORE: _simple(2, 0),
    _T.FSTORE: _simple(1, 0),
    _T.DSTORE: _simple(2, 0),
    _T.ASTORE: _simple(1, 0),
    _T.ISTORE_0: _simple(1, 0),
    _T.ISTORE_1: _simple(1, 0),
    _T.ISTORE_2: _simple(1, 0),
    _T.ISTORE_3: _simple(1, 0),
    _T.LSTORE_0: _simple(2, 0),
    _T.LSTORE_1: _simple(2, 0),
    _T.LSTORE_2: _simple(2, 0),
    _T.LSTORE_3: _simple(2, 0),
    _T.FSTORE_0: _simple(1, 0),
    _T.FSTORE_1: _simple(1, 0),
    _T.FSTORE_2: _simple(1, 0),
    _T.FSTORE_3: _simple(1, 0),
    _T.DSTORE_0: _simple(2, 0),
    _T.DSTORE_1: _simple(2, 0),
    _T.DSTORE_2: _simple(2, 0),
    _T.DSTORE_3: _simple(2, 0),
    _T.ASTORE_0: _simple(1, 0),
    _T.ASTORE_1: _simple(1, 0),
    _T.ASTORE_2: _simple(1, 0),
    _T.ASTORE_3: _simple(1, 0),
    # --- Array stores ---
    _T.IASTORE: _simple(3, 0),
    _T.LASTORE: _simple(4, 0),
    _T.FASTORE: _simple(3, 0),
    _T.DASTORE: _simple(4, 0),
    _T.AASTORE: _simple(3, 0),
    _T.BASTORE: _simple(3, 0),
    _T.CASTORE: _simple(3, 0),
    _T.SASTORE: _simple(3, 0),
    # --- Stack manipulation ---
    _T.POP: _simple(1, 0),
    _T.POP2: _simple(2, 0),
    _T.DUP: _simple(1, 2),
    _T.DUP_X1: _simple(2, 3),
    _T.DUP_X2: _simple(3, 4),
    _T.DUP2: _simple(2, 4),
    _T.DUP2_X1: _simple(3, 5),
    _T.DUP2_X2: _simple(4, 6),
    _T.SWAP: _simple(2, 2),
    # --- Integer arithmetic ---
    _T.IADD: _simple(2, 1),
    _T.ISUB: _simple(2, 1),
    _T.IMUL: _simple(2, 1),
    _T.IDIV: _simple(2, 1),
    _T.IREM: _simple(2, 1),
    _T.INEG: _simple(1, 1),
    _T.ISHL: _simple(2, 1),
    _T.ISHR: _simple(2, 1),
    _T.IUSHR: _simple(2, 1),
    _T.IAND: _simple(2, 1),
    _T.IOR: _simple(2, 1),
    _T.IXOR: _simple(2, 1),
    # --- Long arithmetic ---
    _T.LADD: _simple(4, 2),
    _T.LSUB: _simple(4, 2),
    _T.LMUL: _simple(4, 2),
    _T.LDIV: _simple(4, 2),
    _T.LREM: _simple(4, 2),
    _T.LNEG: _simple(2, 2),
    # Long shifts take a long value (2 slots) plus an int shift count (1 slot).
    _T.LSHL: _simple(3, 2),
    _T.LSHR: _simple(3, 2),
    _T.LUSHR: _simple(3, 2),
    _T.LAND: _simple(4, 2),
    _T.LOR: _simple(4, 2),
    _T.LXOR: _simple(4, 2),
    # --- Float arithmetic ---
    _T.FADD: _simple(2, 1),
    _T.FSUB: _simple(2, 1),
    _T.FMUL: _simple(2, 1),
    _T.FDIV: _simple(2, 1),
    _T.FREM: _simple(2, 1),
    _T.FNEG: _simple(1, 1),
    # --- Double arithmetic ---
    _T.DADD: _simple(4, 2),
    _T.DSUB: _simple(4, 2),
    _T.DMUL: _simple(4, 2),
    _T.DDIV: _simple(4, 2),
    _T.DREM: _simple(4, 2),
    _T.DNEG: _simple(2, 2),
    # --- Conversions ---
    _T.I2L: _simple(1, 2),
    _T.I2F: _simple(1, 1),
    _T.I2D: _simple(1, 2),
    _T.L2I: _simple(2, 1),
    _T.L2F: _simple(2, 1),
    _T.L2D: _simple(2, 2),
    _T.F2I: _simple(1, 1),
    _T.F2L: _simple(1, 2),
    _T.F2D: _simple(1, 2),
    _T.D2I: _simple(2, 1),
    _T.D2L: _simple(2, 2),
    _T.D2F: _simple(2, 1),
    _T.I2B: _simple(1, 1),
    _T.I2C: _simple(1, 1),
    _T.I2S: _simple(1, 1),
    # --- Comparisons ---
    _T.LCMP: _simple(4, 1),
    _T.FCMPL: _simple(2, 1),
    _T.FCMPG: _simple(2, 1),
    _T.DCMPL: _simple(4, 1),
    _T.DCMPG: _simple(4, 1),
    # --- Conditional branches (pop 1 int) ---
    _T.IFEQ: _branch(1, False),
    _T.IFNE: _branch(1, False),
    _T.IFLT: _branch(1, False),
    _T.IFGE: _branch(1, False),
    _T.IFGT: _branch(1, False),
    _T.IFLE: _branch(1, False),
    # --- Conditional branches (pop 2 ints) ---
    _T.IF_ICMPEQ: _branch(2, False),
    _T.IF_ICMPNE: _branch(2, False),
    _T.IF_ICMPLT: _branch(2, False),
    _T.IF_ICMPGE: _branch(2, False),
    _T.IF_ICMPGT: _branch(2, False),
    _T.IF_ICMPLE: _branch(2, False),
    # --- Reference conditional branches ---
    _T.IF_ACMPEQ: _branch(2, False),
    _T.IF_ACMPNE: _branch(2, False),
    _T.IFNULL: _branch(1, False),
    _T.IFNONNULL: _branch(1, False),
    # --- Unconditional branches ---
    _T.GOTO: _branch(0, True),
    _T.GOTO_W: _branch(0, True),
    # --- Subroutine (legacy, pre-Java 6) ---
    _T.JSR: OpcodeEffect(0, 1, is_branch=True, is_unconditional=True),
    _T.JSR_W: OpcodeEffect(0, 1, is_branch=True, is_unconditional=True),
    _T.RET: OpcodeEffect(0, 0, is_branch=True, is_unconditional=True),
    # --- Switch ---
    _T.TABLESWITCH: _switch,
    _T.LOOKUPSWITCH: _switch,
    # --- Returns ---
    _T.IRETURN: _ret(1),
    _T.LRETURN: _ret(2),
    _T.FRETURN: _ret(1),
    _T.DRETURN: _ret(2),
    _T.ARETURN: _ret(1),
    _T.RETURN: _ret(0),
    # --- Field access (variable effect) ---
    _T.GETFIELD: _var,
    _T.PUTFIELD: _var,
    _T.GETSTATIC: _var,
    _T.PUTSTATIC: _var,
    # --- Method invocation (variable effect) ---
    _T.INVOKEVIRTUAL: _var,
    _T.INVOKESPECIAL: _var,
    _T.INVOKESTATIC: _var,
    _T.INVOKEINTERFACE: _var,
    _T.INVOKEDYNAMIC: _var,
    # --- Object creation ---
    _T.NEW: _simple(0, 1),
    _T.NEWARRAY: _simple(1, 1),
    _T.ANEWARRAY: _simple(1, 1),
    _T.MULTIANEWARRAY: _var,
    _T.ARRAYLENGTH: _simple(1, 1),
    # --- Type operations ---
    _T.CHECKCAST: _simple(1, 1),
    _T.INSTANCEOF: _simple(1, 1),
    # --- Throw ---
    # Built with _ret, so ATHROW is flagged is_return (not is_unconditional).
    _T.ATHROW: _ret(1),
    # --- Monitor ---
    _T.MONITORENTER: _simple(1, 0),
    _T.MONITOREXIT: _simple(1, 0),
    # --- IINC (no stack change) ---
    _T.IINC: _simple(0, 0),
    # --- WIDE variants (same effect as non-wide) ---
    _T.WIDE: _simple(0, 0),
    _T.ILOADW: _simple(0, 1),
    _T.LLOADW: _simple(0, 2),
    _T.FLOADW: _simple(0, 1),
    _T.DLOADW: _simple(0, 2),
    _T.ALOADW: _simple(0, 1),
    _T.ISTOREW: _simple(1, 0),
    _T.LSTOREW: _simple(2, 0),
    _T.FSTOREW: _simple(1, 0),
    _T.DSTOREW: _simple(2, 0),
    _T.ASTOREW: _simple(1, 0),
    _T.RETW: OpcodeEffect(0, 0, is_branch=True, is_unconditional=True),
    _T.IINCW: _simple(0, 0),
}
|
|
776
|
+
|
|
777
|
+
|
|
778
|
+
def _is_terminal(insn: InsnInfo) -> bool:
    """Report whether control can never fall through past *insn*.

    An instruction counts as terminal when its ``OPCODE_EFFECTS`` entry is
    flagged as an unconditional transfer or as a return. Opcodes that have no
    entry in the table are treated as non-terminal.
    """
    known = OPCODE_EFFECTS.get(insn.type)
    return known is not None and (known.is_unconditional or known.is_return)
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
def _is_branch_or_switch(insn: InsnInfo) -> bool:
    """Report whether the ``OPCODE_EFFECTS`` table marks *insn* as a branch.

    Opcodes that have no entry in the table are reported as non-branching.
    """
    known = OPCODE_EFFECTS.get(insn.type)
    return False if known is None else known.is_branch
|
|
792
|
+
|
|
793
|
+
|
|
794
|
+
# ===================================================================
|
|
795
|
+
# Control-flow graph
|
|
796
|
+
# ===================================================================
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
@dataclass(slots=True)
|
|
800
|
+
class BasicBlock:
|
|
801
|
+
"""A maximal straight-line sequence of instructions within a method.
|
|
802
|
+
|
|
803
|
+
Mutable during construction, then frozen by ``build_cfg``.
|
|
804
|
+
|
|
805
|
+
Attributes:
|
|
806
|
+
id: Unique block index within the CFG.
|
|
807
|
+
label: Label at the start of this block, if any.
|
|
808
|
+
instructions: Ordered instructions in this block.
|
|
809
|
+
successor_ids: Block ids of normal-flow successors.
|
|
810
|
+
exception_handler_ids: ``(handler_block_id, catch_type)`` pairs for
|
|
811
|
+
active exception handlers.
|
|
812
|
+
"""
|
|
813
|
+
|
|
814
|
+
id: int
|
|
815
|
+
label: Label | None
|
|
816
|
+
instructions: list[InsnInfo]
|
|
817
|
+
successor_ids: list[int]
|
|
818
|
+
exception_handler_ids: list[tuple[int, str | None]]
|
|
819
|
+
|
|
820
|
+
def __repr__(self) -> str:
|
|
821
|
+
label_str = f" ({self.label!r})" if self.label is not None else ""
|
|
822
|
+
return f"BasicBlock(id={self.id}{label_str}, insns={len(self.instructions)}, succs={self.successor_ids})"
|
|
823
|
+
|
|
824
|
+
|
|
825
|
+
@dataclass(frozen=True, slots=True)
|
|
826
|
+
class ExceptionEdge:
|
|
827
|
+
"""An exception edge from a protected block to a handler block.
|
|
828
|
+
|
|
829
|
+
Attributes:
|
|
830
|
+
handler_block_id: Block id of the exception handler.
|
|
831
|
+
catch_type: Internal name of the caught exception type, or ``None``
|
|
832
|
+
for a catch-all (``finally``).
|
|
833
|
+
"""
|
|
834
|
+
|
|
835
|
+
handler_block_id: int
|
|
836
|
+
catch_type: str | None
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
@dataclass(frozen=True, slots=True)
|
|
840
|
+
class ControlFlowGraph:
|
|
841
|
+
"""Control-flow graph for a method's code body.
|
|
842
|
+
|
|
843
|
+
Attributes:
|
|
844
|
+
entry: The entry basic block.
|
|
845
|
+
blocks: All blocks, ordered to match the original instruction sequence.
|
|
846
|
+
exception_handlers: Exception handler declarations from the code.
|
|
847
|
+
label_to_block: Mapping from labels to the block they start.
|
|
848
|
+
"""
|
|
849
|
+
|
|
850
|
+
entry: BasicBlock
|
|
851
|
+
blocks: tuple[BasicBlock, ...]
|
|
852
|
+
exception_handlers: tuple[ExceptionHandler, ...]
|
|
853
|
+
label_to_block: dict[Label, BasicBlock]
|
|
854
|
+
|
|
855
|
+
|
|
856
|
+
def build_cfg(code: CodeModel) -> ControlFlowGraph:
    """Construct a control-flow graph from a ``CodeModel``.

    Partitions the instruction stream into basic blocks and builds edges
    for branches, fall-through, and exception handlers.

    A new block starts at the first instruction, at the first instruction
    following any label that is a branch/switch target or an exception
    handler boundary, and at the instruction following any branch, switch,
    return or other terminal instruction.

    Args:
        code: The code model to partition into basic blocks.

    Returns:
        A ``ControlFlowGraph`` with edges for all control-flow paths. When
        *code* contains no real instructions (it is empty or holds only
        labels) the graph consists of a single empty block and every label
        is mapped to that block.
    """
    items = code.instructions
    handlers = tuple(code.exception_handlers)

    first_insn_idx = _find_first_insn(items)
    if first_insn_idx is None:
        # No real instructions at all. This also covers label-only code whose
        # labels are referenced by exception handlers, which must not fall
        # through to the block-building steps (there would be no block 0).
        empty_block = BasicBlock(id=0, label=None, instructions=[], successor_ids=[], exception_handler_ids=[])
        return ControlFlowGraph(
            entry=empty_block,
            blocks=(empty_block,),
            exception_handlers=handlers,
            label_to_block={item: empty_block for item in items if isinstance(item, Label)},
        )

    # Step 1: Identify block leaders (indices into ``items`` of the
    # instructions that start a block).
    # Labels are tracked by id() so the scan does not depend on Label hashing.
    target_labels: set[int] = set()
    for item in items:
        if isinstance(item, BranchInsn):
            target_labels.add(id(item.target))
        elif isinstance(item, LookupSwitchInsn):
            target_labels.add(id(item.default_target))
            for _, lbl in item.pairs:
                target_labels.add(id(lbl))
        elif isinstance(item, TableSwitchInsn):
            target_labels.add(id(item.default_target))
            for lbl in item.targets:
                target_labels.add(id(lbl))
    for eh in handlers:
        target_labels.add(id(eh.start))
        target_labels.add(id(eh.end))
        target_labels.add(id(eh.handler))

    leader_set: set[int] = {first_insn_idx}
    split_before_next = False
    for i, item in enumerate(items):
        if isinstance(item, Label):
            if id(item) in target_labels:
                # The next real instruction after a target label is a leader.
                # A target label with nothing after it starts no block.
                next_insn = _find_next_insn(items, i + 1)
                if next_insn is not None:
                    leader_set.add(next_insn)
            continue

        # item is an instruction.
        if split_before_next:
            leader_set.add(i)
        # Always split after branches/switches (even conditional ones) and
        # after terminal instructions so block boundaries stay clean.
        split_before_next = _is_terminal(item) or _is_branch_or_switch(item)

    # Step 2: Build blocks and map every label to a block.
    blocks: list[BasicBlock] = []
    label_to_block_map: dict[Label, BasicBlock] = {}
    current_block: BasicBlock | None = None
    pending_labels: list[Label] = []
    # id() of labels that have no instruction after them, i.e. labels that
    # denote the end of the code rather than the start of an instruction.
    trailing_labels: set[int] = set()

    for i, item in enumerate(items):
        if isinstance(item, Label):
            next_insn = _find_next_insn(items, i + 1)
            if next_insn is None:
                trailing_labels.add(id(item))
            if current_block is not None and (next_insn is None or next_insn not in leader_set):
                # Label in the middle of (or trailing) the current block.
                label_to_block_map[item] = current_block
            else:
                # Label belongs to the block that starts at the next leader.
                pending_labels.append(item)
            continue

        if i in leader_set:
            current_block = BasicBlock(
                id=len(blocks),
                label=pending_labels[0] if pending_labels else None,
                instructions=[],
                successor_ids=[],
                exception_handler_ids=[],
            )
            for lbl in pending_labels:
                label_to_block_map[lbl] = current_block
            pending_labels = []
            blocks.append(current_block)

        if current_block is not None:
            current_block.instructions.append(item)

    # Step 3: Build normal-flow edges. Every block holds at least its leader.
    for idx, block in enumerate(blocks):
        last_insn = block.instructions[-1]
        effect = OPCODE_EFFECTS.get(last_insn.type)

        # Branch targets
        if isinstance(last_insn, BranchInsn):
            target_block = label_to_block_map.get(last_insn.target)
            if target_block is not None:
                block.successor_ids.append(target_block.id)
        elif isinstance(last_insn, LookupSwitchInsn):
            default_block = label_to_block_map.get(last_insn.default_target)
            if default_block is not None:
                block.successor_ids.append(default_block.id)
            for _, lbl in last_insn.pairs:
                target_block = label_to_block_map.get(lbl)
                if target_block is not None and target_block.id not in block.successor_ids:
                    block.successor_ids.append(target_block.id)
        elif isinstance(last_insn, TableSwitchInsn):
            default_block = label_to_block_map.get(last_insn.default_target)
            if default_block is not None:
                block.successor_ids.append(default_block.id)
            for lbl in last_insn.targets:
                target_block = label_to_block_map.get(lbl)
                if target_block is not None and target_block.id not in block.successor_ids:
                    block.successor_ids.append(target_block.id)

        # Fall-through edge (only if not unconditional/terminal)
        is_terminal_insn = effect is not None and (effect.is_unconditional or effect.is_return)
        if not is_terminal_insn and idx + 1 < len(blocks):
            block.successor_ids.append(blocks[idx + 1].id)

    # Step 4: Build exception handler edges for blocks in [start, end).
    for eh in handlers:
        start_block = label_to_block_map.get(eh.start)
        handler_block = label_to_block_map.get(eh.handler)
        if start_block is None or handler_block is None:
            continue
        if id(eh.start) in trailing_labels:
            # The protected range begins at the end of the code: it is empty.
            continue

        end_block = label_to_block_map.get(eh.end)
        if end_block is None or id(eh.end) in trailing_labels:
            # An end label after the last instruction is mapped to the last
            # block, but as an exclusive bound it means "through the end of
            # the code", so the last block must stay inside the range.
            end_id = len(blocks)
        else:
            end_id = end_block.id

        edge = (handler_block.id, eh.catch_type)
        # Block ids equal list positions, so the range is a plain slice.
        for block in blocks[start_block.id : end_id]:
            if edge not in block.exception_handler_ids:
                block.exception_handler_ids.append(edge)

    return ControlFlowGraph(
        entry=blocks[0],
        blocks=tuple(blocks),
        exception_handlers=handlers,
        label_to_block=label_to_block_map,
    )
|
|
1079
|
+
|
|
1080
|
+
|
|
1081
|
+
def _find_first_insn(items: list[CodeItem]) -> int | None:
|
|
1082
|
+
"""Return the index of the first real instruction in *items*."""
|
|
1083
|
+
for i, item in enumerate(items):
|
|
1084
|
+
if isinstance(item, InsnInfo):
|
|
1085
|
+
return i
|
|
1086
|
+
return None
|
|
1087
|
+
|
|
1088
|
+
|
|
1089
|
+
def _find_next_insn(items: list[CodeItem], start: int) -> int | None:
|
|
1090
|
+
"""Return the index of the next real instruction at or after *start*."""
|
|
1091
|
+
for i in range(start, len(items)):
|
|
1092
|
+
if isinstance(items[i], InsnInfo):
|
|
1093
|
+
return i
|
|
1094
|
+
return None
|
|
1095
|
+
|
|
1096
|
+
|
|
1097
|
+
# ===================================================================
|
|
1098
|
+
# Stack and local simulation
|
|
1099
|
+
# ===================================================================
|
|
1100
|
+
|
|
1101
|
+
|
|
1102
|
+
@dataclass(frozen=True, slots=True)
|
|
1103
|
+
class SimulationResult:
|
|
1104
|
+
"""Results of forward dataflow stack/local simulation.
|
|
1105
|
+
|
|
1106
|
+
Attributes:
|
|
1107
|
+
entry_states: Mapping from block id to the frame state on entry.
|
|
1108
|
+
exit_states: Mapping from block id to the frame state on exit.
|
|
1109
|
+
max_stack: Maximum operand stack depth observed.
|
|
1110
|
+
max_locals: Maximum local variable slot count observed.
|
|
1111
|
+
"""
|
|
1112
|
+
|
|
1113
|
+
entry_states: dict[int, FrameState]
|
|
1114
|
+
exit_states: dict[int, FrameState]
|
|
1115
|
+
max_stack: int
|
|
1116
|
+
max_locals: int
|
|
1117
|
+
|
|
1118
|
+
|
|
1119
|
+
def simulate(
    cfg: ControlFlowGraph,
    code: CodeModel,
    method: MethodModel,
    class_name: str,
    resolver: ClassResolver | None = None,
) -> SimulationResult:
    """Simulate stack and locals over *cfg* with a worklist dataflow pass.

    Starting from the method's initial frame at the entry block, each queued
    block is simulated instruction by instruction. The resulting frame is
    merged into every successor, and a successor is (re-)queued whenever its
    entry frame changes.

    Args:
        cfg: Control-flow graph to analyze.
        code: Code model providing the instruction stream.
        method: Method model from which the initial frame is derived.
        class_name: JVM internal name of the enclosing class.
        resolver: Optional class hierarchy resolver passed to frame merging.

    Returns:
        A ``SimulationResult`` holding the entry/exit frame of every processed
        block and the largest stack depth and local count encountered.
    """
    if not cfg.blocks:
        # Nothing to simulate: only the initial frame determines max_locals.
        return SimulationResult(
            entry_states={},
            exit_states={},
            max_stack=0,
            max_locals=len(initial_frame(method, class_name).locals),
        )

    analysis_code = _prepare_analysis_code(code)
    start_frame = initial_frame(method, class_name)
    entry_id = cfg.entry.id

    frames_in: dict[int, FrameState] = {entry_id: start_frame}
    frames_out: dict[int, FrameState] = {}

    # FIFO of block ids awaiting processing, plus a set for O(1) membership.
    pending: deque[int] = deque([entry_id])
    queued: set[int] = {entry_id}

    deepest_stack = 0
    widest_locals = len(start_frame.locals)

    blocks_by_id = {blk.id: blk for blk in cfg.blocks}

    while pending:
        current_id = pending.popleft()
        queued.discard(current_id)

        block = blocks_by_id[current_id]
        if current_id not in frames_in:
            continue

        frame = frames_in[current_id]
        deepest_stack = max(deepest_stack, frame.stack_depth)
        widest_locals = max(widest_locals, len(frame.locals))

        for insn in block.instructions:
            # Handlers receive the frame as it is *before* the throwing
            # instruction executes.
            if block.exception_handler_ids and _instruction_may_throw(insn):
                _propagate_exception_handlers(
                    block.exception_handler_ids,
                    frame,
                    frames_in,
                    pending,
                    queued,
                    resolver,
                )
            frame = _simulate_insn(insn, frame, analysis_code, class_name)
            deepest_stack = max(deepest_stack, frame.stack_depth)
            widest_locals = max(widest_locals, len(frame.locals))

        frames_out[current_id] = frame

        for successor in block.successor_ids:
            _propagate(successor, frame, frames_in, pending, queued, resolver)

    return SimulationResult(
        entry_states=frames_in,
        exit_states=frames_out,
        max_stack=deepest_stack,
        max_locals=widest_locals,
    )
|
|
1210
|
+
|
|
1211
|
+
|
|
1212
|
+
def _propagate(
|
|
1213
|
+
target_id: int,
|
|
1214
|
+
incoming: FrameState,
|
|
1215
|
+
entry_states: dict[int, FrameState],
|
|
1216
|
+
worklist: deque[int],
|
|
1217
|
+
in_worklist: set[int],
|
|
1218
|
+
resolver: ClassResolver | None,
|
|
1219
|
+
) -> None:
|
|
1220
|
+
"""Merge *incoming* into the entry state of *target_id* and enqueue if changed."""
|
|
1221
|
+
if target_id in entry_states:
|
|
1222
|
+
existing = entry_states[target_id]
|
|
1223
|
+
try:
|
|
1224
|
+
merged = _merge_frames(existing, incoming, resolver)
|
|
1225
|
+
except TypeMergeError as exc:
|
|
1226
|
+
raise TypeMergeError(f"Cannot merge incoming frame into block {target_id}: {exc}") from exc
|
|
1227
|
+
if merged == existing:
|
|
1228
|
+
return # No change — don't re-process.
|
|
1229
|
+
entry_states[target_id] = merged
|
|
1230
|
+
else:
|
|
1231
|
+
entry_states[target_id] = incoming
|
|
1232
|
+
|
|
1233
|
+
if target_id not in in_worklist:
|
|
1234
|
+
worklist.append(target_id)
|
|
1235
|
+
in_worklist.add(target_id)
|
|
1236
|
+
|
|
1237
|
+
|
|
1238
|
+
# Opcodes that ``_instruction_may_throw`` treats as unable to reach an
# exception handler. Anything not listed here is considered potentially
# throwing.
_NON_THROWING_RAW_OPCODES: frozenset[InsnInfoType] = frozenset(
    (
        # Constants and immediate pushes
        _T.NOP, _T.ACONST_NULL,
        _T.ICONST_M1, _T.ICONST_0, _T.ICONST_1, _T.ICONST_2, _T.ICONST_3, _T.ICONST_4, _T.ICONST_5,
        _T.LCONST_0, _T.LCONST_1,
        _T.FCONST_0, _T.FCONST_1, _T.FCONST_2,
        _T.DCONST_0, _T.DCONST_1,
        _T.BIPUSH, _T.SIPUSH,
        # Local variable loads
        _T.ILOAD, _T.ILOAD_0, _T.ILOAD_1, _T.ILOAD_2, _T.ILOAD_3, _T.ILOADW,
        _T.LLOAD, _T.LLOAD_0, _T.LLOAD_1, _T.LLOAD_2, _T.LLOAD_3, _T.LLOADW,
        _T.FLOAD, _T.FLOAD_0, _T.FLOAD_1, _T.FLOAD_2, _T.FLOAD_3, _T.FLOADW,
        _T.DLOAD, _T.DLOAD_0, _T.DLOAD_1, _T.DLOAD_2, _T.DLOAD_3, _T.DLOADW,
        _T.ALOAD, _T.ALOAD_0, _T.ALOAD_1, _T.ALOAD_2, _T.ALOAD_3, _T.ALOADW,
        # Local variable stores
        _T.ISTORE, _T.ISTORE_0, _T.ISTORE_1, _T.ISTORE_2, _T.ISTORE_3, _T.ISTOREW,
        _T.LSTORE, _T.LSTORE_0, _T.LSTORE_1, _T.LSTORE_2, _T.LSTORE_3, _T.LSTOREW,
        _T.FSTORE, _T.FSTORE_0, _T.FSTORE_1, _T.FSTORE_2, _T.FSTORE_3, _T.FSTOREW,
        _T.DSTORE, _T.DSTORE_0, _T.DSTORE_1, _T.DSTORE_2, _T.DSTORE_3, _T.DSTOREW,
        _T.ASTORE, _T.ASTORE_0, _T.ASTORE_1, _T.ASTORE_2, _T.ASTORE_3, _T.ASTOREW,
        # Stack manipulation
        _T.POP, _T.POP2,
        _T.DUP, _T.DUP_X1, _T.DUP_X2, _T.DUP2, _T.DUP2_X1, _T.DUP2_X2,
        _T.SWAP,
        # Integer arithmetic and bitwise operations (division/remainder absent)
        _T.IADD, _T.ISUB, _T.IMUL, _T.INEG,
        _T.ISHL, _T.ISHR, _T.IUSHR, _T.IAND, _T.IOR, _T.IXOR,
        # Long arithmetic and bitwise operations (division/remainder absent)
        _T.LADD, _T.LSUB, _T.LMUL, _T.LNEG,
        _T.LSHL, _T.LSHR, _T.LUSHR, _T.LAND, _T.LOR, _T.LXOR,
        # Floating-point arithmetic
        _T.FADD, _T.FSUB, _T.FMUL, _T.FDIV, _T.FREM, _T.FNEG,
        _T.DADD, _T.DSUB, _T.DMUL, _T.DDIV, _T.DREM, _T.DNEG,
        # Primitive conversions
        _T.I2L, _T.I2F, _T.I2D,
        _T.L2I, _T.L2F, _T.L2D,
        _T.F2I, _T.F2L, _T.F2D,
        _T.D2I, _T.D2L, _T.D2F,
        _T.I2B, _T.I2C, _T.I2S,
        # Comparisons
        _T.LCMP, _T.FCMPL, _T.FCMPG, _T.DCMPL, _T.DCMPG,
        # Conditional branches
        _T.IFEQ, _T.IFNE, _T.IFLT, _T.IFGE, _T.IFGT, _T.IFLE,
        _T.IF_ICMPEQ, _T.IF_ICMPNE, _T.IF_ICMPLT, _T.IF_ICMPGE, _T.IF_ICMPGT, _T.IF_ICMPLE,
        _T.IF_ACMPEQ, _T.IF_ACMPNE,
        # Jumps, subroutines, null tests and switches
        _T.GOTO, _T.GOTO_W,
        _T.JSR, _T.JSR_W, _T.RET, _T.RETW,
        _T.IFNULL, _T.IFNONNULL,
        _T.TABLESWITCH, _T.LOOKUPSWITCH,
        # Returns
        _T.IRETURN, _T.LRETURN, _T.FRETURN, _T.DRETURN, _T.ARETURN, _T.RETURN,
        # Local increment and the WIDE prefix
        _T.IINC, _T.IINCW, _T.WIDE,
    )
)
|
|
1414
|
+
|
|
1415
|
+
|
|
1416
|
+
def _instruction_may_throw(insn: InsnInfo) -> bool:
    """Decide whether *insn* can hand control to an exception handler.

    Symbolic local-variable, increment, branch and switch instructions are
    reported as non-throwing, as is ``LdcInsn``. Field, method, invokedynamic,
    type and multi-dimensional array instructions are always reported as
    throwing. Every remaining instruction is considered throwing unless its
    opcode is listed in ``_NON_THROWING_RAW_OPCODES``.
    """
    if isinstance(insn, (VarInsn, IIncInsn, BranchInsn, LookupSwitchInsn, TableSwitchInsn)):
        return False
    if isinstance(
        insn,
        (FieldInsn, MethodInsn, InterfaceMethodInsn, InvokeDynamicInsn, TypeInsn, MultiANewArrayInsn),
    ):
        return True
    # Raw opcodes: conservative default is "may throw".
    return not isinstance(insn, LdcInsn) and insn.type not in _NON_THROWING_RAW_OPCODES
|
|
1432
|
+
|
|
1433
|
+
|
|
1434
|
+
def _propagate_exception_handlers(
|
|
1435
|
+
handler_edges: list[tuple[int, str | None]],
|
|
1436
|
+
state: FrameState,
|
|
1437
|
+
entry_states: dict[int, FrameState],
|
|
1438
|
+
worklist: deque[int],
|
|
1439
|
+
in_worklist: set[int],
|
|
1440
|
+
resolver: ClassResolver | None,
|
|
1441
|
+
) -> None:
|
|
1442
|
+
"""Propagate the pre-instruction state to each active exception handler."""
|
|
1443
|
+
for handler_id, catch_type in handler_edges:
|
|
1444
|
+
if catch_type is not None:
|
|
1445
|
+
handler_stack = (VObject(catch_type),)
|
|
1446
|
+
else:
|
|
1447
|
+
handler_stack = (_OBJECT_THROWABLE,)
|
|
1448
|
+
handler_state = FrameState(handler_stack, state.locals)
|
|
1449
|
+
_propagate(handler_id, handler_state, entry_states, worklist, in_worklist, resolver)
|
|
1450
|
+
|
|
1451
|
+
|
|
1452
|
+
# ===================================================================
|
|
1453
|
+
# Per-instruction simulation
|
|
1454
|
+
# ===================================================================
|
|
1455
|
+
|
|
1456
|
+
|
|
1457
|
+
# VarInsn canonical opcode → type category for loads/stores.
|
|
1458
|
+
_LOAD_TYPE_MAP: dict[InsnInfoType, VType] = {
    # Primitive loads push the type implied by the opcode.
    _T.ILOAD: _INTEGER,
    _T.FLOAD: _FLOAT,
    _T.LLOAD: _LONG,
    _T.DLOAD: _DOUBLE,
    # Placeholder only: for ALOAD the pushed type is read from the local.
    _T.ALOAD: _NULL,
}
|
|
1465
|
+
|
|
1466
|
+
# Canonical store opcodes recognised by ``_simulate_var_insn``.
_STORE_OPCODES: frozenset[InsnInfoType] = frozenset(
    (_T.ISTORE, _T.LSTORE, _T.FSTORE, _T.DSTORE, _T.ASTORE)
)
|
|
1475
|
+
|
|
1476
|
+
|
|
1477
|
+
def _simulate_insn(
|
|
1478
|
+
insn: InsnInfo,
|
|
1479
|
+
state: FrameState,
|
|
1480
|
+
code: CodeModel,
|
|
1481
|
+
class_name: str,
|
|
1482
|
+
) -> FrameState:
|
|
1483
|
+
"""Apply the effect of one instruction to the frame state."""
|
|
1484
|
+
|
|
1485
|
+
# --- VarInsn (symbolic load/store) ---
|
|
1486
|
+
if isinstance(insn, VarInsn):
|
|
1487
|
+
return _simulate_var_insn(insn, state)
|
|
1488
|
+
|
|
1489
|
+
# --- IIncInsn ---
|
|
1490
|
+
if isinstance(insn, IIncInsn):
|
|
1491
|
+
# No stack change; just verify the local is integer-typed.
|
|
1492
|
+
return state
|
|
1493
|
+
|
|
1494
|
+
# --- FieldInsn ---
|
|
1495
|
+
if isinstance(insn, FieldInsn):
|
|
1496
|
+
return _simulate_field_insn(insn, state)
|
|
1497
|
+
|
|
1498
|
+
# --- MethodInsn ---
|
|
1499
|
+
if isinstance(insn, MethodInsn):
|
|
1500
|
+
return _simulate_method_insn(insn, state, class_name)
|
|
1501
|
+
|
|
1502
|
+
# --- InterfaceMethodInsn ---
|
|
1503
|
+
if isinstance(insn, InterfaceMethodInsn):
|
|
1504
|
+
return _simulate_interface_method_insn(insn, state)
|
|
1505
|
+
|
|
1506
|
+
# --- InvokeDynamicInsn ---
|
|
1507
|
+
if isinstance(insn, InvokeDynamicInsn):
|
|
1508
|
+
return _simulate_invokedynamic_insn(insn, state)
|
|
1509
|
+
|
|
1510
|
+
# --- TypeInsn ---
|
|
1511
|
+
if isinstance(insn, TypeInsn):
|
|
1512
|
+
return _simulate_type_insn(insn, state, code)
|
|
1513
|
+
|
|
1514
|
+
# --- LdcInsn ---
|
|
1515
|
+
if isinstance(insn, LdcInsn):
|
|
1516
|
+
return _simulate_ldc_insn(insn, state)
|
|
1517
|
+
|
|
1518
|
+
# --- MultiANewArrayInsn ---
|
|
1519
|
+
if isinstance(insn, MultiANewArrayInsn):
|
|
1520
|
+
state, _ = state.pop(insn.dimensions)
|
|
1521
|
+
return state.push(VObject(insn.class_name))
|
|
1522
|
+
|
|
1523
|
+
# --- BranchInsn (conditional branches pop operands, GOTO does not) ---
|
|
1524
|
+
if isinstance(insn, BranchInsn):
|
|
1525
|
+
return _simulate_branch_insn(insn, state)
|
|
1526
|
+
|
|
1527
|
+
# --- Switch ---
|
|
1528
|
+
if isinstance(insn, LookupSwitchInsn | TableSwitchInsn):
|
|
1529
|
+
state, _ = state.pop(1) # pop the key
|
|
1530
|
+
return state
|
|
1531
|
+
|
|
1532
|
+
# --- All other InsnInfo (raw opcodes with static effects) ---
|
|
1533
|
+
return _simulate_raw_insn(insn, state)
|
|
1534
|
+
|
|
1535
|
+
|
|
1536
|
+
def _simulate_var_insn(insn: VarInsn, state: FrameState) -> FrameState:
    """Apply a symbolic local-variable instruction (store, ``RET`` or load).

    Stores pop their operand and write the local: ``LSTORE``/``DSTORE`` pop
    two slots and record long/double, every other store pops one slot and
    records the popped value. ``RET`` leaves the frame untouched. ``ALOAD``
    pushes whatever the local currently holds; the remaining loads push the
    type implied by the opcode.
    """
    opcode, slot = insn.type, insn.slot

    if opcode in _STORE_OPCODES:
        if opcode == _T.LSTORE or opcode == _T.DSTORE:
            wide_type = _LONG if opcode == _T.LSTORE else _DOUBLE
            after_pop, _ = state.pop(2)
            return after_pop.set_local(slot, wide_type)
        after_pop, (stored,) = state.pop(1)
        return after_pop.set_local(slot, stored)

    if opcode == _T.RET:
        return state

    if opcode == _T.ALOAD:
        return state.push(state.get_local(slot))

    # Typed load: the pushed type comes from the opcode, not from the local.
    pushed = _LOAD_TYPE_MAP.get(opcode, _INTEGER)
    # Result intentionally discarded; presumably this validates the slot
    # (TODO confirm against FrameState.get_local).
    state.get_local(slot)
    return state.push(pushed)
|
|
1565
|
+
|
|
1566
|
+
|
|
1567
|
+
def _simulate_field_insn(insn: FieldInsn, state: FrameState) -> FrameState:
    """Apply GETFIELD, PUTFIELD, GETSTATIC or PUTSTATIC to *state*.

    The field's verification type is derived from the instruction's
    descriptor; category-2 types occupy two stack slots when popped.
    """
    value_type = vtype_from_field_descriptor_str(insn.descriptor)
    width = 2 if is_category2(value_type) else 1
    kind = insn.type

    if kind == _T.GETFIELD:
        # objectref is replaced by the field value.
        without_ref, _ = state.pop(1)
        return without_ref.push(value_type)
    if kind == _T.GETSTATIC:
        return state.push(value_type)

    # PUTFIELD / PUTSTATIC: the value is consumed first ...
    after_value, _ = state.pop(width)
    if kind == _T.PUTFIELD:
        # ... and PUTFIELD additionally consumes the objectref.
        after_value, _ = after_value.pop(1)
    return after_value
|
|
1584
|
+
|
|
1585
|
+
|
|
1586
|
+
def _simulate_method_insn(
    insn: MethodInsn,
    state: FrameState,
    class_name: str,
) -> FrameState:
    """Apply INVOKEVIRTUAL, INVOKESPECIAL or INVOKESTATIC to *state*.

    Arguments are popped according to the method descriptor (two slots for
    category-2 types). Unless the opcode is ``INVOKESTATIC`` the receiver is
    popped as well. A non-void return type is pushed afterwards.
    """
    descriptor = parse_method_descriptor(insn.descriptor)

    arg_slots = 0
    for param in descriptor.parameter_types:
        arg_slots += 2 if is_category2(vtype_from_descriptor(param)) else 1
    state, _ = state.pop(arg_slots)

    if insn.type != _T.INVOKESTATIC:
        state, (receiver,) = state.pop(1)
        if insn.name == "<init>":
            # A constructor call turns the uninitialized receiver into an
            # initialized object: ``this`` becomes the enclosing class, a
            # freshly allocated object becomes the constructor's owner.
            if isinstance(receiver, VUninitializedThis):
                state = _replace_uninitialized(state, receiver, VObject(class_name))
            elif isinstance(receiver, VUninitialized):
                state = _replace_uninitialized(state, receiver, VObject(insn.owner))

    if isinstance(descriptor.return_type, VoidType):
        return state
    return state.push(vtype_from_descriptor(descriptor.return_type))
|
|
1613
|
+
|
|
1614
|
+
|
|
1615
|
+
def _simulate_interface_method_insn(insn: InterfaceMethodInsn, state: FrameState) -> FrameState:
    """Apply the stack effect of INVOKEINTERFACE.

    Pops the descriptor's argument slots and the receiver, then pushes the
    return value unless the method returns ``void``.
    """
    descriptor = parse_method_descriptor(insn.descriptor)

    # One entry per parameter: 2 slots for category-2 types, otherwise 1.
    widths = [2 if is_category2(vtype_from_descriptor(param)) else 1 for param in descriptor.parameter_types]
    state, _ = state.pop(sum(widths))
    state, _ = state.pop(1)  # receiver (objectref)

    if isinstance(descriptor.return_type, VoidType):
        return state
    return state.push(vtype_from_descriptor(descriptor.return_type))
|
|
1626
|
+
|
|
1627
|
+
|
|
1628
|
+
def _simulate_invokedynamic_insn(insn: InvokeDynamicInsn, state: FrameState) -> FrameState:
    """Apply the stack effect of INVOKEDYNAMIC.

    Only the descriptor's argument slots are popped (no receiver is
    removed); a non-void return value is then pushed.
    """
    descriptor = parse_method_descriptor(insn.descriptor)

    # One entry per parameter: 2 slots for category-2 types, otherwise 1.
    widths = [2 if is_category2(vtype_from_descriptor(param)) else 1 for param in descriptor.parameter_types]
    state, _ = state.pop(sum(widths))

    if isinstance(descriptor.return_type, VoidType):
        return state
    return state.push(vtype_from_descriptor(descriptor.return_type))
|
|
1638
|
+
|
|
1639
|
+
|
|
1640
|
+
def _simulate_type_insn(
    insn: TypeInsn,
    state: FrameState,
    code: CodeModel,
) -> FrameState:
    """Apply the stack effect of NEW, CHECKCAST, INSTANCEOF or ANEWARRAY.

    Raises:
        AnalysisError: If a ``NEW`` instruction has no label directly in
            front of it in *code*.
    """
    kind = insn.type

    if kind == _T.NEW:
        # The uninitialized type is keyed by the label at the NEW site.
        site = _find_label_for_insn(code, insn)
        if site is None:
            raise AnalysisError("NEW instruction is missing an analysis label")
        return state.push(VUninitialized(site))

    # CHECKCAST, INSTANCEOF and ANEWARRAY each consume exactly one operand.
    state, _ = state.pop(1)

    if kind == _T.CHECKCAST:
        return state.push(VObject(insn.class_name))
    if kind == _T.INSTANCEOF:
        return state.push(_INTEGER)

    # ANEWARRAY: an array element type only needs one more "[", while a
    # plain class name is wrapped as "[L<name>;".
    element = insn.class_name
    if element.startswith("["):
        array_name = "[" + element
    else:
        array_name = "[L" + element + ";"
    return state.push(VObject(array_name))
|
|
1664
|
+
|
|
1665
|
+
|
|
1666
|
+
def _simulate_ldc_insn(insn: LdcInsn, state: FrameState) -> FrameState:
    """Push the verification type produced by LDC/LDC_W/LDC2_W.

    The pushed type is chosen from the kind of constant carried by the
    instruction; any constant kind not listed below is typed from its
    field descriptor.
    """
    constant = insn.value

    # Checked in order; the first matching constant kind wins.
    fixed_kinds = (
        (LdcInt, _INTEGER),
        (LdcFloat, _FLOAT),
        (LdcLong, _LONG),
        (LdcDouble, _DOUBLE),
        (LdcString, _OBJECT_STRING),
        (LdcClass, _OBJECT_CLASS),
        (LdcMethodType, _OBJECT_METHOD_TYPE),
        (LdcMethodHandle, _OBJECT_METHOD_HANDLE),
    )
    for constant_cls, pushed in fixed_kinds:
        if isinstance(constant, constant_cls):
            return state.push(pushed)

    # LdcDynamic — type determined by descriptor.
    return state.push(vtype_from_field_descriptor_str(constant.descriptor))
|
|
1689
|
+
|
|
1690
|
+
|
|
1691
|
+
def _simulate_branch_insn(insn: BranchInsn, state: FrameState) -> FrameState:
    """Apply the stack effect of a branch instruction.

    Condition operands are popped as listed in ``OPCODE_EFFECTS``;
    JSR/JSR_W additionally push a return address, modelled as an integer.
    """
    stack_effect = OPCODE_EFFECTS.get(insn.type)
    if stack_effect is not None:
        if stack_effect.pops > 0:
            state, _ = state.pop(stack_effect.pops)

    if insn.type in {_T.JSR, _T.JSR_W}:
        return state.push(_INTEGER)
    return state
|
|
1700
|
+
|
|
1701
|
+
|
|
1702
|
+
def _simulate_raw_insn(insn: InsnInfo, state: FrameState) -> FrameState:
    """Apply the stack effect of a raw (non-symbolic) instruction.

    Three kinds of opcode are handled:

    * stack shuffles (DUP*, SWAP), which rearrange the top slots;
    * AALOAD and NEWARRAY, whose pushed type depends on an operand;
    * everything else, described by a ``(slots popped, type pushed)`` pair.

    An opcode that matches none of these leaves the state untouched.
    """
    opcode = insn.type

    # --- Stack shuffles -------------------------------------------------
    # opcode -> (slots peeked, slots removed from the top, new tail layout).
    # Layout entries index the peeked values, where 0 is the top of stack.
    shuffles = {
        _T.DUP: (1, 0, (0,)),
        _T.DUP_X1: (2, 2, (0, 1, 0)),
        _T.DUP_X2: (3, 3, (0, 2, 1, 0)),
        _T.DUP2: (2, 0, (1, 0)),
        _T.DUP2_X1: (3, 3, (1, 0, 2, 1, 0)),
        _T.DUP2_X2: (4, 4, (1, 0, 3, 2, 1, 0)),
        _T.SWAP: (2, 2, (0, 1)),
    }
    shuffle = shuffles.get(opcode)
    if shuffle is not None:
        depth, removed, layout = shuffle
        peeked = [state.peek(index) for index in range(depth)]
        kept = state.stack[:-removed] if removed else state.stack
        return FrameState(kept + tuple(peeked[index] for index in layout), state.locals)

    # --- AALOAD: element type is derived from the array reference --------
    if opcode == _T.AALOAD:
        state, popped = state.pop(2)
        _, array_ref = popped
        element = None
        if isinstance(array_ref, VObject) and array_ref.class_name.startswith("["):
            inner = array_ref.class_name[1:]
            if inner.startswith("L") and inner.endswith(";"):
                element = VObject(inner[1:-1])
            elif inner.startswith("["):
                element = VObject(inner)
            # A primitive component (e.g. "[I") is invalid for AALOAD and
            # falls back to the Object default below.
        return state.push(_OBJECT_OBJECT if element is None else element)

    # --- NEWARRAY: array descriptor comes from the atype operand ---------
    if opcode == _T.NEWARRAY:
        state, _ = state.pop(1)  # pop count
        from .instructions import NewArray as NewArrayInsn

        array_desc = "[I"
        if isinstance(insn, NewArrayInsn):
            array_desc = _NEWARRAY_TYPE_MAP.get(insn.atype, "[I")
        return state.push(VObject(array_desc))

    # --- Table-driven opcodes: (opcodes, slots popped, type pushed) ------
    groups = (
        # Plain pops
        ((_T.POP,), 1, None),
        ((_T.POP2,), 2, None),
        # Constants
        ((_T.ACONST_NULL,), 0, _NULL),
        (
            (_T.ICONST_M1, _T.ICONST_0, _T.ICONST_1, _T.ICONST_2, _T.ICONST_3, _T.ICONST_4, _T.ICONST_5),
            0,
            _INTEGER,
        ),
        ((_T.LCONST_0, _T.LCONST_1), 0, _LONG),
        ((_T.FCONST_0, _T.FCONST_1, _T.FCONST_2), 0, _FLOAT),
        ((_T.DCONST_0, _T.DCONST_1), 0, _DOUBLE),
        ((_T.BIPUSH,), 0, _INTEGER),
        ((_T.SIPUSH,), 0, _INTEGER),
        # Arithmetic (result type by opcode prefix)
        (
            (
                _T.IADD,
                _T.ISUB,
                _T.IMUL,
                _T.IDIV,
                _T.IREM,
                _T.ISHL,
                _T.ISHR,
                _T.IUSHR,
                _T.IAND,
                _T.IOR,
                _T.IXOR,
            ),
            2,
            _INTEGER,
        ),
        ((_T.INEG,), 1, _INTEGER),
        ((_T.LADD, _T.LSUB, _T.LMUL, _T.LDIV, _T.LREM, _T.LAND, _T.LOR, _T.LXOR), 4, _LONG),
        ((_T.LSHL, _T.LSHR, _T.LUSHR), 3, _LONG),  # long + int shift amount
        ((_T.LNEG,), 2, _LONG),
        ((_T.FADD, _T.FSUB, _T.FMUL, _T.FDIV, _T.FREM), 2, _FLOAT),
        ((_T.FNEG,), 1, _FLOAT),
        ((_T.DADD, _T.DSUB, _T.DMUL, _T.DDIV, _T.DREM), 4, _DOUBLE),
        ((_T.DNEG,), 2, _DOUBLE),
        # Conversions
        ((_T.I2L,), 1, _LONG),
        ((_T.I2F,), 1, _FLOAT),
        ((_T.I2D,), 1, _DOUBLE),
        ((_T.L2I,), 2, _INTEGER),
        ((_T.L2F,), 2, _FLOAT),
        ((_T.L2D,), 2, _DOUBLE),
        ((_T.F2I,), 1, _INTEGER),
        ((_T.F2L,), 1, _LONG),
        ((_T.F2D,), 1, _DOUBLE),
        ((_T.D2I,), 2, _INTEGER),
        ((_T.D2L,), 2, _LONG),
        ((_T.D2F,), 2, _FLOAT),
        ((_T.I2B, _T.I2C, _T.I2S), 1, _INTEGER),
        # Comparisons
        ((_T.LCMP,), 4, _INTEGER),
        ((_T.FCMPL, _T.FCMPG), 2, _INTEGER),
        ((_T.DCMPL, _T.DCMPG), 4, _INTEGER),
        # Array loads (AALOAD is handled above)
        ((_T.IALOAD, _T.BALOAD, _T.CALOAD, _T.SALOAD), 2, _INTEGER),
        ((_T.LALOAD,), 2, _LONG),
        ((_T.FALOAD,), 2, _FLOAT),
        ((_T.DALOAD,), 2, _DOUBLE),
        # Array stores
        ((_T.IASTORE, _T.BASTORE, _T.CASTORE, _T.SASTORE, _T.FASTORE, _T.AASTORE), 3, None),
        ((_T.LASTORE, _T.DASTORE), 4, None),
        # Returns
        ((_T.IRETURN, _T.FRETURN, _T.ARETURN), 1, None),
        ((_T.LRETURN, _T.DRETURN), 2, None),
        ((_T.RETURN,), 0, None),
        # ATHROW
        ((_T.ATHROW,), 1, None),
        # Monitor
        ((_T.MONITORENTER, _T.MONITOREXIT), 1, None),
        # Array length
        ((_T.ARRAYLENGTH,), 1, _INTEGER),
        # NOP / IINC / WIDE
        ((_T.NOP, _T.IINC, _T.IINCW, _T.WIDE), 0, None),
        # JSR pushes a return address (treated as integer for simplicity)
        ((_T.JSR, _T.JSR_W), 0, _INTEGER),
        # RET
        ((_T.RET, _T.RETW), 0, None),
        # Raw load/store opcodes (when not lifted to VarInsn).  These
        # shouldn't appear in editing model code, but handle gracefully.
        ((_T.ILOAD, _T.ILOAD_0, _T.ILOAD_1, _T.ILOAD_2, _T.ILOAD_3, _T.ILOADW), 0, _INTEGER),
        ((_T.LLOAD, _T.LLOAD_0, _T.LLOAD_1, _T.LLOAD_2, _T.LLOAD_3, _T.LLOADW), 0, _LONG),
        ((_T.FLOAD, _T.FLOAD_0, _T.FLOAD_1, _T.FLOAD_2, _T.FLOAD_3, _T.FLOADW), 0, _FLOAT),
        ((_T.DLOAD, _T.DLOAD_0, _T.DLOAD_1, _T.DLOAD_2, _T.DLOAD_3, _T.DLOADW), 0, _DOUBLE),
        ((_T.ALOAD, _T.ALOAD_0, _T.ALOAD_1, _T.ALOAD_2, _T.ALOAD_3, _T.ALOADW), 0, _OBJECT_OBJECT),
        ((_T.ISTORE, _T.ISTORE_0, _T.ISTORE_1, _T.ISTORE_2, _T.ISTORE_3, _T.ISTOREW), 1, None),
        ((_T.LSTORE, _T.LSTORE_0, _T.LSTORE_1, _T.LSTORE_2, _T.LSTORE_3, _T.LSTOREW), 2, None),
        ((_T.FSTORE, _T.FSTORE_0, _T.FSTORE_1, _T.FSTORE_2, _T.FSTORE_3, _T.FSTOREW), 1, None),
        ((_T.DSTORE, _T.DSTORE_0, _T.DSTORE_1, _T.DSTORE_2, _T.DSTORE_3, _T.DSTOREW), 2, None),
        ((_T.ASTORE, _T.ASTORE_0, _T.ASTORE_1, _T.ASTORE_2, _T.ASTORE_3, _T.ASTOREW), 1, None),
    )
    effects = {}
    for opcodes, pops, pushed in groups:
        for member in opcodes:
            # First registration wins, mirroring a top-to-bottom if-chain.
            effects.setdefault(member, (pops, pushed))

    effect = effects.get(opcode)
    if effect is None:
        # Unrecognized opcode — conservative no-op.
        return state

    pops, pushed = effect
    if pops:
        state, _ = state.pop(pops)
    if pushed is None:
        return state
    return state.push(pushed)
|
|
1993
|
+
|
|
1994
|
+
|
|
1995
|
+
def _replace_uninitialized(
    state: FrameState,
    uninit: VUninitialized | VUninitializedThis,
    replacement: VObject,
) -> FrameState:
    """Return a frame in which every slot equal to *uninit* holds *replacement*.

    Both the operand stack and the locals are rewritten, since a successful
    ``<init>`` call initializes every copy of the object reference
    (JVM spec §4.10.1.4).
    """

    def substitute(slots):
        # Slots are compared by equality, so all copies are replaced.
        return tuple(replacement if slot == uninit else slot for slot in slots)

    return FrameState(substitute(state.stack), substitute(state.locals))
|
|
2009
|
+
|
|
2010
|
+
|
|
2011
|
+
def _prepare_analysis_code(code: CodeModel) -> CodeModel:
|
|
2012
|
+
"""Insert transient labels before unlabeled ``NEW`` instructions."""
|
|
2013
|
+
prepared_items: list[CodeItem] = []
|
|
2014
|
+
inserted = False
|
|
2015
|
+
prev_was_label = False
|
|
2016
|
+
|
|
2017
|
+
for item in code.instructions:
|
|
2018
|
+
if isinstance(item, Label):
|
|
2019
|
+
prepared_items.append(item)
|
|
2020
|
+
prev_was_label = True
|
|
2021
|
+
continue
|
|
2022
|
+
|
|
2023
|
+
if isinstance(item, TypeInsn) and item.type == _T.NEW and not prev_was_label:
|
|
2024
|
+
prepared_items.append(Label())
|
|
2025
|
+
inserted = True
|
|
2026
|
+
prepared_items.append(item)
|
|
2027
|
+
prev_was_label = False
|
|
2028
|
+
|
|
2029
|
+
if not inserted:
|
|
2030
|
+
return code
|
|
2031
|
+
|
|
2032
|
+
return type(code)(
|
|
2033
|
+
max_stack=code.max_stack,
|
|
2034
|
+
max_locals=code.max_locals,
|
|
2035
|
+
instructions=prepared_items,
|
|
2036
|
+
exception_handlers=code.exception_handlers,
|
|
2037
|
+
line_numbers=code.line_numbers,
|
|
2038
|
+
local_variables=code.local_variables,
|
|
2039
|
+
local_variable_types=code.local_variable_types,
|
|
2040
|
+
attributes=code.attributes,
|
|
2041
|
+
)
|
|
2042
|
+
|
|
2043
|
+
|
|
2044
|
+
def _find_label_for_insn(code: CodeModel, target_insn: InsnInfo) -> Label | None:
|
|
2045
|
+
"""Find the Label immediately preceding *target_insn* in the code.
|
|
2046
|
+
|
|
2047
|
+
Returns ``None`` if no label precedes the instruction. Simulation calls
|
|
2048
|
+
this on the analysis-prepared instruction stream, where unlabeled
|
|
2049
|
+
``NEW`` instructions have already been given a synthetic label.
|
|
2050
|
+
"""
|
|
2051
|
+
prev_label: Label | None = None
|
|
2052
|
+
for item in code.instructions:
|
|
2053
|
+
if item is target_insn:
|
|
2054
|
+
return prev_label
|
|
2055
|
+
if isinstance(item, Label):
|
|
2056
|
+
prev_label = item
|
|
2057
|
+
else:
|
|
2058
|
+
prev_label = None
|
|
2059
|
+
return None
|
|
2060
|
+
|
|
2061
|
+
|
|
2062
|
+
# ===================================================================
|
|
2063
|
+
# VType → VerificationTypeInfo conversion
|
|
2064
|
+
# ===================================================================
|
|
2065
|
+
|
|
2066
|
+
|
|
2067
|
+
def _vtype_to_vti(
    vtype: VType,
    cp: ConstantPoolBuilder,
    label_offsets: dict[Label, int],
) -> VerificationTypeInfo:
    """Translate one verification type into its raw ``VerificationTypeInfo``.

    Object types register their class name with *cp*; uninitialized types
    look up the bytecode offset of their ``NEW`` label in *label_offsets*.

    Raises:
        ValueError: If an uninitialized type's label has no entry in
            *label_offsets*.
    """
    # Types that carry nothing beyond their tag, checked in order.
    tag_only = (
        (VTop, TopVariableInfo, VerificationType.TOP),
        (VInteger, IntegerVariableInfo, VerificationType.INTEGER),
        (VFloat, FloatVariableInfo, VerificationType.FLOAT),
        (VLong, LongVariableInfo, VerificationType.LONG),
        (VDouble, DoubleVariableInfo, VerificationType.DOUBLE),
        (VNull, NullVariableInfo, VerificationType.NULL),
        (VUninitializedThis, UninitializedThisVariableInfo, VerificationType.UNINITIALIZED_THIS),
    )
    for vtype_cls, info_cls, tag in tag_only:
        if isinstance(vtype, vtype_cls):
            return info_cls(tag)

    if isinstance(vtype, VObject):
        return ObjectVariableInfo(VerificationType.OBJECT, cp.add_class(vtype.class_name))

    # Anything left is treated as VUninitialized, identified by its NEW site.
    site_offset = label_offsets.get(vtype.new_label)
    if site_offset is None:
        raise ValueError(f"missing bytecode offset for uninitialized NEW site {vtype.new_label!r}")
    return UninitializedVariableInfo(VerificationType.UNINITIALIZED, site_offset)
|
|
2094
|
+
|
|
2095
|
+
|
|
2096
|
+
def _vtypes_to_vtis(
|
|
2097
|
+
vtypes: tuple[VType, ...],
|
|
2098
|
+
cp: ConstantPoolBuilder,
|
|
2099
|
+
label_offsets: dict[Label, int],
|
|
2100
|
+
) -> list[VerificationTypeInfo]:
|
|
2101
|
+
"""Convert a tuple of verification types to raw ``VerificationTypeInfo`` list."""
|
|
2102
|
+
return [_vtype_to_vti(vt, cp, label_offsets) for vt in vtypes]
|
|
2103
|
+
|
|
2104
|
+
|
|
2105
|
+
def _verification_type_info_size(vti: VerificationTypeInfo) -> int:
    """Return how many bytes a ``verification_type_info`` occupies.

    Object and uninitialized entries take 3 bytes; every other entry
    takes 1.
    """
    has_payload = isinstance(vti, (ObjectVariableInfo, UninitializedVariableInfo))
    return 3 if has_payload else 1
|
|
2110
|
+
|
|
2111
|
+
|
|
2112
|
+
def _stack_map_frame_size(frame: StackMapFrameInfo) -> int:
    """Return how many bytes a ``stack_map_frame`` occupies when serialized.

    Raises:
        TypeError: If *frame* is not one of the known frame classes.
    """

    def entries_size(entries) -> int:
        # Combined size of a run of verification_type_info entries.
        return sum(_verification_type_info_size(vti) for vti in entries)

    if isinstance(frame, SameFrameInfo):
        return 1
    if isinstance(frame, SameLocals1StackItemFrameInfo):
        return 1 + _verification_type_info_size(frame.stack)
    if isinstance(frame, SameLocals1StackItemFrameExtendedInfo):
        return 3 + _verification_type_info_size(frame.stack)
    if isinstance(frame, (ChopFrameInfo, SameFrameExtendedInfo)):
        return 3
    if isinstance(frame, AppendFrameInfo):
        return 3 + entries_size(frame.locals)
    if isinstance(frame, FullFrameInfo):
        return 7 + entries_size(frame.locals) + entries_size(frame.stack)

    raise TypeError(f"unsupported stack map frame type: {type(frame).__name__}")
|
|
2131
|
+
|
|
2132
|
+
|
|
2133
|
+
def _stack_map_table_attribute_length(frames: Sequence[StackMapFrameInfo]) -> int:
|
|
2134
|
+
"""Return the serialized ``attribute_length`` for a ``StackMapTable``."""
|
|
2135
|
+
return 2 + sum(_stack_map_frame_size(frame) for frame in frames)
|
|
2136
|
+
|
|
2137
|
+
|
|
2138
|
+
# ===================================================================
|
|
2139
|
+
# Compact frame encoding selection
|
|
2140
|
+
# ===================================================================
|
|
2141
|
+
|
|
2142
|
+
|
|
2143
|
+
def _select_frame(
    offset_delta: int,
    prev_locals: Sequence[VerificationTypeInfo],
    curr_locals: Sequence[VerificationTypeInfo],
    curr_stack: Sequence[VerificationTypeInfo],
) -> StackMapFrameInfo:
    """Pick the smallest StackMapTable frame kind that can express a frame.

    The frame is described relative to *prev_locals* (the locals of the
    preceding frame), following the frame-type rules of JVM spec §4.7.4.
    Anything that no compact form can express becomes a ``full_frame``.
    """
    if prev_locals == curr_locals:
        if not curr_stack:
            # same_frame, or same_frame_extended when the delta is too large
            if offset_delta <= 63:
                return SameFrameInfo(frame_type=offset_delta)
            return SameFrameExtendedInfo(frame_type=251, offset_delta=offset_delta)

        if len(curr_stack) == 1:
            # same_locals_1_stack_item, or its extended variant
            only_item = curr_stack[0]
            if offset_delta <= 63:
                return SameLocals1StackItemFrameInfo(
                    frame_type=64 + offset_delta,
                    stack=only_item,
                )
            return SameLocals1StackItemFrameExtendedInfo(
                frame_type=247,
                offset_delta=offset_delta,
                stack=only_item,
            )
    elif not curr_stack:
        prev_count = len(prev_locals)
        curr_count = len(curr_locals)
        growth = curr_count - prev_count

        # chop_frame: the last 1–3 locals were dropped (types 248–250)
        if -3 <= growth < 0 and curr_locals == prev_locals[:curr_count]:
            return ChopFrameInfo(
                frame_type=251 + growth,
                offset_delta=offset_delta,
            )

        # append_frame: 1–3 locals were added at the end (types 252–254)
        if 0 < growth <= 3 and curr_locals[:prev_count] == prev_locals:
            return AppendFrameInfo(
                frame_type=251 + growth,
                offset_delta=offset_delta,
                locals=list(curr_locals[prev_count:]),
            )

    # full_frame: spell out all locals and stack items
    return FullFrameInfo(
        frame_type=255,
        offset_delta=offset_delta,
        number_of_locals=len(curr_locals),
        locals=list(curr_locals),
        number_of_stack_items=len(curr_stack),
        stack=list(curr_stack),
    )
|
|
2201
|
+
|
|
2202
|
+
|
|
2203
|
+
# ===================================================================
|
|
2204
|
+
# compute_maxs / compute_frames — public API
|
|
2205
|
+
# ===================================================================
|
|
2206
|
+
|
|
2207
|
+
|
|
2208
|
+
def compute_maxs(
    code: CodeModel,
    method: MethodModel,
    class_name: str,
    resolver: ClassResolver | None = None,
) -> tuple[int, int]:
    """Recompute the ``max_stack`` and ``max_locals`` limits of a method.

    A control-flow graph is built from *code* and handed to the
    simulation; the limits are read from the simulation result.

    Args:
        code: The code model to analyze.
        method: The method model, forwarded to the simulation.
        class_name: JVM internal name of the enclosing class.
        resolver: Optional class hierarchy resolver, forwarded to the
            simulation.

    Returns:
        A ``(max_stack, max_locals)`` tuple.
    """
    graph = build_cfg(code)
    outcome = simulate(graph, code, method, class_name, resolver)
    return (outcome.max_stack, outcome.max_locals)
|
|
2231
|
+
|
|
2232
|
+
|
|
2233
|
+
@dataclass(frozen=True, slots=True)
|
|
2234
|
+
class FrameComputationResult:
|
|
2235
|
+
"""Results of frame computation: limits and StackMapTable.
|
|
2236
|
+
|
|
2237
|
+
Attributes:
|
|
2238
|
+
max_stack: Recomputed maximum operand stack depth.
|
|
2239
|
+
max_locals: Recomputed maximum local variable slot count.
|
|
2240
|
+
stack_map_table: Generated ``StackMapTable`` attribute, or ``None``
|
|
2241
|
+
when no frames are required (e.g. a linear method with no
|
|
2242
|
+
branches or exception handlers).
|
|
2243
|
+
"""
|
|
2244
|
+
|
|
2245
|
+
max_stack: int
|
|
2246
|
+
max_locals: int
|
|
2247
|
+
stack_map_table: StackMapTableAttr | None
|
|
2248
|
+
|
|
2249
|
+
|
|
2250
|
+
def compute_frames(
    code: CodeModel,
    method: MethodModel,
    class_name: str,
    cp: ConstantPoolBuilder,
    label_offsets: dict[Label, int],
    resolver: ClassResolver | None = None,
) -> FrameComputationResult:
    """Derive ``max_stack``, ``max_locals`` and the ``StackMapTable`` for a method.

    The method body is turned into a control-flow graph and simulated; the
    simulated entry state of every reachable, labelled, non-entry block is
    then encoded as one stack-map frame (JVM spec §4.7.4), ordered by
    bytecode offset.

    Args:
        code: The ``CodeModel`` whose frames to compute.
        method: The ``MethodModel`` owning ``code``; together with
            ``class_name`` it determines the initial frame.
        class_name: Internal name of the enclosing class
            (e.g. ``"com/example/Foo"``).
        cp: ``ConstantPoolBuilder`` that receives the ``"StackMapTable"``
            UTF-8 entry and is handed to the verification-type conversion.
        label_offsets: Mapping from ``Label`` to resolved bytecode offset,
            as produced by ``resolve_labels()``.  Frame positions are looked
            up in this mapping.
        resolver: Optional class hierarchy resolver forwarded to the
            simulation.

    Returns:
        A ``FrameComputationResult`` carrying the simulated ``max_stack`` and
        ``max_locals``.  ``stack_map_table`` is ``None`` when the CFG has no
        blocks or when no block qualifies for a frame.
    """
    analysis_code = _prepare_analysis_code(code)
    if analysis_code is code:
        analysis_label_offsets = label_offsets
    else:
        # The prepared code is a different object, so its labels are resolved
        # again; these offsets are only passed to the verification-type
        # conversion below.
        from .labels import resolve_labels

        resolved = resolve_labels(list(analysis_code.instructions), cp)
        analysis_label_offsets = resolved.label_offsets

    # NOTE(review): the CFG is built from ``code`` while the simulation runs on
    # ``analysis_code`` — confirm this asymmetry is intentional.
    cfg = build_cfg(code)
    sim = simulate(cfg, analysis_code, method, class_name, resolver)

    def _result(table: StackMapTableAttr | None) -> FrameComputationResult:
        # Every exit path reports the same simulated maxima; only the table differs.
        return FrameComputationResult(
            max_stack=sim.max_stack,
            max_locals=sim.max_locals,
            stack_map_table=table,
        )

    if not cfg.blocks:
        return _result(None)

    # Collect (bytecode_offset, block_id) for each block that gets a frame:
    # not the entry block, has a simulated entry state, carries a label, and
    # that label has a known offset.
    entry_id = cfg.entry.id
    frame_targets: list[tuple[int, int]] = []
    for blk in cfg.blocks:
        if blk.id == entry_id or blk.id not in sim.entry_states or blk.label is None:
            continue
        target = label_offsets.get(blk.label)
        if target is not None:
            frame_targets.append((target, blk.id))

    if not frame_targets:
        return _result(None)

    frame_targets.sort(key=lambda pair: pair[0])

    # Frames are encoded relative to their predecessor; the method's initial
    # frame plays the role of the predecessor for the first entry.
    previous_locals = _vtypes_to_vtis(
        initial_frame(method, class_name).locals, cp, analysis_label_offsets
    )
    # Seeding with -1 lets a single formula serve every frame: the first
    # offset_delta equals the raw offset, later ones are
    # ``offset - previous_offset - 1``.
    previous_offset = -1

    frames: list[StackMapFrameInfo] = []
    for target_offset, target_block in frame_targets:
        entry_state = sim.entry_states[target_block]
        locals_now = _vtypes_to_vtis(entry_state.locals, cp, analysis_label_offsets)
        stack_now = _vtypes_to_vtis(entry_state.stack, cp, analysis_label_offsets)

        frames.append(
            _select_frame(
                target_offset - previous_offset - 1,
                previous_locals,
                locals_now,
                stack_now,
            )
        )
        previous_locals, previous_offset = locals_now, target_offset

    # The attribute name is interned only after all frames have been converted.
    name_index = cp.add_utf8("StackMapTable")
    table = StackMapTableAttr(
        attribute_name_index=name_index,
        attribute_length=_stack_map_table_attribute_length(frames),
        number_of_entries=len(frames),
        entries=frames,
    )
    return _result(table)
|
|
2354
|
+
|
|
2355
|
+
|
|
2356
|
+
# ===================================================================
|
|
2357
|
+
# Public API
|
|
2358
|
+
# ===================================================================
|
|
2359
|
+
|
|
2360
|
+
|
|
2361
|
+
# Names exported by ``from pytecode.analysis import *``.  Entries are grouped
# by topic; each group is introduced by its own comment.
__all__ = [
    # Errors
    "AnalysisError",
    "InvalidLocalError",
    "StackUnderflowError",
    "TypeMergeError",
    # Verification types
    "VDouble",
    "VFloat",
    "VInteger",
    "VLong",
    "VNull",
    "VObject",
    "VTop",
    "VType",
    "VUninitialized",
    "VUninitializedThis",
    # VType helpers
    "is_category2",
    "is_reference",
    "merge_vtypes",
    "vtype_from_descriptor",
    "vtype_from_field_descriptor_str",
    # Frame state
    "FrameState",
    "initial_frame",
    # Opcode metadata
    "OPCODE_EFFECTS",
    "OpcodeEffect",
    # CFG
    "BasicBlock",
    "ControlFlowGraph",
    "ExceptionEdge",
    "build_cfg",
    # Simulation
    "SimulationResult",
    "simulate",
    # Frame computation
    "FrameComputationResult",
    "compute_frames",
    "compute_maxs",
]
|