pytecode 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pytecode/analysis.py ADDED
@@ -0,0 +1,2402 @@
1
+ """Control-flow graph construction and stack/local simulation.
2
+
3
+ Provides analysis infrastructure for JVM bytecode in the editing model:
4
+
5
+ - **Verification type system** (``VType``) mirroring JVM spec §4.10.1.2
6
+ - **Control-flow graph** construction from ``CodeModel`` instructions
7
+ - **Stack and local variable simulation** with forward dataflow analysis
8
+ - **Frame recomputation** for ``max_stack``, ``max_locals``, and ``StackMapTable``
9
+ - **Type merging** at control-flow join points using the class hierarchy
10
+
11
+ All result types are frozen dataclasses — safe to share across threads.
12
+ The module operates on the symbolic editing model (``CodeModel``) so it
13
+ benefits from label-based branch targets, symbolic operands, and
14
+ exception handlers already bound to labels.
15
+
16
+ References:
17
+ JVM spec §4.7.4 — StackMapTable attribute format.
18
+ JVM spec §4.10.1 — Verification by type checking.
19
+ JVM spec §4.10.1.2 — Verification type system and type merging rules.
20
+ JVM spec §6.5 — Individual opcode definitions (stack effects).
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ from collections import deque
26
+ from collections.abc import Sequence
27
+ from dataclasses import dataclass
28
+ from typing import TYPE_CHECKING
29
+
30
+ from .attributes import (
31
+ AppendFrameInfo,
32
+ ChopFrameInfo,
33
+ DoubleVariableInfo,
34
+ FloatVariableInfo,
35
+ FullFrameInfo,
36
+ IntegerVariableInfo,
37
+ LongVariableInfo,
38
+ NullVariableInfo,
39
+ ObjectVariableInfo,
40
+ SameFrameExtendedInfo,
41
+ SameFrameInfo,
42
+ SameLocals1StackItemFrameExtendedInfo,
43
+ SameLocals1StackItemFrameInfo,
44
+ StackMapFrameInfo,
45
+ StackMapTableAttr,
46
+ TopVariableInfo,
47
+ UninitializedThisVariableInfo,
48
+ UninitializedVariableInfo,
49
+ VerificationTypeInfo,
50
+ )
51
+ from .constants import VerificationType
52
+ from .descriptors import (
53
+ ArrayType as DescArrayType,
54
+ )
55
+ from .descriptors import (
56
+ BaseType,
57
+ FieldDescriptor,
58
+ ObjectType,
59
+ VoidType,
60
+ parse_field_descriptor,
61
+ parse_method_descriptor,
62
+ )
63
+ from .hierarchy import JAVA_LANG_OBJECT, common_superclass
64
+ from .instructions import (
65
+ ArrayType as InsnArrayType,
66
+ )
67
+ from .instructions import (
68
+ InsnInfo,
69
+ InsnInfoType,
70
+ )
71
+ from .labels import (
72
+ BranchInsn,
73
+ CodeItem,
74
+ ExceptionHandler,
75
+ Label,
76
+ LookupSwitchInsn,
77
+ TableSwitchInsn,
78
+ )
79
+ from .operands import (
80
+ FieldInsn,
81
+ IIncInsn,
82
+ InterfaceMethodInsn,
83
+ InvokeDynamicInsn,
84
+ LdcClass,
85
+ LdcDouble,
86
+ LdcFloat,
87
+ LdcInsn,
88
+ LdcInt,
89
+ LdcLong,
90
+ LdcMethodHandle,
91
+ LdcMethodType,
92
+ LdcString,
93
+ MethodInsn,
94
+ MultiANewArrayInsn,
95
+ TypeInsn,
96
+ VarInsn,
97
+ )
98
+
99
+ if TYPE_CHECKING:
100
+ from .constant_pool_builder import ConstantPoolBuilder
101
+ from .hierarchy import ClassResolver
102
+ from .model import CodeModel, MethodModel
103
+
104
+ # ===================================================================
105
+ # Analysis errors
106
+ # ===================================================================
107
+
108
+
109
class AnalysisError(Exception):
    """Root of the exception hierarchy used by CFG construction and simulation."""


class StackUnderflowError(AnalysisError):
    """An operation needed more operand-stack slots than were available."""


class InvalidLocalError(AnalysisError):
    """A local variable slot was read while unset or outside the valid range."""


class TypeMergeError(AnalysisError):
    """Frames meeting at a control-flow join point could not be reconciled."""
123
+
124
+
125
+ # ===================================================================
126
+ # Verification type system (JVM spec §4.10.1.2)
127
+ # ===================================================================
128
+
129
+
130
+ @dataclass(frozen=True, slots=True)
131
+ class VTop:
132
+ """Top type — undefined or second slot of a category-2 value."""
133
+
134
+
135
+ @dataclass(frozen=True, slots=True)
136
+ class VInteger:
137
+ """Verification type for int, short, byte, char, boolean."""
138
+
139
+
140
+ @dataclass(frozen=True, slots=True)
141
+ class VFloat:
142
+ """Verification type for float."""
143
+
144
+
145
+ @dataclass(frozen=True, slots=True)
146
+ class VLong:
147
+ """Verification type for long (occupies 2 slots; second is TOP)."""
148
+
149
+
150
+ @dataclass(frozen=True, slots=True)
151
+ class VDouble:
152
+ """Verification type for double (occupies 2 slots; second is TOP)."""
153
+
154
+
155
+ @dataclass(frozen=True, slots=True)
156
+ class VNull:
157
+ """Verification type for null — assignable to any reference type."""
158
+
159
+
160
+ @dataclass(frozen=True, slots=True)
161
+ class VObject:
162
+ """Verification type for a reference to a class, interface, or array.
163
+
164
+ Attributes:
165
+ class_name: JVM internal name (e.g. ``"java/lang/String"`` or ``"[I"``).
166
+ """
167
+
168
+ class_name: str
169
+
170
+
171
+ @dataclass(frozen=True, slots=True)
172
+ class VUninitializedThis:
173
+ """Verification type for ``this`` before the super/this ``<init>`` call."""
174
+
175
+
176
+ @dataclass(frozen=True, slots=True)
177
+ class VUninitialized:
178
+ """Verification type for an object created by NEW before ``<init>``.
179
+
180
+ Analysis inserts synthetic labels for unlabeled ``NEW`` instructions so
181
+ edited code can still refer to allocation sites precisely.
182
+
183
+ Attributes:
184
+ new_label: Label identifying the NEW instruction that created this value.
185
+ """
186
+
187
+ new_label: Label
188
+
189
+
190
+ type VType = VTop | VInteger | VFloat | VLong | VDouble | VNull | VObject | VUninitializedThis | VUninitialized
191
+
192
+
193
+ # --- Singletons for stateless types ---
194
+
195
# Shared instances of the field-less verification types. They are frozen
# dataclasses, so one instance of each can be reused everywhere.
_TOP = VTop()
_INTEGER = VInteger()
_FLOAT = VFloat()
_LONG = VLong()
_DOUBLE = VDouble()
_NULL = VNull()
_UNINIT_THIS = VUninitializedThis()

# Pre-built object types for class names referenced by this module.
_OBJECT_OBJECT = VObject(class_name=JAVA_LANG_OBJECT)
_OBJECT_STRING = VObject(class_name="java/lang/String")
_OBJECT_CLASS = VObject(class_name="java/lang/Class")
_OBJECT_METHOD_TYPE = VObject(class_name="java/lang/invoke/MethodType")
_OBJECT_METHOD_HANDLE = VObject(class_name="java/lang/invoke/MethodHandle")
_OBJECT_THROWABLE = VObject(class_name="java/lang/Throwable")
208
+
209
+ # ---------------------------------------------------------------------------
210
+ # VType helpers
211
+ # ---------------------------------------------------------------------------
212
+
213
# Map from NEWARRAY atype codes to the descriptor of the primitive array that
# the instruction creates ("[" followed by the element's descriptor letter).
_NEWARRAY_TYPE_MAP: dict[InsnArrayType, str] = {
    atype: "[" + element_code
    for atype, element_code in (
        (InsnArrayType.BOOLEAN, "Z"),
        (InsnArrayType.CHAR, "C"),
        (InsnArrayType.FLOAT, "F"),
        (InsnArrayType.DOUBLE, "D"),
        (InsnArrayType.BYTE, "B"),
        (InsnArrayType.SHORT, "S"),
        (InsnArrayType.INT, "I"),
        (InsnArrayType.LONG, "J"),
    )
}
224
+
225
+
226
def vtype_from_descriptor(fd: FieldDescriptor) -> VType:
    """Map a parsed field descriptor onto its verification type.

    Args:
        fd: A parsed field descriptor (base type, object type, or array type).

    Returns:
        ``VInteger`` for int/short/byte/char/boolean, ``VFloat``/``VLong``/
        ``VDouble`` for the matching primitives, and a ``VObject`` for
        object and array descriptors.

    Raises:
        ValueError: If *fd* is none of the recognised descriptor kinds.
    """
    if isinstance(fd, BaseType):
        int_like = {BaseType.INT, BaseType.SHORT, BaseType.BYTE, BaseType.CHAR, BaseType.BOOLEAN}
        if fd in int_like:
            return _INTEGER
        # The remaining primitives each have a dedicated verification type.
        for base, singleton in (
            (BaseType.FLOAT, _FLOAT),
            (BaseType.LONG, _LONG),
            (BaseType.DOUBLE, _DOUBLE),
        ):
            if fd is base:
                return singleton
    if isinstance(fd, ObjectType):
        return VObject(fd.class_name)
    if isinstance(fd, DescArrayType):
        array_name = _descriptor_to_internal(fd)
        return VObject(array_name)
    raise ValueError(f"Unexpected descriptor type: {fd!r}")  # pragma: no cover
242
+
243
+
244
def vtype_from_field_descriptor_str(desc: str) -> VType:
    """Parse the raw field descriptor *desc* and return its verification type."""
    parsed = parse_field_descriptor(desc)
    return vtype_from_descriptor(parsed)
247
+
248
+
249
def _descriptor_to_internal(fd: FieldDescriptor) -> str:
    """Return the string stored in ``VObject.class_name`` for *fd*.

    Base types yield their descriptor value, object types their class name,
    and array types ``"["`` followed by the component's descriptor string.
    """
    if isinstance(fd, BaseType):
        return fd.value
    if not isinstance(fd, ObjectType):
        # Anything else is expected to be a DescArrayType.
        component = _descriptor_to_component_string(fd.component_type)
        return "[" + component
    return fd.class_name
257
+
258
+
259
def _descriptor_to_component_string(fd: FieldDescriptor) -> str:
    """Render *fd* as a JVM descriptor string usable as an array component."""
    brackets = ""
    # Peel array dimensions until a base or object type is reached; anything
    # that is neither is expected to be a DescArrayType.
    while not isinstance(fd, BaseType) and not isinstance(fd, ObjectType):
        brackets += "["
        fd = fd.component_type
    if isinstance(fd, BaseType):
        return brackets + fd.value
    return brackets + f"L{fd.class_name};"
267
+
268
+
269
def is_category2(vt: VType) -> bool:
    """Tell whether *vt* is a two-slot (category-2) type, i.e. long or double."""
    return isinstance(vt, (VLong, VDouble))
272
+
273
+
274
def is_reference(vt: VType) -> bool:
    """Tell whether *vt* is a reference type: null, object, or uninitialized."""
    reference_kinds = (VNull, VObject, VUninitializedThis, VUninitialized)
    return isinstance(vt, reference_kinds)
277
+
278
+
279
def merge_vtypes(a: VType, b: VType, resolver: ClassResolver | None = None) -> VType:
    """Merge two verification types at a control-flow join point.

    Follows the JVM spec §4.10.1.2 type hierarchy:

    - Identical types → same type
    - ``VNull`` + ``VObject`` → the ``VObject`` (null is assignable to any
      class or array type)
    - Two ``VObject`` → ``VObject(common_superclass(...))``, or
      ``java/lang/Object`` when no resolver is given or resolution fails
    - Anything else → ``VTop``; this includes ``VNull`` joined with an
      uninitialized type, because null is not assignable to
      ``uninitialized``/``uninitializedThis``

    Args:
        a: First verification type.
        b: Second verification type.
        resolver: Optional class hierarchy resolver for precise object merging.

    Returns:
        The merged verification type.
    """
    if a == b:
        return a

    # null only widens to initialized class/array references. Joining it with
    # an uninitialized value has no representable upper bound below top, so
    # that case falls through to VTop at the end.
    if isinstance(a, VNull) and isinstance(b, VObject):
        return b
    if isinstance(b, VNull) and isinstance(a, VObject):
        return a

    # Two object references → closest common superclass.
    if isinstance(a, VObject) and isinstance(b, VObject):
        if resolver is None:
            return _OBJECT_OBJECT
        try:
            return VObject(common_superclass(resolver, a.class_name, b.class_name))
        except Exception:
            # Deliberate best-effort: if the hierarchy cannot be resolved,
            # java/lang/Object is always a valid (if imprecise) upper bound.
            return _OBJECT_OBJECT

    # Everything else is incompatible.
    return _TOP
317
+
318
+
319
+ # ===================================================================
320
+ # Frame state
321
+ # ===================================================================
322
+
323
+
324
@dataclass(frozen=True, slots=True)
class FrameState:
    """Immutable snapshot of the operand stack and local variable slots.

    Category-2 values (long, double) occupy two consecutive slots — the
    value itself followed by ``VTop``. Every mutating operation returns a
    new ``FrameState``; the receiver is never modified.

    Attributes:
        stack: Operand stack, ordered bottom-to-top.
        locals: Local variable slots indexed by slot number; unset slots
            are ``VTop``.
    """

    stack: tuple[VType, ...]
    locals: tuple[VType, ...]

    # -- Stack operations --

    def push(self, *types: VType) -> FrameState:
        """Push one or more types onto the stack (category-2 aware).

        Each long/double is followed by a ``VTop`` filler slot.
        """
        new_stack = list(self.stack)
        for vt in types:
            new_stack.append(vt)
            if is_category2(vt):
                new_stack.append(_TOP)
        return FrameState(tuple(new_stack), self.locals)

    def pop(self, n: int = 1) -> tuple[FrameState, tuple[VType, ...]]:
        """Pop *n* stack slots and return ``(new_state, popped_values)``.

        Args:
            n: Number of stack slots to pop (slots, not values — a
                category-2 value counts as two).

        Returns:
            A ``(new_state, popped_values)`` tuple where *popped_values* is
            ordered from topmost to deepest.

        Raises:
            ValueError: If *n* is negative (e.g. an unresolved ``-1``
                "variable" effect was passed through by mistake).
            StackUnderflowError: If the stack has fewer than *n* slots.
        """
        if n < 0:
            # A negative count would otherwise slice from the wrong end and
            # silently return a corrupted frame.
            raise ValueError(f"Cannot pop a negative number of slots: {n}")
        if len(self.stack) < n:
            raise StackUnderflowError(f"Need {n} slots but stack has {len(self.stack)}")
        if n == 0:
            return self, ()
        remaining = self.stack[:-n]
        popped = tuple(reversed(self.stack[-n:]))
        return FrameState(remaining, self.locals), popped

    def peek(self, depth: int = 0) -> VType:
        """Return the type at *depth* slots from the top (0 = top).

        Raises:
            ValueError: If *depth* is negative.
            StackUnderflowError: If *depth* exceeds the current stack size.
        """
        if depth < 0:
            raise ValueError(f"Cannot peek at negative depth {depth}")
        idx = len(self.stack) - 1 - depth
        if idx < 0:
            raise StackUnderflowError(f"Cannot peek at depth {depth} with stack size {len(self.stack)}")
        return self.stack[idx]

    # -- Local operations --

    def set_local(self, index: int, vtype: VType) -> FrameState:
        """Set a local variable slot (category-2 aware).

        The locals tuple is grown with ``VTop`` as needed. Storing a
        long/double also marks slot ``index + 1`` as ``VTop``. If slot
        ``index - 1`` currently holds a long/double, this store overwrites
        its second half, so that slot is invalidated to ``VTop`` as well.

        Raises:
            InvalidLocalError: If *index* is negative.
        """
        if index < 0:
            # Negative indices would wrap around the list and clobber an
            # unrelated slot.
            raise InvalidLocalError(f"Local variable slot {index} is out of range")
        wide = is_category2(vtype)
        needed = index + (2 if wide else 1)
        locals_list = list(self.locals)
        if len(locals_list) < needed:
            locals_list.extend([_TOP] * (needed - len(locals_list)))
        # Overwriting the upper half of a two-slot value destroys that value.
        if index > 0 and is_category2(locals_list[index - 1]):
            locals_list[index - 1] = _TOP
        locals_list[index] = vtype
        if wide:
            locals_list[index + 1] = _TOP
        return FrameState(self.stack, tuple(locals_list))

    def get_local(self, index: int) -> VType:
        """Read a local variable slot.

        Raises:
            InvalidLocalError: If *index* is out of range or the slot is
                uninitialized (holds ``VTop``).
        """
        if index < 0 or index >= len(self.locals):
            raise InvalidLocalError(f"Local variable slot {index} is out of range (max {len(self.locals) - 1})")
        vt = self.locals[index]
        if isinstance(vt, VTop):
            raise InvalidLocalError(f"Local variable slot {index} is not initialized")
        return vt

    @property
    def stack_depth(self) -> int:
        """Number of stack slots currently occupied."""
        return len(self.stack)

    @property
    def max_local_index(self) -> int:
        """Highest local slot index in use (or -1 if no locals)."""
        return len(self.locals) - 1
419
+
420
+
421
# Frame with an empty operand stack and no local slots.
_EMPTY_FRAME = FrameState(stack=(), locals=())
422
+
423
+
424
def initial_frame(method: MethodModel, class_name: str) -> FrameState:
    """Compute the ``FrameState`` in effect at a method's first instruction.

    For non-static methods slot 0 holds the receiver: ``VUninitializedThis``
    when the method is named ``<init>``, otherwise ``VObject(class_name)``.
    The declared parameters follow in order; each long/double parameter is
    trailed by a ``VTop`` filler slot. The operand stack starts out empty.

    Args:
        method: The method whose entry frame is wanted.
        class_name: JVM internal name of the class declaring the method.

    Returns:
        The entry ``FrameState`` for *method*.
    """
    from .constants import MethodAccessFlag

    descriptor = parse_method_descriptor(method.descriptor)
    slots: list[VType] = []

    has_receiver = not (method.access_flags & MethodAccessFlag.STATIC)
    if has_receiver:
        receiver = _UNINIT_THIS if method.name == "<init>" else VObject(class_name)
        slots.append(receiver)

    for parameter in descriptor.parameter_types:
        param_type = vtype_from_descriptor(parameter)
        if is_category2(param_type):
            # Two-slot value: the type itself plus its filler.
            slots.extend((param_type, _TOP))
        else:
            slots.append(param_type)

    return FrameState((), tuple(slots))
456
+
457
+
458
+ # ===================================================================
459
+ # Merging frame states
460
+ # ===================================================================
461
+
462
+
463
def _merge_frames(a: FrameState, b: FrameState, resolver: ClassResolver | None) -> FrameState:
    """Combine two frames that meet at a control-flow join point.

    The operand stacks must have equal depth and are merged slot by slot.
    Locals are merged slot by slot as well, after padding the shorter tuple
    with ``VTop`` up to the length of the longer one.

    Raises:
        TypeMergeError: If the two stacks differ in depth.
    """
    depth_a, depth_b = len(a.stack), len(b.stack)
    if depth_a != depth_b:
        raise TypeMergeError(f"Stack depths differ at join point: {depth_a} vs {depth_b}")

    stack = tuple(merge_vtypes(left, right, resolver) for left, right in zip(a.stack, b.stack))

    # Bring both locals tuples to a common width before pairing them up.
    width = max(len(a.locals), len(b.locals))
    padded_a = a.locals + (_TOP,) * (width - len(a.locals))
    padded_b = b.locals + (_TOP,) * (width - len(b.locals))
    merged = tuple(merge_vtypes(left, right, resolver) for left, right in zip(padded_a, padded_b))

    return FrameState(stack, merged)
482
+
483
+
484
+ # ===================================================================
485
+ # Opcode metadata
486
+ # ===================================================================
487
+
488
+
489
+ @dataclass(frozen=True, slots=True)
490
+ class OpcodeEffect:
491
+ """Static stack effect and control-flow metadata for an opcode.
492
+
493
+ ``pops`` and ``pushes`` are ``-1`` for opcodes whose stack effects depend on
494
+ the operand (invoke, field access, LDC, multianewarray). Those are
495
+ computed dynamically during simulation from the instruction's symbolic
496
+ operand metadata.
497
+
498
+ Attributes:
499
+ pops: Number of stack slots consumed (``-1`` if variable).
500
+ pushes: Number of stack slots produced (``-1`` if variable).
501
+ is_branch: ``True`` for branch instructions.
502
+ is_unconditional: ``True`` for unconditional transfers (goto, switch,
503
+ athrow).
504
+ is_switch: ``True`` for tableswitch/lookupswitch.
505
+ is_return: ``True`` for return instructions.
506
+ """
507
+
508
+ pops: int
509
+ pushes: int
510
+ is_branch: bool = False
511
+ is_unconditional: bool = False
512
+ is_switch: bool = False
513
+ is_return: bool = False
514
+
515
+
516
# Short aliases that keep the opcode table below compact.
_T = InsnInfoType
_simple = OpcodeEffect


def _branch(p: int, u: bool) -> OpcodeEffect:
    """Effect of a branch that pops *p* slots; *u* marks it unconditional."""
    return OpcodeEffect(pops=p, pushes=0, is_branch=True, is_unconditional=u)


def _ret(p: int) -> OpcodeEffect:
    """Effect of an instruction that pops *p* slots and is flagged ``is_return``."""
    return OpcodeEffect(pops=p, pushes=0, is_return=True)


# Shared by tableswitch and lookupswitch: pops the key, never falls through.
_switch = OpcodeEffect(pops=1, pushes=0, is_branch=True, is_unconditional=True, is_switch=True)
# Placeholder for operand-dependent effects — resolved during simulation.
_var = OpcodeEffect(pops=-1, pushes=-1)
532
+
533
# Opcode → static effect table. Each row pairs a run of opcodes with the
# effect they share; rows are listed in the table's canonical order so the
# resulting dict iterates in that same order.
OPCODE_EFFECTS: dict[InsnInfoType, OpcodeEffect] = {
    opcode: effect
    for opcodes, effect in (
        # --- Constants ---
        ((_T.NOP,), _simple(0, 0)),
        (
            (
                _T.ACONST_NULL,
                _T.ICONST_M1,
                _T.ICONST_0,
                _T.ICONST_1,
                _T.ICONST_2,
                _T.ICONST_3,
                _T.ICONST_4,
                _T.ICONST_5,
            ),
            _simple(0, 1),
        ),
        ((_T.LCONST_0, _T.LCONST_1), _simple(0, 2)),
        ((_T.FCONST_0, _T.FCONST_1, _T.FCONST_2), _simple(0, 1)),
        ((_T.DCONST_0, _T.DCONST_1), _simple(0, 2)),
        ((_T.BIPUSH, _T.SIPUSH), _simple(0, 1)),
        ((_T.LDC, _T.LDC_W, _T.LDC2_W), _var),
        # --- Loads (raw forms — in editing model these are VarInsn) ---
        ((_T.ILOAD,), _simple(0, 1)),
        ((_T.LLOAD,), _simple(0, 2)),
        ((_T.FLOAD,), _simple(0, 1)),
        ((_T.DLOAD,), _simple(0, 2)),
        ((_T.ALOAD,), _simple(0, 1)),
        ((_T.ILOAD_0, _T.ILOAD_1, _T.ILOAD_2, _T.ILOAD_3), _simple(0, 1)),
        ((_T.LLOAD_0, _T.LLOAD_1, _T.LLOAD_2, _T.LLOAD_3), _simple(0, 2)),
        ((_T.FLOAD_0, _T.FLOAD_1, _T.FLOAD_2, _T.FLOAD_3), _simple(0, 1)),
        ((_T.DLOAD_0, _T.DLOAD_1, _T.DLOAD_2, _T.DLOAD_3), _simple(0, 2)),
        ((_T.ALOAD_0, _T.ALOAD_1, _T.ALOAD_2, _T.ALOAD_3), _simple(0, 1)),
        # --- Array loads ---
        ((_T.IALOAD,), _simple(2, 1)),
        ((_T.LALOAD,), _simple(2, 2)),
        ((_T.FALOAD,), _simple(2, 1)),
        ((_T.DALOAD,), _simple(2, 2)),
        ((_T.AALOAD, _T.BALOAD, _T.CALOAD, _T.SALOAD), _simple(2, 1)),
        # --- Stores (raw forms) ---
        ((_T.ISTORE,), _simple(1, 0)),
        ((_T.LSTORE,), _simple(2, 0)),
        ((_T.FSTORE,), _simple(1, 0)),
        ((_T.DSTORE,), _simple(2, 0)),
        ((_T.ASTORE,), _simple(1, 0)),
        ((_T.ISTORE_0, _T.ISTORE_1, _T.ISTORE_2, _T.ISTORE_3), _simple(1, 0)),
        ((_T.LSTORE_0, _T.LSTORE_1, _T.LSTORE_2, _T.LSTORE_3), _simple(2, 0)),
        ((_T.FSTORE_0, _T.FSTORE_1, _T.FSTORE_2, _T.FSTORE_3), _simple(1, 0)),
        ((_T.DSTORE_0, _T.DSTORE_1, _T.DSTORE_2, _T.DSTORE_3), _simple(2, 0)),
        ((_T.ASTORE_0, _T.ASTORE_1, _T.ASTORE_2, _T.ASTORE_3), _simple(1, 0)),
        # --- Array stores ---
        ((_T.IASTORE,), _simple(3, 0)),
        ((_T.LASTORE,), _simple(4, 0)),
        ((_T.FASTORE,), _simple(3, 0)),
        ((_T.DASTORE,), _simple(4, 0)),
        ((_T.AASTORE, _T.BASTORE, _T.CASTORE, _T.SASTORE), _simple(3, 0)),
        # --- Stack manipulation ---
        ((_T.POP,), _simple(1, 0)),
        ((_T.POP2,), _simple(2, 0)),
        ((_T.DUP,), _simple(1, 2)),
        ((_T.DUP_X1,), _simple(2, 3)),
        ((_T.DUP_X2,), _simple(3, 4)),
        ((_T.DUP2,), _simple(2, 4)),
        ((_T.DUP2_X1,), _simple(3, 5)),
        ((_T.DUP2_X2,), _simple(4, 6)),
        ((_T.SWAP,), _simple(2, 2)),
        # --- Integer arithmetic ---
        ((_T.IADD, _T.ISUB, _T.IMUL, _T.IDIV, _T.IREM), _simple(2, 1)),
        ((_T.INEG,), _simple(1, 1)),
        ((_T.ISHL, _T.ISHR, _T.IUSHR, _T.IAND, _T.IOR, _T.IXOR), _simple(2, 1)),
        # --- Long arithmetic ---
        ((_T.LADD, _T.LSUB, _T.LMUL, _T.LDIV, _T.LREM), _simple(4, 2)),
        ((_T.LNEG,), _simple(2, 2)),
        # Shifts take a long value plus an int shift count.
        ((_T.LSHL, _T.LSHR, _T.LUSHR), _simple(3, 2)),
        ((_T.LAND, _T.LOR, _T.LXOR), _simple(4, 2)),
        # --- Float arithmetic ---
        ((_T.FADD, _T.FSUB, _T.FMUL, _T.FDIV, _T.FREM), _simple(2, 1)),
        ((_T.FNEG,), _simple(1, 1)),
        # --- Double arithmetic ---
        ((_T.DADD, _T.DSUB, _T.DMUL, _T.DDIV, _T.DREM), _simple(4, 2)),
        ((_T.DNEG,), _simple(2, 2)),
        # --- Conversions ---
        ((_T.I2L,), _simple(1, 2)),
        ((_T.I2F,), _simple(1, 1)),
        ((_T.I2D,), _simple(1, 2)),
        ((_T.L2I, _T.L2F), _simple(2, 1)),
        ((_T.L2D,), _simple(2, 2)),
        ((_T.F2I,), _simple(1, 1)),
        ((_T.F2L, _T.F2D), _simple(1, 2)),
        ((_T.D2I,), _simple(2, 1)),
        ((_T.D2L,), _simple(2, 2)),
        ((_T.D2F,), _simple(2, 1)),
        ((_T.I2B, _T.I2C, _T.I2S), _simple(1, 1)),
        # --- Comparisons ---
        ((_T.LCMP,), _simple(4, 1)),
        ((_T.FCMPL, _T.FCMPG), _simple(2, 1)),
        ((_T.DCMPL, _T.DCMPG), _simple(4, 1)),
        # --- Conditional branches (pop 1 int) ---
        ((_T.IFEQ, _T.IFNE, _T.IFLT, _T.IFGE, _T.IFGT, _T.IFLE), _branch(1, False)),
        # --- Conditional branches (pop 2 ints) ---
        (
            (_T.IF_ICMPEQ, _T.IF_ICMPNE, _T.IF_ICMPLT, _T.IF_ICMPGE, _T.IF_ICMPGT, _T.IF_ICMPLE),
            _branch(2, False),
        ),
        # --- Reference conditional branches ---
        ((_T.IF_ACMPEQ, _T.IF_ACMPNE), _branch(2, False)),
        ((_T.IFNULL, _T.IFNONNULL), _branch(1, False)),
        # --- Unconditional branches ---
        ((_T.GOTO, _T.GOTO_W), _branch(0, True)),
        # --- Subroutine (legacy, pre-Java 6) ---
        ((_T.JSR, _T.JSR_W), OpcodeEffect(0, 1, is_branch=True, is_unconditional=True)),
        ((_T.RET,), OpcodeEffect(0, 0, is_branch=True, is_unconditional=True)),
        # --- Switch ---
        ((_T.TABLESWITCH, _T.LOOKUPSWITCH), _switch),
        # --- Returns ---
        ((_T.IRETURN,), _ret(1)),
        ((_T.LRETURN,), _ret(2)),
        ((_T.FRETURN,), _ret(1)),
        ((_T.DRETURN,), _ret(2)),
        ((_T.ARETURN,), _ret(1)),
        ((_T.RETURN,), _ret(0)),
        # --- Field access (variable effect) ---
        ((_T.GETFIELD, _T.PUTFIELD, _T.GETSTATIC, _T.PUTSTATIC), _var),
        # --- Method invocation (variable effect) ---
        (
            (_T.INVOKEVIRTUAL, _T.INVOKESPECIAL, _T.INVOKESTATIC, _T.INVOKEINTERFACE, _T.INVOKEDYNAMIC),
            _var,
        ),
        # --- Object creation ---
        ((_T.NEW,), _simple(0, 1)),
        ((_T.NEWARRAY, _T.ANEWARRAY), _simple(1, 1)),
        ((_T.MULTIANEWARRAY,), _var),
        ((_T.ARRAYLENGTH,), _simple(1, 1)),
        # --- Type operations ---
        ((_T.CHECKCAST, _T.INSTANCEOF), _simple(1, 1)),
        # --- Throw ---
        ((_T.ATHROW,), _ret(1)),
        # --- Monitor ---
        ((_T.MONITORENTER, _T.MONITOREXIT), _simple(1, 0)),
        # --- IINC (no stack change) ---
        ((_T.IINC,), _simple(0, 0)),
        # --- WIDE variants (same effect as non-wide) ---
        ((_T.WIDE,), _simple(0, 0)),
        ((_T.ILOADW,), _simple(0, 1)),
        ((_T.LLOADW,), _simple(0, 2)),
        ((_T.FLOADW,), _simple(0, 1)),
        ((_T.DLOADW,), _simple(0, 2)),
        ((_T.ALOADW,), _simple(0, 1)),
        ((_T.ISTOREW,), _simple(1, 0)),
        ((_T.LSTOREW,), _simple(2, 0)),
        ((_T.FSTOREW,), _simple(1, 0)),
        ((_T.DSTOREW,), _simple(2, 0)),
        ((_T.ASTOREW,), _simple(1, 0)),
        ((_T.RETW,), OpcodeEffect(0, 0, is_branch=True, is_unconditional=True)),
        ((_T.IINCW,), _simple(0, 0)),
    )
    for opcode in opcodes
}
776
+
777
+
778
def _is_terminal(insn: InsnInfo) -> bool:
    """Tell whether *insn* closes its block without falling through.

    True when the opcode's table entry is flagged unconditional or return;
    opcodes missing from ``OPCODE_EFFECTS`` are treated as non-terminal.
    """
    effect = OPCODE_EFFECTS.get(insn.type)
    return effect is not None and (effect.is_unconditional or effect.is_return)
784
+
785
+
786
def _is_branch_or_switch(insn: InsnInfo) -> bool:
    """Tell whether *insn* is flagged ``is_branch`` in ``OPCODE_EFFECTS``.

    Opcodes missing from the table are reported as non-branching.
    """
    effect = OPCODE_EFFECTS.get(insn.type)
    return effect.is_branch if effect is not None else False
792
+
793
+
794
+ # ===================================================================
795
+ # Control-flow graph
796
+ # ===================================================================
797
+
798
+
799
+ @dataclass(slots=True)
800
+ class BasicBlock:
801
+ """A maximal straight-line sequence of instructions within a method.
802
+
803
+ Mutable during construction, then frozen by ``build_cfg``.
804
+
805
+ Attributes:
806
+ id: Unique block index within the CFG.
807
+ label: Label at the start of this block, if any.
808
+ instructions: Ordered instructions in this block.
809
+ successor_ids: Block ids of normal-flow successors.
810
+ exception_handler_ids: ``(handler_block_id, catch_type)`` pairs for
811
+ active exception handlers.
812
+ """
813
+
814
+ id: int
815
+ label: Label | None
816
+ instructions: list[InsnInfo]
817
+ successor_ids: list[int]
818
+ exception_handler_ids: list[tuple[int, str | None]]
819
+
820
+ def __repr__(self) -> str:
821
+ label_str = f" ({self.label!r})" if self.label is not None else ""
822
+ return f"BasicBlock(id={self.id}{label_str}, insns={len(self.instructions)}, succs={self.successor_ids})"
823
+
824
+
825
+ @dataclass(frozen=True, slots=True)
826
+ class ExceptionEdge:
827
+ """An exception edge from a protected block to a handler block.
828
+
829
+ Attributes:
830
+ handler_block_id: Block id of the exception handler.
831
+ catch_type: Internal name of the caught exception type, or ``None``
832
+ for a catch-all (``finally``).
833
+ """
834
+
835
+ handler_block_id: int
836
+ catch_type: str | None
837
+
838
+
839
+ @dataclass(frozen=True, slots=True)
840
+ class ControlFlowGraph:
841
+ """Control-flow graph for a method's code body.
842
+
843
+ Attributes:
844
+ entry: The entry basic block.
845
+ blocks: All blocks, ordered to match the original instruction sequence.
846
+ exception_handlers: Exception handler declarations from the code.
847
+ label_to_block: Mapping from labels to the block they start.
848
+ """
849
+
850
+ entry: BasicBlock
851
+ blocks: tuple[BasicBlock, ...]
852
+ exception_handlers: tuple[ExceptionHandler, ...]
853
+ label_to_block: dict[Label, BasicBlock]
854
+
855
+
856
+ def build_cfg(code: CodeModel) -> ControlFlowGraph:
857
+ """Construct a control-flow graph from a ``CodeModel``.
858
+
859
+ Partitions the instruction stream into basic blocks and builds edges
860
+ for branches, fall-through, and exception handlers.
861
+
862
+ Args:
863
+ code: The code model to partition into basic blocks.
864
+
865
+ Returns:
866
+ A ``ControlFlowGraph`` with edges for all control-flow paths.
867
+ """
868
+ items = code.instructions
869
+ if not items:
870
+ empty_block = BasicBlock(id=0, label=None, instructions=[], successor_ids=[], exception_handler_ids=[])
871
+ return ControlFlowGraph(
872
+ entry=empty_block,
873
+ blocks=(empty_block,),
874
+ exception_handlers=tuple(code.exception_handlers),
875
+ label_to_block={},
876
+ )
877
+
878
+ # Step 1: Identify block leaders.
879
+ # A leader is an instruction (not a Label) that starts a new block.
880
+ # We track leaders by their index in the items list.
881
+ leader_indices: set[int] = set()
882
+
883
+ # Collect all labels that are branch targets or exception handler boundaries.
884
+ target_labels: set[int] = set() # id() of labels that start blocks
885
+
886
+ # Labels used as branch targets
887
+ for item in items:
888
+ if isinstance(item, BranchInsn):
889
+ target_labels.add(id(item.target))
890
+ elif isinstance(item, LookupSwitchInsn):
891
+ target_labels.add(id(item.default_target))
892
+ for _, lbl in item.pairs:
893
+ target_labels.add(id(lbl))
894
+ elif isinstance(item, TableSwitchInsn):
895
+ target_labels.add(id(item.default_target))
896
+ for lbl in item.targets:
897
+ target_labels.add(id(lbl))
898
+
899
+ # Labels used in exception handlers
900
+ for eh in code.exception_handlers:
901
+ target_labels.add(id(eh.start))
902
+ target_labels.add(id(eh.end))
903
+ target_labels.add(id(eh.handler))
904
+
905
+ # First real instruction is always a leader.
906
+ first_insn_idx = _find_first_insn(items)
907
+ if first_insn_idx is not None:
908
+ leader_indices.add(first_insn_idx)
909
+
910
+ # Scan for leaders.
911
+ prev_was_terminal = False
912
+ for i, item in enumerate(items):
913
+ if isinstance(item, Label):
914
+ if id(item) in target_labels:
915
+ # The next real instruction after this label is a leader.
916
+ next_insn = _find_next_insn(items, i + 1)
917
+ if next_insn is not None:
918
+ leader_indices.add(next_insn)
919
+ else:
920
+ leader_indices.add(i)
921
+ continue
922
+
923
+ # item is an InsnInfo
924
+ if prev_was_terminal:
925
+ leader_indices.add(i)
926
+
927
+ prev_was_terminal = _is_terminal(item) or (
928
+ _is_branch_or_switch(item) and not OPCODE_EFFECTS[item.type].is_unconditional
929
+ )
930
+ # For conditional branches, the fall-through is the next insn, which
931
+ # is implicitly a leader only if control can reach it from multiple paths.
932
+ # But we still need to split after any branch for clean block boundaries.
933
+ if _is_branch_or_switch(item):
934
+ prev_was_terminal = True
935
+
936
+ if not leader_indices:
937
+ # All labels, no real instructions — create a single empty block.
938
+ empty_block = BasicBlock(id=0, label=None, instructions=[], successor_ids=[], exception_handler_ids=[])
939
+ lbl_map: dict[Label, BasicBlock] = {}
940
+ for item in items:
941
+ if isinstance(item, Label):
942
+ lbl_map[item] = empty_block
943
+ return ControlFlowGraph(
944
+ entry=empty_block,
945
+ blocks=(empty_block,),
946
+ exception_handlers=tuple(code.exception_handlers),
947
+ label_to_block=lbl_map,
948
+ )
949
+
950
+ # Step 2: Build blocks.
951
+ sorted_leaders = sorted(leader_indices)
952
+ leader_set = set(sorted_leaders)
953
+
954
+ blocks: list[BasicBlock] = []
955
+ block_for_index: dict[int, int] = {} # items index → block id
956
+ label_to_block_map: dict[Label, BasicBlock] = {}
957
+
958
+ current_block_id = 0
959
+ current_block: BasicBlock | None = None
960
+ pending_labels: list[Label] = []
961
+
962
+ for i, item in enumerate(items):
963
+ if isinstance(item, Label):
964
+ next_insn = _find_next_insn(items, i + 1)
965
+ if current_block is not None and (next_insn is None or next_insn not in leader_set):
966
+ label_to_block_map[item] = current_block
967
+ else:
968
+ pending_labels.append(item)
969
+ continue
970
+
971
+ # item is an InsnInfo
972
+ if i in leader_set:
973
+ # Start a new block.
974
+ block_label = pending_labels[0] if pending_labels else None
975
+ current_block = BasicBlock(
976
+ id=current_block_id,
977
+ label=block_label,
978
+ instructions=[],
979
+ successor_ids=[],
980
+ exception_handler_ids=[],
981
+ )
982
+ # Map all pending labels to this block.
983
+ for lbl in pending_labels:
984
+ label_to_block_map[lbl] = current_block
985
+ pending_labels = []
986
+ blocks.append(current_block)
987
+ current_block_id += 1
988
+
989
+ if current_block is not None:
990
+ current_block.instructions.append(item)
991
+ block_for_index[i] = current_block.id
992
+
993
+ # Map any trailing labels to the last block.
994
+ if pending_labels and blocks:
995
+ for lbl in pending_labels:
996
+ label_to_block_map[lbl] = blocks[-1]
997
+
998
+ # Also map labels that precede leader instructions to their block.
999
+ # Walk items again to pick up labels immediately before leader instructions.
1000
+ pending_labels_2: list[Label] = []
1001
+ for i, item in enumerate(items):
1002
+ if isinstance(item, Label):
1003
+ pending_labels_2.append(item)
1004
+ else:
1005
+ if pending_labels_2:
1006
+ if i in leader_set:
1007
+ for lbl in pending_labels_2:
1008
+ if lbl not in label_to_block_map:
1009
+ # Find the block for this leader
1010
+ for blk in blocks:
1011
+ if blk.instructions and blk.instructions[0] is item:
1012
+ label_to_block_map[lbl] = blk
1013
+ break
1014
+ pending_labels_2 = []
1015
+
1016
+ # Step 3: Build edges.
1017
+ for idx, block in enumerate(blocks):
1018
+ if not block.instructions:
1019
+ # Empty block falls through to next block.
1020
+ if idx + 1 < len(blocks):
1021
+ block.successor_ids.append(blocks[idx + 1].id)
1022
+ continue
1023
+
1024
+ last_insn = block.instructions[-1]
1025
+ effect = OPCODE_EFFECTS.get(last_insn.type)
1026
+
1027
+ # Branch targets
1028
+ if isinstance(last_insn, BranchInsn):
1029
+ target_block = label_to_block_map.get(last_insn.target)
1030
+ if target_block is not None:
1031
+ block.successor_ids.append(target_block.id)
1032
+ elif isinstance(last_insn, LookupSwitchInsn):
1033
+ default_block = label_to_block_map.get(last_insn.default_target)
1034
+ if default_block is not None:
1035
+ block.successor_ids.append(default_block.id)
1036
+ for _, lbl in last_insn.pairs:
1037
+ target_block = label_to_block_map.get(lbl)
1038
+ if target_block is not None and target_block.id not in block.successor_ids:
1039
+ block.successor_ids.append(target_block.id)
1040
+ elif isinstance(last_insn, TableSwitchInsn):
1041
+ default_block = label_to_block_map.get(last_insn.default_target)
1042
+ if default_block is not None:
1043
+ block.successor_ids.append(default_block.id)
1044
+ for lbl in last_insn.targets:
1045
+ target_block = label_to_block_map.get(lbl)
1046
+ if target_block is not None and target_block.id not in block.successor_ids:
1047
+ block.successor_ids.append(target_block.id)
1048
+
1049
+ # Fall-through edge (only if not unconditional/terminal)
1050
+ is_terminal_insn = effect is not None and (effect.is_unconditional or effect.is_return)
1051
+ if not is_terminal_insn and idx + 1 < len(blocks):
1052
+ block.successor_ids.append(blocks[idx + 1].id)
1053
+
1054
+ # Step 4: Build exception handler edges.
1055
+ # For each exception handler, find blocks in the protected range and add edges.
1056
+ for eh in code.exception_handlers:
1057
+ start_block = label_to_block_map.get(eh.start)
1058
+ end_block = label_to_block_map.get(eh.end)
1059
+ handler_block = label_to_block_map.get(eh.handler)
1060
+
1061
+ if start_block is None or handler_block is None:
1062
+ continue
1063
+
1064
+ start_id = start_block.id
1065
+ end_id = end_block.id if end_block is not None else len(blocks)
1066
+
1067
+ for block in blocks:
1068
+ if start_id <= block.id < end_id:
1069
+ edge = (handler_block.id, eh.catch_type)
1070
+ if edge not in block.exception_handler_ids:
1071
+ block.exception_handler_ids.append(edge)
1072
+
1073
+ return ControlFlowGraph(
1074
+ entry=blocks[0],
1075
+ blocks=tuple(blocks),
1076
+ exception_handlers=tuple(code.exception_handlers),
1077
+ label_to_block=label_to_block_map,
1078
+ )
1079
+
1080
+
1081
+ def _find_first_insn(items: list[CodeItem]) -> int | None:
1082
+ """Return the index of the first real instruction in *items*."""
1083
+ for i, item in enumerate(items):
1084
+ if isinstance(item, InsnInfo):
1085
+ return i
1086
+ return None
1087
+
1088
+
1089
+ def _find_next_insn(items: list[CodeItem], start: int) -> int | None:
1090
+ """Return the index of the next real instruction at or after *start*."""
1091
+ for i in range(start, len(items)):
1092
+ if isinstance(items[i], InsnInfo):
1093
+ return i
1094
+ return None
1095
+
1096
+
1097
+ # ===================================================================
1098
+ # Stack and local simulation
1099
+ # ===================================================================
1100
+
1101
+
1102
@dataclass(frozen=True, slots=True)
class SimulationResult:
    """Results of forward dataflow stack/local simulation.

    The dataclass is frozen, so its fields cannot be rebound after
    construction; the two dictionaries it holds are ordinary ``dict``
    objects and remain mutable.

    Attributes:
        entry_states: Mapping from block id to the frame state on entry.
        exit_states: Mapping from block id to the frame state on exit.
        max_stack: Maximum operand stack depth observed.
        max_locals: Maximum local variable slot count observed.
    """

    # Keyed by ``BasicBlock.id``; only blocks that received a state appear.
    entry_states: dict[int, FrameState]
    # Keyed by ``BasicBlock.id``; filled once a block has been simulated.
    exit_states: dict[int, FrameState]
    max_stack: int
    max_locals: int
1117
+
1118
+
1119
def simulate(
    cfg: ControlFlowGraph,
    code: CodeModel,
    method: MethodModel,
    class_name: str,
    resolver: ClassResolver | None = None,
) -> SimulationResult:
    """Run the forward stack/local dataflow analysis over *cfg*.

    Starting from the method's initial frame at the entry block, every block
    taken from a FIFO worklist is simulated instruction by instruction. The
    resulting exit state is merged into each successor, and the state seen
    *before* every potentially throwing instruction is merged into the
    block's exception handlers. A block is queued again whenever a merge
    changes its recorded entry state.

    Args:
        cfg: Control-flow graph to analyze.
        code: Code model the graph was built from; it is passed through
            ``_prepare_analysis_code`` before instructions are simulated.
        method: Method model used to derive the initial frame.
        class_name: JVM internal name of the enclosing class.
        resolver: Optional class hierarchy resolver forwarded to frame merging.

    Returns:
        A ``SimulationResult`` holding the per-block entry and exit states
        plus the largest stack depth and local count observed.
    """
    if not cfg.blocks:
        # Nothing to simulate: only the initial frame's locals are reported.
        return SimulationResult(
            entry_states={},
            exit_states={},
            max_stack=0,
            max_locals=len(initial_frame(method, class_name).locals),
        )

    analysis_code = _prepare_analysis_code(code)
    start_frame = initial_frame(method, class_name)
    entry_id = cfg.entry.id

    entry_states: dict[int, FrameState] = {entry_id: start_frame}
    exit_states: dict[int, FrameState] = {}

    # FIFO queue of block ids still to process, with a companion set for O(1)
    # "already queued" checks.
    worklist: deque[int] = deque((entry_id,))
    in_worklist: set[int] = {entry_id}

    deepest_stack = 0
    widest_locals = len(start_frame.locals)

    blocks_by_id = {blk.id: blk for blk in cfg.blocks}

    while worklist:
        current_id = worklist.popleft()
        in_worklist.discard(current_id)

        block = blocks_by_id[current_id]
        if current_id not in entry_states:
            continue

        frame = entry_states[current_id]
        deepest_stack = max(deepest_stack, frame.stack_depth)
        widest_locals = max(widest_locals, len(frame.locals))

        handler_edges = block.exception_handler_ids
        for insn in block.instructions:
            # Handlers see the frame as it was before the throwing instruction ran.
            if handler_edges and _instruction_may_throw(insn):
                _propagate_exception_handlers(
                    handler_edges,
                    frame,
                    entry_states,
                    worklist,
                    in_worklist,
                    resolver,
                )
            frame = _simulate_insn(insn, frame, analysis_code, class_name)
            deepest_stack = max(deepest_stack, frame.stack_depth)
            widest_locals = max(widest_locals, len(frame.locals))

        exit_states[current_id] = frame

        for successor_id in block.successor_ids:
            _propagate(successor_id, frame, entry_states, worklist, in_worklist, resolver)

    return SimulationResult(
        entry_states=entry_states,
        exit_states=exit_states,
        max_stack=deepest_stack,
        max_locals=widest_locals,
    )
1210
+
1211
+
1212
+ def _propagate(
1213
+ target_id: int,
1214
+ incoming: FrameState,
1215
+ entry_states: dict[int, FrameState],
1216
+ worklist: deque[int],
1217
+ in_worklist: set[int],
1218
+ resolver: ClassResolver | None,
1219
+ ) -> None:
1220
+ """Merge *incoming* into the entry state of *target_id* and enqueue if changed."""
1221
+ if target_id in entry_states:
1222
+ existing = entry_states[target_id]
1223
+ try:
1224
+ merged = _merge_frames(existing, incoming, resolver)
1225
+ except TypeMergeError as exc:
1226
+ raise TypeMergeError(f"Cannot merge incoming frame into block {target_id}: {exc}") from exc
1227
+ if merged == existing:
1228
+ return # No change — don't re-process.
1229
+ entry_states[target_id] = merged
1230
+ else:
1231
+ entry_states[target_id] = incoming
1232
+
1233
+ if target_id not in in_worklist:
1234
+ worklist.append(target_id)
1235
+ in_worklist.add(target_id)
1236
+
1237
+
1238
# Raw opcodes that ``_instruction_may_throw`` treats as unable to transfer
# control to an exception handler. Integer/long division and remainder are
# not listed here, so they count as potentially throwing.
_NON_THROWING_RAW_OPCODES: frozenset[InsnInfoType] = frozenset(
    {
        # No-op and constant pushes
        _T.NOP, _T.ACONST_NULL,
        _T.ICONST_M1, _T.ICONST_0, _T.ICONST_1, _T.ICONST_2, _T.ICONST_3, _T.ICONST_4, _T.ICONST_5,
        _T.LCONST_0, _T.LCONST_1,
        _T.FCONST_0, _T.FCONST_1, _T.FCONST_2,
        _T.DCONST_0, _T.DCONST_1,
        _T.BIPUSH, _T.SIPUSH,
        # Local variable loads
        _T.ILOAD, _T.ILOAD_0, _T.ILOAD_1, _T.ILOAD_2, _T.ILOAD_3, _T.ILOADW,
        _T.LLOAD, _T.LLOAD_0, _T.LLOAD_1, _T.LLOAD_2, _T.LLOAD_3, _T.LLOADW,
        _T.FLOAD, _T.FLOAD_0, _T.FLOAD_1, _T.FLOAD_2, _T.FLOAD_3, _T.FLOADW,
        _T.DLOAD, _T.DLOAD_0, _T.DLOAD_1, _T.DLOAD_2, _T.DLOAD_3, _T.DLOADW,
        _T.ALOAD, _T.ALOAD_0, _T.ALOAD_1, _T.ALOAD_2, _T.ALOAD_3, _T.ALOADW,
        # Local variable stores
        _T.ISTORE, _T.ISTORE_0, _T.ISTORE_1, _T.ISTORE_2, _T.ISTORE_3, _T.ISTOREW,
        _T.LSTORE, _T.LSTORE_0, _T.LSTORE_1, _T.LSTORE_2, _T.LSTORE_3, _T.LSTOREW,
        _T.FSTORE, _T.FSTORE_0, _T.FSTORE_1, _T.FSTORE_2, _T.FSTORE_3, _T.FSTOREW,
        _T.DSTORE, _T.DSTORE_0, _T.DSTORE_1, _T.DSTORE_2, _T.DSTORE_3, _T.DSTOREW,
        _T.ASTORE, _T.ASTORE_0, _T.ASTORE_1, _T.ASTORE_2, _T.ASTORE_3, _T.ASTOREW,
        # Operand stack manipulation
        _T.POP, _T.POP2,
        _T.DUP, _T.DUP_X1, _T.DUP_X2, _T.DUP2, _T.DUP2_X1, _T.DUP2_X2,
        _T.SWAP,
        # Integer arithmetic and bitwise operations
        _T.IADD, _T.ISUB, _T.IMUL, _T.INEG,
        _T.ISHL, _T.ISHR, _T.IUSHR, _T.IAND, _T.IOR, _T.IXOR,
        # Long arithmetic and bitwise operations
        _T.LADD, _T.LSUB, _T.LMUL, _T.LNEG,
        _T.LSHL, _T.LSHR, _T.LUSHR, _T.LAND, _T.LOR, _T.LXOR,
        # Float and double arithmetic
        _T.FADD, _T.FSUB, _T.FMUL, _T.FDIV, _T.FREM, _T.FNEG,
        _T.DADD, _T.DSUB, _T.DMUL, _T.DDIV, _T.DREM, _T.DNEG,
        # Primitive conversions
        _T.I2L, _T.I2F, _T.I2D,
        _T.L2I, _T.L2F, _T.L2D,
        _T.F2I, _T.F2L, _T.F2D,
        _T.D2I, _T.D2L, _T.D2F,
        _T.I2B, _T.I2C, _T.I2S,
        # Comparisons
        _T.LCMP, _T.FCMPL, _T.FCMPG, _T.DCMPL, _T.DCMPG,
        # Conditional branches
        _T.IFEQ, _T.IFNE, _T.IFLT, _T.IFGE, _T.IFGT, _T.IFLE,
        _T.IF_ICMPEQ, _T.IF_ICMPNE, _T.IF_ICMPLT, _T.IF_ICMPGE, _T.IF_ICMPGT, _T.IF_ICMPLE,
        _T.IF_ACMPEQ, _T.IF_ACMPNE,
        _T.IFNULL, _T.IFNONNULL,
        # Unconditional jumps, subroutines and switches
        _T.GOTO, _T.GOTO_W,
        _T.JSR, _T.JSR_W, _T.RET, _T.RETW,
        _T.TABLESWITCH, _T.LOOKUPSWITCH,
        # Returns
        _T.IRETURN, _T.LRETURN, _T.FRETURN, _T.DRETURN, _T.ARETURN, _T.RETURN,
        # Local increment and the wide prefix
        _T.IINC, _T.IINCW, _T.WIDE,
    }
)
1414
+
1415
+
1416
def _instruction_may_throw(insn: InsnInfo) -> bool:
    """Return whether an instruction may transfer control to an exception handler.

    The analysis stays conservative by treating any opcode outside the
    well-understood non-throwing set as potentially exceptional.

    Classification, in order:

    * Local variable access, ``IINC``, branches and switches never throw.
    * Field access, every invocation kind, type instructions and
      ``MULTIANEWARRAY`` may always throw.
    * ``LDC`` of an int, float, long, double or String constant does not
      throw. ``LDC`` of any other constant kind (Class, MethodType,
      MethodHandle, dynamic) resolves a symbolic reference at run time and
      that resolution can raise a linkage error, so it may throw.
    * Any remaining raw opcode may throw unless it is listed in
      ``_NON_THROWING_RAW_OPCODES``.

    Args:
        insn: Instruction to classify.

    Returns:
        ``True`` if the state before *insn* must be propagated to the active
        exception handlers, ``False`` otherwise.
    """
    if isinstance(insn, VarInsn | IIncInsn | BranchInsn | LookupSwitchInsn | TableSwitchInsn):
        return False
    if isinstance(
        insn,
        FieldInsn | MethodInsn | InterfaceMethodInsn | InvokeDynamicInsn | TypeInsn | MultiANewArrayInsn,
    ):
        return True
    if isinstance(insn, LdcInsn):
        # Only constants that need no run-time resolution are safe; everything
        # else (including constant kinds added later) is assumed to throw.
        return not isinstance(insn.value, LdcInt | LdcFloat | LdcLong | LdcDouble | LdcString)
    return insn.type not in _NON_THROWING_RAW_OPCODES
1432
+
1433
+
1434
+ def _propagate_exception_handlers(
1435
+ handler_edges: list[tuple[int, str | None]],
1436
+ state: FrameState,
1437
+ entry_states: dict[int, FrameState],
1438
+ worklist: deque[int],
1439
+ in_worklist: set[int],
1440
+ resolver: ClassResolver | None,
1441
+ ) -> None:
1442
+ """Propagate the pre-instruction state to each active exception handler."""
1443
+ for handler_id, catch_type in handler_edges:
1444
+ if catch_type is not None:
1445
+ handler_stack = (VObject(catch_type),)
1446
+ else:
1447
+ handler_stack = (_OBJECT_THROWABLE,)
1448
+ handler_state = FrameState(handler_stack, state.locals)
1449
+ _propagate(handler_id, handler_state, entry_states, worklist, in_worklist, resolver)
1450
+
1451
+
1452
+ # ===================================================================
1453
+ # Per-instruction simulation
1454
+ # ===================================================================
1455
+
1456
+
1457
# Canonical VarInsn load opcode → verification type pushed by that load.
# ``_simulate_var_insn`` handles ALOAD before consulting this table, so the
# ALOAD entry is only a placeholder: the real type comes from the local.
_LOAD_TYPE_MAP: dict[InsnInfoType, VType] = {
    _T.ILOAD: _INTEGER,
    _T.LLOAD: _LONG,
    _T.FLOAD: _FLOAT,
    _T.DLOAD: _DOUBLE,
    _T.ALOAD: _NULL,
}

# Canonical VarInsn opcodes that write a local variable.
_STORE_OPCODES: frozenset[InsnInfoType] = frozenset(
    (_T.ISTORE, _T.LSTORE, _T.FSTORE, _T.DSTORE, _T.ASTORE)
)
1475
+
1476
+
1477
def _simulate_insn(
    insn: InsnInfo,
    state: FrameState,
    code: CodeModel,
    class_name: str,
) -> FrameState:
    """Apply the effect of one instruction to the frame state.

    Dispatches on the instruction's class to the matching helper; anything
    that is not one of the symbolic instruction classes is handled by
    ``_simulate_raw_insn``.

    Args:
        insn: Instruction to simulate.
        state: Frame state before the instruction.
        code: Code model, forwarded to the type-instruction helper.
        class_name: JVM internal name of the enclosing class, forwarded to
            the method-invocation helper.

    Returns:
        The frame state after the instruction.
    """
    if isinstance(insn, VarInsn):
        # Symbolic local variable load/store.
        result = _simulate_var_insn(insn, state)
    elif isinstance(insn, IIncInsn):
        # The frame is returned untouched; the local's type is not checked here.
        result = state
    elif isinstance(insn, FieldInsn):
        result = _simulate_field_insn(insn, state)
    elif isinstance(insn, MethodInsn):
        result = _simulate_method_insn(insn, state, class_name)
    elif isinstance(insn, InterfaceMethodInsn):
        result = _simulate_interface_method_insn(insn, state)
    elif isinstance(insn, InvokeDynamicInsn):
        result = _simulate_invokedynamic_insn(insn, state)
    elif isinstance(insn, TypeInsn):
        result = _simulate_type_insn(insn, state, code)
    elif isinstance(insn, LdcInsn):
        result = _simulate_ldc_insn(insn, state)
    elif isinstance(insn, MultiANewArrayInsn):
        # One stack entry per dimension is consumed, then the array is pushed.
        without_counts, _ = state.pop(insn.dimensions)
        result = without_counts.push(VObject(insn.class_name))
    elif isinstance(insn, BranchInsn):
        result = _simulate_branch_insn(insn, state)
    elif isinstance(insn, LookupSwitchInsn | TableSwitchInsn):
        # Both switch forms consume only the key.
        result, _ = state.pop(1)
    else:
        # Raw opcode whose effect is determined by the opcode alone.
        result = _simulate_raw_insn(insn, state)
    return result
1534
+
1535
+
1536
def _simulate_var_insn(insn: VarInsn, state: FrameState) -> FrameState:
    """Simulate a symbolic local-variable instruction (load, store or RET).

    Args:
        insn: The ``VarInsn`` to simulate.
        state: Frame state before the instruction.

    Returns:
        The frame state after the instruction; ``RET`` leaves it unchanged.
    """
    kind = insn.type
    index = insn.slot

    if kind in _STORE_OPCODES:
        if kind in (_T.LSTORE, _T.DSTORE):
            # Two stack entries are popped and the local gets the fixed type.
            wide_type = _LONG if kind == _T.LSTORE else _DOUBLE
            after_pop, _ = state.pop(2)
            return after_pop.set_local(index, wide_type)
        # Other stores copy whatever value was on top of the stack.
        after_pop, (stored,) = state.pop(1)
        return after_pop.set_local(index, stored)

    if kind == _T.RET:
        return state

    if kind == _T.ALOAD:
        # Reference loads push the exact type currently held by the local.
        return state.push(state.get_local(index))

    # Typed loads push the type implied by the opcode (integer when unmapped).
    pushed = _LOAD_TYPE_MAP.get(kind, _INTEGER)
    # The result is discarded; presumably the call validates the slot — confirm.
    state.get_local(index)
    return state.push(pushed)
1565
+
1566
+
1567
def _simulate_field_insn(insn: FieldInsn, state: FrameState) -> FrameState:
    """Simulate GETFIELD, PUTFIELD, GETSTATIC or PUTSTATIC.

    Args:
        insn: Field access instruction; its descriptor gives the field type.
        state: Frame state before the instruction.

    Returns:
        The frame state after the access.
    """
    value_type = vtype_from_field_descriptor_str(insn.descriptor)
    value_slots = 2 if is_category2(value_type) else 1
    op = insn.type

    if op == _T.GETSTATIC:
        return state.push(value_type)
    if op == _T.GETFIELD:
        # The object reference is replaced by the field value.
        without_ref, _ = state.pop(1)
        return without_ref.push(value_type)

    # PUTFIELD / PUTSTATIC: the value goes first, then (PUTFIELD only) the objectref.
    remaining, _ = state.pop(value_slots)
    if op == _T.PUTFIELD:
        remaining, _ = remaining.pop(1)
    return remaining
1584
+
1585
+
1586
def _simulate_method_insn(
    insn: MethodInsn,
    state: FrameState,
    class_name: str,
) -> FrameState:
    """Simulate INVOKEVIRTUAL, INVOKESPECIAL, INVOKESTATIC.

    Pops the argument slots, pops the receiver unless the call is static,
    marks the receiver as initialized after an ``<init>`` call, and pushes
    the return value for non-void methods.

    Args:
        insn: Method invocation instruction.
        state: Frame state before the call.
        class_name: JVM internal name of the enclosing class; used as the
            type that replaces ``uninitializedThis`` after ``<init>``.

    Returns:
        The frame state after the call.
    """
    descriptor = parse_method_descriptor(insn.descriptor)

    # Category-2 parameters take two stack slots each.
    slots_to_pop = 0
    for param in descriptor.parameter_types:
        slots_to_pop += 2 if is_category2(vtype_from_descriptor(param)) else 1
    state, _ = state.pop(slots_to_pop)

    if insn.type != _T.INVOKESTATIC:
        state, (receiver,) = state.pop(1)
        if insn.name == "<init>":
            # A constructor call initializes either ``this`` or the freshly
            # allocated object the receiver refers to.
            if isinstance(receiver, VUninitializedThis):
                state = _replace_uninitialized(state, receiver, VObject(class_name))
            elif isinstance(receiver, VUninitialized):
                state = _replace_uninitialized(state, receiver, VObject(insn.owner))

    if isinstance(descriptor.return_type, VoidType):
        return state
    return state.push(vtype_from_descriptor(descriptor.return_type))
1613
+
1614
+
1615
def _simulate_interface_method_insn(insn: InterfaceMethodInsn, state: FrameState) -> FrameState:
    """Simulate INVOKEINTERFACE.

    Args:
        insn: Interface method invocation instruction.
        state: Frame state before the call.

    Returns:
        The frame state after the arguments and the object reference have
        been popped and any non-void return value pushed.
    """
    descriptor = parse_method_descriptor(insn.descriptor)

    # Category-2 parameters take two stack slots each.
    slots_to_pop = 0
    for param in descriptor.parameter_types:
        slots_to_pop += 2 if is_category2(vtype_from_descriptor(param)) else 1

    after_args, _ = state.pop(slots_to_pop)
    after_receiver, _ = after_args.pop(1)

    if isinstance(descriptor.return_type, VoidType):
        return after_receiver
    return after_receiver.push(vtype_from_descriptor(descriptor.return_type))
1626
+
1627
+
1628
def _simulate_invokedynamic_insn(insn: InvokeDynamicInsn, state: FrameState) -> FrameState:
    """Simulate INVOKEDYNAMIC.

    Unlike the other invocation helpers, no object reference is popped.

    Args:
        insn: Dynamic invocation instruction.
        state: Frame state before the call.

    Returns:
        The frame state after the arguments have been popped and any
        non-void return value pushed.
    """
    descriptor = parse_method_descriptor(insn.descriptor)

    # Category-2 parameters take two stack slots each.
    slots_to_pop = 0
    for param in descriptor.parameter_types:
        slots_to_pop += 2 if is_category2(vtype_from_descriptor(param)) else 1

    after_args, _ = state.pop(slots_to_pop)

    if isinstance(descriptor.return_type, VoidType):
        return after_args
    return after_args.push(vtype_from_descriptor(descriptor.return_type))
1638
+
1639
+
1640
def _simulate_type_insn(
    insn: TypeInsn,
    state: FrameState,
    code: CodeModel,
) -> FrameState:
    """Simulate NEW, CHECKCAST, INSTANCEOF, ANEWARRAY.

    Args:
        insn: Type instruction to simulate.
        state: Frame state before the instruction.
        code: Code model searched for the label attached to a ``NEW``.

    Returns:
        The frame state after the instruction.

    Raises:
        AnalysisError: If a ``NEW`` instruction has no label in *code*.
    """
    op = insn.type

    if op == _T.NEW:
        # The uninitialized type is identified by the label of its NEW site.
        site = _find_label_for_insn(code, insn)
        if site is None:
            raise AnalysisError("NEW instruction is missing an analysis label")
        return state.push(VUninitialized(site))

    # Every remaining opcode consumes exactly one operand first.
    after_pop, _ = state.pop(1)

    if op == _T.CHECKCAST:
        return after_pop.push(VObject(insn.class_name))
    if op == _T.INSTANCEOF:
        return after_pop.push(_INTEGER)

    # ANEWARRAY: an array element gains one dimension, a class name is
    # wrapped as ``[L<name>;``.
    element = insn.class_name
    array_name = "[" + element if element.startswith("[") else "[L" + element + ";"
    return after_pop.push(VObject(array_name))
1664
+
1665
+
1666
def _simulate_ldc_insn(insn: LdcInsn, state: FrameState) -> FrameState:
    """Simulate LDC/LDC_W/LDC2_W.

    Args:
        insn: Constant-loading instruction.
        state: Frame state before the instruction.

    Returns:
        *state* with the verification type of the loaded constant pushed.
    """
    constant = insn.value

    # Constant kinds whose pushed type is fixed, tested in this order.
    fixed_types = (
        (LdcInt, _INTEGER),
        (LdcFloat, _FLOAT),
        (LdcLong, _LONG),
        (LdcDouble, _DOUBLE),
        (LdcString, _OBJECT_STRING),
        (LdcClass, _OBJECT_CLASS),
        (LdcMethodType, _OBJECT_METHOD_TYPE),
        (LdcMethodHandle, _OBJECT_METHOD_HANDLE),
    )
    for constant_cls, pushed in fixed_types:
        if isinstance(constant, constant_cls):
            return state.push(pushed)

    # Anything else is treated as a dynamic constant typed by its descriptor.
    return state.push(vtype_from_field_descriptor_str(constant.descriptor))
1689
+
1690
+
1691
def _simulate_branch_insn(insn: BranchInsn, state: FrameState) -> FrameState:
    """Simulate the stack effect of a branch instruction.

    Pops as many operands as the opcode effect table records for the
    opcode (none when the opcode has no entry). ``JSR``/``JSR_W``
    additionally push a return address, modelled here as an integer.

    Args:
        insn: Branch instruction to simulate.
        state: Frame state before the branch.

    Returns:
        The frame state after the branch.
    """
    effect = OPCODE_EFFECTS.get(insn.type)
    operand_count = effect.pops if effect is not None else 0
    if operand_count > 0:
        state, _ = state.pop(operand_count)

    if insn.type == _T.JSR or insn.type == _T.JSR_W:
        return state.push(_INTEGER)
    return state
1700
+
1701
+
1702
+ def _simulate_raw_insn(insn: InsnInfo, state: FrameState) -> FrameState:
1703
+ """Simulate a raw (non-symbolic) instruction using the opcode effect table."""
1704
+ opcode = insn.type
1705
+
1706
+ # --- Stack manipulation (requires type-aware handling) ---
1707
+ if opcode == _T.DUP:
1708
+ val = state.peek(0)
1709
+ return FrameState(state.stack + (val,), state.locals)
1710
+ elif opcode == _T.DUP_X1:
1711
+ v1 = state.peek(0)
1712
+ v2 = state.peek(1)
1713
+ stack = state.stack[:-2] + (v1, v2, v1)
1714
+ return FrameState(stack, state.locals)
1715
+ elif opcode == _T.DUP_X2:
1716
+ v1 = state.peek(0)
1717
+ v2 = state.peek(1)
1718
+ v3 = state.peek(2)
1719
+ stack = state.stack[:-3] + (v1, v3, v2, v1)
1720
+ return FrameState(stack, state.locals)
1721
+ elif opcode == _T.DUP2:
1722
+ v1 = state.peek(0)
1723
+ v2 = state.peek(1)
1724
+ return FrameState(state.stack + (v2, v1), state.locals)
1725
+ elif opcode == _T.DUP2_X1:
1726
+ v1 = state.peek(0)
1727
+ v2 = state.peek(1)
1728
+ v3 = state.peek(2)
1729
+ stack = state.stack[:-3] + (v2, v1, v3, v2, v1)
1730
+ return FrameState(stack, state.locals)
1731
+ elif opcode == _T.DUP2_X2:
1732
+ v1 = state.peek(0)
1733
+ v2 = state.peek(1)
1734
+ v3 = state.peek(2)
1735
+ v4 = state.peek(3)
1736
+ stack = state.stack[:-4] + (v2, v1, v4, v3, v2, v1)
1737
+ return FrameState(stack, state.locals)
1738
+ elif opcode == _T.SWAP:
1739
+ v1 = state.peek(0)
1740
+ v2 = state.peek(1)
1741
+ stack = state.stack[:-2] + (v1, v2)
1742
+ return FrameState(stack, state.locals)
1743
+ elif opcode == _T.POP:
1744
+ state, _ = state.pop(1)
1745
+ return state
1746
+ elif opcode == _T.POP2:
1747
+ state, _ = state.pop(2)
1748
+ return state
1749
+
1750
+ # --- Constants ---
1751
+ if opcode == _T.ACONST_NULL:
1752
+ return state.push(_NULL)
1753
+ if opcode in {_T.ICONST_M1, _T.ICONST_0, _T.ICONST_1, _T.ICONST_2, _T.ICONST_3, _T.ICONST_4, _T.ICONST_5}:
1754
+ return state.push(_INTEGER)
1755
+ if opcode in {_T.LCONST_0, _T.LCONST_1}:
1756
+ return state.push(_LONG)
1757
+ if opcode in {_T.FCONST_0, _T.FCONST_1, _T.FCONST_2}:
1758
+ return state.push(_FLOAT)
1759
+ if opcode in {_T.DCONST_0, _T.DCONST_1}:
1760
+ return state.push(_DOUBLE)
1761
+ if opcode == _T.BIPUSH:
1762
+ return state.push(_INTEGER)
1763
+ if opcode == _T.SIPUSH:
1764
+ return state.push(_INTEGER)
1765
+
1766
+ # --- Arithmetic (result type by opcode prefix) ---
1767
+ if opcode in {_T.IADD, _T.ISUB, _T.IMUL, _T.IDIV, _T.IREM, _T.ISHL, _T.ISHR, _T.IUSHR, _T.IAND, _T.IOR, _T.IXOR}:
1768
+ state, _ = state.pop(2)
1769
+ return state.push(_INTEGER)
1770
+ if opcode == _T.INEG:
1771
+ state, _ = state.pop(1)
1772
+ return state.push(_INTEGER)
1773
+
1774
+ if opcode in {_T.LADD, _T.LSUB, _T.LMUL, _T.LDIV, _T.LREM, _T.LAND, _T.LOR, _T.LXOR}:
1775
+ state, _ = state.pop(4)
1776
+ return state.push(_LONG)
1777
+ if opcode in {_T.LSHL, _T.LSHR, _T.LUSHR}:
1778
+ state, _ = state.pop(3) # long + int shift amount
1779
+ return state.push(_LONG)
1780
+ if opcode == _T.LNEG:
1781
+ state, _ = state.pop(2)
1782
+ return state.push(_LONG)
1783
+
1784
+ if opcode in {_T.FADD, _T.FSUB, _T.FMUL, _T.FDIV, _T.FREM}:
1785
+ state, _ = state.pop(2)
1786
+ return state.push(_FLOAT)
1787
+ if opcode == _T.FNEG:
1788
+ state, _ = state.pop(1)
1789
+ return state.push(_FLOAT)
1790
+
1791
+ if opcode in {_T.DADD, _T.DSUB, _T.DMUL, _T.DDIV, _T.DREM}:
1792
+ state, _ = state.pop(4)
1793
+ return state.push(_DOUBLE)
1794
+ if opcode == _T.DNEG:
1795
+ state, _ = state.pop(2)
1796
+ return state.push(_DOUBLE)
1797
+
1798
+ # --- Conversions ---
1799
+ if opcode == _T.I2L:
1800
+ state, _ = state.pop(1)
1801
+
1802
+ return state.push(_LONG)
1803
+ if opcode == _T.I2F:
1804
+ state, _ = state.pop(1)
1805
+
1806
+ return state.push(_FLOAT)
1807
+ if opcode == _T.I2D:
1808
+ state, _ = state.pop(1)
1809
+
1810
+ return state.push(_DOUBLE)
1811
+ if opcode == _T.L2I:
1812
+ state, _ = state.pop(2)
1813
+
1814
+ return state.push(_INTEGER)
1815
+ if opcode == _T.L2F:
1816
+ state, _ = state.pop(2)
1817
+
1818
+ return state.push(_FLOAT)
1819
+ if opcode == _T.L2D:
1820
+ state, _ = state.pop(2)
1821
+
1822
+ return state.push(_DOUBLE)
1823
+ if opcode == _T.F2I:
1824
+ state, _ = state.pop(1)
1825
+
1826
+ return state.push(_INTEGER)
1827
+ if opcode == _T.F2L:
1828
+ state, _ = state.pop(1)
1829
+
1830
+ return state.push(_LONG)
1831
+ if opcode == _T.F2D:
1832
+ state, _ = state.pop(1)
1833
+
1834
+ return state.push(_DOUBLE)
1835
+ if opcode == _T.D2I:
1836
+ state, _ = state.pop(2)
1837
+
1838
+ return state.push(_INTEGER)
1839
+ if opcode == _T.D2L:
1840
+ state, _ = state.pop(2)
1841
+
1842
+ return state.push(_LONG)
1843
+ if opcode == _T.D2F:
1844
+ state, _ = state.pop(2)
1845
+
1846
+ return state.push(_FLOAT)
1847
+ if opcode in {_T.I2B, _T.I2C, _T.I2S}:
1848
+ state, _ = state.pop(1)
1849
+
1850
+ return state.push(_INTEGER)
1851
+
1852
+ # --- Comparisons ---
1853
+ if opcode == _T.LCMP:
1854
+ state, _ = state.pop(4)
1855
+
1856
+ return state.push(_INTEGER)
1857
+ if opcode in {_T.FCMPL, _T.FCMPG}:
1858
+ state, _ = state.pop(2)
1859
+
1860
+ return state.push(_INTEGER)
1861
+ if opcode in {_T.DCMPL, _T.DCMPG}:
1862
+ state, _ = state.pop(4)
1863
+
1864
+ return state.push(_INTEGER)
1865
+
1866
+ # --- Array loads ---
1867
+ if opcode in {_T.IALOAD, _T.BALOAD, _T.CALOAD, _T.SALOAD}:
1868
+ state, _ = state.pop(2)
1869
+
1870
+ return state.push(_INTEGER)
1871
+ if opcode == _T.LALOAD:
1872
+ state, _ = state.pop(2)
1873
+
1874
+ return state.push(_LONG)
1875
+ if opcode == _T.FALOAD:
1876
+ state, _ = state.pop(2)
1877
+
1878
+ return state.push(_FLOAT)
1879
+ if opcode == _T.DALOAD:
1880
+ state, _ = state.pop(2)
1881
+
1882
+ return state.push(_DOUBLE)
1883
+ if opcode == _T.AALOAD:
1884
+ state, (_, arrayref) = state.pop(2)
1885
+ # Try to determine component type from array reference.
1886
+ if isinstance(arrayref, VObject) and arrayref.class_name.startswith("["):
1887
+ component = arrayref.class_name[1:]
1888
+ if component.startswith("L") and component.endswith(";"):
1889
+ return state.push(VObject(component[1:-1]))
1890
+ elif component.startswith("["):
1891
+ return state.push(VObject(component))
1892
+ # Primitive component (e.g. "[I") — invalid bytecode for AALOAD
1893
+ # (should use IALOAD/FALOAD/etc.), fall through to Object default.
1894
+ return state.push(_OBJECT_OBJECT)
1895
+
1896
+ # --- Array stores ---
1897
+ if opcode in {_T.IASTORE, _T.BASTORE, _T.CASTORE, _T.SASTORE, _T.FASTORE, _T.AASTORE}:
1898
+ state, _ = state.pop(3)
1899
+
1900
+ return state
1901
+ if opcode in {_T.LASTORE, _T.DASTORE}:
1902
+ state, _ = state.pop(4)
1903
+
1904
+ return state
1905
+
1906
+ # --- Returns ---
1907
+ if opcode in {_T.IRETURN, _T.FRETURN, _T.ARETURN}:
1908
+ state, _ = state.pop(1)
1909
+
1910
+ return state
1911
+ if opcode in {_T.LRETURN, _T.DRETURN}:
1912
+ state, _ = state.pop(2)
1913
+
1914
+ return state
1915
+ if opcode == _T.RETURN:
1916
+ return state
1917
+
1918
+ # --- ATHROW ---
1919
+ if opcode == _T.ATHROW:
1920
+ state, _ = state.pop(1)
1921
+
1922
+ return state
1923
+
1924
+ # --- Monitor ---
1925
+ if opcode in {_T.MONITORENTER, _T.MONITOREXIT}:
1926
+ state, _ = state.pop(1)
1927
+
1928
+ return state
1929
+
1930
+ # --- Array length ---
1931
+ if opcode == _T.ARRAYLENGTH:
1932
+ state, _ = state.pop(1)
1933
+
1934
+ return state.push(_INTEGER)
1935
+
1936
+ # --- NEWARRAY ---
1937
+ if opcode == _T.NEWARRAY:
1938
+ state, _ = state.pop(1) # pop count
1939
+ from .instructions import NewArray as NewArrayInsn
1940
+
1941
+ if isinstance(insn, NewArrayInsn):
1942
+ array_desc = _NEWARRAY_TYPE_MAP.get(insn.atype, "[I")
1943
+ return state.push(VObject(array_desc))
1944
+ return state.push(VObject("[I"))
1945
+
1946
+ # --- NOP / IINC / WIDE ---
1947
+ if opcode in {_T.NOP, _T.IINC, _T.IINCW, _T.WIDE}:
1948
+ return state
1949
+
1950
+ # --- JSR (pushes return address) ---
1951
+ if opcode in {_T.JSR, _T.JSR_W}:
1952
+ return state.push(_INTEGER) # return address (treated as integer for simplicity)
1953
+
1954
+ # --- RET ---
1955
+ if opcode in {_T.RET, _T.RETW}:
1956
+ return state
1957
+
1958
+ # --- Raw load/store opcodes (when not lifted to VarInsn) ---
1959
+ # These shouldn't appear in editing model code, but handle gracefully.
1960
+ if opcode in {_T.ILOAD, _T.ILOAD_0, _T.ILOAD_1, _T.ILOAD_2, _T.ILOAD_3, _T.ILOADW}:
1961
+ return state.push(_INTEGER)
1962
+ if opcode in {_T.LLOAD, _T.LLOAD_0, _T.LLOAD_1, _T.LLOAD_2, _T.LLOAD_3, _T.LLOADW}:
1963
+ return state.push(_LONG)
1964
+ if opcode in {_T.FLOAD, _T.FLOAD_0, _T.FLOAD_1, _T.FLOAD_2, _T.FLOAD_3, _T.FLOADW}:
1965
+ return state.push(_FLOAT)
1966
+ if opcode in {_T.DLOAD, _T.DLOAD_0, _T.DLOAD_1, _T.DLOAD_2, _T.DLOAD_3, _T.DLOADW}:
1967
+ return state.push(_DOUBLE)
1968
+ if opcode in {_T.ALOAD, _T.ALOAD_0, _T.ALOAD_1, _T.ALOAD_2, _T.ALOAD_3, _T.ALOADW}:
1969
+ return state.push(_OBJECT_OBJECT)
1970
+ if opcode in {_T.ISTORE, _T.ISTORE_0, _T.ISTORE_1, _T.ISTORE_2, _T.ISTORE_3, _T.ISTOREW}:
1971
+ state, _ = state.pop(1)
1972
+
1973
+ return state
1974
+ if opcode in {_T.LSTORE, _T.LSTORE_0, _T.LSTORE_1, _T.LSTORE_2, _T.LSTORE_3, _T.LSTOREW}:
1975
+ state, _ = state.pop(2)
1976
+
1977
+ return state
1978
+ if opcode in {_T.FSTORE, _T.FSTORE_0, _T.FSTORE_1, _T.FSTORE_2, _T.FSTORE_3, _T.FSTOREW}:
1979
+ state, _ = state.pop(1)
1980
+
1981
+ return state
1982
+ if opcode in {_T.DSTORE, _T.DSTORE_0, _T.DSTORE_1, _T.DSTORE_2, _T.DSTORE_3, _T.DSTOREW}:
1983
+ state, _ = state.pop(2)
1984
+
1985
+ return state
1986
+ if opcode in {_T.ASTORE, _T.ASTORE_0, _T.ASTORE_1, _T.ASTORE_2, _T.ASTORE_3, _T.ASTOREW}:
1987
+ state, _ = state.pop(1)
1988
+
1989
+ return state
1990
+
1991
+ # Unrecognized opcode — conservative no-op.
1992
+ return state
1993
+
1994
+
1995
def _replace_uninitialized(
    state: FrameState,
    uninit: VUninitialized | VUninitializedThis,
    replacement: VObject,
) -> FrameState:
    """Return a new frame in which *uninit* has been swapped for *replacement*.

    Once ``<init>`` has completed, every copy of the uninitialized reference,
    whether it sits on the operand stack or in a local slot, has to become
    the initialized type (JVM spec §4.10.1.4).  Entries that do not compare
    equal to *uninit* are carried over unchanged.

    Args:
        state: Frame to rewrite; it is not modified.
        uninit: The uninitialized verification type to look for.
        replacement: The initialized object type to substitute.

    Returns:
        A fresh ``FrameState`` built from the rewritten stack and locals.
    """

    def swap(slot: VType) -> VType:
        # Equality (not identity) decides whether a slot refers to *uninit*.
        return replacement if slot == uninit else slot

    return FrameState(tuple(map(swap, state.stack)), tuple(map(swap, state.locals)))
2009
+
2010
+
2011
+ def _prepare_analysis_code(code: CodeModel) -> CodeModel:
2012
+ """Insert transient labels before unlabeled ``NEW`` instructions."""
2013
+ prepared_items: list[CodeItem] = []
2014
+ inserted = False
2015
+ prev_was_label = False
2016
+
2017
+ for item in code.instructions:
2018
+ if isinstance(item, Label):
2019
+ prepared_items.append(item)
2020
+ prev_was_label = True
2021
+ continue
2022
+
2023
+ if isinstance(item, TypeInsn) and item.type == _T.NEW and not prev_was_label:
2024
+ prepared_items.append(Label())
2025
+ inserted = True
2026
+ prepared_items.append(item)
2027
+ prev_was_label = False
2028
+
2029
+ if not inserted:
2030
+ return code
2031
+
2032
+ return type(code)(
2033
+ max_stack=code.max_stack,
2034
+ max_locals=code.max_locals,
2035
+ instructions=prepared_items,
2036
+ exception_handlers=code.exception_handlers,
2037
+ line_numbers=code.line_numbers,
2038
+ local_variables=code.local_variables,
2039
+ local_variable_types=code.local_variable_types,
2040
+ attributes=code.attributes,
2041
+ )
2042
+
2043
+
2044
+ def _find_label_for_insn(code: CodeModel, target_insn: InsnInfo) -> Label | None:
2045
+ """Find the Label immediately preceding *target_insn* in the code.
2046
+
2047
+ Returns ``None`` if no label precedes the instruction. Simulation calls
2048
+ this on the analysis-prepared instruction stream, where unlabeled
2049
+ ``NEW`` instructions have already been given a synthetic label.
2050
+ """
2051
+ prev_label: Label | None = None
2052
+ for item in code.instructions:
2053
+ if item is target_insn:
2054
+ return prev_label
2055
+ if isinstance(item, Label):
2056
+ prev_label = item
2057
+ else:
2058
+ prev_label = None
2059
+ return None
2060
+
2061
+
2062
+ # ===================================================================
2063
+ # VType → VerificationTypeInfo conversion
2064
+ # ===================================================================
2065
+
2066
+
2067
def _vtype_to_vti(
    vtype: VType,
    cp: ConstantPoolBuilder,
    label_offsets: dict[Label, int],
) -> VerificationTypeInfo:
    """Translate one verification type into its raw ``VerificationTypeInfo``.

    Args:
        vtype: The verification type to convert.
        cp: Constant pool builder; ``add_class`` is called for object types.
        label_offsets: Label-to-offset mapping used to resolve the ``NEW``
            site of an uninitialized type.

    Returns:
        The matching raw ``*VariableInfo`` record.

    Raises:
        ValueError: If an uninitialized type's ``new_label`` has no entry in
            *label_offsets*.
    """
    # Kinds that carry no operand beyond their tag, checked in this order.
    operandless_kinds = (
        (VTop, TopVariableInfo, VerificationType.TOP),
        (VInteger, IntegerVariableInfo, VerificationType.INTEGER),
        (VFloat, FloatVariableInfo, VerificationType.FLOAT),
        (VLong, LongVariableInfo, VerificationType.LONG),
        (VDouble, DoubleVariableInfo, VerificationType.DOUBLE),
        (VNull, NullVariableInfo, VerificationType.NULL),
        (VUninitializedThis, UninitializedThisVariableInfo, VerificationType.UNINITIALIZED_THIS),
    )
    for kind, info_cls, tag in operandless_kinds:
        if isinstance(vtype, kind):
            return info_cls(tag)

    if isinstance(vtype, VObject):
        return ObjectVariableInfo(VerificationType.OBJECT, cp.add_class(vtype.class_name))

    # Anything left is treated as VUninitialized: it refers to its NEW site.
    offset = label_offsets.get(vtype.new_label)
    if offset is not None:
        return UninitializedVariableInfo(VerificationType.UNINITIALIZED, offset)
    raise ValueError(f"missing bytecode offset for uninitialized NEW site {vtype.new_label!r}")
2094
+
2095
+
2096
+ def _vtypes_to_vtis(
2097
+ vtypes: tuple[VType, ...],
2098
+ cp: ConstantPoolBuilder,
2099
+ label_offsets: dict[Label, int],
2100
+ ) -> list[VerificationTypeInfo]:
2101
+ """Convert a tuple of verification types to raw ``VerificationTypeInfo`` list."""
2102
+ return [_vtype_to_vti(vt, cp, label_offsets) for vt in vtypes]
2103
+
2104
+
2105
def _verification_type_info_size(vti: VerificationTypeInfo) -> int:
    """Return how many bytes *vti* occupies when serialized.

    Object and uninitialized entries take three bytes; every other entry
    takes one.
    """
    three_byte_kinds = (ObjectVariableInfo, UninitializedVariableInfo)
    return 3 if isinstance(vti, three_byte_kinds) else 1
2110
+
2111
+
2112
+ def _stack_map_frame_size(frame: StackMapFrameInfo) -> int:
2113
+ """Return the serialized size of a ``stack_map_frame``."""
2114
+ if isinstance(frame, SameFrameInfo):
2115
+ return 1
2116
+ if isinstance(frame, SameLocals1StackItemFrameInfo):
2117
+ return 1 + _verification_type_info_size(frame.stack)
2118
+ if isinstance(frame, SameLocals1StackItemFrameExtendedInfo):
2119
+ return 3 + _verification_type_info_size(frame.stack)
2120
+ if isinstance(frame, ChopFrameInfo | SameFrameExtendedInfo):
2121
+ return 3
2122
+ if isinstance(frame, AppendFrameInfo):
2123
+ return 3 + sum(_verification_type_info_size(vti) for vti in frame.locals)
2124
+ if isinstance(frame, FullFrameInfo):
2125
+ return (
2126
+ 7
2127
+ + sum(_verification_type_info_size(vti) for vti in frame.locals)
2128
+ + sum(_verification_type_info_size(vti) for vti in frame.stack)
2129
+ )
2130
+ raise TypeError(f"unsupported stack map frame type: {type(frame).__name__}")
2131
+
2132
+
2133
+ def _stack_map_table_attribute_length(frames: Sequence[StackMapFrameInfo]) -> int:
2134
+ """Return the serialized ``attribute_length`` for a ``StackMapTable``."""
2135
+ return 2 + sum(_stack_map_frame_size(frame) for frame in frames)
2136
+
2137
+
2138
+ # ===================================================================
2139
+ # Compact frame encoding selection
2140
+ # ===================================================================
2141
+
2142
+
2143
def _select_frame(
    offset_delta: int,
    prev_locals: Sequence[VerificationTypeInfo],
    curr_locals: Sequence[VerificationTypeInfo],
    curr_stack: Sequence[VerificationTypeInfo],
) -> StackMapFrameInfo:
    """Pick the smallest StackMapTable frame form that can express the state.

    Follows the frame type selection rules of JVM spec §4.7.4.

    Args:
        offset_delta: Encoded distance from the previous frame.
        prev_locals: Locals of the previous frame (the delta baseline).
        curr_locals: Locals at the frame being emitted.
        curr_stack: Operand stack at the frame being emitted.

    Returns:
        A same, same-locals-1-stack-item, chop, append, or full frame record.
    """
    stack_is_empty = not curr_stack
    fits_short_form = offset_delta <= 63

    if prev_locals == curr_locals:
        if stack_is_empty:
            # same_frame (delta folded into the tag) or same_frame_extended
            if fits_short_form:
                return SameFrameInfo(frame_type=offset_delta)
            return SameFrameExtendedInfo(frame_type=251, offset_delta=offset_delta)

        if len(curr_stack) == 1:
            # same_locals_1_stack_item, or its extended variant
            only_item = curr_stack[0]
            if fits_short_form:
                return SameLocals1StackItemFrameInfo(
                    frame_type=64 + offset_delta,
                    stack=only_item,
                )
            return SameLocals1StackItemFrameExtendedInfo(
                frame_type=247,
                offset_delta=offset_delta,
                stack=only_item,
            )

    if stack_is_empty:
        prev_count = len(prev_locals)
        curr_count = len(curr_locals)
        diff = curr_count - prev_count

        # chop_frame: the last 1-3 locals disappeared, the rest is unchanged
        if -3 <= diff < 0 and curr_locals == prev_locals[:curr_count]:
            return ChopFrameInfo(
                frame_type=251 + diff,  # 248, 249, or 250
                offset_delta=offset_delta,
            )

        # append_frame: 1-3 locals were added after an unchanged prefix
        if 0 < diff <= 3 and curr_locals[:prev_count] == prev_locals:
            return AppendFrameInfo(
                frame_type=251 + diff,  # 252, 253, or 254
                offset_delta=offset_delta,
                locals=list(curr_locals[prev_count:]),
            )

    # Nothing compact applies: spell out the whole frame.
    return FullFrameInfo(
        frame_type=255,
        offset_delta=offset_delta,
        number_of_locals=len(curr_locals),
        locals=list(curr_locals),
        number_of_stack_items=len(curr_stack),
        stack=list(curr_stack),
    )
2201
+
2202
+
2203
+ # ===================================================================
2204
+ # compute_maxs / compute_frames — public API
2205
+ # ===================================================================
2206
+
2207
+
2208
def compute_maxs(
    code: CodeModel,
    method: MethodModel,
    class_name: str,
    resolver: ClassResolver | None = None,
) -> tuple[int, int]:
    """Recompute the ``max_stack`` / ``max_locals`` limits for *code*.

    A control-flow graph is built from *code* and handed to ``simulate``;
    the two limits are read from the simulation result.

    Args:
        code: The code model to analyze.
        method: The method model (used for initial frame).
        class_name: JVM internal name of the enclosing class.
        resolver: Optional class hierarchy resolver for precise type merging.

    Returns:
        The pair ``(max_stack, max_locals)``.
    """
    sim = simulate(build_cfg(code), code, method, class_name, resolver)
    return (sim.max_stack, sim.max_locals)
2231
+
2232
+
2233
+ @dataclass(frozen=True, slots=True)
2234
+ class FrameComputationResult:
2235
+ """Results of frame computation: limits and StackMapTable.
2236
+
2237
+ Attributes:
2238
+ max_stack: Recomputed maximum operand stack depth.
2239
+ max_locals: Recomputed maximum local variable slot count.
2240
+ stack_map_table: Generated ``StackMapTable`` attribute, or ``None``
2241
+ when no frames are required (e.g. a linear method with no
2242
+ branches or exception handlers).
2243
+ """
2244
+
2245
+ max_stack: int
2246
+ max_locals: int
2247
+ stack_map_table: StackMapTableAttr | None
2248
+
2249
+
2250
def compute_frames(
    code: CodeModel,
    method: MethodModel,
    class_name: str,
    cp: ConstantPoolBuilder,
    label_offsets: dict[Label, int],
    resolver: ClassResolver | None = None,
) -> FrameComputationResult:
    """Recompute ``max_stack``, ``max_locals``, and the ``StackMapTable``.

    The code is first prepared so that every ``NEW`` has a label, then a CFG
    is built and stack/local states are simulated.  A compact frame is
    emitted for every reachable, labeled, non-entry block whose label has a
    known offset (JVM spec §4.7.4).

    Args:
        code: The ``CodeModel`` whose frames to compute.
        method: The ``MethodModel`` owning this code (used for initial frame).
        class_name: Internal name of the enclosing class
            (e.g. ``"com/example/Foo"``).
        cp: ``ConstantPoolBuilder`` for allocating ``CONSTANT_Class`` entries
            referenced by ``ObjectVariableInfo``.
        label_offsets: Mapping from ``Label`` to resolved bytecode offset,
            as produced by ``resolve_labels()``.
        resolver: Optional class hierarchy resolver for precise type merging.

    Returns:
        A ``FrameComputationResult`` with ``max_stack``, ``max_locals``, and
        an optional ``StackMapTableAttr`` (``None`` if no frames are needed).
    """
    prepared = _prepare_analysis_code(code)
    if prepared is code:
        vti_offsets = label_offsets
    else:
        # Synthetic labels were inserted, so resolve offsets for the prepared
        # stream; these are used when converting verification types.
        from .labels import resolve_labels

        vti_offsets = resolve_labels(list(prepared.instructions), cp).label_offsets

    cfg = build_cfg(code)
    sim = simulate(cfg, prepared, method, class_name, resolver)

    def finish(table: StackMapTableAttr | None) -> FrameComputationResult:
        return FrameComputationResult(
            max_stack=sim.max_stack,
            max_locals=sim.max_locals,
            stack_map_table=table,
        )

    if not cfg.blocks:
        return finish(None)

    # A frame is emitted for each block other than the entry block that is
    # reachable (has an entry state), carries a label, and has a known offset.
    entry_id = cfg.entry.id
    targets: list[tuple[int, int]] = []  # (bytecode_offset, block_id)
    for block in cfg.blocks:
        if block.id == entry_id or block.id not in sim.entry_states or block.label is None:
            continue
        position = label_offsets.get(block.label)
        if position is not None:
            targets.append((position, block.id))

    targets.sort(key=lambda pair: pair[0])

    if not targets:
        return finish(None)

    # The method's initial frame is the implicit "previous" frame for the
    # first delta computation.
    baseline = initial_frame(method, class_name)
    last_locals = _vtypes_to_vtis(baseline.locals, cp, vti_offsets)
    # Seeding with -1 makes the first offset_delta equal the frame's own
    # bytecode offset; every later delta is offset - previous_offset - 1.
    last_offset = -1

    entries: list[StackMapFrameInfo] = []
    for position, block_id in targets:
        entry_state = sim.entry_states[block_id]
        now_locals = _vtypes_to_vtis(entry_state.locals, cp, vti_offsets)
        now_stack = _vtypes_to_vtis(entry_state.stack, cp, vti_offsets)

        delta = position - last_offset - 1
        entries.append(_select_frame(delta, last_locals, now_locals, now_stack))

        last_locals, last_offset = now_locals, position

    table = StackMapTableAttr(
        attribute_name_index=cp.add_utf8("StackMapTable"),
        attribute_length=_stack_map_table_attribute_length(entries),
        number_of_entries=len(entries),
        entries=entries,
    )
    return finish(table)
2354
+
2355
+
2356
+ # ===================================================================
2357
+ # Public API
2358
+ # ===================================================================
2359
+
2360
+
2361
# Names exported by ``from <module> import *``, grouped by area.
__all__ = [
    # Errors
    "AnalysisError", "InvalidLocalError", "StackUnderflowError", "TypeMergeError",
    # Verification types
    "VDouble", "VFloat", "VInteger", "VLong", "VNull",
    "VObject", "VTop", "VType", "VUninitialized", "VUninitializedThis",
    # VType helpers
    "is_category2", "is_reference", "merge_vtypes",
    "vtype_from_descriptor", "vtype_from_field_descriptor_str",
    # Frame state
    "FrameState", "initial_frame",
    # Opcode metadata
    "OPCODE_EFFECTS", "OpcodeEffect",
    # CFG
    "BasicBlock", "ControlFlowGraph", "ExceptionEdge", "build_cfg",
    # Simulation
    "SimulationResult", "simulate",
    # Frame computation
    "FrameComputationResult", "compute_frames", "compute_maxs",
]