pytecode 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pytecode/labels.py ADDED
@@ -0,0 +1,1041 @@
1
+ """Label-based bytecode instruction editing.
2
+
3
+ Provides symbolic ``Label`` targets and label-aware instruction types
4
+ (``BranchInsn``, ``LookupSwitchInsn``, ``TableSwitchInsn``) so that bytecode
5
+ can be manipulated without tracking raw offsets. ``lower_code`` converts a
6
+ label-based ``CodeModel`` into an offset-based ``CodeAttr`` ready for
7
+ serialisation, and ``resolve_labels`` computes the offset mapping.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import copy
13
+ from dataclasses import dataclass
14
+ from typing import TYPE_CHECKING
15
+
16
+ from .attributes import (
17
+ AttributeInfo,
18
+ CodeAttr,
19
+ ExceptionInfo,
20
+ LineNumberInfo,
21
+ LineNumberTableAttr,
22
+ LocalVariableInfo,
23
+ LocalVariableTableAttr,
24
+ LocalVariableTypeInfo,
25
+ LocalVariableTypeTableAttr,
26
+ )
27
+ from .constant_pool import ClassInfo
28
+ from .constant_pool_builder import ConstantPoolBuilder
29
+ from .debug_info import DebugInfoPolicy, is_code_debug_info_stale, normalize_debug_info_policy
30
+ from .descriptors import parameter_slot_count, parse_method_descriptor
31
+ from .instructions import (
32
+ Branch,
33
+ BranchW,
34
+ ByteValue,
35
+ ConstPoolIndex,
36
+ IInc,
37
+ IIncW,
38
+ InsnInfo,
39
+ InsnInfoType,
40
+ InvokeDynamic,
41
+ InvokeInterface,
42
+ LocalIndex,
43
+ LocalIndexW,
44
+ LookupSwitch,
45
+ MatchOffsetPair,
46
+ MultiANewArray,
47
+ NewArray,
48
+ ShortValue,
49
+ TableSwitch,
50
+ )
51
+ from .operands import (
52
+ _BASE_TO_WIDE,
53
+ _VAR_SHORTCUTS,
54
+ FieldInsn,
55
+ IIncInsn,
56
+ InterfaceMethodInsn,
57
+ InvokeDynamicInsn,
58
+ LdcClass,
59
+ LdcDouble,
60
+ LdcFloat,
61
+ LdcInsn,
62
+ LdcInt,
63
+ LdcLong,
64
+ LdcMethodHandle,
65
+ LdcMethodType,
66
+ LdcString,
67
+ LdcValue,
68
+ MethodInsn,
69
+ MultiANewArrayInsn,
70
+ TypeInsn,
71
+ VarInsn,
72
+ _require_i2,
73
+ _require_u1,
74
+ _require_u2,
75
+ )
76
+
77
+ if TYPE_CHECKING:
78
+ from .hierarchy import ClassResolver
79
+ from .model import CodeModel, MethodModel
80
+
81
+ __all__ = [
82
+ "BranchInsn",
83
+ "CodeItem",
84
+ "ExceptionHandler",
85
+ "Label",
86
+ "LabelResolution",
87
+ "LineNumberEntry",
88
+ "LocalVariableEntry",
89
+ "LocalVariableTypeEntry",
90
+ "LookupSwitchInsn",
91
+ "TableSwitchInsn",
92
+ "lower_code",
93
+ "resolve_catch_type",
94
+ "resolve_labels",
95
+ ]
96
+
97
# Inclusive bounds of the signed 16-bit and 32-bit ranges that relative
# branch offsets are checked against.
_I2_MIN = -0x8000
_I2_MAX = 0x7FFF
_I4_MIN = -0x8000_0000
_I4_MAX = 0x7FFF_FFFF

# Narrow unconditional jumps mapped to the wide opcode that replaces them
# when the relative offset no longer fits in 16 bits.
_BRANCH_WIDENINGS: dict[InsnInfoType, InsnInfoType] = dict(
    (
        (InsnInfoType.GOTO, InsnInfoType.GOTO_W),
        (InsnInfoType.JSR, InsnInfoType.JSR_W),
    )
)

# Conditional branches listed as complementary pairs; each member of a pair
# is the logical negation of the other.
_CONDITIONAL_BRANCH_PAIRS: tuple[tuple[InsnInfoType, InsnInfoType], ...] = (
    (InsnInfoType.IFEQ, InsnInfoType.IFNE),
    (InsnInfoType.IFLT, InsnInfoType.IFGE),
    (InsnInfoType.IFGT, InsnInfoType.IFLE),
    (InsnInfoType.IF_ICMPEQ, InsnInfoType.IF_ICMPNE),
    (InsnInfoType.IF_ICMPLT, InsnInfoType.IF_ICMPGE),
    (InsnInfoType.IF_ICMPGT, InsnInfoType.IF_ICMPLE),
    (InsnInfoType.IF_ACMPEQ, InsnInfoType.IF_ACMPNE),
    (InsnInfoType.IFNULL, InsnInfoType.IFNONNULL),
)

# Lookup from a conditional branch opcode to the opcode testing the opposite
# condition (both directions of every pair are present).
_INVERTED_CONDITIONAL_BRANCHES: dict[InsnInfoType, InsnInfoType] = {
    opcode: opposite
    for first, second in _CONDITIONAL_BRANCH_PAIRS
    for opcode, opposite in ((first, second), (second, first))
}
125
+
126
+
127
@dataclass(eq=False)
class Label:
    """Symbolic marker for a position in a bytecode instruction stream.

    The dataclass is declared with ``eq=False``, so equality and hashing stay
    identity-based: two labels with the same name are still different labels
    and can be used as distinct dictionary keys.

    Attributes:
        name: Optional human-readable name, used only in ``repr`` output.
    """

    name: str | None = None

    def __repr__(self) -> str:
        # Unnamed labels fall back to their identity so they stay tellable apart.
        if self.name is None:
            return f"Label(id=0x{id(self):x})"
        return f"Label({self.name!r})"
144
+
145
+
146
+ type CodeItem = InsnInfo | Label
147
+
148
+
149
@dataclass
class ExceptionHandler:
    """Label-based description of one exception-table entry.

    Attributes:
        start: Label at the first protected position (inclusive).
        end: Label just past the protected region (exclusive).
        handler: Label where control resumes when the exception is caught.
        catch_type: Internal name of the exception class to catch, or
            ``None`` for a catch-all (``finally``) handler.
    """

    start: Label
    end: Label
    handler: Label
    catch_type: str | None
165
+
166
+
167
@dataclass
class LineNumberEntry:
    """Associates a labelled bytecode position with a source line.

    Attributes:
        label: Label marking the bytecode position.
        line_number: Source-file line number for that position.
    """

    label: Label
    line_number: int
178
+
179
+
180
@dataclass
class LocalVariableEntry:
    """Debug record for a local variable whose live range is given by labels.

    Attributes:
        start: Label where the variable's scope begins (inclusive).
        end: Label where the variable's scope ends (exclusive).
        name: Name of the variable as written in source.
        descriptor: JVM field descriptor of the variable's type.
        slot: Index of the variable in the local variable array.
    """

    start: Label
    end: Label
    name: str
    descriptor: str
    slot: int
197
+
198
+
199
@dataclass
class LocalVariableTypeEntry:
    """Debug record carrying a local variable's generic signature.

    Mirrors ``LocalVariableEntry`` field for field, except that it stores a
    generic signature where that class stores a plain descriptor.

    Attributes:
        start: Label where the variable's scope begins (inclusive).
        end: Label where the variable's scope ends (exclusive).
        name: Name of the variable as written in source.
        signature: Generic signature of the variable's type.
        slot: Index of the variable in the local variable array.
    """

    start: Label
    end: Label
    name: str
    signature: str
    slot: int
219
+
220
+
221
@dataclass(init=False)
class BranchInsn(InsnInfo):
    """Branch instruction whose destination is a ``Label`` rather than an offset.

    Accepts both narrow (2-byte offset) and wide (4-byte offset) branch
    opcodes. When code is lowered, a narrow branch whose offset overflows is
    widened or rewritten as an inverted branch over a wide jump.

    Attributes:
        target: The ``Label`` this branch jumps to.
    """

    target: Label

    def __init__(self, insn_type: InsnInfoType, target: Label, bytecode_offset: int = -1) -> None:
        """Create a label-targeting branch.

        Args:
            insn_type: Opcode; its operand model must be ``Branch`` or ``BranchW``.
            target: Label to jump to.
            bytecode_offset: Offset forwarded to ``InsnInfo``; defaults to ``-1``.

        Raises:
            ValueError: If *insn_type* is not a branch opcode.
        """
        operand_model = insn_type.instinfo
        if operand_model not in (Branch, BranchW):
            raise ValueError(f"{insn_type.name} is not a branch opcode")
        super().__init__(insn_type, bytecode_offset)
        self.target = target
240
+
241
+
242
@dataclass(init=False)
class LookupSwitchInsn(InsnInfo):
    """``lookupswitch`` instruction whose jump targets are labels.

    Attributes:
        default_target: Label taken when no case matches.
        pairs: ``(match value, label)`` tuples, one per case.
    """

    default_target: Label
    pairs: list[tuple[int, Label]]

    def __init__(
        self,
        default_target: Label,
        pairs: list[tuple[int, Label]],
        bytecode_offset: int = -1,
    ) -> None:
        """Create a label-based ``lookupswitch``.

        Args:
            default_target: Label for the default branch.
            pairs: Match-value / label pairs; copied into a new list.
            bytecode_offset: Offset forwarded to ``InsnInfo``; defaults to ``-1``.
        """
        super().__init__(InsnInfoType.LOOKUPSWITCH, bytecode_offset)
        # Keep a private copy so later edits to the caller's list do not leak in.
        self.pairs = [*pairs]
        self.default_target = default_target
263
+
264
+
265
@dataclass(init=False)
class TableSwitchInsn(InsnInfo):
    """``tableswitch`` instruction whose jump targets are labels.

    Attributes:
        default_target: Label taken when the value is outside ``low..high``.
        low: Smallest match value (inclusive).
        high: Largest match value (inclusive).
        targets: One label per value in ``low..high``, in ascending order.
    """

    default_target: Label
    low: int
    high: int
    targets: list[Label]

    def __init__(
        self,
        default_target: Label,
        low: int,
        high: int,
        targets: list[Label],
        bytecode_offset: int = -1,
    ) -> None:
        """Create a label-based ``tableswitch``.

        Args:
            default_target: Label for the default branch.
            low: Minimum match value (inclusive).
            high: Maximum match value (inclusive).
            targets: Labels for each case; copied into a new list.
            bytecode_offset: Offset forwarded to ``InsnInfo``; defaults to ``-1``.

        Raises:
            ValueError: If ``high < low`` or the number of targets does not
                equal ``high - low + 1``.
        """
        if low > high:
            raise ValueError("tableswitch high must be >= low")
        expected_targets = (high - low) + 1
        actual_targets = len(targets)
        if actual_targets != expected_targets:
            raise ValueError(f"tableswitch range {low}..{high} requires {expected_targets} targets, got {actual_targets}")
        super().__init__(InsnInfoType.TABLESWITCH, bytecode_offset)
        self.default_target = default_target
        self.low, self.high = low, high
        self.targets = [*targets]
299
+
300
+
301
@dataclass
class LabelResolution:
    """Offsets computed for a mixed stream of instructions and labels.

    Attributes:
        label_offsets: Resolved bytecode offset of every ``Label`` in the stream.
        instruction_offsets: Bytecode offset of each stream item, by position
            (labels included).
        total_code_length: Byte length of the whole lowered instruction stream.
    """

    label_offsets: dict[Label, int]
    instruction_offsets: list[int]
    total_code_length: int
314
+
315
+
316
+ def _switch_padding(offset: int) -> int:
317
+ return (4 - ((offset + 1) % 4)) % 4
318
+
319
+
320
def _fits_i2(value: int) -> bool:
    """Return whether *value* lies within ``_I2_MIN`` and ``_I2_MAX`` inclusive."""
    return value >= _I2_MIN and value <= _I2_MAX
322
+
323
+
324
def _fits_i4(value: int) -> bool:
    """Return whether *value* lies within ``_I4_MIN`` and ``_I4_MAX`` inclusive."""
    return value >= _I4_MIN and value <= _I4_MAX
326
+
327
+
328
+ def _require_label_offset(label_offsets: dict[Label, int], label: Label, *, context: str) -> int:
329
+ try:
330
+ return label_offsets[label]
331
+ except KeyError as exc:
332
+ raise ValueError(f"{context} refers to a label that is not present in CodeModel.instructions") from exc
333
+
334
+
335
def _relative_offset(source_offset: int, label: Label, label_offsets: dict[Label, int], *, context: str) -> int:
    """Return the distance from *source_offset* to the offset of *label*.

    Raises:
        ValueError: If *label* has no resolved offset (raised by
            ``_require_label_offset`` using *context*).
    """
    destination = _require_label_offset(label_offsets, label, context=context)
    return destination - source_offset
337
+
338
+
339
+ def _attribute_marshaled_size(attribute: AttributeInfo) -> int:
340
+ return 6 + attribute.attribute_length
341
+
342
+
343
def _code_attribute_length(
    code_length: int,
    exception_table_length: int,
    attributes: list[AttributeInfo],
) -> int:
    """Compute the ``attribute_length`` value of a ``Code`` attribute.

    Args:
        code_length: Number of bytecode bytes.
        exception_table_length: Number of exception-table entries (8 bytes each).
        attributes: Nested attributes; each contributes its marshaled size.

    Returns:
        12 fixed bytes plus the code, exception table and nested attributes.
    """
    total = 12 + code_length + 8 * exception_table_length
    for nested in attributes:
        total += _attribute_marshaled_size(nested)
    return total
350
+
351
+
352
def _refresh_code_attr_metadata(code_attr: CodeAttr) -> None:
    """Recompute ``attributes_count`` and ``attribute_length`` in place.

    Intended to be called after ``code_attr.attributes`` has been modified so
    the stored count and length match the current nested attributes.
    """
    nested = code_attr.attributes
    code_attr.attributes_count = len(nested)
    code_attr.attribute_length = _code_attribute_length(
        code_attr.code_length,
        code_attr.exception_table_length,
        nested,
    )
359
+
360
+
361
def _clone_code_item(item: CodeItem) -> CodeItem:
    """Return a shallow copy of an instruction, or the label itself.

    Labels are returned unchanged because they are compared by identity;
    copying one would detach it from everything that refers to it.
    """
    if isinstance(item, Label):
        return item
    return copy.copy(item)
363
+
364
+
365
def _lifted_debug_attrs(attributes: list[AttributeInfo]) -> list[AttributeInfo]:
    """Return *attributes* without the three debug-table attribute kinds.

    ``LineNumberTableAttr``, ``LocalVariableTableAttr`` and
    ``LocalVariableTypeTableAttr`` instances are dropped; everything else is
    kept in its original order.
    """
    debug_table_kinds = (LineNumberTableAttr, LocalVariableTableAttr, LocalVariableTypeTableAttr)
    kept: list[AttributeInfo] = []
    for candidate in attributes:
        if isinstance(candidate, debug_table_kinds):
            continue
        kept.append(candidate)
    return kept
374
+
375
+
376
def _ordered_nested_code_attributes(
    code: CodeModel,
    line_number_attr: LineNumberTableAttr | None,
    local_variable_attr: LocalVariableTableAttr | None,
    local_variable_type_attr: LocalVariableTypeTableAttr | None,
) -> list[AttributeInfo]:
    """Arrange the nested attributes of a lowered ``Code`` attribute.

    Non-debug attributes are deep-copied from ``code.attributes``. The order
    follows ``code._nested_attribute_layout`` token by token: ``"other"``
    consumes the next non-debug attribute, and ``"line_numbers"``,
    ``"local_variables"`` and ``"local_variable_types"`` place the matching
    rebuilt debug attribute (each at most once). Anything the layout did not
    place is appended afterwards: remaining non-debug attributes first, then
    remaining debug attributes in the fixed order above. An empty layout
    therefore yields all non-debug attributes followed by the debug ones.

    Args:
        code: Code model providing ``attributes`` and the recorded layout.
        line_number_attr: Rebuilt ``LineNumberTable`` or ``None``.
        local_variable_attr: Rebuilt ``LocalVariableTable`` or ``None``.
        local_variable_type_attr: Rebuilt ``LocalVariableTypeTable`` or ``None``.

    Returns:
        A new list with the nested attributes in their final order.
    """
    carried_over = iter(copy.deepcopy(_lifted_debug_attrs(code.attributes)))
    exhausted = object()

    # Only debug attributes that actually exist are pending; insertion order
    # doubles as the fallback order for the ones the layout never mentions.
    pending: dict[str, AttributeInfo] = {
        token: attr
        for token, attr in (
            ("line_numbers", line_number_attr),
            ("local_variables", local_variable_attr),
            ("local_variable_types", local_variable_type_attr),
        )
        if attr is not None
    }

    ordered: list[AttributeInfo] = []
    for token in code._nested_attribute_layout or ():
        if token == "other":
            following = next(carried_over, exhausted)
            if following is not exhausted:
                ordered.append(following)
            continue
        placed = pending.pop(token, None)
        if placed is not None:
            ordered.append(placed)

    ordered.extend(carried_over)
    ordered.extend(pending.values())
    return ordered
417
+
418
+
419
def _clone_constant_pool_builder(cp: ConstantPoolBuilder) -> ConstantPoolBuilder:
    """Return a new builder created from the pool that *cp* currently builds.

    Used to obtain a scratch pool so that trial lowering does not add entries
    to the caller's builder.
    """
    built_pool = cp.build()
    return ConstantPoolBuilder.from_pool(built_pool)
421
+
422
+
423
def _needs_ldc_index_cache(items: list[CodeItem]) -> bool:
    """Return whether any item is an ``LdcInsn`` with a single-slot value.

    ``LdcLong`` and ``LdcDouble`` values are excluded: their size is sized as
    a fixed three bytes and does not depend on the constant-pool index.
    """
    for candidate in items:
        if not isinstance(candidate, LdcInsn):
            continue
        if not isinstance(candidate.value, (LdcLong, LdcDouble)):
            return True
    return False
425
+
426
+
427
def _build_ldc_index_cache(items: list[CodeItem], cp: ConstantPoolBuilder) -> dict[int, int]:
    """Map ``id(insn)`` of every ``LdcInsn`` to a trial constant-pool index.

    The indices are obtained by lowering each LDC value into a clone of *cp*,
    so the caller's builder is left untouched.

    NOTE(review): the probe pool only receives LDC constants, in stream
    order. If real lowering adds other new pool entries in between, the
    indices allocated there could differ from these -- confirm this cannot
    move an index across the 255 boundary used for sizing.
    """
    scratch_pool = _clone_constant_pool_builder(cp)
    indices: dict[int, int] = {}
    for candidate in items:
        if isinstance(candidate, LdcInsn):
            indices[id(candidate)] = _lower_ldc_value(candidate.value, scratch_pool)
    return indices
430
+
431
+
432
def _instruction_byte_size(
    insn: CodeItem,
    offset: int,
    ldc_index_cache: dict[int, int] | None = None,
) -> int:
    """Return the encoded size in bytes of *insn* when placed at *offset*.

    Args:
        insn: Label, label-based instruction, symbolic operand wrapper or raw
            spec-model instruction.
        offset: Bytecode offset of the instruction; only affects the padding
            of switch instructions.
        ldc_index_cache: ``id(insn)`` -> constant-pool index for ``LdcInsn``
            items; required for single-slot LDC values.

    Returns:
        The instruction's size; ``0`` for labels and ``1`` for any instruction
        kind not listed explicitly.

    Raises:
        ValueError: If a single-slot ``LdcInsn`` is sized without a cache or
            is missing from it, or if a slot/increment fails range validation.
    """
    if isinstance(insn, Label):
        return 0  # labels occupy no bytes

    # --- Label-based control flow ---------------------------------------
    if isinstance(insn, BranchInsn):
        is_wide = insn.type.instinfo is BranchW
        return 5 if is_wide else 3
    if isinstance(insn, LookupSwitchInsn):
        # opcode + padding + default(4) + npairs(4) + 8 bytes per pair
        return 1 + _switch_padding(offset) + 8 + 8 * len(insn.pairs)
    if isinstance(insn, TableSwitchInsn):
        # opcode + padding + default(4) + low(4) + high(4) + 4 bytes per target
        return 1 + _switch_padding(offset) + 12 + 4 * len(insn.targets)

    # --- Symbolic operand wrappers (operands.py) ------------------------
    if isinstance(insn, (FieldInsn, MethodInsn, TypeInsn)):
        return 3  # opcode(1) + u2 CP index
    if isinstance(insn, (InterfaceMethodInsn, InvokeDynamicInsn)):
        return 5  # opcode(1) + u2 CP index + two trailing bytes
    if isinstance(insn, MultiANewArrayInsn):
        return 4  # opcode(1) + u2 CP index + u1 dimensions
    if isinstance(insn, LdcInsn):
        if isinstance(insn.value, (LdcLong, LdcDouble)):
            return 3  # LDC2_W: opcode(1) + u2 CP index
        if ldc_index_cache is None:
            raise ValueError("LdcInsn size requires constant-pool context")
        cached_index = ldc_index_cache.get(id(insn))
        if cached_index is None:
            raise ValueError("LdcInsn is missing from the LDC index cache")
        return 3 if cached_index > 255 else 2  # LDC_W: 3, LDC: 2
    if isinstance(insn, VarInsn):
        slot = _require_u2(insn.slot, context="local variable slot")
        if _VAR_SHORTCUTS.get((insn.type, slot)) is not None:
            return 1  # implicit form (e.g. ILOAD_0)
        # opcode(1) + u1 slot, or WIDE(1) + opcode(1) + u2 slot
        return 2 if slot <= 255 else 4
    if isinstance(insn, IIncInsn):
        slot = _require_u2(insn.slot, context="local variable slot")
        increment = _require_i2(insn.increment, context="iinc increment")
        fits_narrow_form = slot <= 255 and -128 <= increment <= 127
        # IINC(1) + u1 + i1, or WIDE(1) + IINC(1) + u2 + i2
        return 3 if fits_narrow_form else 6

    # --- Raw spec-model types (checked in the original order) -----------
    if isinstance(insn, LocalIndex):
        return 2
    if isinstance(insn, LocalIndexW):
        return 4
    if isinstance(insn, ConstPoolIndex):
        return 3
    if isinstance(insn, ByteValue):
        return 2
    if isinstance(insn, (ShortValue, Branch)):
        return 3
    if isinstance(insn, BranchW):
        return 5
    if isinstance(insn, IInc):
        return 3
    if isinstance(insn, IIncW):
        return 6
    if isinstance(insn, (InvokeDynamic, InvokeInterface)):
        return 5
    if isinstance(insn, NewArray):
        return 2
    if isinstance(insn, MultiANewArray):
        return 4
    if isinstance(insn, LookupSwitch):
        return 1 + _switch_padding(offset) + 8 + 8 * len(insn.pairs)
    if isinstance(insn, TableSwitch):
        return 1 + _switch_padding(offset) + 12 + 4 * len(insn.offsets)

    # Everything else is treated as a bare one-byte opcode.
    return 1
508
+
509
+
510
def resolve_labels(
    items: list[CodeItem],
    cp: ConstantPoolBuilder | None = None,
) -> LabelResolution:
    """Compute bytecode offsets for every item of a mixed instruction stream.

    Args:
        items: Stream of ``InsnInfo`` and ``Label`` items.
        cp: Constant-pool builder. Needed only when the stream contains
            ``LdcInsn`` items with single-slot values, whose size depends on
            the pool index; the builder is probed through a clone and is not
            modified.

    Returns:
        A ``LabelResolution`` holding the offset of each label, the offset of
        each stream position, and the total code length.

    Raises:
        ValueError: If the same label occurs more than once, or if *cp* is
            required but was not supplied.
    """
    ldc_indices: dict[int, int] | None = None
    if _needs_ldc_index_cache(items):
        if cp is None:
            raise ValueError(
                "resolve_labels() requires a ConstantPoolBuilder when instructions contain single-slot LdcInsn values"
            )
        ldc_indices = _build_ldc_index_cache(items, cp)

    positions: dict[Label, int] = {}
    starts: list[int] = []
    cursor = 0

    for entry in items:
        starts.append(cursor)
        if not isinstance(entry, Label):
            cursor += _instruction_byte_size(entry, cursor, ldc_indices)
        elif entry in positions:
            raise ValueError(f"label {entry!r} appears multiple times in CodeModel.instructions")
        else:
            # A label takes the offset of whatever instruction follows it.
            positions[entry] = cursor

    return LabelResolution(
        label_offsets=positions,
        instruction_offsets=starts,
        total_code_length=cursor,
    )
556
+
557
+
558
def _promote_overflow_branches(items: list[CodeItem], resolution: LabelResolution) -> bool:
    """Rewrite narrow branches whose relative offset does not fit in 16 bits.

    ``GOTO``/``JSR`` are replaced by their wide forms. A conditional branch is
    replaced by the inverted condition jumping over a ``GOTO_W`` to the
    original target::

        if<cond> target      ->      if<!cond> skip
                                     goto_w target
                                   skip:

    Every item is paired with the offset *resolution* recorded for it, and the
    rewritten stream is built separately before being written back. Indexing
    ``resolution.instruction_offsets`` with positions of the list while it is
    being grown would read offsets of the wrong items after the first
    three-item expansion (and could run past the end of the offset list).

    Args:
        items: Instruction stream; modified in place only when at least one
            branch was rewritten.
        resolution: Offsets computed for *items* in its current state.

    Returns:
        ``True`` if any branch was rewritten (offsets must then be resolved
        again), ``False`` if *items* was left untouched.

    Raises:
        ValueError: If a branch target is unresolved, a wide branch exceeds
            the i4 range, a narrow branch has no automatic replacement, or
            *resolution* does not cover exactly the items given.
    """
    rewritten: list[CodeItem] = []
    changed = False

    for item, source_offset in zip(items, resolution.instruction_offsets, strict=True):
        if not isinstance(item, BranchInsn):
            rewritten.append(item)
            continue

        relative = _relative_offset(
            source_offset,
            item.target,
            resolution.label_offsets,
            context=f"{item.type.name} target",
        )

        if item.type.instinfo is BranchW:
            # Already wide: nothing to promote, only validate the range.
            if not _fits_i4(relative):
                raise ValueError(f"{item.type.name} branch offset {relative} exceeds JVM i4 range")
            rewritten.append(item)
            continue

        if _fits_i2(relative):
            rewritten.append(item)
            continue

        widened = _BRANCH_WIDENINGS.get(item.type)
        if widened is not None:
            rewritten.append(BranchInsn(widened, item.target))
            changed = True
            continue

        inverted = _INVERTED_CONDITIONAL_BRANCHES.get(item.type)
        if inverted is None:
            raise ValueError(f"{item.type.name} cannot be widened automatically")

        # No wide conditional opcodes exist: branch around a wide goto instead.
        skip_label = Label(f"{item.type.name.lower()}_skip")
        rewritten.extend(
            (
                BranchInsn(inverted, skip_label),
                BranchInsn(InsnInfoType.GOTO_W, item.target),
                skip_label,
            )
        )
        changed = True

    if changed:
        # Write back in place so the caller's list object sees the new stream.
        items[:] = rewritten
    return changed
607
+
608
+
609
def _lower_instruction(
    item: CodeItem,
    offset: int,
    label_offsets: dict[Label, int],
    cp: ConstantPoolBuilder,
) -> InsnInfo | None:
    """Convert one stream item into its raw, offset-based instruction.

    Args:
        item: Label, label-based instruction, symbolic operand wrapper or raw
            instruction.
        offset: Resolved bytecode offset of *item*.
        label_offsets: Resolved offsets of all labels in the stream.
        cp: Constant-pool builder; symbolic operands add their entries to it.

    Returns:
        The lowered instruction positioned at *offset*, or ``None`` for a
        label. Items that need no translation are deep-copied with their
        ``bytecode_offset`` updated.

    Raises:
        ValueError: If a referenced label is unresolved, a branch offset is
            out of range for its opcode, or an operand fails range validation.
    """
    if isinstance(item, Label):
        return None

    # ---- Label-based control flow --------------------------------------
    if isinstance(item, BranchInsn):
        opcode_name = item.type.name
        relative = _relative_offset(offset, item.target, label_offsets, context=f"{opcode_name} target")
        if item.type.instinfo is BranchW:
            if not _fits_i4(relative):
                raise ValueError(f"{opcode_name} branch offset {relative} exceeds JVM i4 range")
            return BranchW(item.type, offset, relative)
        if not _fits_i2(relative):
            raise ValueError(f"{opcode_name} branch offset {relative} exceeds JVM i2 range")
        return Branch(item.type, offset, relative)

    if isinstance(item, LookupSwitchInsn):
        default_delta = _relative_offset(
            offset,
            item.default_target,
            label_offsets,
            context="lookupswitch default target",
        )
        lowered_pairs = []
        for match_value, case_label in item.pairs:
            case_delta = _relative_offset(offset, case_label, label_offsets, context="lookupswitch case target")
            lowered_pairs.append(MatchOffsetPair(match_value, case_delta))
        return LookupSwitch(item.type, offset, default_delta, len(lowered_pairs), lowered_pairs)

    if isinstance(item, TableSwitchInsn):
        default_delta = _relative_offset(
            offset,
            item.default_target,
            label_offsets,
            context="tableswitch default target",
        )
        case_deltas = []
        for case_label in item.targets:
            case_deltas.append(_relative_offset(offset, case_label, label_offsets, context="tableswitch case target"))
        return TableSwitch(item.type, offset, default_delta, item.low, item.high, case_deltas)

    # ---- Symbolic operand wrappers from operands.py --------------------
    if isinstance(item, FieldInsn):
        return ConstPoolIndex(item.type, offset, cp.add_fieldref(item.owner, item.name, item.descriptor))

    if isinstance(item, MethodInsn):
        add_ref = cp.add_interface_methodref if item.is_interface else cp.add_methodref
        return ConstPoolIndex(item.type, offset, add_ref(item.owner, item.name, item.descriptor))

    if isinstance(item, InterfaceMethodInsn):
        ref_index = cp.add_interface_methodref(item.owner, item.name, item.descriptor)
        parsed = parse_method_descriptor(item.descriptor)
        # The count operand covers the argument slots plus the object reference.
        arg_slots = parameter_slot_count(parsed) + 1
        return InvokeInterface(InsnInfoType.INVOKEINTERFACE, offset, ref_index, arg_slots, b"\x00")

    if isinstance(item, TypeInsn):
        return ConstPoolIndex(item.type, offset, cp.add_class(item.class_name))

    if isinstance(item, LdcInsn):
        const_index = _lower_ldc_value(item.value, cp)
        if isinstance(item.value, (LdcLong, LdcDouble)):
            return ConstPoolIndex(InsnInfoType.LDC2_W, offset, const_index)
        if const_index > 255:
            # The index no longer fits the one-byte operand of plain LDC.
            return ConstPoolIndex(InsnInfoType.LDC_W, offset, const_index)
        return LocalIndex(InsnInfoType.LDC, offset, const_index)

    if isinstance(item, VarInsn):
        slot = _require_u2(item.slot, context="local variable slot")
        implicit_form = _VAR_SHORTCUTS.get((item.type, slot))
        if implicit_form is not None:
            return InsnInfo(implicit_form, offset)
        if slot <= 255:
            return LocalIndex(item.type, offset, slot)
        return LocalIndexW(_BASE_TO_WIDE[item.type], offset, slot)

    if isinstance(item, IIncInsn):
        slot = _require_u2(item.slot, context="local variable slot")
        increment = _require_i2(item.increment, context="iinc increment")
        if slot > 255 or not (-128 <= increment <= 127):
            return IIncW(InsnInfoType.IINCW, offset, slot, increment)
        return IInc(InsnInfoType.IINC, offset, slot, increment)

    if isinstance(item, InvokeDynamicInsn):
        bootstrap_index = _require_u2(
            item.bootstrap_method_attr_index,
            context="bootstrap_method_attr_index",
        )
        indy_index = cp.add_invoke_dynamic(bootstrap_index, item.name, item.descriptor)
        return InvokeDynamic(InsnInfoType.INVOKEDYNAMIC, offset, indy_index, b"\x00\x00")

    if isinstance(item, MultiANewArrayInsn):
        dimensions = _require_u1(
            item.dimensions,
            context="multianewarray dimensions",
            minimum=1,
        )
        return MultiANewArray(InsnInfoType.MULTIANEWARRAY, offset, cp.add_class(item.class_name), dimensions)

    # ---- Anything else: copy and reposition ----------------------------
    passthrough = copy.deepcopy(item)
    passthrough.bytecode_offset = offset
    if isinstance(passthrough, LookupSwitch):
        # Keep the stored pair count in step with the actual pair list.
        passthrough.npairs = len(passthrough.pairs)
    return passthrough
726
+
727
+
728
def _lower_exception_handlers(
    exception_handlers: list[ExceptionHandler],
    label_offsets: dict[Label, int],
    cp: ConstantPoolBuilder,
) -> list[ExceptionInfo]:
    """Translate label-based handlers into raw exception-table entries.

    Args:
        exception_handlers: Handlers to lower, in table order.
        label_offsets: Resolved offsets of all labels in the stream.
        cp: Constant-pool builder that receives the caught class names.

    Returns:
        One ``ExceptionInfo`` per handler; a ``None`` catch type is encoded as
        index ``0``.

    Raises:
        ValueError: If a handler label is unresolved or the protected range is
            empty or reversed.
    """
    table: list[ExceptionInfo] = []
    for entry in exception_handlers:
        range_start = _require_label_offset(label_offsets, entry.start, context="exception handler start")
        range_end = _require_label_offset(label_offsets, entry.end, context="exception handler end")
        target_pc = _require_label_offset(label_offsets, entry.handler, context="exception handler target")
        if not start_before_end(range_start, range_end):
            raise ValueError("exception handler start must be strictly before end")
        if entry.catch_type is None:
            class_index = 0
        else:
            class_index = cp.add_class(entry.catch_type)
        table.append(ExceptionInfo(range_start, range_end, target_pc, class_index))
    return table


def start_before_end(start_pc: int, end_pc: int) -> bool:
    """Return whether *start_pc* lies strictly before *end_pc*."""
    return start_pc < end_pc
743
+
744
+
745
def _build_line_number_attribute(
    line_numbers: list[LineNumberEntry],
    label_offsets: dict[Label, int],
    cp: ConstantPoolBuilder,
) -> LineNumberTableAttr | None:
    """Build a ``LineNumberTable`` attribute from label-based entries.

    Args:
        line_numbers: Entries to convert, in table order.
        label_offsets: Resolved offsets of all labels in the stream.
        cp: Constant-pool builder that receives the attribute name.

    Returns:
        The attribute, or ``None`` when there are no entries.

    Raises:
        ValueError: If an entry's label is unresolved.
    """
    if not line_numbers:
        return None

    rows: list[LineNumberInfo] = []
    for record in line_numbers:
        start_pc = _require_label_offset(label_offsets, record.label, context="line number entry")
        rows.append(LineNumberInfo(start_pc, record.line_number))

    name_index = cp.add_utf8("LineNumberTable")
    row_count = len(rows)
    return LineNumberTableAttr(
        attribute_name_index=name_index,
        attribute_length=2 + 4 * row_count,  # u2 count + 4 bytes per row
        line_number_table_length=row_count,
        line_number_table=rows,
    )
765
+
766
+
767
+ def _local_range_length(start: int, end: int, *, context: str) -> int:
768
+ if end < start:
769
+ raise ValueError(f"{context} end label must not resolve before start label")
770
+ return end - start
771
+
772
+
773
def _build_local_variable_attribute(
    local_variables: list[LocalVariableEntry],
    label_offsets: dict[Label, int],
    cp: ConstantPoolBuilder,
) -> LocalVariableTableAttr | None:
    """Build a ``LocalVariableTable`` attribute from label-based entries.

    Args:
        local_variables: Entries to convert, in table order.
        label_offsets: Resolved offsets of all labels in the stream.
        cp: Constant-pool builder that receives names, descriptors and the
            attribute name.

    Returns:
        The attribute, or ``None`` when there are no entries.

    Raises:
        ValueError: If a label is unresolved or an entry's end label resolves
            before its start label.
    """
    if not local_variables:
        return None

    rows: list[LocalVariableInfo] = []
    for record in local_variables:
        start_pc = _require_label_offset(label_offsets, record.start, context="local variable start")
        end_pc = _require_label_offset(label_offsets, record.end, context="local variable end")
        length = _local_range_length(start_pc, end_pc, context="local variable range")
        name_index = cp.add_utf8(record.name)
        descriptor_index = cp.add_utf8(record.descriptor)
        rows.append(LocalVariableInfo(start_pc, length, name_index, descriptor_index, record.slot))

    attribute_name = cp.add_utf8("LocalVariableTable")
    row_count = len(rows)
    return LocalVariableTableAttr(
        attribute_name_index=attribute_name,
        attribute_length=2 + 10 * row_count,  # u2 count + 10 bytes per row
        local_variable_table_length=row_count,
        local_variable_table=rows,
    )
800
+
801
+
802
def _build_local_variable_type_attribute(
    local_variable_types: list[LocalVariableTypeEntry],
    label_offsets: dict[Label, int],
    cp: ConstantPoolBuilder,
) -> LocalVariableTypeTableAttr | None:
    """Build a ``LocalVariableTypeTable`` attribute from label-based entries.

    Args:
        local_variable_types: Entries to convert, in table order.
        label_offsets: Resolved offsets of all labels in the stream.
        cp: Constant-pool builder that receives names, signatures and the
            attribute name.

    Returns:
        The attribute, or ``None`` when there are no entries.

    Raises:
        ValueError: If a label is unresolved or an entry's end label resolves
            before its start label.
    """
    if not local_variable_types:
        return None

    rows: list[LocalVariableTypeInfo] = []
    for record in local_variable_types:
        start_pc = _require_label_offset(label_offsets, record.start, context="local variable type start")
        end_pc = _require_label_offset(label_offsets, record.end, context="local variable type end")
        length = _local_range_length(start_pc, end_pc, context="local variable type range")
        name_index = cp.add_utf8(record.name)
        signature_index = cp.add_utf8(record.signature)
        rows.append(LocalVariableTypeInfo(start_pc, length, name_index, signature_index, record.slot))

    attribute_name = cp.add_utf8("LocalVariableTypeTable")
    row_count = len(rows)
    return LocalVariableTypeTableAttr(
        attribute_name_index=attribute_name,
        attribute_length=2 + 10 * row_count,  # u2 count + 10 bytes per row
        local_variable_type_table_length=row_count,
        local_variable_type_table=rows,
    )
829
+
830
+
831
def _lower_resolved_code(
    code: CodeModel,
    items: list[CodeItem],
    resolution: LabelResolution,
    cp: ConstantPoolBuilder,
    keep_debug_info: bool,
) -> CodeAttr:
    """Assemble a raw ``CodeAttr`` from an already-resolved instruction stream.

    Constant-pool entries are added to *cp* in this order: instruction
    operands, exception catch types, debug attributes (when kept), and
    finally the ``"Code"`` attribute name.

    Args:
        code: Source model supplying handlers, debug entries, stack/local
            limits and other nested attributes.
        items: Instruction stream that *resolution* was computed for.
        resolution: Offsets for *items*.
        cp: Constant-pool builder to populate.
        keep_debug_info: When ``False``, no line-number or local-variable
            attributes are emitted.

    Returns:
        The lowered ``CodeAttr``.
    """
    offsets_by_label = resolution.label_offsets

    instructions: list[InsnInfo] = []
    for entry, position in zip(items, resolution.instruction_offsets, strict=True):
        emitted = _lower_instruction(entry, position, offsets_by_label, cp)
        if emitted is not None:  # labels produce no instruction
            instructions.append(emitted)

    handlers = _lower_exception_handlers(code.exception_handlers, offsets_by_label, cp)

    if keep_debug_info:
        line_table = _build_line_number_attribute(code.line_numbers, offsets_by_label, cp)
        variable_table = _build_local_variable_attribute(code.local_variables, offsets_by_label, cp)
        variable_type_table = _build_local_variable_type_attribute(
            code.local_variable_types,
            offsets_by_label,
            cp,
        )
    else:
        line_table = variable_table = variable_type_table = None

    nested = _ordered_nested_code_attributes(code, line_table, variable_table, variable_type_table)

    byte_length = resolution.total_code_length
    handler_count = len(handlers)
    return CodeAttr(
        attribute_name_index=cp.add_utf8("Code"),
        attribute_length=_code_attribute_length(byte_length, handler_count, nested),
        max_stacks=code.max_stack,
        max_locals=code.max_locals,
        code_length=byte_length,
        code=instructions,
        exception_table_length=handler_count,
        exception_table=handlers,
        attributes_count=len(nested),
        attributes=nested,
    )
880
+
881
+
882
def lower_code(
    code: CodeModel,
    cp: ConstantPoolBuilder,
    *,
    method: MethodModel | None = None,
    class_name: str | None = None,
    resolver: ClassResolver | None = None,
    recompute_frames: bool = False,
    debug_info: DebugInfoPolicy | str = DebugInfoPolicy.PRESERVE,
) -> CodeAttr:
    """Lower a label-based ``CodeModel`` into a raw ``CodeAttr``.

    Converts symbolic label references into concrete bytecode offsets,
    automatically widening branches that overflow the signed 16-bit range.
    The instruction list of *code* is not modified; lowering works on a copy.

    Args:
        code: The label-based code model to lower.
        cp: Constant-pool builder used to allocate pool entries for operands.
        method: Method that owns *code*. Required when *recompute_frames*
            is ``True``.
        class_name: Internal name of the class containing the method.
            Required when *recompute_frames* is ``True``.
        resolver: Optional class hierarchy resolver for frame computation.
        recompute_frames: When ``True``, ``max_stack``, ``max_locals``, and
            the ``StackMapTable`` attribute are recomputed via
            ``compute_frames``.
        debug_info: Policy controlling whether debug attributes
            (``LineNumberTable``, ``LocalVariableTable``,
            ``LocalVariableTypeTable``) are preserved or stripped.
            Debug metadata reported stale by ``is_code_debug_info_stale``
            is stripped regardless.

    Returns:
        A fully resolved ``CodeAttr`` ready for binary serialisation.

    Raises:
        ValueError: If *recompute_frames* is ``True`` but *method* or
            *class_name* is ``None``, or if the resulting code length
            exceeds the JVM maximum of 65 535 bytes.
    """
    if recompute_frames and (method is None or class_name is None):
        raise ValueError("method and class_name are required when recompute_frames=True")

    debug_policy = normalize_debug_info_policy(debug_info)
    keep_debug_info = debug_policy is DebugInfoPolicy.PRESERVE and not is_code_debug_info_stale(code)
    items = [_clone_code_item(item) for item in code.instructions]

    # Promoting a branch grows the code, which can push other branches out of
    # range, so resolve and promote until a pass makes no edits.
    while True:
        resolution = resolve_labels(items, cp)
        if resolution.total_code_length > 65535:
            raise ValueError(f"code length {resolution.total_code_length} exceeds JVM maximum of 65535 bytes")
        if not _promote_overflow_branches(items, resolution):
            break
    # The loop only exits after a pass that left ``items`` untouched, so
    # ``resolution`` already describes the final stream and has passed the
    # length check; resolving again would only repeat the same work.

    # Dry run against a cloned pool: any lowering error surfaces here, before
    # the caller's constant pool has been modified.
    _lower_resolved_code(code, items, resolution, _clone_constant_pool_builder(cp), keep_debug_info)
    result = _lower_resolved_code(code, items, resolution, cp, keep_debug_info)

    if recompute_frames:
        # Narrowing for type checkers; both were validated at the top.
        assert method is not None and class_name is not None
        from .analysis import compute_frames
        from .attributes import StackMapTableAttr

        frame_result = compute_frames(
            code,
            method,
            class_name,
            cp,
            resolution.label_offsets,
            resolver,
        )
        result.max_stacks = frame_result.max_stack
        result.max_locals = frame_result.max_locals

        # Remember where the first existing StackMapTable sat so the
        # recomputed one can take its place; default to the end.
        stack_map_index = next(
            (i for i, attr in enumerate(result.attributes) if isinstance(attr, StackMapTableAttr)),
            len(result.attributes),
        )
        result.attributes = [attr for attr in result.attributes if not isinstance(attr, StackMapTableAttr)]
        if frame_result.stack_map_table is not None:
            insert_at = min(stack_map_index, len(result.attributes))
            result.attributes.insert(insert_at, frame_result.stack_map_table)
        _refresh_code_attr_metadata(result)

    return result
968
+
969
+
970
def resolve_catch_type(cp: ConstantPoolBuilder, catch_type_index: int) -> str | None:
    """Look up the class name behind an exception handler's catch type.

    Args:
        cp: Constant-pool builder that holds the referenced entry.
        catch_type_index: Constant-pool index of the catch type. The value
            ``0`` marks a catch-all (``finally``) handler.

    Returns:
        The internal name of the caught exception class, or ``None`` when
        *catch_type_index* is ``0``.

    Raises:
        ValueError: If a non-zero index refers to an entry that is not a
            ``CONSTANT_Class``.
    """
    if catch_type_index != 0:
        pool_entry = cp.get(catch_type_index)
        if isinstance(pool_entry, ClassInfo):
            # The class entry stores its name indirectly via a UTF-8 entry.
            return cp.resolve_utf8(pool_entry.name_index)
        raise ValueError(f"catch_type CP index {catch_type_index} is not a CONSTANT_Class")

    # Index zero: handler catches everything, so there is no type to name.
    return None
994
+
995
+
996
+ # ---------------------------------------------------------------------------
997
+ # LDC value lowering helpers
998
+ # ---------------------------------------------------------------------------
999
+
1000
+
1001
def _lower_ldc_value(value: LdcValue, cp: ConstantPoolBuilder) -> int:
    """Return the constant-pool index for *value*, creating the entry via *cp*.

    Each ``Ldc*`` wrapper is routed to the matching ``cp.add_*`` method.
    Any value that matches none of the explicit checks is lowered through
    ``cp.add_dynamic``.
    """
    if isinstance(value, LdcInt):
        index = cp.add_integer(value.value)
    elif isinstance(value, LdcFloat):
        index = cp.add_float(value.raw_bits)
    elif isinstance(value, LdcLong):
        # Reinterpret the (possibly negative) value as an unsigned 64-bit
        # pattern, then split it into the two 32-bit halves.
        bits = value.value & 0xFFFF_FFFF_FFFF_FFFF
        index = cp.add_long(bits >> 32, bits & 0xFFFF_FFFF)
    elif isinstance(value, LdcDouble):
        index = cp.add_double(value.high_bytes, value.low_bytes)
    elif isinstance(value, LdcString):
        index = cp.add_string(value.value)
    elif isinstance(value, LdcClass):
        index = cp.add_class(value.name)
    elif isinstance(value, LdcMethodType):
        index = cp.add_method_type(value.descriptor)
    elif isinstance(value, LdcMethodHandle):
        index = _lower_ldc_method_handle(value, cp)
    else:
        # Remaining case: treated as a dynamically computed constant.
        index = cp.add_dynamic(value.bootstrap_method_attr_index, value.name, value.descriptor)
    return index
1023
+
1024
+
1025
def _lower_ldc_method_handle(value: LdcMethodHandle, cp: ConstantPoolBuilder) -> int:
    """Add a CONSTANT_MethodHandle entry for *value* and return its index.

    The member reference the handle points at is chosen from
    ``value.reference_kind``:

    * 1-4 (field getters/setters) use a Fieldref.
    * 5 and 8 (``REF_invokeVirtual``, ``REF_newInvokeSpecial``) use a
      Methodref.
    * 9 (``REF_invokeInterface``) uses an InterfaceMethodref.
    * 6 and 7 (``REF_invokeStatic``, ``REF_invokeSpecial``) use an
      InterfaceMethodref when ``value.is_interface`` is truthy, otherwise a
      Methodref.

    Raises:
        ValueError: If ``value.reference_kind`` is none of the kinds above.
    """
    ref_kind = value.reference_kind

    # Pick the builder method first; the reference arguments are identical
    # for every kind.
    if ref_kind in (1, 2, 3, 4):
        add_reference = cp.add_fieldref
    elif ref_kind == 9 or (ref_kind in (6, 7) and value.is_interface):
        add_reference = cp.add_interface_methodref
    elif ref_kind in (5, 6, 7, 8):
        add_reference = cp.add_methodref
    else:
        raise ValueError(f"invalid MethodHandle reference_kind: {ref_kind}")

    member_index = add_reference(value.owner, value.name, value.descriptor)
    return cp.add_method_handle(ref_kind, member_index)