pytecode 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pytecode/verify.py ADDED
@@ -0,0 +1,1386 @@
1
+ """Structural validation for JVM classfiles (§4.8–4.10) with structured diagnostics.
2
+
3
+ Provides two entry points:
4
+
5
+ verify_classfile: Spec-level checks on a parsed ``ClassFile`` structure.
6
+ verify_classmodel: Symbolic-level checks on a mutable ``ClassModel``.
7
+
8
+ Both return a list of ``Diagnostic`` objects carrying severity, category,
9
+ message, and location context. By default all diagnostics are collected;
10
+ pass ``fail_fast=True`` to raise ``FailFastError`` on the first ERROR.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass, field
16
+ from enum import Enum
17
+
18
+ from .attributes import (
19
+ AnnotationDefaultAttr,
20
+ AttributeInfo,
21
+ BootstrapMethodsAttr,
22
+ CodeAttr,
23
+ ConstantValueAttr,
24
+ ExceptionsAttr,
25
+ LocalVariableTypeTableAttr,
26
+ MethodParametersAttr,
27
+ ModuleAttr,
28
+ ModuleMainClassAttr,
29
+ ModulePackagesAttr,
30
+ NestHostAttr,
31
+ NestMembersAttr,
32
+ PermittedSubclassesAttr,
33
+ RecordAttr,
34
+ RuntimeInvisibleAnnotationsAttr,
35
+ RuntimeInvisibleParameterAnnotationsAttr,
36
+ RuntimeInvisibleTypeAnnotationsAttr,
37
+ RuntimeVisibleAnnotationsAttr,
38
+ RuntimeVisibleParameterAnnotationsAttr,
39
+ RuntimeVisibleTypeAnnotationsAttr,
40
+ SignatureAttr,
41
+ SourceDebugExtensionAttr,
42
+ StackMapTableAttr,
43
+ )
44
+ from .constant_pool import (
45
+ ClassInfo,
46
+ ConstantPoolInfo,
47
+ DoubleInfo,
48
+ DynamicInfo,
49
+ FieldrefInfo,
50
+ FloatInfo,
51
+ IntegerInfo,
52
+ InterfaceMethodrefInfo,
53
+ InvokeDynamicInfo,
54
+ LongInfo,
55
+ MethodHandleInfo,
56
+ MethodrefInfo,
57
+ MethodTypeInfo,
58
+ ModuleInfo,
59
+ NameAndTypeInfo,
60
+ PackageInfo,
61
+ StringInfo,
62
+ Utf8Info,
63
+ )
64
+ from .constants import MAGIC, ClassAccessFlag, FieldAccessFlag, MethodAccessFlag
65
+ from .debug_info import is_class_debug_info_stale, is_code_debug_info_stale
66
+ from .descriptors import is_valid_field_descriptor, is_valid_method_descriptor
67
+ from .info import ClassFile, FieldInfo, MethodInfo
68
+ from .instructions import (
69
+ Branch,
70
+ BranchW,
71
+ ConstPoolIndex,
72
+ InsnInfoType,
73
+ InvokeDynamic,
74
+ InvokeInterface,
75
+ LocalIndex,
76
+ LookupSwitch,
77
+ MultiANewArray,
78
+ TableSwitch,
79
+ )
80
+ from .labels import (
81
+ BranchInsn,
82
+ Label,
83
+ LookupSwitchInsn,
84
+ TableSwitchInsn,
85
+ )
86
+ from .model import ClassModel, CodeModel, FieldModel, MethodModel
87
+ from .modified_utf8 import decode_modified_utf8
88
+
89
+ __all__ = [
90
+ "Category",
91
+ "Diagnostic",
92
+ "FailFastError",
93
+ "Location",
94
+ "Severity",
95
+ "verify_classfile",
96
+ "verify_classmodel",
97
+ ]
98
+
99
+ # ── Diagnostic model ──────────────────────────────────────────────────
100
+
101
+
102
class Severity(Enum):
    """How serious a ``Diagnostic`` is.

    Attributes:
        ERROR: The classfile violates the spec and is therefore invalid.
        WARNING: The construct is allowed but looks suspicious.
        INFO: A purely informational note; correctness is unaffected.
    """

    ERROR = "error"
    WARNING = "warning"
    INFO = "info"
114
+
115
+
116
class Category(Enum):
    """The area of the JVM specification a ``Diagnostic`` is about.

    Attributes:
        MAGIC: Magic number validation (§4.1).
        VERSION: Class file version checks (§4.1).
        CONSTANT_POOL: Constant pool structure and cross-references (§4.4).
        ACCESS_FLAGS: Access flag combination rules (§4.1, §4.5, §4.6).
        CLASS_STRUCTURE: Top-level class structure (this_class, super_class,
            interfaces).
        FIELD: Field-level validation (§4.5).
        METHOD: Method-level validation (§4.6).
        CODE: Code attribute and bytecode validation (§4.7.3, §4.9).
        ATTRIBUTE: Attribute structure and version constraints (§4.7).
        DESCRIPTOR: Field and method descriptor syntax (§4.3).
    """

    # Whole-file header checks.
    MAGIC = "magic"
    VERSION = "version"

    # Class-level structure.
    CONSTANT_POOL = "constant_pool"
    ACCESS_FLAGS = "access_flags"
    CLASS_STRUCTURE = "class_structure"

    # Member-level checks.
    FIELD = "field"
    METHOD = "method"
    CODE = "code"
    ATTRIBUTE = "attribute"
    DESCRIPTOR = "descriptor"
142
+
143
+
144
@dataclass(frozen=True)
class Location:
    """Where in a classfile a ``Diagnostic`` was raised.

    Every field defaults to ``None``; a diagnostic fills in only the ones
    that are meaningful for it.

    Attributes:
        class_name: Internal-form class name (e.g. ``java/lang/Object``).
        field_name: Name of the field concerned, if any.
        method_name: Name of the method concerned, if any.
        method_descriptor: Descriptor of the method concerned, if any.
        attribute_name: Name of the attribute concerned, if any.
        cp_index: Constant pool index of the entry concerned, if any.
        bytecode_offset: Offset inside a Code attribute, if any.
    """

    # Enclosing class.
    class_name: str | None = None

    # Member within the class.
    field_name: str | None = None
    method_name: str | None = None
    method_descriptor: str | None = None

    # Finer-grained position.
    attribute_name: str | None = None
    cp_index: int | None = None
    bytecode_offset: int | None = None
168
+
169
+
170
@dataclass(frozen=True)
class Diagnostic:
    """A single validation finding produced by the verifier.

    Attributes:
        severity: How serious the finding is.
        category: Which area of the JVM spec the finding relates to.
        message: Human-readable description of the issue.
        location: Where in the classfile the issue was detected.
    """

    severity: Severity
    category: Category
    message: str
    location: Location = field(default_factory=Location)

    def __str__(self) -> str:
        """Return a bracket-tagged, single-line summary.

        The shape is ``[SEVERITY] [category] message (key=value, ...)``. The
        parenthesised suffix lists every populated ``Location`` field and is
        left out entirely when none is set.
        """
        loc = self.location
        parts = [f"[{self.severity.value.upper()}]", f"[{self.category.value}]", self.message]
        loc_parts: list[str] = []
        # String-valued fields are skipped when empty or None.
        if loc.class_name:
            loc_parts.append(f"class={loc.class_name}")
        if loc.method_name:
            loc_parts.append(f"method={loc.method_name}")
        if loc.method_descriptor:
            loc_parts.append(f"desc={loc.method_descriptor}")
        if loc.field_name:
            loc_parts.append(f"field={loc.field_name}")
        # attribute_name is set by the attribute checks; render it like every
        # other location field so that context is not lost when printing.
        if loc.attribute_name:
            loc_parts.append(f"attr={loc.attribute_name}")
        # Integer fields use an explicit None test because 0 is a real value.
        if loc.cp_index is not None:
            loc_parts.append(f"cp={loc.cp_index}")
        if loc.bytecode_offset is not None:
            loc_parts.append(f"offset={loc.bytecode_offset}")
        if loc_parts:
            parts.append(f"({', '.join(loc_parts)})")
        return " ".join(parts)
205
+
206
+
207
class FailFastError(Exception):
    """Signals the first ERROR-severity diagnostic when ``fail_fast=True``.

    The exception message is the string form of the diagnostic.

    Attributes:
        diagnostic: The ``Diagnostic`` that caused verification to stop.
    """

    def __init__(self, diagnostic: Diagnostic) -> None:
        message = str(diagnostic)
        super().__init__(message)
        self.diagnostic = diagnostic
217
+
218
+
219
+ # ── Internal helpers ──────────────────────────────────────────────────
220
+
221
+
222
+ class _Collector:
223
+ """Accumulates diagnostics; optionally raises on first ERROR."""
224
+
225
+ __slots__ = ("diagnostics", "_fail_fast")
226
+
227
+ def __init__(self, fail_fast: bool) -> None:
228
+ self.diagnostics: list[Diagnostic] = []
229
+ self._fail_fast = fail_fast
230
+
231
+ def add(
232
+ self,
233
+ severity: Severity,
234
+ category: Category,
235
+ message: str,
236
+ location: Location | None = None,
237
+ ) -> None:
238
+ diag = Diagnostic(severity, category, message, location or Location())
239
+ self.diagnostics.append(diag)
240
+ if self._fail_fast and severity is Severity.ERROR:
241
+ raise FailFastError(diag)
242
+
243
+
244
+ def _resolve_cp_utf8(cp: list[ConstantPoolInfo | None], index: int) -> str | None:
245
+ """Resolve a CP index to a decoded UTF-8 string, or ``None`` if invalid."""
246
+ if 1 <= index < len(cp):
247
+ entry = cp[index]
248
+ if isinstance(entry, Utf8Info):
249
+ try:
250
+ return decode_modified_utf8(entry.str_bytes)
251
+ except Exception:
252
+ return None
253
+ return None
254
+
255
+
256
+ def _cp_entry(cp: list[ConstantPoolInfo | None], index: int) -> ConstantPoolInfo | None:
257
+ """Return the CP entry at *index*, or ``None`` if out of range."""
258
+ if 1 <= index < len(cp):
259
+ return cp[index]
260
+ return None
261
+
262
+
263
+ def _is_valid_internal_name(name: str) -> bool:
264
+ """Check if *name* is a valid JVM internal-form class name.
265
+
266
+ Internal names use ``/`` as the package separator (not ``.``), must not
267
+ contain ``;`` or ``[``, and must have non-empty segments.
268
+ """
269
+ if not name or name.startswith("/") or name.endswith("/") or "//" in name:
270
+ return False
271
+ if any(c in ".;[" for c in name):
272
+ return False
273
+ return True
274
+
275
+
276
+ def _is_valid_unqualified_name(name: str) -> bool:
277
+ """Check if *name* is a valid JVM unqualified name (field or method)."""
278
+ if not name:
279
+ return False
280
+ return not any(c in ".;[/" for c in name)
281
+
282
+
283
+ def _is_valid_method_name(name: str) -> bool:
284
+ """Check if *name* is a valid JVM method name."""
285
+ if name in ("<init>", "<clinit>"):
286
+ return True
287
+ if not name:
288
+ return False
289
+ return not any(c in ".;[/<>" for c in name)
290
+
291
+
292
def _resolve_class_name(cf: ClassFile) -> str | None:
    """Look up the class's own name for use in diagnostic locations.

    Follows ``this_class`` to a ``ClassInfo`` and decodes its name. Returns
    ``None`` if ``this_class`` does not lead to a ``ClassInfo`` or the name
    cannot be resolved.
    """
    pool = cf.constant_pool
    this_entry = _cp_entry(pool, cf.this_class)
    if not isinstance(this_entry, ClassInfo):
        return None
    return _resolve_cp_utf8(pool, this_entry.name_index)
298
+
299
+
300
+ # ── Attribute version requirements ────────────────────────────────────
301
+
302
# Maps an attribute class to ``(minimum major version, attribute name)``.
# The name is the text used in diagnostic messages and locations.
_ATTR_MIN_VERSION: dict[type[AttributeInfo], tuple[int, str]] = {
    # Major version 50.
    StackMapTableAttr: (50, "StackMapTable"),
    # Major version 49.
    SourceDebugExtensionAttr: (49, "SourceDebugExtension"),
    LocalVariableTypeTableAttr: (49, "LocalVariableTypeTable"),
    SignatureAttr: (49, "Signature"),
    RuntimeVisibleAnnotationsAttr: (49, "RuntimeVisibleAnnotations"),
    RuntimeInvisibleAnnotationsAttr: (49, "RuntimeInvisibleAnnotations"),
    RuntimeVisibleParameterAnnotationsAttr: (49, "RuntimeVisibleParameterAnnotations"),
    RuntimeInvisibleParameterAnnotationsAttr: (49, "RuntimeInvisibleParameterAnnotations"),
    AnnotationDefaultAttr: (49, "AnnotationDefault"),
    # Major version 51.
    BootstrapMethodsAttr: (51, "BootstrapMethods"),
    # Major version 52.
    MethodParametersAttr: (52, "MethodParameters"),
    RuntimeVisibleTypeAnnotationsAttr: (52, "RuntimeVisibleTypeAnnotations"),
    RuntimeInvisibleTypeAnnotationsAttr: (52, "RuntimeInvisibleTypeAnnotations"),
    # Major version 53.
    ModuleAttr: (53, "Module"),
    ModulePackagesAttr: (53, "ModulePackages"),
    ModuleMainClassAttr: (53, "ModuleMainClass"),
    # Major version 55.
    NestHostAttr: (55, "NestHost"),
    NestMembersAttr: (55, "NestMembers"),
    # Major versions 60 and 61.
    RecordAttr: (60, "Record"),
    PermittedSubclassesAttr: (61, "PermittedSubclasses"),
}
324
+
325
+
326
+ # ── Shared flag validation ────────────────────────────────────────────
327
+
328
+
329
def _check_class_flags(flags: ClassAccessFlag, loc: Location, dc: _Collector) -> None:
    """Report illegal class-level access-flag combinations to *dc*.

    Covers, in order: the INTERFACE rules, ANNOTATION without INTERFACE,
    extra flags alongside MODULE, and FINAL together with ABSTRACT on a
    non-interface. All findings use ``Category.ACCESS_FLAGS`` and *loc*.
    """

    def report(severity: Severity, message: str) -> None:
        dc.add(severity, Category.ACCESS_FLAGS, message, loc)

    is_interface = ClassAccessFlag.INTERFACE in flags

    if is_interface:
        # (violated?, severity, message), checked in this order.
        interface_rules = (
            (ClassAccessFlag.ABSTRACT not in flags, Severity.ERROR, "INTERFACE class must also be ABSTRACT"),
            (ClassAccessFlag.FINAL in flags, Severity.ERROR, "INTERFACE class must not be FINAL"),
            (ClassAccessFlag.SUPER in flags, Severity.WARNING, "INTERFACE class should not have SUPER flag"),
            (ClassAccessFlag.ENUM in flags, Severity.ERROR, "INTERFACE class must not be ENUM"),
        )
        for violated, severity, message in interface_rules:
            if violated:
                report(severity, message)

    if ClassAccessFlag.ANNOTATION in flags and not is_interface:
        report(Severity.ERROR, "ANNOTATION class must also be INTERFACE")

    if ClassAccessFlag.MODULE in flags:
        # Only SYNTHETIC is tolerated next to MODULE; any other bit is stray.
        tolerated = int(ClassAccessFlag.MODULE) | int(ClassAccessFlag.SYNTHETIC)
        stray_bits = int(flags) & ~tolerated
        if stray_bits:
            report(Severity.ERROR, f"MODULE class has unexpected flags: 0x{stray_bits:04X}")

    # The interface case is already handled by "must not be FINAL" above.
    if not is_interface and ClassAccessFlag.FINAL in flags and ClassAccessFlag.ABSTRACT in flags:
        report(Severity.ERROR, "Class cannot be both FINAL and ABSTRACT")
355
+
356
+
357
def _check_method_flags(
    flags: MethodAccessFlag,
    name: str | None,
    is_interface: bool,
    major: int,
    loc: Location,
    dc: _Collector,
) -> None:
    """Report illegal method access-flag combinations to *dc* (JVMS §4.6).

    Args:
        flags: The method's access flags.
        name: Resolved method name, or ``None`` if it could not be resolved.
        is_interface: Whether the declaring class is an interface.
        major: Classfile major version; selects the interface-method rules.
        loc: Location attached to every diagnostic emitted here.
        dc: Collector receiving the diagnostics.
    """
    vis_count = sum(
        1 for f in (MethodAccessFlag.PUBLIC, MethodAccessFlag.PRIVATE, MethodAccessFlag.PROTECTED) if f in flags
    )
    if vis_count > 1:
        dc.add(Severity.ERROR, Category.ACCESS_FLAGS, f"Method {name!r} has multiple visibility modifiers", loc)

    if MethodAccessFlag.ABSTRACT in flags:
        forbidden = (
            MethodAccessFlag.PRIVATE
            | MethodAccessFlag.STATIC
            | MethodAccessFlag.FINAL
            | MethodAccessFlag.SYNCHRONIZED
            | MethodAccessFlag.NATIVE
            | MethodAccessFlag.STRICT
        )
        bad = flags & forbidden
        if bad:
            dc.add(Severity.ERROR, Category.ACCESS_FLAGS, f"ABSTRACT method {name!r} has illegal flags: {bad!r}", loc)

    # <clinit> and <init> are exempt from the interface visibility rules.
    if is_interface and name not in ("<clinit>", "<init>"):
        if major < 52:
            # Before 52.0 every interface method must be PUBLIC and ABSTRACT.
            if MethodAccessFlag.PUBLIC not in flags or MethodAccessFlag.ABSTRACT not in flags:
                dc.add(
                    Severity.ERROR,
                    Category.ACCESS_FLAGS,
                    f"Interface method {name!r} must be PUBLIC ABSTRACT (pre-Java 8)",
                    loc,
                )
        elif MethodAccessFlag.PUBLIC not in flags and MethodAccessFlag.PRIVATE not in flags:
            # From 52.0 on, exactly one of PUBLIC or PRIVATE is required, so
            # PRIVATE interface methods are already legal at version 52.
            # Having both set is caught by the visibility check above.
            dc.add(
                Severity.ERROR,
                Category.ACCESS_FLAGS,
                f"Interface method {name!r} must be PUBLIC or PRIVATE",
                loc,
            )

    if name == "<init>":
        forbidden_init = (
            MethodAccessFlag.STATIC
            | MethodAccessFlag.FINAL
            | MethodAccessFlag.SYNCHRONIZED
            | MethodAccessFlag.NATIVE
            | MethodAccessFlag.ABSTRACT
            | MethodAccessFlag.BRIDGE
        )
        bad = flags & forbidden_init
        if bad:
            dc.add(Severity.ERROR, Category.METHOD, f"<init> has illegal flags: {bad!r}", loc)

    if name == "<clinit>":
        if MethodAccessFlag.STATIC not in flags:
            dc.add(Severity.ERROR, Category.METHOD, "<clinit> must be STATIC", loc)
420
+
421
+
422
def _check_field_flags(
    flags: FieldAccessFlag,
    name: str | None,
    is_interface: bool,
    loc: Location,
    dc: _Collector,
) -> None:
    """Report illegal field access-flag combinations to *dc*.

    Checks for more than one visibility modifier, FINAL combined with
    VOLATILE, and (for interface fields) missing PUBLIC/STATIC/FINAL.
    """

    def error(message: str) -> None:
        dc.add(Severity.ERROR, Category.ACCESS_FLAGS, message, loc)

    visibility = (FieldAccessFlag.PUBLIC, FieldAccessFlag.PRIVATE, FieldAccessFlag.PROTECTED)
    if len([modifier for modifier in visibility if modifier in flags]) > 1:
        error(f"Field {name!r} has multiple visibility modifiers")

    if FieldAccessFlag.FINAL in flags and FieldAccessFlag.VOLATILE in flags:
        error(f"Field {name!r} cannot be both FINAL and VOLATILE")

    if not is_interface:
        return

    mandatory = (FieldAccessFlag.PUBLIC, FieldAccessFlag.STATIC, FieldAccessFlag.FINAL)
    if not all(flag in flags for flag in mandatory):
        error(f"Interface field {name!r} must be PUBLIC STATIC FINAL")
442
+
443
+
444
+ # ── Shared attribute version checking ─────────────────────────────────
445
+
446
+
447
+ def _verify_attr_versions(attrs: list[AttributeInfo], major: int, loc: Location, dc: _Collector) -> None:
448
+ """Check that attributes satisfy their minimum version requirements."""
449
+ for attr in attrs:
450
+ attr_type = type(attr)
451
+ if attr_type in _ATTR_MIN_VERSION:
452
+ min_ver, attr_name = _ATTR_MIN_VERSION[attr_type]
453
+ if major < min_ver:
454
+ dc.add(
455
+ Severity.ERROR,
456
+ Category.ATTRIBUTE,
457
+ f"{attr_name} attribute requires classfile version >= {min_ver}, got {major}",
458
+ Location(
459
+ class_name=loc.class_name,
460
+ field_name=loc.field_name,
461
+ method_name=loc.method_name,
462
+ method_descriptor=loc.method_descriptor,
463
+ attribute_name=attr_name,
464
+ ),
465
+ )
466
+ if isinstance(attr, CodeAttr):
467
+ _verify_attr_versions(attr.attributes, major, loc, dc)
468
+
469
+
470
+ # ── ClassFile verification ────────────────────────────────────────────
471
+
472
+
473
def _verify_magic_version(cf: ClassFile, dc: _Collector, loc: Location) -> None:
    """Check the magic number and the major/minor version of *cf*.

    Reports a wrong magic number, a major version below 45, and, for major
    versions 56 and up, a minor version other than 0 or 65535.
    """
    major, minor = cf.major_version, cf.minor_version

    if cf.magic != MAGIC:
        dc.add(
            Severity.ERROR,
            Category.MAGIC,
            f"Invalid magic number: 0x{cf.magic:08X} (expected 0xCAFEBABE)",
            loc,
        )

    if major < 45:
        dc.add(Severity.ERROR, Category.VERSION, f"Major version {major} is below minimum 45", loc)

    minor_is_restricted = major >= 56
    if minor_is_restricted and minor != 0 and minor != 65535:
        dc.add(
            Severity.ERROR,
            Category.VERSION,
            f"Major version {major} (>=56) requires minor 0 or 65535, got {minor}",
            loc,
        )
490
+
491
+
492
def _verify_constant_pool(cf: ClassFile, dc: _Collector, class_name: str | None) -> None:
    """Validate constant-pool structure and cross-references (JVMS §4.4).

    Checks that ``constant_pool_count`` matches the pool length, that every
    index stored in an entry points at an entry of the required kind, that
    ``MethodHandle`` reference kinds are in range, and that each Long/Double
    is followed by an empty slot.

    Args:
        cf: The parsed classfile whose pool is checked.
        dc: Collector receiving the diagnostics.
        class_name: Class name used in diagnostic locations, if known.
    """
    cp = cf.constant_pool

    if cf.constant_pool_count != len(cp):
        dc.add(
            Severity.ERROR,
            Category.CONSTANT_POOL,
            f"constant_pool_count ({cf.constant_pool_count}) != len(constant_pool) ({len(cp)})",
            Location(class_name=class_name),
        )

    def _check_ref(index: int, expected: type | tuple[type, ...], entry_idx: int, ref_field: str) -> None:
        """Require ``cp[index]`` to exist and be an instance of *expected*."""
        loc = Location(class_name=class_name, cp_index=entry_idx)
        target = _cp_entry(cp, index)
        if target is None:
            dc.add(
                Severity.ERROR,
                Category.CONSTANT_POOL,
                f"CP#{entry_idx}.{ref_field} references invalid index {index}",
                loc,
            )
        elif not isinstance(target, expected):
            if isinstance(expected, type):
                exp_name = expected.__name__
            else:
                exp_name = " | ".join(t.__name__ for t in expected)
            dc.add(
                Severity.ERROR,
                Category.CONSTANT_POOL,
                f"CP#{entry_idx}.{ref_field} (index {index}) expected {exp_name}, got {type(target).__name__}",
                loc,
            )

    # §4.4.8: for REF_invokeStatic (6) and REF_invokeSpecial (7) an
    # InterfaceMethodref target is only permitted from version 52.0 on;
    # older classfiles must reference a Methodref.
    static_special_target: type | tuple[type, ...]
    if cf.major_version < 52:
        static_special_target = MethodrefInfo
    else:
        static_special_target = (MethodrefInfo, InterfaceMethodrefInfo)

    i = 1
    while i < len(cp):
        entry = cp[i]
        if entry is None:
            # A gap is only expected in the slot after a Long/Double, and
            # those slots are normally skipped by the ``i += 2`` below.
            if i > 1:
                prev = cp[i - 1]
                if not isinstance(prev, (LongInfo, DoubleInfo)):
                    dc.add(
                        Severity.WARNING,
                        Category.CONSTANT_POOL,
                        f"CP#{i} is None but previous entry is not Long/Double",
                        Location(class_name=class_name, cp_index=i),
                    )
            i += 1
            continue

        if isinstance(entry, ClassInfo):
            _check_ref(entry.name_index, Utf8Info, i, "name_index")
        elif isinstance(entry, StringInfo):
            _check_ref(entry.string_index, Utf8Info, i, "string_index")
        elif isinstance(entry, (FieldrefInfo, MethodrefInfo, InterfaceMethodrefInfo)):
            _check_ref(entry.class_index, ClassInfo, i, "class_index")
            _check_ref(entry.name_and_type_index, NameAndTypeInfo, i, "name_and_type_index")
        elif isinstance(entry, NameAndTypeInfo):
            _check_ref(entry.name_index, Utf8Info, i, "name_index")
            _check_ref(entry.descriptor_index, Utf8Info, i, "descriptor_index")
        elif isinstance(entry, MethodHandleInfo):
            loc = Location(class_name=class_name, cp_index=i)
            if not (1 <= entry.reference_kind <= 9):
                dc.add(
                    Severity.ERROR,
                    Category.CONSTANT_POOL,
                    f"CP#{i} MethodHandle has invalid reference_kind {entry.reference_kind}",
                    loc,
                )
            else:
                kind = entry.reference_kind
                if kind <= 4:
                    # Field access kinds (1-4).
                    _check_ref(entry.reference_index, FieldrefInfo, i, "reference_index")
                elif kind in (5, 8):
                    _check_ref(entry.reference_index, MethodrefInfo, i, "reference_index")
                elif kind in (6, 7):
                    _check_ref(entry.reference_index, static_special_target, i, "reference_index")
                elif kind == 9:
                    _check_ref(entry.reference_index, InterfaceMethodrefInfo, i, "reference_index")
        elif isinstance(entry, MethodTypeInfo):
            _check_ref(entry.descriptor_index, Utf8Info, i, "descriptor_index")
        elif isinstance(entry, (DynamicInfo, InvokeDynamicInfo)):
            _check_ref(entry.name_and_type_index, NameAndTypeInfo, i, "name_and_type_index")
        elif isinstance(entry, (ModuleInfo, PackageInfo)):
            _check_ref(entry.name_index, Utf8Info, i, "name_index")
        elif isinstance(entry, (LongInfo, DoubleInfo)):
            # Long/Double occupy two slots; the second one must be empty.
            if i + 1 < len(cp) and cp[i + 1] is not None:
                dc.add(
                    Severity.ERROR,
                    Category.CONSTANT_POOL,
                    f"CP#{i} is Long/Double but CP#{i + 1} is not empty",
                    Location(class_name=class_name, cp_index=i),
                )
            i += 2
            continue

        i += 1
593
+
594
+
595
def _verify_class_structure(cf: ClassFile, dc: _Collector, class_name: str | None) -> None:
    """Validate the top-level layout of *cf*.

    In order: ``this_class`` and ``super_class`` must reference a
    ``ClassInfo`` (a zero ``super_class`` is only expected for
    ``java/lang/Object``), interface indices must be valid and unique, the
    four ``*_count`` fields must match their lists, and no two fields or
    methods may share a resolved name and descriptor.
    """
    pool = cf.constant_pool
    where = Location(class_name=class_name)

    def points_to_class(index: int) -> bool:
        return isinstance(_cp_entry(pool, index), ClassInfo)

    def error(message: str) -> None:
        dc.add(Severity.ERROR, Category.CLASS_STRUCTURE, message, where)

    if not points_to_class(cf.this_class):
        error(f"this_class (index {cf.this_class}) does not point to CONSTANT_Class")

    if cf.super_class == 0:
        if class_name != "java/lang/Object":
            dc.add(
                Severity.WARNING,
                Category.CLASS_STRUCTURE,
                "super_class is 0 but class is not java/lang/Object",
                where,
            )
    elif not points_to_class(cf.super_class):
        error(f"super_class (index {cf.super_class}) does not point to CONSTANT_Class")

    visited_interfaces: set[int] = set()
    for iface_index in cf.interfaces:
        if not points_to_class(iface_index):
            error(f"Interface index {iface_index} does not point to CONSTANT_Class")
        if iface_index in visited_interfaces:
            error(f"Duplicate interface index {iface_index}")
        visited_interfaces.add(iface_index)

    # Declared counts must agree with the parsed lists.
    count_checks = (
        ("interfaces", cf.interfaces_count, cf.interfaces),
        ("fields", cf.fields_count, cf.fields),
        ("methods", cf.methods_count, cf.methods),
        ("attributes", cf.attributes_count, cf.attributes),
    )
    for label, declared, items in count_checks:
        if declared != len(items):
            error(f"{label}_count ({declared}) != len({label}) ({len(items)})")

    def report_duplicates(members, noun: str, joiner: str) -> None:
        """Flag members whose resolved (name, descriptor) was already seen."""
        seen: set[tuple[str | None, str | None]] = set()
        for member in members:
            member_name = _resolve_cp_utf8(pool, member.name_index)
            member_desc = _resolve_cp_utf8(pool, member.descriptor_index)
            signature = (member_name, member_desc)
            # Members with an unresolvable name are never reported.
            if member_name is not None and signature in seen:
                error(f"Duplicate {noun}: {member_name}{joiner}{member_desc}")
            seen.add(signature)

    report_duplicates(cf.fields, "field", " ")
    report_duplicates(cf.methods, "method", "")
684
+
685
+
686
def _verify_field(
    fi: FieldInfo,
    cf: ClassFile,
    dc: _Collector,
    class_name: str | None,
    is_interface: bool,
) -> None:
    """Validate one field: name, descriptor, flags, ConstantValue, attributes.

    Args:
        fi: The field to check.
        cf: The classfile that owns the field (constant pool and version).
        dc: Collector receiving the diagnostics.
        class_name: Class name used in diagnostic locations, if known.
        is_interface: Whether the declaring class is an interface.
    """
    pool = cf.constant_pool
    field_name = _resolve_cp_utf8(pool, fi.name_index)
    descriptor = _resolve_cp_utf8(pool, fi.descriptor_index)
    where = Location(class_name=class_name, field_name=field_name)

    def error(category: Category, message: str) -> None:
        dc.add(Severity.ERROR, category, message, where)

    name_is_utf8 = isinstance(_cp_entry(pool, fi.name_index), Utf8Info)
    if not name_is_utf8:
        error(Category.FIELD, f"Field name_index {fi.name_index} is not Utf8Info")
    elif field_name is not None and not _is_valid_unqualified_name(field_name):
        error(Category.FIELD, f"Invalid field name: {field_name!r}")

    descriptor_is_utf8 = isinstance(_cp_entry(pool, fi.descriptor_index), Utf8Info)
    if not descriptor_is_utf8:
        error(Category.FIELD, f"Field descriptor_index {fi.descriptor_index} is not Utf8Info")
    elif descriptor is not None and not is_valid_field_descriptor(descriptor):
        error(Category.DESCRIPTOR, f"Invalid field descriptor: {descriptor!r}")

    _check_field_flags(fi.access_flags, field_name, is_interface, where, dc)

    # ConstantValue: at most one, valid index, type matching the descriptor.
    constant_values_seen = 0
    for attr in fi.attributes:
        if not isinstance(attr, ConstantValueAttr):
            continue
        constant_values_seen += 1
        if constant_values_seen > 1:
            error(Category.FIELD, f"Field {field_name!r} has multiple ConstantValue attributes")

        constant = _cp_entry(pool, attr.constantvalue_index)
        if constant is None:
            error(
                Category.FIELD,
                f"Field {field_name!r} ConstantValue index {attr.constantvalue_index} is invalid",
            )
        elif descriptor is not None and is_valid_field_descriptor(descriptor):
            _verify_cv_type(descriptor, constant, field_name, where, dc)

        if FieldAccessFlag.STATIC not in fi.access_flags:
            dc.add(
                Severity.WARNING,
                Category.FIELD,
                f"Non-static field {field_name!r} has ConstantValue (ignored by JVM)",
                where,
            )

    _verify_attr_versions(fi.attributes, cf.major_version, where, dc)
741
+
742
+
743
+ def _verify_cv_type(
744
+ desc: str,
745
+ entry: ConstantPoolInfo,
746
+ field_name: str | None,
747
+ loc: Location,
748
+ dc: _Collector,
749
+ ) -> None:
750
+ """Check that a ConstantValue entry type matches the field descriptor."""
751
+ char = desc[0] if desc else ""
752
+ expected: type | tuple[type, ...] | None = None
753
+ if char in "ISCBZ":
754
+ expected = IntegerInfo
755
+ elif char == "J":
756
+ expected = LongInfo
757
+ elif char == "F":
758
+ expected = FloatInfo
759
+ elif char == "D":
760
+ expected = DoubleInfo
761
+ elif desc == "Ljava/lang/String;":
762
+ expected = StringInfo
763
+ if expected is not None and not isinstance(entry, expected):
764
+ exp_name = expected.__name__
765
+ msg = (
766
+ f"Field {field_name!r} ConstantValue type mismatch: "
767
+ f"descriptor {desc!r} expects {exp_name}, got {type(entry).__name__}"
768
+ )
769
+ dc.add(Severity.ERROR, Category.FIELD, msg, loc)
770
+
771
+
772
def _verify_method(
    mi: MethodInfo,
    cf: ClassFile,
    dc: _Collector,
    class_name: str | None,
    is_interface: bool,
) -> None:
    """Validate one method: name, descriptor, flags, Code/Exceptions, attributes.

    Args:
        mi: The method to check.
        cf: The classfile that owns the method (constant pool and version).
        dc: Collector receiving the diagnostics.
        class_name: Class name used in diagnostic locations, if known.
        is_interface: Whether the declaring class is an interface.
    """
    pool = cf.constant_pool
    major = cf.major_version
    method_name = _resolve_cp_utf8(pool, mi.name_index)
    descriptor = _resolve_cp_utf8(pool, mi.descriptor_index)
    where = Location(class_name=class_name, method_name=method_name, method_descriptor=descriptor)

    def error(category: Category, message: str) -> None:
        dc.add(Severity.ERROR, category, message, where)

    if not isinstance(_cp_entry(pool, mi.name_index), Utf8Info):
        error(Category.METHOD, f"Method name_index {mi.name_index} is not Utf8Info")
    elif method_name is not None and not _is_valid_method_name(method_name):
        error(Category.METHOD, f"Invalid method name: {method_name!r}")

    if not isinstance(_cp_entry(pool, mi.descriptor_index), Utf8Info):
        error(Category.METHOD, f"Method descriptor_index {mi.descriptor_index} is not Utf8Info")
    elif descriptor is not None and not is_valid_method_descriptor(descriptor):
        error(Category.DESCRIPTOR, f"Invalid method descriptor: {descriptor!r}")

    _check_method_flags(mi.access_flags, method_name, is_interface, major, where, dc)

    if method_name == "<clinit>" and descriptor is not None and descriptor != "()V":
        error(Category.METHOD, f"<clinit> must have descriptor ()V, got {descriptor!r}")

    # Collect the Code attributes once; the checks below only need the list.
    code_attrs = [attr for attr in mi.attributes if isinstance(attr, CodeAttr)]
    code_count = len(code_attrs)
    is_abstract = MethodAccessFlag.ABSTRACT in mi.access_flags
    is_native = MethodAccessFlag.NATIVE in mi.access_flags

    if is_abstract or is_native:
        # Bodiless methods must not carry Code.
        if code_count:
            label = "ABSTRACT" if is_abstract else "NATIVE"
            error(Category.METHOD, f"{label} method {method_name!r} must not have a Code attribute")
    else:
        # Concrete methods need exactly one Code attribute.
        if not code_count:
            error(Category.METHOD, f"Method {method_name!r} must have a Code attribute")
        if code_count > 1:
            error(Category.METHOD, f"Method {method_name!r} has {code_count} Code attributes (max 1)")

    exceptions_count = len([attr for attr in mi.attributes if isinstance(attr, ExceptionsAttr)])
    if exceptions_count > 1:
        error(Category.METHOD, f"Method {method_name!r} has {exceptions_count} Exceptions attributes (max 1)")

    for code_attr in code_attrs:
        _verify_code(code_attr, cf, dc, class_name, method_name, descriptor)

    _verify_attr_versions(mi.attributes, major, where, dc)
839
+
840
+
841
+ # ── Code attribute verification ───────────────────────────────────────
842
+
843
# Instruction types grouped by the kind of operand they share.
# NOTE(review): presumably consumed by _verify_code to check constant-pool
# operand kinds; confirm against the rest of that function.
_FIELD_OPS = frozenset(
    (
        InsnInfoType.GETFIELD,
        InsnInfoType.PUTFIELD,
        InsnInfoType.GETSTATIC,
        InsnInfoType.PUTSTATIC,
    )
)
_METHOD_OPS = frozenset(
    (
        InsnInfoType.INVOKEVIRTUAL,
        InsnInfoType.INVOKESPECIAL,
        InsnInfoType.INVOKESTATIC,
    )
)
_CLASS_OPS = frozenset(
    (
        InsnInfoType.NEW,
        InsnInfoType.CHECKCAST,
        InsnInfoType.INSTANCEOF,
        InsnInfoType.ANEWARRAY,
    )
)
846
+
847
+
848
def _verify_code(
    code: CodeAttr,
    cf: ClassFile,
    dc: _Collector,
    class_name: str | None,
    method_name: str | None,
    method_desc: str | None,
) -> None:
    """Check a single ``Code`` attribute for structural problems.

    Diagnostics are emitted in this order: header range checks
    (``code_length``, ``max_stack``, ``max_locals``), branch/switch targets
    that do not land on an instruction start, exception-table entries,
    constant-pool operands (delegated to ``_verify_code_cp_refs``) and nested
    attribute checks (delegated to ``_verify_attr_versions``).  If the
    attribute carries no decoded instructions, only the header checks run.

    Args:
        code: The Code attribute to inspect.
        cf: Owning class file; supplies the constant pool and major version.
        dc: Collector that receives every diagnostic.
        class_name: Class name used for diagnostic locations.
        method_name: Method name used for diagnostic locations.
        method_desc: Method descriptor used for diagnostic locations.
    """
    pool = cf.constant_pool
    version = cf.major_version
    method_loc = Location(class_name=class_name, method_name=method_name, method_descriptor=method_desc)

    def report(message: str, where: Location) -> None:
        # Every finding in this function is an ERROR in the CODE category.
        dc.add(Severity.ERROR, Category.CODE, message, where)

    # ── Header fields ──
    if code.code_length <= 0:
        report("code_length must be > 0", method_loc)
    if code.code_length > 65535:
        report(f"code_length {code.code_length} exceeds 65535", method_loc)
    if not 0 <= code.max_stacks <= 65535:
        report(f"max_stack {code.max_stacks} out of range [0, 65535]", method_loc)
    if not 0 <= code.max_locals <= 65535:
        report(f"max_locals {code.max_locals} out of range [0, 65535]", method_loc)

    instructions = code.code
    if not instructions:
        return

    # Offsets at which an instruction begins; any jump must land on one.
    starts: set[int] = {insn.bytecode_offset for insn in instructions}

    # ── Branch and switch targets ──
    for insn in instructions:
        pc = insn.bytecode_offset
        where = Location(
            class_name=class_name,
            method_name=method_name,
            method_descriptor=method_desc,
            bytecode_offset=pc,
        )

        if isinstance(insn, (Branch, BranchW)):
            # Branch takes precedence over BranchW when naming the instruction.
            prefix = "Branch" if isinstance(insn, Branch) else "Wide branch"
            dest = pc + insn.offset
            if dest not in starts:
                report(f"{prefix} at offset {pc} targets invalid offset {dest}", where)
            continue

        if isinstance(insn, LookupSwitch):
            dest = pc + insn.default
            if dest not in starts:
                report(f"lookupswitch default targets invalid offset {dest}", where)
            for pair in insn.pairs:
                dest = pc + pair.offset
                if dest in starts:
                    continue
                report(f"lookupswitch case {pair.match} targets invalid offset {dest}", where)
            continue

        if isinstance(insn, TableSwitch):
            dest = pc + insn.default
            if dest not in starts:
                report(f"tableswitch default targets invalid offset {dest}", where)
            # Case values run consecutively starting at ``low``.
            for case_value, relative in enumerate(insn.offsets, start=insn.low):
                dest = pc + relative
                if dest in starts:
                    continue
                report(f"tableswitch case {case_value} targets invalid offset {dest}", where)

    # ── Exception table ──
    for handler in code.exception_table:
        where = Location(class_name=class_name, method_name=method_name, method_descriptor=method_desc)
        start = handler.start_pc
        end = handler.end_pc
        target = handler.handler_pc

        if start not in starts:
            report(f"Exception handler start_pc {start} is not a valid instruction offset", where)
        # end_pc is exclusive, so it may also equal code_length.
        if end != code.code_length and end not in starts:
            report(f"Exception handler end_pc {end} is not a valid offset or code_length", where)
        if start >= end:
            report(f"Exception handler start_pc ({start}) must be < end_pc ({end})", where)
        if target not in starts:
            report(f"Exception handler handler_pc {target} is not a valid instruction offset", where)

        catch = handler.catch_type
        # catch_type 0 is exempt from the CONSTANT_Class requirement.
        if catch != 0 and not isinstance(_cp_entry(pool, catch), ClassInfo):
            report(f"Exception handler catch_type {catch} does not point to CONSTANT_Class", where)

    # ── Constant-pool operands, then nested attributes ──
    _verify_code_cp_refs(code, pool, version, dc, class_name, method_name, method_desc)
    _verify_attr_versions(code.attributes, version, method_loc, dc)
982
+
983
+
984
def _verify_code_cp_refs(
    code: CodeAttr,
    cp: list[ConstantPoolInfo | None],
    major: int,
    dc: _Collector,
    class_name: str | None,
    method_name: str | None,
    method_desc: str | None,
) -> None:
    """Validate CP references in bytecode instructions.

    For every instruction that carries a constant-pool index, check that the
    index resolves to an entry and that the entry has the kind required by
    the opcode (JVMS §4.9.1 static constraints).  All findings are reported
    as ERROR / CODE diagnostics located at the instruction's bytecode offset.

    Args:
        code: Code attribute whose decoded instructions are inspected.
        cp: Constant pool of the owning class file.
        major: Class file major version; gates version-dependent rules.
        dc: Collector that receives every diagnostic.
        class_name: Class name used for diagnostic locations.
        method_name: Method name used for diagnostic locations.
        method_desc: Method descriptor used for diagnostic locations.
    """
    for insn in code.code:
        loc = Location(
            class_name=class_name,
            method_name=method_name,
            method_descriptor=method_desc,
            bytecode_offset=insn.bytecode_offset,
        )

        if isinstance(insn, ConstPoolIndex):
            entry = _cp_entry(cp, insn.index)
            if entry is None:
                dc.add(Severity.ERROR, Category.CODE, f"{insn.type.name} references invalid CP index {insn.index}", loc)
                continue

            if insn.type in _FIELD_OPS:
                if not isinstance(entry, FieldrefInfo):
                    dc.add(
                        Severity.ERROR,
                        Category.CODE,
                        f"{insn.type.name} CP#{insn.index} expected FieldrefInfo, got {type(entry).__name__}",
                        loc,
                    )
            elif insn.type in _METHOD_OPS:
                # Only invokespecial/invokestatic may name an InterfaceMethodref,
                # and only from class file version 52.0.  invokevirtual must
                # always reference a plain Methodref.
                allows_interface_ref = major >= 52 and insn.type != InsnInfoType.INVOKEVIRTUAL
                if allows_interface_ref:
                    if not isinstance(entry, (MethodrefInfo, InterfaceMethodrefInfo)):
                        msg = (
                            f"{insn.type.name} CP#{insn.index} expected "
                            f"Methodref/InterfaceMethodref, got {type(entry).__name__}"
                        )
                        dc.add(Severity.ERROR, Category.CODE, msg, loc)
                elif not isinstance(entry, MethodrefInfo):
                    dc.add(
                        Severity.ERROR,
                        Category.CODE,
                        f"{insn.type.name} CP#{insn.index} expected MethodrefInfo, got {type(entry).__name__}",
                        loc,
                    )
            elif insn.type in _CLASS_OPS:
                if not isinstance(entry, ClassInfo):
                    dc.add(
                        Severity.ERROR,
                        Category.CODE,
                        f"{insn.type.name} CP#{insn.index} expected ClassInfo, got {type(entry).__name__}",
                        loc,
                    )
            elif insn.type == InsnInfoType.LDC_W:
                _verify_ldc_entry(entry, insn.index, major, loc, dc)
            elif insn.type == InsnInfoType.LDC2_W:
                if isinstance(entry, DynamicInfo):
                    # A dynamically-computed constant of type long/double is a
                    # legal ldc2_w operand from version 55.0.
                    # NOTE(review): the constant's descriptor (J or D) is not
                    # inspected here, mirroring _verify_ldc_entry.
                    if major < 55:
                        dc.add(
                            Severity.ERROR,
                            Category.CODE,
                            f"LDC2_W CP#{insn.index} DynamicInfo requires version >= 55, got {major}",
                            loc,
                        )
                elif not isinstance(entry, (LongInfo, DoubleInfo)):
                    dc.add(
                        Severity.ERROR,
                        Category.CODE,
                        f"LDC2_W CP#{insn.index} expected Long/Double, got {type(entry).__name__}",
                        loc,
                    )

        elif isinstance(insn, LocalIndex) and insn.type == InsnInfoType.LDC:
            # Plain LDC is handled here because it is decoded as a LocalIndex
            # instruction rather than a ConstPoolIndex one.
            entry = _cp_entry(cp, insn.index)
            if entry is None:
                dc.add(Severity.ERROR, Category.CODE, f"LDC references invalid CP index {insn.index}", loc)
            else:
                _verify_ldc_entry(entry, insn.index, major, loc, dc)

        elif isinstance(insn, InvokeInterface):
            entry = _cp_entry(cp, insn.index)
            if entry is None:
                dc.add(
                    Severity.ERROR,
                    Category.CODE,
                    f"INVOKEINTERFACE references invalid CP index {insn.index}",
                    loc,
                )
            elif not isinstance(entry, InterfaceMethodrefInfo):
                dc.add(
                    Severity.ERROR,
                    Category.CODE,
                    f"INVOKEINTERFACE CP#{insn.index} expected InterfaceMethodrefInfo, got {type(entry).__name__}",
                    loc,
                )

        elif isinstance(insn, InvokeDynamic):
            entry = _cp_entry(cp, insn.index)
            if entry is None:
                dc.add(
                    Severity.ERROR,
                    Category.CODE,
                    f"INVOKEDYNAMIC references invalid CP index {insn.index}",
                    loc,
                )
            elif not isinstance(entry, InvokeDynamicInfo):
                dc.add(
                    Severity.ERROR,
                    Category.CODE,
                    f"INVOKEDYNAMIC CP#{insn.index} expected InvokeDynamicInfo, got {type(entry).__name__}",
                    loc,
                )

        elif isinstance(insn, MultiANewArray):
            entry = _cp_entry(cp, insn.index)
            if entry is None:
                dc.add(
                    Severity.ERROR,
                    Category.CODE,
                    f"MULTIANEWARRAY references invalid CP index {insn.index}",
                    loc,
                )
            elif not isinstance(entry, ClassInfo):
                dc.add(
                    Severity.ERROR,
                    Category.CODE,
                    f"MULTIANEWARRAY CP#{insn.index} expected ClassInfo, got {type(entry).__name__}",
                    loc,
                )
            # The dimension count is checked independently of the CP entry.
            if insn.dimensions < 1:
                dc.add(
                    Severity.ERROR,
                    Category.CODE,
                    f"MULTIANEWARRAY dimensions must be >= 1, got {insn.dimensions}",
                    loc,
                )
1114
+
1115
+
1116
def _verify_ldc_entry(entry: ConstantPoolInfo, idx: int, major: int, loc: Location, dc: _Collector) -> None:
    """Check that a constant-pool entry may be loaded by LDC / LDC_W.

    Integer, Float and String entries are always accepted.  Class,
    MethodHandle/MethodType and Dynamic entries are accepted only from major
    versions 49, 51 and 55 respectively; any other entry kind is reported as
    non-loadable.

    Args:
        entry: The resolved constant-pool entry.
        idx: Constant-pool index of ``entry`` (used in messages only).
        major: Class file major version.
        loc: Location attached to any diagnostic.
        dc: Collector that receives the diagnostic.
    """
    if isinstance(entry, (IntegerInfo, FloatInfo, StringInfo)):
        return

    # Pick the minimum major version and the label used in the message.
    if isinstance(entry, ClassInfo):
        min_major, label = 49, "ClassInfo"
    elif isinstance(entry, (MethodHandleInfo, MethodTypeInfo)):
        min_major, label = 51, type(entry).__name__
    elif isinstance(entry, DynamicInfo):
        min_major, label = 55, "DynamicInfo"
    else:
        dc.add(
            Severity.ERROR,
            Category.CODE,
            f"LDC CP#{idx} has non-loadable type {type(entry).__name__}",
            loc,
        )
        return

    if major < min_major:
        dc.add(
            Severity.ERROR,
            Category.CODE,
            f"LDC CP#{idx} {label} requires version >= {min_major}, got {major}",
            loc,
        )
1153
+
1154
+
1155
+ # ── Main entry: verify_classfile ──────────────────────────────────────
1156
+
1157
+
1158
def verify_classfile(cf: ClassFile, *, fail_fast: bool = False) -> list[Diagnostic]:
    """Run the spec-level structural checks (§4.8) on a parsed ``ClassFile``.

    The passes run in a fixed order: magic number and version, constant pool,
    class access flags, class structure, every field, every method (including
    Code attributes and their constant-pool references), and finally the
    class-level attributes.

    Args:
        cf: Parsed class file structure to validate.
        fail_fast: If ``True``, raise ``FailFastError`` on the first
            ERROR-severity diagnostic instead of collecting all issues.

    Returns:
        List of diagnostics found during validation.

    Raises:
        FailFastError: If ``fail_fast`` is ``True`` and an ERROR is found.
    """
    collector = _Collector(fail_fast)
    owner = _resolve_class_name(cf)
    class_loc = Location(class_name=owner)

    # Class-wide passes.
    _verify_magic_version(cf, collector, class_loc)
    _verify_constant_pool(cf, collector, owner)
    _check_class_flags(cf.access_flags, class_loc, collector)
    _verify_class_structure(cf, collector, owner)

    # Member passes need to know whether the owner is an interface.
    owner_is_interface = ClassAccessFlag.INTERFACE in cf.access_flags
    for field_info in cf.fields:
        _verify_field(field_info, cf, collector, owner, owner_is_interface)
    for method_info in cf.methods:
        _verify_method(method_info, cf, collector, owner, owner_is_interface)

    # Class-level attributes come last.
    _verify_attr_versions(cf.attributes, cf.major_version, class_loc, collector)

    return collector.diagnostics
1196
+
1197
+
1198
+ # ── ClassModel verification ───────────────────────────────────────────
1199
+
1200
+
1201
def _verify_model_names(cm: ClassModel, dc: _Collector) -> None:
    """Check the class, superclass and interface names of a ``ClassModel``.

    Invalid internal names and repeated interfaces are errors; a missing
    superclass on anything other than ``java/lang/Object`` is a warning.
    """
    where = Location(class_name=cm.name)

    def flag(severity: Severity, text: str) -> None:
        # All findings here belong to the CLASS_STRUCTURE category.
        dc.add(severity, Category.CLASS_STRUCTURE, text, where)

    if not _is_valid_internal_name(cm.name):
        flag(Severity.ERROR, f"Invalid class name: {cm.name!r}")

    parent = cm.super_name
    if parent is None:
        if cm.name != "java/lang/Object":
            flag(Severity.WARNING, "No superclass (only valid for java/lang/Object)")
    elif not _is_valid_internal_name(parent):
        flag(Severity.ERROR, f"Invalid super class name: {parent!r}")

    visited: set[str] = set()
    for iface in cm.interfaces:
        if not _is_valid_internal_name(iface):
            flag(Severity.ERROR, f"Invalid interface name: {iface!r}")
        if iface in visited:
            flag(Severity.ERROR, f"Duplicate interface: {iface!r}")
        visited.add(iface)
1220
+
1221
+
1222
def _verify_model_duplicates(cm: ClassModel, dc: _Collector) -> None:
    """Report fields or methods that repeat a (name, descriptor) pair.

    Fields are scanned first, then methods; each repeated occurrence after
    the first yields one CLASS_STRUCTURE error.
    """
    where = Location(class_name=cm.name)

    # (members, message template) -- note the field message separates name
    # and descriptor with a space while the method message does not.
    passes = (
        (cm.fields, "Duplicate field: {} {}"),
        (cm.methods, "Duplicate method: {}{}"),
    )
    for members, template in passes:
        signatures: set[tuple[str, str]] = set()
        for member in members:
            sig = (member.name, member.descriptor)
            if sig in signatures:
                dc.add(Severity.ERROR, Category.CLASS_STRUCTURE, template.format(*sig), where)
            signatures.add(sig)
1238
+
1239
+
1240
def _verify_model_field(fm: FieldModel, cm: ClassModel, dc: _Collector) -> None:
    """Check one field of a ``ClassModel``: name, descriptor, flags, attributes."""
    where = Location(class_name=cm.name, field_name=fm.name)

    name_ok = _is_valid_unqualified_name(fm.name)
    if not name_ok:
        dc.add(Severity.ERROR, Category.FIELD, f"Invalid field name: {fm.name!r}", where)

    descriptor_ok = is_valid_field_descriptor(fm.descriptor)
    if not descriptor_ok:
        dc.add(Severity.ERROR, Category.DESCRIPTOR, f"Invalid field descriptor: {fm.descriptor!r}", where)

    # Flag rules depend on whether the owning class is an interface.
    owner_is_interface = ClassAccessFlag.INTERFACE in cm.access_flags
    _check_field_flags(fm.access_flags, fm.name, owner_is_interface, where, dc)

    # cm.version[0] is used as the major version for attribute checks.
    _verify_attr_versions(fm.attributes, cm.version[0], where, dc)
1252
+
1253
+
1254
def _verify_model_method(mm: MethodModel, cm: ClassModel, dc: _Collector) -> None:
    """Check one method of a ``ClassModel``.

    Covers the method name, descriptor, access flags, the ``<clinit>``
    descriptor rule, presence/absence of code relative to ABSTRACT/NATIVE,
    the code model itself (when present) and the method's attributes.
    """
    major = cm.version[0]
    where = Location(class_name=cm.name, method_name=mm.name, method_descriptor=mm.descriptor)
    owner_is_interface = ClassAccessFlag.INTERFACE in cm.access_flags

    def fail(category: Category, text: str) -> None:
        dc.add(Severity.ERROR, category, text, where)

    if not _is_valid_method_name(mm.name):
        fail(Category.METHOD, f"Invalid method name: {mm.name!r}")

    if not is_valid_method_descriptor(mm.descriptor):
        fail(Category.DESCRIPTOR, f"Invalid method descriptor: {mm.descriptor!r}")

    _check_method_flags(mm.access_flags, mm.name, owner_is_interface, major, where, dc)

    if mm.name == "<clinit>" and mm.descriptor != "()V":
        fail(Category.METHOD, f"<clinit> must have descriptor ()V, got {mm.descriptor!r}")

    abstract = MethodAccessFlag.ABSTRACT in mm.access_flags
    native = MethodAccessFlag.NATIVE in mm.access_flags
    body = mm.code

    if abstract or native:
        # Bodiless kinds: having code is the error.
        if body is not None:
            modifier = "ABSTRACT" if abstract else "NATIVE"
            fail(Category.METHOD, f"{modifier} method {mm.name!r} must not have code")
    elif body is None:
        # Every other method needs a body.
        fail(Category.METHOD, f"Method {mm.name!r} must have code")

    if body is not None:
        _verify_model_code(body, cm.name, mm.name, mm.descriptor, dc)

    _verify_attr_versions(mm.attributes, major, where, dc)
1285
+
1286
+
1287
def _verify_model_code(
    code: CodeModel,
    class_name: str,
    method_name: str,
    method_desc: str,
    dc: _Collector,
) -> None:
    """Check that every label a ``CodeModel`` refers to is in its instruction stream.

    Warns when the code's debug metadata is flagged stale and when the
    instruction list is empty (in which case no further checks run).
    Otherwise, labels used by exception handlers, line numbers, local
    variable tables and branch/switch instructions must each be an object
    that appears in ``code.instructions``; labels are compared by identity.

    Args:
        code: The code model to inspect.
        class_name: Class name used for diagnostic locations.
        method_name: Method name used for diagnostic locations.
        method_desc: Method descriptor used for diagnostic locations.
        dc: Collector that receives every diagnostic.
    """
    where = Location(class_name=class_name, method_name=method_name, method_descriptor=method_desc)

    if is_code_debug_info_stale(code):
        dc.add(
            Severity.WARNING,
            Category.CODE,
            "Code debug metadata is marked stale and will be stripped during lowering",
            where,
        )

    stream = code.instructions
    if not stream:
        dc.add(Severity.WARNING, Category.CODE, "Code has empty instruction list", where)
        return

    # Identity (not equality) decides whether a label belongs to the stream.
    placed: set[int] = {id(item) for item in stream if isinstance(item, Label)}

    def require(label: Label, context: str) -> None:
        if id(label) in placed:
            return
        dc.add(Severity.ERROR, Category.CODE, f"{context} references label not in instruction stream", where)

    for handler in code.exception_handlers:
        require(handler.start, "Exception handler start")
        require(handler.end, "Exception handler end")
        require(handler.handler, "Exception handler handler")

    for line_entry in code.line_numbers:
        require(line_entry.label, "Line number entry")

    for var in code.local_variables:
        require(var.start, f"Local variable '{var.name}' start")
        require(var.end, f"Local variable '{var.name}' end")

    for var_type in code.local_variable_types:
        require(var_type.start, f"Local variable type '{var_type.name}' start")
        require(var_type.end, f"Local variable type '{var_type.name}' end")

    for item in stream:
        if isinstance(item, BranchInsn):
            require(item.target, f"{item.type.name} target")
            continue
        if isinstance(item, LookupSwitchInsn):
            require(item.default_target, "lookupswitch default")
            for match_value, case_label in item.pairs:
                require(case_label, f"lookupswitch case {match_value}")
            continue
        if isinstance(item, TableSwitchInsn):
            require(item.default_target, "tableswitch default")
            for case_label in item.targets:
                require(case_label, "tableswitch case")
1345
+
1346
+
1347
def verify_classmodel(cm: ClassModel, *, fail_fast: bool = False) -> list[Diagnostic]:
    """Run the symbolic-level checks (§4.8) on a ``ClassModel``.

    The passes run in a fixed order: class/superclass/interface names, class
    access flags, a stale-debug-metadata warning, duplicate members, every
    field, every method (including code-model label validity), and finally
    the class-level attributes.

    Args:
        cm: Class model to validate.
        fail_fast: If ``True``, raise ``FailFastError`` on the first
            ERROR-severity diagnostic instead of collecting all issues.

    Returns:
        List of diagnostics found during validation.

    Raises:
        FailFastError: If ``fail_fast`` is ``True`` and an ERROR is found.
    """
    collector = _Collector(fail_fast)
    where = Location(class_name=cm.name)

    # Class-wide passes.
    _verify_model_names(cm, collector)
    _check_class_flags(cm.access_flags, where, collector)

    stale = is_class_debug_info_stale(cm)
    if stale:
        collector.add(
            Severity.WARNING,
            Category.ATTRIBUTE,
            "Class debug metadata is marked stale and will be stripped during lowering",
            where,
        )

    _verify_model_duplicates(cm, collector)

    # Member passes.
    for field_model in cm.fields:
        _verify_model_field(field_model, cm, collector)
    for method_model in cm.methods:
        _verify_model_method(method_model, cm, collector)

    # Class-level attributes; cm.version[0] is used as the major version.
    _verify_attr_versions(cm.attributes, cm.version[0], where, collector)

    return collector.diagnostics