java-codebase-rag 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
graph_enrich.py ADDED
@@ -0,0 +1,1681 @@
1
+ """Helpers that bridge `ast_java` output with chunk- and graph-level metadata.
2
+
3
+ Used both by the CocoIndex indexer (for per-chunk enrichment) and by
4
+ `build_ast_graph.py` (for module / microservice inference and deterministic
5
+ node ids).
6
+
7
+ Two location concepts are tracked per file:
8
+
9
+ - **module** — the *innermost* build-marker ancestor (Maven / Gradle /
10
+ SBT). Same as the legacy `service` field. Useful for module-scoped
11
+ search inside a microservice.
12
+ - **microservice** — the *outermost* build-marker ancestor under
13
+ `project_root`. Represents one deployable / repo. Resolution order:
14
+ 1. explicit override list (YAML at project root);
15
+ 2. outermost build marker between `project_root` and the file;
16
+ 3. first path segment under `project_root`;
17
+ 4. empty.
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import hashlib
22
+ import sys
23
+ from dataclasses import dataclass, field, replace
24
+ from functools import lru_cache
25
+ from pathlib import Path
26
+ from typing import Any
27
+ from ast_java import (
28
+ AnnotationRef,
29
+ JavaFileAst,
30
+ MethodDecl,
31
+ OutgoingCallDecl,
32
+ RouteDecl,
33
+ ROUTE_META_ANNOTATION_NAMES,
34
+ TypeDecl,
35
+ _ROUTE_HTTP_MAPPING_NAMES,
36
+ CODEBASE_HTTP_CLIENT_ANNOTATIONS,
37
+ CODEBASE_PRODUCER_ANNOTATIONS,
38
+ infer_capabilities_for_type,
39
+ infer_role_for_type,
40
+ parse_java,
41
+ ROLE_ANNOTATIONS,
42
+ _METHOD_ANN_TO_CAPABILITY,
43
+ _TYPE_ANN_TO_CAPABILITY,
44
+ )
45
+ from java_ontology import (
46
+ VALID_CAPABILITIES,
47
+ VALID_CLIENT_KINDS,
48
+ VALID_PRODUCER_KINDS,
49
+ VALID_ROLES,
50
+ VALID_ROUTE_FRAMEWORKS,
51
+ VALID_ROUTE_KINDS,
52
+ )
53
+ from path_filtering import LayeredIgnore, iter_java_source_files
54
+
55
# Names exported by `from graph_enrich import *`.
__all__ = [
    "AnnotationDecl",
    "BrownfieldOverrides",
    "ChunkEnrichment",
    "annotation_meta_decls_from_graph_tables",
    "collect_annotation_meta_chain",
    "compute_meta_chains_from_decls",
    "enrich_chunk",
    "load_brownfield_overrides",
    "load_microservice_overrides",
    "module_for_path",
    "microservice_for_path",
    "resolve_role_and_capabilities",
    "resolve_routes_for_method",
    "resolve_http_client_for_method",
    "resolve_async_producer_for_method",
    "RouteHint",
    "HttpClientHint",
    "AsyncProducerHint",
    "symbol_id",
    "phantom_id",
    "BUILD_MARKERS",
    "CONFIG_FILENAMES",
]
79
+
80
# Build-marker file names (Maven / Gradle / SBT, per the module docstring);
# module and microservice resolution are described in terms of these markers.
BUILD_MARKERS = (
    "pom.xml",
    "build.gradle",
    "build.gradle.kts",
    "build.sbt",
)

# Recognised config filenames at `project_root` (first match wins).
CONFIG_FILENAMES = (".java-codebase-rag.yml", ".java-codebase-rag.yaml")
89
+
90
+
91
@dataclass
class ChunkEnrichment:
    """Metadata record for one chunk: location, primary type, role and capabilities.

    NOTE(review): presumably the return value of `enrich_chunk` — confirm
    against the caller; the producer is not visible in this part of the file.
    """

    package: str
    # Innermost build-marker ancestor (see the module docstring).
    module: str
    # Outermost build-marker ancestor / deployable (see the module docstring).
    microservice: str
    primary_type_fqn: str
    primary_type_kind: str
    role: str
    annotations_on_type: list[str]
    symbols: list[str]
    # Each instance gets its own fresh empty list by default.
    capabilities: list[str] = field(default_factory=list)
102
+
103
+
104
+ # ---------- microservice override loading ----------
105
+
106
+
107
+ def _parse_csv(raw: str) -> list[str]:
108
+ return [s.strip() for s in raw.split(",") if s.strip()]
109
+
110
+
111
@lru_cache(maxsize=64)
def _load_config_microservice_roots(project_root_str: str) -> tuple[str, ...]:
    """Return the `microservice_roots` entries from the project config file.

    Only the first existing file among `CONFIG_FILENAMES` is consulted. The
    value may be a comma-separated string or a YAML list; anything else gives
    an empty tuple. Results are memoised per `project_root_str`. Every failure
    mode (no file, PyYAML missing, unreadable / malformed YAML, non-mapping
    document, missing key) quietly yields `()` because the config is opt-in.
    """
    base = Path(project_root_str)
    config_path = next(
        (base / fname for fname in CONFIG_FILENAMES if (base / fname).is_file()),
        None,
    )
    if config_path is None:
        return ()

    try:
        import yaml  # PyYAML; already a transitive dep of cocoindex
    except ImportError:
        return ()

    try:
        parsed = yaml.safe_load(config_path.read_text(encoding="utf-8"))
    except Exception:
        # Best effort: an unreadable or malformed config counts as "no overrides".
        return ()

    if not isinstance(parsed, dict):
        return ()

    roots = parsed.get("microservice_roots")
    if isinstance(roots, str):
        return tuple(_parse_csv(roots))
    if isinstance(roots, list):
        trimmed = (str(item).strip() for item in roots)
        return tuple(entry for entry in trimmed if entry)
    return ()
141
+
142
+
143
@lru_cache(maxsize=64)
def _load_config_cross_service_resolution(project_root_str: str) -> str:
    """Read `cross_service_resolution` from `.java-codebase-rag.yml` at project_root.

    Returns "auto" or "brownfield_only". Defaults to "auto" when the key is absent
    or the file is missing / malformed. Unknown values — including non-string
    YAML values such as lists or mappings — warn on stderr and fall back to
    "auto". Only the first existing file among `CONFIG_FILENAMES` is consulted;
    results are cached per `project_root_str`.
    """
    root = Path(project_root_str)
    for name in CONFIG_FILENAMES:
        candidate = root / name
        if not candidate.is_file():
            continue
        try:
            import yaml  # PyYAML; already a transitive dep of cocoindex
        except ImportError:
            return "auto"
        try:
            data = yaml.safe_load(candidate.read_text(encoding="utf-8"))
        except Exception:
            # Best effort: an unreadable or malformed config means "use the default".
            return "auto"
        if not isinstance(data, dict):
            return "auto"
        val = data.get("cross_service_resolution", "auto")
        # Check the type first: a YAML list/mapping is unhashable, so a bare
        # set-membership test would raise TypeError instead of warning.
        if isinstance(val, str) and val in {"auto", "brownfield_only"}:
            return val
        print(
            f"[lancedb-mcp] cross_service_resolution: unknown value "
            f"{val!r}, falling back to 'auto'",
            file=sys.stderr,
        )
        return "auto"
    return "auto"
176
+
177
+
178
def load_microservice_overrides(project_root: str | Path | None) -> tuple[str, ...]:
    """Microservice root overrides from project YAML only (`microservice_roots:`).

    Returns the configured names with duplicates removed (first occurrence
    wins, order preserved), or an empty tuple when `project_root` is None.
    """
    if project_root is None:
        return ()
    try:
        resolved_root = str(Path(project_root).resolve())
    except OSError:
        # Fall back to the path as given when it cannot be resolved.
        resolved_root = str(project_root)
    # dict.fromkeys keeps insertion order, so this is an order-preserving dedup.
    return tuple(dict.fromkeys(_load_config_microservice_roots(resolved_root)))
194
+
195
+
196
+ # ---------- brownfield role / capability overrides ----------
197
+
198
+
199
@dataclass(frozen=True)
class RouteHint:
    """YAML `route_overrides` entry: maps to `RouteDecl` fields (B2a brownfield)."""

    # The config loader only keeps entries whose framework is in VALID_ROUTE_FRAMEWORKS.
    framework: str
    # The config loader only keeps entries whose kind is in VALID_ROUTE_KINDS.
    kind: str
    path: str = ""
    # Copied to `RouteDecl.http_method`; the config loader stores it upper-cased.
    method: str = ""
    topic: str = ""
    broker: str = ""
209
+
210
+
211
@dataclass(frozen=True)
class HttpClientHint:
    """YAML `http_client_overrides` entry as parsed by the config loader."""

    # The config loader only keeps entries whose client_kind is in VALID_CLIENT_KINDS.
    client_kind: str
    target_service: str = ""
    path: str = ""
    # Stored upper-cased by the config loader.
    method: str = ""
217
+
218
+
219
@dataclass(frozen=True)
class AsyncProducerHint:
    """YAML `async_producer_overrides` entry as parsed by the config loader."""

    # The config loader only keeps entries whose client_kind is in VALID_PRODUCER_KINDS.
    client_kind: str
    topic: str = ""
    broker: str = ""
224
+
225
+
226
@dataclass(frozen=True)
class BrownfieldOverrides:
    """Validated brownfield overrides loaded from the project config file.

    Every field defaults to an empty dict, so `BrownfieldOverrides()` is the
    "no overrides" value. The dataclass is frozen, but the dicts it holds are
    ordinary mutable dicts.
    """

    # `role_overrides.annotations`: annotation name -> role.
    annotation_to_role: dict[str, str] = field(default_factory=dict)
    # `role_overrides.capabilities`: annotation name -> capabilities.
    annotation_to_capabilities: dict[str, tuple[str, ...]] = field(default_factory=dict)
    # `role_overrides.fqn.<type>.role`: type FQN -> role.
    fqn_role: dict[str, str] = field(default_factory=dict)
    # `role_overrides.fqn.<type>.capabilities`: type FQN -> capabilities.
    fqn_capabilities: dict[str, tuple[str, ...]] = field(default_factory=dict)
    # `route_overrides.annotations` / `route_overrides.fqn`.
    annotation_to_route_hint: dict[str, RouteHint] = field(default_factory=dict)
    fqn_to_route_hint: dict[str, RouteHint] = field(default_factory=dict)
    # `http_client_overrides.annotations` / `http_client_overrides.fqn`.
    annotation_to_http_client_hint: dict[str, HttpClientHint] = field(default_factory=dict)
    fqn_to_http_client_hint: dict[str, HttpClientHint] = field(default_factory=dict)
    # `async_producer_overrides.annotations` / `async_producer_overrides.fqn`.
    annotation_to_async_producer_hint: dict[str, AsyncProducerHint] = field(default_factory=dict)
    fqn_to_async_producer_hint: dict[str, AsyncProducerHint] = field(default_factory=dict)
238
+
239
+
240
def _meta_builtins() -> frozenset[str]:
    """Return every built-in annotation name that seeds the meta-annotation closure.

    The result is the union of the role annotations, the method- and type-level
    capability annotations, the route meta-annotations, and the HTTP-client and
    producer annotations.
    """
    names = frozenset(ROLE_ANNOTATIONS)
    names |= frozenset(_METHOD_ANN_TO_CAPABILITY)
    names |= frozenset(_TYPE_ANN_TO_CAPABILITY)
    names |= ROUTE_META_ANNOTATION_NAMES
    names |= CODEBASE_HTTP_CLIENT_ANNOTATIONS
    names |= CODEBASE_PRODUCER_ANNOTATIONS
    return names
249
+
250
+
251
# Maximum number of propagation rounds `_build_meta_chain` runs. Intended as a
# cap of about four meta-annotation hops between an annotation and a built-in
# (e.g. six linear wrappers around `@Service` can leave the outermost name
# without a role from Layer A).
# NOTE(review): each round updates the chain in place in sorted-key order, so a
# single round may propagate more than one hop — the cap is on rounds, not
# strictly on hops.
_META_PATH_DEPTH_CAP = 4
256
+
257
+
258
@dataclass(frozen=True)
class AnnotationDecl:
    """One `@interface` declaration together with the annotations placed on it."""

    fqn: str
    # Name under which `_build_meta_chain` records this declaration's reachable built-ins.
    simple: str
    # `.name` of each annotation applied to the declaration, in source order.
    meta_annotations: tuple[str, ...]
263
+
264
+
265
+ def _build_meta_chain(
266
+ decls: dict[str, AnnotationDecl],
267
+ builtins: frozenset[str],
268
+ *,
269
+ max_depth: int,
270
+ ) -> dict[str, frozenset[str]]:
271
+ """Iterative fixed-point over the meta-annotation graph (PLAN-BROWNFIELD, Pass A2)."""
272
+ chain: dict[str, set[str]] = {b: {b} for b in builtins}
273
+ for _ in range(max_depth):
274
+ changed = False
275
+ for _sk, decl in sorted(decls.items(), key=lambda kv: kv[0]):
276
+ reach: set[str] = set()
277
+ for parent in decl.meta_annotations:
278
+ reach |= chain.get(parent, set())
279
+ if reach and not reach.issubset(chain.get(decl.simple, set())):
280
+ chain.setdefault(decl.simple, set()).update(reach)
281
+ changed = True
282
+ if not changed:
283
+ break
284
+ return {k: frozenset(chain.get(k, set())) for k in decls}
285
+
286
+
287
+ def _collect_annotation_decl_index(project_root_str: str) -> dict[str, AnnotationDecl]:
288
+ """File scan for `@interface` declarations; sorted paths for stable first-wins (Fix 5/6)."""
289
+ root = Path(project_root_str)
290
+ if not root.is_dir():
291
+ return {}
292
+ ignore = LayeredIgnore(root)
293
+ decls: dict[str, AnnotationDecl] = {}
294
+ for p in sorted(iter_java_source_files(root, ignore=ignore), key=str):
295
+ try:
296
+ content = p.read_bytes()
297
+ except OSError as exc:
298
+ print(
299
+ f"[lancedb-mcp] skipped unreadable {p}: {exc}",
300
+ file=sys.stderr,
301
+ )
302
+ continue
303
+ if not content.strip():
304
+ continue
305
+ try:
306
+ jast = parse_java(content)
307
+ except Exception as exc:
308
+ print(
309
+ f"[lancedb-mcp] parse error in {p}: {exc}",
310
+ file=sys.stderr,
311
+ )
312
+ continue
313
+ for t in jast.all_types:
314
+ if t.kind != "annotation":
315
+ continue
316
+ if t.name in decls:
317
+ print(
318
+ f"[lancedb-mcp] duplicate @interface simple name {t.name!r} — "
319
+ f"keeping {decls[t.name].fqn!r}, ignoring {t.fqn!r}",
320
+ file=sys.stderr,
321
+ )
322
+ continue
323
+ decls[t.name] = AnnotationDecl(
324
+ fqn=t.fqn,
325
+ simple=t.name,
326
+ meta_annotations=tuple(a.name for a in t.annotations),
327
+ )
328
+ return decls
329
+
330
+
331
@lru_cache(maxsize=4)
def collect_annotation_meta_chain(
    project_root_str: str,
) -> dict[str, frozenset[str]]:
    """Map annotation simple name → built-in simple names reachable via meta-annotations.

    Single source of truth for Layer A: both the Kuzu writer and Lance chunk
    enrichment must use this; they must not derive `meta_chain` from separate
    filesystem walks. See ``PLAN-BROWNFIELD-ROLE-OVERRIDES`` §
    *Single source of truth (REQUIRED — read before implementation)*.

    The result is cached per `project_root_str`, so repeated calls for the
    same root return the same dict object.
    """
    return _build_meta_chain(
        _collect_annotation_decl_index(project_root_str),
        _meta_builtins(),
        max_depth=_META_PATH_DEPTH_CAP,
    )
345
+
346
+
347
def annotation_meta_decls_from_graph_tables(
    types: dict[str, Any],
) -> dict[str, tuple[str, ...]]:
    """From `build_ast_graph.GraphTables.types`, map @interface simple name -> meta anns.

    Used for diagnostics; Layer A in production uses `collect_annotation_meta_chain`
    (disk) so Kuzu and Lance share one index.

    Only entries whose `decl.kind` is "annotation" are considered. When a
    simple name occurs more than once, the first entry (in dict order) is kept
    and each later one is reported on stderr.
    """
    meta_by_name: dict[str, tuple[str, ...]] = {}
    kept_fqn: dict[str, str] = {}
    for entry in types.values():
        decl = entry.decl
        if decl.kind != "annotation":
            continue
        simple = decl.name
        if simple not in meta_by_name:
            kept_fqn[simple] = decl.fqn
            meta_by_name[simple] = tuple(ref.name for ref in decl.annotations)
            continue
        print(
            f"[lancedb-mcp] duplicate @interface simple name {simple!r} — "
            f"keeping {kept_fqn[simple]!r}, ignoring {decl.fqn!r}",
            file=sys.stderr,
        )
    return meta_by_name
371
+
372
+
373
def compute_meta_chains_from_decls(
    decls: dict[str, tuple[str, ...]],
) -> dict[str, frozenset[str]]:
    """Map annotation simple name → transitive built-in simple names (Layer A), tests/legacy.

    Shape-only callers use placeholder FQNs; use `collect_annotation_meta_chain` for
    a stable project index.
    """
    placeholder_decls: dict[str, AnnotationDecl] = {}
    for simple_name, meta_names in decls.items():
        # Only the shape is known here, so the FQN is a `::name` placeholder.
        placeholder_decls[simple_name] = AnnotationDecl(
            fqn=f"::{simple_name}",
            simple=simple_name,
            meta_annotations=meta_names,
        )
    return _build_meta_chain(
        placeholder_decls,
        _meta_builtins(),
        max_depth=_META_PATH_DEPTH_CAP,
    )
391
+
392
+
393
def _bf_text(entry: dict, key: str) -> str:
    """Return `entry[key]` as a stripped string; missing or falsy values become ""."""
    return str(entry.get(key, "") or "").strip()


def _bf_section(config: dict, name: str) -> dict:
    """Return the top-level config section `name`, or `{}` when absent or not a mapping."""
    section = config.get(name)
    return section if isinstance(section, dict) else {}


def _bf_dict_entries(block: object) -> list[tuple[str, dict]]:
    """List `(stripped_key, value)` pairs of `block` whose key is non-blank and value is a dict.

    A `block` that is not a dict yields an empty list.
    """
    if not isinstance(block, dict):
        return []
    pairs = ((str(key).strip(), val) for key, val in block.items())
    return [(name, val) for name, val in pairs if name and isinstance(val, dict)]


def _bf_capabilities(items: list | tuple, *, section: str, key: str) -> tuple[str, ...]:
    """Keep the capabilities in `items` that are valid, warning on stderr for the rest."""
    kept: list[str] = []
    for item in items:
        cap = str(item).strip()
        if not cap:
            continue
        if cap not in VALID_CAPABILITIES:
            print(
                f"[lancedb-mcp] {section}: unknown capability {cap!r} for {key!r} — dropped",
                file=sys.stderr,
            )
            continue
        kept.append(cap)
    return tuple(kept)


def _bf_route_hints(block: object, *, section: str) -> dict[str, RouteHint]:
    """Parse one `route_overrides` sub-block into validated `RouteHint` values."""
    hints: dict[str, RouteHint] = {}
    for name, spec in _bf_dict_entries(block):
        framework = _bf_text(spec, "framework")
        kind = _bf_text(spec, "kind")
        if framework not in VALID_ROUTE_FRAMEWORKS:
            print(
                f"[lancedb-mcp] {section}: unknown framework {framework!r} "
                f"for key {name!r} — entry dropped",
                file=sys.stderr,
            )
            continue
        if kind not in VALID_ROUTE_KINDS:
            print(
                f"[lancedb-mcp] {section}: unknown kind {kind!r} "
                f"for key {name!r} — entry dropped",
                file=sys.stderr,
            )
            continue
        hints[name] = RouteHint(
            framework=framework,
            kind=kind,
            path=_bf_text(spec, "path"),
            method=_bf_text(spec, "method").upper(),
            topic=_bf_text(spec, "topic"),
            broker=_bf_text(spec, "broker"),
        )
    return hints


def _bf_http_client_hints(block: object, *, section: str) -> dict[str, HttpClientHint]:
    """Parse one `http_client_overrides` sub-block into validated `HttpClientHint` values."""
    hints: dict[str, HttpClientHint] = {}
    for name, spec in _bf_dict_entries(block):
        client_kind = _bf_text(spec, "client_kind")
        if client_kind not in VALID_CLIENT_KINDS:
            print(
                f"[lancedb-mcp] {section}: unknown client_kind {client_kind!r} "
                f"for key {name!r} — entry dropped",
                file=sys.stderr,
            )
            continue
        hints[name] = HttpClientHint(
            client_kind=client_kind,
            target_service=_bf_text(spec, "target_service"),
            path=_bf_text(spec, "path"),
            method=_bf_text(spec, "method").upper(),
        )
    return hints


def _bf_async_producer_hints(block: object, *, section: str) -> dict[str, AsyncProducerHint]:
    """Parse one `async_producer_overrides` sub-block into validated `AsyncProducerHint` values."""
    hints: dict[str, AsyncProducerHint] = {}
    for name, spec in _bf_dict_entries(block):
        client_kind = _bf_text(spec, "client_kind")
        if client_kind not in VALID_PRODUCER_KINDS:
            print(
                f"[lancedb-mcp] {section}: unknown client_kind {client_kind!r} "
                f"for key {name!r} — entry dropped",
                file=sys.stderr,
            )
            continue
        hints[name] = AsyncProducerHint(
            client_kind=client_kind,
            topic=_bf_text(spec, "topic"),
            broker=_bf_text(spec, "broker"),
        )
    return hints


def _bf_overrides_from_config(data: dict) -> BrownfieldOverrides:
    """Build `BrownfieldOverrides` from an already-parsed config mapping.

    Sections are processed in a fixed order (role annotations, role
    capabilities, role fqn, routes, HTTP clients, async producers) so stderr
    warnings come out in a stable order.
    """
    role_block = _bf_section(data, "role_overrides")

    annotation_to_role: dict[str, str] = {}
    role_annotations = role_block.get("annotations")
    if isinstance(role_annotations, dict):
        for key, value in role_annotations.items():
            name = str(key).strip()
            role = str(value).strip()
            if not name or not role:
                continue
            if role not in VALID_ROLES:
                print(
                    f"[lancedb-mcp] role_overrides.annotations: unknown role {role!r} for {name!r} — dropped",
                    file=sys.stderr,
                )
                continue
            annotation_to_role[name] = role

    annotation_to_caps: dict[str, tuple[str, ...]] = {}
    caps_block = role_block.get("capabilities")
    if isinstance(caps_block, dict):
        for key, value in caps_block.items():
            name = str(key).strip()
            if not name or not isinstance(value, (list, tuple)):
                continue
            caps = _bf_capabilities(value, section="role_overrides.capabilities", key=name)
            if caps:
                annotation_to_caps[name] = caps

    fqn_role: dict[str, str] = {}
    fqn_caps: dict[str, tuple[str, ...]] = {}
    for fqn_name, spec in _bf_dict_entries(role_block.get("fqn")):
        raw_role = spec.get("role")
        if raw_role is not None and str(raw_role).strip():
            role = str(raw_role).strip()
            if role in VALID_ROLES:
                fqn_role[fqn_name] = role
            else:
                print(
                    f"[lancedb-mcp] role_overrides.fqn: unknown role {role!r} for {fqn_name!r} — dropped",
                    file=sys.stderr,
                )
        cap_list = spec.get("capabilities")
        if isinstance(cap_list, (list, tuple)):
            caps = _bf_capabilities(cap_list, section="role_overrides.fqn", key=fqn_name)
            if caps:
                fqn_caps[fqn_name] = caps

    routes = _bf_section(data, "route_overrides")
    http_clients = _bf_section(data, "http_client_overrides")
    producers = _bf_section(data, "async_producer_overrides")

    # Assignment order below mirrors the original processing order of the sections.
    annotation_routes = _bf_route_hints(
        routes.get("annotations"), section="route_overrides.annotations"
    )
    fqn_routes = _bf_route_hints(routes.get("fqn"), section="route_overrides.fqn")
    annotation_http = _bf_http_client_hints(
        http_clients.get("annotations"), section="http_client_overrides.annotations"
    )
    fqn_http = _bf_http_client_hints(
        http_clients.get("fqn"), section="http_client_overrides.fqn"
    )
    annotation_async = _bf_async_producer_hints(
        producers.get("annotations"), section="async_producer_overrides.annotations"
    )
    fqn_async = _bf_async_producer_hints(
        producers.get("fqn"), section="async_producer_overrides.fqn"
    )

    return BrownfieldOverrides(
        annotation_to_role=annotation_to_role,
        annotation_to_capabilities=annotation_to_caps,
        fqn_role=fqn_role,
        fqn_capabilities=fqn_caps,
        annotation_to_route_hint=annotation_routes,
        fqn_to_route_hint=fqn_routes,
        annotation_to_http_client_hint=annotation_http,
        fqn_to_http_client_hint=fqn_http,
        annotation_to_async_producer_hint=annotation_async,
        fqn_to_async_producer_hint=fqn_async,
    )


@lru_cache(maxsize=64)
def _load_brownfield_overrides(project_root_str: str) -> BrownfieldOverrides:
    """Read the brownfield override sections from the config file at project_root.

    Covers `role_overrides`, `route_overrides`, `http_client_overrides` and
    `async_producer_overrides`. Only the first existing file among
    `CONFIG_FILENAMES` is consulted. Invalid entries are dropped with a warning
    on stderr. A missing file, missing PyYAML, unreadable / malformed YAML or a
    non-mapping document all yield empty overrides. Cached per root.
    """
    root = Path(project_root_str)
    for name in CONFIG_FILENAMES:
        candidate = root / name
        if not candidate.is_file():
            continue
        try:
            import yaml  # PyYAML; already a transitive dep of cocoindex
        except ImportError:
            return BrownfieldOverrides()
        try:
            data = yaml.safe_load(candidate.read_text(encoding="utf-8"))
        except Exception:
            # Best effort: an unreadable or malformed config means "no overrides".
            return BrownfieldOverrides()
        if not isinstance(data, dict):
            return BrownfieldOverrides()
        return _bf_overrides_from_config(data)
    return BrownfieldOverrides()
658
+
659
+
660
def load_brownfield_overrides(
    project_root: str | Path | None,
) -> BrownfieldOverrides:
    """Return the brownfield overrides configured for `project_root`.

    `None` yields empty overrides. Otherwise the root is resolved to an
    absolute path (falling back to the path as given on `OSError`) and handed
    to the cached loader.
    """
    if project_root is None:
        # All fields default to empty dicts, same as passing ten `{}` positionally.
        return BrownfieldOverrides()
    try:
        resolved_root = str(Path(project_root).resolve())
    except OSError:
        resolved_root = str(project_root)
    return _load_brownfield_overrides(resolved_root)
670
+
671
+
672
def resolve_role_and_capabilities(
    type_decl: TypeDecl,
    *,
    overrides: BrownfieldOverrides,
    meta_chain: dict[str, frozenset[str]] | None = None,
) -> tuple[str, list[str]]:
    """Compose AST inference with brownfield overrides (single execution order).

    The resolver runs the steps **below in order**; each step mutates the same
    working ``(role, caps)``. Steps listed later in this docstring *override
    or extend* the result of earlier steps when they apply. There is no second
    "priority" axis: "last to run" in this list is the strongest.

    1. Built-in inference (``infer_role_for_type`` / ``infer_capabilities_for_type``)
    2. Layer B — config annotation map (``role_overrides.annotations`` / ``capabilities``)
    3. Layer A — meta-annotation walk (``meta_chain``; Phase 2; no-op if None)
    4. Layer C — ``@CodebaseRole`` / ``@CodebaseCapability`` in source
    5. Layer B — per-FQN map (``role_overrides.fqn``)

    Role rule: steps 2 and 3 that change *role* use ``if role == "OTHER"`` on the
    *current* role, so step 2 (user config) runs before step 3: explicit config
    wins over automatic meta (see `PLAN-BROWNFIELD-ROLE-OVERRIDES` §
    *Resolver execution order*). Steps 4 and 5 apply to role without that guard.
    Capability rule: every layer is additively unioned; return value is
    ``sorted(caps)`` for a stable on-disk form.

    Determinism: in step 3 each annotation's meta-chain is walked in sorted
    order. The chain is a ``frozenset``, so without sorting the role chosen
    when a chain reaches several role-bearing built-ins would depend on hash
    order and could differ between runs.

    See ``PLAN-BROWNFIELD-ROLE-OVERRIDES`` § *Resolver execution order* for the
    side-by-side table.
    """
    # ----- Step 1: built-in inference (runs first) -----
    role = infer_role_for_type(type_decl)
    caps: set[str] = set(infer_capabilities_for_type(type_decl))
    type_ann_names = [a.name for a in type_decl.annotations]

    # ----- Step 2: Layer B — annotation name map (before meta-walk) -----
    if role == "OTHER":
        for ann in type_ann_names:
            mapped = overrides.annotation_to_role.get(ann)
            if mapped:
                role = mapped
                break
    for ann in type_ann_names:
        caps.update(overrides.annotation_to_capabilities.get(ann, ()))
    for m in type_decl.methods:
        for ann in m.annotations:
            caps.update(overrides.annotation_to_capabilities.get(ann.name, ()))

    # ----- Step 3: Layer A — meta-annotation chain -----
    if meta_chain is not None:
        if role == "OTHER":
            for ann in type_ann_names:
                # sorted(): make the first role-bearing built-in a stable choice.
                for builtin in sorted(meta_chain.get(ann, ())):
                    mapped = ROLE_ANNOTATIONS.get(builtin)
                    if mapped:
                        role = mapped
                        break
                if role != "OTHER":
                    break
        # Capabilities are unioned, so iteration order does not matter here.
        for ann in type_ann_names:
            for builtin in meta_chain.get(ann, ()):
                c = _TYPE_ANN_TO_CAPABILITY.get(builtin)
                if c:
                    caps.add(c)
        for m in type_decl.methods:
            for ann in m.annotations:
                for builtin in meta_chain.get(ann.name, ()):
                    c = _METHOD_ANN_TO_CAPABILITY.get(builtin)
                    if c:
                        caps.add(c)

    # ----- Step 4: Layer C — in-source @CodebaseRole / @CodebaseCapability -----
    for ann in type_decl.annotations:
        if ann.name == "CodebaseRole":
            v = ann.arguments.get("value")
            vk = ann.argument_kinds.get("value")
            if vk == "string" and v is not None:
                print(
                    f"[lancedb-mcp] CodebaseRole: string literal value {v!r} is no longer supported; "
                    "use CodebaseRoleKind.*",
                    file=sys.stderr,
                )
            elif vk == "enum" and v in VALID_ROLES:
                role = v
            elif vk == "enum" and v is not None:
                # Reaching here means the enum value is not in VALID_ROLES.
                print(
                    f"[lancedb-mcp] CodebaseRole: invalid value {v!r} — ignored",
                    file=sys.stderr,
                )
        elif ann.name == "CodebaseCapability":
            v = ann.arguments.get("value")
            vk = ann.argument_kinds.get("value")
            if vk == "string" and v is not None:
                print(
                    f"[lancedb-mcp] CodebaseCapability: string literal value {v!r} is no longer supported; "
                    "use CodebaseCapabilityKind.*",
                    file=sys.stderr,
                )
            elif vk == "enum" and v in VALID_CAPABILITIES:
                caps.add(v)
            elif vk == "enum" and v is not None:
                # Reaching here means the enum value is not in VALID_CAPABILITIES.
                print(
                    f"[lancedb-mcp] CodebaseCapability: invalid value {v!r} — ignored",
                    file=sys.stderr,
                )
        elif ann.name == "CodebaseCapabilities":
            # strict=True: values and kinds must have the same length.
            for v, vk in zip(
                ann.container_capability_values,
                ann.container_capability_kinds,
                strict=True,
            ):
                if vk == "string" and v:
                    print(
                        f"[lancedb-mcp] CodebaseCapabilities: string literal value {v!r} is no longer supported; "
                        "use CodebaseCapabilityKind.*",
                        file=sys.stderr,
                    )
                elif vk == "enum" and v in VALID_CAPABILITIES:
                    caps.add(v)
                elif vk == "enum" and v:
                    print(
                        f"[lancedb-mcp] CodebaseCapabilities: invalid value {v!r} — ignored",
                        file=sys.stderr,
                    )

    # ----- Step 5: Layer B — per-FQN (runs last; can override role / add caps) -----
    if type_decl.fqn in overrides.fqn_role:
        role = overrides.fqn_role[type_decl.fqn]
    caps.update(overrides.fqn_capabilities.get(type_decl.fqn, ()))

    return role, sorted(caps)
805
+
806
+
807
# Route kinds treated as HTTP when Layer C routes are merged over built-in rows.
_HTTP_ROUTE_KINDS = frozenset({"http_endpoint", "http_consumer"})
# Layer C `@CodebaseAsyncRoute` replaces same-method auto messaging of these kinds.
_LAYER_C_ASYNC_REPLACES_BUILTIN_KINDS = frozenset({"kafka_topic"})
810
+
811
+
812
+ def _route_path_atom(raw_value: str, value_kind: str | None) -> tuple[str, str, float, bool]:
813
+ # Canonical ladder for route path hints: annotation -> spel -> constant_ref.
814
+ # Note: an empty string literal is still an explicit annotation value (`annotation`).
815
+ # "No value present" is handled by caller fallback, not by a separate value_kind.
816
+ if value_kind == "string":
817
+ if "${" in raw_value:
818
+ return "", "spel", 0.85, False
819
+ return raw_value, "annotation", 1.0, True
820
+ return "", "constant_ref", 0.7, False
821
+
822
+
823
+ def _route_hint_lookup(ann: AnnotationRef, hints: dict[str, RouteHint]) -> RouteHint | None:
824
+ q = ann.qualified.strip()
825
+ if q in hints:
826
+ return hints[q]
827
+ if ann.name in hints:
828
+ return hints[ann.name]
829
+ for k, h in sorted(hints.items(), key=lambda kv: kv[0]):
830
+ if k.endswith("." + ann.name):
831
+ return h
832
+ return None
833
+
834
+
835
def _route_decl_from_route_hint(
    hint: RouteHint,
    *,
    method_fqn: str,
    method_sig: str,
    filename: str,
    start_line: int,
    end_line: int,
    source_layer: str,
) -> RouteDecl:
    """Turn a configured `RouteHint` into a fully resolved `RouteDecl`.

    Hint-derived rows are always recorded as strategy "annotation",
    confidence 1.0 and resolved; the Feign fields are left empty.
    """
    from_hint = dict(
        kind=hint.kind,
        framework=hint.framework,
        http_method=hint.method,
        path=hint.path,
        topic=hint.topic,
        broker=hint.broker,
    )
    location = dict(filename=filename, start_line=start_line, end_line=end_line)
    return RouteDecl(
        method_fqn=method_fqn,
        method_sig=method_sig,
        feign_name="",
        feign_url="",
        resolution_strategy="annotation",
        confidence=1.0,
        resolved=True,
        route_source_layer=source_layer,
        **from_hint,
        **location,
    )
864
+
865
+
866
def _http_paths_from_ann_ref(ann: AnnotationRef) -> list[tuple[str, str, float, bool]]:
    """Path atoms for a custom mapping annotation (AnnotationRef only; Layer A).

    Looks at the `path` and then the `value` argument, skipping absent or
    empty ones. When neither gives an atom, one empty string-literal atom is
    returned so the caller always has at least one path.
    """
    atoms = [
        _route_path_atom(ann.arguments[key], ann.argument_kinds.get(key))
        for key in ("path", "value")
        if ann.arguments.get(key)
    ]
    return atoms or [_route_path_atom("", "string")]
880
+
881
+
882
+ def _http_methods_for_ann_ref(ann: AnnotationRef, template: str) -> list[str]:
883
+ if template == "GetMapping":
884
+ return ["GET"]
885
+ if template == "PostMapping":
886
+ return ["POST"]
887
+ if template == "PutMapping":
888
+ return ["PUT"]
889
+ if template == "DeleteMapping":
890
+ return ["DELETE"]
891
+ if template == "PatchMapping":
892
+ return ["PATCH"]
893
+ if template == "RequestMapping":
894
+ raw = ann.arguments.get("method")
895
+ mk = ann.argument_kinds.get("method")
896
+ if raw and mk == "enum":
897
+ return [raw.rsplit(".", 1)[-1].upper()]
898
+ return [""]
899
+ return [""]
900
+
901
+
902
def _layer_a_route_decls_from_ann(
    ann: AnnotationRef,
    meta_chain: dict[str, frozenset[str]],
    *,
    method_fqn: str,
    method_sig: str,
    filename: str,
    start_line: int,
    end_line: int,
) -> list[RouteDecl]:
    """Synthetic HTTP routes from custom annotations whose meta-chain hits Spring mappings.

    Returns `[]` when `ann` is itself a built-in mapping annotation or when its
    meta-chain reaches none. Otherwise the alphabetically first reachable
    mapping is the template, and one `spring_mvc` / `http_endpoint` row tagged
    `layer_a_meta` is emitted per (path atom, HTTP method) pair.
    """
    if ann.name in _ROUTE_HTTP_MAPPING_NAMES:
        return []
    mapping_hits = meta_chain.get(ann.name, frozenset()) & _ROUTE_HTTP_MAPPING_NAMES
    if not mapping_hits:
        return []

    template = min(mapping_hits)
    path_atoms = _http_paths_from_ann_ref(ann)
    verbs = _http_methods_for_ann_ref(ann, template)
    return [
        RouteDecl(
            method_fqn=method_fqn,
            method_sig=method_sig,
            kind="http_endpoint",
            framework="spring_mvc",
            http_method=verb,
            path=raw_path,
            topic="",
            broker="",
            feign_name="",
            feign_url="",
            resolution_strategy=strategy,
            confidence=confidence,
            resolved=is_resolved,
            filename=filename,
            start_line=start_line,
            end_line=end_line,
            route_source_layer="layer_a_meta",
        )
        for raw_path, strategy, confidence, is_resolved in path_atoms
        for verb in verbs
    ]
947
+
948
+
949
+ def _merge_layer_c_codebase_routes(
950
+ working: list[RouteDecl],
951
+ layer_c: list[RouteDecl],
952
+ ) -> list[RouteDecl]:
953
+ """Layer C — brownfield in-source routes win over same-method auto extraction.
954
+
955
+ HTTP: any `@CodebaseHttpRoute` for a method drops same-method **built-in** HTTP
956
+ rows (typically `@GetMapping`), then layer C HTTP rows are appended so the
957
+ brownfield path/method is authoritative (no field merge onto surviving built-ins).
958
+ Async: any `@CodebaseAsyncRoute` (`kafka_topic`) for a method drops same-method
959
+ **built-in** `kafka_topic` rows (typically `@KafkaListener`), then layer C rows
960
+ are merged/appended so the brownfield topic is authoritative over auto extraction.
961
+ """
962
+ if not layer_c:
963
+ return working
964
+ merged = [replace(r) for r in working]
965
+ async_override_mf = {
966
+ cr.method_fqn
967
+ for cr in layer_c
968
+ if cr.kind in _LAYER_C_ASYNC_REPLACES_BUILTIN_KINDS
969
+ }
970
+ if async_override_mf:
971
+ merged = [
972
+ r
973
+ for r in merged
974
+ if not (
975
+ r.method_fqn in async_override_mf
976
+ and r.kind in _LAYER_C_ASYNC_REPLACES_BUILTIN_KINDS
977
+ and r.route_source_layer == "builtin"
978
+ )
979
+ ]
980
+ http_override_mf = {
981
+ cr.method_fqn for cr in layer_c if cr.kind in _HTTP_ROUTE_KINDS
982
+ }
983
+ if http_override_mf:
984
+ merged = [
985
+ r
986
+ for r in merged
987
+ if not (
988
+ r.method_fqn in http_override_mf
989
+ and r.kind in _HTTP_ROUTE_KINDS
990
+ and r.route_source_layer == "builtin"
991
+ )
992
+ ]
993
+ for cr in sorted(layer_c, key=lambda x: (x.path, x.http_method, x.topic)):
994
+ if cr.kind in _HTTP_ROUTE_KINDS:
995
+ merged.append(replace(cr))
996
+ continue
997
+ placed = False
998
+ for i, r in enumerate(merged):
999
+ if (
1000
+ r.kind in _HTTP_ROUTE_KINDS
1001
+ and cr.kind in _HTTP_ROUTE_KINDS
1002
+ and r.method_fqn == cr.method_fqn
1003
+ ):
1004
+ merged[i] = replace(
1005
+ r,
1006
+ path=cr.path if cr.path else r.path,
1007
+ http_method=cr.http_method if cr.http_method else r.http_method,
1008
+ framework=cr.framework if cr.framework else r.framework,
1009
+ kind=cr.kind if cr.kind else r.kind,
1010
+ topic=cr.topic if cr.topic else r.topic,
1011
+ broker=cr.broker if cr.broker else r.broker,
1012
+ resolution_strategy="codebase_route",
1013
+ confidence=cr.confidence,
1014
+ resolved=cr.resolved,
1015
+ route_source_layer="layer_c_source",
1016
+ )
1017
+ placed = True
1018
+ break
1019
+ if not placed:
1020
+ merged.append(replace(cr))
1021
+ return merged
1022
+
1023
+
1024
+ def _apply_layer_b_fqn(
1025
+ working: list[RouteDecl],
1026
+ hint: RouteHint,
1027
+ *,
1028
+ method_fqn: str,
1029
+ method_sig: str,
1030
+ filename: str,
1031
+ start_line: int,
1032
+ end_line: int,
1033
+ ) -> list[RouteDecl]:
1034
+ """Layer B fqn — last writer; merges onto existing routes or seeds one."""
1035
+ if not working:
1036
+ return [
1037
+ _route_decl_from_route_hint(
1038
+ hint,
1039
+ method_fqn=method_fqn,
1040
+ method_sig=method_sig,
1041
+ filename=filename,
1042
+ start_line=start_line,
1043
+ end_line=end_line,
1044
+ source_layer="layer_b_fqn",
1045
+ ),
1046
+ ]
1047
+ out: list[RouteDecl] = []
1048
+ for r in working:
1049
+ out.append(
1050
+ replace(
1051
+ r,
1052
+ framework=hint.framework or r.framework,
1053
+ kind=hint.kind or r.kind,
1054
+ path=hint.path or r.path,
1055
+ http_method=hint.method or r.http_method,
1056
+ topic=hint.topic or r.topic,
1057
+ broker=hint.broker or r.broker,
1058
+ route_source_layer="layer_b_fqn",
1059
+ ),
1060
+ )
1061
+ return out
1062
+
1063
+
1064
def resolve_routes_for_method(
    *,
    method_decl: MethodDecl,
    enclosing_type: TypeDecl,
    overrides: BrownfieldOverrides,
    meta_chain: dict[str, frozenset[str]] | None,
    builtin_routes: list[RouteDecl],
) -> list[RouteDecl]:
    """Compose built-in route extraction with brownfield overrides (single execution order).

    Mirrors ``resolve_role_and_capabilities`` layering; see ``PLAN-TIER1-COMPLETION``
    § PR-A3. The steps below run **in order**:

    1. Built-in routes: rows of ``builtin_routes`` that are neither tagged
       ``layer_c_source`` nor resolved via ``codebase_route``; re-tagged ``builtin``.
    2. Layer B — ``overrides.annotation_to_route_hint``: one extra route per
       type/method annotation that has a hint.
    3. Layer A — meta-annotation walk (skipped when ``meta_chain`` is ``None``).
    4. Layer C — the remaining rows of ``builtin_routes`` (in-source brownfield
       annotations), merged via ``_merge_layer_c_codebase_routes``.
    5. Layer B — ``overrides.fqn_to_route_hint`` keyed by the enclosing type FQN;
       applied last via ``_apply_layer_b_fqn``.

    A migration notice is printed to stderr when a legacy ``CodebaseRoute`` /
    ``CodebaseRoutes`` annotation is present on the type or the method.
    """
    signature = method_decl.signature
    # Shared location/identity keywords for every synthesized route.
    location = {
        "method_fqn": f"{enclosing_type.fqn}#{signature}",
        "method_sig": signature,
        "filename": builtin_routes[0].filename if builtin_routes else "",
        "start_line": method_decl.start_line,
        "end_line": method_decl.end_line,
    }

    # ----- Step 1: split parser output into built-ins and in-source layer C -----
    working: list[RouteDecl] = []
    layer_c_src: list[RouteDecl] = []
    for route in builtin_routes:
        from_layer_c = (
            route.route_source_layer == "layer_c_source"
            or route.resolution_strategy == "codebase_route"
        )
        if from_layer_c:
            layer_c_src.append(route)
        else:
            working.append(replace(route, route_source_layer="builtin"))

    # Type annotations first, then method annotations, in a stable sorted order.
    tagged: list[tuple[bool, AnnotationRef]] = [
        (False, a) for a in enclosing_type.annotations
    ]
    tagged.extend((True, a) for a in method_decl.annotations)
    tagged.sort(key=lambda pair: (pair[1].name, pair[1].qualified, pair[0]))
    ordered_anns = [ann for _on_method, ann in tagged]

    legacy_names = {"CodebaseRoute", "CodebaseRoutes"}
    if any(ann.name in legacy_names for ann in ordered_anns):
        print(
            "[lancedb-mcp] v1 brownfield annotation detected; migrate to "
            "CodebaseHttpRoute / CodebaseAsyncRoute / CodebaseHttpClient",
            file=sys.stderr,
        )

    # ----- Step 2: Layer B — annotation route hints -----
    for ann in ordered_anns:
        ann_hint = _route_hint_lookup(ann, overrides.annotation_to_route_hint)
        if ann_hint is not None:
            working.append(
                _route_decl_from_route_hint(
                    ann_hint, source_layer="layer_b_ann", **location
                ),
            )

    # ----- Step 3: Layer A — meta-linked custom mapping annotations -----
    if meta_chain is not None:
        already_emitted: set[tuple[str, str]] = set()
        for ann in ordered_anns:
            identity = (ann.name, ann.qualified)
            if identity in already_emitted:
                continue
            derived = _layer_a_route_decls_from_ann(ann, meta_chain, **location)
            if derived:
                already_emitted.add(identity)
                working.extend(derived)

    # ----- Step 4: Layer C — in-source @CodebaseRoute -----
    working = _merge_layer_c_codebase_routes(working, layer_c_src)

    # ----- Step 5: Layer B — per-type FQN route hint -----
    type_hint = overrides.fqn_to_route_hint.get(enclosing_type.fqn)
    if type_hint is None:
        return working
    return _apply_layer_b_fqn(working, type_hint, **location)
1173
+
1174
+
1175
+ def _client_hint_lookup(
1176
+ ann: AnnotationRef,
1177
+ hints: dict[str, HttpClientHint],
1178
+ ) -> HttpClientHint | None:
1179
+ q = ann.qualified.strip()
1180
+ if q in hints:
1181
+ return hints[q]
1182
+ if ann.name in hints:
1183
+ return hints[ann.name]
1184
+ for k, h in sorted(hints.items(), key=lambda kv: kv[0]):
1185
+ if k.endswith("." + ann.name):
1186
+ return h
1187
+ return None
1188
+
1189
+
1190
+ def _async_hint_lookup(
1191
+ ann: AnnotationRef,
1192
+ hints: dict[str, AsyncProducerHint],
1193
+ ) -> AsyncProducerHint | None:
1194
+ q = ann.qualified.strip()
1195
+ if q in hints:
1196
+ return hints[q]
1197
+ if ann.name in hints:
1198
+ return hints[ann.name]
1199
+ for k, h in sorted(hints.items(), key=lambda kv: kv[0]):
1200
+ if k.endswith("." + ann.name):
1201
+ return h
1202
+ return None
1203
+
1204
+
1205
def _call_from_http_hint(
    *,
    hint: HttpClientHint,
    base_call: OutgoingCallDecl | None,
    method_decl: MethodDecl,
    enclosing_type: TypeDecl,
    source_layer: str,
) -> OutgoingCallDecl:
    """Build an HTTP ``OutgoingCallDecl`` from ``hint``, backfilled from ``base_call``.

    Non-empty hint fields win; empty ones fall back to ``base_call`` (when
    given) and otherwise to ``""``. Location and method identity come from
    ``base_call`` if present, else from ``method_decl`` / ``enclosing_type``.
    The result is always marked resolved with ``confidence_base=1.0`` and
    ``resolution_strategy=source_layer``.
    """
    if base_call is not None:
        filename = base_call.filename
        start_line, end_line = base_call.start_line, base_call.end_line
        method_fqn, method_sig = base_call.method_fqn, base_call.method_sig
    else:
        filename = ""
        start_line, end_line = method_decl.start_line, method_decl.end_line
        method_fqn = f"{enclosing_type.fqn}#{method_decl.signature}"
        method_sig = method_decl.signature

    # Field-level fallbacks use truthiness of `base_call`, as the hint fields do.
    if base_call:
        inherited_kind = base_call.client_kind
        inherited_target = base_call.feign_target_name
        inherited_url = base_call.feign_target_url
        inherited_path = base_call.path_template_call
        inherited_verb = base_call.method_call
        raw_uri = base_call.raw_uri
    else:
        inherited_kind = inherited_target = inherited_url = ""
        inherited_path = inherited_verb = ""
        raw_uri = hint.path or ""

    return OutgoingCallDecl(
        method_fqn=method_fqn,
        method_sig=method_sig,
        client_kind=hint.client_kind or inherited_kind,
        channel="http",
        feign_target_name=hint.target_service or inherited_target,
        feign_target_url=inherited_url,
        path_template_call=hint.path or inherited_path,
        method_call=hint.method or inherited_verb,
        topic_call="",
        broker_call="",
        raw_uri=raw_uri,
        raw_topic="",
        resolution_strategy=source_layer,
        confidence_base=1.0,
        resolved=True,
        filename=filename,
        start_line=start_line,
        end_line=end_line,
    )
1240
+
1241
+
1242
def _call_from_async_hint(
    *,
    hint: AsyncProducerHint,
    base_call: OutgoingCallDecl | None,
    method_decl: MethodDecl,
    enclosing_type: TypeDecl,
    source_layer: str,
) -> OutgoingCallDecl:
    """Build an async ``OutgoingCallDecl`` from ``hint``, backfilled from ``base_call``.

    ``client_kind``, ``topic_call`` and ``broker_call`` prefer the hint and fall
    back to ``base_call``; all HTTP-specific fields are left empty. Location
    and method identity come from ``base_call`` when it is given, otherwise
    from ``method_decl`` / ``enclosing_type``. The result is always marked
    resolved with ``confidence_base=1.0`` and ``resolution_strategy=source_layer``.
    """
    have_base = base_call is not None
    own_fqn = f"{enclosing_type.fqn}#{method_decl.signature}"

    def _inherited(attr: str) -> str:
        # Field fallbacks use truthiness of `base_call`, like the hint fields.
        return getattr(base_call, attr) if base_call else ""

    raw_topic = base_call.raw_topic if base_call else (hint.topic or "")
    return OutgoingCallDecl(
        method_fqn=base_call.method_fqn if have_base else own_fqn,
        method_sig=base_call.method_sig if have_base else method_decl.signature,
        client_kind=hint.client_kind or _inherited("client_kind"),
        channel="async",
        feign_target_name="",
        feign_target_url="",
        path_template_call="",
        method_call="",
        topic_call=hint.topic or _inherited("topic_call"),
        broker_call=hint.broker or _inherited("broker_call"),
        raw_uri="",
        raw_topic=raw_topic,
        resolution_strategy=source_layer,
        confidence_base=1.0,
        resolved=True,
        filename=base_call.filename if have_base else "",
        start_line=base_call.start_line if have_base else method_decl.start_line,
        end_line=base_call.end_line if have_base else method_decl.end_line,
    )
1277
+
1278
+
1279
def resolve_http_client_for_method(
    *,
    method_decl: MethodDecl,
    enclosing_type: TypeDecl,
    overrides: BrownfieldOverrides,
    meta_chain: dict[str, frozenset[str]] | None,
    builtin_calls: list[OutgoingCallDecl],
) -> list[OutgoingCallDecl]:
    """Resolve outgoing HTTP calls for a method, layering brownfield overrides.

    Calls whose ``resolution_strategy`` is ``"codebase_client"`` are treated as
    in-source (layer C) rows; the remaining ``channel == "http"`` calls are the
    built-ins. Brownfield calls are collected in this order:

    1. Layer B — per-annotation hints (``overrides.annotation_to_http_client_hint``).
    2. Layer A — annotations whose meta-chain reaches ``CodebaseHttpClient(s)``
       (skipped when ``meta_chain`` is ``None``).
    3. Layer C — in-source HTTP rows, re-tagged ``layer_c_source``.
    4. Layer B — per-type FQN hint, re-applied over every collected call (or
       seeding one when none were collected).

    Returns the brownfield calls when there are any, otherwise the built-in
    HTTP calls.
    """
    builtin_http: list[OutgoingCallDecl] = []
    layer_c_src: list[OutgoingCallDecl] = []
    for call in builtin_calls:
        if call.resolution_strategy == "codebase_client":
            layer_c_src.append(call)
        elif call.channel == "http":
            builtin_http.append(call)

    # The anchor supplies location and fallback field values for synthesized calls.
    if builtin_http:
        anchor = builtin_http[0]
    elif layer_c_src:
        anchor = layer_c_src[0]
    else:
        anchor = None

    tagged: list[tuple[bool, AnnotationRef]] = [
        (False, a) for a in enclosing_type.annotations
    ]
    tagged.extend((True, a) for a in method_decl.annotations)
    tagged.sort(key=lambda pair: (pair[1].name, pair[1].qualified, pair[0]))
    ordered_anns = [ann for _on_method, ann in tagged]

    def _synthesize(
        hint: HttpClientHint, base: OutgoingCallDecl | None, layer: str,
    ) -> OutgoingCallDecl:
        return _call_from_http_hint(
            hint=hint,
            base_call=base,
            method_decl=method_decl,
            enclosing_type=enclosing_type,
            source_layer=layer,
        )

    brownfield_calls: list[OutgoingCallDecl] = []

    # Layer B — annotation hints.
    for ann in ordered_anns:
        ann_hint = _client_hint_lookup(ann, overrides.annotation_to_http_client_hint)
        if ann_hint is not None:
            brownfield_calls.append(_synthesize(ann_hint, anchor, "layer_b_ann"))

    # Layer A — custom annotations meta-linked to CodebaseHttpClient(s).
    if meta_chain is not None:
        handled: set[tuple[str, str]] = set()
        for ann in ordered_anns:
            identity = (ann.name, ann.qualified)
            if identity in handled or ann.name in CODEBASE_HTTP_CLIENT_ANNOTATIONS:
                continue
            chain = meta_chain.get(ann.name, frozenset())
            if not any(
                marker in chain
                for marker in ("CodebaseHttpClient", "CodebaseHttpClients")
            ):
                continue
            meta_hint = overrides.annotation_to_http_client_hint.get("CodebaseHttpClient")
            if meta_hint is None:
                meta_hint = HttpClientHint(
                    client_kind=anchor.client_kind if anchor else "rest_template",
                    target_service=anchor.feign_target_name if anchor else "",
                    path=anchor.path_template_call if anchor else "",
                    method=anchor.method_call if anchor else "",
                )
            handled.add(identity)
            brownfield_calls.append(_synthesize(meta_hint, anchor, "layer_a_meta"))

    # Layer C — in-source rows.
    brownfield_calls.extend(
        replace(call, resolution_strategy="layer_c_source")
        for call in layer_c_src
        if call.channel == "http"
    )

    # Layer B — per-type FQN hint, applied last.
    type_hint = overrides.fqn_to_http_client_hint.get(enclosing_type.fqn)
    if type_hint is not None:
        bases = brownfield_calls if brownfield_calls else [anchor]
        brownfield_calls = [
            _synthesize(type_hint, base, "layer_b_fqn") for base in bases
        ]
    return brownfield_calls or builtin_http
1369
+
1370
+
1371
def resolve_async_producer_for_method(
    *,
    method_decl: MethodDecl,
    enclosing_type: TypeDecl,
    overrides: BrownfieldOverrides,
    meta_chain: dict[str, frozenset[str]] | None,
    builtin_calls: list[OutgoingCallDecl],
) -> list[OutgoingCallDecl]:
    """Resolve outgoing async (producer) calls for a method, layering brownfield overrides.

    Calls whose ``resolution_strategy`` is ``"codebase_producer"`` are treated
    as in-source (layer C) rows; the remaining ``channel == "async"`` calls are
    the built-ins. Brownfield calls are collected in this order:

    1. Layer B — per-annotation hints (``overrides.annotation_to_async_producer_hint``).
    2. Layer A — annotations whose meta-chain reaches ``CodebaseProducer(s)``
       (skipped when ``meta_chain`` is ``None``).
    3. Layer C — in-source async rows, re-tagged ``layer_c_source``.
    4. Layer B — per-type FQN hint, re-applied over every collected call (or
       seeding one when none were collected).

    Returns the brownfield calls when there are any, otherwise the built-in
    async calls.
    """
    builtin_async: list[OutgoingCallDecl] = []
    layer_c_src: list[OutgoingCallDecl] = []
    for call in builtin_calls:
        if call.resolution_strategy == "codebase_producer":
            layer_c_src.append(call)
        elif call.channel == "async":
            builtin_async.append(call)

    # The anchor supplies location and fallback field values for synthesized calls.
    if builtin_async:
        anchor = builtin_async[0]
    elif layer_c_src:
        anchor = layer_c_src[0]
    else:
        anchor = None

    tagged: list[tuple[bool, AnnotationRef]] = [
        (False, a) for a in enclosing_type.annotations
    ]
    tagged.extend((True, a) for a in method_decl.annotations)
    tagged.sort(key=lambda pair: (pair[1].name, pair[1].qualified, pair[0]))
    ordered_anns = [ann for _on_method, ann in tagged]

    def _synthesize(
        hint: AsyncProducerHint, base: OutgoingCallDecl | None, layer: str,
    ) -> OutgoingCallDecl:
        return _call_from_async_hint(
            hint=hint,
            base_call=base,
            method_decl=method_decl,
            enclosing_type=enclosing_type,
            source_layer=layer,
        )

    brownfield_calls: list[OutgoingCallDecl] = []

    # Layer B — annotation hints.
    for ann in ordered_anns:
        ann_hint = _async_hint_lookup(ann, overrides.annotation_to_async_producer_hint)
        if ann_hint is not None:
            brownfield_calls.append(_synthesize(ann_hint, anchor, "layer_b_ann"))

    # Layer A — custom annotations meta-linked to CodebaseProducer(s).
    if meta_chain is not None:
        handled: set[tuple[str, str]] = set()
        for ann in ordered_anns:
            identity = (ann.name, ann.qualified)
            if identity in handled or ann.name in CODEBASE_PRODUCER_ANNOTATIONS:
                continue
            chain = meta_chain.get(ann.name, frozenset())
            if not any(
                marker in chain
                for marker in ("CodebaseProducer", "CodebaseProducers")
            ):
                continue
            meta_hint = overrides.annotation_to_async_producer_hint.get("CodebaseProducer")
            if meta_hint is None:
                meta_hint = AsyncProducerHint(
                    client_kind=anchor.client_kind if anchor else "kafka_send",
                    topic=anchor.topic_call if anchor else "",
                    broker=anchor.broker_call if anchor else "",
                )
            handled.add(identity)
            brownfield_calls.append(_synthesize(meta_hint, anchor, "layer_a_meta"))

    # Layer C — in-source rows.
    brownfield_calls.extend(
        replace(call, resolution_strategy="layer_c_source")
        for call in layer_c_src
        if call.channel == "async"
    )

    # Layer B — per-type FQN hint, applied last.
    type_hint = overrides.fqn_to_async_producer_hint.get(enclosing_type.fqn)
    if type_hint is not None:
        bases = brownfield_calls if brownfield_calls else [anchor]
        brownfield_calls = [
            _synthesize(type_hint, base, "layer_b_fqn") for base in bases
        ]
    return brownfield_calls or builtin_async
1460
+
1461
+
1462
+ def _resolve_with_root(
1463
+ file_path: str, project_root: str | Path | None,
1464
+ ) -> tuple[Path, Path | None]:
1465
+ p = Path(file_path)
1466
+ if project_root is None:
1467
+ try:
1468
+ return p.resolve(), None
1469
+ except OSError:
1470
+ return p, None
1471
+ root = Path(project_root).resolve()
1472
+ try:
1473
+ p_abs = (root / p).resolve() if not p.is_absolute() else p.resolve()
1474
+ except OSError:
1475
+ p_abs = p
1476
+ return p_abs, root
1477
+
1478
+
1479
+ def _bounded_parents(p: Path, root: Path | None) -> list[Path]:
1480
+ """Parents of `p`, stopping at (and not crossing above) `root`."""
1481
+ try:
1482
+ parents = list(p.parents)
1483
+ except OSError:
1484
+ return []
1485
+ if root is None:
1486
+ return parents
1487
+ bounded: list[Path] = []
1488
+ for parent in parents:
1489
+ bounded.append(parent)
1490
+ if parent == root:
1491
+ break
1492
+ return bounded
1493
+
1494
+
1495
def _has_build_marker(directory: Path) -> bool:
    """True when `directory` directly contains a regular file named in `BUILD_MARKERS`."""
    return any((directory / marker).is_file() for marker in BUILD_MARKERS)
1500
+
1501
+
1502
def module_for_path(file_path: str, project_root: str | Path | None = None) -> str:
    """Directory name of the innermost ancestor that holds a build marker.

    Ancestors are searched from the file outwards, bounded by `project_root`
    (inclusive) when it is given. Returns "" when no ancestor has a build
    marker.
    """
    resolved, root = _resolve_with_root(file_path, project_root)
    marked_names = (
        ancestor.name
        for ancestor in _bounded_parents(resolved, root)
        if _has_build_marker(ancestor)
    )
    return next(marked_names, "")
1513
+
1514
+
1515
def microservice_for_path(
    file_path: str, project_root: str | Path | None = None,
) -> str:
    """Name of the microservice directory that owns `file_path`.

    Resolution order, first hit wins:

    1. Explicit override: the names returned by `load_microservice_overrides`.
       The file's ancestors are scanned outermost-first and the first ancestor
       whose directory name is listed wins.
    2. Outermost build-marker ancestor between `project_root` and the file
       (i.e. the build marker closest to `project_root`), unless that ancestor
       is `project_root` itself.
    3. First path segment under `project_root`.
    4. "" when none of the above apply.
    """
    resolved, root = _resolve_with_root(file_path, project_root)
    ancestors = _bounded_parents(resolved, root)  # innermost first

    # 1. Explicit overrides; fall through to structural inference on no match.
    override_names = load_microservice_overrides(project_root)
    if override_names:
        wanted = set(override_names)
        override_hit = next(
            (d.name for d in reversed(ancestors) if d.name in wanted),
            None,
        )
        if override_hit is not None:
            return override_hit

    # 2. Outermost build marker: the last marked ancestor in innermost-first order.
    marked = [d for d in ancestors if _has_build_marker(d)]
    if marked:
        outermost = marked[-1]
        if root is None or outermost != root:
            return outermost.name

    # 3. The ancestor sitting directly below `root`.
    if root is not None:
        for ancestor in ancestors:
            try:
                rel_parts = ancestor.relative_to(root).parts
            except ValueError:
                continue
            if len(rel_parts) == 1:
                return rel_parts[0]

    return ""
1566
+
1567
+
1568
+ # ---------- chunk enrichment ----------
1569
+
1570
+
1571
+ def _flatten_types(ast: JavaFileAst) -> list[TypeDecl]:
1572
+ return list(ast.all_types)
1573
+
1574
+
1575
def _enclosing_type(ast: JavaFileAst, start: int, end: int) -> TypeDecl | None:
    """Pick the type declaration that best matches the byte range [start, end].

    Preference goes to the smallest type whose [start_byte, end_byte] fully
    contains the range (first one wins on ties). If none contains it, the type
    with the largest positive overlap is returned; ``None`` when nothing
    overlaps.
    """
    tightest: TypeDecl | None = None
    tightest_span = -1
    for candidate in _flatten_types(ast):
        if not (candidate.start_byte <= start and end <= candidate.end_byte):
            continue
        width = candidate.end_byte - candidate.start_byte
        if tightest is not None and tightest_span >= 0 and width >= tightest_span:
            continue
        tightest, tightest_span = candidate, width
    if tightest is not None:
        return tightest

    def _overlap(t: TypeDecl) -> int:
        return max(0, min(end, t.end_byte) - max(start, t.start_byte))

    widest = max(_flatten_types(ast), key=_overlap, default=None)
    if widest is None or _overlap(widest) <= 0:
        return None
    return widest
1599
+
1600
+
1601
def _symbols_in_range(ast: JavaFileAst, start: int, end: int) -> list[str]:
    """Names of types, fields and methods whose ``start_byte`` lies in [start, end].

    Only types overlapping the range are inspected. Names are reported once,
    in first-seen order (type, then its fields, then its methods).
    """
    names: list[str] = []
    emitted: set[str] = set()

    def _note(decl: Any) -> None:
        if start <= decl.start_byte <= end and decl.name not in emitted:
            names.append(decl.name)
            emitted.add(decl.name)

    for type_decl in _flatten_types(ast):
        overlaps = type_decl.end_byte >= start and type_decl.start_byte <= end
        if not overlaps:
            continue
        _note(type_decl)
        for field_decl in type_decl.fields:
            _note(field_decl)
        for method in type_decl.methods:
            _note(method)
    return names
1619
+
1620
+
1621
def enrich_chunk(
    ast: JavaFileAst,
    *,
    chunk_start_byte: int,
    chunk_end_byte: int,
    file_path: str,
    project_root: str | Path | None = None,
) -> ChunkEnrichment:
    """Compute enrichment metadata for a single chunk of a parsed Java file.

    Module and microservice come from the file location. When a type encloses
    (or best overlaps) the chunk, its FQN, kind, annotations, role and
    capabilities are reported; otherwise those fields are empty and the role
    is ``"OTHER"``.
    """
    module = module_for_path(file_path, project_root)
    microservice = microservice_for_path(file_path, project_root)
    encl = _enclosing_type(ast, chunk_start_byte, chunk_end_byte)

    if encl is None:
        # No type matches the chunk (e.g. package/import-only chunk).
        return ChunkEnrichment(
            package=ast.package,
            module=module,
            microservice=microservice,
            primary_type_fqn="",
            primary_type_kind="",
            role="OTHER",
            annotations_on_type=[],
            symbols=_symbols_in_range(ast, chunk_start_byte, chunk_end_byte),
            capabilities=[],
        )

    ann_names = [a.name for a in encl.annotations]
    resolved_root: str | None = None
    if project_root is not None:
        try:
            resolved_root = str(Path(project_root).resolve())
        except OSError:
            resolved_root = str(project_root)
    bov = load_brownfield_overrides(project_root)
    # The meta-chain is only collected when a project root string is available.
    mchain = collect_annotation_meta_chain(resolved_root) if resolved_root else None
    role, cap_list = resolve_role_and_capabilities(
        encl,
        overrides=bov,
        meta_chain=mchain,
    )
    return ChunkEnrichment(
        package=ast.package,
        module=module,
        microservice=microservice,
        primary_type_fqn=encl.fqn,
        primary_type_kind=encl.kind,
        role=role,
        annotations_on_type=ann_names,
        symbols=_symbols_in_range(ast, chunk_start_byte, chunk_end_byte),
        capabilities=cap_list,
    )
1670
+
1671
+
1672
def symbol_id(kind: str, fqn: str, file_path: str = "", start_byte: int = 0) -> str:
    """Deterministic SHA1 hex id for Kuzu Symbol nodes.

    The digest covers ``kind``, ``fqn``, ``file_path`` and ``start_byte``
    joined with ``|`` and encoded as UTF-8.
    """
    payload = "{}|{}|{}|{}".format(kind, fqn, file_path, start_byte)
    digest = hashlib.sha1()
    digest.update(payload.encode("utf-8"))
    return digest.hexdigest()
1676
+
1677
+
1678
def phantom_id(simple_or_fqn: str) -> str:
    """SHA1 hex id for unresolved/external type targets (phantom Symbol rows).

    The hashed key is ``class|__phantom.<name>|`` encoded as UTF-8.
    """
    payload = "class|__phantom.{}|".format(simple_or_fqn).encode("utf-8")
    return hashlib.new("sha1", payload).hexdigest()