java-codebase-rag 0.5.3__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ladybug_queries.py ADDED
@@ -0,0 +1,1995 @@
1
+ """Read-only Cypher helpers over the Ladybug AST graph built by `build_ast_graph.py`.
2
+
3
+ Each function opens a Ladybug connection on demand and returns plain JSON-ish dicts
4
+ so the MCP server can serialize them without further mapping.
5
+
6
+ The Ladybug database is opened read-only and cached per-process. This module is
7
+ intentionally dependency-light: nothing here imports LanceDB or sentence-transformers.
8
+
9
+ Cypher pitfalls (see also ``AGENTS.md``): avoid ``label(e) IN $list`` in ``WHERE`` for
10
+ relationship-type filters; use OR of ``label(e) = $param`` with bound parameters.
11
+ Typed unions ``-[e:A|B]-`` require every ``RETURN`` column on ``e`` to exist on all
12
+ listed rel types, or the binder may fail.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import logging
18
+ import os
19
+ import re
20
+ import threading
21
+ from dataclasses import asdict, dataclass
22
+ from pathlib import Path
23
+ from typing import Any, Literal
24
+
25
+ import ladybug
26
+
27
+ from ast_java import ONTOLOGY_VERSION as _ONTOLOGY_VERSION
28
+
29
+ log = logging.getLogger(__name__)
30
+
31
+
32
+ def _parse_ladybug_json(raw: str | None) -> dict[str, Any]:
33
+ """Parse JSON from LadybugDB which returns unquoted keys like {key: value}."""
34
+ if not raw:
35
+ return {}
36
+ # LadybugDB returns JSON without quotes around keys: {packages: 1, files: 2}
37
+ # Convert to standard JSON: {"packages": 1, "files": 2}
38
+ # This regex matches word characters followed by ':' at the start of a key
39
+ quoted = re.sub(r'(\w+):', r'"\1":', raw)
40
+ try:
41
+ return json.loads(quoted)
42
+ except Exception:
43
+ try:
44
+ # Fallback: try parsing as-is (for standard JSON)
45
+ return json.loads(raw)
46
+ except Exception:
47
+ log.warning("Failed to parse counts_json: %s", raw[:100])
48
+ return {}
49
+
50
+ # Composed describe / neighbors dot-keys (not stored graph edge labels).
51
# Each pair is (composed dot-key, stored relationship label). The label is the
# second hop followed from a member reached via DECLARES; see
# ``member_edge_rollup_for`` / ``member_edge_traversal_for``.
_MEMBER_EDGE_COMPOSED_REL_MAP: tuple[tuple[str, str], ...] = (
    ("DECLARES.DECLARES_CLIENT", "DECLARES_CLIENT"),
    ("DECLARES.DECLARES_PRODUCER", "DECLARES_PRODUCER"),
    ("DECLARES.EXPOSES", "EXPOSES"),
)
# Same data keyed by composed dot-key for direct lookup.
_MEMBER_EDGE_COMPOSED_REL_BY_KEY: dict[str, str] = dict(_MEMBER_EDGE_COMPOSED_REL_MAP)

# Override-axis composed keys. A ``None`` label marks the base key, for which
# ``override_axis_traversal_for`` returns overrider ids only (no second hop).
_OVERRIDE_AXIS_COMPOSED_REL_MAP: tuple[tuple[str, str | None], ...] = (
    ("OVERRIDDEN_BY", None),
    ("OVERRIDDEN_BY.DECLARES_CLIENT", "DECLARES_CLIENT"),
    ("OVERRIDDEN_BY.DECLARES_PRODUCER", "DECLARES_PRODUCER"),
    ("OVERRIDDEN_BY.EXPOSES", "EXPOSES"),
)
_OVERRIDE_AXIS_COMPOSED_REL_BY_KEY: dict[str, str | None] = dict(_OVERRIDE_AXIS_COMPOSED_REL_MAP)
# Set of every accepted override-axis key (base key included).
OVERRIDE_AXIS_COMPOSED_EDGE_TYPES: frozenset[str] = frozenset(_OVERRIDE_AXIS_COMPOSED_REL_BY_KEY)
66
+
67
+
68
+ def _coerce_id_list(raw: Any) -> list[str]:
69
+ """Normalize Ladybug ``collect(DISTINCT ...)`` list results to string ids."""
70
+ if raw is None:
71
+ return []
72
+ if isinstance(raw, list):
73
+ return [str(x) for x in raw if x is not None and str(x) != ""]
74
+ s = str(raw)
75
+ return [s] if s else []
76
+
77
+
78
# Names exported by ``from ladybug_queries import *``; underscore-prefixed
# helpers in this module are intentionally left out.
__all__ = [
    "LadybugGraph",
    "resolve_ladybug_path",
    "SymbolHit",
    "EdgeHit",
    "CallEdge",
    "ViaEdge",
    "StageSymbol",
    "RouteCaller",
    "find_symbols_in_file_range",
]
89
+
90
+
91
def resolve_ladybug_path(explicit: str | None = None) -> str:
    """Resolve the Ladybug DB path the same way the builder does.

    Precedence:
      1. ``explicit`` (with ``~`` expanded) when given.
      2. ``$JAVA_CODEBASE_RAG_INDEX_DIR/code_graph.lbug`` when the variable is
         set to a local path (``s3://``, ``gs://`` and ``az://`` are skipped).
      3. ``<cwd>/.java-codebase-rag/code_graph.lbug``, fully resolved.
    """
    if explicit:
        return str(Path(explicit).expanduser())

    index_dir = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
    is_remote = index_dir.startswith(("s3://", "gs://", "az://"))
    if not index_dir or is_remote:
        default_root = Path.cwd() / ".java-codebase-rag"
        return str((default_root / "code_graph.lbug").resolve())

    local_dir = os.path.expanduser(index_dir.rstrip("/"))
    return str(Path(local_dir) / "code_graph.lbug")
99
+
100
+
101
@dataclass
class SymbolHit:
    """One ``Symbol`` node row, matching the ``_symbol_return_for`` projection.

    ``_row_to_symbol`` builds instances and replaces missing/NULL columns with
    empty strings, ``0`` or empty lists, so every field is always populated.
    """
    id: str
    kind: str
    name: str
    fqn: str  # ``_type_part_fqn`` treats the text before ``#`` as the type part
    package: str
    module: str
    microservice: str
    filename: str
    start_line: int  # compared as 1-based inclusive by ``find_symbols_in_file_range``
    end_line: int
    start_byte: int  # presumably byte offsets into ``filename`` — confirm with the builder
    end_byte: int
    modifiers: list[str]
    annotations: list[str]
    capabilities: list[str]
    role: str
    signature: str
    parent_id: str
    resolved: bool  # ``_row_to_symbol`` defaults this to True when the column is absent
122
+
123
+
124
@dataclass
class EdgeHit:
    """A typed relationship between two symbols plus optional edge attributes."""
    type: str  # EXTENDS | IMPLEMENTS | INJECTS
    src: SymbolHit
    dst: SymbolHit
    # The attributes below default to empty; presumably only some edge types
    # (e.g. INJECTS) populate them — confirm against the graph builder.
    mechanism: str = ""
    annotation: str = ""
    field_or_param: str = ""
    resolved: bool = True
133
+
134
+
135
@dataclass
class CallEdge:
    """A ``CALLS`` edge with both endpoints materialised as ``SymbolHit``.

    Built by ``_row_to_call_edge`` from rows whose endpoint columns carry the
    ``caller_`` / ``callee_`` prefixes.
    """
    src: SymbolHit  # caller
    dst: SymbolHit  # callee
    confidence: float  # 0.0 when the column is missing/NULL
    strategy: str
    source: str  # ``_row_to_call_edge`` falls back to "static"
    call_site_line: int
    call_site_byte: int
    arg_count: int
    resolved: bool
146
+
147
+
148
@dataclass
class ViaEdge:
    """Labelled edge from a previous-stage node to a stage symbol.

    Populated by `trace_flow` so callers can see *why* two types ended up
    in the same chain (e.g. `INJECTS` vs `IMPLEMENTS` vs `CALLS`) and at what hop
    from the frontier they were reached.
    """
    edge_type: str  # INJECTS | EXTENDS | IMPLEMENTS | CALLS | HTTP_CALLS | ASYNC_CALLS
    from_fqn: str  # presumably the fqn of the node the edge starts from — confirm in trace_flow
    hop: int  # 1 = direct neighbour of previous-stage frontier
    caller_node_id: str = ""  # Client id when edge_type is HTTP_CALLS (SCHEMA v2)
160
+
161
+
162
@dataclass
class StageSymbol:
    """A trace_flow stage entry: the symbol plus the edges that pulled it in.

    Stage 0 (seeds) has `via=[]`. Later stages list every first-time path
    from the previous frontier to `symbol`.
    """
    symbol: SymbolHit  # the node that belongs to this stage
    via: list[ViaEdge]  # edges from the previous frontier; empty for seeds
171
+
172
+
173
@dataclass
class RouteCaller:
    """A caller of a route, identified by its client or producer node.

    NOTE(review): the code that fills this record is outside the visible part
    of the file; the field notes below are read off names and types only.
    """
    caller_node_id: str
    caller_node_kind: Literal["client", "producer"]
    caller_microservice: str
    declaring_symbol_id: str  # presumably the Symbol that declares the client/producer
    confidence: float
    match: str
    # Optional details, empty by default. Presumably target_service/raw_uri
    # apply to "client" callers and topic/broker to "producer" — confirm.
    target_service: str = ""
    raw_uri: str = ""
    topic: str = ""
    broker: str = ""
185
+
186
+
187
+ def _symbol_return_for(alias: str) -> str:
188
+ """Ladybug RETURN projection for Symbol properties, using the given node alias.
189
+
190
+ Centralised so queries that bind Symbol under a non-`s` alias (e.g. `n` in
191
+ graph-expansion / flow-tracing) don't emit `s.*` references that Ladybug
192
+ rejects with `Variable s is not in scope`.
193
+ """
194
+ return (
195
+ f"{alias}.id AS id, {alias}.kind AS kind, {alias}.name AS name, {alias}.fqn AS fqn, "
196
+ f"{alias}.package AS package, {alias}.module AS module, "
197
+ f"{alias}.microservice AS microservice, {alias}.filename AS filename, "
198
+ f"{alias}.start_line AS start_line, {alias}.end_line AS end_line, "
199
+ f"{alias}.start_byte AS start_byte, {alias}.end_byte AS end_byte, "
200
+ f"{alias}.modifiers AS modifiers, {alias}.annotations AS annotations, "
201
+ f"{alias}.capabilities AS capabilities, "
202
+ f"{alias}.role AS role, {alias}.signature AS signature, "
203
+ f"{alias}.parent_id AS parent_id, {alias}.resolved AS resolved"
204
+ )
205
+
206
+
207
+ _SYMBOL_RETURN = _symbol_return_for("s")
208
+
209
+
210
+ def _scope_filters(
211
+ alias: str,
212
+ *,
213
+ module: str | None,
214
+ microservice: str | None,
215
+ params: dict[str, Any],
216
+ ) -> list[str]:
217
+ """Build module/microservice scoping predicates against a node alias.
218
+
219
+ Mutates `params` to bind `$module` / `$microservice` only when the
220
+ corresponding filter is set, so unused names don't leak into the
221
+ Ladybug plan.
222
+ """
223
+ out: list[str] = []
224
+ if module:
225
+ params["module"] = module
226
+ out.append(f"{alias}.module = $module")
227
+ if microservice:
228
+ params["microservice"] = microservice
229
+ out.append(f"{alias}.microservice = $microservice")
230
+ return out
231
+
232
+
233
# FQN prefixes that ``_is_external_fqn`` treats as external.
_EXTERNAL_PREFIXES = (
    "java.",
    "javax.",
    "jakarta.",
    "org.springframework.",
    "lombok.",
)

# Stored relationship labels counted one by one in ``LadybugGraph.meta``.
# Each value is interpolated directly into a Cypher pattern there, so entries
# must be plain identifiers.
_EDGE_TYPES: tuple[str, ...] = (
    "EXTENDS",
    "IMPLEMENTS",
    "INJECTS",
    "OVERRIDES",
    "DECLARES",
    "CALLS",
    "EXPOSES",
    "DECLARES_CLIENT",
    "DECLARES_PRODUCER",
    "HTTP_CALLS",
    "ASYNC_CALLS",
)
254
+
255
+
256
+ def _type_part_fqn(sym_fqn: str) -> str:
257
+ return sym_fqn.split("#", 1)[0]
258
+
259
+
260
def _is_external_fqn(fqn: str) -> bool:
    """Tell whether the type part of ``fqn`` starts with a known external prefix."""
    type_fqn = _type_part_fqn(fqn)
    # str.startswith accepts a tuple and matches if any prefix matches.
    return type_fqn.startswith(_EXTERNAL_PREFIXES)
263
+
264
+
265
def _row_to_symbol(row: dict[str, Any]) -> SymbolHit:
    """Convert a projected Symbol row into a ``SymbolHit``.

    Missing or NULL columns become ``""`` (text), ``0`` (numbers) or ``[]``
    (lists). ``resolved`` is True only when the column is absent or truthy.
    """

    def text(column: str) -> str:
        return row.get(column, "") or ""

    def number(column: str) -> int:
        return int(row.get(column) or 0)

    def items(column: str) -> list[str]:
        return list(row.get(column) or [])

    return SymbolHit(
        id=text("id"),
        kind=text("kind"),
        name=text("name"),
        fqn=text("fqn"),
        package=text("package"),
        module=text("module"),
        microservice=text("microservice"),
        filename=text("filename"),
        start_line=number("start_line"),
        end_line=number("end_line"),
        start_byte=number("start_byte"),
        end_byte=number("end_byte"),
        modifiers=items("modifiers"),
        annotations=items("annotations"),
        capabilities=items("capabilities"),
        role=text("role"),
        signature=text("signature"),
        parent_id=text("parent_id"),
        resolved=bool(row.get("resolved", True)),
    )
287
+
288
+
289
# Symbol column names, in the same order as the ``_symbol_return_for``
# projection. NOTE(review): no use is visible in this part of the file.
_SYM_COLS = (
    "id", "kind", "name", "fqn", "package", "module", "microservice",
    "filename", "start_line", "end_line", "start_byte", "end_byte",
    "modifiers", "annotations", "capabilities", "role", "signature", "parent_id", "resolved",
)
294
+
295
+
296
def find_symbols_in_file_range(
    graph: "LadybugGraph",
    *,
    filename: str,
    start_line: int,
    end_line: int,
) -> list[SymbolHit]:
    """List the ``Symbol`` rows of ``filename`` that overlap a line range.

    The range ``[start_line, end_line]`` is 1-based and inclusive. An invalid
    range (start below 1, or end before start) yields ``[]`` without querying.
    Results are ordered by ``start_line`` then ``end_line``.
    """
    if not (1 <= start_line <= end_line):
        return []
    # Interval overlap: the symbol starts on or before the range end and ends
    # on or after the range start.
    query = " ".join(
        (
            "MATCH (s:Symbol) WHERE s.filename = $fn",
            "AND s.start_line <= $hmax AND s.end_line >= $hmin",
            f"RETURN {_SYMBOL_RETURN} ORDER BY s.start_line, s.end_line",
        )
    )
    bindings = {"fn": filename, "hmax": int(end_line), "hmin": int(start_line)}
    hits = []
    for record in graph._rows(query, bindings):
        hits.append(_row_to_symbol(record))
    return hits
313
+
314
+
315
+ def _prefixed_symbol_row(prefix: str, row: dict[str, Any]) -> dict[str, Any]:
316
+ p = f"{prefix}_"
317
+ return {k[len(p) :]: v for k, v in row.items() if k.startswith(p)}
318
+
319
+
320
def _row_to_call_edge(row: dict[str, Any]) -> CallEdge:
    """Convert a CALLS row into a ``CallEdge``.

    Endpoint columns are expected under the ``caller_`` / ``callee_`` prefixes;
    edge attributes are read unprefixed. Missing/NULL attributes fall back to
    ``0`` / ``0.0`` / ``""``, and ``source`` falls back to ``"static"``.
    """

    def endpoint(prefix: str) -> SymbolHit:
        return _row_to_symbol(_prefixed_symbol_row(prefix, row))

    def whole(column: str) -> int:
        return int(row.get(column) or 0)

    return CallEdge(
        src=endpoint("caller"),
        dst=endpoint("callee"),
        confidence=float(row.get("confidence") or 0.0),
        strategy=str(row.get("strategy") or ""),
        source=str(row.get("source") or "static"),
        call_site_line=whole("call_site_line"),
        call_site_byte=whole("call_site_byte"),
        arg_count=whole("arg_count"),
        resolved=bool(row.get("resolved", True)),
    )
332
+
333
+
334
+ def _call_graph_needle_phantom_arity_alt(needle: str) -> str | None:
335
+ """Map ``Type#method(123)`` → ``Type#method(?)`` for phantom callee FQNs (D1)."""
336
+ if "#" not in needle:
337
+ return None
338
+ i = needle.rfind("(")
339
+ if i <= 0 or not needle.endswith(")"):
340
+ return None
341
+ inner = needle[i + 1 : -1]
342
+ if not inner.isdigit():
343
+ return None
344
+ return needle[:i] + "(?)"
345
+
346
+
347
+ class LadybugGraph:
348
+ """Thin wrapper around a read-only Ladybug connection.
349
+
350
+ Safe to share across threads: we hold a single `Connection`, guarded by a lock.
351
+ """
352
+
353
+ _lock = threading.Lock()
354
+ _instance: "LadybugGraph | None" = None
355
+ _instance_path: str | None = None
356
+
357
def __init__(self, db_path: str) -> None:
    """Open ``db_path`` read-only and create the connection used by this wrapper.

    A per-instance lock is created as well; ``_rows`` holds it while running
    a query on the shared connection.
    """
    self.db_path = db_path
    database = ladybug.Database(db_path, read_only=True)
    self._db = database
    self._conn = ladybug.Connection(database)
    self._conn_lock = threading.Lock()
362
+
363
@classmethod
def get(cls, db_path: str | None = None) -> "LadybugGraph":
    """Return the cached graph for ``db_path``, opening it when needed.

    A new instance is opened when nothing is cached yet or when the resolved
    path differs from the cached one. Before it is cached, its ``meta()`` is
    checked against the required ontology version.

    Raises:
        RuntimeError: the graph reports an ontology version older than the
            required one. A ``meta()`` result containing ``"error"`` skips
            this check.
    """
    resolved = resolve_ladybug_path(db_path)
    with cls._lock:
        cached = cls._instance
        if cached is not None and cls._instance_path == resolved:
            return cached

        candidate = cls(resolved)
        meta = candidate.meta()
        graph_version = int(meta.get("ontology_version") or 0)
        is_stale = "error" not in meta and graph_version < _ONTOLOGY_VERSION
        if is_stale:
            raise RuntimeError(
                f"Graph ontology version {graph_version} is older than the "
                f"required version {_ONTOLOGY_VERSION}. "
                "Rebuild the graph: `python build_ast_graph.py --source-root <repo>`, "
                "or run `java-codebase-rag reprocess --source-root <repo>` for a full "
                "Lance+Ladybug re-index."
            )
        # Publish only after the version check has passed.
        cls._instance = candidate
        cls._instance_path = resolved
        return candidate
382
+
383
@classmethod
def exists(cls, db_path: str | None = None) -> bool:
    """Tell whether the resolved Ladybug path exists on disk.

    Directories and regular files both count: Ladybug represents a DB as a
    directory, and single-file DBs are allowed too.
    """
    return Path(resolve_ladybug_path(db_path)).exists()
391
+
392
+ # ---- low-level ----
393
+
394
+ def _rows(self, query: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
395
+ with self._conn_lock:
396
+ r = self._conn.execute(query, params or {})
397
+ columns = r.get_column_names()
398
+ out: list[dict[str, Any]] = []
399
+ while r.has_next():
400
+ vals = r.get_next()
401
+ out.append(dict(zip(columns, vals)))
402
+ return out
403
+
404
+ # ---- meta ----
405
+
406
def meta(self) -> dict[str, Any]:
    """Return graph-level metadata from the ``GraphMeta`` node plus live edge counts.

    The ``GraphMeta`` schema grew over time, so the richest projection is
    tried first and progressively older ones are used when a query fails
    (``pr_f1`` → ``pr_e3`` → ``pre_e3`` → ``pr_a2`` → ``legacy``). Fields that
    the matched schema tier does not provide keep neutral defaults (``0``,
    ``0.0``, ``{}`` or ``None``).

    Returns:
        A JSON-serialisable dict. On failure it is ``{"error": <message>}``:
        either every projection failed (message of the last exception) or no
        ``GraphMeta`` node exists.
    """
    base_cols = (
        "key", "ontology_version", "built_at", "source_root",
        "counts_json", "parse_errors",
    )
    # (column, kind) tables. ``kind`` selects the coercion and, when called
    # with no argument, also produces the default (int() -> 0, dict() -> {}).
    # The order doubles as the key order of the returned dict.
    routes_fields = (
        ("routes_total", int),
        ("exposes_total", int),
        ("routes_by_framework", dict),
        ("routes_resolved_pct", float),
    )
    calls_fields = (
        ("routes_from_brownfield_pct", float),
        ("routes_by_layer", dict),
        ("http_calls_total", int),
        ("async_calls_total", int),
        ("http_calls_by_strategy", dict),
        ("async_calls_by_strategy", dict),
        ("http_calls_resolved_pct", float),
        ("async_calls_resolved_pct", float),
        ("http_clients_from_brownfield_pct", float),
        ("async_producers_from_brownfield_pct", float),
        ("http_calls_match_breakdown", dict),
        ("async_calls_match_breakdown", dict),
        ("cross_service_calls_total", int),
    )
    routes_cols = tuple(name for name, _ in routes_fields)
    calls_cols = tuple(name for name, _ in calls_fields)
    pre_e3_cols = base_cols + routes_cols + calls_cols
    # Newest schema first; each tier is a superset of the ones after it.
    tiers = (
        ("pr_f1", pre_e3_cols + (
            "pass3_skipped_cross_service",
            "pass4_exposes_suppressed_feign",
            "cross_service_resolution",
        )),
        ("pr_e3", pre_e3_cols + (
            "pass3_skipped_cross_service",
            "cross_service_resolution",
        )),
        ("pre_e3", pre_e3_cols),
        ("pr_a2", base_cols + routes_cols),
        ("legacy", base_cols),
    )

    rows: list[dict[str, Any]] = []
    meta_mode = "legacy"
    failure: Exception | None = None
    for meta_mode, columns in tiers:
        projection = ", ".join(f"m.{column} AS {column}" for column in columns)
        try:
            rows = self._rows(f"MATCH (m:GraphMeta) RETURN {projection}")
        except Exception as exc:  # any failure means "try an older schema"
            failure = exc
        else:
            break
    else:
        return {"error": f"{failure}"}
    if not rows:
        return {"error": "no GraphMeta node"}
    row = rows[0]

    def read(column: str, kind: type) -> Any:
        """Coerce one column of ``row`` to ``kind`` (NULL/missing → default)."""
        if kind is dict:
            raw = row.get(column) or "{}"
            value = _parse_ladybug_json(raw) if isinstance(raw, str) else raw
            return value if isinstance(value, dict) else {}
        return kind(row.get(column) or 0)

    # ``counts_json`` goes through the same guarded path as the other JSON
    # columns, so a non-string or non-object value falls back to the zeroed
    # template instead of raising or leaking a non-dict.
    counts: dict[str, Any] = read("counts_json", dict)
    if not counts:
        counts = {
            "packages": 0, "files": 0, "types": 0, "members": 0, "phantoms": 0,
            "extends": 0, "implements": 0, "injects": 0, "declares": 0, "overrides": 0,
            "calls": 0, "routes": 0, "exposes": 0, "clients": 0, "declares_client": 0,
            "producers": 0, "declares_producer": 0, "http_calls": 0, "async_calls": 0,
        }

    has_routes = meta_mode != "legacy"
    has_calls = meta_mode in ("pr_f1", "pr_e3", "pre_e3")
    info: dict[str, Any] = {}
    for name, kind in routes_fields:
        info[name] = read(name, kind) if has_routes else kind()
    for name, kind in calls_fields:
        info[name] = read(name, kind) if has_calls else kind()
    # Not projected in "pre_e3"; the lookup then misses and yields 0.
    info["pass3_skipped_cross_service"] = (
        read("pass3_skipped_cross_service", int) if has_calls else 0
    )
    info["pass4_exposes_suppressed_feign"] = (
        read("pass4_exposes_suppressed_feign", int) if meta_mode == "pr_f1" else None
    )
    raw_csr = (
        row.get("cross_service_resolution")
        if meta_mode in ("pr_f1", "pr_e3")
        else None
    )
    info["cross_service_resolution"] = (
        str(raw_csr) if raw_csr not in (None, "") else None
    )

    # Live per-type edge counts; a failing type keeps 0 and is only logged.
    edge_counts = {edge: 0 for edge in _EDGE_TYPES}
    failed_edges: list[str] = []
    for edge_type in _EDGE_TYPES:
        try:
            edge_rows = self._rows(
                f"MATCH ()-[e:{edge_type}]->() RETURN count(e) AS n"
            )
            edge_counts[edge_type] = int(edge_rows[0].get("n") or 0) if edge_rows else 0
        except Exception as exc:
            failed_edges.append(edge_type)
            log.warning("edge count query failed for %s: %s", edge_type, exc)
    if len(failed_edges) == len(_EDGE_TYPES):
        log.warning("edge count queries failed for all edge types; returning zeroed edge_counts")

    return {
        "ontology_version": read("ontology_version", int),
        "built_at": read("built_at", int),
        "source_root": row.get("source_root") or "",
        "parse_errors": read("parse_errors", int),
        "counts": counts,
        **info,
        "edge_counts": edge_counts,
        "db_path": self.db_path,
    }
624
+
625
+ def edge_counts_for(self, node_id: str) -> dict[str, dict[str, int]]:
626
+ rows = self._rows(
627
+ "MATCH (n {id: $id})-[e]->() "
628
+ "RETURN label(e) AS edge_type, 'out' AS direction, count(e) AS n "
629
+ "UNION ALL "
630
+ "MATCH (n {id: $id})<-[e]-() "
631
+ "RETURN label(e) AS edge_type, 'in' AS direction, count(e) AS n",
632
+ {"id": node_id},
633
+ )
634
+ out: dict[str, dict[str, int]] = {}
635
+ for row in rows:
636
+ edge_type = str(row.get("edge_type") or "")
637
+ direction = str(row.get("direction") or "")
638
+ if edge_type == "" or direction not in ("in", "out"):
639
+ continue
640
+ out.setdefault(edge_type, {"in": 0, "out": 0})
641
+ out[edge_type][direction] = int(row.get("n") or 0)
642
+ return {
643
+ edge_type: dirs
644
+ for edge_type, dirs in out.items()
645
+ if int(dirs.get("in", 0)) > 0 or int(dirs.get("out", 0)) > 0
646
+ }
647
+
648
def member_edge_rollup_for(self, type_id: str) -> dict[str, dict[str, int]]:
    """Roll up 2-hop DECLARES member edge counts for a type Symbol (describe-time only).

    One count query runs per composed key. Keys use dot notation and are not
    stored graph edge labels; only keys with a positive count are returned,
    always as ``{"in": 0, "out": n}``.
    """
    bindings = {"id": type_id}
    totals: dict[str, dict[str, int]] = {}
    for composed_key, stored_rel in _MEMBER_EDGE_COMPOSED_REL_MAP:
        query = (
            f"MATCH (t:Symbol {{id: $id}})-[:DECLARES]->(m:Symbol)-[e:{stored_rel}]->() "
            "RETURN count(e) AS n"
        )
        total = 0
        for record in self._rows(query, bindings):
            total += int(record.get("n") or 0)
        if total > 0:
            totals[composed_key] = {"in": 0, "out": total}
    return totals
665
+
666
def member_edge_traversal_for(self, type_id: str, composed_key: str) -> list[dict[str, Any]]:
    """Traverse type → DECLARES → member → edge for a composed dot-key (neighbors path).

    Unknown ``composed_key`` values yield ``[]``. Each row carries the member
    id (``via_id``), the stored edge label, the terminal node id
    (``other_id``) and the edge attribute projection.
    """
    stored_rel = _MEMBER_EDGE_COMPOSED_REL_BY_KEY.get(composed_key)
    if stored_rel is None:
        return []
    edge_attrs = (
        "confidence", "strategy", "match", "mechanism", "annotation",
        "field_or_param", "source", "call_site_line", "call_site_byte",
        "arg_count", "resolved",
    )
    attr_projection = ", ".join(f"e.{attr} AS {attr}" for attr in edge_attrs)
    # Untyped [e] + label(e) filter: typed unions fail the binder when RETURN
    # references columns that exist on only some rel types (same pattern as
    # flat neighbors_v2).
    query = (
        "MATCH (t:Symbol {id: $id})-[:DECLARES]->(m:Symbol)-[e]->(term) "
        "WHERE label(e) = $rel "
        "RETURN m.id AS via_id, label(e) AS stored_edge_type, "
        "term.id AS other_id, " + attr_projection
    )
    return self._rows(query, {"id": type_id, "rel": stored_rel})
684
+
685
def override_axis_traversal_for(self, method_id: str, composed_key: str) -> list[dict[str, Any]]:
    """Traverse the override axis of a method Symbol for a composed dot-key (neighbors path).

    The dispatch hop uses stored ``[:OVERRIDES]`` edges (aligned with
    ``override_axis_rollup_for`` overrider ids). The base key
    ``OVERRIDDEN_BY`` returns overrider method ids only; composed keys return
    terminal rows with the full edge attribute projection plus ``via_id``
    (the overrider method id). Unknown keys yield ``[]``.
    """
    stored_rel = _OVERRIDE_AXIS_COMPOSED_REL_BY_KEY.get(composed_key)
    if stored_rel is None:
        if composed_key != "OVERRIDDEN_BY":
            return []
        base_query = (
            "MATCH (decl:Symbol {id: $id})<-[:OVERRIDES]-(mover:Symbol) "
            "RETURN mover.id AS other_id"
        )
        return self._rows(base_query, {"id": method_id})

    edge_attrs = (
        "confidence", "strategy", "match", "mechanism", "annotation",
        "field_or_param", "source", "call_site_line", "call_site_byte",
        "arg_count", "resolved",
    )
    attr_projection = ", ".join(f"e.{attr} AS {attr}" for attr in edge_attrs)
    composed_query = (
        "MATCH (decl:Symbol {id: $id})<-[:OVERRIDES]-(mover:Symbol)-[e]->(term) "
        "WHERE label(e) = $rel "
        "RETURN mover.id AS via_id, label(e) AS stored_edge_type, "
        "term.id AS other_id, " + attr_projection
    )
    return self._rows(composed_query, {"id": method_id, "rel": stored_rel})
712
+
713
+ def count_calls_for_symbol(self, origin_id: str, *, direction: Literal["in", "out"]) -> int:
714
+ """Count CALLS edges incident on a Symbol (hints / diagnostics)."""
715
+ if direction == "out":
716
+ pattern = "MATCH (origin:Symbol {id: $id})-[e:CALLS]->() RETURN count(e) AS n"
717
+ else:
718
+ pattern = "MATCH (origin:Symbol {id: $id})<-[e:CALLS]-() RETURN count(e) AS n"
719
+ rows = self._rows(pattern, {"id": origin_id})
720
+ return int(rows[0].get("n") or 0) if rows else 0
721
+
722
def neighbor_calls_for_symbol(
    self,
    origin_id: str,
    *,
    direction: Literal["in", "out"],
    offset: int = 0,
    limit: int | None = None,
    sql_pagination: bool = True,
    min_confidence: float | None = None,
    include_strategies: list[str] | None = None,
    exclude_strategies: list[str] | None = None,
    callee_declaring_role: str | None = None,
    callee_declaring_roles: list[str] | None = None,
    exclude_callee_declaring_roles: list[str] | None = None,
) -> list[dict[str, Any]]:
    """List CALLS neighbours of ``origin_id`` in call-site order.

    Rows are ordered by ``e.call_site_line, e.call_site_byte``. Optional
    edge-attribute filters are pushed into the ``WHERE`` clause:
    ``min_confidence`` and ``callee_declaring_role`` apply whenever they are
    not ``None``; the list-valued filters apply only when non-empty.

    ``SKIP``/``LIMIT`` are appended only when ``sql_pagination`` is true and
    ``limit`` is given; otherwise the whole ordered stream is returned so the
    caller can filter / paginate itself. ``direction="out"`` follows edges
    leaving the origin, any other value follows edges arriving at it.
    """
    params: dict[str, Any] = {"id": origin_id}
    predicates = ["origin.id = $id"]

    def _push(name: str, value: Any, predicate: str) -> None:
        # Register one bound parameter together with its predicate.
        predicates.append(predicate)
        params[name] = value

    if min_confidence is not None:
        _push("min_confidence", min_confidence, "e.confidence >= $min_confidence")
    if include_strategies:
        _push("include_strategies", include_strategies, "e.strategy IN $include_strategies")
    if exclude_strategies:
        _push("exclude_strategies", exclude_strategies, "NOT (e.strategy IN $exclude_strategies)")
    if callee_declaring_role is not None:
        _push(
            "callee_declaring_role",
            callee_declaring_role,
            "e.callee_declaring_role = $callee_declaring_role",
        )
    if callee_declaring_roles:
        _push(
            "callee_declaring_roles",
            callee_declaring_roles,
            "e.callee_declaring_role IN $callee_declaring_roles",
        )
    if exclude_callee_declaring_roles:
        _push(
            "exclude_callee_declaring_roles",
            exclude_callee_declaring_roles,
            "NOT (e.callee_declaring_role IN $exclude_callee_declaring_roles)",
        )

    pattern = (
        "MATCH (origin:Symbol)-[e:CALLS]->(other:Symbol)"
        if direction == "out"
        else "MATCH (origin:Symbol)<-[e:CALLS]-(other:Symbol)"
    )
    columns = ", ".join(
        (
            "other.id AS other_id",
            "'CALLS' AS edge_type",
            "e.confidence AS confidence",
            "e.strategy AS strategy",
            "e.source AS source",
            "e.call_site_line AS call_site_line",
            "e.call_site_byte AS call_site_byte",
            "e.arg_count AS arg_count",
            "e.resolved AS resolved",
            "e.callee_declaring_role AS callee_declaring_role",
        )
    )
    query = (
        f"{pattern} WHERE {' AND '.join(predicates)} "
        f"RETURN {columns} "
        "ORDER BY e.call_site_line, e.call_site_byte"
    )
    if sql_pagination and limit is not None:
        query += " SKIP $offset LIMIT $limit"
        params["offset"] = offset
        params["limit"] = limit
    return self._rows(query, params)
782
+
783
def count_unresolved_for_caller(self, caller_id: str) -> int:
    """Count UnresolvedCallSite nodes reached from ``caller_id`` via UNRESOLVED_AT.

    Returns ``0`` when the query yields no row or a null count.
    """
    query = (
        "MATCH (:Symbol {id: $id})-[:UNRESOLVED_AT]->(u:UnresolvedCallSite) "
        "RETURN count(u) AS n"
    )
    found = self._rows(query, {"id": caller_id})
    if not found:
        return 0
    return int(found[0].get("n") or 0)
790
+
791
def unresolved_sites_for_caller(
    self,
    caller_id: str,
    *,
    direction: Literal["in", "out"] = "out",
) -> list[dict[str, Any]]:
    """Return the unresolved call sites of ``caller_id`` in call-site order.

    Only ``direction="out"`` runs a query; every other direction returns an
    empty list without touching the database.
    """
    if direction == "out":
        query = (
            "MATCH (:Symbol {id: $id})-[:UNRESOLVED_AT]->(u:UnresolvedCallSite) "
            "RETURN u.id AS id, u.caller_id AS caller_id, u.call_site_line AS call_site_line, "
            "u.call_site_byte AS call_site_byte, u.arg_count AS arg_count, "
            "u.callee_simple AS callee_simple, u.receiver_expr AS receiver_expr, "
            "u.reason AS reason "
            "ORDER BY u.call_site_line, u.call_site_byte"
        )
        return self._rows(query, {"id": caller_id})
    return []
808
+
809
def unresolved_sites_for_describe(
    self,
    method_id: str,
    *,
    inline_limit: int = 5,
) -> tuple[list[dict[str, Any]], int]:
    """Return ``(first unresolved call sites, total count)`` for ``method_id``.

    The total is counted first; when it is zero no second query is issued and
    ``([], 0)`` is returned. Otherwise at most ``inline_limit`` sites are
    fetched in call-site order (line, then byte).
    """
    lookup = {"id": method_id}
    counted = self._rows(
        "MATCH (:Symbol {id: $id})-[:UNRESOLVED_AT]->(u:UnresolvedCallSite) "
        "RETURN count(u) AS n",
        lookup,
    )
    total = 0
    if counted:
        total = int(counted[0].get("n") or 0)
    if total == 0:
        return [], 0
    # The limit is inlined as an integer literal rather than bound as a parameter.
    preview_query = (
        "MATCH (:Symbol {id: $id})-[:UNRESOLVED_AT]->(u:UnresolvedCallSite) "
        "RETURN u.call_site_line AS line, u.reason AS reason, "
        "u.callee_simple AS callee_simple, u.receiver_expr AS receiver_expr "
        "ORDER BY u.call_site_line, u.call_site_byte "
        f"LIMIT {int(inline_limit)}"
    )
    preview = self._rows(preview_query, {"id": method_id})
    return preview, total
832
+
833
def list_unresolved_call_sites(
    self,
    *,
    method_id: str | None = None,
    reason: str | None = None,
    microservice: str | None = None,
    callee_simple: str | None = None,
    limit: int = 100,
) -> list[dict[str, Any]]:
    """List unresolved call sites together with their caller, optionally filtered.

    Each truthy filter adds one equality predicate (ANDed together); with no
    filters the ``WHERE`` clause is omitted entirely. Rows are ordered by
    call-site line then byte and capped at ``limit``.
    """
    params: dict[str, Any] = {"lim": int(limit)}
    optional_filters = (
        ("method_id", method_id, "caller.id = $method_id"),
        ("reason", reason, "u.reason = $reason"),
        ("microservice", microservice, "caller.microservice = $microservice"),
        ("callee_simple", callee_simple, "u.callee_simple = $callee_simple"),
    )
    predicates: list[str] = []
    for key, value, predicate in optional_filters:
        if value:
            predicates.append(predicate)
            params[key] = value
    where = ("WHERE " + " AND ".join(predicates)) if predicates else ""
    query = (
        "MATCH (caller:Symbol)-[:UNRESOLVED_AT]->(u:UnresolvedCallSite) "
        f"{where} "
        "RETURN u.id AS id, caller.id AS caller_id, caller.fqn AS caller_fqn, "
        "caller.microservice AS microservice, u.call_site_line AS call_site_line, "
        "u.call_site_byte AS call_site_byte, u.arg_count AS arg_count, "
        "u.callee_simple AS callee_simple, u.receiver_expr AS receiver_expr, "
        "u.reason AS reason "
        "ORDER BY u.call_site_line, u.call_site_byte "
        "LIMIT $lim"
    )
    return self._rows(query, params)
869
+
870
def stats_unresolved_call_sites(
    self,
    *,
    by: Literal["reason", "microservice", "caller_role"],
) -> list[dict[str, Any]]:
    """Histogram of unresolved call sites, largest bucket first.

    ``by="reason"`` buckets on the site's reason, ``by="microservice"`` on the
    caller's microservice. Any other value buckets on the ``role`` of the
    Symbol that DECLARES the caller.
    """
    by_reason = (
        "MATCH (:Symbol)-[:UNRESOLVED_AT]->(u:UnresolvedCallSite) "
        "RETURN u.reason AS bucket, count(*) AS n ORDER BY n DESC"
    )
    by_microservice = (
        "MATCH (caller:Symbol)-[:UNRESOLVED_AT]->(:UnresolvedCallSite) "
        "RETURN caller.microservice AS bucket, count(*) AS n ORDER BY n DESC"
    )
    by_caller_role = (
        "MATCH (caller:Symbol)-[:UNRESOLVED_AT]->(:UnresolvedCallSite) "
        "MATCH (parent:Symbol)-[:DECLARES]->(caller) "
        "RETURN parent.role AS bucket, count(*) AS n ORDER BY n DESC"
    )
    queries = {"reason": by_reason, "microservice": by_microservice}
    # Unrecognised grouping keys fall through to the caller-role histogram.
    return self._rows(queries.get(by, by_caller_role))
890
+
891
+ def _edge_row_count_from_method_ids(self, method_ids: list[str], rel: str) -> int:
892
+ """Count outgoing ``rel`` edges from method symbols (describe rollup helper)."""
893
+ total = 0
894
+ for mid in method_ids:
895
+ rows = self._rows(
896
+ f"MATCH (x:Symbol {{id: $mid}})-[e:{rel}]->() RETURN count(e) AS n",
897
+ {"mid": mid},
898
+ )
899
+ total += int(rows[0].get("n") or 0) if rows else 0
900
+ return total
901
+
902
def _override_impl_ids_from_stored(self, method_id: str) -> list[str]:
    """Ids of methods that override ``method_id`` (incoming stored ``[:OVERRIDES]`` hop).

    The collected id list is passed through ``_coerce_id_list`` and then
    de-duplicated while keeping first-seen order.
    """
    found = self._rows(
        "MATCH (decl:Symbol {id: $id})<-[:OVERRIDES]-(mover:Symbol) "
        "RETURN collect(DISTINCT mover.id) AS ids",
        {"id": method_id},
    )
    raw_ids = found[0].get("ids") if found else None
    unique_ids = dict.fromkeys(_coerce_id_list(raw_ids))
    return list(unique_ids)
910
+
911
def _override_decl_ids_from_stored(self, method_id: str) -> list[str]:
    """Ids of declarations that ``method_id`` overrides (outgoing stored ``[:OVERRIDES]`` hop).

    The collected id list is passed through ``_coerce_id_list`` and then
    de-duplicated while keeping first-seen order.
    """
    found = self._rows(
        "MATCH (m:Symbol {id: $id})-[:OVERRIDES]->(decl:Symbol) "
        "RETURN collect(DISTINCT decl.id) AS ids",
        {"id": method_id},
    )
    raw_ids = found[0].get("ids") if found else None
    unique_ids = dict.fromkeys(_coerce_id_list(raw_ids))
    return list(unique_ids)
919
+
920
def override_axis_rollup_for(self, method_id: str) -> dict[str, dict[str, int]]:
    """Dispatch-axis composed keys for a method Symbol (describe-time only).

    The dispatch hop reads the materialized ``[:OVERRIDES]`` edges. For a
    method that has overriders, ``OVERRIDDEN_BY`` reports how many there are
    and each ``OVERRIDDEN_BY.<REL>`` key reports the summed outgoing ``<REL>``
    edges of those overriders (DECLARES_CLIENT, DECLARES_PRODUCER, EXPOSES).
    ``OVERRIDES`` reports how many declarations this method overrides.
    Zero-count keys are left out.

    Returns ``{}`` when the gate query finds no Symbol with this id whose
    kind is ``'method'`` and whose modifiers lack ``'static'``.
    """
    gate_rows = self._rows(
        "MATCH (m:Symbol {id: $id}) "
        "WHERE m.kind = 'method' "
        "AND NOT list_contains(COALESCE(m.modifiers, []), 'static') "
        "RETURN 1 AS ok LIMIT 1",
        {"id": method_id},
    )
    if not gate_rows:
        return {}

    rollup: dict[str, dict[str, int]] = {}

    overriders = self._override_impl_ids_from_stored(method_id)
    if overriders:
        rollup["OVERRIDDEN_BY"] = {"in": 0, "out": len(overriders)}
        for rel in ("DECLARES_CLIENT", "DECLARES_PRODUCER", "EXPOSES"):
            edge_total = self._edge_row_count_from_method_ids(overriders, rel)
            if edge_total > 0:
                rollup[f"OVERRIDDEN_BY.{rel}"] = {"in": 0, "out": edge_total}

    declarations = self._override_decl_ids_from_stored(method_id)
    if declarations:
        rollup["OVERRIDES"] = {"in": 0, "out": len(declarations)}

    return rollup
959
+
960
+ def _scope_counts(self, column: str) -> dict[str, int]:
961
+ """Generic helper: count resolved type symbols grouped by `column`.
962
+
963
+ Empty-string keys mean the builder could not infer a value
964
+ (no build-marker ancestor / no path segment under project_root).
965
+ """
966
+ try:
967
+ rows = self._rows(
968
+ f"MATCH (s:Symbol) WHERE s.resolved "
969
+ f"AND s.kind IN ['class','interface','enum','record','annotation'] "
970
+ f"RETURN s.{column} AS bucket, count(*) AS n"
971
+ )
972
+ except Exception:
973
+ return {}
974
+ out: dict[str, int] = {}
975
+ for r in rows:
976
+ key = r.get("bucket") or ""
977
+ out[str(key)] = int(r.get("n") or 0)
978
+ return out
979
+
980
def module_counts(self) -> dict[str, int]:
    """Resolved type-symbol counts keyed by module name."""
    per_module = self._scope_counts("module")
    return per_module
983
+
984
def microservice_counts(self) -> dict[str, int]:
    """Resolved type-symbol counts keyed by microservice name."""
    per_microservice = self._scope_counts("microservice")
    return per_microservice
987
+
988
+ # ---- symbol-level lookups ----
989
+
990
def find_by_name_or_fqn(self, name_or_fqn: str, *, kinds: list[str] | None = None,
                        module: str | None = None,
                        microservice: str | None = None,
                        limit: int = 50) -> list[SymbolHit]:
    """Find Symbols whose simple name or FQN equals ``name_or_fqn``.

    A non-empty ``kinds`` restricts matches to those symbol kinds.
    ``module`` / ``microservice`` are handed to ``_scope_filters``, which
    contributes any additional predicates. At most ``limit`` rows are returned.
    """
    params: dict[str, Any] = {"needle": name_or_fqn}
    conditions = ["(s.name = $needle OR s.fqn = $needle)"]
    if kinds:
        conditions.append("s.kind IN $kinds")
        params["kinds"] = kinds
    conditions += _scope_filters("s", module=module, microservice=microservice, params=params)
    query = (
        f"MATCH (s:Symbol) WHERE {' AND '.join(conditions)} "
        f"RETURN {_SYMBOL_RETURN} LIMIT {int(limit)}"
    )
    hits: list[SymbolHit] = []
    for row in self._rows(query, params):
        hits.append(_row_to_symbol(row))
    return hits
1003
+
1004
def list_by_role(self, role: str, *, module: str | None = None,
                 microservice: str | None = None,
                 capability: str | None = None,
                 limit: int = 100) -> list[SymbolHit]:
    """List Symbols whose ``role`` equals ``role``.

    A truthy ``capability`` additionally requires that value to appear in the
    symbol's ``capabilities`` list. ``module`` / ``microservice`` are handed
    to ``_scope_filters``. At most ``limit`` rows are returned.
    """
    params: dict[str, Any] = {"role": role}
    conditions = ["s.role = $role"]
    if capability:
        params["capability"] = capability
        conditions.append("$capability IN s.capabilities")
    conditions += _scope_filters("s", module=module, microservice=microservice, params=params)
    query = (
        f"MATCH (s:Symbol) WHERE {' AND '.join(conditions)} "
        f"RETURN {_SYMBOL_RETURN} LIMIT {int(limit)}"
    )
    hits: list[SymbolHit] = []
    for row in self._rows(query, params):
        hits.append(_row_to_symbol(row))
    return hits
1017
+
1018
def list_by_annotation(self, annotation: str, *, module: str | None = None,
                       microservice: str | None = None,
                       capability: str | None = None,
                       limit: int = 100) -> list[SymbolHit]:
    """List Symbols whose ``annotations`` list contains ``annotation``.

    A truthy ``capability`` additionally requires that value to appear in the
    symbol's ``capabilities`` list. ``module`` / ``microservice`` are handed
    to ``_scope_filters``. At most ``limit`` rows are returned.
    """
    params: dict[str, Any] = {"ann": annotation}
    # Ladybug supports `list_contains` for STRING[].
    conditions = ["list_contains(s.annotations, $ann)"]
    if capability:
        params["capability"] = capability
        conditions.append("$capability IN s.capabilities")
    conditions += _scope_filters("s", module=module, microservice=microservice, params=params)
    query = (
        f"MATCH (s:Symbol) WHERE {' AND '.join(conditions)} "
        f"RETURN {_SYMBOL_RETURN} LIMIT {int(limit)}"
    )
    hits: list[SymbolHit] = []
    for row in self._rows(query, params):
        hits.append(_row_to_symbol(row))
    return hits
1032
+
1033
def list_by_capability(self, capability: str, *, module: str | None = None,
                       microservice: str | None = None,
                       limit: int = 100) -> list[SymbolHit]:
    """List Symbols whose ``capabilities`` list contains ``capability``.

    ``module`` / ``microservice`` are handed to ``_scope_filters``. At most
    ``limit`` rows are returned.
    """
    params: dict[str, Any] = {"capability": capability}
    conditions = ["$capability IN s.capabilities"]
    conditions += _scope_filters("s", module=module, microservice=microservice, params=params)
    query = (
        f"MATCH (s:Symbol) WHERE {' AND '.join(conditions)} "
        f"RETURN {_SYMBOL_RETURN} LIMIT {int(limit)}"
    )
    hits: list[SymbolHit] = []
    for row in self._rows(query, params):
        hits.append(_row_to_symbol(row))
    return hits
1042
+
1043
+ # ---- edge traversals ----
1044
+
1045
def find_implementors(self, interface_name_or_fqn: str, *,
                      module: str | None = None,
                      microservice: str | None = None,
                      capability: str | None = None,
                      limit: int = 100) -> list[SymbolHit]:
    """Find Symbols with an IMPLEMENTS edge to the named interface.

    The interface is matched by simple name or FQN. The capability filter and
    the ``_scope_filters`` predicates apply to the implementing symbol, not to
    the interface. Results are distinct and capped at ``limit``.
    """
    params: dict[str, Any] = {"needle": interface_name_or_fqn}
    conditions = ["(i.name = $needle OR i.fqn = $needle)"]
    if capability:
        params["capability"] = capability
        conditions.append("$capability IN c.capabilities")
    conditions += _scope_filters("c", module=module, microservice=microservice, params=params)
    query = (
        f"MATCH (c:Symbol)-[:IMPLEMENTS]->(i:Symbol) WHERE {' AND '.join(conditions)} "
        f"RETURN DISTINCT {_symbol_return_for('c')} "
        f"LIMIT {int(limit)}"
    )
    hits: list[SymbolHit] = []
    for row in self._rows(query, params):
        hits.append(_row_to_symbol(row))
    return hits
1063
+
1064
def find_subclasses(self, class_name_or_fqn: str, *,
                    module: str | None = None,
                    microservice: str | None = None,
                    capability: str | None = None,
                    limit: int = 100) -> list[SymbolHit]:
    """Find Symbols with an EXTENDS edge to the named base class.

    The base is matched by simple name or FQN. The capability filter and the
    ``_scope_filters`` predicates apply to the extending symbol, not to the
    base. Results are distinct and capped at ``limit``.
    """
    params: dict[str, Any] = {"needle": class_name_or_fqn}
    conditions = ["(b.name = $needle OR b.fqn = $needle)"]
    if capability:
        params["capability"] = capability
        conditions.append("$capability IN s.capabilities")
    conditions += _scope_filters("s", module=module, microservice=microservice, params=params)
    query = (
        f"MATCH (s:Symbol)-[:EXTENDS]->(b:Symbol) WHERE {' AND '.join(conditions)} "
        f"RETURN DISTINCT {_SYMBOL_RETURN} "
        f"LIMIT {int(limit)}"
    )
    hits: list[SymbolHit] = []
    for row in self._rows(query, params):
        hits.append(_row_to_symbol(row))
    return hits
1082
+
1083
def find_injectors(self, target_name_or_fqn: str, *,
                   module: str | None = None,
                   microservice: str | None = None,
                   capability: str | None = None,
                   limit: int = 100) -> list[EdgeHit]:
    """Find INJECTS edges whose target matches ``target_name_or_fqn``.

    The target is matched by simple name or FQN. The capability filter and
    the ``_scope_filters`` predicates apply to the injecting (source) symbol.
    Each hit carries both endpoints plus the edge's mechanism, annotation,
    field_or_param and resolved attributes; null string attributes become
    ``""``. At most ``limit`` edges are returned.
    """
    params: dict[str, Any] = {"needle": target_name_or_fqn}
    conditions = ["(t.name = $needle OR t.fqn = $needle)"]
    if capability:
        # Filter on the consumer (src) side: "which injectors carry this capability?"
        params["capability"] = capability
        conditions.append("$capability IN s.capabilities")
    conditions += _scope_filters("s", module=module, microservice=microservice, params=params)

    # Both endpoints are projected with prefixed aliases (`s_*` / `t_*`) so a
    # row can be split back into source / target SymbolHits without
    # column-name collisions.
    columns = (
        "id", "kind", "name", "fqn", "package", "module", "microservice",
        "filename", "start_line", "end_line", "start_byte", "end_byte",
        "modifiers", "annotations", "capabilities", "role", "signature", "parent_id", "resolved",
    )
    source_projection = ", ".join(f"s.{c} AS s_{c}" for c in columns)
    target_projection = ", ".join(f"t.{c} AS t_{c}" for c in columns)
    query = (
        f"MATCH (s:Symbol)-[e:INJECTS]->(t:Symbol) WHERE {' AND '.join(conditions)} "
        f"RETURN {source_projection}, {target_projection}, "
        f"e.mechanism AS mechanism, e.annotation AS annotation, "
        f"e.field_or_param AS field_or_param, e.resolved AS resolved "
        f"LIMIT {int(limit)}"
    )

    def _side(row: dict[str, Any], prefix: str) -> dict[str, Any]:
        # Strip the two-character alias prefix to recover plain column names.
        return {key[2:]: value for key, value in row.items() if key.startswith(prefix)}

    hits: list[EdgeHit] = []
    for row in self._rows(query, params):
        hits.append(EdgeHit(
            type="INJECTS",
            src=_row_to_symbol(_side(row, "s_")),
            dst=_row_to_symbol(_side(row, "t_")),
            mechanism=row.get("mechanism") or "",
            annotation=row.get("annotation") or "",
            field_or_param=row.get("field_or_param") or "",
            resolved=bool(row.get("resolved", True)),
        ))
    return hits
1132
+
1133
def _method_ids_for_call_graph_needle(self, needle: str, *, limit: int) -> list[str]:
    """Resolve a call-graph needle to the ids to start traversal from.

    Resolution order:

    1. Exact FQN match on ``needle``; if that finds nothing, retry with the
       alternative spelling from ``_call_graph_needle_phantom_arity_alt``
       when it yields one.
    2. If the matched symbol is a type (class / interface / enum / record /
       annotation), return the ids of everything it DECLARES, capped at
       ``limit``. If it is a method or constructor, return just its id.
    3. Otherwise fall back to methods / constructors whose simple name
       equals ``needle``, capped at ``limit``.
    """
    by_fqn = "MATCH (s:Symbol) WHERE s.fqn = $n RETURN s.id AS id, s.kind AS kind LIMIT 1"

    matched = self._rows(by_fqn, {"n": needle})
    if not matched:
        alternative = _call_graph_needle_phantom_arity_alt(needle)
        if alternative:
            matched = self._rows(by_fqn, {"n": alternative})

    if matched:
        first = matched[0]
        kind = str(first.get("kind") or "")
        symbol_id = str(first.get("id") or "")
        if symbol_id and kind in ("class", "interface", "enum", "record", "annotation"):
            declared = self._rows(
                "MATCH (t:Symbol {id: $tid})-[:DECLARES]->(m:Symbol) RETURN m.id AS id "
                f"LIMIT {int(limit)}",
                {"tid": symbol_id},
            )
            return [str(row["id"]) for row in declared if row.get("id")]
        if symbol_id and kind in ("method", "constructor"):
            return [symbol_id]

    by_name = self._rows(
        f"MATCH (s:Symbol) WHERE s.name = $n AND s.kind IN ['method','constructor'] "
        f"RETURN s.id AS id LIMIT {int(limit)}",
        {"n": needle},
    )
    return [str(row["id"]) for row in by_name if row.get("id")]
1163
+
1164
def find_callers(
    self, needle: str, *,
    depth: int = 1,
    limit: int = 100,
    min_confidence: float = 0.0,
    exclude_external: bool = True,
    module: str | None = None,
    microservice: str | None = None,
) -> list[CallEdge]:
    """Walk CALLS edges backwards from ``needle`` for up to ``depth`` hops.

    ``needle`` is resolved through ``_method_ids_for_call_graph_needle``.
    Each hop queries the callers of the current frontier, keeping edges with
    ``confidence >= min_confidence`` whose caller passes the
    ``_scope_filters`` predicates. Edges are de-duplicated on
    (caller id, callee id, call-site line, call-site byte). Newly accepted
    callers form the next frontier. Stops as soon as ``limit`` edges are
    collected.

    ``exclude_external`` drops edges whose *caller* has an external FQN; the
    needle itself may be external.
    """
    targets = self._method_ids_for_call_graph_needle(needle, limit=max(limit, 50))
    if not targets:
        return []
    caller_columns = ", ".join(f"caller.{c} AS caller_{c}" for c in _SYM_COLS)
    callee_columns = ", ".join(f"callee.{c} AS callee_{c}" for c in _SYM_COLS)
    edges: list[CallEdge] = []
    emitted: set[tuple[str, str, int, int]] = set()
    hops = max(1, int(depth))
    for _hop in range(hops):
        params: dict[str, Any] = {
            "frontier": list(targets),
            "minc": float(min_confidence),
        }
        scope = _scope_filters("caller", module=module, microservice=microservice, params=params)
        predicates = ["callee.id IN $frontier", "c.confidence >= $minc", *scope]
        query = (
            f"MATCH (caller:Symbol)-[c:CALLS]->(callee:Symbol) WHERE {' AND '.join(predicates)} "
            f"RETURN {caller_columns}, {callee_columns}, "
            f"c.call_site_line AS call_site_line, c.call_site_byte AS call_site_byte, "
            f"c.arg_count AS arg_count, c.confidence AS confidence, c.strategy AS strategy, "
            f"c.source AS source, c.resolved AS resolved "
            f"LIMIT {int(limit) * 8}"
        )
        discovered: list[str] = []
        for record in self._rows(query, params):
            edge = _row_to_call_edge(record)
            # Only the discovered caller (src) is filtered. The needle may be
            # external (e.g. java.util.List#add) while its callers are internal.
            if exclude_external and _is_external_fqn(edge.src.fqn):
                continue
            fingerprint = (edge.src.id, edge.dst.id, edge.call_site_line, edge.call_site_byte)
            if fingerprint in emitted:
                continue
            emitted.add(fingerprint)
            edges.append(edge)
            discovered.append(edge.src.id)
            if len(edges) >= limit:
                return edges
        targets = list(dict.fromkeys(discovered))
        if not targets:
            break
    return edges
1216
+
1217
def find_callees(
    self, needle: str, *,
    depth: int = 1,
    limit: int = 100,
    min_confidence: float = 0.0,
    exclude_external: bool = True,
    module: str | None = None,
    microservice: str | None = None,
) -> list[CallEdge]:
    """Walk CALLS edges forwards from ``needle`` for up to ``depth`` hops.

    ``needle`` is resolved through ``_method_ids_for_call_graph_needle``.
    Each hop queries the callees of the current frontier, keeping edges with
    ``confidence >= min_confidence`` whose callee passes the
    ``_scope_filters`` predicates. Edges are de-duplicated on
    (caller id, callee id, call-site line, call-site byte). Newly accepted
    callees form the next frontier. Stops as soon as ``limit`` edges are
    collected.

    ``exclude_external`` drops edges whose *callee* has an external FQN; the
    needle itself may be external.
    """
    sources = self._method_ids_for_call_graph_needle(needle, limit=max(limit, 50))
    if not sources:
        return []
    caller_columns = ", ".join(f"caller.{c} AS caller_{c}" for c in _SYM_COLS)
    callee_columns = ", ".join(f"callee.{c} AS callee_{c}" for c in _SYM_COLS)
    edges: list[CallEdge] = []
    emitted: set[tuple[str, str, int, int]] = set()
    hops = max(1, int(depth))
    for _hop in range(hops):
        params: dict[str, Any] = {
            "frontier": list(sources),
            "minc": float(min_confidence),
        }
        scope = _scope_filters("callee", module=module, microservice=microservice, params=params)
        predicates = ["caller.id IN $frontier", "c.confidence >= $minc", *scope]
        query = (
            f"MATCH (caller:Symbol)-[c:CALLS]->(callee:Symbol) WHERE {' AND '.join(predicates)} "
            f"RETURN {caller_columns}, {callee_columns}, "
            f"c.call_site_line AS call_site_line, c.call_site_byte AS call_site_byte, "
            f"c.arg_count AS arg_count, c.confidence AS confidence, c.strategy AS strategy, "
            f"c.source AS source, c.resolved AS resolved "
            f"LIMIT {int(limit) * 8}"
        )
        discovered: list[str] = []
        for record in self._rows(query, params):
            edge = _row_to_call_edge(record)
            # Only the discovered callee (dst) is filtered. The needle may be
            # external while its non-external outbound calls are still listed.
            if exclude_external and _is_external_fqn(edge.dst.fqn):
                continue
            fingerprint = (edge.src.id, edge.dst.id, edge.call_site_line, edge.call_site_byte)
            if fingerprint in emitted:
                continue
            emitted.add(fingerprint)
            edges.append(edge)
            discovered.append(edge.dst.id)
            if len(edges) >= limit:
                return edges
        sources = list(dict.fromkeys(discovered))
        if not sources:
            break
    return edges
1269
+
1270
def expand_methods(
    self, fqns: list[str], *, depth: int = 1,
    min_confidence: float = 0.0, limit: int = 200,
    exclude_external: bool = True,
) -> list[tuple[str, float]]:
    """Reach type FQNs from seed types via DECLARES → CALLS → DECLARES (reverse).

    Each entry is ``(type_fqn, path_confidence)``. ``path_confidence`` is the
    maximum, over call paths from seed methods, of the minimum ``CALLS.confidence``
    along that path (seed methods anchor at ``1.0`` before the first hop).

    The owning type of a callee is the part of its FQN before ``#``; callees
    whose FQN has no ``#`` contribute no type but stay on the frontier.

    When ``exclude_external`` is true (default), types whose FQN matches the
    same JDK/Spring/Lombok prefixes as ``find_callees`` are omitted from the
    returned list (they are not indexed in LanceDB anyway). BFS still walks
    through external callees to find further project types.

    Returns ``[]`` for empty ``fqns``, ``depth < 1``, or when no seed type
    declares any member. At most ``limit`` types are returned, in discovery
    order.
    """
    if not fqns or depth < 1:
        return []
    seed_mids: list[str] = []
    for tfqn in fqns:
        r = self._rows(
            "MATCH (t:Symbol) WHERE t.fqn = $f AND t.kind IN ['class','interface','enum','record','annotation'] "
            "RETURN t.id AS id LIMIT 1",
            {"f": tfqn},
        )
        if not r or not r[0].get("id"):
            continue
        tid = str(r[0]["id"])
        mrows = self._rows(
            "MATCH (t:Symbol {id: $tid})-[:DECLARES]->(m:Symbol) RETURN m.id AS id",
            {"tid": tid},
        )
        seed_mids.extend(str(x["id"]) for x in mrows if x.get("id"))
    seed_mids = list(dict.fromkeys(seed_mids))
    if not seed_mids:
        return []
    frontier_conf: dict[str, float] = {mid: 1.0 for mid in seed_mids}
    type_best: dict[str, float] = {}
    ordered_types: list[str] = []
    seen_order: set[str] = set()
    for _ in range(int(depth)):
        if not frontier_conf:
            break
        ids = list(frontier_conf.keys())
        # The callee FQN is fetched in the same query as the edge, so no
        # per-callee follow-up lookup is needed.
        rows = self._rows(
            "MATCH (m:Symbol)-[c:CALLS]->(n:Symbol) WHERE m.id IN $ids AND c.confidence >= $mc "
            "RETURN m.id AS mid, n.id AS nid, n.fqn AS nfqn, c.confidence AS conf",
            {"ids": ids, "mc": float(min_confidence)},
        )
        next_conf: dict[str, float] = {}
        callee_fqn: dict[str, str] = {}
        for r in rows:
            mid = str(r.get("mid") or "")
            nid = str(r.get("nid") or "")
            if not mid or not nid:
                continue
            raw_conf = r.get("conf")
            try:
                ec = float(raw_conf) if raw_conf is not None else 0.0
            except (TypeError, ValueError):
                ec = 0.0
            parent = frontier_conf.get(mid)
            if parent is None:
                continue
            # Path confidence is the weakest edge on the path; keep the best path.
            new_c = min(parent, ec)
            next_conf[nid] = max(next_conf.get(nid, 0.0), new_c)
            callee_fqn.setdefault(nid, str(r.get("nfqn") or ""))

        if not next_conf:
            break

        for nid, path_c in next_conf.items():
            mfqn = callee_fqn.get(nid, "")
            if "#" not in mfqn:
                continue
            tpart = mfqn.split("#", 1)[0]
            if not tpart:
                continue
            # External types are not reported, but their methods remain on the
            # frontier so the walk can pass through them.
            if not (exclude_external and _is_external_fqn(tpart)):
                type_best[tpart] = max(type_best.get(tpart, 0.0), path_c)
                if tpart not in seen_order:
                    seen_order.add(tpart)
                    ordered_types.append(tpart)
            if len(ordered_types) >= limit:
                return [(t, type_best[t]) for t in ordered_types[:limit]]

        frontier_conf = next_conf

    return [(t, type_best[t]) for t in ordered_types[:limit]]
1366
+
1367
def neighbors(self, fqn_or_name: str, *, depth: int = 1,
              edge_types: list[str] | None = None,
              direction: str = "both", limit: int = 200) -> list[SymbolHit]:
    """BFS over `edge_types` up to `depth` hops. `direction` in {out, in, both}.

    The root is matched by simple name or FQN. ``edge_types`` defaults to
    EXTENDS, IMPLEMENTS, INJECTS, DECLARES and CALLS. Any ``direction`` other
    than ``"out"`` / ``"in"`` is treated as undirected. Returns ``[]`` when
    ``depth < 1``; otherwise at most ``limit`` distinct symbols.

    Raises:
        ValueError: if an edge type is not a plain identifier. The names are
            spliced into the Cypher relationship pattern (they cannot be bound
            as parameters), so anything else could rewrite the query.
    """
    if depth < 1:
        return []
    edges = edge_types or ["EXTENDS", "IMPLEMENTS", "INJECTS", "DECLARES", "CALLS"]
    invalid = [e for e in edges if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", str(e))]
    if invalid:
        raise ValueError(f"invalid edge type name(s): {invalid!r}")
    edge_pattern = "|".join(edges)
    if direction == "out":
        arrow_l, arrow_r = "-", "->"
    elif direction == "in":
        arrow_l, arrow_r = "<-", "-"
    else:
        arrow_l, arrow_r = "-", "-"
    q = (
        f"MATCH (root:Symbol) WHERE root.name = $needle OR root.fqn = $needle "
        f"MATCH path = (root){arrow_l}[:{edge_pattern}*1..{int(depth)}]{arrow_r}(n:Symbol) "
        f"RETURN DISTINCT {_symbol_return_for('n')} "
        f"LIMIT {int(limit)}"
    )
    return [_row_to_symbol(r) for r in self._rows(q, {"needle": fqn_or_name})]
1388
+
1389
def impact_analysis(self, fqn_or_name: str, *, depth: int = 2,
                    limit: int = 300) -> list[SymbolHit]:
    """Reverse closure over INJECTS + IMPLEMENTS + EXTENDS (who breaks if `fqn` changes).

    The target is matched by simple name or FQN. Returns the distinct symbols
    that reach it through 1..``depth`` such edges, capped at ``limit``.
    Returns ``[]`` when ``depth < 1`` (same guard as ``neighbors``), since a
    ``*1..0`` hop range cannot match anything.
    """
    if depth < 1:
        return []
    q = (
        f"MATCH (target:Symbol) WHERE target.name = $needle OR target.fqn = $needle "
        f"MATCH (n:Symbol)-[:INJECTS|IMPLEMENTS|EXTENDS*1..{int(depth)}]->(target) "
        f"RETURN DISTINCT {_symbol_return_for('n')} "
        f"LIMIT {int(limit)}"
    )
    return [_row_to_symbol(r) for r in self._rows(q, {"needle": fqn_or_name})]
1399
+
1400
+ # ---- flow tracing (entrypoint -> service -> integration / repository) ----
1401
+
1402
# Default ordered waterfall of role stages. Each stage collects neighbors of
# the previous stage whose role matches the allow-list. Phantom / unresolved
# symbols are excluded so we don't propagate noise across the boundary.
# Each inner tuple is the role allow-list for one stage, in order.
_FLOW_STAGES: tuple[tuple[str, ...], ...] = (
    ("CONTROLLER",),
    ("SERVICE", "COMPONENT"),
    ("CLIENT", "REPOSITORY", "MAPPER"),
)
1410
+
1411
# Stage-0 accepts any entrypoint-like role. COMPONENT is included because
# Kafka listeners / @Scheduled orchestrators are frequently plain
# @Component, not @Controller; SERVICE is included so we don't drop
# orchestrator seeds when the caller already narrowed the vector search
# to services.
# NOTE(review): CLIENT is accepted as well, but the rationale for it is not
# recorded here — confirm and document.
_ENTRYPOINT_ROLES: tuple[str, ...] = (
    "CONTROLLER", "COMPONENT", "SERVICE", "CLIENT",
)
1419
+
1420
+ def trace_flow(self, seed_fqns: list[str], *,
1421
+ module: str | None = None,
1422
+ microservice: str | None = None,
1423
+ depth: int = 2, stage_limit: int = 20,
1424
+ follow_calls: bool = True,
1425
+ min_call_confidence: float = 0.0,
1426
+ exclude_external: bool = True) -> list[list[StageSymbol]]:
1427
+ """Walk stages `CONTROLLER -> SERVICE/COMPONENT -> CLIENT/REPOSITORY/MAPPER`.
1428
+
1429
+ Returns a list of stages; each stage is a list of SymbolHit. The first
1430
+ stage is the seed set (entrypoints matched by FQN, filtered to
1431
+ orchestrator-like roles — see `_ENTRYPOINT_ROLES`). If role-filtered
1432
+ seeds come back empty we fall back to unfiltered seeds so a caller
1433
+ with no CONTROLLER coverage still gets *something* back.
1434
+ Each subsequent stage is the neighbor-set (INJECTS+EXTENDS+IMPLEMENTS,
1435
+ optionally merged with type-to-type paths through DECLARES+CALLS when
1436
+ `follow_calls` is true) of the previous stage, restricted to the
1437
+ stage's role allow-list.
1438
+
1439
+ Defaults: ``depth=2`` (clamped to 1..3), ``follow_calls=True``,
1440
+ ``min_call_confidence=0.0``, ``exclude_external=True``. The latter only
1441
+ filters symbols reached via the DECLARES+CALLS hop: discovered **type**
1442
+ symbols matching external FQN prefixes (same list as ``expand_methods`` /
1443
+ the callee side of ``find_callees``), not the seed frontier. INJECTS /
1444
+ EXTENDS / IMPLEMENTS hops ignore ``exclude_external``.
1445
+
1446
+ ``depth`` is the neighbour hop count per stage (not total trace depth).
1447
+ """
1448
+ if not seed_fqns:
1449
+ return []
1450
+ depth = max(1, min(3, int(depth)))
1451
+
1452
+ stages: list[list[StageSymbol]] = []
1453
+ visited_fqns: set[str] = set()
1454
+
1455
+ def _run_seed_query(entry_roles: tuple[str, ...] | None) -> list[SymbolHit]:
1456
+ filters = ["s.fqn IN $fqns"]
1457
+ params: dict[str, Any] = {"fqns": list(seed_fqns)}
1458
+ filters.extend(_scope_filters(
1459
+ "s", module=module, microservice=microservice, params=params,
1460
+ ))
1461
+ if entry_roles:
1462
+ params["entry_roles"] = list(entry_roles)
1463
+ # Ladybug 0.17.x does not support parameterized lists inside ANY
1464
+ # comprehensions, so we expand the fixed capability set as
1465
+ # individual list_contains predicates ORed together.
1466
+ cap_predicates = " OR ".join(
1467
+ f"list_contains(s.capabilities, '{c}')"
1468
+ for c in ("MESSAGE_LISTENER", "SCHEDULED_TASK")
1469
+ )
1470
+ filters.append(
1471
+ f"(s.role IN $entry_roles OR {cap_predicates})"
1472
+ )
1473
+ where = " AND ".join(filters)
1474
+ q0 = (
1475
+ f"MATCH (s:Symbol) WHERE {where} "
1476
+ f"RETURN {_SYMBOL_RETURN} LIMIT {int(stage_limit)}"
1477
+ )
1478
+ return [_row_to_symbol(r) for r in self._rows(q0, params)]
1479
+
1480
+ seed_rows = _run_seed_query(self._ENTRYPOINT_ROLES)
1481
+ if not seed_rows:
1482
+ seed_rows = _run_seed_query(None)
1483
+ if not seed_rows:
1484
+ return []
1485
+ stages.append([StageSymbol(symbol=r, via=[]) for r in seed_rows])
1486
+ for h in seed_rows:
1487
+ if h.fqn:
1488
+ visited_fqns.add(h.fqn)
1489
+
1490
+ frontier_fqns: list[str] = [h.fqn for h in seed_rows if h.fqn]
1491
+ for stage_roles in self._FLOW_STAGES[1:]:
1492
+ if not frontier_fqns:
1493
+ break
1494
+
1495
+ # Single-hop BFS repeated up to `depth` times. Each iteration
1496
+ # knows which edge type and parent node produced a newly-
1497
+ # discovered symbol, so we can label every stage entry.
1498
+ stage_results: dict[str, StageSymbol] = {}
1499
+ current_frontier = list(frontier_fqns)
1500
+
1501
+ for hop in range(1, depth + 1):
1502
+ if not current_frontier:
1503
+ break
1504
+ params: dict[str, Any] = {
1505
+ "fqns": current_frontier,
1506
+ "roles": list(stage_roles),
1507
+ }
1508
+ scope = _scope_filters(
1509
+ "n", module=module, microservice=microservice, params=params,
1510
+ )
1511
+ scope_clause = (" AND " + " AND ".join(scope)) if scope else ""
1512
+ q = (
1513
+ f"MATCH (root:Symbol)-[e:INJECTS|EXTENDS|IMPLEMENTS]-(n:Symbol) "
1514
+ f"WHERE root.fqn IN $fqns AND n.role IN $roles AND n.resolved{scope_clause} "
1515
+ f"RETURN {_symbol_return_for('n')}, "
1516
+ f"label(e) AS edge_type, root.fqn AS from_fqn "
1517
+ f"LIMIT {int(stage_limit) * 4}"
1518
+ )
1519
+ next_frontier: list[str] = []
1520
+ def _ingest_flow_row(
1521
+ row: dict[str, Any], *, filter_external_fqn: bool = False,
1522
+ ) -> None:
1523
+ sym = _row_to_symbol(row)
1524
+ if (
1525
+ filter_external_fqn
1526
+ and exclude_external
1527
+ and _is_external_fqn(sym.fqn)
1528
+ ):
1529
+ return
1530
+ if not sym.fqn or sym.fqn in visited_fqns:
1531
+ return
1532
+ edge = ViaEdge(
1533
+ edge_type=str(row.get("edge_type") or ""),
1534
+ from_fqn=str(row.get("from_fqn") or ""),
1535
+ hop=hop,
1536
+ caller_node_id=str(row.get("caller_client_id") or ""),
1537
+ )
1538
+ existing = stage_results.get(sym.fqn)
1539
+ if existing is None:
1540
+ stage_results[sym.fqn] = StageSymbol(symbol=sym, via=[edge])
1541
+ next_frontier.append(sym.fqn)
1542
+ else:
1543
+ if len(existing.via) < 4 and not any(
1544
+ v.edge_type == edge.edge_type and v.from_fqn == edge.from_fqn
1545
+ for v in existing.via
1546
+ ):
1547
+ existing.via.append(edge)
1548
+
1549
+ for row in self._rows(q, params):
1550
+ _ingest_flow_row(row)
1551
+ if len(stage_results) >= stage_limit:
1552
+ break
1553
+
1554
+ # Structural-first budget: same-microservice CALLS top up first,
1555
+ # then cross-service HTTP/ASYNC caller edges.
1556
+ if follow_calls and len(stage_results) < stage_limit:
1557
+ remaining = stage_limit - len(stage_results)
1558
+ params_cf: dict[str, Any] = {
1559
+ "fqns": current_frontier,
1560
+ "roles": list(stage_roles),
1561
+ "mc": float(min_call_confidence),
1562
+ }
1563
+ scope_cf = _scope_filters(
1564
+ "n", module=module, microservice=microservice, params=params_cf,
1565
+ )
1566
+ sccf = (" AND " + " AND ".join(scope_cf)) if scope_cf else ""
1567
+ qcf = (
1568
+ "MATCH (root:Symbol)-[:DECLARES]->(m1:Symbol)-[c:CALLS]->(m2:Symbol)"
1569
+ "<-[:DECLARES]-(n:Symbol) WHERE root.fqn IN $fqns AND n.role IN $roles "
1570
+ "AND root.microservice = n.microservice "
1571
+ "AND n.resolved AND n.kind IN ['class','interface','enum','record','annotation'] "
1572
+ f"AND c.confidence >= $mc{sccf} "
1573
+ f"RETURN {_symbol_return_for('n')}, 'CALLS' AS edge_type, root.fqn AS from_fqn "
1574
+ f"LIMIT {max(1, remaining * 4)}"
1575
+ )
1576
+ for row in self._rows(qcf, params_cf):
1577
+ _ingest_flow_row(row, filter_external_fqn=True)
1578
+ if len(stage_results) >= stage_limit:
1579
+ break
1580
+ if follow_calls and len(stage_results) < stage_limit:
1581
+ remaining = stage_limit - len(stage_results)
1582
+ params_rf: dict[str, Any] = {
1583
+ "fqns": current_frontier,
1584
+ "roles": list(stage_roles),
1585
+ "mc": float(min_call_confidence),
1586
+ }
1587
+ scope_rf = _scope_filters(
1588
+ "n", module=module, microservice=microservice, params=params_rf,
1589
+ )
1590
+ scrf = (" AND " + " AND ".join(scope_rf)) if scope_rf else ""
1591
+ qrf = (
1592
+ "MATCH (root:Symbol)-[:DECLARES]->(m1:Symbol)-[:DECLARES_CLIENT]->(c:Client)"
1593
+ "-[e:HTTP_CALLS]->(rt:Route)<-[:EXPOSES]-(handler:Symbol)<-[:DECLARES]-(n:Symbol) "
1594
+ "WHERE root.fqn IN $fqns AND n.role IN $roles "
1595
+ "AND n.resolved AND n.kind IN ['class','interface','enum','record','annotation'] "
1596
+ "AND e.confidence >= $mc AND root.microservice <> n.microservice "
1597
+ f"{scrf} "
1598
+ f"RETURN {_symbol_return_for('n')}, 'HTTP_CALLS' AS edge_type, "
1599
+ f"root.fqn AS from_fqn, c.id AS caller_client_id "
1600
+ f"LIMIT {max(1, remaining * 4)}"
1601
+ )
1602
+ for row in self._rows(qrf, params_rf):
1603
+ _ingest_flow_row(row, filter_external_fqn=True)
1604
+ if len(stage_results) >= stage_limit:
1605
+ break
1606
+ if len(stage_results) < stage_limit:
1607
+ remaining = stage_limit - len(stage_results)
1608
+ qrf_async = (
1609
+ "MATCH (root:Symbol)-[:DECLARES]->(m1:Symbol)-[:DECLARES_PRODUCER]->(pr:Producer)"
1610
+ "-[e:ASYNC_CALLS]->(rt:Route)<-[:EXPOSES]-(handler:Symbol)<-[:DECLARES]-(n:Symbol) "
1611
+ "WHERE root.fqn IN $fqns AND n.role IN $roles "
1612
+ "AND n.resolved AND n.kind IN ['class','interface','enum','record','annotation'] "
1613
+ "AND e.confidence >= $mc AND root.microservice <> n.microservice "
1614
+ f"{scrf} "
1615
+ f"RETURN {_symbol_return_for('n')}, 'ASYNC_CALLS' AS edge_type, "
1616
+ f"root.fqn AS from_fqn, pr.id AS caller_producer_id "
1617
+ f"LIMIT {max(1, remaining * 4)}"
1618
+ )
1619
+ for row in self._rows(qrf_async, params_rf):
1620
+ _ingest_flow_row(row, filter_external_fqn=True)
1621
+ if len(stage_results) >= stage_limit:
1622
+ break
1623
+
1624
+ current_frontier = next_frontier
1625
+ if len(stage_results) >= stage_limit:
1626
+ break
1627
+
1628
+ if not stage_results:
1629
+ break
1630
+ stage_list = list(stage_results.values())
1631
+ stages.append(stage_list)
1632
+ for entry in stage_list:
1633
+ visited_fqns.add(entry.symbol.fqn)
1634
+ frontier_fqns = [entry.symbol.fqn for entry in stage_list]
1635
+ return stages
1636
+
1637
+ # ---- routes (B2a) ----
1638
+
1639
# RETURN projection shared by the Route queries: every listed property of the
# matched node ``r`` is returned under its own name, in this order.
_ROUTE_RETURN = ", ".join(
    f"r.{column} AS {column}"
    for column in (
        "id",
        "kind",
        "framework",
        "method",
        "path",
        "path_template",
        "path_regex",
        "topic",
        "broker",
        "feign_name",
        "feign_url",
        "microservice",
        "module",
        "filename",
        "start_line",
        "end_line",
        "resolved",
    )
)
1646
+
1647
+ @staticmethod
1648
+ def _row_to_route_dict(row: dict[str, Any]) -> dict[str, Any]:
1649
+ return {
1650
+ "id": str(row.get("id") or ""),
1651
+ "kind": str(row.get("kind") or ""),
1652
+ "framework": str(row.get("framework") or ""),
1653
+ "method": str(row.get("method") or ""),
1654
+ "path": str(row.get("path") or ""),
1655
+ "path_template": str(row.get("path_template") or ""),
1656
+ "path_regex": str(row.get("path_regex") or ""),
1657
+ "topic": str(row.get("topic") or ""),
1658
+ "broker": str(row.get("broker") or ""),
1659
+ "feign_name": str(row.get("feign_name") or ""),
1660
+ "feign_url": str(row.get("feign_url") or ""),
1661
+ "microservice": str(row.get("microservice") or ""),
1662
+ "module": str(row.get("module") or ""),
1663
+ "filename": str(row.get("filename") or ""),
1664
+ "start_line": int(row.get("start_line") or 0),
1665
+ "end_line": int(row.get("end_line") or 0),
1666
+ "resolved": bool(row.get("resolved", True)),
1667
+ }
1668
+
1669
def list_routes(
    self,
    *,
    microservice: str | None = None,
    framework: str | None = None,
    path_prefix: str | None = None,
    method: str | None = None,
    limit: int = 100,
) -> list[dict[str, Any]]:
    """List Route nodes, optionally filtered, as plain dicts.

    Each filter is applied only when a non-empty value is supplied;
    ``path_prefix`` matches with ``STARTS WITH``, the others by equality.
    ``limit`` is clamped to the range 1..500. Rows are ordered by framework,
    path and id.
    """
    params: dict[str, Any] = {"lim": min(max(int(limit), 1), 500)}
    conditions: list[str] = []

    # (bound parameter name, supplied value, Cypher predicate)
    truthy_filters = (
        ("microservice", microservice, "r.microservice = $microservice"),
        ("framework", framework, "r.framework = $framework"),
        ("path_prefix", path_prefix, "r.path STARTS WITH $path_prefix"),
    )
    for name, value, predicate in truthy_filters:
        if value:
            params[name] = value
            conditions.append(predicate)

    # ``method`` is skipped only for None / empty string.
    if not (method is None or method == ""):
        params["method"] = method
        conditions.append("r.method = $method")

    where_clause = f" WHERE {' AND '.join(conditions)}" if conditions else ""
    query = (
        f"MATCH (r:Route){where_clause} RETURN {self._ROUTE_RETURN} "
        "ORDER BY r.framework, r.path, r.id LIMIT $lim"
    )
    return list(map(self._row_to_route_dict, self._rows(query, params)))
1699
+
1700
def find_route_handlers(self, *, route_id: str) -> list[dict[str, Any]]:
    """Return the symbols that have an ``EXPOSES`` edge to the given route.

    Each entry holds the symbol (converted with ``_row_to_symbol`` and
    ``asdict``) plus the edge's ``confidence`` and ``strategy``. Rows are
    ordered by symbol FQN.
    """
    prefix = "s_"
    # Symbol columns are aliased with a prefix so they can be separated from
    # the edge columns when the row comes back.
    projection = ", ".join(f"s.{col} AS {prefix}{col}" for col in _SYM_COLS)
    query = (
        "MATCH (s:Symbol)-[e:EXPOSES]->(r:Route) WHERE r.id = $rid "
        f"RETURN {projection}, e.confidence AS confidence, e.strategy AS strategy "
        "ORDER BY s.fqn"
    )

    def _to_handler(record: dict[str, Any]) -> dict[str, Any]:
        symbol_fields = {
            key[len(prefix):]: value
            for key, value in record.items()
            if key.startswith(prefix)
        }
        return {
            "symbol": asdict(_row_to_symbol(symbol_fields)),
            "confidence": float(record.get("confidence") or 0.0),
            "strategy": str(record.get("strategy") or ""),
        }

    return [_to_handler(record) for record in self._rows(query, {"rid": route_id})]
1716
+
1717
def get_route_by_path(
    self,
    *,
    microservice: str,
    path_template: str,
    method: str = "",
) -> dict[str, Any] | None:
    """Look up a single Route by microservice and path template.

    The method filter is applied only when ``method`` is not the empty
    string. When several routes match, the one with the lowest id is
    returned; ``None`` is returned when nothing matches.
    """
    params: dict[str, Any] = {"ms": microservice, "pt": path_template}
    method_clause = ""
    if method != "":
        params["meth"] = method
        method_clause = "AND r.method = $meth"
    query = (
        "MATCH (r:Route) WHERE r.microservice = $ms AND r.path_template = $pt "
        + method_clause
        + f" RETURN {self._ROUTE_RETURN} ORDER BY r.id LIMIT 1"
    )
    matches = self._rows(query, params)
    return self._row_to_route_dict(matches[0]) if matches else None
1737
+
1738
def find_route_callers(
    self,
    route_id: str | None = None,
    *,
    microservice: str = "",
    path_template: str = "",
    method: str = "",
) -> list[RouteCaller]:
    """Return the callers of a route: HTTP via Client, async via Producer.

    The route is identified either by ``route_id`` or, when that is empty,
    by ``microservice`` + ``path_template`` (+ ``method``). As in
    ``get_route_by_path``, an empty ``method`` means "do not filter on
    method", and the lowest matching route id is chosen so the lookup is
    deterministic.

    Returns an empty list when no route can be resolved. HTTP callers come
    first, then async callers; each group is ordered by descending edge
    confidence, then caller id.
    """
    rid = route_id or ""
    if not rid:
        params: dict[str, Any] = {
            "microservice": microservice,
            "path_template": path_template,
        }
        method_clause = ""
        if method != "":
            # Only constrain the method when the caller actually gave one;
            # otherwise HTTP routes could never be found by path alone.
            params["method"] = method
            method_clause = " AND r.method = $method"
        rows = self._rows(
            "MATCH (r:Route) "
            "WHERE r.microservice = $microservice AND r.path_template = $path_template"
            f"{method_clause} "
            # ORDER BY makes the LIMIT 1 pick stable across runs.
            "RETURN r.id AS id ORDER BY r.id LIMIT 1",
            params,
        )
        if not rows:
            return []
        rid = str(rows[0].get("id") or "")
        if not rid:
            return []
    http_rows = self._rows(
        "MATCH (s:Symbol)-[:DECLARES_CLIENT]->(c:Client)-[e:HTTP_CALLS]->(r:Route {id: $rid}) "
        "RETURN c.id AS caller_node_id, c.microservice AS caller_microservice, "
        "s.id AS declaring_symbol_id, e.confidence AS confidence, e.match AS match, "
        "c.target_service AS target_service, e.raw_uri AS raw_uri "
        "ORDER BY e.confidence DESC, c.id",
        {"rid": rid},
    )
    async_rows = self._rows(
        "MATCH (s:Symbol)-[:DECLARES_PRODUCER]->(p:Producer)-[e:ASYNC_CALLS]->(r:Route {id: $rid}) "
        "RETURN p.id AS caller_node_id, p.microservice AS caller_microservice, "
        "s.id AS declaring_symbol_id, e.confidence AS confidence, e.match AS match, "
        "p.topic AS topic, p.broker AS broker "
        "ORDER BY e.confidence DESC, p.id",
        {"rid": rid},
    )
    out: list[RouteCaller] = []
    for row in http_rows:
        out.append(
            RouteCaller(
                caller_node_id=str(row.get("caller_node_id") or ""),
                caller_node_kind="client",
                caller_microservice=str(row.get("caller_microservice") or ""),
                declaring_symbol_id=str(row.get("declaring_symbol_id") or ""),
                confidence=float(row.get("confidence") or 0.0),
                match=str(row.get("match") or ""),
                target_service=str(row.get("target_service") or ""),
                raw_uri=str(row.get("raw_uri") or ""),
            ),
        )
    for row in async_rows:
        out.append(
            RouteCaller(
                caller_node_id=str(row.get("caller_node_id") or ""),
                caller_node_kind="producer",
                caller_microservice=str(row.get("caller_microservice") or ""),
                declaring_symbol_id=str(row.get("declaring_symbol_id") or ""),
                confidence=float(row.get("confidence") or 0.0),
                match=str(row.get("match") or ""),
                topic=str(row.get("topic") or ""),
                broker=str(row.get("broker") or ""),
            ),
        )
    return out
1809
+
1810
def trace_request_flow(self, entry_route_id: str, max_hops: int = 5) -> dict[str, Any]:
    """Collect inbound callers and outbound call chains for one route.

    Inbound rows come from Clients (``HTTP_CALLS``) followed by Producers
    (``ASYNC_CALLS``), each with the declaring symbol and any origin symbol
    reaching it through up to ``max_hops`` ``CALLS`` edges. Outbound rows
    follow ``CALLS`` from the route's handler symbols. ``max_hops`` is
    clamped to the range 1..8 and echoed back in the result.
    """
    hops = min(max(int(max_hops), 1), 8)

    # (edge label, caller node label, declaring edge label, reported kind)
    inbound_sources = (
        ("HTTP_CALLS", "Client", "DECLARES_CLIENT", "client"),
        ("ASYNC_CALLS", "Producer", "DECLARES_PRODUCER", "producer"),
    )
    inbound_batches = []
    for edge_label, caller_label, declares_label, caller_kind in inbound_sources:
        inbound_query = (
            f"MATCH (entry:Route {{id: $rid}})<-[e:{edge_label}]-(caller:{caller_label})"
            f"<-[:{declares_label}]-(decl:Symbol) "
            f"OPTIONAL MATCH (origin:Symbol)-[:CALLS*0..{hops}]->(decl) "
            f"RETURN DISTINCT caller.id AS caller_node_id, '{caller_kind}' AS caller_node_kind, "
            "decl.id AS declaring_symbol_id, decl.fqn AS declaring_symbol_fqn, "
            "caller.microservice AS microservice, e.confidence AS confidence, "
            "e.match AS match, origin.id AS origin_symbol_id, origin.fqn AS origin_fqn "
            "ORDER BY confidence DESC, caller_node_id"
        )
        inbound_batches.append(self._rows(inbound_query, {"rid": entry_route_id}))
    http_inbound, async_inbound = inbound_batches

    outbound_query = (
        "MATCH (handler:Symbol)-[:EXPOSES]->(entry:Route {id: $rid}) "
        f"OPTIONAL MATCH (handler)-[:CALLS*0..{hops}]->(next:Symbol) "
        "RETURN DISTINCT handler.id AS handler_symbol_id, handler.fqn AS handler_fqn, "
        "handler.microservice AS handler_microservice, "
        "next.id AS next_symbol_id, next.fqn AS next_fqn, next.microservice AS next_microservice "
        "ORDER BY handler_symbol_id, next_symbol_id"
    )
    outbound = self._rows(outbound_query, {"rid": entry_route_id})

    return {
        "entry_route_id": entry_route_id,
        "max_hops": hops,
        "inbound": http_inbound + async_inbound,
        "outbound": outbound,
    }
1851
+
1852
+ # ---- outbound clients (LC3) ----
1853
+
1854
# RETURN projection shared by the Client queries: every listed property of the
# matched node ``c`` is returned under its own name, in this order.
_CLIENT_RETURN = ", ".join(
    f"c.{column} AS {column}"
    for column in (
        "id",
        "client_kind",
        "target_service",
        "method",
        "path",
        "path_template",
        "path_regex",
        "member_fqn",
        "member_id",
        "microservice",
        "module",
        "filename",
        "start_line",
        "end_line",
        "resolved",
        "source_layer",
    )
)
1862
+
1863
+ @staticmethod
1864
+ def _row_to_client_dict(row: dict[str, Any]) -> dict[str, Any]:
1865
+ return {
1866
+ "id": str(row.get("id") or ""),
1867
+ "client_kind": str(row.get("client_kind") or ""),
1868
+ "target_service": str(row.get("target_service") or ""),
1869
+ "method": str(row.get("method") or ""),
1870
+ "path": str(row.get("path") or ""),
1871
+ "path_template": str(row.get("path_template") or ""),
1872
+ "path_regex": str(row.get("path_regex") or ""),
1873
+ "member_fqn": str(row.get("member_fqn") or ""),
1874
+ "member_id": str(row.get("member_id") or ""),
1875
+ "microservice": str(row.get("microservice") or ""),
1876
+ "module": str(row.get("module") or ""),
1877
+ "filename": str(row.get("filename") or ""),
1878
+ "start_line": int(row.get("start_line") or 0),
1879
+ "end_line": int(row.get("end_line") or 0),
1880
+ "resolved": bool(row.get("resolved", True)),
1881
+ "source_layer": str(row.get("source_layer") or "builtin"),
1882
+ }
1883
+
1884
def list_clients(
    self,
    *,
    microservice: str | None = None,
    client_kind: str | None = None,
    target_service: str | None = None,
    path_prefix: str | None = None,
    method: str | None = None,
    limit: int = 100,
) -> list[dict[str, Any]]:
    """List Client nodes, optionally filtered, as plain dicts.

    Each filter is applied only when a non-empty value is supplied;
    ``path_prefix`` matches with ``STARTS WITH``, the others by equality.
    ``limit`` is clamped to the range 1..500. Rows are ordered by
    microservice, client kind, path, method and id.
    """
    params: dict[str, Any] = {"lim": min(max(int(limit), 1), 500)}
    conditions: list[str] = []

    # (bound parameter name, supplied value, Cypher predicate)
    truthy_filters = (
        ("microservice", microservice, "c.microservice = $microservice"),
        ("client_kind", client_kind, "c.client_kind = $client_kind"),
        ("target_service", target_service, "c.target_service = $target_service"),
        ("path_prefix", path_prefix, "c.path STARTS WITH $path_prefix"),
    )
    for name, value, predicate in truthy_filters:
        if value:
            params[name] = value
            conditions.append(predicate)

    # ``method`` is skipped only for None / empty string.
    if not (method is None or method == ""):
        params["method"] = method
        conditions.append("c.method = $method")

    where_clause = f" WHERE {' AND '.join(conditions)}" if conditions else ""
    query = (
        f"MATCH (c:Client){where_clause} RETURN {self._CLIENT_RETURN} "
        "ORDER BY c.microservice, c.client_kind, c.path, c.method, c.id LIMIT $lim"
    )
    return list(map(self._row_to_client_dict, self._rows(query, params)))
1918
+
1919
# RETURN projection shared by the Producer queries: every listed property of
# the matched node ``p`` is returned under its own name, in this order.
_PRODUCER_RETURN = ", ".join(
    f"p.{column} AS {column}"
    for column in (
        "id",
        "producer_kind",
        "topic",
        "broker",
        "direction",
        "member_fqn",
        "member_id",
        "microservice",
        "module",
        "filename",
        "start_line",
        "end_line",
        "resolved",
        "source_layer",
    )
)
1926
+
1927
+ @staticmethod
1928
+ def _row_to_producer_dict(row: dict[str, Any]) -> dict[str, Any]:
1929
+ return {
1930
+ "id": str(row.get("id") or ""),
1931
+ "producer_kind": str(row.get("producer_kind") or ""),
1932
+ "topic": str(row.get("topic") or ""),
1933
+ "broker": str(row.get("broker") or ""),
1934
+ "direction": str(row.get("direction") or ""),
1935
+ "member_fqn": str(row.get("member_fqn") or ""),
1936
+ "member_id": str(row.get("member_id") or ""),
1937
+ "microservice": str(row.get("microservice") or ""),
1938
+ "module": str(row.get("module") or ""),
1939
+ "filename": str(row.get("filename") or ""),
1940
+ "start_line": int(row.get("start_line") or 0),
1941
+ "end_line": int(row.get("end_line") or 0),
1942
+ "resolved": bool(row.get("resolved", True)),
1943
+ "source_layer": str(row.get("source_layer") or "builtin"),
1944
+ }
1945
+
1946
def list_producers(
    self,
    *,
    microservice: str | None = None,
    producer_kind: str | None = None,
    topic_prefix: str | None = None,
    limit: int = 100,
) -> list[dict[str, Any]]:
    """List Producer nodes, optionally filtered, as plain dicts.

    Each filter is applied only when a non-empty value is supplied;
    ``topic_prefix`` matches with ``STARTS WITH``, the others by equality.
    ``limit`` is clamped to the range 1..500. Rows are ordered by
    microservice, producer kind, topic and id.
    """
    params: dict[str, Any] = {"lim": min(max(int(limit), 1), 500)}
    conditions: list[str] = []

    # (bound parameter name, supplied value, Cypher predicate)
    optional_filters = (
        ("microservice", microservice, "p.microservice = $microservice"),
        ("producer_kind", producer_kind, "p.producer_kind = $producer_kind"),
        ("topic_prefix", topic_prefix, "p.topic STARTS WITH $topic_prefix"),
    )
    for name, value, predicate in optional_filters:
        if value:
            params[name] = value
            conditions.append(predicate)

    where_clause = f" WHERE {' AND '.join(conditions)}" if conditions else ""
    query = (
        f"MATCH (p:Producer){where_clause} RETURN {self._PRODUCER_RETURN} "
        "ORDER BY p.microservice, p.producer_kind, p.topic, p.id LIMIT $lim"
    )
    return list(map(self._row_to_producer_dict, self._rows(query, params)))
1972
+
1973
+ # ---- used by search_lancedb.graph_expand ----
1974
+
1975
def expand_fqns(self, fqns: list[str], *, depth: int = 1,
                edge_types: list[str] | None = None,
                direction: str = "both", limit: int = 200) -> list[str]:
    """Return neighbor FQNs (types only) for a batch of starting FQNs.

    Args:
        fqns: Starting symbol FQNs; an empty list yields ``[]``.
        depth: Maximum number of hops to follow; values below 1 yield ``[]``.
        edge_types: Relationship types to traverse. Defaults to
            ``EXTENDS``, ``IMPLEMENTS`` and ``INJECTS`` when empty or ``None``.
        direction: ``"out"`` or ``"in"`` for a directed traversal; any other
            value traverses in both directions.
        limit: Maximum number of distinct FQNs returned.

    Raises:
        ValueError: If an edge type is not a plain identifier. Edge types
            cannot be bound as query parameters and are spliced into the
            relationship pattern, so anything else would corrupt the query.
    """
    if not fqns or depth < 1:
        return []
    edges = edge_types or ["EXTENDS", "IMPLEMENTS", "INJECTS"]
    invalid = [
        edge for edge in edges
        if not isinstance(edge, str)
        or re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", edge) is None
    ]
    if invalid:
        raise ValueError(f"invalid edge type(s): {invalid!r}")
    edge_pattern = "|".join(edges)
    if direction == "out":
        arrow_l, arrow_r = "-", "->"
    elif direction == "in":
        arrow_l, arrow_r = "<-", "-"
    else:
        arrow_l, arrow_r = "-", "-"
    q = (
        f"MATCH (root:Symbol) WHERE root.fqn IN $fqns "
        f"MATCH (root){arrow_l}[:{edge_pattern}*1..{int(depth)}]{arrow_r}(n:Symbol) "
        f"WHERE n.kind IN ['class','interface','enum','record','annotation'] AND n.resolved "
        f"RETURN DISTINCT n.fqn AS fqn LIMIT {int(limit)}"
    )
    # Rows without a usable fqn are dropped.
    return [r["fqn"] for r in self._rows(q, {"fqns": fqns}) if r.get("fqn")]