java-codebase-rag 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kuzu_queries.py ADDED
@@ -0,0 +1,1989 @@
1
+ """Read-only Cypher helpers over the Kuzu AST graph built by `build_ast_graph.py`.
2
+
3
+ Each function opens a Kuzu connection on demand and returns plain JSON-ish dicts
4
+ so the MCP server can serialize them without further mapping.
5
+
6
+ The Kuzu database is opened read-only and cached per-process. This module is
7
+ intentionally dependency-light: nothing here imports LanceDB or sentence-transformers.
8
+
9
+ Cypher pitfalls (see also ``AGENTS.md``): avoid ``label(e) IN $list`` in ``WHERE`` for
10
+ relationship-type filters; use OR of ``label(e) = $param`` with bound parameters.
11
+ Typed unions ``-[e:A|B]-`` require every ``RETURN`` column on ``e`` to exist on all
12
+ listed rel types, or the binder may fail.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import logging
18
+ import os
19
+ import threading
20
+ from dataclasses import asdict, dataclass
21
+ from pathlib import Path
22
+ from typing import Any, Literal
23
+
24
+ import kuzu
25
+
26
+ from ast_java import ONTOLOGY_VERSION as _ONTOLOGY_VERSION
27
+
28
+ log = logging.getLogger(__name__)
29
+
30
# Dot-keyed "composed" relations used by describe / neighbors. The keys are
# synthesized by this module and are not stored graph edge labels.
_MEMBER_EDGE_COMPOSED_REL_MAP: tuple[tuple[str, str], ...] = tuple(
    (f"DECLARES.{stored_rel}", stored_rel)
    for stored_rel in ("DECLARES_CLIENT", "DECLARES_PRODUCER", "EXPOSES")
)
_MEMBER_EDGE_COMPOSED_REL_BY_KEY: dict[str, str] = {
    composed_key: stored_rel
    for composed_key, stored_rel in _MEMBER_EDGE_COMPOSED_REL_MAP
}

# The bare "OVERRIDDEN_BY" key maps to None: it has no terminal relation.
_OVERRIDE_AXIS_COMPOSED_REL_MAP: tuple[tuple[str, str | None], ...] = (
    ("OVERRIDDEN_BY", None),
    *(
        (f"OVERRIDDEN_BY.{stored_rel}", stored_rel)
        for stored_rel in ("DECLARES_CLIENT", "DECLARES_PRODUCER", "EXPOSES")
    ),
)
_OVERRIDE_AXIS_COMPOSED_REL_BY_KEY: dict[str, str | None] = {
    composed_key: stored_rel
    for composed_key, stored_rel in _OVERRIDE_AXIS_COMPOSED_REL_MAP
}
OVERRIDE_AXIS_COMPOSED_EDGE_TYPES: frozenset[str] = frozenset(
    _OVERRIDE_AXIS_COMPOSED_REL_BY_KEY
)
46
+
47
+
48
def _coerce_id_list(raw: Any) -> list[str]:
    """Normalize Kuzu ``collect(DISTINCT ...)`` list results to string ids.

    Args:
        raw: ``None``, a list/tuple of ids, or a single scalar id.

    Returns:
        Non-empty string ids in their original order. ``None`` entries and
        entries whose string form is empty are dropped; a scalar becomes a
        one-element list (or ``[]`` when its string form is empty).
    """
    if raw is None:
        return []
    # Tuples are accepted alongside lists so a tuple-shaped result is not
    # stringified wholesale into a single bogus id such as "('a', 'b')".
    if isinstance(raw, (list, tuple)):
        as_text = (str(item) for item in raw if item is not None)
        return [item_id for item_id in as_text if item_id]
    scalar = str(raw)
    return [scalar] if scalar else []
56
+
57
+
58
+ __all__ = [
59
+ "KuzuGraph",
60
+ "resolve_kuzu_path",
61
+ "SymbolHit",
62
+ "EdgeHit",
63
+ "CallEdge",
64
+ "ViaEdge",
65
+ "StageSymbol",
66
+ "RouteCaller",
67
+ "find_symbols_in_file_range",
68
+ ]
69
+
70
+
71
def resolve_kuzu_path(explicit: str | None = None) -> str:
    """Work out where the Kuzu database lives, mirroring the graph builder.

    Precedence: a non-empty ``explicit`` path (``~`` expanded), then
    ``$JAVA_CODEBASE_RAG_INDEX_DIR/code_graph.kuzu`` when that variable holds
    a non-remote location, then ``<cwd>/.java-codebase-rag/code_graph.kuzu``.
    """
    if explicit:
        return str(Path(explicit).expanduser())

    index_dir = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
    # Object-store URIs cannot host the local graph file; fall through instead.
    points_at_object_store = index_dir.startswith(("s3://", "gs://", "az://"))
    if index_dir and not points_at_object_store:
        local_root = Path(os.path.expanduser(index_dir.rstrip("/")))
        return str(local_root / "code_graph.kuzu")

    fallback = Path.cwd() / ".java-codebase-rag" / "code_graph.kuzu"
    return str(fallback.resolve())
79
+
80
+
81
@dataclass
class SymbolHit:
    """Plain record carrying the properties projected for one ``Symbol`` node."""

    # Identity and naming.
    id: str
    kind: str
    name: str
    fqn: str
    # Where the symbol is declared.
    package: str
    module: str
    microservice: str
    filename: str
    # Source span.
    start_line: int
    end_line: int
    start_byte: int
    end_byte: int
    # Declared / derived attributes.
    modifiers: list[str]
    annotations: list[str]
    capabilities: list[str]
    role: str
    signature: str
    parent_id: str
    resolved: bool
102
+
103
+
104
@dataclass
class EdgeHit:
    """A typed edge between two symbols, with optional edge attributes."""

    type: str  # EXTENDS | IMPLEMENTS | INJECTS
    src: SymbolHit
    dst: SymbolHit
    # Edge attributes; each has a default so callers may omit them.
    mechanism: str = ""
    annotation: str = ""
    field_or_param: str = ""
    resolved: bool = True
113
+
114
+
115
@dataclass
class CallEdge:
    """One call edge from ``src`` to ``dst`` together with its edge attributes."""

    src: SymbolHit
    dst: SymbolHit
    # How the edge was produced.
    confidence: float
    strategy: str
    source: str
    # Call-site attributes carried on the edge.
    call_site_line: int
    call_site_byte: int
    arg_count: int
    resolved: bool
126
+
127
+
128
@dataclass
class ViaEdge:
    """Labelled edge leading from a previous-stage node to a stage symbol.

    `trace_flow` fills these in so a caller can tell *why* two types landed
    in the same chain (e.g. `INJECTS` vs `IMPLEMENTS` vs `CALLS`) and how many
    hops from the frontier the symbol was reached.
    """

    edge_type: str  # INJECTS | EXTENDS | IMPLEMENTS | CALLS | HTTP_CALLS | ASYNC_CALLS
    from_fqn: str
    hop: int  # 1 = direct neighbour of previous-stage frontier
    caller_node_id: str = ""  # Client id when edge_type is HTTP_CALLS (SCHEMA v2)
140
+
141
+
142
@dataclass
class StageSymbol:
    """One entry of a trace_flow stage: a symbol and the edges that reached it.

    Seeds (stage 0) carry `via=[]`. In later stages `via` lists every
    first-time path from the previous frontier to `symbol`.
    """

    symbol: SymbolHit
    via: list[ViaEdge]
151
+
152
+
153
@dataclass
class RouteCaller:
    """A client or producer node recorded as a caller, with match details."""

    caller_node_id: str
    caller_node_kind: Literal["client", "producer"]
    caller_microservice: str
    declaring_symbol_id: str
    confidence: float
    match: str
    # Optional details; each defaults to the empty string.
    target_service: str = ""
    raw_uri: str = ""
    topic: str = ""
    broker: str = ""
165
+
166
+
167
def _symbol_return_for(alias: str) -> str:
    """Build the Kuzu RETURN projection of Symbol properties for a node alias.

    Kept in one place so that queries binding Symbol under an alias other
    than `s` (e.g. `n` in graph-expansion / flow-tracing) never emit `s.*`
    references, which Kuzu rejects with `Variable s is not in scope`.
    """
    # Listed locally: this function is called at import time, just below.
    projected = (
        "id", "kind", "name", "fqn",
        "package", "module",
        "microservice", "filename",
        "start_line", "end_line",
        "start_byte", "end_byte",
        "modifiers", "annotations",
        "capabilities",
        "role", "signature",
        "parent_id", "resolved",
    )
    return ", ".join(f"{alias}.{column} AS {column}" for column in projected)


_SYMBOL_RETURN = _symbol_return_for("s")
188
+
189
+
190
def _scope_filters(
    alias: str,
    *,
    module: str | None,
    microservice: str | None,
    params: dict[str, Any],
) -> list[str]:
    """Produce module / microservice scoping predicates for a node alias.

    `params` is mutated in place: `$module` / `$microservice` are bound only
    for filters that are actually set, so unused names don't leak into the
    Kuzu plan.
    """
    predicates: list[str] = []
    requested = (("module", module), ("microservice", microservice))
    for column, wanted in requested:
        if not wanted:
            continue
        params[column] = wanted
        predicates.append(f"{alias}.{column} = ${column}")
    return predicates
211
+
212
+
213
# FQN prefixes that mark a type as external (see `_is_external_fqn`).
_EXTERNAL_PREFIXES = (
    "java.", "javax.", "jakarta.",
    "org.springframework.",
    "lombok.",
)

# Edge labels this module enumerates, e.g. for the per-type counts in `meta()`.
_EDGE_TYPES: tuple[str, ...] = (
    "EXTENDS", "IMPLEMENTS", "INJECTS", "OVERRIDES",
    "DECLARES", "CALLS", "EXPOSES",
    "DECLARES_CLIENT", "DECLARES_PRODUCER",
    "HTTP_CALLS", "ASYNC_CALLS",
)
234
+
235
+
236
def _type_part_fqn(sym_fqn: str) -> str:
    """Return the part of `sym_fqn` before its first `#` (all of it if none)."""
    type_part, _, _ = sym_fqn.partition("#")
    return type_part
238
+
239
+
240
def _is_external_fqn(fqn: str) -> bool:
    """Tell whether the type part of `fqn` starts with a known external prefix."""
    # str.startswith accepts the whole prefix tuple in a single call.
    return _type_part_fqn(fqn).startswith(_EXTERNAL_PREFIXES)
243
+
244
+
245
def _row_to_symbol(row: dict[str, Any]) -> SymbolHit:
    """Convert one projected Symbol row into a `SymbolHit`.

    Missing or null columns fall back to ``""`` / ``0`` / ``[]``. `resolved`
    defaults to True only when the column is absent from `row`.
    """

    def text(column: str) -> Any:
        return row.get(column) or ""

    def number(column: str) -> int:
        return int(row.get(column) or 0)

    def strings(column: str) -> list[Any]:
        return list(row.get(column) or [])

    return SymbolHit(
        id=text("id"),
        kind=text("kind"),
        name=text("name"),
        fqn=text("fqn"),
        package=text("package"),
        module=text("module"),
        microservice=text("microservice"),
        filename=text("filename"),
        start_line=number("start_line"),
        end_line=number("end_line"),
        start_byte=number("start_byte"),
        end_byte=number("end_byte"),
        modifiers=strings("modifiers"),
        annotations=strings("annotations"),
        capabilities=strings("capabilities"),
        role=text("role"),
        signature=text("signature"),
        parent_id=text("parent_id"),
        resolved=bool(row.get("resolved", True)),
    )
267
+
268
+
269
# Symbol column names, in the same order as the `SymbolHit` fields.
_SYM_COLS = (
    "id",
    "kind",
    "name",
    "fqn",
    "package",
    "module",
    "microservice",
    "filename",
    "start_line",
    "end_line",
    "start_byte",
    "end_byte",
    "modifiers",
    "annotations",
    "capabilities",
    "role",
    "signature",
    "parent_id",
    "resolved",
)
274
+
275
+
276
def find_symbols_in_file_range(
    graph: "KuzuGraph",
    *,
    filename: str,
    start_line: int,
    end_line: int,
) -> list[SymbolHit]:
    """Return `Symbol` rows overlapping `[start_line, end_line]` in `filename`.

    Lines are 1-based and inclusive. A range starting below 1 or ending
    before it starts yields an empty list without querying the graph.
    """
    starts_before_file = start_line < 1
    ends_before_start = end_line < start_line
    if starts_before_file or ends_before_start:
        return []

    # Overlap test: the symbol starts no later than the range ends and ends
    # no earlier than the range starts.
    query = (
        "MATCH (s:Symbol) WHERE s.filename = $fn "
        "AND s.start_line <= $hmax AND s.end_line >= $hmin "
        f"RETURN {_SYMBOL_RETURN} ORDER BY s.start_line, s.end_line"
    )
    bindings = {"fn": filename, "hmax": int(end_line), "hmin": int(start_line)}
    hits: list[SymbolHit] = []
    for record in graph._rows(query, bindings):
        hits.append(_row_to_symbol(record))
    return hits
293
+
294
+
295
def _prefixed_symbol_row(prefix: str, row: dict[str, Any]) -> dict[str, Any]:
    """Select the `<prefix>_*` columns of `row` and strip that prefix from the keys."""
    marker = prefix + "_"
    cut = len(marker)
    stripped: dict[str, Any] = {}
    for column, value in row.items():
        if column.startswith(marker):
            stripped[column[cut:]] = value
    return stripped
298
+
299
+
300
def _row_to_call_edge(row: dict[str, Any]) -> CallEdge:
    """Convert one row with `caller_*` / `callee_*` columns into a `CallEdge`.

    Null or missing edge attributes fall back to ``0`` / ``0.0`` / ``""``;
    `source` falls back to ``"static"``, and `resolved` to True when the
    column is absent.
    """
    caller = _row_to_symbol(_prefixed_symbol_row("caller", row))
    callee = _row_to_symbol(_prefixed_symbol_row("callee", row))

    def number(column: str) -> int:
        return int(row.get(column) or 0)

    return CallEdge(
        src=caller,
        dst=callee,
        confidence=float(row.get("confidence") or 0.0),
        strategy=str(row.get("strategy") or ""),
        source=str(row.get("source") or "static"),
        call_site_line=number("call_site_line"),
        call_site_byte=number("call_site_byte"),
        arg_count=number("arg_count"),
        resolved=bool(row.get("resolved", True)),
    )
312
+
313
+
314
def _call_graph_needle_phantom_arity_alt(needle: str) -> str | None:
    """Map ``Type#method(123)`` → ``Type#method(?)`` for phantom callee FQNs (D1).

    Returns None unless `needle` contains ``#`` and ends in a parenthesised,
    all-digit argument count.
    """
    if "#" not in needle or not needle.endswith(")"):
        return None
    open_at = needle.rfind("(")
    if open_at <= 0:
        return None
    arity = needle[open_at + 1 : -1]
    if arity.isdigit():
        return needle[:open_at] + "(?)"
    return None
325
+
326
+
327
+ class KuzuGraph:
328
+ """Thin wrapper around a read-only Kuzu connection.
329
+
330
+ Safe to share across threads: we hold a single `Connection`, guarded by a lock.
331
+ """
332
+
333
+ _lock = threading.Lock()
334
+ _instance: "KuzuGraph | None" = None
335
+ _instance_path: str | None = None
336
+
337
def __init__(self, db_path: str) -> None:
    """Open the database at `db_path` read-only and create its one connection."""
    self.db_path = db_path
    database = kuzu.Database(db_path, read_only=True)
    self._db = database
    self._conn = kuzu.Connection(database)
    # Guards every use of the shared connection (see `_rows`).
    self._conn_lock = threading.Lock()
342
+
343
@classmethod
def get(cls, db_path: str | None = None) -> "KuzuGraph":
    """Return the cached graph for `db_path`, opening it when needed.

    A new instance is opened when none is cached or when the resolved path
    differs from the cached one. It replaces the cache only after passing
    the ontology-version check.

    Raises:
        RuntimeError: the graph's metadata loaded without error but its
            ``ontology_version`` is older than the required version.
    """
    resolved = resolve_kuzu_path(db_path)
    with cls._lock:
        cached = cls._instance
        if cached is not None and cls._instance_path == resolved:
            return cached

        candidate = cls(resolved)
        info = candidate.meta()
        graph_version = int(info.get("ontology_version") or 0)
        # Metadata that failed to load is not treated as "too old".
        if "error" not in info and graph_version < _ONTOLOGY_VERSION:
            raise RuntimeError(
                f"Graph ontology version {graph_version} is older than the "
                f"required version {_ONTOLOGY_VERSION}. "
                "Rebuild the graph: `python build_ast_graph.py --source-root <repo>`, "
                "or run `java-codebase-rag reprocess --source-root <repo>` for a full "
                "Lance+Kuzu re-index."
            )
        cls._instance = candidate
        cls._instance_path = resolved
        return candidate
362
+
363
@classmethod
def exists(cls, db_path: str | None = None) -> bool:
    """Report whether anything exists at the resolved Kuzu database path."""
    # Kuzu represents DB as a directory; allow file form too (single-file DBs),
    # hence a plain existence check rather than is_dir().
    return Path(resolve_kuzu_path(db_path)).exists()
371
+
372
+ # ---- low-level ----
373
+
374
def _rows(self, query: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
    """Execute `query` and return every result row as a column-name → value dict.

    The connection lock is held while executing and draining the result.
    """
    bindings = params or {}
    with self._conn_lock:
        result = self._conn.execute(query, bindings)
        names = result.get_column_names()
        records: list[dict[str, Any]] = []
        while result.has_next():
            values = result.get_next()
            records.append(dict(zip(names, values)))
    return records
383
+
384
+ # ---- meta ----
385
+
386
def meta(self) -> dict[str, Any]:
    """Return GraphMeta build metadata plus a live count of each edge type.

    The GraphMeta projection is attempted from the widest known column set
    down to the narrowest, and the first query that executes wins
    (presumably older graph builds lack the newer columns). Columns the
    winning projection did not select are reported with neutral defaults
    (``0``, ``0.0``, ``{}`` or ``None``).

    Returns:
        A JSON-friendly dict of metadata and ``edge_counts``. When every
        projection fails, or no GraphMeta row exists, the dict holds a
        single ``"error"`` key instead.
    """
    # Column tiers, narrowest first; each wider tier extends a narrower one so
    # the five queries cannot drift apart.
    legacy_cols = (
        "key", "ontology_version", "built_at", "source_root",
        "counts_json", "parse_errors",
    )
    pr_a2_cols = legacy_cols + (
        "routes_total", "exposes_total",
        "routes_by_framework", "routes_resolved_pct",
    )
    pre_e3_cols = pr_a2_cols + (
        "routes_from_brownfield_pct", "routes_by_layer",
        "http_calls_total", "async_calls_total",
        "http_calls_by_strategy", "async_calls_by_strategy",
        "http_calls_resolved_pct", "async_calls_resolved_pct",
        "http_clients_from_brownfield_pct", "async_producers_from_brownfield_pct",
        "http_calls_match_breakdown", "async_calls_match_breakdown",
        "cross_service_calls_total",
    )
    pr_e3_cols = pre_e3_cols + (
        "pass3_skipped_cross_service", "cross_service_resolution",
    )
    pr_f1_cols = pre_e3_cols + (
        "pass3_skipped_cross_service", "pass4_exposes_suppressed_feign",
        "cross_service_resolution",
    )
    tiers = (
        ("pr_f1", pr_f1_cols),
        ("pr_e3", pr_e3_cols),
        ("pre_e3", pre_e3_cols),
        ("pr_a2", pr_a2_cols),
        ("legacy", legacy_cols),
    )

    rows: list[dict[str, Any]] | None = None
    meta_mode = ""
    last_error: Exception | None = None
    for mode, columns in tiers:
        projection = ", ".join(f"m.{column} AS {column}" for column in columns)
        try:
            rows = self._rows(f"MATCH (m:GraphMeta) RETURN {projection}")
        except Exception as exc:
            # Broad on purpose: the failure type is the driver's choice and
            # any failure just means "try the next narrower projection".
            last_error = exc
            continue
        meta_mode = mode
        break
    if rows is None:
        # Even the narrowest projection failed; report its error.
        return {"error": f"{last_error}"}
    if not rows:
        return {"error": "no GraphMeta node"}
    row = rows[0]

    def as_int(column: str) -> int:
        """Read an integer column; null or unselected becomes 0."""
        return int(row.get(column) or 0)

    def as_pct(column: str) -> float:
        """Read a float column; null or unselected becomes 0.0."""
        return float(row.get(column) or 0.0)

    def as_json_dict(column: str) -> dict[str, Any]:
        """Read a JSON-object column; anything unusable becomes {}."""
        raw = row.get(column) or "{}"
        try:
            value = json.loads(raw) if isinstance(raw, str) else raw
        except (TypeError, ValueError):
            return {}
        return value if isinstance(value, dict) else {}

    counts: dict[str, Any]
    try:
        counts = json.loads(row.get("counts_json") or "{}")
    except (TypeError, ValueError):
        counts = {}

    # Only the widest projection selects this column; elsewhere it stays None
    # so callers can tell "not recorded" apart from a recorded zero.
    pass4_exposes_suppressed_feign: int | None = None
    if meta_mode == "pr_f1":
        pass4_exposes_suppressed_feign = as_int("pass4_exposes_suppressed_feign")
    raw_csr = row.get("cross_service_resolution")
    cross_service_resolution: str | None = (
        str(raw_csr) if raw_csr not in (None, "") else None
    )

    edge_counts = {edge: 0 for edge in _EDGE_TYPES}
    failed_edges: list[str] = []
    for edge_type in _EDGE_TYPES:
        try:
            edge_rows = self._rows(
                f"MATCH ()-[e:{edge_type}]->() RETURN count(e) AS n"
            )
            edge_counts[edge_type] = int(edge_rows[0].get("n") or 0) if edge_rows else 0
        except Exception as exc:
            # Best effort: a failing edge type keeps its zero count.
            failed_edges.append(edge_type)
            log.warning("edge count query failed for %s: %s", edge_type, exc)
    if len(failed_edges) == len(_EDGE_TYPES):
        log.warning("edge count queries failed for all edge types; returning zeroed edge_counts")

    return {
        "ontology_version": as_int("ontology_version"),
        "built_at": as_int("built_at"),
        "source_root": row.get("source_root") or "",
        "parse_errors": as_int("parse_errors"),
        "counts": counts,
        "routes_total": as_int("routes_total"),
        "exposes_total": as_int("exposes_total"),
        "routes_by_framework": as_json_dict("routes_by_framework"),
        "routes_resolved_pct": as_pct("routes_resolved_pct"),
        "routes_from_brownfield_pct": as_pct("routes_from_brownfield_pct"),
        "routes_by_layer": as_json_dict("routes_by_layer"),
        "http_calls_total": as_int("http_calls_total"),
        "async_calls_total": as_int("async_calls_total"),
        "http_calls_by_strategy": as_json_dict("http_calls_by_strategy"),
        "async_calls_by_strategy": as_json_dict("async_calls_by_strategy"),
        "http_calls_resolved_pct": as_pct("http_calls_resolved_pct"),
        "async_calls_resolved_pct": as_pct("async_calls_resolved_pct"),
        "http_clients_from_brownfield_pct": as_pct("http_clients_from_brownfield_pct"),
        "async_producers_from_brownfield_pct": as_pct("async_producers_from_brownfield_pct"),
        "http_calls_match_breakdown": as_json_dict("http_calls_match_breakdown"),
        "async_calls_match_breakdown": as_json_dict("async_calls_match_breakdown"),
        "cross_service_calls_total": as_int("cross_service_calls_total"),
        "pass3_skipped_cross_service": as_int("pass3_skipped_cross_service"),
        "pass4_exposes_suppressed_feign": pass4_exposes_suppressed_feign,
        "cross_service_resolution": cross_service_resolution,
        "edge_counts": edge_counts,
        "db_path": self.db_path,
    }
618
+
619
def edge_counts_for(self, node_id: str) -> dict[str, dict[str, int]]:
    """Count the edges touching node `node_id`, per edge label and direction.

    Returns:
        ``{edge_label: {"in": n, "out": n}}``, keeping only labels with a
        positive count in at least one direction.
    """
    records = self._rows(
        "MATCH (n {id: $id})-[e]->() "
        "RETURN label(e) AS edge_type, 'out' AS direction, count(e) AS n "
        "UNION ALL "
        "MATCH (n {id: $id})<-[e]-() "
        "RETURN label(e) AS edge_type, 'in' AS direction, count(e) AS n",
        {"id": node_id},
    )
    tally: dict[str, dict[str, int]] = {}
    for record in records:
        label = str(record.get("edge_type") or "")
        side = str(record.get("direction") or "")
        if not label or side not in ("in", "out"):
            continue
        bucket = tally.setdefault(label, {"in": 0, "out": 0})
        bucket[side] = int(record.get("n") or 0)

    kept: dict[str, dict[str, int]] = {}
    for label, bucket in tally.items():
        if bucket["in"] > 0 or bucket["out"] > 0:
            kept[label] = bucket
    return kept
641
+
642
def member_edge_rollup_for(self, type_id: str) -> dict[str, dict[str, int]]:
    """Roll up 2-hop DECLARES member edge counts for a type Symbol (describe-time only).

    Result keys use dot notation and are not stored graph edge labels. Only
    keys with a positive count appear, each as ``{"in": 0, "out": n}``.
    """
    bindings = {"id": type_id}
    totals: dict[str, dict[str, int]] = {}
    for composed_key, stored_rel in _MEMBER_EDGE_COMPOSED_REL_MAP:
        # `stored_rel` comes from the module's constant map, not from callers.
        query = (
            f"MATCH (t:Symbol {{id: $id}})-[:DECLARES]->(m:Symbol)-[e:{stored_rel}]->() "
            "RETURN count(e) AS n"
        )
        records = self._rows(query, bindings)
        member_edges = 0
        for record in records or ():
            member_edges += int(record.get("n") or 0)
        if member_edges > 0:
            totals[composed_key] = {"in": 0, "out": member_edges}
    return totals
659
+
660
def member_edge_traversal_for(self, type_id: str, composed_key: str) -> list[dict[str, Any]]:
    """Traverse type → DECLARES → member → edge for a neighbors dot-key.

    Returns one row per terminal edge, carrying the member id as `via_id`
    and the edge attribute projection; an unknown `composed_key` yields ``[]``.
    """
    stored_rel = _MEMBER_EDGE_COMPOSED_REL_BY_KEY.get(composed_key)
    if stored_rel is None:
        return []

    # Untyped [e] + label(e) filter: typed unions fail the binder when RETURN references
    # columns that exist on only some rel types (same pattern as flat neighbors_v2).
    query = (
        "MATCH (t:Symbol {id: $id})-[:DECLARES]->(m:Symbol)-[e]->(term) "
        "WHERE label(e) = $rel "
        "RETURN m.id AS via_id, label(e) AS stored_edge_type, "
        "term.id AS other_id, e.confidence AS confidence, e.strategy AS strategy, "
        "e.match AS match, e.mechanism AS mechanism, e.annotation AS annotation, "
        "e.field_or_param AS field_or_param, e.source AS source, "
        "e.call_site_line AS call_site_line, e.call_site_byte AS call_site_byte, "
        "e.arg_count AS arg_count, e.resolved AS resolved"
    )
    bindings = {"id": type_id, "rel": stored_rel}
    return self._rows(query, bindings)
678
+
679
def override_axis_traversal_for(self, method_id: str, composed_key: str) -> list[dict[str, Any]]:
    """Run the override-axis traversal for a method Symbol (neighbors dot-key path).

    The dispatch hop follows stored ``[:OVERRIDES]`` edges (aligned with
    ``override_axis_rollup_for`` overrider ids). The base ``OVERRIDDEN_BY`` key
    returns overrider method ids only. Composed keys return terminal rows with
    the full edge attribute projection plus ``via_id`` (the overrider method
    id). Any other key yields ``[]``.
    """
    stored_rel = _OVERRIDE_AXIS_COMPOSED_REL_BY_KEY.get(composed_key)

    if stored_rel is not None:
        composed_query = (
            "MATCH (decl:Symbol {id: $id})<-[:OVERRIDES]-(mover:Symbol)-[e]->(term) "
            "WHERE label(e) = $rel "
            "RETURN mover.id AS via_id, label(e) AS stored_edge_type, "
            "term.id AS other_id, e.confidence AS confidence, e.strategy AS strategy, "
            "e.match AS match, e.mechanism AS mechanism, e.annotation AS annotation, "
            "e.field_or_param AS field_or_param, e.source AS source, "
            "e.call_site_line AS call_site_line, e.call_site_byte AS call_site_byte, "
            "e.arg_count AS arg_count, e.resolved AS resolved"
        )
        return self._rows(composed_query, {"id": method_id, "rel": stored_rel})

    if composed_key == "OVERRIDDEN_BY":
        base_query = (
            "MATCH (decl:Symbol {id: $id})<-[:OVERRIDES]-(mover:Symbol) "
            "RETURN mover.id AS other_id"
        )
        return self._rows(base_query, {"id": method_id})

    return []
706
+
707
def count_calls_for_symbol(self, origin_id: str, *, direction: Literal["in", "out"]) -> int:
    """Count CALLS edges incident on a Symbol (hints / diagnostics).

    ``direction="out"`` counts edges leaving the symbol; any other value
    counts edges arriving at it.
    """
    arrow = "-[e:CALLS]->" if direction == "out" else "<-[e:CALLS]-"
    query = f"MATCH (origin:Symbol {{id: $id}}){arrow}() RETURN count(e) AS n"
    records = self._rows(query, {"id": origin_id})
    if not records:
        return 0
    return int(records[0].get("n") or 0)
715
+
716
def neighbor_calls_for_symbol(
    self,
    origin_id: str,
    *,
    direction: Literal["in", "out"],
    offset: int = 0,
    limit: int | None = None,
    sql_pagination: bool = True,
    min_confidence: float | None = None,
    include_strategies: list[str] | None = None,
    exclude_strategies: list[str] | None = None,
    callee_declaring_role: str | None = None,
    callee_declaring_roles: list[str] | None = None,
    exclude_callee_declaring_roles: list[str] | None = None,
) -> list[dict[str, Any]]:
    """Fetch CALLS neighbors in source order, with optional edge-attribute pushdown.

    ``SKIP``/``LIMIT`` are appended after
    ``ORDER BY e.call_site_line, e.call_site_byte`` only when
    ``sql_pagination`` is True and ``limit`` is set. Otherwise the full
    ordered stream is returned for caller-side ``NodeFilter`` / pagination.
    """
    # (enabled, parameter name, bound value, predicate), in WHERE-clause order.
    optional_filters = (
        (min_confidence is not None, "min_confidence", min_confidence,
         "e.confidence >= $min_confidence"),
        (bool(include_strategies), "include_strategies", include_strategies,
         "e.strategy IN $include_strategies"),
        (bool(exclude_strategies), "exclude_strategies", exclude_strategies,
         "NOT (e.strategy IN $exclude_strategies)"),
        (callee_declaring_role is not None, "callee_declaring_role", callee_declaring_role,
         "e.callee_declaring_role = $callee_declaring_role"),
        (bool(callee_declaring_roles), "callee_declaring_roles", callee_declaring_roles,
         "e.callee_declaring_role IN $callee_declaring_roles"),
        (bool(exclude_callee_declaring_roles), "exclude_callee_declaring_roles",
         exclude_callee_declaring_roles,
         "NOT (e.callee_declaring_role IN $exclude_callee_declaring_roles)"),
    )
    predicates = ["origin.id = $id"]
    bindings: dict[str, Any] = {"id": origin_id}
    for enabled, name, value, predicate in optional_filters:
        if enabled:
            predicates.append(predicate)
            bindings[name] = value

    if direction == "out":
        pattern = "MATCH (origin:Symbol)-[e:CALLS]->(other:Symbol)"
    else:
        pattern = "MATCH (origin:Symbol)<-[e:CALLS]-(other:Symbol)"

    pieces = [
        f"{pattern} WHERE {' AND '.join(predicates)} ",
        "RETURN other.id AS other_id, 'CALLS' AS edge_type, ",
        "e.confidence AS confidence, e.strategy AS strategy, e.source AS source, ",
        "e.call_site_line AS call_site_line, e.call_site_byte AS call_site_byte, ",
        "e.arg_count AS arg_count, e.resolved AS resolved, ",
        "e.callee_declaring_role AS callee_declaring_role ",
        "ORDER BY e.call_site_line, e.call_site_byte",
    ]
    if sql_pagination and limit is not None:
        pieces.append(" SKIP $offset LIMIT $limit")
        bindings["offset"] = offset
        bindings["limit"] = limit
    return self._rows("".join(pieces), bindings)
776
+
777
def count_unresolved_for_caller(self, caller_id: str) -> int:
    """Return the number of ``UnresolvedCallSite`` nodes attached to a caller symbol.

    The count follows ``[:UNRESOLVED_AT]`` edges leaving the ``Symbol`` whose
    ``id`` equals ``caller_id``. An empty result set or a NULL count yields ``0``.
    """
    query = (
        "MATCH (:Symbol {id: $id})-[:UNRESOLVED_AT]->(u:UnresolvedCallSite) "
        "RETURN count(u) AS n"
    )
    result = self._rows(query, {"id": caller_id})
    if not result:
        return 0
    # A missing / NULL aggregate is reported as zero rather than raising.
    return int(result[0].get("n") or 0)
784
+
785
def unresolved_sites_for_caller(
    self,
    caller_id: str,
    *,
    direction: Literal["in", "out"] = "out",
) -> list[dict[str, Any]]:
    """List the unresolved call sites recorded for one caller, in source order.

    Only the ``"out"`` direction is queried; any other ``direction`` value
    returns an empty list without touching the database. Rows are ordered by
    ``call_site_line`` then ``call_site_byte``.
    """
    if direction == "out":
        query = (
            "MATCH (:Symbol {id: $id})-[:UNRESOLVED_AT]->(u:UnresolvedCallSite) "
            "RETURN u.id AS id, u.caller_id AS caller_id, u.call_site_line AS call_site_line, "
            "u.call_site_byte AS call_site_byte, u.arg_count AS arg_count, "
            "u.callee_simple AS callee_simple, u.receiver_expr AS receiver_expr, "
            "u.reason AS reason "
            "ORDER BY u.call_site_line, u.call_site_byte"
        )
        return self._rows(query, {"id": caller_id})
    return []
802
+
803
def unresolved_sites_for_describe(
    self,
    method_id: str,
    *,
    inline_limit: int = 5,
) -> tuple[list[dict[str, Any]], int]:
    """Return a capped preview of a method's unresolved call sites plus the total.

    Args:
        method_id: ``Symbol.id`` of the method whose ``[:UNRESOLVED_AT]`` edges
            are inspected.
        inline_limit: Maximum number of preview rows. Values ``<= 0`` yield an
            empty preview (the total is still reported) instead of emitting an
            invalid ``LIMIT -n`` clause.

    Returns:
        ``(rows, total)`` where ``rows`` holds at most ``inline_limit`` entries
        (``line``, ``reason``, ``callee_simple``, ``receiver_expr``) ordered by
        call-site position, and ``total`` is the full count of unresolved sites.
    """
    count_rows = self._rows(
        "MATCH (:Symbol {id: $id})-[:UNRESOLVED_AT]->(u:UnresolvedCallSite) "
        "RETURN count(u) AS n",
        {"id": method_id},
    )
    total = int(count_rows[0].get("n") or 0) if count_rows else 0
    if total == 0:
        return [], 0

    cap = int(inline_limit)
    if cap <= 0:
        # Nothing can be previewed; skip the second round-trip entirely.
        return [], total

    rows = self._rows(
        "MATCH (:Symbol {id: $id})-[:UNRESOLVED_AT]->(u:UnresolvedCallSite) "
        "RETURN u.call_site_line AS line, u.reason AS reason, "
        "u.callee_simple AS callee_simple, u.receiver_expr AS receiver_expr "
        "ORDER BY u.call_site_line, u.call_site_byte "
        f"LIMIT {cap}",
        {"id": method_id},
    )
    return rows, total
826
+
827
def list_unresolved_call_sites(
    self,
    *,
    method_id: str | None = None,
    reason: str | None = None,
    microservice: str | None = None,
    callee_simple: str | None = None,
    limit: int = 100,
) -> list[dict[str, Any]]:
    """List unresolved call sites across callers, optionally narrowed by filters.

    Every truthy filter contributes one equality predicate (ANDed together):
    ``method_id`` on the caller id, ``reason`` and ``callee_simple`` on the call
    site, ``microservice`` on the caller. Rows are ordered by call-site line and
    byte and capped at ``limit``.
    """
    # (parameter name, Cypher predicate, supplied value) in emission order.
    optional_filters = (
        ("method_id", "caller.id = $method_id", method_id),
        ("reason", "u.reason = $reason", reason),
        ("microservice", "caller.microservice = $microservice", microservice),
        ("callee_simple", "u.callee_simple = $callee_simple", callee_simple),
    )
    bindings: dict[str, Any] = {"lim": int(limit)}
    predicates: list[str] = []
    for key, predicate, value in optional_filters:
        if value:
            predicates.append(predicate)
            bindings[key] = value

    where_clause = ""
    if predicates:
        where_clause = "WHERE " + " AND ".join(predicates)

    query = (
        "MATCH (caller:Symbol)-[:UNRESOLVED_AT]->(u:UnresolvedCallSite) "
        f"{where_clause} "
        "RETURN u.id AS id, caller.id AS caller_id, caller.fqn AS caller_fqn, "
        "caller.microservice AS microservice, u.call_site_line AS call_site_line, "
        "u.call_site_byte AS call_site_byte, u.arg_count AS arg_count, "
        "u.callee_simple AS callee_simple, u.receiver_expr AS receiver_expr, "
        "u.reason AS reason "
        "ORDER BY u.call_site_line, u.call_site_byte "
        "LIMIT $lim"
    )
    return self._rows(query, bindings)
863
+
864
def stats_unresolved_call_sites(
    self,
    *,
    by: Literal["reason", "microservice", "caller_role"],
) -> list[dict[str, Any]]:
    """Histogram of unresolved call sites grouped along one axis.

    ``by="reason"`` buckets on the call site's ``reason``; ``by="microservice"``
    buckets on the caller's ``microservice``; any other value buckets on the
    ``role`` of the symbol that ``DECLARES`` the caller. Rows carry ``bucket``
    and ``n`` and are sorted by ``n`` descending.
    """
    if by == "reason":
        query = (
            "MATCH (:Symbol)-[:UNRESOLVED_AT]->(u:UnresolvedCallSite) "
            "RETURN u.reason AS bucket, count(*) AS n ORDER BY n DESC"
        )
    elif by == "microservice":
        query = (
            "MATCH (caller:Symbol)-[:UNRESOLVED_AT]->(:UnresolvedCallSite) "
            "RETURN caller.microservice AS bucket, count(*) AS n ORDER BY n DESC"
        )
    else:
        # Fallback axis: role of the declaring (parent) symbol.
        query = (
            "MATCH (caller:Symbol)-[:UNRESOLVED_AT]->(:UnresolvedCallSite) "
            "MATCH (parent:Symbol)-[:DECLARES]->(caller) "
            "RETURN parent.role AS bucket, count(*) AS n ORDER BY n DESC"
        )
    return self._rows(query)
884
+
885
def _edge_row_count_from_method_ids(self, method_ids: list[str], rel: str) -> int:
    """Count outgoing ``rel`` edges from method symbols (describe rollup helper).

    Issues a single grouped query for all ids instead of one query per id.

    Args:
        method_ids: ``Symbol.id`` values to count from. An id listed more than
            once contributes its edge count once per occurrence.
        rel: Relationship label; interpolated into the query text, so it must be
            a trusted, code-supplied label rather than user input.

    Returns:
        Total number of outgoing ``rel`` edges across ``method_ids``; ``0`` when
        the list is empty (no query is issued in that case).
    """
    if not method_ids:
        return 0
    # De-duplicate for the IN list; duplicates are re-applied when summing below.
    unique_ids = list(dict.fromkeys(method_ids))
    rows = self._rows(
        f"MATCH (x:Symbol)-[e:{rel}]->() WHERE x.id IN $mids "
        "RETURN x.id AS id, count(e) AS n",
        {"mids": unique_ids},
    )
    per_id: dict[str, int] = {}
    for row in rows:
        key = str(row.get("id"))
        per_id[key] = per_id.get(key, 0) + int(row.get("n") or 0)
    # Ids with no matching edge produce no row and therefore count as zero.
    return sum(per_id.get(str(mid), 0) for mid in method_ids)
895
+
896
def _override_impl_ids_from_stored(self, method_id: str) -> list[str]:
    """Ids of methods that override ``method_id`` (incoming stored ``[:OVERRIDES]`` hop).

    The collected id list is normalised through ``_coerce_id_list`` and then
    de-duplicated while keeping first-seen order.
    """
    query = (
        "MATCH (decl:Symbol {id: $id})<-[:OVERRIDES]-(mover:Symbol) "
        "RETURN collect(DISTINCT mover.id) AS ids"
    )
    result = self._rows(query, {"id": method_id})
    raw_ids = result[0].get("ids") if result else None
    unique_in_order = dict.fromkeys(_coerce_id_list(raw_ids))
    return list(unique_in_order)
904
+
905
def _override_decl_ids_from_stored(self, method_id: str) -> list[str]:
    """Ids of declarations that ``method_id`` overrides (outgoing stored ``[:OVERRIDES]`` hop).

    The collected id list is normalised through ``_coerce_id_list`` and then
    de-duplicated while keeping first-seen order.
    """
    query = (
        "MATCH (m:Symbol {id: $id})-[:OVERRIDES]->(decl:Symbol) "
        "RETURN collect(DISTINCT decl.id) AS ids"
    )
    result = self._rows(query, {"id": method_id})
    raw_ids = result[0].get("ids") if result else None
    unique_in_order = dict.fromkeys(_coerce_id_list(raw_ids))
    return list(unique_in_order)
913
+
914
def override_axis_rollup_for(self, method_id: str) -> dict[str, dict[str, int]]:
    """Build the dispatch-axis composed-key counts for one method symbol.

    The symbol must have ``kind = 'method'`` and must not carry the ``static``
    modifier; otherwise (including constructors and unknown ids) the result is
    ``{}``. Overriders and overridden declarations come from the stored
    ``[:OVERRIDES]`` edges. For overriders, the terminal keys
    ``OVERRIDDEN_BY.<REL>`` sum the outgoing ``<REL>`` edges of every overrider.
    Keys whose count would be zero are left out; every entry has ``"in": 0``.
    """
    eligible = self._rows(
        "MATCH (m:Symbol {id: $id}) "
        "WHERE m.kind = 'method' "
        "AND NOT list_contains(COALESCE(m.modifiers, []), 'static') "
        "RETURN 1 AS ok LIMIT 1",
        {"id": method_id},
    )
    if not eligible:
        return {}

    counts: dict[str, dict[str, int]] = {}

    overriders = self._override_impl_ids_from_stored(method_id)
    if overriders:
        counts["OVERRIDDEN_BY"] = {"in": 0, "out": len(overriders)}
        # Terminal hops are evaluated in this fixed order so key order is stable.
        for terminal in ("DECLARES_CLIENT", "DECLARES_PRODUCER", "EXPOSES"):
            edge_total = self._edge_row_count_from_method_ids(overriders, terminal)
            if edge_total > 0:
                counts[f"OVERRIDDEN_BY.{terminal}"] = {"in": 0, "out": edge_total}

    overridden = self._override_decl_ids_from_stored(method_id)
    if overridden:
        counts["OVERRIDES"] = {"in": 0, "out": len(overridden)}

    return counts
953
+
954
def _scope_counts(self, column: str) -> dict[str, int]:
    """Generic helper: count resolved type symbols grouped by ``column``.

    Empty-string keys mean the builder could not infer a value
    (no build-marker ancestor / no path segment under project_root).
    NULL and ``''`` buckets both land on the ``""`` key and their counts are
    summed rather than one overwriting the other.

    Args:
        column: ``Symbol`` property to group on. It is interpolated into the
            query text, so it must be a trusted, code-supplied name.

    Returns:
        Mapping of bucket value (stringified) to symbol count; ``{}`` when the
        query fails (best-effort: the failure is logged at debug level).
    """
    try:
        rows = self._rows(
            "MATCH (s:Symbol) WHERE s.resolved "
            "AND s.kind IN ['class','interface','enum','record','annotation'] "
            f"RETURN s.{column} AS bucket, count(*) AS n"
        )
    except Exception:
        # Deliberately best-effort, but leave a trace for diagnosis.
        logging.getLogger(__name__).debug(
            "scope count query failed for column %r", column, exc_info=True,
        )
        return {}
    out: dict[str, int] = {}
    for r in rows:
        key = str(r.get("bucket") or "")
        out[key] = out.get(key, 0) + int(r.get("n") or 0)
    return out
973
+
974
def module_counts(self) -> dict[str, int]:
    """Resolved type-symbol counts keyed by ``module`` name."""
    per_module = self._scope_counts("module")
    return per_module
977
+
978
def microservice_counts(self) -> dict[str, int]:
    """Resolved type-symbol counts keyed by ``microservice`` name."""
    per_service = self._scope_counts("microservice")
    return per_service
981
+
982
+ # ---- symbol-level lookups ----
983
+
984
def find_by_name_or_fqn(self, name_or_fqn: str, *, kinds: list[str] | None = None,
                        module: str | None = None,
                        microservice: str | None = None,
                        limit: int = 50) -> list[SymbolHit]:
    """Look up symbols whose simple name or FQN equals ``name_or_fqn``.

    ``kinds`` (when non-empty) restricts ``Symbol.kind``; ``module`` /
    ``microservice`` are turned into predicates by ``_scope_filters``. At most
    ``limit`` rows are returned, each converted with ``_row_to_symbol``.
    """
    bindings: dict[str, Any] = {"needle": name_or_fqn}
    predicates = ["(s.name = $needle OR s.fqn = $needle)"]
    if kinds:
        bindings["kinds"] = kinds
        predicates.append("s.kind IN $kinds")
    predicates.extend(
        _scope_filters("s", module=module, microservice=microservice, params=bindings)
    )
    condition = " AND ".join(predicates)
    query = f"MATCH (s:Symbol) WHERE {condition} RETURN {_SYMBOL_RETURN} LIMIT {int(limit)}"
    hits: list[SymbolHit] = []
    for record in self._rows(query, bindings):
        hits.append(_row_to_symbol(record))
    return hits
997
+
998
def list_by_role(self, role: str, *, module: str | None = None,
                 microservice: str | None = None,
                 capability: str | None = None,
                 limit: int = 100) -> list[SymbolHit]:
    """List symbols whose ``role`` equals ``role``.

    A truthy ``capability`` additionally requires membership in
    ``s.capabilities``; ``module`` / ``microservice`` are handled by
    ``_scope_filters``. At most ``limit`` rows are returned.
    """
    bindings: dict[str, Any] = {"role": role}
    predicates = ["s.role = $role"]
    if capability:
        bindings["capability"] = capability
        predicates.append("$capability IN s.capabilities")
    predicates.extend(
        _scope_filters("s", module=module, microservice=microservice, params=bindings)
    )
    condition = " AND ".join(predicates)
    query = f"MATCH (s:Symbol) WHERE {condition} RETURN {_SYMBOL_RETURN} LIMIT {int(limit)}"
    hits: list[SymbolHit] = []
    for record in self._rows(query, bindings):
        hits.append(_row_to_symbol(record))
    return hits
1011
+
1012
def list_by_annotation(self, annotation: str, *, module: str | None = None,
                       microservice: str | None = None,
                       capability: str | None = None,
                       limit: int = 100) -> list[SymbolHit]:
    """List symbols whose ``annotations`` list contains ``annotation``.

    A truthy ``capability`` additionally requires membership in
    ``s.capabilities``; ``module`` / ``microservice`` are handled by
    ``_scope_filters``. At most ``limit`` rows are returned.
    """
    bindings: dict[str, Any] = {"ann": annotation}
    # `list_contains` is the Kuzu membership test used here for STRING[] columns.
    predicates = ["list_contains(s.annotations, $ann)"]
    if capability:
        bindings["capability"] = capability
        predicates.append("$capability IN s.capabilities")
    predicates.extend(
        _scope_filters("s", module=module, microservice=microservice, params=bindings)
    )
    condition = " AND ".join(predicates)
    query = f"MATCH (s:Symbol) WHERE {condition} RETURN {_SYMBOL_RETURN} LIMIT {int(limit)}"
    hits: list[SymbolHit] = []
    for record in self._rows(query, bindings):
        hits.append(_row_to_symbol(record))
    return hits
1026
+
1027
def list_by_capability(self, capability: str, *, module: str | None = None,
                       microservice: str | None = None,
                       limit: int = 100) -> list[SymbolHit]:
    """List symbols whose ``capabilities`` list contains ``capability``.

    ``module`` / ``microservice`` are handled by ``_scope_filters``. At most
    ``limit`` rows are returned.
    """
    bindings: dict[str, Any] = {"capability": capability}
    predicates = ["$capability IN s.capabilities"]
    predicates.extend(
        _scope_filters("s", module=module, microservice=microservice, params=bindings)
    )
    condition = " AND ".join(predicates)
    query = f"MATCH (s:Symbol) WHERE {condition} RETURN {_SYMBOL_RETURN} LIMIT {int(limit)}"
    hits: list[SymbolHit] = []
    for record in self._rows(query, bindings):
        hits.append(_row_to_symbol(record))
    return hits
1036
+
1037
+ # ---- edge traversals ----
1038
+
1039
def find_implementors(self, interface_name_or_fqn: str, *,
                      module: str | None = None,
                      microservice: str | None = None,
                      capability: str | None = None,
                      limit: int = 100) -> list[SymbolHit]:
    """Symbols with an ``IMPLEMENTS`` edge to the named interface.

    The interface is matched by simple name or FQN. ``capability`` and the
    ``module`` / ``microservice`` scope apply to the implementing side (``c``).
    Distinct rows only, capped at ``limit``.
    """
    bindings: dict[str, Any] = {"needle": interface_name_or_fqn}
    predicates = ["(i.name = $needle OR i.fqn = $needle)"]
    if capability:
        bindings["capability"] = capability
        predicates.append("$capability IN c.capabilities")
    predicates.extend(
        _scope_filters("c", module=module, microservice=microservice, params=bindings)
    )
    condition = " AND ".join(predicates)
    query = (
        f"MATCH (c:Symbol)-[:IMPLEMENTS]->(i:Symbol) WHERE {condition} "
        f"RETURN DISTINCT {_symbol_return_for('c')} "
        f"LIMIT {int(limit)}"
    )
    hits: list[SymbolHit] = []
    for record in self._rows(query, bindings):
        hits.append(_row_to_symbol(record))
    return hits
1057
+
1058
def find_subclasses(self, class_name_or_fqn: str, *,
                    module: str | None = None,
                    microservice: str | None = None,
                    capability: str | None = None,
                    limit: int = 100) -> list[SymbolHit]:
    """Symbols with an ``EXTENDS`` edge to the named base class.

    The base is matched by simple name or FQN. ``capability`` and the
    ``module`` / ``microservice`` scope apply to the subclass side (``s``).
    Distinct rows only, capped at ``limit``.
    """
    bindings: dict[str, Any] = {"needle": class_name_or_fqn}
    predicates = ["(b.name = $needle OR b.fqn = $needle)"]
    if capability:
        bindings["capability"] = capability
        predicates.append("$capability IN s.capabilities")
    predicates.extend(
        _scope_filters("s", module=module, microservice=microservice, params=bindings)
    )
    condition = " AND ".join(predicates)
    query = (
        f"MATCH (s:Symbol)-[:EXTENDS]->(b:Symbol) WHERE {condition} "
        f"RETURN DISTINCT {_SYMBOL_RETURN} "
        f"LIMIT {int(limit)}"
    )
    hits: list[SymbolHit] = []
    for record in self._rows(query, bindings):
        hits.append(_row_to_symbol(record))
    return hits
1076
+
1077
def find_injectors(self, target_name_or_fqn: str, *,
                   module: str | None = None,
                   microservice: str | None = None,
                   capability: str | None = None,
                   limit: int = 100) -> list[EdgeHit]:
    """``INJECTS`` edges whose target matches ``target_name_or_fqn``.

    The target (``t``) is matched by simple name or FQN. ``capability`` and the
    ``module`` / ``microservice`` scope are applied to the injecting side
    (``s``). Each row becomes an ``EdgeHit`` holding both endpoint symbols plus
    the edge's ``mechanism``, ``annotation``, ``field_or_param`` and ``resolved``.
    """
    bindings: dict[str, Any] = {"needle": target_name_or_fqn}
    predicates = ["(t.name = $needle OR t.fqn = $needle)"]
    if capability:
        # Capability is checked on the consumer (source) end of the edge.
        bindings["capability"] = capability
        predicates.append("$capability IN s.capabilities")
    predicates.extend(
        _scope_filters("s", module=module, microservice=microservice, params=bindings)
    )
    condition = " AND ".join(predicates)

    # Both endpoints are projected with a variable-specific prefix (`s_` / `t_`)
    # so the flat result row can be split back into two symbol dicts.
    symbol_columns = (
        "id", "kind", "name", "fqn", "package", "module", "microservice",
        "filename", "start_line", "end_line", "start_byte", "end_byte",
        "modifiers", "annotations", "capabilities", "role", "signature", "parent_id", "resolved",
    )

    def _projection(var: str) -> str:
        return ", ".join(f"{var}.{col} AS {var}_{col}" for col in symbol_columns)

    def _endpoint(record: dict[str, Any], prefix: str):
        stripped = {
            key[len(prefix):]: value
            for key, value in record.items()
            if key.startswith(prefix)
        }
        return _row_to_symbol(stripped)

    query = (
        f"MATCH (s:Symbol)-[e:INJECTS]->(t:Symbol) WHERE {condition} "
        f"RETURN {_projection('s')}, {_projection('t')}, "
        f"e.mechanism AS mechanism, e.annotation AS annotation, "
        f"e.field_or_param AS field_or_param, e.resolved AS resolved "
        f"LIMIT {int(limit)}"
    )

    hits: list[EdgeHit] = []
    for record in self._rows(query, bindings):
        source = _endpoint(record, "s_")
        target = _endpoint(record, "t_")
        hits.append(EdgeHit(
            type="INJECTS", src=source, dst=target,
            mechanism=record.get("mechanism") or "",
            annotation=record.get("annotation") or "",
            field_or_param=record.get("field_or_param") or "",
            resolved=bool(record.get("resolved", True)),
        ))
    return hits
1126
+
1127
def _method_ids_for_call_graph_needle(self, needle: str, *, limit: int) -> list[str]:
    """Resolve a call-graph needle to the method ids used as the initial frontier.

    Resolution order:
    1. Exact FQN match on ``needle``; if none, retry with the alternative
       produced by ``_call_graph_needle_phantom_arity_alt`` (when it gives one).
    2. If that symbol is a type (class / interface / enum / record /
       annotation), return the ids of the symbols it ``DECLARES`` (capped at
       ``limit``); if it is a method or constructor, return just its id.
    3. Otherwise fall back to methods / constructors whose simple name equals
       ``needle`` (capped at ``limit``).
    """
    fqn_lookup = "MATCH (s:Symbol) WHERE s.fqn = $n RETURN s.id AS id, s.kind AS kind LIMIT 1"
    exact = self._rows(fqn_lookup, {"n": needle})
    if not exact:
        alternative = _call_graph_needle_phantom_arity_alt(needle)
        if alternative:
            exact = self._rows(fqn_lookup, {"n": alternative})

    if exact:
        head = exact[0]
        symbol_kind = str(head.get("kind") or "")
        symbol_id = str(head.get("id") or "")
        if symbol_id:
            if symbol_kind in ("class", "interface", "enum", "record", "annotation"):
                declared = self._rows(
                    "MATCH (t:Symbol {id: $tid})-[:DECLARES]->(m:Symbol) RETURN m.id AS id "
                    f"LIMIT {int(limit)}",
                    {"tid": symbol_id},
                )
                return [str(rec["id"]) for rec in declared if rec.get("id")]
            if symbol_kind in ("method", "constructor"):
                return [symbol_id]

    # No usable FQN hit: treat the needle as a simple method / constructor name.
    by_name = self._rows(
        f"MATCH (s:Symbol) WHERE s.name = $n AND s.kind IN ['method','constructor'] "
        f"RETURN s.id AS id LIMIT {int(limit)}",
        {"n": needle},
    )
    return [str(rec["id"]) for rec in by_name if rec.get("id")]
1157
+
1158
def find_callers(
    self, needle: str, *,
    depth: int = 1,
    limit: int = 100,
    min_confidence: float = 0.0,
    exclude_external: bool = True,
    module: str | None = None,
    microservice: str | None = None,
) -> list[CallEdge]:
    """Collect ``CALLS`` edges that end at ``needle``, walking up the call graph.

    The needle is resolved to method ids first. Each of up to ``max(1, depth)``
    hops fetches edges whose callee is in the current frontier and whose
    confidence is at least ``min_confidence``; the callers found become the next
    frontier. ``module`` / ``microservice`` scope the caller side. With
    ``exclude_external`` set, edges whose caller FQN is external are dropped
    (the needle itself may be external). Edges are de-duplicated on
    (caller id, callee id, call-site line, call-site byte) and the walk stops as
    soon as ``limit`` edges have been gathered.
    """
    targets = self._method_ids_for_call_graph_needle(needle, limit=max(limit, 50))
    if not targets:
        return []

    src_cols = ", ".join(f"caller.{col} AS caller_{col}" for col in _SYM_COLS)
    dst_cols = ", ".join(f"callee.{col} AS callee_{col}" for col in _SYM_COLS)
    edges: list[CallEdge] = []
    emitted: set[tuple[str, str, int, int]] = set()

    hops = max(1, int(depth))
    for _hop in range(hops):
        bindings: dict[str, Any] = {
            "frontier": list(targets),
            "minc": float(min_confidence),
        }
        predicates = ["callee.id IN $frontier", "c.confidence >= $minc"]
        predicates.extend(
            _scope_filters("caller", module=module, microservice=microservice, params=bindings)
        )
        condition = " AND ".join(predicates)
        # Over-fetch (8x) because external / duplicate rows are discarded below.
        query = (
            f"MATCH (caller:Symbol)-[c:CALLS]->(callee:Symbol) WHERE {condition} "
            f"RETURN {src_cols}, {dst_cols}, "
            f"c.call_site_line AS call_site_line, c.call_site_byte AS call_site_byte, "
            f"c.arg_count AS arg_count, c.confidence AS confidence, c.strategy AS strategy, "
            f"c.source AS source, c.resolved AS resolved "
            f"LIMIT {int(limit) * 8}"
        )

        # Insertion-ordered set of caller ids feeding the next hop.
        upstream: dict[str, None] = {}
        for record in self._rows(query, bindings):
            edge = _row_to_call_edge(record)
            # Only the discovered side (caller) is subject to the external filter.
            if exclude_external and _is_external_fqn(edge.src.fqn):
                continue
            identity = (edge.src.id, edge.dst.id, edge.call_site_line, edge.call_site_byte)
            if identity in emitted:
                continue
            emitted.add(identity)
            edges.append(edge)
            upstream.setdefault(edge.src.id)
            if len(edges) >= limit:
                return edges

        if not upstream:
            break
        targets = list(upstream)

    return edges
1210
+
1211
def find_callees(
    self, needle: str, *,
    depth: int = 1,
    limit: int = 100,
    min_confidence: float = 0.0,
    exclude_external: bool = True,
    module: str | None = None,
    microservice: str | None = None,
) -> list[CallEdge]:
    """Collect ``CALLS`` edges that start at ``needle``, walking down the call graph.

    The needle is resolved to method ids first. Each of up to ``max(1, depth)``
    hops fetches edges whose caller is in the current frontier and whose
    confidence is at least ``min_confidence``; the callees found become the next
    frontier. ``module`` / ``microservice`` scope the callee side. With
    ``exclude_external`` set, edges whose callee FQN is external are dropped
    (the needle itself may be external). Edges are de-duplicated on
    (caller id, callee id, call-site line, call-site byte) and the walk stops as
    soon as ``limit`` edges have been gathered.
    """
    sources = self._method_ids_for_call_graph_needle(needle, limit=max(limit, 50))
    if not sources:
        return []

    src_cols = ", ".join(f"caller.{col} AS caller_{col}" for col in _SYM_COLS)
    dst_cols = ", ".join(f"callee.{col} AS callee_{col}" for col in _SYM_COLS)
    edges: list[CallEdge] = []
    emitted: set[tuple[str, str, int, int]] = set()

    hops = max(1, int(depth))
    for _hop in range(hops):
        bindings: dict[str, Any] = {
            "frontier": list(sources),
            "minc": float(min_confidence),
        }
        predicates = ["caller.id IN $frontier", "c.confidence >= $minc"]
        predicates.extend(
            _scope_filters("callee", module=module, microservice=microservice, params=bindings)
        )
        condition = " AND ".join(predicates)
        # Over-fetch (8x) because external / duplicate rows are discarded below.
        query = (
            f"MATCH (caller:Symbol)-[c:CALLS]->(callee:Symbol) WHERE {condition} "
            f"RETURN {src_cols}, {dst_cols}, "
            f"c.call_site_line AS call_site_line, c.call_site_byte AS call_site_byte, "
            f"c.arg_count AS arg_count, c.confidence AS confidence, c.strategy AS strategy, "
            f"c.source AS source, c.resolved AS resolved "
            f"LIMIT {int(limit) * 8}"
        )

        # Insertion-ordered set of callee ids feeding the next hop.
        downstream: dict[str, None] = {}
        for record in self._rows(query, bindings):
            edge = _row_to_call_edge(record)
            # Only the discovered side (callee) is subject to the external filter.
            if exclude_external and _is_external_fqn(edge.dst.fqn):
                continue
            identity = (edge.src.id, edge.dst.id, edge.call_site_line, edge.call_site_byte)
            if identity in emitted:
                continue
            emitted.add(identity)
            edges.append(edge)
            downstream.setdefault(edge.dst.id)
            if len(edges) >= limit:
                return edges

        if not downstream:
            break
        sources = list(downstream)

    return edges
1263
+
1264
def expand_methods(
    self, fqns: list[str], *, depth: int = 1,
    min_confidence: float = 0.0, limit: int = 200,
    exclude_external: bool = True,
) -> list[tuple[str, float]]:
    """Reach type FQNs from seed types via DECLARES -> CALLS -> owning type.

    Seed types are looked up by FQN; the symbols they ``DECLARES`` start the
    walk with confidence ``1.0``. Each hop follows ``CALLS`` edges whose
    confidence is at least ``min_confidence``; a callee's path confidence is the
    minimum confidence along the path, and the maximum over all paths reaching
    it within that hop. The owning type of a callee is the part of its FQN
    before ``#`` (callees without ``#`` are ignored).

    Returns ``(type_fqn, path_confidence)`` pairs in first-discovery order,
    capped at ``limit``. With ``exclude_external`` (default) external types are
    left out of the result, but the walk still continues through them.
    """
    if not fqns or depth < 1:
        return []

    # Insertion-ordered, de-duplicated seed method ids.
    seeds: dict[str, None] = {}
    for type_fqn in fqns:
        found = self._rows(
            "MATCH (t:Symbol) WHERE t.fqn = $f AND t.kind IN ['class','interface','enum','record','annotation'] "
            "RETURN t.id AS id LIMIT 1",
            {"f": type_fqn},
        )
        if not found or not found[0].get("id"):
            continue
        declared = self._rows(
            "MATCH (t:Symbol {id: $tid})-[:DECLARES]->(m:Symbol) RETURN m.id AS id",
            {"tid": str(found[0]["id"])},
        )
        for rec in declared:
            if rec.get("id"):
                seeds.setdefault(str(rec["id"]))
    if not seeds:
        return []

    frontier_conf: dict[str, float] = dict.fromkeys(seeds, 1.0)
    # Insertion order doubles as discovery order for the returned list.
    best_by_type: dict[str, float] = {}

    for _hop in range(int(depth)):
        if not frontier_conf:
            break
        call_rows = self._rows(
            "MATCH (m:Symbol)-[c:CALLS]->(n:Symbol) WHERE m.id IN $ids AND c.confidence >= $mc "
            "RETURN m.id AS mid, n.id AS nid, c.confidence AS conf",
            {"ids": list(frontier_conf.keys()), "mc": float(min_confidence)},
        )

        reached: dict[str, float] = {}
        for rec in call_rows:
            src_id = str(rec.get("mid") or "")
            dst_id = str(rec.get("nid") or "")
            if not (src_id and dst_id):
                continue
            conf_raw = rec.get("conf")
            try:
                edge_conf = 0.0 if conf_raw is None else float(conf_raw)
            except (TypeError, ValueError):
                edge_conf = 0.0
            upstream = frontier_conf.get(src_id)
            if upstream is None:
                continue
            # Path confidence is the weakest edge on the path.
            bottleneck = min(upstream, edge_conf)
            reached[dst_id] = max(reached.get(dst_id, 0.0), bottleneck)

        if not reached:
            break

        for dst_id, path_conf in reached.items():
            looked_up = self._rows(
                "MATCH (s:Symbol {id: $id}) RETURN s.fqn AS fqn LIMIT 1",
                {"id": dst_id},
            )
            if not looked_up:
                continue
            method_fqn = str(looked_up[0].get("fqn") or "")
            owner, separator, _member = method_fqn.partition("#")
            if not separator or not owner:
                continue
            external = _is_external_fqn(owner)
            if exclude_external and external:
                # Hidden from the result; still part of the next frontier.
                continue
            newly_seen = owner not in best_by_type
            best_by_type[owner] = max(best_by_type.get(owner, 0.0), path_conf)
            if newly_seen and len(best_by_type) >= limit:
                return list(best_by_type.items())[:limit]

        frontier_conf = reached

    return list(best_by_type.items())[:limit]
1360
+
1361
def neighbors(self, fqn_or_name: str, *, depth: int = 1,
              edge_types: list[str] | None = None,
              direction: str = "both", limit: int = 200) -> list[SymbolHit]:
    """Symbols reachable from the named root over ``edge_types`` within ``depth`` hops.

    Args:
        fqn_or_name: Simple name or FQN of the root symbol.
        depth: Maximum path length; values below 1 return ``[]``.
        edge_types: Relationship labels to follow. Defaults to EXTENDS,
            IMPLEMENTS, INJECTS, DECLARES and CALLS when empty / ``None``.
        direction: ``"out"`` or ``"in"``; any other value follows edges in both
            directions.
        limit: Maximum number of distinct symbols returned.

    Raises:
        ValueError: If any edge type is not a plain ASCII identifier. Labels are
            spliced into the Cypher text (they cannot be bound as parameters),
            so anything else could alter the query.
    """
    if depth < 1:
        return []
    edges = edge_types or ["EXTENDS", "IMPLEMENTS", "INJECTS", "DECLARES", "CALLS"]
    rejected = [
        e for e in edges
        if not (isinstance(e, str) and e.isascii() and e.isidentifier())
    ]
    if rejected:
        raise ValueError(f"invalid edge type(s): {rejected!r}")
    edge_pattern = "|".join(edges)
    if direction == "out":
        arrow_l, arrow_r = "-", "->"
    elif direction == "in":
        arrow_l, arrow_r = "<-", "-"
    else:
        arrow_l, arrow_r = "-", "-"
    q = (
        f"MATCH (root:Symbol) WHERE root.name = $needle OR root.fqn = $needle "
        f"MATCH path = (root){arrow_l}[:{edge_pattern}*1..{int(depth)}]{arrow_r}(n:Symbol) "
        f"RETURN DISTINCT {_symbol_return_for('n')} "
        f"LIMIT {int(limit)}"
    )
    return [_row_to_symbol(r) for r in self._rows(q, {"needle": fqn_or_name})]
1382
+
1383
def impact_analysis(self, fqn_or_name: str, *, depth: int = 2,
                    limit: int = 300) -> list[SymbolHit]:
    """Reverse closure over INJECTS + IMPLEMENTS + EXTENDS (who breaks if the target changes).

    Args:
        fqn_or_name: Simple name or FQN of the target symbol.
        depth: Maximum number of hops followed towards the target; values below
            1 return ``[]`` (a ``*1..0`` range is not a valid pattern).
        limit: Maximum number of distinct dependents returned.
    """
    if depth < 1:
        return []
    q = (
        f"MATCH (target:Symbol) WHERE target.name = $needle OR target.fqn = $needle "
        f"MATCH (n:Symbol)-[:INJECTS|IMPLEMENTS|EXTENDS*1..{int(depth)}]->(target) "
        f"RETURN DISTINCT {_symbol_return_for('n')} "
        f"LIMIT {int(limit)}"
    )
    return [_row_to_symbol(r) for r in self._rows(q, {"needle": fqn_or_name})]
1393
+
1394
+ # ---- flow tracing (entrypoint -> service -> integration / repository) ----
1395
+
1396
# Ordered role waterfall used by flow tracing. A stage admits neighbours of the
# preceding stage only when their role is on that stage's allow-list; phantom /
# unresolved symbols are filtered out so noise does not cross stage boundaries.
_FLOW_STAGES: tuple[tuple[str, ...], ...] = (
    ("CONTROLLER",),
    ("SERVICE", "COMPONENT"),
    ("CLIENT", "REPOSITORY", "MAPPER"),
)

# Roles accepted for stage 0 (entrypoint-like seeds). COMPONENT is listed since
# Kafka listeners / @Scheduled orchestrators are often plain @Component rather
# than @Controller; SERVICE is listed so orchestrator seeds survive when the
# caller has already narrowed the vector search to services.
_ENTRYPOINT_ROLES: tuple[str, ...] = (
    "CONTROLLER",
    "COMPONENT",
    "SERVICE",
    "CLIENT",
)
1413
+
1414
+ def trace_flow(self, seed_fqns: list[str], *,
1415
+ module: str | None = None,
1416
+ microservice: str | None = None,
1417
+ depth: int = 2, stage_limit: int = 20,
1418
+ follow_calls: bool = True,
1419
+ min_call_confidence: float = 0.0,
1420
+ exclude_external: bool = True) -> list[list[StageSymbol]]:
1421
+ """Walk stages `CONTROLLER -> SERVICE/COMPONENT -> CLIENT/REPOSITORY/MAPPER`.
1422
+
1423
+ Returns a list of stages; each stage is a list of SymbolHit. The first
1424
+ stage is the seed set (entrypoints matched by FQN, filtered to
1425
+ orchestrator-like roles — see `_ENTRYPOINT_ROLES`). If role-filtered
1426
+ seeds come back empty we fall back to unfiltered seeds so a caller
1427
+ with no CONTROLLER coverage still gets *something* back.
1428
+ Each subsequent stage is the neighbor-set (INJECTS+EXTENDS+IMPLEMENTS,
1429
+ optionally merged with type-to-type paths through DECLARES+CALLS when
1430
+ `follow_calls` is true) of the previous stage, restricted to the
1431
+ stage's role allow-list.
1432
+
1433
+ Defaults: ``depth=2`` (clamped to 1..3), ``follow_calls=True``,
1434
+ ``min_call_confidence=0.0``, ``exclude_external=True``. The latter only
1435
+ filters symbols reached via the DECLARES+CALLS hop: discovered **type**
1436
+ symbols matching external FQN prefixes (same list as ``expand_methods`` /
1437
+ the callee side of ``find_callees``), not the seed frontier. INJECTS /
1438
+ EXTENDS / IMPLEMENTS hops ignore ``exclude_external``.
1439
+
1440
+ ``depth`` is the neighbour hop count per stage (not total trace depth).
1441
+ """
1442
+ if not seed_fqns:
1443
+ return []
1444
+ depth = max(1, min(3, int(depth)))
1445
+
1446
+ stages: list[list[StageSymbol]] = []
1447
+ visited_fqns: set[str] = set()
1448
+
1449
+ def _run_seed_query(entry_roles: tuple[str, ...] | None) -> list[SymbolHit]:
1450
+ filters = ["s.fqn IN $fqns"]
1451
+ params: dict[str, Any] = {"fqns": list(seed_fqns)}
1452
+ filters.extend(_scope_filters(
1453
+ "s", module=module, microservice=microservice, params=params,
1454
+ ))
1455
+ if entry_roles:
1456
+ params["entry_roles"] = list(entry_roles)
1457
+ # Kuzu 0.11.x does not support parameterized lists inside ANY
1458
+ # comprehensions, so we expand the fixed capability set as
1459
+ # individual list_contains predicates ORed together.
1460
+ cap_predicates = " OR ".join(
1461
+ f"list_contains(s.capabilities, '{c}')"
1462
+ for c in ("MESSAGE_LISTENER", "SCHEDULED_TASK")
1463
+ )
1464
+ filters.append(
1465
+ f"(s.role IN $entry_roles OR {cap_predicates})"
1466
+ )
1467
+ where = " AND ".join(filters)
1468
+ q0 = (
1469
+ f"MATCH (s:Symbol) WHERE {where} "
1470
+ f"RETURN {_SYMBOL_RETURN} LIMIT {int(stage_limit)}"
1471
+ )
1472
+ return [_row_to_symbol(r) for r in self._rows(q0, params)]
1473
+
1474
+ seed_rows = _run_seed_query(self._ENTRYPOINT_ROLES)
1475
+ if not seed_rows:
1476
+ seed_rows = _run_seed_query(None)
1477
+ if not seed_rows:
1478
+ return []
1479
+ stages.append([StageSymbol(symbol=r, via=[]) for r in seed_rows])
1480
+ for h in seed_rows:
1481
+ if h.fqn:
1482
+ visited_fqns.add(h.fqn)
1483
+
1484
+ frontier_fqns: list[str] = [h.fqn for h in seed_rows if h.fqn]
1485
+ for stage_roles in self._FLOW_STAGES[1:]:
1486
+ if not frontier_fqns:
1487
+ break
1488
+
1489
+ # Single-hop BFS repeated up to `depth` times. Each iteration
1490
+ # knows which edge type and parent node produced a newly-
1491
+ # discovered symbol, so we can label every stage entry.
1492
+ stage_results: dict[str, StageSymbol] = {}
1493
+ current_frontier = list(frontier_fqns)
1494
+
1495
+ for hop in range(1, depth + 1):
1496
+ if not current_frontier:
1497
+ break
1498
+ params: dict[str, Any] = {
1499
+ "fqns": current_frontier,
1500
+ "roles": list(stage_roles),
1501
+ }
1502
+ scope = _scope_filters(
1503
+ "n", module=module, microservice=microservice, params=params,
1504
+ )
1505
+ scope_clause = (" AND " + " AND ".join(scope)) if scope else ""
1506
+ q = (
1507
+ f"MATCH (root:Symbol)-[e:INJECTS|EXTENDS|IMPLEMENTS]-(n:Symbol) "
1508
+ f"WHERE root.fqn IN $fqns AND n.role IN $roles AND n.resolved{scope_clause} "
1509
+ f"RETURN {_symbol_return_for('n')}, "
1510
+ f"label(e) AS edge_type, root.fqn AS from_fqn "
1511
+ f"LIMIT {int(stage_limit) * 4}"
1512
+ )
1513
+ next_frontier: list[str] = []
1514
+ def _ingest_flow_row(
1515
+ row: dict[str, Any], *, filter_external_fqn: bool = False,
1516
+ ) -> None:
1517
+ sym = _row_to_symbol(row)
1518
+ if (
1519
+ filter_external_fqn
1520
+ and exclude_external
1521
+ and _is_external_fqn(sym.fqn)
1522
+ ):
1523
+ return
1524
+ if not sym.fqn or sym.fqn in visited_fqns:
1525
+ return
1526
+ edge = ViaEdge(
1527
+ edge_type=str(row.get("edge_type") or ""),
1528
+ from_fqn=str(row.get("from_fqn") or ""),
1529
+ hop=hop,
1530
+ caller_node_id=str(row.get("caller_client_id") or ""),
1531
+ )
1532
+ existing = stage_results.get(sym.fqn)
1533
+ if existing is None:
1534
+ stage_results[sym.fqn] = StageSymbol(symbol=sym, via=[edge])
1535
+ next_frontier.append(sym.fqn)
1536
+ else:
1537
+ if len(existing.via) < 4 and not any(
1538
+ v.edge_type == edge.edge_type and v.from_fqn == edge.from_fqn
1539
+ for v in existing.via
1540
+ ):
1541
+ existing.via.append(edge)
1542
+
1543
+ for row in self._rows(q, params):
1544
+ _ingest_flow_row(row)
1545
+ if len(stage_results) >= stage_limit:
1546
+ break
1547
+
1548
+ # Structural-first budget: same-microservice CALLS top up first,
1549
+ # then cross-service HTTP/ASYNC caller edges.
1550
+ if follow_calls and len(stage_results) < stage_limit:
1551
+ remaining = stage_limit - len(stage_results)
1552
+ params_cf: dict[str, Any] = {
1553
+ "fqns": current_frontier,
1554
+ "roles": list(stage_roles),
1555
+ "mc": float(min_call_confidence),
1556
+ }
1557
+ scope_cf = _scope_filters(
1558
+ "n", module=module, microservice=microservice, params=params_cf,
1559
+ )
1560
+ sccf = (" AND " + " AND ".join(scope_cf)) if scope_cf else ""
1561
+ qcf = (
1562
+ "MATCH (root:Symbol)-[:DECLARES]->(m1:Symbol)-[c:CALLS]->(m2:Symbol)"
1563
+ "<-[:DECLARES]-(n:Symbol) WHERE root.fqn IN $fqns AND n.role IN $roles "
1564
+ "AND root.microservice = n.microservice "
1565
+ "AND n.resolved AND n.kind IN ['class','interface','enum','record','annotation'] "
1566
+ f"AND c.confidence >= $mc{sccf} "
1567
+ f"RETURN {_symbol_return_for('n')}, 'CALLS' AS edge_type, root.fqn AS from_fqn "
1568
+ f"LIMIT {max(1, remaining * 4)}"
1569
+ )
1570
+ for row in self._rows(qcf, params_cf):
1571
+ _ingest_flow_row(row, filter_external_fqn=True)
1572
+ if len(stage_results) >= stage_limit:
1573
+ break
1574
+ if follow_calls and len(stage_results) < stage_limit:
1575
+ remaining = stage_limit - len(stage_results)
1576
+ params_rf: dict[str, Any] = {
1577
+ "fqns": current_frontier,
1578
+ "roles": list(stage_roles),
1579
+ "mc": float(min_call_confidence),
1580
+ }
1581
+ scope_rf = _scope_filters(
1582
+ "n", module=module, microservice=microservice, params=params_rf,
1583
+ )
1584
+ scrf = (" AND " + " AND ".join(scope_rf)) if scope_rf else ""
1585
+ qrf = (
1586
+ "MATCH (root:Symbol)-[:DECLARES]->(m1:Symbol)-[:DECLARES_CLIENT]->(c:Client)"
1587
+ "-[e:HTTP_CALLS]->(rt:Route)<-[:EXPOSES]-(handler:Symbol)<-[:DECLARES]-(n:Symbol) "
1588
+ "WHERE root.fqn IN $fqns AND n.role IN $roles "
1589
+ "AND n.resolved AND n.kind IN ['class','interface','enum','record','annotation'] "
1590
+ "AND e.confidence >= $mc AND root.microservice <> n.microservice "
1591
+ f"{scrf} "
1592
+ f"RETURN {_symbol_return_for('n')}, 'HTTP_CALLS' AS edge_type, "
1593
+ f"root.fqn AS from_fqn, c.id AS caller_client_id "
1594
+ f"LIMIT {max(1, remaining * 4)}"
1595
+ )
1596
+ for row in self._rows(qrf, params_rf):
1597
+ _ingest_flow_row(row, filter_external_fqn=True)
1598
+ if len(stage_results) >= stage_limit:
1599
+ break
1600
+ if len(stage_results) < stage_limit:
1601
+ remaining = stage_limit - len(stage_results)
1602
+ qrf_async = (
1603
+ "MATCH (root:Symbol)-[:DECLARES]->(m1:Symbol)-[:DECLARES_PRODUCER]->(pr:Producer)"
1604
+ "-[e:ASYNC_CALLS]->(rt:Route)<-[:EXPOSES]-(handler:Symbol)<-[:DECLARES]-(n:Symbol) "
1605
+ "WHERE root.fqn IN $fqns AND n.role IN $roles "
1606
+ "AND n.resolved AND n.kind IN ['class','interface','enum','record','annotation'] "
1607
+ "AND e.confidence >= $mc AND root.microservice <> n.microservice "
1608
+ f"{scrf} "
1609
+ f"RETURN {_symbol_return_for('n')}, 'ASYNC_CALLS' AS edge_type, "
1610
+ f"root.fqn AS from_fqn, pr.id AS caller_producer_id "
1611
+ f"LIMIT {max(1, remaining * 4)}"
1612
+ )
1613
+ for row in self._rows(qrf_async, params_rf):
1614
+ _ingest_flow_row(row, filter_external_fqn=True)
1615
+ if len(stage_results) >= stage_limit:
1616
+ break
1617
+
1618
+ current_frontier = next_frontier
1619
+ if len(stage_results) >= stage_limit:
1620
+ break
1621
+
1622
+ if not stage_results:
1623
+ break
1624
+ stage_list = list(stage_results.values())
1625
+ stages.append(stage_list)
1626
+ for entry in stage_list:
1627
+ visited_fqns.add(entry.symbol.fqn)
1628
+ frontier_fqns = [entry.symbol.fqn for entry in stage_list]
1629
+ return stages
1630
+
1631
+ # ---- routes (B2a) ----
1632
+
1633
+ _ROUTE_RETURN = (
1634
+ "r.id AS id, r.kind AS kind, r.framework AS framework, r.method AS method, "
1635
+ "r.path AS path, r.path_template AS path_template, r.path_regex AS path_regex, "
1636
+ "r.topic AS topic, r.broker AS broker, r.feign_name AS feign_name, r.feign_url AS feign_url, "
1637
+ "r.microservice AS microservice, r.module AS module, r.filename AS filename, "
1638
+ "r.start_line AS start_line, r.end_line AS end_line, r.resolved AS resolved"
1639
+ )
1640
+
1641
+ @staticmethod
1642
+ def _row_to_route_dict(row: dict[str, Any]) -> dict[str, Any]:
1643
+ return {
1644
+ "id": str(row.get("id") or ""),
1645
+ "kind": str(row.get("kind") or ""),
1646
+ "framework": str(row.get("framework") or ""),
1647
+ "method": str(row.get("method") or ""),
1648
+ "path": str(row.get("path") or ""),
1649
+ "path_template": str(row.get("path_template") or ""),
1650
+ "path_regex": str(row.get("path_regex") or ""),
1651
+ "topic": str(row.get("topic") or ""),
1652
+ "broker": str(row.get("broker") or ""),
1653
+ "feign_name": str(row.get("feign_name") or ""),
1654
+ "feign_url": str(row.get("feign_url") or ""),
1655
+ "microservice": str(row.get("microservice") or ""),
1656
+ "module": str(row.get("module") or ""),
1657
+ "filename": str(row.get("filename") or ""),
1658
+ "start_line": int(row.get("start_line") or 0),
1659
+ "end_line": int(row.get("end_line") or 0),
1660
+ "resolved": bool(row.get("resolved", True)),
1661
+ }
1662
+
1663
def list_routes(
    self,
    *,
    microservice: str | None = None,
    framework: str | None = None,
    path_prefix: str | None = None,
    method: str | None = None,
    limit: int = 100,
) -> list[dict[str, Any]]:
    """List Route nodes, optionally filtered, ordered by framework, path and id.

    Filters that are ``None`` or empty are ignored; ``path_prefix`` is a
    ``STARTS WITH`` match on ``r.path``, the others are equality matches.
    ``limit`` is clamped to the range 1..500.
    """
    bound: dict[str, Any] = {"lim": max(1, min(int(limit), 500))}
    clauses: list[str] = []

    # (parameter name, value, predicate, is the filter active?)
    optional_filters = (
        ("microservice", microservice, "r.microservice = $microservice",
         bool(microservice)),
        ("framework", framework, "r.framework = $framework", bool(framework)),
        ("path_prefix", path_prefix, "r.path STARTS WITH $path_prefix",
         bool(path_prefix)),
        ("method", method, "r.method = $method",
         method is not None and method != ""),
    )
    for name, value, predicate, active in optional_filters:
        if active:
            bound[name] = value
            clauses.append(predicate)

    where_clause = ""
    if clauses:
        where_clause = " WHERE " + " AND ".join(clauses)
    query = (
        f"MATCH (r:Route){where_clause} RETURN {self._ROUTE_RETURN} "
        f"ORDER BY r.framework, r.path, r.id LIMIT $lim"
    )
    return [self._row_to_route_dict(record) for record in self._rows(query, bound)]
1693
+
1694
def find_route_handlers(self, *, route_id: str) -> list[dict[str, Any]]:
    """Return the symbols that EXPOSE the route with id *route_id*.

    Each entry holds the handler symbol (as a dict via ``asdict``) together
    with the ``confidence`` and ``strategy`` stored on the EXPOSES edge.
    Results are ordered by the symbol's FQN.
    """
    prefix = "s_"
    # Symbol columns are aliased with an ``s_`` prefix so they can be told
    # apart from the edge columns in the same row.
    symbol_projection = ", ".join(
        f"s.{column} AS {prefix}{column}" for column in _SYM_COLS
    )
    query = (
        f"MATCH (s:Symbol)-[e:EXPOSES]->(r:Route) WHERE r.id = $rid "
        f"RETURN {symbol_projection}, e.confidence AS confidence, e.strategy AS strategy "
        f"ORDER BY s.fqn"
    )

    handlers: list[dict[str, Any]] = []
    for record in self._rows(query, {"rid": route_id}):
        # Strip the alias prefix again before handing the row to the mapper.
        symbol_fields = {
            key[len(prefix):]: value
            for key, value in record.items()
            if key.startswith(prefix)
        }
        handlers.append(
            {
                "symbol": asdict(_row_to_symbol(symbol_fields)),
                "confidence": float(record.get("confidence") or 0.0),
                "strategy": str(record.get("strategy") or ""),
            }
        )
    return handlers
1710
+
1711
def get_route_by_path(
    self,
    *,
    microservice: str,
    path_template: str,
    method: str = "",
) -> dict[str, Any] | None:
    """Look up a single Route by microservice and path template.

    The HTTP method is only used as a filter when *method* is not the empty
    string. When several routes match, the one with the smallest ``id`` is
    returned; ``None`` is returned when nothing matches.
    """
    bound: dict[str, Any] = {"ms": microservice, "pt": path_template}
    if method != "":
        bound["meth"] = method
        method_clause = "AND r.method = $meth"
    else:
        method_clause = ""
    query = (
        f"MATCH (r:Route) WHERE r.microservice = $ms AND r.path_template = $pt {method_clause} "
        f"RETURN {self._ROUTE_RETURN} ORDER BY r.id LIMIT 1"
    )
    matches = self._rows(query, bound)
    return self._row_to_route_dict(matches[0]) if matches else None
1731
+
1732
def find_route_callers(
    self,
    route_id: str | None = None,
    *,
    microservice: str = "",
    path_template: str = "",
    method: str = "",
) -> list[RouteCaller]:
    """HTTP callers via Client; async callers via Producer (two-hop each).

    The route is identified either directly by *route_id* or, when that is
    empty, by looking it up from *microservice*, *path_template* and
    *method*. If several routes match the lookup, the one with the smallest
    ``id`` is used so repeated calls resolve to the same route (this matches
    the ordering used by ``get_route_by_path``).

    Returns:
        HTTP callers first (``caller_node_kind="client"``), then async
        callers (``caller_node_kind="producer"``); each group is ordered by
        edge confidence descending, then caller id. An empty list is
        returned when the route cannot be resolved.
    """
    rid = route_id or ""
    if not rid:
        # NOTE(review): unlike get_route_by_path, an empty ``method`` is
        # matched literally here instead of meaning "any method" -- confirm
        # this is intended before relaxing it.
        lookup_params: dict[str, Any] = {
            "microservice": microservice,
            "path_template": path_template,
            "method": method,
        }
        rows = self._rows(
            "MATCH (r:Route) "
            "WHERE r.microservice = $microservice AND r.path_template = $path_template AND r.method = $method "
            # ORDER BY makes the LIMIT 1 pick deterministic.
            "RETURN r.id AS id ORDER BY r.id LIMIT 1",
            lookup_params,
        )
        if not rows:
            return []
        rid = str(rows[0].get("id") or "")
    if not rid:
        return []

    http_rows = self._rows(
        "MATCH (s:Symbol)-[:DECLARES_CLIENT]->(c:Client)-[e:HTTP_CALLS]->(r:Route {id: $rid}) "
        "RETURN c.id AS caller_node_id, c.microservice AS caller_microservice, "
        "s.id AS declaring_symbol_id, e.confidence AS confidence, e.match AS match, "
        "c.target_service AS target_service, e.raw_uri AS raw_uri "
        "ORDER BY e.confidence DESC, c.id",
        {"rid": rid},
    )
    async_rows = self._rows(
        "MATCH (s:Symbol)-[:DECLARES_PRODUCER]->(p:Producer)-[e:ASYNC_CALLS]->(r:Route {id: $rid}) "
        "RETURN p.id AS caller_node_id, p.microservice AS caller_microservice, "
        "s.id AS declaring_symbol_id, e.confidence AS confidence, e.match AS match, "
        "p.topic AS topic, p.broker AS broker "
        "ORDER BY e.confidence DESC, p.id",
        {"rid": rid},
    )

    out: list[RouteCaller] = []
    for row in http_rows:
        out.append(
            RouteCaller(
                caller_node_id=str(row.get("caller_node_id") or ""),
                caller_node_kind="client",
                caller_microservice=str(row.get("caller_microservice") or ""),
                declaring_symbol_id=str(row.get("declaring_symbol_id") or ""),
                confidence=float(row.get("confidence") or 0.0),
                match=str(row.get("match") or ""),
                target_service=str(row.get("target_service") or ""),
                raw_uri=str(row.get("raw_uri") or ""),
            ),
        )
    for row in async_rows:
        out.append(
            RouteCaller(
                caller_node_id=str(row.get("caller_node_id") or ""),
                caller_node_kind="producer",
                caller_microservice=str(row.get("caller_microservice") or ""),
                declaring_symbol_id=str(row.get("declaring_symbol_id") or ""),
                confidence=float(row.get("confidence") or 0.0),
                match=str(row.get("match") or ""),
                topic=str(row.get("topic") or ""),
                broker=str(row.get("broker") or ""),
            ),
        )
    return out
1803
+
1804
def trace_request_flow(self, entry_route_id: str, max_hops: int = 5) -> dict[str, Any]:
    """Inbound HTTP via Client; async inbound via Producer (two-hop each).

    Collects, for the route *entry_route_id*:

    * ``inbound``  -- callers of the route (Client rows first, then Producer
      rows) plus any symbols reaching the declaring symbol over up to
      ``max_hops`` CALLS edges;
    * ``outbound`` -- symbols exposing the route and whatever they reach over
      up to ``max_hops`` CALLS edges.

    ``max_hops`` is clamped to the range 1..8 and echoed back in the result.
    """
    hops = max(1, min(int(max_hops), 8))

    def inbound_rows(call_rel, caller_label, declares_rel, node_kind):
        # Both inbound queries share one shape; only the edge/node labels and
        # the reported caller kind differ.
        return self._rows(
            f"MATCH (entry:Route {{id: $rid}})<-[e:{call_rel}]-(caller:{caller_label})"
            f"<-[:{declares_rel}]-(decl:Symbol) "
            f"OPTIONAL MATCH (origin:Symbol)-[:CALLS*0..{hops}]->(decl) "
            f"RETURN DISTINCT caller.id AS caller_node_id, '{node_kind}' AS caller_node_kind, "
            "decl.id AS declaring_symbol_id, decl.fqn AS declaring_symbol_fqn, "
            "caller.microservice AS microservice, e.confidence AS confidence, "
            "e.match AS match, origin.id AS origin_symbol_id, origin.fqn AS origin_fqn "
            "ORDER BY confidence DESC, caller_node_id",
            {"rid": entry_route_id},
        )

    via_http = inbound_rows("HTTP_CALLS", "Client", "DECLARES_CLIENT", "client")
    via_async = inbound_rows("ASYNC_CALLS", "Producer", "DECLARES_PRODUCER", "producer")

    downstream = self._rows(
        f"MATCH (handler:Symbol)-[:EXPOSES]->(entry:Route {{id: $rid}}) "
        f"OPTIONAL MATCH (handler)-[:CALLS*0..{hops}]->(next:Symbol) "
        "RETURN DISTINCT handler.id AS handler_symbol_id, handler.fqn AS handler_fqn, "
        "handler.microservice AS handler_microservice, "
        "next.id AS next_symbol_id, next.fqn AS next_fqn, next.microservice AS next_microservice "
        "ORDER BY handler_symbol_id, next_symbol_id",
        {"rid": entry_route_id},
    )

    result: dict[str, Any] = {"entry_route_id": entry_route_id, "max_hops": hops}
    result["inbound"] = via_http + via_async
    result["outbound"] = downstream
    return result
1845
+
1846
+ # ---- outbound clients (LC3) ----
1847
+
1848
+ _CLIENT_RETURN = (
1849
+ "c.id AS id, c.client_kind AS client_kind, c.target_service AS target_service, "
1850
+ "c.method AS method, c.path AS path, c.path_template AS path_template, "
1851
+ "c.path_regex AS path_regex, c.member_fqn AS member_fqn, c.member_id AS member_id, "
1852
+ "c.microservice AS microservice, c.module AS module, c.filename AS filename, "
1853
+ "c.start_line AS start_line, c.end_line AS end_line, c.resolved AS resolved, "
1854
+ "c.source_layer AS source_layer"
1855
+ )
1856
+
1857
+ @staticmethod
1858
+ def _row_to_client_dict(row: dict[str, Any]) -> dict[str, Any]:
1859
+ return {
1860
+ "id": str(row.get("id") or ""),
1861
+ "client_kind": str(row.get("client_kind") or ""),
1862
+ "target_service": str(row.get("target_service") or ""),
1863
+ "method": str(row.get("method") or ""),
1864
+ "path": str(row.get("path") or ""),
1865
+ "path_template": str(row.get("path_template") or ""),
1866
+ "path_regex": str(row.get("path_regex") or ""),
1867
+ "member_fqn": str(row.get("member_fqn") or ""),
1868
+ "member_id": str(row.get("member_id") or ""),
1869
+ "microservice": str(row.get("microservice") or ""),
1870
+ "module": str(row.get("module") or ""),
1871
+ "filename": str(row.get("filename") or ""),
1872
+ "start_line": int(row.get("start_line") or 0),
1873
+ "end_line": int(row.get("end_line") or 0),
1874
+ "resolved": bool(row.get("resolved", True)),
1875
+ "source_layer": str(row.get("source_layer") or "builtin"),
1876
+ }
1877
+
1878
def list_clients(
    self,
    *,
    microservice: str | None = None,
    client_kind: str | None = None,
    target_service: str | None = None,
    path_prefix: str | None = None,
    method: str | None = None,
    limit: int = 100,
) -> list[dict[str, Any]]:
    """List Client nodes, optionally filtered.

    Filters that are ``None`` or empty are ignored; ``path_prefix`` is a
    ``STARTS WITH`` match on ``c.path``, the others are equality matches.
    Rows are ordered by microservice, client kind, path, method and id, and
    ``limit`` is clamped to the range 1..500.
    """
    bound: dict[str, Any] = {"lim": max(1, min(int(limit), 500))}
    clauses: list[str] = []

    # (parameter name, value, predicate, is the filter active?)
    optional_filters = (
        ("microservice", microservice, "c.microservice = $microservice",
         bool(microservice)),
        ("client_kind", client_kind, "c.client_kind = $client_kind",
         bool(client_kind)),
        ("target_service", target_service, "c.target_service = $target_service",
         bool(target_service)),
        ("path_prefix", path_prefix, "c.path STARTS WITH $path_prefix",
         bool(path_prefix)),
        ("method", method, "c.method = $method",
         method is not None and method != ""),
    )
    for name, value, predicate, active in optional_filters:
        if active:
            bound[name] = value
            clauses.append(predicate)

    where_clause = ""
    if clauses:
        where_clause = " WHERE " + " AND ".join(clauses)
    query = (
        f"MATCH (c:Client){where_clause} RETURN {self._CLIENT_RETURN} "
        f"ORDER BY c.microservice, c.client_kind, c.path, c.method, c.id LIMIT $lim"
    )
    return [self._row_to_client_dict(record) for record in self._rows(query, bound)]
1912
+
1913
+ _PRODUCER_RETURN = (
1914
+ "p.id AS id, p.producer_kind AS producer_kind, p.topic AS topic, p.broker AS broker, "
1915
+ "p.direction AS direction, p.member_fqn AS member_fqn, p.member_id AS member_id, "
1916
+ "p.microservice AS microservice, p.module AS module, p.filename AS filename, "
1917
+ "p.start_line AS start_line, p.end_line AS end_line, p.resolved AS resolved, "
1918
+ "p.source_layer AS source_layer"
1919
+ )
1920
+
1921
+ @staticmethod
1922
+ def _row_to_producer_dict(row: dict[str, Any]) -> dict[str, Any]:
1923
+ return {
1924
+ "id": str(row.get("id") or ""),
1925
+ "producer_kind": str(row.get("producer_kind") or ""),
1926
+ "topic": str(row.get("topic") or ""),
1927
+ "broker": str(row.get("broker") or ""),
1928
+ "direction": str(row.get("direction") or ""),
1929
+ "member_fqn": str(row.get("member_fqn") or ""),
1930
+ "member_id": str(row.get("member_id") or ""),
1931
+ "microservice": str(row.get("microservice") or ""),
1932
+ "module": str(row.get("module") or ""),
1933
+ "filename": str(row.get("filename") or ""),
1934
+ "start_line": int(row.get("start_line") or 0),
1935
+ "end_line": int(row.get("end_line") or 0),
1936
+ "resolved": bool(row.get("resolved", True)),
1937
+ "source_layer": str(row.get("source_layer") or "builtin"),
1938
+ }
1939
+
1940
def list_producers(
    self,
    *,
    microservice: str | None = None,
    producer_kind: str | None = None,
    topic_prefix: str | None = None,
    limit: int = 100,
) -> list[dict[str, Any]]:
    """List Producer nodes, optionally filtered.

    Filters that are ``None`` or empty are ignored; ``topic_prefix`` is a
    ``STARTS WITH`` match on ``p.topic``, the others are equality matches.
    Rows are ordered by microservice, producer kind, topic and id, and
    ``limit`` is clamped to the range 1..500.
    """
    bound: dict[str, Any] = {"lim": max(1, min(int(limit), 500))}
    clauses: list[str] = []

    # (parameter name, value, predicate)
    optional_filters = (
        ("microservice", microservice, "p.microservice = $microservice"),
        ("producer_kind", producer_kind, "p.producer_kind = $producer_kind"),
        ("topic_prefix", topic_prefix, "p.topic STARTS WITH $topic_prefix"),
    )
    for name, value, predicate in optional_filters:
        if value:
            bound[name] = value
            clauses.append(predicate)

    where_clause = ""
    if clauses:
        where_clause = " WHERE " + " AND ".join(clauses)
    query = (
        f"MATCH (p:Producer){where_clause} RETURN {self._PRODUCER_RETURN} "
        f"ORDER BY p.microservice, p.producer_kind, p.topic, p.id LIMIT $lim"
    )
    return [self._row_to_producer_dict(record) for record in self._rows(query, bound)]
1966
+
1967
+ # ---- used by search_lancedb.graph_expand ----
1968
+
1969
def expand_fqns(self, fqns: list[str], *, depth: int = 1,
                edge_types: list[str] | None = None,
                direction: str = "both", limit: int = 200) -> list[str]:
    """Return neighbor FQNs (types only) for a batch of starting FQNs.

    Args:
        fqns: Starting symbol FQNs; an empty list yields ``[]``.
        depth: Maximum number of hops to follow; values below 1 yield ``[]``.
        edge_types: Relationship labels to traverse. Defaults to
            ``EXTENDS``, ``IMPLEMENTS`` and ``INJECTS`` when empty or ``None``.
        direction: ``"out"`` or ``"in"`` for directed traversal; any other
            value traverses edges in both directions.
        limit: Maximum number of distinct FQNs returned.

    Returns:
        Distinct FQNs of resolved class/interface/enum/record/annotation
        symbols reachable from the starting symbols.

    Raises:
        ValueError: If an entry of *edge_types* is not a plain ASCII
            identifier.
    """
    if not fqns or depth < 1:
        return []
    edges = edge_types or ["EXTENDS", "IMPLEMENTS", "INJECTS"]
    # Relationship labels are spliced into the query text (they are not passed
    # as bound parameters here), so reject anything that is not a bare
    # identifier before it can alter the query.
    invalid = [
        label for label in edges
        if not (isinstance(label, str) and label.isascii() and label.isidentifier())
    ]
    if invalid:
        raise ValueError(f"invalid edge type(s): {invalid!r}")
    edge_pattern = "|".join(edges)
    if direction == "out":
        arrow_l, arrow_r = "-", "->"
    elif direction == "in":
        arrow_l, arrow_r = "<-", "-"
    else:
        arrow_l, arrow_r = "-", "-"
    q = (
        f"MATCH (root:Symbol) WHERE root.fqn IN $fqns "
        f"MATCH (root){arrow_l}[:{edge_pattern}*1..{int(depth)}]{arrow_r}(n:Symbol) "
        f"WHERE n.kind IN ['class','interface','enum','record','annotation'] AND n.resolved "
        f"RETURN DISTINCT n.fqn AS fqn LIMIT {int(limit)}"
    )
    return [r["fqn"] for r in self._rows(q, {"fqns": fqns}) if r.get("fqn")]