java-codebase-rag 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcp_v2.py ADDED
@@ -0,0 +1,1957 @@
1
+ """MCP V2 graph query surface (``search`` / ``find`` / ``describe`` / ``neighbors`` / ``resolve``).
2
+
3
+ Strict frame contract
4
+ ---------------------
5
+ NodeFilter is a typed predicate bag: each populated field maps to one stored graph
6
+ attribute for the selected kind; inapplicable fields fail loud with a teaching message.
7
+ The ``search`` tool's ``query`` parameter is the ranked-text carve-out; structured
8
+ prefix fields (``fqn_prefix``, ``path_prefix``, ``target_path_prefix``) reject ``*``
9
+ and ``?`` — see ``_validate_no_wildcards``.
10
+
11
+ Revisit trigger (``propose/completed/MCP-FILTER-FRAME-PROPOSE.md`` section 3.4.6)
12
+ --------------------------------------------------------------
13
+ If **three** legitimate issue-tracker workflows appear within **six months** of frame
14
+ lock where the strict frame has no clean analog under ``search``, deferred
15
+ ``resolve``, or documented multi-call patterns, reopen the frame for revision.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import os
22
+ import sys
23
+ from pathlib import Path
24
+ import threading
25
+ from typing import Annotated, Any, Literal, get_args
26
+
27
+ from pydantic import BaseModel, ConfigDict, Field, TypeAdapter, ValidationError, model_validator, validate_call
28
+ from sentence_transformers import SentenceTransformer
29
+
30
+ from index_common import SBERT_MODEL
31
+ from java_codebase_rag.config import resolved_sbert_model_for_process_env
32
+ from java_ontology import EDGE_SCHEMA, ResolveReason
33
+ from kuzu_queries import KuzuGraph, OVERRIDE_AXIS_COMPOSED_EDGE_TYPES
34
+ from mcp_hints import MCP_HINTS_FIELD_DESCRIPTION, generate_hints
35
+ from search_lancedb import TABLES, run_search
36
+
37
# Symbol kinds accepted by NodeFilter.symbol_kind / NodeFilter.symbol_kinds.
DeclarationSymbolKind = Literal["class", "interface", "enum", "record", "annotation", "method", "constructor"]

# Stored graph edge labels for one-hop neighbors. Composed DECLARES.* and OVERRIDDEN_BY.*
# dot-keys are separate ComposedEdgeType literals (2-hop traversal). Stored OVERRIDES is an EdgeType.
EdgeType = Literal[
    "EXTENDS",
    "IMPLEMENTS",
    "INJECTS",
    "OVERRIDES",
    "DECLARES",
    "DECLARES_CLIENT",
    "DECLARES_PRODUCER",
    "CALLS",
    "EXPOSES",
    "HTTP_CALLS",
    "ASYNC_CALLS",
]

# Composed (virtual) edge keys: the bare OVERRIDDEN_BY key plus "<first hop>.<terminal edge>"
# dot-keys. None of these is a stored relationship label.
ComposedEdgeType = Literal[
    "DECLARES.DECLARES_CLIENT",
    "DECLARES.DECLARES_PRODUCER",
    "DECLARES.EXPOSES",
    "OVERRIDDEN_BY",
    "OVERRIDDEN_BY.DECLARES_CLIENT",
    "OVERRIDDEN_BY.DECLARES_PRODUCER",
    "OVERRIDDEN_BY.EXPOSES",
]

# Anything a caller may pass in an edge_types list: a stored label or a composed key.
NeighborEdgeType = EdgeType | ComposedEdgeType

# Runtime membership set built from the ComposedEdgeType literal values.
_COMPOSED_EDGE_TYPES = frozenset(get_args(ComposedEdgeType))
# Subset of composed keys whose first hop is DECLARES (the "DECLARES." dot-keys).
_MEMBER_COMPOSED_EDGE_TYPES = frozenset(
    k for k in _COMPOSED_EDGE_TYPES if k.startswith("DECLARES.")
)
# Override-axis composed keys are owned by kuzu_queries; aliased here under a local name.
_OVERRIDE_COMPOSED_EDGE_TYPES = OVERRIDE_AXIS_COMPOSED_EDGE_TYPES

# Validates an edge_types argument: a non-empty list of NeighborEdgeType values.
_NEIGHBOR_EDGE_TYPES_ADAPTER = TypeAdapter(
    Annotated[
        list[NeighborEdgeType],
        Field(min_length=1, description="At least one graph edge label or DECLARES.* dot-key"),
    ]
)

# Process-wide SentenceTransformer instance, created on first use under _st_lock
# (see _get_sentence_transformer).
_st_lock = threading.Lock()
_st_model: SentenceTransformer | None = None

# Symbol kinds for which describe adds the member (DECLARES.*) edge rollup.
_TYPE_SYMBOL_KINDS_FOR_EDGE_ROLLUP = frozenset(
    {"class", "interface", "enum", "record", "annotation"}
)

# Symbol kinds for which describe adds the override-axis rollup.
_METHOD_SYMBOL_KINDS_FOR_OVERRIDE_ROLLUP = frozenset({"method"})

# Per-category fail-loud counters; every read and write happens under _fail_loud_lock.
_fail_loud_counts: dict[str, int] = {}
_fail_loud_lock = threading.Lock()
91
+
92
+
93
def _log_fail_loud(category: str) -> None:
    """Count one fail-loud event for ``category`` and report it on stderr (PR-FRAME-3).

    The counter lives in the process-local ``_fail_loud_counts`` map and is
    bumped under ``_fail_loud_lock``; the updated total is then written as a
    single flushed stderr line.
    """
    with _fail_loud_lock:
        n = _fail_loud_counts.get(category, 0) + 1
        _fail_loud_counts[category] = n
    line = f"[filter-frame] fail-loud category={category} count={n}"
    print(line, file=sys.stderr, flush=True)
99
+
100
+
101
def filter_frame_counters() -> dict[str, int]:
    """Return a copy of the fail-loud counters (tests / local diagnostics; not an MCP tool).

    The copy is taken while holding ``_fail_loud_lock`` so callers never see
    the live dictionary.
    """
    with _fail_loud_lock:
        snapshot = dict(_fail_loud_counts)
    return snapshot
105
+
106
+
107
def _get_sentence_transformer(model_name: str, device: str | None) -> SentenceTransformer:
    """Return the shared SentenceTransformer, constructing it on the first call.

    Construction and lookup both happen under ``_st_lock``. The instance is
    built with ``trust_remote_code=True``.

    NOTE(review): only the first call's ``model_name`` / ``device`` are used;
    later calls get the cached instance whatever arguments they pass. This
    looks intentional (one model per process) but confirm with callers.
    """
    global _st_model
    with _st_lock:
        cached = _st_model
        if cached is None:
            cached = SentenceTransformer(
                model_name,
                device=device,
                trust_remote_code=True,
            )
            _st_model = cached
    return cached
117
+
118
+
119
# Typed predicate bag for node selection. Unknown keys are rejected (extra="forbid").
# Which fields may be populated depends on the node kind; the per-kind lists live in
# _NODEFILTER_APPLICABLE_FIELDS. Documented with `#` comments rather than a class
# docstring so the generated JSON schema description stays unchanged.
class NodeFilter(BaseModel):
    model_config = ConfigDict(extra="forbid")

    # Applicable to every kind (symbol, route, client, producer).
    microservice: str | None = None
    module: str | None = None
    # Applicable to client and producer.
    source_layer: str | None = None
    # Applicable to symbol only.
    role: str | None = None
    exclude_roles: list[str] | None = None
    annotation: str | None = None
    capability: str | None = None
    fqn_prefix: str | None = None  # prefix match; `*` / `?` rejected by _validate_no_wildcards
    symbol_kind: DeclarationSymbolKind | None = None
    symbol_kinds: list[DeclarationSymbolKind] | None = None
    # Applicable to route and client.
    http_method: str | None = None
    # Applicable to route only.
    path_prefix: str | None = None  # prefix match; `*` / `?` rejected by _validate_no_wildcards
    framework: str | None = None
    # Applicable to client only.
    client_kind: str | None = None
    target_service: str | None = None
    target_path_prefix: str | None = None  # prefix match; `*` / `?` rejected by _validate_no_wildcards
    # Applicable to producer only.
    producer_kind: str | None = None
    topic_prefix: str | None = None
140
+
141
+
142
# Predicate bag applied to edges. Unknown keys are rejected (extra="forbid").
# The two strategy lists are mutually exclusive, as are the three
# callee-declaring-role fields; an empty list counts as "not set" for both checks.
class EdgeFilter(BaseModel):
    model_config = ConfigDict(extra="forbid")

    min_confidence: float | None = None
    exclude_strategies: list[str] | None = None
    include_strategies: list[str] | None = None
    callee_declaring_role: str | None = None
    callee_declaring_roles: list[str] | None = None
    exclude_callee_declaring_roles: list[str] | None = None

    @model_validator(mode="after")
    def _strategy_axes_mutually_exclusive(self) -> EdgeFilter:
        # Reject a filter that both whitelists and blacklists strategies.
        if self.include_strategies and self.exclude_strategies:
            raise ValueError("include_strategies and exclude_strategies are mutually exclusive")
        return self

    @model_validator(mode="after")
    def _role_axes_mutually_exclusive(self) -> EdgeFilter:
        # Count how many of the three role axes are populated; at most one is allowed.
        populated_axes = 0
        if self.callee_declaring_role is not None:
            populated_axes += 1
        if self.callee_declaring_roles:
            populated_axes += 1
        if self.exclude_callee_declaring_roles:
            populated_axes += 1
        if populated_axes > 1:
            raise ValueError(
                "callee_declaring_role, callee_declaring_roles, and "
                "exclude_callee_declaring_roles are mutually exclusive"
            )
        return self
173
+
174
+
175
# Declaration order of the filter models' fields; used to report field names in a stable order.
_NODEFILTER_FIELD_ORDER: tuple[str, ...] = tuple(NodeFilter.model_fields.keys())
_EDGEFILTER_FIELD_ORDER: tuple[str, ...] = tuple(EdgeFilter.model_fields.keys())

# Populated EdgeFilter field -> EDGE_SCHEMA attribute name used in Cypher pushdown.
_EDGEFILTER_FIELD_TO_ATTR: dict[str, str] = {
    "min_confidence": "confidence",
    "exclude_strategies": "strategy",
    "include_strategies": "strategy",
    "callee_declaring_role": "callee_declaring_role",
    "callee_declaring_roles": "callee_declaring_role",
    "exclude_callee_declaring_roles": "callee_declaring_role",
}

# NOTE(review): not referenced in the visible part of this file; presumably role values
# that get special fallback handling in role filtering — confirm at the use site.
_ROLE_FILTER_OTHER_FALLBACK_VALUES = frozenset({"SERVICE", "REPOSITORY"})

# NodeFilter fields that may be populated for each node kind. Any other populated field
# is reported by _nodefilter_applicability_error.
_NODEFILTER_APPLICABLE_FIELDS: dict[Literal["symbol", "route", "client", "producer"], tuple[str, ...]] = {
    "symbol": (
        "microservice",
        "module",
        "role",
        "exclude_roles",
        "annotation",
        "capability",
        "fqn_prefix",
        "symbol_kind",
        "symbol_kinds",
    ),
    "route": (
        "microservice",
        "module",
        "http_method",
        "path_prefix",
        "framework",
    ),
    "client": (
        "microservice",
        "module",
        "source_layer",
        "client_kind",
        "target_service",
        "target_path_prefix",
        "http_method",
    ),
    "producer": (
        "microservice",
        "module",
        "source_layer",
        "producer_kind",
        "topic_prefix",
    ),
}
226
+
227
+
228
def _ordered_nodefilter_fields(field_names: set[str]) -> list[str]:
    """Return the members of ``field_names`` sorted by NodeFilter declaration order.

    Names that are not NodeFilter fields are dropped.
    """
    ordered: list[str] = []
    for candidate in _NODEFILTER_FIELD_ORDER:
        if candidate in field_names:
            ordered.append(candidate)
    return ordered
230
+
231
+
232
def _populated_nodefilter_fields(nf: NodeFilter) -> set[str]:
    """Return the names of NodeFilter fields that carry a value.

    A field counts as populated unless it is ``None`` or an empty list.
    """

    def is_set(value: Any) -> bool:
        if value is None:
            return False
        # An empty list is treated the same as an omitted field.
        return not (isinstance(value, list) and not value)

    return {name for name in _NODEFILTER_FIELD_ORDER if is_set(getattr(nf, name))}
242
+
243
+
244
def _nodefilter_inapplicable_fields(
    kind: Literal["symbol", "route", "client", "producer"], nf: NodeFilter,
) -> list[str]:
    """List populated fields of ``nf`` that ``kind`` does not support, in declaration order."""
    populated = _populated_nodefilter_fields(nf)
    allowed = _NODEFILTER_APPLICABLE_FIELDS[kind]
    rejected = {name for name in populated if name not in allowed}
    return _ordered_nodefilter_fields(rejected)
250
+
251
+
252
def _nodefilter_applicability_error(
    kind: Literal["symbol", "route", "client", "producer"], nf: NodeFilter,
) -> str | None:
    """Build the teaching message for fields that do not apply to ``kind``.

    Returns ``None`` when every populated field is applicable; otherwise a
    message naming the offending fields and the fields ``kind`` accepts.
    """
    offending = _nodefilter_inapplicable_fields(kind, nf)
    if offending:
        bad = ", ".join(offending)
        applicable = ", ".join(_NODEFILTER_APPLICABLE_FIELDS[kind])
        return (
            f"Invalid filter for kind='{kind}': populated field(s) not applicable: [{bad}]. "
            f"Applicable field(s): [{applicable}]"
        )
    return None
264
+
265
+
266
def _validate_no_wildcards(nf: NodeFilter) -> str | None:
    """Reject ``*`` / ``?`` in prefix-match fields; wildcards belong in ``search(query=…)``.

    Checks ``fqn_prefix``, ``path_prefix`` and ``target_path_prefix`` in that
    order and returns the error message for the first offender, or ``None``.
    """
    checked_fields = ("fqn_prefix", "path_prefix", "target_path_prefix")
    for field_name in checked_fields:
        candidate = getattr(nf, field_name)
        if candidate is not None and any(ch in candidate for ch in "*?"):
            return (
                f"Wildcards (* and ?) are not supported in structured filter field `{field_name}`; "
                "use search(query=...) for ranked text match instead."
            )
    return None
278
+
279
+
280
def _filter_validation_error_message(exc: ValidationError) -> str:
    """Flatten a pydantic validation error into one ``Invalid filter: …`` line.

    Each reported error becomes ``<dotted.loc>: <msg>`` (or just ``<msg>`` when
    it has no location); entries are joined with ``"; "``. When the exception
    reports no errors, ``str(exc)`` is used as the detail text.
    """

    def render(err: Any) -> str:
        where = ".".join(map(str, err.get("loc", ())))
        what = str(err.get("msg") or "invalid value")
        return f"{where}: {what}" if where else what

    rendered = [render(err) for err in exc.errors()]
    details = "; ".join(rendered) if rendered else str(exc)
    return f"Invalid filter: {details}"
291
+
292
+
293
def _populated_edgefilter_fields(ef: EdgeFilter) -> set[str]:
    """Return the names of EdgeFilter fields that carry a value.

    A field counts as populated unless it is ``None`` or an empty list.
    """

    def is_set(value: Any) -> bool:
        if value is None:
            return False
        # An empty list is treated the same as an omitted field.
        return not (isinstance(value, list) and not value)

    return {name for name in _EDGEFILTER_FIELD_ORDER if is_set(getattr(ef, name))}
303
+
304
+
305
def _edge_schema_attr_names(edge_type: str) -> set[str]:
    """Return the attribute names EDGE_SCHEMA declares for ``edge_type``.

    An edge type missing from EDGE_SCHEMA yields an empty set.
    """
    spec = EDGE_SCHEMA.get(edge_type)
    return set() if spec is None else {attr.name for attr in spec.attrs}
310
+
311
+
312
def _edgefilter_applicability_error(edge_types: list[str], ef: EdgeFilter) -> str | None:
    """Explain why ``ef`` cannot be applied to ``edge_types``, or return ``None``.

    An empty filter is always acceptable. A populated filter is accepted only
    when the request is exactly ``['CALLS']`` (no other stored label, no
    composed key) and every populated field maps to an attribute that
    EDGE_SCHEMA declares for that edge type.
    """
    active_fields = _populated_edgefilter_fields(ef)
    if not active_fields:
        return None

    # Split the request into stored labels and composed keys, preserving order.
    stored_labels: list[str] = []
    composed_keys: list[str] = []
    for requested in edge_types:
        bucket = composed_keys if requested in _COMPOSED_EDGE_TYPES else stored_labels
        bucket.append(requested)

    calls_only = stored_labels == ["CALLS"]
    if composed_keys or not calls_only:
        reasons: list[str] = []
        if not calls_only:
            reasons.append(f"stored labels {stored_labels!r}")
        if composed_keys:
            reasons.append(f"composed keys {composed_keys!r}")
        detail = " and ".join(reasons) if reasons else "requested edge_types"
        return (
            f"edge_filter requires edge_types=['CALLS'] only; {detail} is not supported — "
            "split into separate neighbors calls"
        )

    # Each populated field must map to an attribute the edge schema declares.
    for edge_type in stored_labels:
        declared = _edge_schema_attr_names(edge_type)
        wanted_attrs = (
            _EDGEFILTER_FIELD_TO_ATTR[name]
            for name in _EDGEFILTER_FIELD_ORDER
            if name in active_fields
        )
        missing = next((attr for attr in wanted_attrs if attr not in declared), None)
        if missing is not None:
            return (
                f"{missing} is not on {edge_type}; restrict edge_types to ['CALLS'] "
                "or split into two neighbors_v2 calls"
            )
    return None
341
+
342
+
343
def _coerce_edge_filter(
    value: EdgeFilter | dict[str, Any] | str | None,
) -> EdgeFilter | dict[str, Any] | None:
    """Normalize MCP tool input: weak clients sometimes pass JSON-encoded strings.

    Non-string input (``None``, an ``EdgeFilter``, a dict, …) is returned
    untouched. A string is stripped and parsed as JSON: blank text and JSON
    ``null`` become ``None``, a JSON object is returned as a dict.

    Raises:
        ValueError: the string is not valid JSON, or decodes to something
            other than an object or ``null``.
    """
    if not isinstance(value, str):
        return value
    text = value.strip()
    if not text:
        return None
    try:
        payload = json.loads(text)
    except json.JSONDecodeError as exc:
        raise ValueError(f"edge_filter must be a JSON object; invalid JSON: {exc.msg}") from exc
    if payload is None or isinstance(payload, dict):
        return payload
    raise ValueError(
        f"edge_filter must decode to a JSON object, got {type(payload).__name__}"
    )
365
+
366
+
367
def _coerce_filter(
    value: NodeFilter | dict[str, Any] | str | None,
) -> NodeFilter | dict[str, Any] | None:
    """Normalize MCP tool input: weak clients sometimes pass JSON-encoded strings.

    Non-string input (``None``, a ``NodeFilter``, a dict, …) is returned
    untouched. A string is stripped and parsed as JSON: blank text and JSON
    ``null`` become ``None``, a JSON object is returned as a dict.

    Raises:
        ValueError: the string is not valid JSON, or decodes to something
            other than an object or ``null``.
    """
    if not isinstance(value, str):
        return value
    text = value.strip()
    if not text:
        return None
    try:
        payload = json.loads(text)
    except json.JSONDecodeError as exc:
        raise ValueError(f"filter must be a JSON object; invalid JSON: {exc.msg}") from exc
    if payload is None or isinstance(payload, dict):
        return payload
    raise ValueError(f"filter must decode to a JSON object, got {type(payload).__name__}")
387
+
388
+
389
# One ranked text-search result (built by _row_to_search_hit from a search row).
class SearchHit(BaseModel):
    chunk_id: str  # "<filename>:<start byte offset>:<end byte offset>" (see _chunk_id_from_row)
    symbol_id: str | None = None  # taken from the row or its metadata, when present
    fqn: str | None = None  # the row's primary_type_fqn, when present
    score: float  # the row's _rrf_score, else _score, else 0.0
    snippet: str  # the row's text
    microservice: str | None = None
    module: str | None = None
    role: str | None = None
398
+
399
+
400
# Lightweight reference to a graph node (built by _node_ref_from_row).
class NodeRef(BaseModel):
    id: str
    kind: Literal["symbol", "route", "client", "producer", "unresolved_call_site"]
    # For symbols the stored fqn; for other kinds a display label assembled from
    # method/path, target/method/path, or topic/broker.
    fqn: str
    symbol_kind: str | None = None  # set by _node_ref_from_row for symbol rows only
    microservice: str | None = None
    module: str | None = None
    role: str | None = None  # set by _node_ref_from_row for symbol rows only
408
+
409
+
410
# Full record for one graph node, with an optional per-edge-label incident summary.
class NodeRecord(BaseModel):
    id: str
    kind: Literal["symbol", "route", "client", "producer"]
    fqn: str
    # Free-form node properties.
    data: dict[str, Any] = Field(default_factory=dict)
    # edge label (or composed key) -> {"in": count, "out": count}
    edge_summary: dict[str, dict[str, int]] | None = Field(
        default=None,
        description=(
            "Per graph edge label, in/out incident counts. For type Symbols (class, interface, "
            "enum, record, annotation), may also include composed dot-keys "
            "`DECLARES.DECLARES_CLIENT`, `DECLARES.DECLARES_PRODUCER`, and `DECLARES.EXPOSES`: 2-hop summaries "
            "(DECLARES to member, then that edge) — edge-row counts; navigable via neighbors for type "
            "Symbol origins (`direction=\"out\"` only). For non-static method Symbols, may include "
            "override-axis virtual keys `OVERRIDDEN_BY`, `OVERRIDDEN_BY.DECLARES_CLIENT`, "
            "`OVERRIDDEN_BY.DECLARES_PRODUCER`, `OVERRIDDEN_BY.EXPOSES` (stored `[:OVERRIDES]` "
            "dispatch hop, then terminal edges; navigable via neighbors for method Symbol origins, "
            "`direction=\"out\"` only; composed results include `via_id` in attrs). Plus an "
            "`OVERRIDES` map entry that **merges** stored `[:OVERRIDES]` in/out counts with the "
            "describe-time dispatch-up rollup (per direction `max`, so inbound stored overrides "
            "are not dropped). The stored relationship label `OVERRIDES` **is** also a valid "
            "EdgeType for one-hop neighbors (`direction=\"in\"` from declaration toward overriders)."
        ),
    )
433
+
434
+
435
# One neighbor result: an edge incident to `origin_id` and the node at its other end.
class Edge(BaseModel):
    origin_id: str
    edge_type: str
    direction: Literal["in", "out"]
    other: NodeRef
    # Edge attributes as a free-form mapping.
    attrs: dict[str, Any] = Field(default_factory=dict)
441
+
442
+
443
# Response envelope for the search tool.
class SearchOutput(BaseModel):
    success: bool
    results: list[SearchHit] = Field(default_factory=list)
    message: str | None = None  # populated with the error text on validation failure
    limit: int | None = Field(
        default=None,
        description="Echoed from the request — the page size the server applied. None on success=False.",
    )
    offset: int | None = Field(
        default=None,
        description="Echoed from the request — the page offset the server applied. None on success=False.",
    )
    hints: list[str] = Field(default_factory=list, description=MCP_HINTS_FIELD_DESCRIPTION)
456
+
457
+
458
# Response envelope for the find tool; same paging echo fields as SearchOutput.
class FindOutput(BaseModel):
    success: bool
    results: list[NodeRef] = Field(default_factory=list)
    message: str | None = None
    limit: int | None = Field(
        default=None,
        description="Echoed from the request — the page size the server applied. None on success=False.",
    )
    offset: int | None = Field(
        default=None,
        description="Echoed from the request — the page offset the server applied. None on success=False.",
    )
    hints: list[str] = Field(default_factory=list, description=MCP_HINTS_FIELD_DESCRIPTION)
471
+
472
+
473
# Response envelope for the describe tool: at most one NodeRecord.
class DescribeOutput(BaseModel):
    success: bool
    record: NodeRecord | None = None
    message: str | None = None
    hints: list[str] = Field(default_factory=list, description=MCP_HINTS_FIELD_DESCRIPTION)
478
+
479
+
480
# Response envelope for the neighbors tool.
class NeighborsOutput(BaseModel):
    success: bool
    results: list[Edge] = Field(default_factory=list)
    message: str | None = None
    requested_edge_types: list[str] = Field(
        default_factory=list,
        description="Echo of neighbors(edge_types=...) from the request; empty when success=False.",
    )
    hints: list[str] = Field(default_factory=list, description=MCP_HINTS_FIELD_DESCRIPTION)
489
+
490
+
491
# Outcome of a resolve call, reported in ResolveOutput.status.
ResolveStatus = Literal["one", "many", "none"]

# NOTE(review): the use site is outside the visible chunk; presumably the maximum number
# of candidates returned by resolve — confirm.
_RESOLVE_CANDIDATE_CAP = 10

# Rank assigned to each resolve reason (exact_id is 0, the loosest matches are 3).
# NOTE(review): presumably lower sorts first; the sort itself is outside the visible chunk.
_RESOLVE_REASON_PRIORITY: dict[ResolveReason, int] = {
    "exact_id": 0,
    "exact_fqn": 1,
    "route_method_path": 1,
    "client_target_path": 1,
    "producer_topic_prefix": 1,
    "fqn_suffix": 2,
    "route_template": 2,
    "short_name": 3,
    "client_target": 3,
    "producer_topic": 3,
}

# Cypher RETURN projections for resolve queries. Each expects the matched node to be
# bound to the alias used in the string (s = Symbol, r = Route, c = Client, p = Producer).
_SYMBOL_RESOLVE_RETURN = (
    "s.id AS id, s.fqn AS fqn, s.microservice AS microservice, "
    "s.module AS module, s.role AS role, s.kind AS symbol_kind"
)

_ROUTE_RESOLVE_RETURN = (
    "r.id AS id, r.kind AS kind, r.framework AS framework, r.method AS method, "
    "r.path AS path, r.path_template AS path_template, r.path_regex AS path_regex, "
    "r.topic AS topic, r.broker AS broker, r.feign_name AS feign_name, r.feign_url AS feign_url, "
    "r.microservice AS microservice, r.module AS module, r.filename AS filename, "
    "r.start_line AS start_line, r.end_line AS end_line, r.resolved AS resolved"
)

_CLIENT_RESOLVE_RETURN = (
    "c.id AS id, c.client_kind AS client_kind, c.target_service AS target_service, "
    "c.method AS method, c.path AS path, c.path_template AS path_template, "
    "c.path_regex AS path_regex, c.member_fqn AS member_fqn, c.member_id AS member_id, "
    "c.microservice AS microservice, c.module AS module, c.filename AS filename, "
    "c.start_line AS start_line, c.end_line AS end_line, c.resolved AS resolved, "
    "c.source_layer AS source_layer"
)

_PRODUCER_RESOLVE_RETURN = (
    "p.id AS id, p.producer_kind AS producer_kind, p.topic AS topic, p.broker AS broker, "
    "p.direction AS direction, p.member_fqn AS member_fqn, p.member_id AS member_id, "
    "p.microservice AS microservice, p.module AS module, p.filename AS filename, "
    "p.start_line AS start_line, p.end_line AS end_line, p.resolved AS resolved, "
    "p.source_layer AS source_layer"
)

# NOTE(review): presumably the row limit applied to resolve queries before candidates
# are de-duplicated; the use site is outside the visible chunk — confirm.
_RESOLVE_PRE_DEDUP_LIMIT = 50
539
+
540
+
541
# One possible match produced by resolve. Unknown keys are rejected (extra="forbid").
class ResolveCandidate(BaseModel):
    model_config = ConfigDict(extra="forbid")

    node: NodeRef
    score: float
    reason: ResolveReason  # which matching rule produced this candidate
547
+
548
+
549
# Response envelope for the resolve tool. Unknown keys are rejected (extra="forbid").
class ResolveOutput(BaseModel):
    model_config = ConfigDict(extra="forbid")

    success: bool
    status: ResolveStatus  # "one", "many" or "none"
    node: NodeRef | None = None
    candidates: list[ResolveCandidate] = Field(default_factory=list)
    message: str | None = None
    resolved_identifier: str | None = None
    hints: list[str] = Field(default_factory=list, description=MCP_HINTS_FIELD_DESCRIPTION)
559
+
560
+
561
def _node_kind_from_id(
    id_str: str,
) -> Literal["symbol", "route", "client", "producer", "unresolved_call_site"]:
    """Infer the node kind from the id's prefix.

    Recognized prefixes: ``ucs:``, ``sym:``, ``route:`` / ``r:``,
    ``client:`` / ``c:`` and ``producer:`` / ``p:``.

    Raises:
        ValueError: the id starts with none of the recognized prefixes.
    """
    prefix_table = (
        (("ucs:",), "unresolved_call_site"),
        (("sym:",), "symbol"),
        (("route:", "r:"), "route"),
        (("client:", "c:"), "client"),
        (("producer:", "p:"), "producer"),
    )
    for prefixes, node_kind in prefix_table:
        if id_str.startswith(prefixes):
            return node_kind
    raise ValueError(f"Unknown id prefix for `{id_str}`")
575
+
576
+
577
def _resolve_node_kind(
    graph: KuzuGraph,
    node_id: str,
) -> Literal["symbol", "route", "client", "producer", "unresolved_call_site"]:
    """Determine a node's kind from its id prefix, falling back to graph lookups.

    When the prefix is not recognized, the Symbol, Route, Client and Producer
    tables are probed in that order and the first table containing the id wins.

    Raises:
        ValueError: the prefix is unknown and no table contains the id.
    """
    try:
        return _node_kind_from_id(node_id)
    except ValueError:
        pass  # unknown prefix: fall through to the table probes
    probes = (
        ("Symbol", "symbol"),
        ("Route", "route"),
        ("Client", "client"),
        ("Producer", "producer"),
    )
    for label, node_kind in probes:
        query = f"MATCH (n:{label}) WHERE n.id = $id RETURN n.id AS id LIMIT 1"
        if graph._rows(query, {"id": node_id}):  # noqa: SLF001
            return node_kind
    raise ValueError(f"Unknown id prefix for `{node_id}`")
594
+
595
+
596
def _chunk_id_from_row(row: dict[str, Any]) -> str:
    """Build the chunk id ``<filename>:<start byte offset>:<end byte offset>``.

    A missing filename becomes the empty string; a missing, falsy or non-dict
    ``start`` / ``end`` entry contributes offset ``0``.
    """

    def byte_offset(key: str) -> int:
        span = row.get(key) or {}
        if not isinstance(span, dict):
            return 0
        return int(span.get("byte_offset") or 0)

    name = str(row.get("filename") or "")
    first = byte_offset("start")
    last = byte_offset("end")
    return ":".join((name, str(first), str(last)))
603
+
604
+
605
def _row_to_search_hit(row: dict[str, Any]) -> SearchHit:
    """Convert one search result row into a ``SearchHit``.

    The score is the first truthy value of ``_rrf_score`` / ``_score`` (else
    ``0.0``); optional text columns become ``None`` when missing or falsy.
    """

    def optional_text(key: str) -> str | None:
        value = row.get(key)
        return str(value) if value else None

    score = float(row.get("_rrf_score") or row.get("_score") or 0.0)
    return SearchHit(
        chunk_id=_chunk_id_from_row(row),
        symbol_id=_chunk_to_symbol_id(row),
        fqn=optional_text("primary_type_fqn"),
        score=score,
        snippet=str(row.get("text") or ""),
        microservice=optional_text("microservice"),
        module=optional_text("module"),
        role=optional_text("role"),
    )
617
+
618
+
619
def _chunk_to_symbol_id(chunk_row: dict[str, Any]) -> str | None:
    """Extract a symbol id from a chunk row, or return ``None``.

    A truthy top-level ``symbol_id`` wins. Otherwise ``metadata`` is consulted:
    it may be a dict or a JSON string that decodes to a dict, and its
    ``symbol_id`` entry is used when truthy. Undecodable metadata is ignored.
    """
    direct = chunk_row.get("symbol_id")
    if direct:
        return str(direct)

    payload = chunk_row.get("metadata")
    if isinstance(payload, str):
        try:
            payload = json.loads(payload)
        except Exception:
            # Best effort: unparsable metadata means "no symbol id".
            return None
    if not isinstance(payload, dict):
        return None
    nested = payload.get("symbol_id")
    return str(nested) if nested else None
636
+
637
+
638
def _symbol_where_from_filter(f: NodeFilter) -> tuple[str, dict[str, Any]]:
    """Translate the symbol fields of ``f`` into a Cypher WHERE clause and its parameters.

    Only truthy fields contribute. Predicates are AND-ed in a fixed order and
    refer to the Symbol node as ``s``. Returns ``("", {})`` when no field is set.
    """
    # (filter field / parameter name, predicate text), in emission order.
    clause_table = (
        ("microservice", "s.microservice = $microservice"),
        ("module", "s.module = $module"),
        ("role", "s.role = $role"),
        ("exclude_roles", "NOT s.role IN $exclude_roles"),
        ("annotation", "list_contains(s.annotations, $annotation)"),
        ("capability", "$capability IN s.capabilities"),
        ("fqn_prefix", "s.fqn STARTS WITH $fqn_prefix"),
        ("symbol_kind", "s.kind = $symbol_kind"),
        ("symbol_kinds", "s.kind IN $symbol_kinds"),
    )
    list_valued = ("exclude_roles", "symbol_kinds")

    preds: list[str] = []
    params: dict[str, Any] = {}
    for name, predicate in clause_table:
        value = getattr(f, name)
        if not value:
            continue
        preds.append(predicate)
        # List parameters are copied so the filter's own lists are never shared.
        params[name] = list(value) if name in list_valued else value

    where = "WHERE " + " AND ".join(preds) if preds else ""
    return where, params
670
+
671
+
672
def _node_ref_from_row(kind: Literal["symbol", "route", "client", "producer"], row: dict[str, Any]) -> NodeRef:
    """Build a ``NodeRef`` of the given kind from a query result row.

    ``fqn`` is the stored fqn for symbols and a display label for other kinds:
    ``"<method> <path>"`` for routes, ``"<target> <method> <path>"`` for
    clients and ``"<topic> <broker>"`` for anything else, each stripped of
    surrounding whitespace. ``path`` prefers ``path_template`` over ``path``.
    ``role`` and ``symbol_kind`` are only filled for symbols.
    """

    def text(*keys: str) -> str:
        # First truthy value among the keys, as a string; "" when none is set.
        for key in keys:
            value = row.get(key)
            if value:
                return str(value)
        return ""

    role: str | None = None
    symbol_kind: str | None = None
    if kind == "symbol":
        label = text("fqn")
        role = text("role") or None
        symbol_kind = text("symbol_kind", "kind").strip() or None
    elif kind == "route":
        label = f"{text('method')} {text('path_template', 'path')}".strip()
    elif kind == "client":
        label = f"{text('target_service')} {text('method')} {text('path_template', 'path')}".strip()
    else:
        label = f"{text('topic')} {text('broker')}".strip()

    return NodeRef(
        id=text("id"),
        kind=kind,
        fqn=label,
        symbol_kind=symbol_kind,
        microservice=text("microservice") or None,
        module=text("module") or None,
        role=role,
    )
704
+
705
+
706
def _load_node_record(
    graph: KuzuGraph, node_id: str, kind: Literal["symbol", "route", "client", "producer"],
) -> dict[str, Any] | None:
    """Fetch the stored properties of one node, or ``None`` when the id is absent.

    The node table and projected columns are chosen by ``kind``; any kind other
    than symbol, route or client is read from the Producer table. Every column
    is projected as ``n.<column> AS <column>``. The first matching row is returned.
    """
    columns_by_kind: dict[str, tuple[str, tuple[str, ...]]] = {
        "symbol": (
            "Symbol",
            (
                "id", "kind", "name", "fqn", "package", "module", "microservice", "filename",
                "start_line", "end_line", "start_byte", "end_byte", "modifiers", "annotations",
                "capabilities", "role", "signature", "parent_id", "resolved",
            ),
        ),
        "route": (
            "Route",
            (
                "id", "kind", "framework", "method", "path", "path_template", "path_regex",
                "topic", "broker", "feign_name", "feign_url", "microservice", "module",
                "filename", "start_line", "end_line", "resolved",
            ),
        ),
        "client": (
            "Client",
            (
                "id", "client_kind", "target_service", "method", "path", "path_template",
                "path_regex", "member_fqn", "member_id", "microservice", "module", "filename",
                "start_line", "end_line", "resolved", "source_layer",
            ),
        ),
    }
    producer_spec = (
        "Producer",
        (
            "id", "producer_kind", "topic", "broker", "direction", "member_fqn", "member_id",
            "microservice", "module", "filename", "start_line", "end_line", "resolved",
            "source_layer",
        ),
    )
    label, columns = columns_by_kind.get(kind, producer_spec)
    projection = ", ".join(f"n.{column} AS {column}" for column in columns)
    rows = graph._rows(f"MATCH (n:{label}) WHERE n.id = $id RETURN {projection}", {"id": node_id})  # noqa: SLF001
    return rows[0] if rows else None
751
+
752
+
753
def _incident_counts(cell: dict[str, int] | None) -> dict[str, int]:
    """Normalize an edge-summary cell to ``{"in": int, "out": int}``.

    A missing or empty cell yields zeros; a missing direction defaults to ``0``.
    """
    source = cell or {}
    return {direction: int(source.get(direction, 0)) for direction in ("in", "out")}
757
+
758
+
759
def _merge_overrides_edge_summary(
    stored_before_rollups: dict[str, int],
    summary_after_rollups: dict[str, dict[str, int]],
) -> None:
    """Reconcile `OVERRIDES` with `override_axis_rollup_for` without clobbering stored `in`.

    Rollup rows reuse the ``OVERRIDES`` key for dispatch-up counts only (``in`` is always
    zero there). Stored ``[:OVERRIDES]`` edges contribute real ``in``/``out`` from Kuzu;
    merge per direction with ``max`` so inbound override edges stay visible.

    ``summary_after_rollups`` is updated in place: the ``OVERRIDES`` entry is replaced by
    the merged counts, or removed when both merged counts are zero.
    """
    rollup = _incident_counts(summary_after_rollups.get("OVERRIDES"))
    has_rollup_entry = "OVERRIDES" in summary_after_rollups
    if not has_rollup_entry and not any(stored_before_rollups.values()):
        return  # nothing stored and nothing rolled up: leave the summary untouched

    merged = {
        direction: max(stored_before_rollups[direction], rollup[direction])
        for direction in ("in", "out")
    }
    if any(merged.values()):
        summary_after_rollups["OVERRIDES"] = merged
    else:
        summary_after_rollups.pop("OVERRIDES", None)
778
+
779
+
780
def _edge_summary_for_node(
    graph: KuzuGraph, node_id: str, *, kind: str, row: dict[str, Any]
) -> dict[str, dict[str, int]]:
    """Assemble the describe-time edge summary for one node.

    Starts from the stored per-label counts. Type symbols additionally get the
    member edge rollup; method symbols get the override-axis rollup, after
    which the ``OVERRIDES`` entry is merged with the stored counts captured
    beforehand. Other nodes keep the stored counts only.
    """
    summary = dict(graph.edge_counts_for(node_id))
    sym_kind = str(row.get("kind") or "")
    is_symbol = kind == "symbol"
    if is_symbol and sym_kind in _TYPE_SYMBOL_KINDS_FOR_EDGE_ROLLUP:
        member_rollup = graph.member_edge_rollup_for(node_id)
        summary.update(member_rollup)
    elif is_symbol and sym_kind in _METHOD_SYMBOL_KINDS_FOR_OVERRIDE_ROLLUP:
        # Capture stored OVERRIDES counts before the rollup overwrites that key.
        stored_overrides = _incident_counts(summary.get("OVERRIDES"))
        override_rollup = graph.override_axis_rollup_for(node_id)
        summary.update(override_rollup)
        _merge_overrides_edge_summary(stored_overrides, summary)
    return summary
792
+
793
+
794
def _node_matches_filter(
    kind: Literal["symbol", "route", "client", "producer"], row: dict[str, Any], f: NodeFilter | None,
) -> bool:
    """Check in Python whether ``row`` satisfies every populated field of ``f``.

    ``None`` matches everything. Only truthy filter fields are tested, and only
    those relevant to ``kind``: ``microservice`` / ``module`` always,
    ``source_layer`` for clients and producers, then the kind-specific fields.
    Any kind other than symbol, route or client is treated as a producer.
    Missing row values compare as the empty string (or empty list).
    """
    if f is None:
        return True

    def text(*keys: str) -> str:
        # First truthy row value among the keys, as a string; "" when none is set.
        for key in keys:
            value = row.get(key)
            if value:
                return str(value)
        return ""

    def differs(expected: Any, key: str) -> bool:
        # True when the filter field is populated and the row value is not equal to it.
        return bool(expected) and text(key) != expected

    def lacks_prefix(prefix: Any, key: str) -> bool:
        # True when the filter field is populated and the row value does not start with it.
        return bool(prefix) and not text(key).startswith(prefix)

    if differs(f.microservice, "microservice") or differs(f.module, "module"):
        return False
    if kind in ("client", "producer") and differs(f.source_layer, "source_layer"):
        return False

    if kind == "symbol":
        role = text("role")
        fqn_val = text("fqn", "primary_type_fqn")
        symbol_kind_val = text("kind", "symbol_kind")
        if f.role and role != f.role:
            return False
        if f.exclude_roles and role in set(f.exclude_roles):
            return False
        if f.annotation and f.annotation not in list(row.get("annotations") or []):
            return False
        if f.capability and f.capability not in list(row.get("capabilities") or []):
            return False
        if f.fqn_prefix and not fqn_val.startswith(f.fqn_prefix):
            return False
        if f.symbol_kind and symbol_kind_val != f.symbol_kind:
            return False
        if f.symbol_kinds and symbol_kind_val not in set(f.symbol_kinds):
            return False
        return True

    if kind == "route":
        if differs(f.http_method, "method"):
            return False
        if lacks_prefix(f.path_prefix, "path"):
            return False
        return not differs(f.framework, "framework")

    if kind == "client":
        if differs(f.client_kind, "client_kind"):
            return False
        if differs(f.target_service, "target_service"):
            return False
        if lacks_prefix(f.target_path_prefix, "path"):
            return False
        return not differs(f.http_method, "method")

    if differs(f.producer_kind, "producer_kind"):
        return False
    return not lacks_prefix(f.topic_prefix, "topic")
851
+
852
+
853
def search_v2(
    query: str,
    table: str = "java",
    hybrid: bool = False,
    limit: int = 5,
    offset: int = 0,
    path_contains: str | None = None,
    filter: NodeFilter | dict[str, Any] | str | None = None,
    graph: KuzuGraph | None = None,
) -> SearchOutput:
    """Run the ranked text search and return matching hits.

    The optional ``filter`` is coerced and validated as a ``NodeFilter`` for
    the ``symbol`` kind; an invalid, inapplicable or wildcard-bearing filter
    yields a ``success=False`` output. ``limit``, ``offset`` and
    ``path_contains`` are forwarded to ``run_search``; ``path_contains`` and
    the node filter are then applied again to the rows it returns, so a page
    can contain fewer than ``limit`` hits. ``table="all"`` searches every key
    in ``TABLES``. The index location comes from ``JAVA_CODEBASE_RAG_INDEX_DIR``
    or defaults to ``.java-codebase-rag`` under the working directory.

    ``graph`` is accepted but not read by this function. Any exception is
    reported as a ``success=False`` output rather than raised.
    """

    def failure(message: str) -> SearchOutput:
        return SearchOutput(success=False, message=message, hints=[], limit=None, offset=None)

    try:
        raw_filter = _coerce_filter(filter)
        try:
            if raw_filter is None or isinstance(raw_filter, NodeFilter):
                nf = raw_filter
            else:
                nf = NodeFilter.model_validate(raw_filter)
        except ValidationError as exc:
            _log_fail_loud("unknown_key")
            return failure(_filter_validation_error_message(exc))
        if nf:
            if err := _nodefilter_applicability_error("symbol", nf):
                _log_fail_loud("applicability")
                return failure(err)
            if err := _validate_no_wildcards(nf):
                _log_fail_loud("wildcard")
                return failure(err)

        model_name = resolved_sbert_model_for_process_env(SBERT_MODEL)
        device = os.environ.get("SBERT_DEVICE") or None
        model = _get_sentence_transformer(model_name, device)

        configured_dir = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
        uri = configured_dir or str((Path.cwd() / ".java-codebase-rag").resolve())
        local_path = Path(uri)
        # Object-store URIs are passed through untouched; existing local paths
        # are normalised to their resolved form.
        if not uri.startswith(("s3://", "gs://", "az://")) and local_path.exists():
            uri = str(local_path.resolve())

        rows = run_search(
            query,
            uri=uri,
            table_keys=list(TABLES) if table == "all" else [table],
            hybrid=hybrid,
            limit=limit,
            offset=offset,
            path_substring=path_contains,
            model_name=model_name,
            device=device,
            model=model,
        )

        def keep(row: dict[str, Any]) -> bool:
            if path_contains and path_contains not in str(row.get("filename") or ""):
                return False
            # Search rows are always matched as symbols.
            return not nf or _node_matches_filter("symbol", row, nf)

        hits: list[SearchHit] = [_row_to_search_hit(row) for row in rows if keep(row)]
        hint_payload = {
            "success": True,
            "results": [hit.model_dump() for hit in hits],
            "limit": limit,
            "offset": offset,
        }
        return SearchOutput(
            success=True,
            results=hits,
            limit=limit,
            offset=offset,
            hints=generate_hints("search", hint_payload),
        )
    except Exception as exc:
        return failure(str(exc))
932
+
933
+
934
def find_v2(
    kind: Literal["symbol", "route", "client", "producer"],
    filter: NodeFilter | dict[str, Any] | str,
    limit: int = 25,
    offset: int = 0,
    graph: KuzuGraph | None = None,
) -> FindOutput:
    """List nodes of ``kind`` that satisfy a structured ``NodeFilter``.

    ``filter`` is coerced and validated (a missing filter is treated as an
    empty one); an invalid, inapplicable or wildcard-bearing filter yields a
    ``success=False`` output. Symbols are fetched with a single graph query,
    while routes, clients and producers go through the graph's ``list_*``
    helpers and are re-checked with ``_node_matches_filter``. One row beyond
    the requested page is fetched so the hint payload can report
    ``has_more_results``.

    Any exception is reported as a ``success=False`` output rather than raised.
    """

    def failure(message: str) -> FindOutput:
        return FindOutput(success=False, message=message, hints=[], limit=None, offset=None)

    try:
        g = graph or KuzuGraph.get()
        raw_filter = _coerce_filter(filter)
        if raw_filter is None:
            raw_filter = {}
        try:
            nf = NodeFilter.model_validate(raw_filter) if not isinstance(raw_filter, NodeFilter) else raw_filter
        except ValidationError as exc:
            _log_fail_loud("unknown_key")
            return failure(_filter_validation_error_message(exc))
        if err := _nodefilter_applicability_error(kind, nf):
            _log_fail_loud("applicability")
            return failure(err)
        if err := _validate_no_wildcards(nf):
            _log_fail_loud("wildcard")
            return failure(err)

        # Fetch one row past the page so a following page can be detected.
        fetch_cap = int(limit) + int(offset) + 1
        if kind == "symbol":
            where, params = _symbol_where_from_filter(nf)
            params["lim"] = fetch_cap
            # Several symbols can share one FQN, so ``s.id`` is used as a
            # tiebreak: without it the order of tied rows is unspecified and
            # offset-based paging could repeat or skip them between calls.
            rows = g._rows(  # noqa: SLF001
                f"MATCH (s:Symbol) {where} RETURN s.id AS id, s.fqn AS fqn, s.microservice AS microservice, "
                "s.module AS module, s.role AS role, s.kind AS symbol_kind ORDER BY s.fqn, s.id LIMIT $lim",
                params,
            )
        elif kind == "route":
            rows = g.list_routes(
                microservice=nf.microservice,
                framework=nf.framework,
                path_prefix=nf.path_prefix,
                method=nf.http_method,
                limit=max(500, fetch_cap),
            )
            rows = [r for r in rows if _node_matches_filter("route", r, nf)]
        elif kind == "client":
            rows = g.list_clients(
                microservice=nf.microservice,
                client_kind=nf.client_kind,
                target_service=nf.target_service,
                path_prefix=nf.target_path_prefix,
                method=nf.http_method,
                limit=max(500, fetch_cap),
            )
            rows = [r for r in rows if _node_matches_filter("client", r, nf)]
        else:
            rows = g.list_producers(
                microservice=nf.microservice,
                producer_kind=nf.producer_kind,
                topic_prefix=nf.topic_prefix,
                limit=max(500, fetch_cap),
            )
            rows = [r for r in rows if _node_matches_filter("producer", r, nf)]

        has_more_results = len(rows) > int(offset) + int(limit)
        rows = rows[offset : offset + limit]
        refs = [_node_ref_from_row(kind, r) for r in rows]
        hint_payload: dict[str, Any] = {
            "success": True,
            "kind": kind,
            "results": [r.model_dump() for r in refs],
            "limit": limit,
            "offset": offset,
            "filter": nf.model_dump(exclude_none=True),
            "has_more_results": has_more_results,
        }
        return FindOutput(
            success=True,
            results=refs,
            limit=limit,
            offset=offset,
            hints=generate_hints("find", hint_payload),
        )
    except Exception as exc:
        return failure(str(exc))
1021
+
1022
+
1023
# Failure message for describe requests that target an UnresolvedCallSite id;
# the text lists the supported ways to inspect unresolved call sites instead.
_DESCRIBE_UCS_ID_MESSAGE = (
    "UnresolvedCallSite ids (ucs:…) are not describable — use describe(caller_method_id) "
    "for record.data.unresolved_call_sites, neighbors(..., include_unresolved=True), "
    "or java-codebase-rag unresolved-calls list --method-id <caller_id>"
)
1028
+
1029
+
1030
def describe_v2(
    id: str | None = None,
    fqn: str | None = None,
    graph: KuzuGraph | None = None,
) -> DescribeOutput:
    """Return the full record and edge summary for one node.

    The node is addressed by ``id`` or, when no usable ``id`` is given, by
    exact Symbol ``fqn``. Blank values count as missing, and ``id`` wins when
    both are supplied. UnresolvedCallSite ids (``ucs:`` prefix, or a node that
    resolves to that kind) are rejected with a message naming the alternatives.

    When several symbols share the FQN, the one with the lowest id is
    described and ``message`` carries a note pointing at ``resolve``. For
    symbols whose kind is in ``_METHOD_SYMBOL_KINDS_FOR_OVERRIDE_ROLLUP``, any
    unresolved call sites are added to ``record.data``.

    Any exception is reported as a ``success=False`` output rather than raised.
    """
    try:
        g = graph or KuzuGraph.get()
        has_id = bool(id and str(id).strip())
        has_fqn = bool(fqn and str(fqn).strip())
        if not has_id and not has_fqn:
            return DescribeOutput(success=False, message="id or fqn required", hints=[])
        if has_id and str(id).strip().startswith("ucs:"):
            return DescribeOutput(success=False, message=_DESCRIBE_UCS_ID_MESSAGE, hints=[])
        hint_message: str | None = None
        node_id: str
        if has_id:
            node_id = str(id).strip()
        else:
            fqn_val = str(fqn).strip()
            # Two rows are enough to detect ambiguity. ORDER BY makes the
            # chosen node deterministic; a bare LIMIT 2 would leave the pick
            # among same-FQN symbols unspecified.
            rows = g._rows(  # noqa: SLF001
                "MATCH (s:Symbol) WHERE s.fqn = $fqn RETURN s.id AS id ORDER BY s.id LIMIT 2",
                {"fqn": fqn_val},
            )
            if not rows:
                return DescribeOutput(success=False, message=f"No Symbol found for fqn='{fqn_val}'", hints=[])
            node_id = str(rows[0]["id"] or "")
            if len(rows) > 1:
                hint_message = (
                    "multiple symbols share this FQN; use "
                    f"resolve(identifier={fqn_val!r}, hint_kind='symbol') to list candidates with reasons, "
                    "then describe(id=...) on the chosen node"
                )
        kind = _resolve_node_kind(g, node_id)
        if kind == "unresolved_call_site":
            return DescribeOutput(success=False, message=_DESCRIBE_UCS_ID_MESSAGE, hints=[])
        row = _load_node_record(g, node_id, kind)
        if row is None:
            return DescribeOutput(success=False, message=f"No node found for `{node_id}`", hints=[])
        ref = _node_ref_from_row(kind, row)
        edge_summary = _edge_summary_for_node(g, node_id, kind=kind, row=row)
        data = dict(row)
        if kind == "symbol" and str(row.get("kind") or "") in _METHOD_SYMBOL_KINDS_FOR_OVERRIDE_ROLLUP:
            inline, total = g.unresolved_sites_for_describe(node_id)
            if total > 0:
                data["unresolved_call_sites_total"] = total
                data["unresolved_call_sites"] = [
                    {
                        "line": int(r.get("line") or 0),
                        "reason": str(r.get("reason") or ""),
                        "callee_simple": str(r.get("callee_simple") or ""),
                        "receiver_expr": str(r.get("receiver_expr") or ""),
                    }
                    for r in inline
                ]
                # Only part of the list is inlined; point at the CLI for the rest.
                if total > len(inline):
                    data["unresolved_call_sites_footer"] = (
                        f"{total} unresolved call sites — see "
                        f"java-codebase-rag unresolved-calls list --method-id {node_id} for the full list"
                    )
        record = NodeRecord(id=ref.id, kind=kind, fqn=ref.fqn, data=data, edge_summary=edge_summary)
        return DescribeOutput(
            success=True,
            record=record,
            message=hint_message,
            hints=generate_hints("describe", {"success": True, "record": record.model_dump()}),
        )
    except Exception as exc:
        # ValueError needs no handler of its own: it was handled identically.
        return DescribeOutput(success=False, message=str(exc), hints=[])
1100
+
1101
+
1102
def _resolve_validate_identifier(raw: str) -> tuple[str | None, str | None]:
    """Strip ``raw`` and return ``(identifier, None)``, or ``(None, error)`` when blank.

    The error text distinguishes an empty string from a whitespace-only one.
    """
    trimmed = raw.strip()
    if trimmed:
        return trimmed, None
    detail = "whitespace only" if raw != "" else "empty string"
    return None, f"Invalid identifier: {detail}"
1108
+
1109
+
1110
def _resolve_kinds_to_search(
    hint_kind: Literal["symbol", "route", "client", "producer"] | None,
) -> list[Literal["symbol", "route", "client", "producer"]]:
    """Return the node kinds to probe: just ``hint_kind`` when given, else all four."""
    if hint_kind is not None:
        return [hint_kind]
    return ["symbol", "route", "client", "producer"]
1116
+
1117
+
1118
def _resolve_parse_route_method_path(identifier: str) -> tuple[str, str] | None:
    """Parse ``"<METHOD> <path>"`` into ``(METHOD, path)``, or return ``None``.

    The identifier is split once on whitespace. The first token is upper-cased
    and must be purely alphabetic; the remainder is stripped and must start
    with ``/``.
    """
    tokens = identifier.split(maxsplit=1)
    if len(tokens) == 2:
        verb = tokens[0].upper()
        route_path = tokens[1].strip()
        if verb.isalpha() and route_path.startswith("/"):
            return verb, route_path
    return None
1126
+
1127
+
1128
def _resolve_parse_microservice_route(identifier: str) -> tuple[str, str, str] | None:
    """Parse ``"<microservice> <METHOD> <path>"`` into a 3-tuple, or return ``None``.

    The identifier is split at most twice on whitespace. The second token is
    upper-cased and must be purely alphabetic; the remainder is stripped and
    must start with ``/``. The microservice token is returned unchanged.
    """
    tokens = identifier.split(maxsplit=2)
    if len(tokens) == 3:
        service = tokens[0]
        verb = tokens[1].upper()
        route_path = tokens[2].strip()
        if verb.isalpha() and route_path.startswith("/"):
            return service, verb, route_path
    return None
1136
+
1137
+
1138
def _resolve_symbol_candidates(
    g: KuzuGraph,
    identifier: str,
) -> list[tuple[NodeRef, ResolveReason, int]]:
    """Collect Symbol candidates for ``identifier`` as ``(node, reason, specificity)``.

    Four lookups run in order and their rows are concatenated without
    de-duplication: exact id, exact FQN, FQN equal to or ending with
    ``.<identifier>``, and exact short name. Each lookup is capped at
    ``_RESOLVE_PRE_DEDUP_LIMIT`` rows. Specificity is the identifier length,
    except for the suffix lookup where it is the length of the matched FQN.
    """
    cap = _RESOLVE_PRE_DEDUP_LIMIT
    ident_len = len(identifier)
    found: list[tuple[NodeRef, ResolveReason, int]] = []

    id_rows = g._rows(  # noqa: SLF001
        f"MATCH (s:Symbol) WHERE s.id = $id RETURN {_SYMBOL_RESOLVE_RETURN} LIMIT $lim",
        {"id": identifier, "lim": cap},
    )
    found.extend((_node_ref_from_row("symbol", r), "exact_id", ident_len) for r in id_rows)

    fqn_rows = g._rows(  # noqa: SLF001
        f"MATCH (s:Symbol) WHERE s.fqn = $fqn RETURN {_SYMBOL_RESOLVE_RETURN} LIMIT $lim",
        {"fqn": identifier, "lim": cap},
    )
    found.extend((_node_ref_from_row("symbol", r), "exact_fqn", ident_len) for r in fqn_rows)

    suffix_rows = g._rows(  # noqa: SLF001
        f"MATCH (s:Symbol) WHERE s.fqn = $ident OR s.fqn ENDS WITH $suffix "
        f"RETURN {_SYMBOL_RESOLVE_RETURN} LIMIT $lim",
        {"ident": identifier, "suffix": f".{identifier}", "lim": cap},
    )
    found.extend(
        (_node_ref_from_row("symbol", r), "fqn_suffix", len(str(r.get("fqn") or "")))
        for r in suffix_rows
    )

    name_rows = g._rows(  # noqa: SLF001
        f"MATCH (s:Symbol) WHERE s.name = $name RETURN {_SYMBOL_RESOLVE_RETURN} LIMIT $lim",
        {"name": identifier, "lim": cap},
    )
    found.extend((_node_ref_from_row("symbol", r), "short_name", ident_len) for r in name_rows)

    return found
1178
+
1179
+
1180
def _resolve_route_candidates(
    g: KuzuGraph,
    identifier: str,
) -> list[tuple[NodeRef, ResolveReason, int]]:
    """Collect Route candidates for ``identifier`` as ``(node, reason, specificity)``.

    Lookups run in order and their rows are concatenated without
    de-duplication: exact id; ``<microservice> <METHOD> <path>`` when the
    identifier parses that way; ``<METHOD> <path>`` when it parses that way;
    and a bare path or path template when the identifier starts with ``/``.
    Each lookup is capped at ``_RESOLVE_PRE_DEDUP_LIMIT`` rows.
    """
    cap = _RESOLVE_PRE_DEDUP_LIMIT
    found: list[tuple[NodeRef, ResolveReason, int]] = []

    id_rows = g._rows(  # noqa: SLF001
        f"MATCH (r:Route) WHERE r.id = $id RETURN {_ROUTE_RESOLVE_RETURN} LIMIT $lim",
        {"id": identifier, "lim": cap},
    )
    found.extend((_node_ref_from_row("route", r), "exact_id", len(identifier)) for r in id_rows)

    scoped = _resolve_parse_microservice_route(identifier)
    if scoped is not None:
        microservice, method, path = scoped
        scoped_rows = g._rows(  # noqa: SLF001
            f"MATCH (r:Route) WHERE r.microservice = $ms AND r.method = $method "
            f"AND (r.path = $path OR r.path_template = $path) "
            f"RETURN {_ROUTE_RESOLVE_RETURN} LIMIT $lim",
            {"ms": microservice, "method": method, "path": path, "lim": cap},
        )
        found.extend(
            (_node_ref_from_row("route", r), "route_method_path", len(path)) for r in scoped_rows
        )

    unscoped = _resolve_parse_route_method_path(identifier)
    if unscoped is not None:
        method, path = unscoped
        unscoped_rows = g._rows(  # noqa: SLF001
            f"MATCH (r:Route) WHERE r.method = $method "
            f"AND (r.path = $path OR r.path_template = $path) "
            f"RETURN {_ROUTE_RESOLVE_RETURN} LIMIT $lim",
            {"method": method, "path": path, "lim": cap},
        )
        found.extend(
            (_node_ref_from_row("route", r), "route_method_path", len(path)) for r in unscoped_rows
        )

    if identifier.startswith("/"):
        path_rows = g._rows(  # noqa: SLF001
            f"MATCH (r:Route) WHERE r.path = $path OR r.path_template = $path "
            f"RETURN {_ROUTE_RESOLVE_RETURN} LIMIT $lim",
            {"path": identifier, "lim": cap},
        )
        # Specificity is the length of the stored template, falling back to the path.
        found.extend(
            (
                _node_ref_from_row("route", r),
                "route_template",
                len(str(r.get("path_template") or r.get("path") or "")),
            )
            for r in path_rows
        )

    return found
1230
+
1231
+
1232
def _resolve_client_candidates(
    g: KuzuGraph,
    identifier: str,
) -> list[tuple[NodeRef, ResolveReason, int]]:
    """Collect Client candidates for ``identifier`` as ``(node, reason, specificity)``.

    An exact-id lookup always runs. An identifier containing a space is then
    read as ``<target_service> <path prefix>`` and matched on target service
    plus path or path-template prefix. One without a space that does not start
    with ``/`` is matched as a bare target service. Each lookup is capped at
    ``_RESOLVE_PRE_DEDUP_LIMIT`` rows.
    """
    cap = _RESOLVE_PRE_DEDUP_LIMIT
    found: list[tuple[NodeRef, ResolveReason, int]] = []

    id_rows = g._rows(  # noqa: SLF001
        f"MATCH (c:Client) WHERE c.id = $id RETURN {_CLIENT_RESOLVE_RETURN} LIMIT $lim",
        {"id": identifier, "lim": cap},
    )
    found.extend((_node_ref_from_row("client", r), "exact_id", len(identifier)) for r in id_rows)

    if " " in identifier:
        head, tail = identifier.split(" ", 1)
        target = head.strip()
        path_prefix = tail.strip()
        if target and path_prefix:
            prefix_rows = g._rows(  # noqa: SLF001
                f"MATCH (c:Client) WHERE c.target_service = $target "
                f"AND (c.path STARTS WITH $path OR c.path_template STARTS WITH $path) "
                f"RETURN {_CLIENT_RESOLVE_RETURN} LIMIT $lim",
                {"target": target, "path": path_prefix, "lim": cap},
            )
            found.extend(
                (_node_ref_from_row("client", r), "client_target_path", len(path_prefix))
                for r in prefix_rows
            )
    elif not identifier.startswith("/"):
        target_rows = g._rows(  # noqa: SLF001
            f"MATCH (c:Client) WHERE c.target_service = $target RETURN {_CLIENT_RESOLVE_RETURN} LIMIT $lim",
            {"target": identifier, "lim": cap},
        )
        found.extend(
            (_node_ref_from_row("client", r), "client_target", len(identifier)) for r in target_rows
        )

    return found
1269
+
1270
+
1271
def _resolve_producer_candidates(
    g: KuzuGraph,
    identifier: str,
) -> list[tuple[NodeRef, ResolveReason, int]]:
    """Collect Producer candidates for ``identifier`` as ``(node, reason, specificity)``.

    Exact-id and exact-topic lookups always run; a topic-prefix lookup is
    added when the identifier does not start with ``/``. Rows are concatenated
    without de-duplication, each lookup is capped at
    ``_RESOLVE_PRE_DEDUP_LIMIT`` rows, and specificity is always the
    identifier length.
    """
    cap = _RESOLVE_PRE_DEDUP_LIMIT
    ident_len = len(identifier)
    found: list[tuple[NodeRef, ResolveReason, int]] = []

    id_rows = g._rows(  # noqa: SLF001
        f"MATCH (p:Producer) WHERE p.id = $id RETURN {_PRODUCER_RESOLVE_RETURN} LIMIT $lim",
        {"id": identifier, "lim": cap},
    )
    found.extend((_node_ref_from_row("producer", r), "exact_id", ident_len) for r in id_rows)

    topic_rows = g._rows(  # noqa: SLF001
        f"MATCH (p:Producer) WHERE p.topic = $topic RETURN {_PRODUCER_RESOLVE_RETURN} LIMIT $lim",
        {"topic": identifier, "lim": cap},
    )
    found.extend((_node_ref_from_row("producer", r), "producer_topic", ident_len) for r in topic_rows)

    if not identifier.startswith("/"):
        prefix_rows = g._rows(  # noqa: SLF001
            f"MATCH (p:Producer) WHERE p.topic STARTS WITH $topic RETURN {_PRODUCER_RESOLVE_RETURN} LIMIT $lim",
            {"topic": identifier, "lim": cap},
        )
        found.extend(
            (_node_ref_from_row("producer", r), "producer_topic_prefix", ident_len)
            for r in prefix_rows
        )

    return found
1301
+
1302
+
1303
def _resolve_dedupe_candidates(
    raw: list[tuple[NodeRef, ResolveReason, int]],
) -> list[tuple[NodeRef, ResolveReason, int]]:
    """Keep one candidate per node id, preferring the strongest match reason.

    A later entry replaces the kept one when its reason has a lower
    ``_RESOLVE_REASON_PRIORITY`` value, or the same value with a higher
    specificity. Output follows the order in which node ids were first seen.
    """
    kept: dict[str, tuple[NodeRef, ResolveReason, int]] = {}
    for node, reason, specificity in raw:
        current = kept.get(node.id)
        if current is not None:
            current_rank = _RESOLVE_REASON_PRIORITY[current[1]]
            new_rank = _RESOLVE_REASON_PRIORITY[reason]
            stronger_reason = new_rank < current_rank
            same_reason_more_specific = new_rank == current_rank and specificity > current[2]
            if not (stronger_reason or same_reason_more_specific):
                continue
        kept[node.id] = (node, reason, specificity)
    return list(kept.values())
1317
+
1318
+
1319
def _resolve_rank_candidates(
    deduped: list[tuple[NodeRef, ResolveReason, int]],
) -> list[ResolveCandidate]:
    """Order candidates and attach a rank-derived score.

    Sorting is by reason priority (lower first), then specificity (higher
    first), then node id. The score is ``1.0 - position / total``, so the
    first candidate scores 1.0 and later ones step down evenly.
    """

    def rank_key(item: tuple[NodeRef, ResolveReason, int]) -> tuple[int, int, str]:
        node, reason, specificity = item
        return (_RESOLVE_REASON_PRIORITY[reason], -specificity, node.id)

    ordered = sorted(deduped, key=rank_key)
    total = len(ordered)
    ranked: list[ResolveCandidate] = []
    for position, (node, reason, _spec) in enumerate(ordered):
        score = (1.0 - (position / total)) if total else 0.0
        ranked.append(ResolveCandidate(node=node, reason=reason, score=score))
    return ranked
1335
+
1336
+
1337
def _resolve_assert_invariants(out: ResolveOutput) -> None:
    """Assert that ``out`` is internally consistent for its ``success``/``status``.

    * failure: status is ``"none"``, no node, no candidates, non-empty message;
    * ``"none"``: no node, no candidates, non-empty message;
    * ``"one"``: a node and no candidates;
    * ``"many"``: no node and at least two candidates.

    Raises ``AssertionError`` on the first violated condition.
    """
    status = out.status
    if not out.success or status == "none":
        if not out.success:
            assert status == "none"
        assert out.node is None
        assert not out.candidates
        assert out.message
        return
    if status == "one":
        assert out.node is not None
        assert not out.candidates
        return
    if status == "many":
        assert out.node is None
        assert len(out.candidates) >= 2
1354
+
1355
+
1356
def _resolve_seeds_for_hints(identifier: str) -> tuple[str | None, str | None]:
    """Derive ``(path_prefix_seed, target_service_seed)`` from an identifier.

    The path seed is the path of a ``<METHOD> <path>`` identifier, else the
    path of a ``<microservice> <METHOD> <path>`` identifier, else the
    identifier itself when it starts with ``/``, else ``None``.

    The target-service seed is the text before the first space when the
    identifier contains one, the whole identifier when it has no space and
    does not start with ``/``, and ``None`` otherwise.
    """
    path_prefix_seed: str | None
    method_path = _resolve_parse_route_method_path(identifier)
    if method_path is not None:
        path_prefix_seed = method_path[1]
    elif (ms_route := _resolve_parse_microservice_route(identifier)) is not None:
        path_prefix_seed = ms_route[2]
    elif identifier.startswith("/"):
        path_prefix_seed = identifier
    else:
        path_prefix_seed = None

    target_service_seed: str | None = None
    if " " in identifier:
        leading = identifier.split(" ", 1)[0].strip()
        if leading:
            target_service_seed = leading
    elif not identifier.startswith("/"):
        target_service_seed = identifier

    return path_prefix_seed, target_service_seed
1378
+
1379
+
1380
def _resolve_finalize_success(
    trimmed: str,
    hint_kind: Literal["symbol", "route", "client", "producer"] | None,
    matches: list[ResolveCandidate],
) -> ResolveOutput:
    """Build the successful ``ResolveOutput`` for the ranked ``matches``.

    No match gives status ``"none"`` with a pointer to ``search``; exactly one
    gives status ``"one"`` with that node; several give status ``"many"`` with
    the candidate list. Hints are generated from the outcome plus seeds derived
    from the identifier, and the result is checked against the resolve
    invariants before it is returned.
    """
    match_count = len(matches)
    if match_count == 0:
        out = ResolveOutput(
            success=True,
            status="none",
            message=(
                "No matches for identifier; use search(query=...) for ranked fuzzy lookup."
            ),
            resolved_identifier=trimmed,
        )
    elif match_count == 1:
        only = matches[0]
        out = ResolveOutput(
            success=True,
            status="one",
            node=only.node,
            resolved_identifier=trimmed,
        )
    else:
        out = ResolveOutput(
            success=True,
            status="many",
            candidates=matches,
            resolved_identifier=trimmed,
        )

    seeds = _resolve_seeds_for_hints(trimmed)
    hints = generate_hints(
        "resolve",
        {
            "status": out.status,
            "resolved_identifier": trimmed,
            "candidates": out.candidates,
            "hint_kind": hint_kind,
            "path_prefix_seed": seeds[0],
            "target_service_seed": seeds[1],
        },
    )
    out = out.model_copy(update={"hints": hints})
    _resolve_assert_invariants(out)
    return out
1421
+
1422
+
1423
def resolve_v2(
    identifier: str,
    hint_kind: Literal["symbol", "route", "client", "producer"] | None = None,
    graph: KuzuGraph | None = None,
) -> ResolveOutput:
    """Resolve a free-form identifier to zero, one or many graph nodes.

    The identifier is trimmed and rejected when blank. Identifiers containing
    ``*`` or ``?`` return a successful ``"none"`` result without querying the
    graph. Otherwise candidates are gathered for ``hint_kind`` (or for all
    four kinds when it is ``None``), de-duplicated per node, ranked, and capped
    at ``_RESOLVE_CANDIDATE_CAP``.

    Any exception is reported as a ``success=False`` output rather than raised.
    """

    def failure(message: str) -> ResolveOutput:
        out = ResolveOutput(
            success=False,
            status="none",
            message=message,
            hints=[],
            resolved_identifier=None,
        )
        _resolve_assert_invariants(out)
        return out

    try:
        trimmed, err = _resolve_validate_identifier(identifier)
        if err is not None:
            return failure(err)

        assert trimmed is not None
        if "*" in trimmed or "?" in trimmed:
            return _resolve_finalize_success(trimmed, hint_kind, [])

        g = graph or KuzuGraph.get()
        raw: list[tuple[NodeRef, ResolveReason, int]] = []
        for kind in _resolve_kinds_to_search(hint_kind):
            if kind == "symbol":
                raw.extend(_resolve_symbol_candidates(g, trimmed))
            elif kind == "route":
                raw.extend(_resolve_route_candidates(g, trimmed))
            elif kind == "client":
                raw.extend(_resolve_client_candidates(g, trimmed))
            else:
                raw.extend(_resolve_producer_candidates(g, trimmed))

        deduped = _resolve_dedupe_candidates(raw)
        ranked = _resolve_rank_candidates(deduped)
        capped = ranked[:_RESOLVE_CANDIDATE_CAP]
        return _resolve_finalize_success(trimmed, hint_kind, capped)
    except Exception as exc:
        # Some exceptions stringify to "" (for example a bare AssertionError
        # from the invariant checks). The failure invariants require a
        # non-empty message, so fall back to the exception class name rather
        # than letting the check raise out of this handler.
        return failure(str(exc) or type(exc).__name__)
1471
+
1472
+
1473
def _neighbor_edge_attrs(row: dict[str, Any]) -> dict[str, Any]:
    """Extract edge attributes from a neighbor query row.

    The ``other_id``, ``edge_type`` and ``stored_edge_type`` columns are
    dropped, along with any value that is ``None`` or the empty string.
    ``row_kind`` defaults to ``"resolved"`` when the row does not supply one.
    """
    structural_columns = {"other_id", "edge_type", "stored_edge_type"}
    attrs: dict[str, Any] = {}
    for column, value in row.items():
        if column in structural_columns or value in (None, ""):
            continue
        attrs[column] = value
    if "row_kind" not in attrs:
        attrs["row_kind"] = "resolved"
    return attrs
1482
+
1483
+
1484
def _unresolved_site_to_edge(origin_id: str, row: dict[str, Any]) -> Edge:
    """Wrap an unresolved call-site row as an outgoing ``CALLS`` edge.

    The edge's other end is a ``NodeRef`` of kind ``unresolved_call_site``
    carrying the site id and the callee's simple name. Missing text columns
    become ``""`` and missing numeric columns become ``0``.
    """

    def text(column: str) -> str:
        return str(row.get(column) or "")

    def number(column: str) -> int:
        return int(row.get(column) or 0)

    site_id = text("id")
    callee_name = text("callee_simple")
    site_line = number("call_site_line")
    site_byte = number("call_site_byte")
    placeholder = NodeRef(id=site_id, kind="unresolved_call_site", fqn="", name=callee_name)
    site_attrs = {
        "row_kind": "unresolved_call_site",
        "unresolved_call_site_id": site_id,
        "reason": text("reason"),
        "call_site_line": site_line,
        "call_site_byte": site_byte,
        "arg_count": number("arg_count"),
        "callee_simple": callee_name,
        "receiver_expr": text("receiver_expr"),
    }
    return Edge(
        origin_id=origin_id,
        edge_type="CALLS",
        direction="out",
        other=placeholder,
        attrs=site_attrs,
    )
1505
+
1506
+
1507
def _calls_transcript_sort_key(edge: Edge) -> tuple[int, int, int]:
    """Sort key ``(line, byte, kind_rank)`` built from an edge's call-site attributes.

    Missing attributes count as ``0``. ``kind_rank`` is ``0`` for rows whose
    ``row_kind`` is ``"resolved"`` (or unset) and ``1`` for anything else, so a
    resolved row sorts ahead of other rows at the same position.
    """
    attrs = edge.attrs or {}
    row_kind = str(attrs.get("row_kind") or "resolved")
    return (
        int(attrs.get("call_site_line") or 0),
        int(attrs.get("call_site_byte") or 0),
        0 if row_kind == "resolved" else 1,
    )
1513
+
1514
+
1515
def _dedup_call_edges(edges: list[Edge]) -> list[Edge]:
    """Collapse resolved CALLS rows sharing (origin_id, other.id); unresolved rows pass through.

    Each group of resolved edges is replaced by its earliest member (by
    ``_calls_transcript_sort_key``), annotated with ``call_site_count`` and
    the sorted distinct ``call_site_lines`` of the group. The collapsed edges
    and the untouched unresolved edges are returned sorted by the same key.
    """

    def is_unresolved(edge: Edge) -> bool:
        return str((edge.attrs or {}).get("row_kind") or "resolved") == "unresolved_call_site"

    passthrough = [edge for edge in edges if is_unresolved(edge)]

    by_pair: dict[tuple[str, str], list[Edge]] = {}
    for edge in edges:
        if is_unresolved(edge):
            continue
        by_pair.setdefault((edge.origin_id, edge.other.id), []).append(edge)

    collapsed: list[Edge] = []
    for members in by_pair.values():
        # ``min`` returns the first minimal member, like taking index 0 of a stable sort.
        representative = min(members, key=_calls_transcript_sort_key)
        distinct_lines = sorted(
            {int((member.attrs or {}).get("call_site_line") or 0) for member in members},
        )
        merged_attrs = dict(representative.attrs or {})
        merged_attrs["call_site_count"] = len(members)
        merged_attrs["call_site_lines"] = distinct_lines
        collapsed.append(representative.model_copy(update={"attrs": merged_attrs}))

    return sorted(collapsed + passthrough, key=_calls_transcript_sort_key)
1542
+
1543
+
1544
def _edgefilter_pushdown_kwargs(ef: EdgeFilter | None) -> dict[str, Any]:
    """Turn an ``EdgeFilter`` into keyword arguments for the neighbor-calls query.

    Returns an empty dict when no filter is given; otherwise copies the six
    push-down fields verbatim, including any that are unset.
    """
    if ef is None:
        return {}
    pushdown_fields = (
        "min_confidence",
        "include_strategies",
        "exclude_strategies",
        "callee_declaring_role",
        "callee_declaring_roles",
        "exclude_callee_declaring_roles",
    )
    return {name: getattr(ef, name) for name in pushdown_fields}
1555
+
1556
+
1557
def _rows_to_call_edges(
    g: Any,
    *,
    origin_id: str,
    direction: Literal["in", "out"],
    rows: list[dict[str, Any]],
    nf: NodeFilter | None,
) -> list[Edge]:
    """Convert neighbor query rows into ``Edge`` objects, applying the node filter.

    For each row the neighbor's kind and record are loaded from the graph;
    rows whose neighbor record is missing, or whose neighbor does not match
    ``nf``, are skipped.

    Raises:
        ValueError: when ``nf`` carries a field that does not apply to a
            neighbor's kind.
    """
    edges: list[Edge] = []
    for row in rows:
        neighbor_id = str(row.get("other_id") or "")
        neighbor_kind = _resolve_node_kind(g, neighbor_id)
        neighbor = _load_node_record(g, neighbor_id, neighbor_kind)
        if neighbor is None:
            continue
        if nf:
            problem = _nodefilter_applicability_error(neighbor_kind, nf)
            if problem:
                _log_fail_loud("applicability")
                raise ValueError(problem)
        if _node_matches_filter(neighbor_kind, neighbor, nf):
            edge = Edge(
                origin_id=origin_id,
                edge_type=str(row.get("edge_type") or "CALLS"),
                direction=direction,
                other=_node_ref_from_row(neighbor_kind, neighbor),
                attrs=_neighbor_edge_attrs(row),
            )
            edges.append(edge)
    return edges
1587
+
1588
+
1589
def _neighbors_calls_for_origin(
    g: Any,
    origin_id: str,
    *,
    direction: Literal["in", "out"],
    nf: NodeFilter | None,
    ef: EdgeFilter | None,
    offset: int,
    limit: int | None,
    include_unresolved: bool = False,
    dedup_calls: bool = False,
) -> list[Edge]:
    """Return one page of CALLS edges for ``origin_id`` in ``direction``.

    Edge-filter fields are always passed down to the graph query. Paging is
    delegated to the query only when nothing has to be applied in Python: no
    node filter, no de-duplication, no unresolved call sites, and a concrete
    ``limit``. Otherwise every row is fetched, converted, optionally merged
    with unresolved call sites (outgoing direction only) and de-duplicated,
    and the page is sliced from the resulting list. ``limit=None`` returns
    the whole list.
    """
    pushdown = _edgefilter_pushdown_kwargs(ef)
    # Each of these conditions requires the complete row set in Python before
    # the page can be cut.
    needs_full_stream = (
        nf is not None
        or dedup_calls
        or include_unresolved
        or limit is None
    )
    sql_pagination = not needs_full_stream and limit is not None
    if sql_pagination:
        # Fast path: the query applies offset and limit itself.
        rows = g.neighbor_calls_for_symbol(
            origin_id,
            direction=direction,
            offset=offset,
            limit=limit,
            sql_pagination=True,
            **pushdown,
        )
        return _rows_to_call_edges(g, origin_id=origin_id, direction=direction, rows=rows, nf=nf)
    # Slow path: fetch everything, post-process, then slice.
    rows = g.neighbor_calls_for_symbol(
        origin_id,
        direction=direction,
        offset=0,
        limit=None,
        sql_pagination=False,
        **pushdown,
    )
    edges = _rows_to_call_edges(g, origin_id=origin_id, direction=direction, rows=rows, nf=nf)
    if include_unresolved and direction == "out":
        ucs_rows = g.unresolved_sites_for_caller(origin_id, direction=direction)
        edges.extend(_unresolved_site_to_edge(origin_id, r) for r in ucs_rows)
        # Re-sort so the appended unresolved sites interleave by call-site position.
        edges.sort(key=_calls_transcript_sort_key)
    if dedup_calls:
        edges = _dedup_call_edges(edges)
    if limit is None:
        return edges
    return edges[offset : offset + limit]
1637
+
1638
+
1639
def _composed_axis_origin_error(
    *,
    symbol_kind: str,
    modifiers: list[str] | None,
    declares_composed: list[str],
    override_composed: list[str],
) -> str | None:
    """Fail-fast origin gate for composed DECLARES.* vs OVERRIDDEN_BY.* families.

    Returns an error message when the origin symbol cannot serve the requested
    composed edge types, else ``None``. Member-composed types need a type
    symbol origin; override-composed types need a non-static method origin
    that is not a constructor. Messages name the first requested key of the
    offending family.
    """
    if declares_composed and symbol_kind not in _TYPE_SYMBOL_KINDS_FOR_EDGE_ROLLUP:
        return f"Composed edge types ({declares_composed[0]}) require a type Symbol origin"
    if not override_composed:
        return None

    key = override_composed[0]
    if symbol_kind == "constructor":
        return (
            f"Composed edge types ({key}) require a non-static method Symbol origin "
            "(constructors are not supported)"
        )
    if symbol_kind not in _METHOD_SYMBOL_KINDS_FOR_OVERRIDE_ROLLUP:
        return f"Composed edge types ({key}) require a method Symbol origin"
    if "static" in (modifiers or []):
        return (
            f"Composed edge types ({key}) require a non-static method Symbol origin "
            "(static methods are not supported)"
        )
    return None
1665
+
1666
+
1667
@validate_call(config={"arbitrary_types_allowed": True})
def neighbors_v2(
    ids: str | list[str],
    # Required fields are intentional: direct Python calls and MCP-bound calls
    # share the same validation contract through @validate_call.
    direction: Literal["in", "out"] = Field(...),
    edge_types: list[NeighborEdgeType] = Field(...),
    limit: int = 25,
    offset: int = 0,
    filter: NodeFilter | dict[str, Any] | str | None = None,
    edge_filter: EdgeFilter | dict[str, Any] | str | None = None,
    include_unresolved: bool = False,
    dedup_calls: bool = False,
    graph: Any | None = None,
) -> NeighborsOutput:
    """Return the edges adjacent to one or more origin nodes.

    Args:
        ids: A single node id or a list of node ids; each one is traversed
            as an origin and the resulting edges are concatenated in order.
        direction: ``"out"`` matches ``(origin)-[e]->(other)``; ``"in"``
            matches ``(origin)<-[e]-(other)``.
        edge_types: Edge labels to traverse. Duplicates are dropped while
            preserving first-seen order. Composed edge types require
            ``direction="out"`` and a Symbol origin of the matching kind.
        limit: Maximum number of edges returned.
        offset: Number of edges skipped before the returned window.
        filter: Node predicate applied to the *other* endpoint; accepts a
            ``NodeFilter`` or any form ``_coerce_filter`` understands.
        edge_filter: Edge predicate; accepts an ``EdgeFilter`` or any form
            ``_coerce_edge_filter`` understands.
        include_unresolved: Only valid with ``edge_types == ["CALLS"]``,
            ``direction="out"`` and no ``edge_filter``.
        dedup_calls: Forwarded to ``_neighbors_calls_for_origin`` on the
            CALLS-only path.
        graph: Graph handle; defaults to ``KuzuGraph.get()`` when falsy.

    Returns:
        A ``NeighborsOutput``. Contract violations and unexpected errors are
        reported as ``success=False`` with an explanatory ``message``.

    Raises:
        ValidationError: Re-raised unchanged when pydantic validation fails
            outside the filter / edge-filter coercion steps (for example
            while validating ``edge_types``).
    """
    try:
        validated_types = _NEIGHBOR_EDGE_TYPES_ADAPTER.validate_python(edge_types)
        # dict.fromkeys de-duplicates while keeping the caller's ordering.
        requested_edge_types = list(dict.fromkeys(validated_types))

        def _fail(message: str, *, echo_types: bool = True) -> NeighborsOutput:
            # Single place that shapes the failure payload; some failure
            # paths deliberately report an empty requested_edge_types list.
            return NeighborsOutput(
                success=False,
                message=message,
                hints=[],
                requested_edge_types=requested_edge_types if echo_types else [],
            )

        flat_labels = [et for et in requested_edge_types if et not in _COMPOSED_EDGE_TYPES]
        composed_keys = [et for et in requested_edge_types if et in _COMPOSED_EDGE_TYPES]
        declares_composed = [k for k in composed_keys if k in _MEMBER_COMPOSED_EDGE_TYPES]
        override_composed = [k for k in composed_keys if k in _OVERRIDE_COMPOSED_EDGE_TYPES]
        ordered_composed = declares_composed + override_composed
        g = graph or KuzuGraph.get()

        # --- Coerce and validate the node filter -------------------------
        try:
            raw_filter = _coerce_filter(filter)
            nf = (
                NodeFilter.model_validate(raw_filter)
                if raw_filter is not None and not isinstance(raw_filter, NodeFilter)
                else raw_filter
            )
        except ValidationError as exc:
            _log_fail_loud("unknown_key")
            return _fail(_filter_validation_error_message(exc), echo_types=False)

        # --- Coerce and validate the edge filter -------------------------
        try:
            raw_edge_filter = _coerce_edge_filter(edge_filter)
            ef = (
                EdgeFilter.model_validate(raw_edge_filter)
                if raw_edge_filter is not None and not isinstance(raw_edge_filter, EdgeFilter)
                else raw_edge_filter
            )
        except ValidationError as exc:
            _log_fail_loud("edge_filter")
            return _fail(_filter_validation_error_message(exc), echo_types=False)
        except ValueError as exc:
            _log_fail_loud("edge_filter")
            return _fail(str(exc), echo_types=False)

        # --- Cross-argument contract checks ------------------------------
        if include_unresolved and ef is not None:
            return _fail(
                "include_unresolved=True is incompatible with edge_filter; "
                "UnresolvedCallSite rows have no edge attributes to filter on"
            )
        if include_unresolved and requested_edge_types != ["CALLS"]:
            return _fail("include_unresolved requires edge_types=['CALLS']")
        if include_unresolved and direction != "out":
            return _fail('include_unresolved requires direction="out"')
        if ef and (err := _edgefilter_applicability_error(requested_edge_types, ef)):
            _log_fail_loud("edge_filter")
            return _fail(err)
        if nf and (err := _validate_no_wildcards(nf)):
            _log_fail_loud("wildcard")
            return _fail(err, echo_types=False)
        if composed_keys and direction != "out":
            return _fail('Composed edge types require direction="out"')

        use_calls_path = flat_labels == ["CALLS"] and not composed_keys
        origins = [ids] if isinstance(ids, str) else list(ids)
        if not origins:
            # Without this guard the later origins[0] lookup surfaces as an
            # opaque "list index out of range" message.
            return _fail("ids must contain at least one node id")

        # Pagination can only be pushed down to the query when nothing is
        # filtered, merged or de-duplicated after the rows come back.
        paginate_in_sql = (
            use_calls_path
            and len(origins) == 1
            and nf is None
            and not include_unresolved
            and not dedup_calls
        )

        # The flat-label query text is identical for every origin, so it is
        # built once here; only the bound $id parameter changes per origin.
        flat_query: str | None = None
        label_bindings: dict[str, Any] = {}
        if flat_labels and not use_calls_path:
            # Kuzu 0.11.x can drop `label(e) IN $list` in WHERE; use OR of scalar equalities.
            label_params = [f"l{i}" for i in range(len(flat_labels))]
            label_predicate = "(" + " OR ".join(f"label(e) = ${name}" for name in label_params) + ")"
            label_bindings = dict(zip(label_params, flat_labels, strict=True))
            match_pattern = "(a)-[e]->(b)" if direction == "out" else "(a)<-[e]-(b)"
            flat_query = (
                f"MATCH {match_pattern} WHERE a.id = $id AND "
                f"{label_predicate} "
                "RETURN b.id AS other_id, label(e) AS edge_type, e.confidence AS confidence, "
                "e.strategy AS strategy, e.match AS match, e.mechanism AS mechanism, "
                "e.annotation AS annotation, e.field_or_param AS field_or_param, "
                "e.source AS source, e.call_site_line AS call_site_line, "
                "e.call_site_byte AS call_site_byte, e.arg_count AS arg_count, "
                "e.resolved AS resolved"
            )

        def _admit(other_id: str) -> tuple[Any | None, str | None]:
            # Resolve the far endpoint and apply the node filter.
            # Returns (node_ref, None) when the neighbour is kept,
            # (None, None) when it is skipped, and (None, message) when the
            # filter is not applicable to the neighbour's kind.
            other_kind = _resolve_node_kind(g, other_id)
            other_rec = _load_node_record(g, other_id, other_kind)
            if other_rec is None:
                return None, None
            if nf and (applicability_err := _nodefilter_applicability_error(other_kind, nf)):
                return None, applicability_err
            if not _node_matches_filter(other_kind, other_rec, nf):
                return None, None
            return _node_ref_from_row(other_kind, other_rec), None

        results: list[Edge] = []
        unfiltered_calls_count: int | None = None
        unresolved_count: int | None = None
        calls_row_count: int | None = None
        if use_calls_path and len(origins) == 1 and direction == "out":
            unresolved_count = g.count_unresolved_for_caller(origins[0])
            calls_row_count = g.count_calls_for_symbol(origins[0], direction=direction)

        for origin_id in origins:
            origin_kind = _resolve_node_kind(g, origin_id)

            if ordered_composed:
                if origin_kind != "symbol":
                    first_key = ordered_composed[0]
                    axis_msg = (
                        f"Composed edge types ({first_key}) require a method Symbol origin"
                        if first_key in _OVERRIDE_COMPOSED_EDGE_TYPES
                        else f"Composed edge types ({first_key}) require a type Symbol origin"
                    )
                    return _fail(axis_msg)
                origin_row = _load_node_record(g, origin_id, "symbol")
                sym_kind = str((origin_row or {}).get("kind") or "")
                mods_raw = (origin_row or {}).get("modifiers")
                mods = mods_raw if isinstance(mods_raw, list) else None
                if err := _composed_axis_origin_error(
                    symbol_kind=sym_kind,
                    modifiers=mods,
                    declares_composed=declares_composed,
                    override_composed=override_composed,
                ):
                    return _fail(err)

            if use_calls_path:
                try:
                    origin_edges = _neighbors_calls_for_origin(
                        g,
                        origin_id,
                        direction=direction,
                        nf=nf,
                        ef=ef,
                        offset=offset if paginate_in_sql else 0,
                        limit=limit if paginate_in_sql else None,
                        include_unresolved=include_unresolved,
                        dedup_calls=dedup_calls,
                    )
                except ValueError as exc:
                    return _fail(str(exc))
                # Record the unfiltered total once, the first time a role
                # filter with a fallback value yields no edges.
                if (
                    ef is not None
                    and ef.callee_declaring_role in _ROLE_FILTER_OTHER_FALLBACK_VALUES
                    and not origin_edges
                    and unfiltered_calls_count is None
                ):
                    unfiltered_calls_count = g.count_calls_for_symbol(origin_id, direction=direction)
                results.extend(origin_edges)
                continue

            if flat_query is not None:
                rows = g._rows(  # noqa: SLF001
                    flat_query,
                    {"id": origin_id, **label_bindings},
                )
                for row in rows:
                    other_ref, err = _admit(str(row.get("other_id") or ""))
                    if err:
                        _log_fail_loud("applicability")
                        return _fail(err, echo_types=False)
                    if other_ref is None:
                        continue
                    results.append(
                        Edge(
                            origin_id=origin_id,
                            edge_type=str(row.get("edge_type") or ""),
                            direction=direction,
                            other=other_ref,
                            attrs=_neighbor_edge_attrs(row),
                        )
                    )

            for composed_key in ordered_composed:
                if composed_key in _MEMBER_COMPOSED_EDGE_TYPES:
                    traversal_rows = g.member_edge_traversal_for(origin_id, composed_key)
                else:
                    traversal_rows = g.override_axis_traversal_for(origin_id, composed_key)
                for row in traversal_rows:
                    other_ref, err = _admit(str(row.get("other_id") or ""))
                    if err:
                        _log_fail_loud("applicability")
                        return _fail(err, echo_types=False)
                    if other_ref is None:
                        continue
                    # OVERRIDDEN_BY edges are emitted without attributes.
                    edge_attrs: dict[str, Any] = (
                        {} if composed_key == "OVERRIDDEN_BY" else _neighbor_edge_attrs(row)
                    )
                    results.append(
                        Edge(
                            origin_id=origin_id,
                            edge_type=composed_key,
                            direction="out",
                            other=other_ref,
                            attrs=edge_attrs,
                        )
                    )

        # Slice in Python unless the query already applied limit/offset.
        # Previously the single-origin CALLS path skipped this slice even
        # when pagination had NOT been pushed down (node filter, unresolved
        # rows or de-duplication), so limit/offset were silently ignored.
        sliced = results if paginate_in_sql else results[offset : offset + limit]

        first_origin = origins[0]
        origin_kind = _resolve_node_kind(g, first_origin)
        subject_record = _load_node_record(g, first_origin, origin_kind)
        neigh_payload = {
            "success": True,
            "results": [e.model_dump() for e in sliced],
            "requested_edge_types": requested_edge_types,
            "requested_direction": direction,
            "offset": offset,
            "origin_id": first_origin,
            "subject_record": subject_record,
            "node_filter": nf.model_dump(exclude_none=True) if nf else None,
            "edge_filter": ef.model_dump(exclude_none=True) if ef else None,
            "edge_filter_provided": ef is not None,
            "include_unresolved": include_unresolved,
            "dedup_calls": dedup_calls,
            "unfiltered_calls_count": unfiltered_calls_count,
            "unresolved_count": unresolved_count,
            "calls_row_count": calls_row_count,
        }
        return NeighborsOutput(
            success=True,
            results=sliced,
            requested_edge_types=requested_edge_types,
            hints=generate_hints("neighbors", neigh_payload),
        )
    except ValidationError:
        raise
    except Exception as exc:
        # Tool boundary: report unexpected failures as a payload instead of
        # propagating them to the caller.
        return NeighborsOutput(success=False, message=str(exc), hints=[], requested_edge_types=[])