java-codebase-rag 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ast_java.py +2813 -0
- brownfield_events.py +58 -0
- build_ast_graph.py +3081 -0
- chunk_heuristics.py +62 -0
- graph_enrich.py +1681 -0
- index_common.py +10 -0
- java_codebase_rag/__init__.py +1 -0
- java_codebase_rag/cli.py +761 -0
- java_codebase_rag/cli_progress.py +52 -0
- java_codebase_rag/config.py +327 -0
- java_codebase_rag/pipeline.py +189 -0
- java_codebase_rag-0.1.0.dist-info/METADATA +818 -0
- java_codebase_rag-0.1.0.dist-info/RECORD +27 -0
- java_codebase_rag-0.1.0.dist-info/WHEEL +5 -0
- java_codebase_rag-0.1.0.dist-info/entry_points.txt +3 -0
- java_codebase_rag-0.1.0.dist-info/licenses/LICENSE +21 -0
- java_codebase_rag-0.1.0.dist-info/top_level.txt +17 -0
- java_index_flow_lancedb.py +398 -0
- java_index_v1_common.py +33 -0
- java_ontology.py +446 -0
- kuzu_queries.py +1989 -0
- mcp_hints.py +748 -0
- mcp_v2.py +1957 -0
- path_filtering.py +472 -0
- pr_analysis.py +534 -0
- search_lancedb.py +1075 -0
- server.py +578 -0
mcp_v2.py
ADDED
|
@@ -0,0 +1,1957 @@
|
|
|
1
|
+
"""MCP V2 graph query surface (``search`` / ``find`` / ``describe`` / ``neighbors`` / ``resolve``).
|
|
2
|
+
|
|
3
|
+
Strict frame contract
|
|
4
|
+
---------------------
|
|
5
|
+
NodeFilter is a typed predicate bag: each populated field maps to one stored graph
|
|
6
|
+
attribute for the selected kind; inapplicable fields fail loud with a teaching message.
|
|
7
|
+
The ``search`` tool's ``query`` parameter is the ranked-text carve-out; structured
|
|
8
|
+
prefix fields (``fqn_prefix``, ``path_prefix``, ``target_path_prefix``) reject ``*``
|
|
9
|
+
and ``?`` — see ``_validate_no_wildcards``.
|
|
10
|
+
|
|
11
|
+
Revisit trigger (``propose/completed/MCP-FILTER-FRAME-PROPOSE.md`` section 3.4.6)
|
|
12
|
+
--------------------------------------------------------------
|
|
13
|
+
If **three** legitimate issue-tracker workflows appear within **six months** of frame
|
|
14
|
+
lock where the strict frame has no clean analog under ``search``, deferred
|
|
15
|
+
``resolve``, or documented multi-call patterns, reopen the frame for revision.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
import os
|
|
22
|
+
import sys
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
import threading
|
|
25
|
+
from typing import Annotated, Any, Literal, get_args
|
|
26
|
+
|
|
27
|
+
from pydantic import BaseModel, ConfigDict, Field, TypeAdapter, ValidationError, model_validator, validate_call
|
|
28
|
+
from sentence_transformers import SentenceTransformer
|
|
29
|
+
|
|
30
|
+
from index_common import SBERT_MODEL
|
|
31
|
+
from java_codebase_rag.config import resolved_sbert_model_for_process_env
|
|
32
|
+
from java_ontology import EDGE_SCHEMA, ResolveReason
|
|
33
|
+
from kuzu_queries import KuzuGraph, OVERRIDE_AXIS_COMPOSED_EDGE_TYPES
|
|
34
|
+
from mcp_hints import MCP_HINTS_FIELD_DESCRIPTION, generate_hints
|
|
35
|
+
from search_lancedb import TABLES, run_search
|
|
36
|
+
|
|
37
|
+
# Symbol kinds accepted by NodeFilter.symbol_kind / NodeFilter.symbol_kinds.
DeclarationSymbolKind = Literal["class", "interface", "enum", "record", "annotation", "method", "constructor"]

# Stored graph edge labels for one-hop neighbors. Composed DECLARES.* and OVERRIDDEN_BY.*
# dot-keys are separate ComposedEdgeType literals (2-hop traversal). Stored OVERRIDES is an EdgeType.
EdgeType = Literal[
    "EXTENDS",
    "IMPLEMENTS",
    "INJECTS",
    "OVERRIDES",
    "DECLARES",
    "DECLARES_CLIENT",
    "DECLARES_PRODUCER",
    "CALLS",
    "EXPOSES",
    "HTTP_CALLS",
    "ASYNC_CALLS",
]

# Composed (virtual) keys: a first hop (DECLARES or the override axis) followed by a
# terminal edge. These are not stored relationship labels.
ComposedEdgeType = Literal[
    "DECLARES.DECLARES_CLIENT",
    "DECLARES.DECLARES_PRODUCER",
    "DECLARES.EXPOSES",
    "OVERRIDDEN_BY",
    "OVERRIDDEN_BY.DECLARES_CLIENT",
    "OVERRIDDEN_BY.DECLARES_PRODUCER",
    "OVERRIDDEN_BY.EXPOSES",
]

# Element type of the edge-type list validated by _NEIGHBOR_EDGE_TYPES_ADAPTER.
NeighborEdgeType = EdgeType | ComposedEdgeType

# Runtime membership sets derived from the ComposedEdgeType literal.
_COMPOSED_EDGE_TYPES = frozenset(get_args(ComposedEdgeType))
# Member axis: only the composed keys that start with "DECLARES.".
_MEMBER_COMPOSED_EDGE_TYPES = frozenset(
    k for k in _COMPOSED_EDGE_TYPES if k.startswith("DECLARES.")
)
# Override axis: the set is defined in kuzu_queries and aliased here.
_OVERRIDE_COMPOSED_EDGE_TYPES = OVERRIDE_AXIS_COMPOSED_EDGE_TYPES

# Validator for a list of edge types; min_length=1 rejects an empty list.
_NEIGHBOR_EDGE_TYPES_ADAPTER = TypeAdapter(
    Annotated[
        list[NeighborEdgeType],
        Field(min_length=1, description="At least one graph edge label or DECLARES.* dot-key"),
    ]
)
|
|
79
|
+
|
|
80
|
+
# Guards lazy construction of the shared SentenceTransformer cached in _st_model
# (see _get_sentence_transformer).
_st_lock = threading.Lock()
_st_model: SentenceTransformer | None = None

# Symbol kinds that count as "type" declarations.
# NOTE(review): presumably consulted when building describe-time edge rollups — the
# usage is outside this view; confirm.
_TYPE_SYMBOL_KINDS_FOR_EDGE_ROLLUP = frozenset(
    {"class", "interface", "enum", "record", "annotation"}
)

# NOTE(review): presumably the kinds eligible for the override-axis rollup — confirm.
_METHOD_SYMBOL_KINDS_FOR_OVERRIDE_ROLLUP = frozenset({"method"})

# Process-local fail-loud tallies keyed by category; read and written under
# _fail_loud_lock by _log_fail_loud and filter_frame_counters.
_fail_loud_counts: dict[str, int] = {}
_fail_loud_lock = threading.Lock()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _log_fail_loud(category: str) -> None:
    """Count one fail-loud event for ``category`` and report it on stderr (PR-FRAME-3).

    The tally is kept in the process-local ``_fail_loud_counts`` map and is updated
    while holding ``_fail_loud_lock``.
    """
    with _fail_loud_lock:
        n = _fail_loud_counts.get(category, 0) + 1
        _fail_loud_counts[category] = n
    # NOTE(review): the viewed source lost its indentation; the print is placed after the
    # lock is released here — confirm against the original layout.
    print(f"[filter-frame] fail-loud category={category} count={n}", file=sys.stderr, flush=True)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def filter_frame_counters() -> dict[str, int]:
    """Return a copy of the fail-loud tallies (tests / local diagnostics; not an MCP tool).

    The copy is taken under ``_fail_loud_lock``; mutating the result does not affect
    the live counters.
    """
    with _fail_loud_lock:
        snapshot = _fail_loud_counts.copy()
    return snapshot
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _get_sentence_transformer(model_name: str, device: str | None) -> SentenceTransformer:
    """Return the shared ``SentenceTransformer``, constructing it on first use.

    Construction happens under ``_st_lock``. Only the first call's ``model_name`` and
    ``device`` take effect: once ``_st_model`` is set, later calls get the cached
    instance whatever arguments they pass.
    """
    global _st_model
    with _st_lock:
        model = _st_model
        if model is None:
            # NOTE(review): trust_remote_code=True is forwarded to the model loader —
            # confirm every configured model source is trusted.
            model = SentenceTransformer(model_name, device=device, trust_remote_code=True)
            _st_model = model
        return model
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class NodeFilter(BaseModel):
    # Typed predicate bag for node queries; every field is optional and defaults to None.
    # extra="forbid" turns unknown keys into a validation error.
    # Which fields may be populated for a node kind is defined by
    # _NODEFILTER_APPLICABLE_FIELDS. Declaration order matters: it is captured in
    # _NODEFILTER_FIELD_ORDER and decides the order of field names in error messages.
    model_config = ConfigDict(extra="forbid")

    # Applicable to every kind (symbol, route, client, producer).
    microservice: str | None = None
    module: str | None = None
    # Applicable to client and producer.
    source_layer: str | None = None
    # Applicable to symbol only.
    role: str | None = None
    exclude_roles: list[str] | None = None
    annotation: str | None = None
    capability: str | None = None
    fqn_prefix: str | None = None  # `*` / `?` are rejected by _validate_no_wildcards
    symbol_kind: DeclarationSymbolKind | None = None
    symbol_kinds: list[DeclarationSymbolKind] | None = None
    # Applicable to route and client.
    http_method: str | None = None
    # Applicable to route only.
    path_prefix: str | None = None  # `*` / `?` are rejected by _validate_no_wildcards
    framework: str | None = None
    # Applicable to client only.
    client_kind: str | None = None
    target_service: str | None = None
    target_path_prefix: str | None = None  # `*` / `?` are rejected by _validate_no_wildcards
    # Applicable to producer only.
    producer_kind: str | None = None
    topic_prefix: str | None = None
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class EdgeFilter(BaseModel):
    # Predicate bag for edge attributes; unknown keys are rejected (extra="forbid").
    # Field order is captured in _EDGEFILTER_FIELD_ORDER, so it must not be rearranged.
    model_config = ConfigDict(extra="forbid")

    min_confidence: float | None = None
    # Strategy axis: at most one of the two lists may be non-empty.
    exclude_strategies: list[str] | None = None
    include_strategies: list[str] | None = None
    # Callee-declaring-role axis: at most one of the three may be populated.
    callee_declaring_role: str | None = None
    callee_declaring_roles: list[str] | None = None
    exclude_callee_declaring_roles: list[str] | None = None

    @model_validator(mode="after")
    def _strategy_axes_mutually_exclusive(self) -> EdgeFilter:
        """Reject a filter whose include and exclude strategy lists are both non-empty."""
        if self.include_strategies and self.exclude_strategies:
            raise ValueError("include_strategies and exclude_strategies are mutually exclusive")
        return self

    @model_validator(mode="after")
    def _role_axes_mutually_exclusive(self) -> EdgeFilter:
        """Reject a filter that populates more than one callee-declaring-role field.

        The scalar field counts when it is not ``None``; the list fields count only
        when they are non-empty.
        """
        populated_axes = [
            self.callee_declaring_role is not None,
            bool(self.callee_declaring_roles),
            bool(self.exclude_callee_declaring_roles),
        ]
        if populated_axes.count(True) > 1:
            raise ValueError(
                "callee_declaring_role, callee_declaring_roles, and "
                "exclude_callee_declaring_roles are mutually exclusive"
            )
        return self
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# Field names in model declaration order; used to produce deterministic orderings.
_NODEFILTER_FIELD_ORDER: tuple[str, ...] = tuple(NodeFilter.model_fields.keys())
_EDGEFILTER_FIELD_ORDER: tuple[str, ...] = tuple(EdgeFilter.model_fields.keys())

# Populated EdgeFilter field -> EDGE_SCHEMA attribute name used in Cypher pushdown.
_EDGEFILTER_FIELD_TO_ATTR: dict[str, str] = {
    "min_confidence": "confidence",
    "exclude_strategies": "strategy",
    "include_strategies": "strategy",
    "callee_declaring_role": "callee_declaring_role",
    "callee_declaring_roles": "callee_declaring_role",
    "exclude_callee_declaring_roles": "callee_declaring_role",
}

# NOTE(review): usage is outside this view; presumably role values given special
# fallback handling when filtering by role — confirm.
_ROLE_FILTER_OTHER_FALLBACK_VALUES = frozenset({"SERVICE", "REPOSITORY"})

# Per node kind, the NodeFilter fields that may be populated. Any other populated
# field is reported by _nodefilter_applicability_error.
_NODEFILTER_APPLICABLE_FIELDS: dict[Literal["symbol", "route", "client", "producer"], tuple[str, ...]] = {
    "symbol": (
        "microservice",
        "module",
        "role",
        "exclude_roles",
        "annotation",
        "capability",
        "fqn_prefix",
        "symbol_kind",
        "symbol_kinds",
    ),
    "route": (
        "microservice",
        "module",
        "http_method",
        "path_prefix",
        "framework",
    ),
    "client": (
        "microservice",
        "module",
        "source_layer",
        "client_kind",
        "target_service",
        "target_path_prefix",
        "http_method",
    ),
    "producer": (
        "microservice",
        "module",
        "source_layer",
        "producer_kind",
        "topic_prefix",
    ),
}
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _ordered_nodefilter_fields(field_names: set[str]) -> list[str]:
    """Return the members of ``field_names`` in NodeFilter declaration order.

    Names that are not NodeFilter fields are dropped.
    """
    return list(filter(field_names.__contains__, _NODEFILTER_FIELD_ORDER))
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _populated_nodefilter_fields(nf: NodeFilter) -> set[str]:
    """Collect the names of the NodeFilter fields that carry a value.

    A field is skipped when it is ``None`` or an empty list; any other value
    (including an empty string) counts as populated.
    """

    def _is_set(value) -> bool:
        if value is None:
            return False
        return not (isinstance(value, list) and not value)

    return {name for name in _NODEFILTER_FIELD_ORDER if _is_set(getattr(nf, name))}
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _nodefilter_inapplicable_fields(
    kind: Literal["symbol", "route", "client", "producer"], nf: NodeFilter,
) -> list[str]:
    """List the populated fields of ``nf`` that are not allowed for ``kind``.

    The result follows NodeFilter declaration order and is empty when every
    populated field is applicable.
    """
    in_use = _populated_nodefilter_fields(nf)
    allowed = _NODEFILTER_APPLICABLE_FIELDS[kind]
    stray = {name for name in in_use if name not in allowed}
    return _ordered_nodefilter_fields(stray)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _nodefilter_applicability_error(
    kind: Literal["symbol", "route", "client", "producer"], nf: NodeFilter,
) -> str | None:
    """Build the teaching message for fields of ``nf`` that do not apply to ``kind``.

    Returns ``None`` when every populated field is applicable; otherwise a message
    naming the offending fields and the fields that are allowed for the kind.
    """
    offending = _nodefilter_inapplicable_fields(kind, nf)
    if offending:
        allowed_text = ", ".join(_NODEFILTER_APPLICABLE_FIELDS[kind])
        offending_text = ", ".join(offending)
        return (
            f"Invalid filter for kind='{kind}': populated field(s) not applicable: [{offending_text}]. "
            f"Applicable field(s): [{allowed_text}]"
        )
    return None
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _validate_no_wildcards(nf: NodeFilter) -> str | None:
    """Reject ``*`` / ``?`` in prefix-match fields; wildcards belong in ``search(query=…)``.

    Checks ``fqn_prefix``, ``path_prefix`` and ``target_path_prefix`` in that order and
    returns the message for the first offender, or ``None`` when all are clean.
    """
    for field_name in ("fqn_prefix", "path_prefix", "target_path_prefix"):
        candidate = getattr(nf, field_name)
        if candidate is not None and any(mark in candidate for mark in "*?"):
            return (
                f"Wildcards (* and ?) are not supported in structured filter field `{field_name}`; "
                "use search(query=...) for ranked text match instead."
            )
    return None
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def _filter_validation_error_message(exc: ValidationError) -> str:
    """Flatten a validation error into a single ``Invalid filter: …`` line.

    Each entry of ``exc.errors()`` becomes ``"<dotted loc>: <msg>"`` (or just the
    message when it has no location); entries are joined with ``"; "``. When there
    are no entries, ``str(exc)`` is used as the detail text.
    """

    def _render(err) -> str:
        where = ".".join(map(str, err.get("loc", ())))
        what = str(err.get("msg") or "invalid value")
        return f"{where}: {what}" if where else what

    rendered = [_render(err) for err in exc.errors()]
    details = "; ".join(rendered) if rendered else str(exc)
    return f"Invalid filter: {details}"
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _populated_edgefilter_fields(ef: EdgeFilter) -> set[str]:
    """Collect the names of the EdgeFilter fields that carry a value.

    A field is skipped when it is ``None`` or an empty list; anything else counts
    as populated.
    """

    def _is_set(value) -> bool:
        if value is None:
            return False
        return not (isinstance(value, list) and not value)

    return {name for name in _EDGEFILTER_FIELD_ORDER if _is_set(getattr(ef, name))}
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def _edge_schema_attr_names(edge_type: str) -> set[str]:
    """Return the attribute names EDGE_SCHEMA declares for ``edge_type``.

    An edge type missing from EDGE_SCHEMA yields an empty set.
    """
    spec = EDGE_SCHEMA.get(edge_type)
    return set() if spec is None else {attr.name for attr in spec.attrs}
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _edgefilter_applicability_error(edge_types: list[str], ef: EdgeFilter) -> str | None:
    """Explain why ``ef`` cannot be applied to ``edge_types``, or return ``None``.

    An unpopulated filter is always acceptable. A populated filter is accepted only
    when the request is exactly ``["CALLS"]`` with no composed keys, and every
    populated field maps to an attribute that EDGE_SCHEMA declares for that edge.
    """
    populated = _populated_edgefilter_fields(ef)
    if not populated:
        return None

    # Split the request into stored labels and composed dot-keys, keeping request order.
    flat_types: list[str] = []
    composed: list[str] = []
    for requested in edge_types:
        (composed if requested in _COMPOSED_EDGE_TYPES else flat_types).append(requested)

    calls_only = flat_types == ["CALLS"]
    if composed or not calls_only:
        parts: list[str] = []
        if not calls_only:
            parts.append(f"stored labels {flat_types!r}")
        if composed:
            parts.append(f"composed keys {composed!r}")
        detail = " and ".join(parts) if parts else "requested edge_types"
        return (
            f"edge_filter requires edge_types=['CALLS'] only; {detail} is not supported — "
            "split into separate neighbors calls"
        )

    # From here on flat_types is exactly ["CALLS"].
    wanted_fields = [name for name in _EDGEFILTER_FIELD_ORDER if name in populated]
    for edge_type in flat_types:
        available = _edge_schema_attr_names(edge_type)
        for field_name in wanted_fields:
            attr = _EDGEFILTER_FIELD_TO_ATTR[field_name]
            if attr in available:
                continue
            # NOTE(review): this message says `neighbors_v2` while the message above
            # says `neighbors` — confirm which tool name callers should see.
            return (
                f"{attr} is not on {edge_type}; restrict edge_types to ['CALLS'] "
                "or split into two neighbors_v2 calls"
            )
    return None
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _coerce_edge_filter(
    value: EdgeFilter | dict[str, Any] | str | None,
) -> EdgeFilter | dict[str, Any] | None:
    """Normalize MCP tool input: weak clients sometimes pass JSON-encoded strings.

    Non-string input (``None``, an ``EdgeFilter``, a dict) is returned unchanged.
    A blank string or JSON ``null`` becomes ``None``; a JSON object becomes a dict.

    Raises:
        ValueError: the string is not valid JSON, or decodes to something other
            than an object or ``null``.
    """
    if not isinstance(value, str):
        return value

    text = value.strip()
    if not text:
        return None
    try:
        decoded = json.loads(text)
    except json.JSONDecodeError as exc:
        raise ValueError(f"edge_filter must be a JSON object; invalid JSON: {exc.msg}") from exc

    if decoded is None or isinstance(decoded, dict):
        return decoded
    raise ValueError(
        f"edge_filter must decode to a JSON object, got {type(decoded).__name__}"
    )
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def _coerce_filter(
    value: NodeFilter | dict[str, Any] | str | None,
) -> NodeFilter | dict[str, Any] | None:
    """Normalize MCP tool input: weak clients sometimes pass JSON-encoded strings.

    Non-string input (``None``, a ``NodeFilter``, a dict) is returned unchanged.
    A blank string or JSON ``null`` becomes ``None``; a JSON object becomes a dict.

    Raises:
        ValueError: the string is not valid JSON, or decodes to something other
            than an object or ``null``.
    """
    if not isinstance(value, str):
        return value

    text = value.strip()
    if not text:
        return None
    try:
        decoded = json.loads(text)
    except json.JSONDecodeError as exc:
        raise ValueError(f"filter must be a JSON object; invalid JSON: {exc.msg}") from exc

    if decoded is None or isinstance(decoded, dict):
        return decoded
    raise ValueError(f"filter must decode to a JSON object, got {type(decoded).__name__}")
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
class SearchHit(BaseModel):
    # One ranked text-search row; see _row_to_search_hit for how each field is filled.
    chunk_id: str  # "<filename>:<start byte offset>:<end byte offset>" (_chunk_id_from_row)
    symbol_id: str | None = None  # row symbol_id, else metadata symbol_id, else None
    fqn: str | None = None  # taken from the row's primary_type_fqn
    score: float  # row _rrf_score, else _score, else 0.0
    snippet: str  # the row's text ("" when absent)
    microservice: str | None = None
    module: str | None = None
    role: str | None = None
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
class NodeRef(BaseModel):
    # Compact reference to a graph node.
    id: str
    kind: Literal["symbol", "route", "client", "producer", "unresolved_call_site"]
    # For symbols built by _node_ref_from_row this is the stored fqn; for routes,
    # clients and producers it is a display string assembled from row fields.
    fqn: str
    symbol_kind: str | None = None  # set by _node_ref_from_row for kind == "symbol" only
    microservice: str | None = None
    module: str | None = None
    role: str | None = None  # set by _node_ref_from_row for kind == "symbol" only
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
class NodeRecord(BaseModel):
    # Full record for one node: identity, a free-form attribute map, and optional
    # per-edge-label counts (documented in the field description below).
    id: str
    kind: Literal["symbol", "route", "client", "producer"]
    fqn: str
    data: dict[str, Any] = Field(default_factory=dict)
    edge_summary: dict[str, dict[str, int]] | None = Field(
        default=None,
        description=(
            "Per graph edge label, in/out incident counts. For type Symbols (class, interface, "
            "enum, record, annotation), may also include composed dot-keys "
            "`DECLARES.DECLARES_CLIENT`, `DECLARES.DECLARES_PRODUCER`, and `DECLARES.EXPOSES`: 2-hop summaries "
            "(DECLARES to member, then that edge) — edge-row counts; navigable via neighbors for type "
            "Symbol origins (`direction=\"out\"` only). For non-static method Symbols, may include "
            "override-axis virtual keys `OVERRIDDEN_BY`, `OVERRIDDEN_BY.DECLARES_CLIENT`, "
            "`OVERRIDDEN_BY.DECLARES_PRODUCER`, `OVERRIDDEN_BY.EXPOSES` (stored `[:OVERRIDES]` "
            "dispatch hop, then terminal edges; navigable via neighbors for method Symbol origins, "
            "`direction=\"out\"` only; composed results include `via_id` in attrs). Plus an "
            "`OVERRIDES` map entry that **merges** stored `[:OVERRIDES]` in/out counts with the "
            "describe-time dispatch-up rollup (per direction `max`, so inbound stored overrides "
            "are not dropped). The stored relationship label `OVERRIDES` **is** also a valid "
            "EdgeType for one-hop neighbors (`direction=\"in\"` from declaration toward overriders)."
        ),
    )
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
class Edge(BaseModel):
    # One edge as seen from an origin node.
    origin_id: str  # id of the node the edge is reported relative to
    edge_type: str  # plain str here, not restricted to the EdgeType literal
    direction: Literal["in", "out"]
    other: NodeRef  # the node at the far end of the edge
    attrs: dict[str, Any] = Field(default_factory=dict)  # edge attributes; empty by default
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
class SearchOutput(BaseModel):
    # Response envelope for the search tool: ranked hits plus echoed paging values.
    success: bool
    results: list[SearchHit] = Field(default_factory=list)
    message: str | None = None
    limit: int | None = Field(
        default=None,
        description="Echoed from the request — the page size the server applied. None on success=False.",
    )
    offset: int | None = Field(
        default=None,
        description="Echoed from the request — the page offset the server applied. None on success=False.",
    )
    hints: list[str] = Field(default_factory=list, description=MCP_HINTS_FIELD_DESCRIPTION)
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
class FindOutput(BaseModel):
    # Response envelope for the find tool: matching node references plus echoed
    # paging values. Same shape as SearchOutput except for the result element type.
    success: bool
    results: list[NodeRef] = Field(default_factory=list)
    message: str | None = None
    limit: int | None = Field(
        default=None,
        description="Echoed from the request — the page size the server applied. None on success=False.",
    )
    offset: int | None = Field(
        default=None,
        description="Echoed from the request — the page offset the server applied. None on success=False.",
    )
    hints: list[str] = Field(default_factory=list, description=MCP_HINTS_FIELD_DESCRIPTION)
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
class DescribeOutput(BaseModel):
    # Response envelope for the describe tool: at most one full node record.
    success: bool
    record: NodeRecord | None = None  # defaults to None when no record is attached
    message: str | None = None
    hints: list[str] = Field(default_factory=list, description=MCP_HINTS_FIELD_DESCRIPTION)
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
class NeighborsOutput(BaseModel):
    # Response envelope for the neighbors tool: edges plus an echo of the requested types.
    success: bool
    results: list[Edge] = Field(default_factory=list)
    message: str | None = None
    requested_edge_types: list[str] = Field(
        default_factory=list,
        description="Echo of neighbors(edge_types=...) from the request; empty when success=False.",
    )
    hints: list[str] = Field(default_factory=list, description=MCP_HINTS_FIELD_DESCRIPTION)
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
# Outcome classes carried by ResolveOutput.status.
ResolveStatus = Literal["one", "many", "none"]

# NOTE(review): presumably the maximum number of candidates returned by resolve —
# the usage is outside this view; confirm.
_RESOLVE_CANDIDATE_CAP = 10

# Rank per ResolveReason. Several reasons share a rank.
# NOTE(review): presumably a lower value means a stronger match — confirm against the
# code that sorts candidates.
_RESOLVE_REASON_PRIORITY: dict[ResolveReason, int] = {
    "exact_id": 0,
    "exact_fqn": 1,
    "route_method_path": 1,
    "client_target_path": 1,
    "producer_topic_prefix": 1,
    "fqn_suffix": 2,
    "route_template": 2,
    "short_name": 3,
    "client_target": 3,
    "producer_topic": 3,
}

# Cypher RETURN projections, one per node label. Each assumes a fixed match variable
# (s / r / c / p) and aliases columns to the keys read by _node_ref_from_row.
_SYMBOL_RESOLVE_RETURN = (
    "s.id AS id, s.fqn AS fqn, s.microservice AS microservice, "
    "s.module AS module, s.role AS role, s.kind AS symbol_kind"
)

_ROUTE_RESOLVE_RETURN = (
    "r.id AS id, r.kind AS kind, r.framework AS framework, r.method AS method, "
    "r.path AS path, r.path_template AS path_template, r.path_regex AS path_regex, "
    "r.topic AS topic, r.broker AS broker, r.feign_name AS feign_name, r.feign_url AS feign_url, "
    "r.microservice AS microservice, r.module AS module, r.filename AS filename, "
    "r.start_line AS start_line, r.end_line AS end_line, r.resolved AS resolved"
)

_CLIENT_RESOLVE_RETURN = (
    "c.id AS id, c.client_kind AS client_kind, c.target_service AS target_service, "
    "c.method AS method, c.path AS path, c.path_template AS path_template, "
    "c.path_regex AS path_regex, c.member_fqn AS member_fqn, c.member_id AS member_id, "
    "c.microservice AS microservice, c.module AS module, c.filename AS filename, "
    "c.start_line AS start_line, c.end_line AS end_line, c.resolved AS resolved, "
    "c.source_layer AS source_layer"
)

_PRODUCER_RESOLVE_RETURN = (
    "p.id AS id, p.producer_kind AS producer_kind, p.topic AS topic, p.broker AS broker, "
    "p.direction AS direction, p.member_fqn AS member_fqn, p.member_id AS member_id, "
    "p.microservice AS microservice, p.module AS module, p.filename AS filename, "
    "p.start_line AS start_line, p.end_line AS end_line, p.resolved AS resolved, "
    "p.source_layer AS source_layer"
)

# NOTE(review): presumably a row limit applied before candidates are de-duplicated —
# the usage is outside this view; confirm.
_RESOLVE_PRE_DEDUP_LIMIT = 50
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
class ResolveCandidate(BaseModel):
    # One possible match for a resolve request; unknown keys are rejected.
    model_config = ConfigDict(extra="forbid")

    node: NodeRef  # the matched node
    score: float
    reason: ResolveReason  # which matching rule produced this candidate
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
class ResolveOutput(BaseModel):
    # Response envelope for the resolve tool; unknown keys are rejected.
    model_config = ConfigDict(extra="forbid")

    success: bool
    status: ResolveStatus  # "one", "many" or "none"
    node: NodeRef | None = None
    candidates: list[ResolveCandidate] = Field(default_factory=list)
    message: str | None = None
    resolved_identifier: str | None = None
    hints: list[str] = Field(default_factory=list, description=MCP_HINTS_FIELD_DESCRIPTION)
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
def _node_kind_from_id(
    id_str: str,
) -> Literal["symbol", "route", "client", "producer", "unresolved_call_site"]:
    """Infer the node kind from the prefix of ``id_str``.

    Recognised prefixes: ``ucs:``, ``sym:``, ``route:`` / ``r:``, ``client:`` / ``c:``,
    ``producer:`` / ``p:``.

    Raises:
        ValueError: ``id_str`` starts with none of the recognised prefixes.
    """
    # Checked top to bottom, in the same order as the original if-chain.
    prefix_table = (
        (("ucs:",), "unresolved_call_site"),
        (("sym:",), "symbol"),
        (("route:", "r:"), "route"),
        (("client:", "c:"), "client"),
        (("producer:", "p:"), "producer"),
    )
    for prefixes, kind in prefix_table:
        if id_str.startswith(prefixes):
            return kind
    raise ValueError(f"Unknown id prefix for `{id_str}`")
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
def _resolve_node_kind(
    graph: KuzuGraph,
    node_id: str,
) -> Literal["symbol", "route", "client", "producer", "unresolved_call_site"]:
    """Determine the kind of ``node_id``, by prefix first and graph lookup second.

    When the id carries no recognised prefix, the Symbol, Route, Client and Producer
    tables are probed in that order and the first one containing the id wins.

    Raises:
        ValueError: the prefix is unknown and no table contains the id.
    """
    try:
        return _node_kind_from_id(node_id)
    except ValueError:
        pass  # unrecognised prefix: fall through to probing the graph

    probes = (
        ("Symbol", "symbol"),
        ("Route", "route"),
        ("Client", "client"),
        ("Producer", "producer"),
    )
    for label, kind in probes:
        query = f"MATCH (n:{label}) WHERE n.id = $id RETURN n.id AS id LIMIT 1"
        if graph._rows(query, {"id": node_id}):  # noqa: SLF001
            return kind
    raise ValueError(f"Unknown id prefix for `{node_id}`")
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
def _chunk_id_from_row(row: dict[str, Any]) -> str:
    """Build the chunk id ``"<filename>:<start byte>:<end byte>"`` for a search row.

    A missing filename becomes ``""``. An offset is 0 when the ``start`` / ``end``
    entry is missing, is not a dict, or has no truthy ``byte_offset``.
    """
    filename = str(row.get("filename") or "")
    start_pos = row.get("start") or {}
    end_pos = row.get("end") or {}

    def _byte_offset(pos) -> int:
        if not isinstance(pos, dict):
            return 0
        return int(pos.get("byte_offset") or 0)

    return ":".join((filename, str(_byte_offset(start_pos)), str(_byte_offset(end_pos))))
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
def _row_to_search_hit(row: dict[str, Any]) -> SearchHit:
    """Convert one search result row into a ``SearchHit``.

    The score is the row's ``_rrf_score`` when truthy, else ``_score``, else 0.0.
    Optional text fields become ``None`` when the row value is missing or falsy.
    """

    def _text_or_none(key: str) -> str | None:
        value = row.get(key)
        return str(value) if value else None

    score = float(row.get("_rrf_score") or row.get("_score") or 0.0)
    return SearchHit(
        chunk_id=_chunk_id_from_row(row),
        symbol_id=_chunk_to_symbol_id(row),
        fqn=_text_or_none("primary_type_fqn"),
        score=score,
        snippet=str(row.get("text") or ""),
        microservice=_text_or_none("microservice"),
        module=_text_or_none("module"),
        role=_text_or_none("role"),
    )
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
def _chunk_to_symbol_id(chunk_row: dict[str, Any]) -> str | None:
    """Extract the symbol id attached to a chunk row, if any.

    Lookup order: the row's own ``symbol_id``; then ``metadata["symbol_id"]``, where
    ``metadata`` may be a dict or a JSON-encoded object string. Falsy ids are
    treated as absent.

    Returns:
        The id as a string, or ``None`` when no usable id is found (including when
        the metadata string is not valid JSON or does not decode to an object).
    """
    symbol_id = chunk_row.get("symbol_id")
    if symbol_id:
        return str(symbol_id)

    meta = chunk_row.get("metadata")
    if isinstance(meta, str):
        try:
            parsed = json.loads(meta)
        except (ValueError, RecursionError):
            # Best-effort: unparsable metadata just means "no id". Only decoding
            # failures are absorbed (JSONDecodeError is a ValueError subclass), so
            # unrelated errors are no longer hidden.
            return None
        # A JSON string that decodes to a non-object leaves ``meta`` as a str,
        # which fails the dict check below.
        if isinstance(parsed, dict):
            meta = parsed

    if isinstance(meta, dict):
        nested = meta.get("symbol_id")
        if nested:
            return str(nested)
    return None
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
def _symbol_where_from_filter(f: NodeFilter) -> tuple[str, dict[str, Any]]:
    """Translate the symbol-applicable fields of ``f`` into a Cypher WHERE clause.

    Returns ``(where, params)``: ``where`` is ``""`` when no field is truthy,
    otherwise ``"WHERE …"`` with predicates joined by ``AND`` in field order;
    ``params`` maps each used parameter name to its value (list values are copied).
    The predicates refer to the matched symbol as ``s``.
    """
    # (filter field == parameter name, predicate, copy the value into a new list?)
    clauses = (
        ("microservice", "s.microservice = $microservice", False),
        ("module", "s.module = $module", False),
        ("role", "s.role = $role", False),
        ("exclude_roles", "NOT s.role IN $exclude_roles", True),
        ("annotation", "list_contains(s.annotations, $annotation)", False),
        ("capability", "$capability IN s.capabilities", False),
        ("fqn_prefix", "s.fqn STARTS WITH $fqn_prefix", False),
        ("symbol_kind", "s.kind = $symbol_kind", False),
        ("symbol_kinds", "s.kind IN $symbol_kinds", True),
    )
    preds: list[str] = []
    params: dict[str, Any] = {}
    for name, predicate, copy_as_list in clauses:
        value = getattr(f, name)
        if not value:
            continue
        preds.append(predicate)
        params[name] = list(value) if copy_as_list else value

    if not preds:
        return "", params
    return f"WHERE {' AND '.join(preds)}", params
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
def _node_ref_from_row(kind: Literal["symbol", "route", "client", "producer"], row: dict[str, Any]) -> NodeRef:
    """Build a ``NodeRef`` of the given ``kind`` from a query result row.

    ``fqn`` depends on the kind: the stored ``fqn`` for symbols; ``"<method> <path>"``
    for routes; ``"<target_service> <method> <path>"`` for clients; and
    ``"<topic> <broker>"`` otherwise (producers). Paths prefer ``path_template`` over
    ``path``, and the assembled text is stripped of surrounding whitespace.
    ``role`` and ``symbol_kind`` are filled for symbols only.
    """

    def _text(*keys: str) -> str:
        # First truthy value among ``keys`` rendered as text, else "".
        for key in keys:
            value = row.get(key)
            if value:
                return str(value)
        return ""

    role: str | None = None
    symbol_kind: str | None = None
    if kind == "symbol":
        fqn = _text("fqn")
        role = _text("role") or None
        # Accept either the aliased ``symbol_kind`` column or the raw ``kind`` column.
        symbol_kind = _text("symbol_kind", "kind").strip() or None
    elif kind == "route":
        fqn = " ".join((_text("method"), _text("path_template", "path"))).strip()
    elif kind == "client":
        pieces = (_text("target_service"), _text("method"), _text("path_template", "path"))
        fqn = " ".join(pieces).strip()
    else:
        fqn = " ".join((_text("topic"), _text("broker"))).strip()

    return NodeRef(
        id=_text("id"),
        kind=kind,
        fqn=fqn,
        symbol_kind=symbol_kind,
        microservice=_text("microservice") or None,
        module=_text("module") or None,
        role=role,
    )
|
|
704
|
+
|
|
705
|
+
|
|
706
|
+
def _load_node_record(
    graph: KuzuGraph, node_id: str, kind: Literal["symbol", "route", "client", "producer"],
) -> dict[str, Any] | None:
    """Load the stored attributes of one node by id.

    ``kind`` selects the node label and the projected columns; any kind other
    than ``symbol``/``route``/``client`` is queried as a ``Producer``. Every
    column is projected as ``n.<col> AS <col>``.

    Returns the first matching row, or ``None`` when the query yields nothing.
    """
    if kind == "symbol":
        label = "Symbol"
        columns = (
            "id", "kind", "name", "fqn", "package",
            "module", "microservice", "filename",
            "start_line", "end_line", "start_byte",
            "end_byte", "modifiers", "annotations",
            "capabilities", "role", "signature",
            "parent_id", "resolved",
        )
    elif kind == "route":
        label = "Route"
        columns = (
            "id", "kind", "framework", "method", "path",
            "path_template", "path_regex", "topic",
            "broker", "feign_name", "feign_url",
            "microservice", "module", "filename",
            "start_line", "end_line", "resolved",
        )
    elif kind == "client":
        label = "Client"
        columns = (
            "id", "client_kind", "target_service",
            "method", "path", "path_template",
            "path_regex", "member_fqn", "member_id",
            "microservice", "module", "filename",
            "start_line", "end_line", "resolved",
            "source_layer",
        )
    else:
        label = "Producer"
        columns = (
            "id", "producer_kind", "topic", "broker",
            "direction", "member_fqn", "member_id",
            "microservice", "module", "filename",
            "start_line", "end_line", "resolved",
            "source_layer",
        )
    projection = ", ".join(f"n.{column} AS {column}" for column in columns)
    found = graph._rows(f"MATCH (n:{label}) WHERE n.id = $id RETURN {projection}", {"id": node_id})  # noqa: SLF001
    return found[0] if found else None
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
def _incident_counts(cell: dict[str, int] | None) -> dict[str, int]:
    """Normalize an edge-count cell to ``{"in": int, "out": int}``.

    Args:
        cell: Per-edge-type counts, or ``None``/empty when the edge type is absent.

    Returns:
        A fresh dict with integer ``in`` and ``out`` entries. A missing key or a
        falsy value (including ``None``) counts as zero.
    """
    if not cell:
        return {"in": 0, "out": 0}
    # ``or 0`` also covers a key that is present with a ``None`` value, which
    # ``cell.get(key, 0)`` would pass straight into ``int()`` and crash on.
    return {"in": int(cell.get("in") or 0), "out": int(cell.get("out") or 0)}
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
def _merge_overrides_edge_summary(
    stored_before_rollups: dict[str, int],
    summary_after_rollups: dict[str, dict[str, int]],
) -> None:
    """Fold stored ``OVERRIDES`` counts back into a summary that rollups may have overwritten.

    Rollup rows reuse the ``OVERRIDES`` key for dispatch-up counts only (``in`` is
    always zero there), while stored ``[:OVERRIDES]`` edges carry real ``in``/``out``
    counts from Kuzu. Each direction is merged with ``max`` so inbound override
    edges stay visible.

    ``summary_after_rollups`` is mutated in place: the ``OVERRIDES`` entry is
    replaced by the merged counts, or removed when both directions are zero.
    Nothing happens when the summary has no ``OVERRIDES`` entry and the stored
    counts are all zero.
    """
    rollup = _incident_counts(summary_after_rollups.get("OVERRIDES"))
    nothing_stored = not any(stored_before_rollups.values())
    if nothing_stored and "OVERRIDES" not in summary_after_rollups:
        return

    merged = {
        direction: max(stored_before_rollups[direction], rollup[direction])
        for direction in ("in", "out")
    }
    if merged["in"] == 0 and merged["out"] == 0:
        summary_after_rollups.pop("OVERRIDES", None)
        return
    summary_after_rollups["OVERRIDES"] = merged
|
|
778
|
+
|
|
779
|
+
|
|
780
|
+
def _edge_summary_for_node(
    graph: KuzuGraph, node_id: str, *, kind: str, row: dict[str, Any]
) -> dict[str, dict[str, int]]:
    """Return per-edge-type ``in``/``out`` counts for a node, including rollups.

    Starts from a copy of ``graph.edge_counts_for(node_id)``. For symbols whose
    ``kind`` is in ``_TYPE_SYMBOL_KINDS_FOR_EDGE_ROLLUP`` the member-edge rollup is
    layered on top. For symbols whose ``kind`` is in
    ``_METHOD_SYMBOL_KINDS_FOR_OVERRIDE_ROLLUP`` the override-axis rollup is layered
    on top and then reconciled with the stored ``OVERRIDES`` counts. Other nodes
    get the plain stored counts.
    """
    summary = dict(graph.edge_counts_for(node_id))
    symbol_kind = str(row.get("kind") or "")
    if kind != "symbol":
        return summary

    if symbol_kind in _TYPE_SYMBOL_KINDS_FOR_EDGE_ROLLUP:
        summary.update(graph.member_edge_rollup_for(node_id))
    elif symbol_kind in _METHOD_SYMBOL_KINDS_FOR_OVERRIDE_ROLLUP:
        # Capture the stored counts first: the rollup reuses the same key.
        stored = _incident_counts(summary.get("OVERRIDES"))
        summary.update(graph.override_axis_rollup_for(node_id))
        _merge_overrides_edge_summary(stored, summary)
    return summary
|
|
792
|
+
|
|
793
|
+
|
|
794
|
+
def _node_matches_filter(
    kind: Literal["symbol", "route", "client", "producer"], row: dict[str, Any], f: NodeFilter | None,
) -> bool:
    """Tell whether ``row`` satisfies every populated field of filter ``f``.

    A ``None`` filter accepts everything. ``microservice`` and ``module`` apply to
    all kinds and ``source_layer`` to clients and producers; the remaining fields
    are checked only for the kind they belong to. Any kind other than
    ``symbol``/``route``/``client`` is checked with the producer fields. Unset
    (falsy) filter fields are ignored, and missing row values compare as ``""``.
    """
    if f is None:
        return True

    def text(*keys: str) -> str:
        # First truthy value among ``keys`` rendered as a string, else "".
        for key in keys:
            value = row.get(key)
            if value:
                return str(value)
        return ""

    def differs(expected: Any, *keys: str) -> bool:
        # An unset filter field never rejects a row.
        return bool(expected) and text(*keys) != expected

    def outside_prefix(prefix: Any, *keys: str) -> bool:
        return bool(prefix) and not text(*keys).startswith(prefix)

    if differs(f.microservice, "microservice") or differs(f.module, "module"):
        return False
    if kind in ("client", "producer") and differs(f.source_layer, "source_layer"):
        return False

    if kind == "symbol":
        role = text("role")
        # Graph rows use ``kind``/``fqn``; search rows use ``symbol_kind``/``primary_type_fqn``.
        symbol_kind_val = text("kind", "symbol_kind")
        return not (
            differs(f.role, "role")
            or (f.exclude_roles and role in set(f.exclude_roles))
            or (f.annotation and f.annotation not in list(row.get("annotations") or []))
            or (f.capability and f.capability not in list(row.get("capabilities") or []))
            or outside_prefix(f.fqn_prefix, "fqn", "primary_type_fqn")
            or differs(f.symbol_kind, "kind", "symbol_kind")
            or (f.symbol_kinds and symbol_kind_val not in set(f.symbol_kinds))
        )
    if kind == "route":
        return not (
            differs(f.http_method, "method")
            or outside_prefix(f.path_prefix, "path")
            or differs(f.framework, "framework")
        )
    if kind == "client":
        return not (
            differs(f.client_kind, "client_kind")
            or differs(f.target_service, "target_service")
            or outside_prefix(f.target_path_prefix, "path")
            or differs(f.http_method, "method")
        )
    return not (
        differs(f.producer_kind, "producer_kind")
        or outside_prefix(f.topic_prefix, "topic")
    )
|
|
851
|
+
|
|
852
|
+
|
|
853
|
+
def search_v2(
    query: str,
    table: str = "java",
    hybrid: bool = False,
    limit: int = 5,
    offset: int = 0,
    path_contains: str | None = None,
    filter: NodeFilter | dict[str, Any] | str | None = None,
    graph: KuzuGraph | None = None,
) -> SearchOutput:
    """Run the ranked-text ``search`` tool and post-filter its rows.

    The optional ``filter`` is coerced and validated as a symbol ``NodeFilter``;
    unknown keys, inapplicable fields and wildcards in prefix fields each produce
    a failed ``SearchOutput`` carrying the explanatory message. The index location
    comes from ``JAVA_CODEBASE_RAG_INDEX_DIR`` or defaults to
    ``<cwd>/.java-codebase-rag``; ``table="all"`` searches every key of ``TABLES``.

    Rows returned by ``run_search`` are then dropped when ``path_contains`` is not
    in their ``filename`` or when they fail the filter, so fewer than ``limit``
    hits may come back. ``graph`` is accepted but not used here. Any exception is
    converted into a failed ``SearchOutput`` with the exception text.
    """

    def failure(message: str) -> SearchOutput:
        return SearchOutput(success=False, message=message, hints=[], limit=None, offset=None)

    try:
        raw_filter = _coerce_filter(filter)
        try:
            if raw_filter is None or isinstance(raw_filter, NodeFilter):
                nf = raw_filter
            else:
                nf = NodeFilter.model_validate(raw_filter)
        except ValidationError as exc:
            _log_fail_loud("unknown_key")
            return failure(_filter_validation_error_message(exc))
        if nf:
            if problem := _nodefilter_applicability_error("symbol", nf):
                _log_fail_loud("applicability")
                return failure(problem)
            if problem := _validate_no_wildcards(nf):
                _log_fail_loud("wildcard")
                return failure(problem)

        model_name = resolved_sbert_model_for_process_env(SBERT_MODEL)
        device = os.environ.get("SBERT_DEVICE") or None
        model = _get_sentence_transformer(model_name, device)

        default_index = (Path.cwd() / ".java-codebase-rag").resolve()
        uri = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip() or str(default_index)
        local_path = Path(uri)
        # Remote object-store URIs are passed through untouched.
        if not uri.startswith(("s3://", "gs://", "az://")) and local_path.exists():
            uri = str(local_path.resolve())

        rows = run_search(
            query,
            uri=uri,
            table_keys=list(TABLES) if table == "all" else [table],
            hybrid=hybrid,
            limit=limit,
            offset=offset,
            path_substring=path_contains,
            model_name=model_name,
            device=device,
            model=model,
        )

        def keep(row: dict[str, Any]) -> bool:
            if path_contains and path_contains not in str(row.get("filename") or ""):
                return False
            # Search rows are always matched as symbols.
            return not nf or _node_matches_filter("symbol", row, nf)

        hits: list[SearchHit] = [_row_to_search_hit(row) for row in rows if keep(row)]
        hint_payload = {
            "success": True,
            "results": [hit.model_dump() for hit in hits],
            "limit": limit,
            "offset": offset,
        }
        return SearchOutput(
            success=True,
            results=hits,
            limit=limit,
            offset=offset,
            hints=generate_hints("search", hint_payload),
        )
    except Exception as exc:
        return failure(str(exc))
|
|
932
|
+
|
|
933
|
+
|
|
934
|
+
def find_v2(
    kind: Literal["symbol", "route", "client", "producer"],
    filter: NodeFilter | dict[str, Any] | str,
    limit: int = 25,
    offset: int = 0,
    graph: KuzuGraph | None = None,
) -> FindOutput:
    """List nodes of ``kind`` that satisfy a structured ``NodeFilter``.

    Args:
        kind: Node kind to list; anything other than ``symbol``/``route``/``client``
            is listed through ``list_producers``.
        filter: A ``NodeFilter``, a dict, or a string accepted by ``_coerce_filter``.
            A coerced value of ``None`` is treated as an empty filter.
        limit: Page size.
        offset: Number of matching rows to skip.
        graph: Graph to query; defaults to ``KuzuGraph.get()``.

    Returns:
        A ``FindOutput``. Validation problems (unknown keys, fields that do not
        apply to ``kind``, wildcards in prefix fields) and any raised exception
        are reported as ``success=False`` with a message.

    ``limit`` and ``offset`` are coerced with ``int()`` once, and the same values
    drive the fetch cap, the ``has_more_results`` flag and the page slice.
    """

    def failure(message: str) -> FindOutput:
        return FindOutput(success=False, message=message, hints=[], limit=None, offset=None)

    try:
        g = graph or KuzuGraph.get()
        raw_filter = _coerce_filter(filter)
        if raw_filter is None:
            raw_filter = {}
        try:
            nf = raw_filter if isinstance(raw_filter, NodeFilter) else NodeFilter.model_validate(raw_filter)
        except ValidationError as exc:
            _log_fail_loud("unknown_key")
            return failure(_filter_validation_error_message(exc))
        if err := _nodefilter_applicability_error(kind, nf):
            _log_fail_loud("applicability")
            return failure(err)
        if err := _validate_no_wildcards(nf):
            _log_fail_loud("wildcard")
            return failure(err)

        # Coerce once: previously only the fetch cap and has_more used int(),
        # while the slice below used the raw arguments and could raise.
        page_size = int(limit)
        page_start = int(offset)
        # One extra row beyond the requested page reveals whether more results exist.
        fetch_cap = page_size + page_start + 1

        if kind == "symbol":
            where, params = _symbol_where_from_filter(nf)
            params["lim"] = fetch_cap
            rows = g._rows(  # noqa: SLF001
                f"MATCH (s:Symbol) {where} RETURN s.id AS id, s.fqn AS fqn, s.microservice AS microservice, "
                "s.module AS module, s.role AS role, s.kind AS symbol_kind ORDER BY s.fqn LIMIT $lim",
                params,
            )
        elif kind == "route":
            rows = g.list_routes(
                microservice=nf.microservice,
                framework=nf.framework,
                path_prefix=nf.path_prefix,
                method=nf.http_method,
                limit=max(500, fetch_cap),
            )
            # Re-check in Python: the list call does not receive every filter field.
            rows = [r for r in rows if _node_matches_filter("route", r, nf)]
        elif kind == "client":
            rows = g.list_clients(
                microservice=nf.microservice,
                client_kind=nf.client_kind,
                target_service=nf.target_service,
                path_prefix=nf.target_path_prefix,
                method=nf.http_method,
                limit=max(500, fetch_cap),
            )
            rows = [r for r in rows if _node_matches_filter("client", r, nf)]
        else:
            rows = g.list_producers(
                microservice=nf.microservice,
                producer_kind=nf.producer_kind,
                topic_prefix=nf.topic_prefix,
                limit=max(500, fetch_cap),
            )
            rows = [r for r in rows if _node_matches_filter("producer", r, nf)]

        has_more_results = len(rows) > page_start + page_size
        rows = rows[page_start : page_start + page_size]
        refs = [_node_ref_from_row(kind, r) for r in rows]
        hint_payload: dict[str, Any] = {
            "success": True,
            "kind": kind,
            "results": [r.model_dump() for r in refs],
            "limit": limit,
            "offset": offset,
            "filter": nf.model_dump(exclude_none=True),
            "has_more_results": has_more_results,
        }
        return FindOutput(
            success=True,
            results=refs,
            limit=limit,
            offset=offset,
            hints=generate_hints("find", hint_payload),
        )
    except Exception as exc:
        return failure(str(exc))
|
|
1021
|
+
|
|
1022
|
+
|
|
1023
|
+
# Returned by ``describe`` when the caller passes an UnresolvedCallSite id
# (``ucs:`` prefix); it points at the supported ways to inspect those sites.
_DESCRIBE_UCS_ID_MESSAGE = " ".join(
    (
        "UnresolvedCallSite ids (ucs:…) are not describable — use describe(caller_method_id)",
        "for record.data.unresolved_call_sites, neighbors(..., include_unresolved=True),",
        "or java-codebase-rag unresolved-calls list --method-id <caller_id>",
    )
)
|
|
1028
|
+
|
|
1029
|
+
|
|
1030
|
+
def describe_v2(
    id: str | None = None,
    fqn: str | None = None,
    graph: KuzuGraph | None = None,
) -> DescribeOutput:
    """Return the full stored record and edge summary for one node.

    The node is addressed by ``id``, or by exact Symbol ``fqn`` when no usable
    ``id`` is given. If several symbols share the FQN, the first one is described
    and ``message`` explains how to disambiguate with ``resolve``.
    UnresolvedCallSite ids are rejected with a pointer to the supported
    alternatives. For symbols whose kind is in
    ``_METHOD_SYMBOL_KINDS_FOR_OVERRIDE_ROLLUP``, unresolved call sites are added
    to ``record.data`` when there are any, plus a footer when only part of them is
    inlined.

    Every failure, including any raised exception, is reported as
    ``success=False`` with a message and no hints.
    """

    def failure(message: str) -> DescribeOutput:
        return DescribeOutput(success=False, message=message, hints=[])

    try:
        g = graph or KuzuGraph.get()
        id_text = str(id).strip() if id else ""
        fqn_text = str(fqn).strip() if fqn else ""
        if not id_text and not fqn_text:
            return failure("id or fqn required")
        if id_text.startswith("ucs:"):
            return failure(_DESCRIBE_UCS_ID_MESSAGE)

        hint_message: str | None = None
        if id_text:
            node_id = id_text
        else:
            # LIMIT 2 is enough to detect ambiguity without fetching every match.
            matches = g._rows(  # noqa: SLF001
                "MATCH (s:Symbol) WHERE s.fqn = $fqn RETURN s.id AS id LIMIT 2",
                {"fqn": fqn_text},
            )
            if not matches:
                return failure(f"No Symbol found for fqn='{fqn_text}'")
            node_id = str(matches[0]["id"] or "")
            if len(matches) > 1:
                hint_message = (
                    "multiple symbols share this FQN; use "
                    f"resolve(identifier={fqn_text!r}, hint_kind='symbol') to list candidates with reasons, "
                    "then describe(id=...) on the chosen node"
                )

        kind = _resolve_node_kind(g, node_id)
        if kind == "unresolved_call_site":
            return failure(_DESCRIBE_UCS_ID_MESSAGE)
        row = _load_node_record(g, node_id, kind)
        if row is None:
            return failure(f"No node found for `{node_id}`")

        ref = _node_ref_from_row(kind, row)
        edge_summary = _edge_summary_for_node(g, node_id, kind=kind, row=row)
        data = dict(row)

        is_method_symbol = (
            kind == "symbol" and str(row.get("kind") or "") in _METHOD_SYMBOL_KINDS_FOR_OVERRIDE_ROLLUP
        )
        if is_method_symbol:
            inline, total = g.unresolved_sites_for_describe(node_id)
            if total > 0:
                data["unresolved_call_sites_total"] = total
                sites = []
                for site in inline:
                    sites.append(
                        {
                            "line": int(site.get("line") or 0),
                            "reason": str(site.get("reason") or ""),
                            "callee_simple": str(site.get("callee_simple") or ""),
                            "receiver_expr": str(site.get("receiver_expr") or ""),
                        }
                    )
                data["unresolved_call_sites"] = sites
                if total > len(inline):
                    data["unresolved_call_sites_footer"] = (
                        f"{total} unresolved call sites — see "
                        f"java-codebase-rag unresolved-calls list --method-id {node_id} for the full list"
                    )

        record = NodeRecord(id=ref.id, kind=kind, fqn=ref.fqn, data=data, edge_summary=edge_summary)
        hints = generate_hints("describe", {"success": True, "record": record.model_dump()})
        return DescribeOutput(success=True, record=record, message=hint_message, hints=hints)
    except Exception as exc:
        # ValueError was handled identically, so one handler covers both.
        return failure(str(exc))
|
|
1100
|
+
|
|
1101
|
+
|
|
1102
|
+
def _resolve_validate_identifier(raw: str) -> tuple[str | None, str | None]:
    """Trim ``raw`` and reject blank identifiers.

    Returns ``(trimmed, None)`` for a usable identifier, otherwise
    ``(None, error_message)`` where the message says whether the input was an
    empty string or whitespace only.
    """
    trimmed = raw.strip()
    if trimmed:
        return trimmed, None
    detail = "whitespace only" if raw != "" else "empty string"
    return None, f"Invalid identifier: {detail}"
|
|
1108
|
+
|
|
1109
|
+
|
|
1110
|
+
def _resolve_kinds_to_search(
    hint_kind: Literal["symbol", "route", "client", "producer"] | None,
) -> list[Literal["symbol", "route", "client", "producer"]]:
    """Return the node kinds ``resolve`` should probe.

    With no hint all four kinds are searched, in the order symbol, route,
    client, producer; with a hint only that kind is searched.
    """
    return [hint_kind] if hint_kind is not None else ["symbol", "route", "client", "producer"]
|
|
1116
|
+
|
|
1117
|
+
|
|
1118
|
+
def _resolve_parse_route_method_path(identifier: str) -> tuple[str, str] | None:
    """Parse ``"<METHOD> <path>"`` into ``(METHOD, path)``.

    The identifier is split on the first run of whitespace. The method is
    upper-cased and must be purely alphabetic; the path is stripped and must
    start with ``/``. Returns ``None`` for anything else.
    """
    try:
        verb, remainder = identifier.split(maxsplit=1)
    except ValueError:
        # Fewer than two whitespace-separated parts.
        return None
    method = verb.upper()
    path = remainder.strip()
    if method.isalpha() and path.startswith("/"):
        return method, path
    return None
|
|
1126
|
+
|
|
1127
|
+
|
|
1128
|
+
def _resolve_parse_microservice_route(identifier: str) -> tuple[str, str, str] | None:
    """Parse ``"<microservice> <METHOD> <path>"`` into its three parts.

    The identifier is split on the first two runs of whitespace. The method is
    upper-cased and must be purely alphabetic; the path is stripped and must
    start with ``/``. Returns ``None`` for anything else.
    """
    try:
        microservice, verb, remainder = identifier.split(maxsplit=2)
    except ValueError:
        # Fewer than three whitespace-separated parts.
        return None
    method = verb.upper()
    path = remainder.strip()
    if method.isalpha() and path.startswith("/"):
        return microservice, method, path
    return None
|
|
1136
|
+
|
|
1137
|
+
|
|
1138
|
+
def _resolve_symbol_candidates(
    g: KuzuGraph,
    identifier: str,
) -> list[tuple[NodeRef, ResolveReason, int]]:
    """Collect Symbol candidates for ``identifier`` as ``(node, reason, specificity)``.

    Four lookups run in order, each capped at ``_RESOLVE_PRE_DEDUP_LIMIT`` rows:
    exact id, exact FQN, FQN equal to or ending with ``.<identifier>``, and exact
    short name. Specificity is the identifier length, except for the suffix
    lookup where it is the length of the matched FQN. The same node may appear
    under several reasons; no de-duplication happens here.
    """
    lim = _RESOLVE_PRE_DEDUP_LIMIT

    def fqn_length(row: dict[str, Any]) -> int:
        return len(str(row.get("fqn") or ""))

    def identifier_length(_row: dict[str, Any]) -> int:
        return len(identifier)

    lookups = (
        ("s.id = $id", {"id": identifier}, "exact_id", identifier_length),
        ("s.fqn = $fqn", {"fqn": identifier}, "exact_fqn", identifier_length),
        (
            "s.fqn = $ident OR s.fqn ENDS WITH $suffix",
            {"ident": identifier, "suffix": f".{identifier}"},
            "fqn_suffix",
            fqn_length,
        ),
        ("s.name = $name", {"name": identifier}, "short_name", identifier_length),
    )

    out: list[tuple[NodeRef, ResolveReason, int]] = []
    for condition, params, reason, specificity_of in lookups:
        rows = g._rows(  # noqa: SLF001
            f"MATCH (s:Symbol) WHERE {condition} RETURN {_SYMBOL_RESOLVE_RETURN} LIMIT $lim",
            {**params, "lim": lim},
        )
        for row in rows:
            specificity = specificity_of(row)
            out.append((_node_ref_from_row("symbol", row), reason, specificity))
    return out
|
|
1178
|
+
|
|
1179
|
+
|
|
1180
|
+
def _resolve_route_candidates(
    g: KuzuGraph,
    identifier: str,
) -> list[tuple[NodeRef, ResolveReason, int]]:
    """Collect Route candidates for ``identifier`` as ``(node, reason, specificity)``.

    Lookups, in order and each capped at ``_RESOLVE_PRE_DEDUP_LIMIT`` rows:

    1. exact id;
    2. ``"<microservice> <METHOD> <path>"`` against microservice, method and
       ``path``/``path_template``;
    3. ``"<METHOD> <path>"`` against method and ``path``/``path_template``;
    4. a bare identifier starting with ``/`` against ``path``/``path_template``.

    Specificity is the identifier length for id matches, the parsed path length
    for method/path matches, and the matched row's template (or path) length for
    bare-path matches.
    """
    lim = _RESOLVE_PRE_DEDUP_LIMIT
    path_matches = "(r.path = $path OR r.path_template = $path)"

    def fetch(condition: str, params: dict[str, Any]):
        return g._rows(  # noqa: SLF001
            f"MATCH (r:Route) WHERE {condition} RETURN {_ROUTE_RESOLVE_RETURN} LIMIT $lim",
            {**params, "lim": lim},
        )

    out: list[tuple[NodeRef, ResolveReason, int]] = [
        (_node_ref_from_row("route", row), "exact_id", len(identifier))
        for row in fetch("r.id = $id", {"id": identifier})
    ]

    ms_route = _resolve_parse_microservice_route(identifier)
    if ms_route is not None:
        microservice, method, path = ms_route
        found = fetch(
            f"r.microservice = $ms AND r.method = $method AND {path_matches}",
            {"ms": microservice, "method": method, "path": path},
        )
        out.extend((_node_ref_from_row("route", row), "route_method_path", len(path)) for row in found)

    method_path = _resolve_parse_route_method_path(identifier)
    if method_path is not None:
        method, path = method_path
        found = fetch(
            f"r.method = $method AND {path_matches}",
            {"method": method, "path": path},
        )
        out.extend((_node_ref_from_row("route", row), "route_method_path", len(path)) for row in found)

    if identifier.startswith("/"):
        for row in fetch("r.path = $path OR r.path_template = $path", {"path": identifier}):
            matched_path = str(row.get("path_template") or row.get("path") or "")
            out.append((_node_ref_from_row("route", row), "route_template", len(matched_path)))

    return out
|
|
1230
|
+
|
|
1231
|
+
|
|
1232
|
+
def _resolve_client_candidates(
    g: KuzuGraph,
    identifier: str,
) -> list[tuple[NodeRef, ResolveReason, int]]:
    """Collect Client candidates for ``identifier`` as ``(node, reason, specificity)``.

    An exact-id lookup always runs. After that:

    * an identifier containing a space is read as ``"<target_service> <path prefix>"``
      and matched on target service plus ``path``/``path_template`` prefix, provided
      both parts are non-empty after stripping;
    * an identifier without a space that does not start with ``/`` is matched as a
      target service name.

    Each lookup is capped at ``_RESOLVE_PRE_DEDUP_LIMIT`` rows. Specificity is the
    identifier length, or the path-prefix length for target+path matches.
    """
    lim = _RESOLVE_PRE_DEDUP_LIMIT

    def fetch(condition: str, params: dict[str, Any]):
        return g._rows(  # noqa: SLF001
            f"MATCH (c:Client) WHERE {condition} RETURN {_CLIENT_RESOLVE_RETURN} LIMIT $lim",
            {**params, "lim": lim},
        )

    out: list[tuple[NodeRef, ResolveReason, int]] = [
        (_node_ref_from_row("client", row), "exact_id", len(identifier))
        for row in fetch("c.id = $id", {"id": identifier})
    ]

    head, space, tail = identifier.partition(" ")
    if space:
        target = head.strip()
        path_prefix = tail.strip()
        if target and path_prefix:
            found = fetch(
                "c.target_service = $target "
                "AND (c.path STARTS WITH $path OR c.path_template STARTS WITH $path)",
                {"target": target, "path": path_prefix},
            )
            out.extend(
                (_node_ref_from_row("client", row), "client_target_path", len(path_prefix))
                for row in found
            )
    elif not identifier.startswith("/"):
        found = fetch("c.target_service = $target", {"target": identifier})
        out.extend(
            (_node_ref_from_row("client", row), "client_target", len(identifier))
            for row in found
        )

    return out
|
|
1269
|
+
|
|
1270
|
+
|
|
1271
|
+
def _resolve_producer_candidates(
    g: KuzuGraph,
    identifier: str,
) -> list[tuple[NodeRef, ResolveReason, int]]:
    """Collect Producer candidates for ``identifier`` as ``(node, reason, specificity)``.

    Runs an exact-id lookup and an exact-topic lookup, then a topic-prefix lookup
    unless the identifier starts with ``/``. Each lookup is capped at
    ``_RESOLVE_PRE_DEDUP_LIMIT`` rows and every candidate's specificity is the
    identifier length.
    """
    lim = _RESOLVE_PRE_DEDUP_LIMIT
    lookups = [
        ("p.id = $id", {"id": identifier}, "exact_id"),
        ("p.topic = $topic", {"topic": identifier}, "producer_topic"),
    ]
    if not identifier.startswith("/"):
        lookups.append(("p.topic STARTS WITH $topic", {"topic": identifier}, "producer_topic_prefix"))

    out: list[tuple[NodeRef, ResolveReason, int]] = []
    for condition, params, reason in lookups:
        rows = g._rows(  # noqa: SLF001
            f"MATCH (p:Producer) WHERE {condition} RETURN {_PRODUCER_RESOLVE_RETURN} LIMIT $lim",
            {**params, "lim": lim},
        )
        out.extend((_node_ref_from_row("producer", row), reason, len(identifier)) for row in rows)
    return out
|
|
1301
|
+
|
|
1302
|
+
|
|
1303
|
+
def _resolve_dedupe_candidates(
    raw: list[tuple[NodeRef, ResolveReason, int]],
) -> list[tuple[NodeRef, ResolveReason, int]]:
    """Keep one entry per node id, preferring the strongest match reason.

    A later entry replaces the kept one when its reason has a lower
    ``_RESOLVE_REASON_PRIORITY`` value, or the same priority with a higher
    specificity. Output order follows the first appearance of each node id.
    """
    kept: dict[str, tuple[NodeRef, ResolveReason, int]] = {}
    for node, reason, specificity in raw:
        incumbent = kept.get(node.id)
        if incumbent is not None:
            # Lower tuple wins: priority first, then larger specificity.
            incumbent_rank = (_RESOLVE_REASON_PRIORITY[incumbent[1]], -incumbent[2])
            challenger_rank = (_RESOLVE_REASON_PRIORITY[reason], -specificity)
            if not challenger_rank < incumbent_rank:
                continue
        kept[node.id] = (node, reason, specificity)
    return list(kept.values())
|
|
1317
|
+
|
|
1318
|
+
|
|
1319
|
+
def _resolve_rank_candidates(
    deduped: list[tuple[NodeRef, ResolveReason, int]],
) -> list[ResolveCandidate]:
    """Order de-duplicated candidates and attach a positional score.

    Candidates are sorted by reason priority (lower first), then by descending
    specificity, then by node id. The score falls linearly with rank: the first
    candidate gets ``1.0`` and the candidate at index ``i`` gets ``1 - i / total``.
    """

    def sort_key(item: tuple[NodeRef, ResolveReason, int]):
        node, reason, specificity = item
        return (_RESOLVE_REASON_PRIORITY[reason], -specificity, node.id)

    ordered = sorted(deduped, key=sort_key)
    total = len(ordered)
    ranked: list[ResolveCandidate] = []
    for position, (node, reason, _specificity) in enumerate(ordered):
        score = (1.0 - (position / total)) if total else 0.0
        ranked.append(ResolveCandidate(node=node, reason=reason, score=score))
    return ranked
|
|
1335
|
+
|
|
1336
|
+
|
|
1337
|
+
def _resolve_assert_invariants(out: ResolveOutput) -> None:
    """Assert the shape contract of a ``ResolveOutput``.

    * a failed output must have status ``"none"``;
    * ``"none"`` - no node, no candidates, and a non-empty message;
    * ``"one"``  - a node and no candidates;
    * ``"many"`` - no node and at least two candidates.

    These are ``assert`` statements, so they do not run under ``python -O``.
    """
    if not out.success:
        # Failures then share the remaining checks of the "none" status below.
        assert out.status == "none"

    status = out.status
    if status == "none":
        assert out.node is None
        assert not out.candidates
        assert out.message
    elif status == "one":
        assert out.node is not None
        assert not out.candidates
    elif status == "many":
        assert out.node is None
        assert len(out.candidates) >= 2
|
|
1354
|
+
|
|
1355
|
+
|
|
1356
|
+
def _resolve_seeds_for_hints(identifier: str) -> tuple[str | None, str | None]:
    """Derive ``(path_prefix_seed, target_service_seed)`` from a resolve identifier.

    The path seed is the path of a ``"<METHOD> <path>"`` identifier, else the path
    of a ``"<microservice> <METHOD> <path>"`` identifier, else the identifier
    itself when it starts with ``/``, else ``None``.

    The target-service seed is the stripped text before the first space when the
    identifier contains one (``None`` if that text is empty), else the identifier
    itself when it does not start with ``/``, else ``None``.
    """
    path_prefix_seed: str | None = None
    if (method_path := _resolve_parse_route_method_path(identifier)) is not None:
        path_prefix_seed = method_path[1]
    elif (ms_route := _resolve_parse_microservice_route(identifier)) is not None:
        path_prefix_seed = ms_route[2]
    elif identifier.startswith("/"):
        path_prefix_seed = identifier

    target_service_seed: str | None = None
    head, space, _rest = identifier.partition(" ")
    if space:
        target_service_seed = head.strip() or None
    elif not identifier.startswith("/"):
        target_service_seed = identifier

    return path_prefix_seed, target_service_seed
|
|
1378
|
+
|
|
1379
|
+
|
|
1380
|
+
def _resolve_finalize_success(
    trimmed: str,
    hint_kind: Literal["symbol", "route", "client", "producer"] | None,
    matches: list[ResolveCandidate],
) -> ResolveOutput:
    """Turn ranked matches into a successful ``ResolveOutput`` with hints.

    No matches gives status ``"none"`` with a message pointing at ``search``; a
    single match gives ``"one"`` with that node; several give ``"many"`` with the
    candidate list. Hints are generated from the status, identifier, candidates,
    ``hint_kind`` and the seeds derived from the identifier, and the result is
    checked against the output invariants before it is returned.
    """
    if not matches:
        outcome: dict[str, Any] = {
            "status": "none",
            "message": (
                "No matches for identifier; use search(query=...) for ranked fuzzy lookup."
            ),
        }
    elif len(matches) == 1:
        outcome = {"status": "one", "node": matches[0].node}
    else:
        outcome = {"status": "many", "candidates": matches}
    out = ResolveOutput(success=True, resolved_identifier=trimmed, **outcome)

    path_prefix_seed, target_service_seed = _resolve_seeds_for_hints(trimmed)
    hints = generate_hints(
        "resolve",
        {
            "status": out.status,
            "resolved_identifier": trimmed,
            "candidates": out.candidates,
            "hint_kind": hint_kind,
            "path_prefix_seed": path_prefix_seed,
            "target_service_seed": target_service_seed,
        },
    )
    out = out.model_copy(update={"hints": hints})
    _resolve_assert_invariants(out)
    return out
|
|
1421
|
+
|
|
1422
|
+
|
|
1423
|
+
def resolve_v2(
    identifier: str,
    hint_kind: Literal["symbol", "route", "client", "producer"] | None = None,
    graph: KuzuGraph | None = None,
) -> ResolveOutput:
    """Resolve an identifier to zero, one or many graph nodes.

    Candidates are collected per kind (as selected by ``hint_kind``),
    de-duplicated, ranked and capped at ``_RESOLVE_CANDIDATE_CAP``. An
    identifier containing ``*`` or ``?`` short-circuits to an empty success
    result without touching the graph. Any exception is converted into a
    ``success=False`` output carrying ``str(exc)`` as the message.
    """

    def _failure(text: str) -> ResolveOutput:
        # Uniform failure shape used for both validation and runtime errors.
        failed = ResolveOutput(
            success=False,
            status="none",
            message=text,
            hints=[],
            resolved_identifier=None,
        )
        _resolve_assert_invariants(failed)
        return failed

    try:
        trimmed, err = _resolve_validate_identifier(identifier)
        if err is not None:
            return _failure(err)

        assert trimmed is not None
        has_wildcard = "*" in trimmed or "?" in trimmed
        if has_wildcard:
            return _resolve_finalize_success(trimmed, hint_kind, [])

        g = graph or KuzuGraph.get()
        # Any kind without an explicit entry falls back to the producer lookup.
        collectors = {
            "symbol": _resolve_symbol_candidates,
            "route": _resolve_route_candidates,
            "client": _resolve_client_candidates,
        }
        found: list[tuple[NodeRef, ResolveReason, int]] = []
        for kind in _resolve_kinds_to_search(hint_kind):
            collect = collectors.get(kind, _resolve_producer_candidates)
            found.extend(collect(g, trimmed))

        ranked = _resolve_rank_candidates(_resolve_dedupe_candidates(found))
        return _resolve_finalize_success(
            trimmed, hint_kind, ranked[:_RESOLVE_CANDIDATE_CAP]
        )
    except Exception as exc:
        return _failure(str(exc))
|
|
1471
|
+
|
|
1472
|
+
|
|
1473
|
+
def _neighbor_edge_attrs(row: dict[str, Any]) -> dict[str, Any]:
|
|
1474
|
+
attrs = {
|
|
1475
|
+
k: v
|
|
1476
|
+
for k, v in row.items()
|
|
1477
|
+
if k not in {"other_id", "edge_type", "stored_edge_type"}
|
|
1478
|
+
and v not in (None, "")
|
|
1479
|
+
}
|
|
1480
|
+
attrs.setdefault("row_kind", "resolved")
|
|
1481
|
+
return attrs
|
|
1482
|
+
|
|
1483
|
+
|
|
1484
|
+
def _unresolved_site_to_edge(origin_id: str, row: dict[str, Any]) -> Edge:
    """Represent an unresolved call-site row as an outgoing ``CALLS`` edge.

    The edge's ``other`` end is a ``NodeRef`` of kind
    ``unresolved_call_site`` named after the callee's simple name. Missing or
    falsy row values become ``""`` for text columns and ``0`` for numeric ones.
    """

    def _text(column: str) -> str:
        return str(row.get(column) or "")

    def _number(column: str) -> int:
        return int(row.get(column) or 0)

    site_id = _text("id")
    callee_name = _text("callee_simple")
    site_line = _number("call_site_line")
    site_byte = _number("call_site_byte")
    target = NodeRef(id=site_id, kind="unresolved_call_site", fqn="", name=callee_name)
    site_attrs = {
        "row_kind": "unresolved_call_site",
        "unresolved_call_site_id": site_id,
        "reason": _text("reason"),
        "call_site_line": site_line,
        "call_site_byte": site_byte,
        "arg_count": _number("arg_count"),
        "callee_simple": callee_name,
        "receiver_expr": _text("receiver_expr"),
    }
    return Edge(
        origin_id=origin_id,
        edge_type="CALLS",
        direction="out",
        other=target,
        attrs=site_attrs,
    )
|
|
1505
|
+
|
|
1506
|
+
|
|
1507
|
+
def _calls_transcript_sort_key(edge: Edge) -> tuple[int, int, int]:
|
|
1508
|
+
attrs = edge.attrs or {}
|
|
1509
|
+
line = int(attrs.get("call_site_line") or 0)
|
|
1510
|
+
byte = int(attrs.get("call_site_byte") or 0)
|
|
1511
|
+
kind_rank = 0 if str(attrs.get("row_kind") or "resolved") == "resolved" else 1
|
|
1512
|
+
return (line, byte, kind_rank)
|
|
1513
|
+
|
|
1514
|
+
|
|
1515
|
+
def _dedup_call_edges(edges: list[Edge]) -> list[Edge]:
    """Collapse resolved CALLS rows sharing (origin_id, other.id); unresolved rows pass through.

    Each collapsed edge is the group's earliest member in transcript order,
    annotated with ``call_site_count`` (group size) and ``call_site_lines``
    (sorted distinct call-site lines). The combined list is returned in
    transcript order.
    """
    passthrough: list[Edge] = []
    by_pair: dict[tuple[str, str], list[Edge]] = {}
    for edge in edges:
        row_kind = str((edge.attrs or {}).get("row_kind") or "resolved")
        if row_kind == "unresolved_call_site":
            passthrough.append(edge)
            continue
        by_pair.setdefault((edge.origin_id, edge.other.id), []).append(edge)

    merged: list[Edge] = []
    for members in by_pair.values():
        # min() yields the first smallest member, matching a stable sort's head.
        representative = min(members, key=_calls_transcript_sort_key)
        distinct_lines = {
            int((member.attrs or {}).get("call_site_line") or 0) for member in members
        }
        new_attrs = {
            **(representative.attrs or {}),
            "call_site_count": len(members),
            "call_site_lines": sorted(distinct_lines),
        }
        merged.append(representative.model_copy(update={"attrs": new_attrs}))

    merged.extend(passthrough)
    merged.sort(key=_calls_transcript_sort_key)
    return merged
|
|
1542
|
+
|
|
1543
|
+
|
|
1544
|
+
def _edgefilter_pushdown_kwargs(ef: EdgeFilter | None) -> dict[str, Any]:
|
|
1545
|
+
if ef is None:
|
|
1546
|
+
return {}
|
|
1547
|
+
return {
|
|
1548
|
+
"min_confidence": ef.min_confidence,
|
|
1549
|
+
"include_strategies": ef.include_strategies,
|
|
1550
|
+
"exclude_strategies": ef.exclude_strategies,
|
|
1551
|
+
"callee_declaring_role": ef.callee_declaring_role,
|
|
1552
|
+
"callee_declaring_roles": ef.callee_declaring_roles,
|
|
1553
|
+
"exclude_callee_declaring_roles": ef.exclude_callee_declaring_roles,
|
|
1554
|
+
}
|
|
1555
|
+
|
|
1556
|
+
|
|
1557
|
+
def _rows_to_call_edges(
    g: Any,
    *,
    origin_id: str,
    direction: Literal["in", "out"],
    rows: list[dict[str, Any]],
    nf: NodeFilter | None,
) -> list[Edge]:
    """Turn call query rows into ``Edge`` objects, applying the node filter.

    Rows whose neighbour record cannot be loaded, or whose neighbour does not
    match ``nf``, are skipped.

    Raises:
        ValueError: if ``nf`` is not applicable to a neighbour's kind.
    """
    converted: list[Edge] = []
    for call_row in rows:
        neighbor_id = str(call_row.get("other_id") or "")
        neighbor_kind = _resolve_node_kind(g, neighbor_id)
        neighbor_rec = _load_node_record(g, neighbor_id, neighbor_kind)
        if neighbor_rec is None:
            continue
        if nf:
            problem = _nodefilter_applicability_error(neighbor_kind, nf)
            if problem:
                _log_fail_loud("applicability")
                raise ValueError(problem)
        if _node_matches_filter(neighbor_kind, neighbor_rec, nf):
            converted.append(
                Edge(
                    origin_id=origin_id,
                    edge_type=str(call_row.get("edge_type") or "CALLS"),
                    direction=direction,
                    other=_node_ref_from_row(neighbor_kind, neighbor_rec),
                    attrs=_neighbor_edge_attrs(call_row),
                )
            )
    return converted
|
|
1587
|
+
|
|
1588
|
+
|
|
1589
|
+
def _neighbors_calls_for_origin(
    g: Any,
    origin_id: str,
    *,
    direction: Literal["in", "out"],
    nf: NodeFilter | None,
    ef: EdgeFilter | None,
    offset: int,
    limit: int | None,
    include_unresolved: bool = False,
    dedup_calls: bool = False,
) -> list[Edge]:
    """Fetch CALLS edges for one origin, paginating in the query when possible.

    Pagination is delegated to ``neighbor_calls_for_symbol`` only when a limit
    is given and nothing needs the full row stream (no node filter, no
    de-duplication, no unresolved call sites). Otherwise every row is fetched,
    optionally merged with unresolved call sites (outgoing direction only) and
    de-duplicated, and the ``offset``/``limit`` window is applied in Python.
    With ``limit=None`` the full list is returned.
    """
    pushdown = _edgefilter_pushdown_kwargs(ef)
    page_in_query = (
        limit is not None
        and nf is None
        and not dedup_calls
        and not include_unresolved
    )
    if page_in_query:
        paged_rows = g.neighbor_calls_for_symbol(
            origin_id,
            direction=direction,
            offset=offset,
            limit=limit,
            sql_pagination=True,
            **pushdown,
        )
        return _rows_to_call_edges(
            g, origin_id=origin_id, direction=direction, rows=paged_rows, nf=nf
        )

    all_rows = g.neighbor_calls_for_symbol(
        origin_id,
        direction=direction,
        offset=0,
        limit=None,
        sql_pagination=False,
        **pushdown,
    )
    edges = _rows_to_call_edges(
        g, origin_id=origin_id, direction=direction, rows=all_rows, nf=nf
    )
    if include_unresolved and direction == "out":
        for site_row in g.unresolved_sites_for_caller(origin_id, direction=direction):
            edges.append(_unresolved_site_to_edge(origin_id, site_row))
        edges.sort(key=_calls_transcript_sort_key)
    if dedup_calls:
        edges = _dedup_call_edges(edges)
    return edges if limit is None else edges[offset : offset + limit]
|
|
1637
|
+
|
|
1638
|
+
|
|
1639
|
+
def _composed_axis_origin_error(
|
|
1640
|
+
*,
|
|
1641
|
+
symbol_kind: str,
|
|
1642
|
+
modifiers: list[str] | None,
|
|
1643
|
+
declares_composed: list[str],
|
|
1644
|
+
override_composed: list[str],
|
|
1645
|
+
) -> str | None:
|
|
1646
|
+
"""Fail-fast origin gate for composed DECLARES.* vs OVERRIDDEN_BY.* families."""
|
|
1647
|
+
if declares_composed and symbol_kind not in _TYPE_SYMBOL_KINDS_FOR_EDGE_ROLLUP:
|
|
1648
|
+
return f"Composed edge types ({declares_composed[0]}) require a type Symbol origin"
|
|
1649
|
+
if override_composed:
|
|
1650
|
+
key = override_composed[0]
|
|
1651
|
+
mods = modifiers or []
|
|
1652
|
+
if symbol_kind == "constructor":
|
|
1653
|
+
return (
|
|
1654
|
+
f"Composed edge types ({key}) require a non-static method Symbol origin "
|
|
1655
|
+
"(constructors are not supported)"
|
|
1656
|
+
)
|
|
1657
|
+
if symbol_kind not in _METHOD_SYMBOL_KINDS_FOR_OVERRIDE_ROLLUP:
|
|
1658
|
+
return f"Composed edge types ({key}) require a method Symbol origin"
|
|
1659
|
+
if "static" in mods:
|
|
1660
|
+
return (
|
|
1661
|
+
f"Composed edge types ({key}) require a non-static method Symbol origin "
|
|
1662
|
+
"(static methods are not supported)"
|
|
1663
|
+
)
|
|
1664
|
+
return None
|
|
1665
|
+
|
|
1666
|
+
|
|
1667
|
+
@validate_call(config={"arbitrary_types_allowed": True})
def neighbors_v2(
    ids: str | list[str],
    # Required fields are intentional: direct Python calls and MCP-bound calls
    # share the same validation contract through @validate_call.
    direction: Literal["in", "out"] = Field(...),
    edge_types: list[NeighborEdgeType] = Field(...),
    limit: int = 25,
    offset: int = 0,
    filter: NodeFilter | dict[str, Any] | str | None = None,
    edge_filter: EdgeFilter | dict[str, Any] | str | None = None,
    include_unresolved: bool = False,
    dedup_calls: bool = False,
    graph: Any | None = None,
) -> NeighborsOutput:
    """List edges adjacent to one or more origin nodes.

    Args:
        ids: A single origin node id or a list of them.
        direction: ``"out"`` follows edges leaving the origin, ``"in"`` edges
            arriving at it. Composed edge types and ``include_unresolved``
            require ``"out"``.
        edge_types: Requested edge types; duplicates are dropped keeping the
            first occurrence. Flat labels are matched against stored edges,
            composed types are answered by dedicated graph traversals.
        limit: Maximum number of edges returned.
        offset: Number of edges skipped before the returned window.
        filter: Optional node filter applied to the node at the other end.
        edge_filter: Optional edge filter; incompatible with
            ``include_unresolved``.
        include_unresolved: Also return unresolved call sites; requires
            ``edge_types=['CALLS']`` and ``direction="out"``.
        dedup_calls: Collapse resolved CALLS rows sharing the same endpoints.
        graph: Graph handle; defaults to ``KuzuGraph.get()``.

    Returns:
        A ``NeighborsOutput``. Invalid filters, inapplicable options and
        runtime errors yield ``success=False`` with a message instead of
        raising; a pydantic ``ValidationError`` raised outside the
        filter-parsing steps is re-raised.

    Pagination: a single-origin CALLS-only query with no node filter,
    ``include_unresolved`` or ``dedup_calls`` forwards ``offset``/``limit`` to
    ``_neighbors_calls_for_origin``. Every other query collects all edges and
    applies the ``offset``/``limit`` window here.
    """
    try:
        validated_types = _NEIGHBOR_EDGE_TYPES_ADAPTER.validate_python(edge_types)
        # dict.fromkeys removes duplicates while keeping first-seen order.
        requested_edge_types = list(dict.fromkeys(validated_types))
        flat_labels = [et for et in requested_edge_types if et not in _COMPOSED_EDGE_TYPES]
        composed_keys = [et for et in requested_edge_types if et in _COMPOSED_EDGE_TYPES]
        declares_composed = [k for k in composed_keys if k in _MEMBER_COMPOSED_EDGE_TYPES]
        override_composed = [k for k in composed_keys if k in _OVERRIDE_COMPOSED_EDGE_TYPES]
        ordered_composed = declares_composed + override_composed
        g = graph or KuzuGraph.get()
        try:
            raw_filter = _coerce_filter(filter)
            nf = (
                NodeFilter.model_validate(raw_filter)
                if raw_filter is not None and not isinstance(raw_filter, NodeFilter)
                else raw_filter
            )
        except ValidationError as exc:
            _log_fail_loud("unknown_key")
            return NeighborsOutput(
                success=False,
                message=_filter_validation_error_message(exc),
                hints=[],
                requested_edge_types=[],
            )
        try:
            raw_edge_filter = _coerce_edge_filter(edge_filter)
            ef = (
                EdgeFilter.model_validate(raw_edge_filter)
                if raw_edge_filter is not None and not isinstance(raw_edge_filter, EdgeFilter)
                else raw_edge_filter
            )
        except ValidationError as exc:
            _log_fail_loud("edge_filter")
            return NeighborsOutput(
                success=False,
                message=_filter_validation_error_message(exc),
                hints=[],
                requested_edge_types=[],
            )
        except ValueError as exc:
            _log_fail_loud("edge_filter")
            return NeighborsOutput(success=False, message=str(exc), hints=[], requested_edge_types=[])
        if include_unresolved and ef is not None:
            return NeighborsOutput(
                success=False,
                message=(
                    "include_unresolved=True is incompatible with edge_filter; "
                    "UnresolvedCallSite rows have no edge attributes to filter on"
                ),
                hints=[],
                requested_edge_types=requested_edge_types,
            )
        if include_unresolved and requested_edge_types != ["CALLS"]:
            return NeighborsOutput(
                success=False,
                message="include_unresolved requires edge_types=['CALLS']",
                hints=[],
                requested_edge_types=requested_edge_types,
            )
        if include_unresolved and direction != "out":
            return NeighborsOutput(
                success=False,
                message='include_unresolved requires direction="out"',
                hints=[],
                requested_edge_types=requested_edge_types,
            )
        if ef and (err := _edgefilter_applicability_error(requested_edge_types, ef)):
            _log_fail_loud("edge_filter")
            return NeighborsOutput(
                success=False,
                message=err,
                hints=[],
                requested_edge_types=requested_edge_types,
            )
        if nf and (err := _validate_no_wildcards(nf)):
            _log_fail_loud("wildcard")
            return NeighborsOutput(success=False, message=err, hints=[], requested_edge_types=[])
        if composed_keys and direction != "out":
            return NeighborsOutput(
                success=False,
                message='Composed edge types require direction="out"',
                hints=[],
                requested_edge_types=requested_edge_types,
            )
        use_calls_path = flat_labels == ["CALLS"] and not composed_keys
        origins = [ids] if isinstance(ids, str) else list(ids)
        if not origins:
            # Without this guard the origins[0] lookup further down would
            # surface as an opaque "list index out of range" failure.
            return NeighborsOutput(
                success=False,
                message="ids must contain at least one node id",
                hints=[],
                requested_edge_types=requested_edge_types,
            )
        # True only when offset/limit are applied by the per-origin call query;
        # in every other case the window is applied once after collecting edges.
        paginate_in_sql = (
            use_calls_path
            and len(origins) == 1
            and nf is None
            and not include_unresolved
            and not dedup_calls
        )
        results: list[Edge] = []
        unfiltered_calls_count: int | None = None
        unresolved_count: int | None = None
        calls_row_count: int | None = None
        if use_calls_path and len(origins) == 1 and direction == "out":
            unresolved_count = g.count_unresolved_for_caller(origins[0])
            calls_row_count = g.count_calls_for_symbol(origins[0], direction=direction)
        for origin_id in origins:
            origin_kind = _resolve_node_kind(g, origin_id)
            if ordered_composed:
                if origin_kind != "symbol":
                    first_key = ordered_composed[0]
                    axis_msg = (
                        f"Composed edge types ({first_key}) require a method Symbol origin"
                        if first_key in _OVERRIDE_COMPOSED_EDGE_TYPES
                        else f"Composed edge types ({first_key}) require a type Symbol origin"
                    )
                    return NeighborsOutput(
                        success=False,
                        message=axis_msg,
                        hints=[],
                        requested_edge_types=requested_edge_types,
                    )
                origin_row = _load_node_record(g, origin_id, "symbol")
                sym_kind = str((origin_row or {}).get("kind") or "")
                mods_raw = (origin_row or {}).get("modifiers")
                mods = mods_raw if isinstance(mods_raw, list) else None
                if err := _composed_axis_origin_error(
                    symbol_kind=sym_kind,
                    modifiers=mods,
                    declares_composed=declares_composed,
                    override_composed=override_composed,
                ):
                    return NeighborsOutput(
                        success=False,
                        message=err,
                        hints=[],
                        requested_edge_types=requested_edge_types,
                    )
            if use_calls_path:
                try:
                    origin_edges = _neighbors_calls_for_origin(
                        g,
                        origin_id,
                        direction=direction,
                        nf=nf,
                        ef=ef,
                        offset=offset if paginate_in_sql else 0,
                        limit=limit if paginate_in_sql else None,
                        include_unresolved=include_unresolved,
                        dedup_calls=dedup_calls,
                    )
                except ValueError as exc:
                    return NeighborsOutput(
                        success=False,
                        message=str(exc),
                        hints=[],
                        requested_edge_types=requested_edge_types,
                    )
                if (
                    ef is not None
                    and ef.callee_declaring_role in _ROLE_FILTER_OTHER_FALLBACK_VALUES
                    and not origin_edges
                    and unfiltered_calls_count is None
                ):
                    unfiltered_calls_count = g.count_calls_for_symbol(origin_id, direction=direction)
                results.extend(origin_edges)
                continue
            if flat_labels:
                # Kuzu 0.11.x can drop `label(e) IN $list` in WHERE; use OR of scalar equalities.
                label_params = [f"l{i}" for i in range(len(flat_labels))]
                label_predicate = "(" + " OR ".join(f"label(e) = ${name}" for name in label_params) + ")"
                q_params = {"id": origin_id, **dict(zip(label_params, flat_labels, strict=True))}
                # Only the arrow direction differs between the two queries.
                match_pattern = "(a)-[e]->(b)" if direction == "out" else "(a)<-[e]-(b)"
                rows = g._rows(  # noqa: SLF001
                    f"MATCH {match_pattern} WHERE a.id = $id AND "
                    f"{label_predicate} "
                    "RETURN b.id AS other_id, label(e) AS edge_type, e.confidence AS confidence, "
                    "e.strategy AS strategy, e.match AS match, e.mechanism AS mechanism, "
                    "e.annotation AS annotation, e.field_or_param AS field_or_param, "
                    "e.source AS source, e.call_site_line AS call_site_line, "
                    "e.call_site_byte AS call_site_byte, e.arg_count AS arg_count, "
                    "e.resolved AS resolved",
                    q_params,
                )
                for row in rows:
                    other_id = str(row.get("other_id") or "")
                    other_kind = _resolve_node_kind(g, other_id)
                    other_rec = _load_node_record(g, other_id, other_kind)
                    if other_rec is None:
                        continue
                    if nf and (err := _nodefilter_applicability_error(other_kind, nf)):
                        _log_fail_loud("applicability")
                        return NeighborsOutput(
                            success=False, message=err, hints=[], requested_edge_types=[]
                        )
                    if not _node_matches_filter(other_kind, other_rec, nf):
                        continue
                    results.append(
                        Edge(
                            origin_id=origin_id,
                            edge_type=str(row.get("edge_type") or ""),
                            direction=direction,
                            other=_node_ref_from_row(other_kind, other_rec),
                            attrs=_neighbor_edge_attrs(row),
                        )
                    )
            for composed_key in ordered_composed:
                if composed_key in _MEMBER_COMPOSED_EDGE_TYPES:
                    traversal_rows = g.member_edge_traversal_for(origin_id, composed_key)
                else:
                    traversal_rows = g.override_axis_traversal_for(origin_id, composed_key)
                for row in traversal_rows:
                    other_id = str(row.get("other_id") or "")
                    other_kind = _resolve_node_kind(g, other_id)
                    other_rec = _load_node_record(g, other_id, other_kind)
                    if other_rec is None:
                        continue
                    if nf and (err := _nodefilter_applicability_error(other_kind, nf)):
                        _log_fail_loud("applicability")
                        return NeighborsOutput(
                            success=False, message=err, hints=[], requested_edge_types=[]
                        )
                    if not _node_matches_filter(other_kind, other_rec, nf):
                        continue
                    if composed_key == "OVERRIDDEN_BY":
                        edge_attrs: dict[str, Any] = {}
                    else:
                        edge_attrs = _neighbor_edge_attrs(row)
                    results.append(
                        Edge(
                            origin_id=origin_id,
                            edge_type=composed_key,
                            direction="out",
                            other=_node_ref_from_row(other_kind, other_rec),
                            attrs=edge_attrs,
                        )
                    )
        # Skip slicing only when the call query already applied offset/limit.
        # Previously a single-origin CALLS query with a node filter, dedup_calls
        # or include_unresolved fetched the full stream and was never windowed.
        sliced = results if paginate_in_sql else results[offset : offset + limit]
        first_origin = origins[0]
        origin_kind = _resolve_node_kind(g, first_origin)
        subject_record = _load_node_record(g, first_origin, origin_kind)
        neigh_payload = {
            "success": True,
            "results": [e.model_dump() for e in sliced],
            "requested_edge_types": requested_edge_types,
            "requested_direction": direction,
            "offset": offset,
            "origin_id": first_origin,
            "subject_record": subject_record,
            "node_filter": nf.model_dump(exclude_none=True) if nf else None,
            "edge_filter": ef.model_dump(exclude_none=True) if ef else None,
            "edge_filter_provided": ef is not None,
            "include_unresolved": include_unresolved,
            "dedup_calls": dedup_calls,
            "unfiltered_calls_count": unfiltered_calls_count,
            "unresolved_count": unresolved_count,
            "calls_row_count": calls_row_count,
        }
        return NeighborsOutput(
            success=True,
            results=sliced,
            requested_edge_types=requested_edge_types,
            hints=generate_hints("neighbors", neigh_payload),
        )
    except ValidationError:
        raise
    except Exception as exc:
        return NeighborsOutput(success=False, message=str(exc), hints=[], requested_edge_types=[])
|