java-codebase-rag 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ast_java.py +2813 -0
- brownfield_events.py +58 -0
- build_ast_graph.py +3081 -0
- chunk_heuristics.py +62 -0
- graph_enrich.py +1681 -0
- index_common.py +10 -0
- java_codebase_rag/__init__.py +1 -0
- java_codebase_rag/cli.py +761 -0
- java_codebase_rag/cli_progress.py +52 -0
- java_codebase_rag/config.py +327 -0
- java_codebase_rag/pipeline.py +189 -0
- java_codebase_rag-0.1.0.dist-info/METADATA +818 -0
- java_codebase_rag-0.1.0.dist-info/RECORD +27 -0
- java_codebase_rag-0.1.0.dist-info/WHEEL +5 -0
- java_codebase_rag-0.1.0.dist-info/entry_points.txt +3 -0
- java_codebase_rag-0.1.0.dist-info/licenses/LICENSE +21 -0
- java_codebase_rag-0.1.0.dist-info/top_level.txt +17 -0
- java_index_flow_lancedb.py +398 -0
- java_index_v1_common.py +33 -0
- java_ontology.py +446 -0
- kuzu_queries.py +1989 -0
- mcp_hints.py +748 -0
- mcp_v2.py +1957 -0
- path_filtering.py +472 -0
- pr_analysis.py +534 -0
- search_lancedb.py +1075 -0
- server.py +578 -0
server.py
ADDED
|
@@ -0,0 +1,578 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""LanceDB code-search MCP (stdio)."""
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
import time
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Literal
|
|
11
|
+
|
|
12
|
+
import mcp_v2
|
|
13
|
+
from index_common import SBERT_MODEL
|
|
14
|
+
from java_codebase_rag.cli_progress import (
|
|
15
|
+
accumulate_and_relay_subprocess_streams,
|
|
16
|
+
emit_lance_cocoindex_finish,
|
|
17
|
+
emit_lance_cocoindex_start,
|
|
18
|
+
)
|
|
19
|
+
from java_codebase_rag.config import emit_legacy_env_hints_if_present, resolved_sbert_model_for_process_env
|
|
20
|
+
from kuzu_queries import KuzuGraph, resolve_kuzu_path
|
|
21
|
+
from mcp.server.fastmcp import FastMCP
|
|
22
|
+
from pydantic import BaseModel, Field
|
|
23
|
+
from search_lancedb import TABLES
|
|
24
|
+
|
|
25
|
+
# Target spec passed verbatim to `cocoindex update` by run_refresh_pipeline.
# NOTE(review): presumably "<flow file>:<flow name>" — confirm against cocoindex CLI docs.
_COCOINDEX_TARGET = "java_index_flow_lancedb.py:JavaCodeIndexLance"
# Server-level instructions handed to FastMCP in create_mcp_server. The adjacent
# literals are concatenated into one string, so the trailing space inside each
# fragment is significant.
_INSTRUCTIONS = (
    "Java codebase graph navigator (LanceDB + Kuzu). "
    "Tools: search (NL/code locate), find (structured NodeFilter), describe (one node + edge_summary: stored edge-label counts and optional composed keys for type Symbols and override-axis virtual keys for method Symbols), "
    "neighbors (one hop; you MUST pass direction in|out AND edge_types list — no defaults), "
    "resolve (identifier-shaped lookup for symbol/route/client/producer — three statuses one|many|none). "
    "NodeFilter `filter` is a JSON object (preferred); a JSON-encoded string is also accepted as a fallback. "
    "Unknown filter keys and populated fields not applicable to the effective node kind fail with success=false and message. "
    "Edge labels: EXTENDS, IMPLEMENTS, INJECTS, OVERRIDES, DECLARES, DECLARES_CLIENT, DECLARES_PRODUCER, CALLS, EXPOSES, HTTP_CALLS, ASYNC_CALLS; "
    "type Symbols may also use composed neighbors edge_types DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, DECLARES.EXPOSES (out only). "
    "Reprocess/init, meta, tables, diagnose-ignore, analyze-pr: use java-codebase-rag CLI — not MCP."
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Snapshot of the Kuzu graph's metadata as assembled by _graph_meta_output.
# (Documented with comments rather than a docstring so the pydantic schema
# description of this model is left untouched.)
class GraphMetaOutput(BaseModel):
    # False when opening the graph raised or the metadata carried an "error" entry.
    success: bool
    # Result of KuzuGraph.exists() at the time the snapshot was taken.
    enabled: bool
    # "db_path" from the metadata when present, otherwise resolve_kuzu_path().
    db_path: str
    # The scalar fields below are copied from the metadata mapping; a missing
    # or falsy entry falls back to the default shown here.
    ontology_version: int = 0
    built_at: int = 0
    source_root: str = ""
    parse_errors: int = 0
    counts: dict[str, int] = Field(default_factory=dict)
    # From graph.module_counts() / graph.microservice_counts(); left empty when
    # the respective call raises.
    module_counts: dict[str, int] = Field(default_factory=dict)
    microservice_counts: dict[str, int] = Field(default_factory=dict)
    routes_total: int = 0
    exposes_total: int = 0
    routes_by_framework: dict[str, int] = Field(default_factory=dict)
    routes_resolved_pct: float = 0.0
    routes_from_brownfield_pct: float = 0.0
    routes_by_layer: dict[str, int] = Field(default_factory=dict)
    edge_counts: dict[str, int] = Field(default_factory=dict)
    http_calls_match_breakdown: dict[str, int] = Field(default_factory=dict)
    async_calls_match_breakdown: dict[str, int] = Field(default_factory=dict)
    cross_service_calls_total: int = 0
    cross_service_resolution: str | None = None
    # Human-readable explanation; set when the graph is absent or could not be read.
    message: str | None = None
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class RefreshIndexOutput(BaseModel):
    """Structured result for ``run_refresh_pipeline`` / CLI ``reprocess`` JSON.

    ``phases_run`` records which phase subprocesses actually started; the CLI maps
    failures to exit **2** when it is empty (setup / nothing spawned) and exit **1**
    when it is non-empty (build failure). Callers constructing this model manually
    must set ``phases_run`` accordingly — omitting it leaves the default ``[]``,
    which the CLI treats like a preflight failure.
    """

    # True only when the cocoindex run exited 0 and the graph builder (if it ran) exited 0.
    success: bool
    # Return code of the cocoindex subprocess; None when it never ran.
    exit_code: int | None = None
    # Captured cocoindex output; run_refresh_pipeline keeps only the last 8000 characters.
    stdout: str = ""
    stderr: str = ""
    # Short failure summary (e.g. which phase exited non-zero); None on success.
    message: str | None = None
    # Return code of the graph builder; None when that phase was not attempted,
    # -1 when starting or reading it raised.
    graph_exit_code: int | None = None
    # Captured graph-builder output; run_refresh_pipeline keeps only the last 4000 characters.
    graph_stdout: str = ""
    graph_stderr: str = ""
    # Phases whose subprocess was started, in execution order.
    phases_run: list[Literal["vectors", "graph"]] = Field(default_factory=list)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# Description of the active index configuration, as assembled by
# list_code_index_tables_payload. (Comments instead of a docstring so the
# pydantic schema description stays unchanged.)
class IndexInfoOutput(BaseModel):
    # Value of _resolve_lancedb_uri(): a resolved local path or a remote URI.
    lancedb_uri: str
    # Result of resolved_sbert_model_for_process_env(SBERT_MODEL).
    embedding_model: str
    # str(_project_root()).
    project_root: str
    # The module constant _COCOINDEX_TARGET.
    cocoindex_target: str
    # Copy of search_lancedb.TABLES.
    tables: dict[str, str]
    # Graph metadata snapshot from _graph_meta_output().
    graph: GraphMetaOutput
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _resolve_lancedb_uri() -> str:
    """Return the LanceDB location for this process.

    The location comes from ``JAVA_CODEBASE_RAG_INDEX_DIR`` (whitespace
    stripped); when that is unset or blank, ``.java-codebase-rag`` under the
    current working directory is used. ``s3://``, ``gs://`` and ``az://`` URIs
    are returned untouched; anything else is treated as a local path, with
    ``~`` expanded and the path resolved to an absolute one.
    """
    configured = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
    location = configured or str((Path.cwd() / ".java-codebase-rag").resolve())
    if location.startswith(("s3://", "gs://", "az://")):
        # Object-store URIs are not filesystem paths; hand them back as-is.
        return location
    local_path = Path(location).expanduser()
    try:
        absolute = local_path.resolve()
    except OSError:
        # Resolution failed; fall back to the expanded but unresolved path.
        return str(local_path)
    return str(absolute)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _project_root() -> Path:
    """Return the absolute source root.

    Uses ``JAVA_CODEBASE_RAG_SOURCE_ROOT`` (stripped, ``~`` expanded) when it
    is set to a non-blank value, otherwise the current working directory.
    """
    override = os.environ.get("JAVA_CODEBASE_RAG_SOURCE_ROOT", "").strip()
    base = Path(override).expanduser() if override else Path.cwd()
    return base.resolve()
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _cocoindex_subprocess_env(project_root: Path) -> dict[str, str]:
    """Build the environment for the indexing subprocesses.

    Starts from a copy of the current process environment, pins
    ``JAVA_CODEBASE_RAG_SOURCE_ROOT`` to ``project_root`` and, when
    ``JAVA_CODEBASE_RAG_INDEX_DIR`` is set to a non-blank value, normalises it
    so the child does not depend on its own working directory.

    Args:
        project_root: Source root to export to the child process.

    Returns:
        A new environment mapping; ``os.environ`` itself is not modified.
    """
    sub_env = os.environ.copy()
    sub_env["JAVA_CODEBASE_RAG_SOURCE_ROOT"] = str(project_root)
    idx = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
    if idx:
        if idx.startswith(("s3://", "gs://", "az://")):
            # Object-store URIs are not filesystem paths: running them through
            # Path.resolve() would turn "s3://bucket/x" into "<cwd>/s3:/bucket/x".
            # Pass them through, matching _resolve_lancedb_uri.
            sub_env["JAVA_CODEBASE_RAG_INDEX_DIR"] = idx
        else:
            sub_env["JAVA_CODEBASE_RAG_INDEX_DIR"] = str(Path(idx).expanduser().resolve())
    return sub_env
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _graph_enabled() -> bool:
    """Report whether the Kuzu graph is available, as answered by ``KuzuGraph.exists()``."""
    return KuzuGraph.exists()
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _int_counts(raw: Any) -> dict[str, int]:
    """Coerce a counter mapping to ``{str: int}``; any non-dict value yields ``{}``."""
    if not isinstance(raw, dict):
        return {}
    return {str(key): int(value) for key, value in raw.items()}


def _graph_meta_output() -> GraphMetaOutput:
    """Collect Kuzu graph metadata into a ``GraphMetaOutput``.

    Returns:
        * ``success=True, enabled=False`` with a hint message when no graph exists.
        * ``success=False`` with a message when opening the graph or reading its
          metadata raises, or when the metadata carries an ``"error"`` entry.
        * Otherwise a fully populated snapshot. Missing or falsy metadata
          entries fall back to zero / empty values, and every counter mapping
          is normalised to ``{str: int}`` by the same rule.
    """
    if not KuzuGraph.exists():
        return GraphMetaOutput(
            success=True,
            enabled=False,
            db_path=resolve_kuzu_path(),
            message="Kuzu graph not present; run java-codebase-rag reprocess or build_ast_graph.py",
        )
    try:
        graph = KuzuGraph.get()
        meta = graph.meta()
    except Exception as e:
        return GraphMetaOutput(
            success=False,
            enabled=_graph_enabled(),
            db_path=resolve_kuzu_path(),
            message=f"Kuzu open failed: {e}",
        )
    if "error" in meta:
        return GraphMetaOutput(
            success=False,
            enabled=_graph_enabled(),
            db_path=meta.get("db_path", resolve_kuzu_path()),
            message=str(meta["error"]),
        )
    # Per-module / per-microservice tallies are best-effort extras: a failure
    # here must not hide the core metadata, so fall back to empty maps.
    try:
        mod_counts = graph.module_counts()
    except Exception:
        mod_counts = {}
    try:
        ms_counts = graph.microservice_counts()
    except Exception:
        ms_counts = {}
    return GraphMetaOutput(
        success=True,
        enabled=_graph_enabled(),
        db_path=meta.get("db_path", resolve_kuzu_path()),
        ontology_version=int(meta.get("ontology_version") or 0),
        built_at=int(meta.get("built_at") or 0),
        source_root=str(meta.get("source_root") or ""),
        parse_errors=int(meta.get("parse_errors") or 0),
        counts=_int_counts(meta.get("counts")),
        module_counts=mod_counts,
        microservice_counts=ms_counts,
        routes_total=int(meta.get("routes_total") or 0),
        exposes_total=int(meta.get("exposes_total") or 0),
        routes_by_framework=_int_counts(meta.get("routes_by_framework")),
        routes_resolved_pct=float(meta.get("routes_resolved_pct") or 0.0),
        routes_from_brownfield_pct=float(meta.get("routes_from_brownfield_pct") or 0.0),
        routes_by_layer=_int_counts(meta.get("routes_by_layer")),
        edge_counts=_int_counts(meta.get("edge_counts")),
        http_calls_match_breakdown=_int_counts(meta.get("http_calls_match_breakdown")),
        async_calls_match_breakdown=_int_counts(meta.get("async_calls_match_breakdown")),
        cross_service_calls_total=int(meta.get("cross_service_calls_total") or 0),
        cross_service_resolution=meta.get("cross_service_resolution"),
    )
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def list_code_index_tables_payload() -> IndexInfoOutput:
    """Assemble an ``IndexInfoOutput`` describing the active index configuration.

    Combines the LanceDB location, the embedding model resolved for this
    process, the source root, the cocoindex target, a copy of the table map
    and the current graph metadata snapshot.
    """
    uri = _resolve_lancedb_uri()
    model_name = resolved_sbert_model_for_process_env(SBERT_MODEL)
    source_root = str(_project_root())
    table_map = dict(TABLES)
    graph_meta = _graph_meta_output()
    return IndexInfoOutput(
        lancedb_uri=uri,
        embedding_model=model_name,
        project_root=source_root,
        cocoindex_target=_COCOINDEX_TARGET,
        tables=table_map,
        graph=graph_meta,
    )
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
async def run_refresh_pipeline(*, quiet: bool = False) -> RefreshIndexOutput:
    """Rebuild the vector index, then the graph, using two subprocesses.

    Phase ``"vectors"`` runs ``cocoindex update <target> --full-reprocess -f``
    with the ``cocoindex`` executable that sits next to the running
    interpreter. Phase ``"graph"`` runs ``build_ast_graph.py`` from this
    module's directory; it is attempted only when the vectors phase exited
    with 0 and the builder script exists.

    Args:
        quiet: When true, subprocess output is only captured. When false,
            output is also relayed while it is collected, start/finish
            progress events are emitted for the vectors phase, and the graph
            builder is started with ``--verbose``.

    Returns:
        A ``RefreshIndexOutput``. Subprocess failures are reported in the
        result rather than raised. ``phases_run`` lists the phases whose
        subprocess was actually started: it is empty for setup failures
        (nothing spawned) and non-empty once a phase has begun, including the
        case where the process started but collecting its output failed.
    """
    root = _project_root()
    cocoindex_bin = Path(sys.executable).parent / "cocoindex"
    if not cocoindex_bin.is_file():
        return RefreshIndexOutput(
            success=False,
            message=f"cocoindex not found next to Python: {cocoindex_bin}",
            phases_run=[],
        )
    bundle_dir = Path(__file__).resolve().parent
    flow_path = root / "java_index_flow_lancedb.py"
    if not flow_path.is_file():
        # Fall back to the flow definition located alongside this module.
        flow_path = bundle_dir / "java_index_flow_lancedb.py"
        if not flow_path.is_file():
            return RefreshIndexOutput(
                success=False,
                message=f"java_index_flow_lancedb.py not found under {root} nor {bundle_dir}",
                phases_run=[],
            )

    # ---- Phase 1: vectors (cocoindex) ------------------------------------
    proc: asyncio.subprocess.Process | None = None
    out_b, err_b = b"", b""
    vectors_code = -1  # reported to the progress hook if no real exit code is available
    if not quiet:
        emit_lance_cocoindex_start(root)
    started_at = time.perf_counter()
    try:
        proc = await asyncio.create_subprocess_exec(
            str(cocoindex_bin),
            "update",
            _COCOINDEX_TARGET,
            "--full-reprocess",
            "-f",
            cwd=str(flow_path.parent),
            env=_cocoindex_subprocess_env(root),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        if quiet:
            out_b, err_b = await proc.communicate()
        else:
            out_b, err_b = await accumulate_and_relay_subprocess_streams(proc, relay=True)
        if proc.returncode is not None:
            vectors_code = proc.returncode
    except Exception as exc:
        if proc is None:
            # Nothing was spawned: a setup failure, so phases_run stays empty.
            return RefreshIndexOutput(
                success=False,
                message=f"spawn failed: {exc!s}",
                phases_run=[],
            )
        # The process did start; only collecting its output failed. Record the
        # phase so callers can tell a build failure from a preflight failure.
        return RefreshIndexOutput(
            success=False,
            exit_code=proc.returncode,
            message=f"cocoindex run failed after spawn: {exc!s}",
            phases_run=["vectors"],
        )
    finally:
        if not quiet:
            emit_lance_cocoindex_finish(
                elapsed_s=time.perf_counter() - started_at,
                exit_code=vectors_code,
            )
    assert proc is not None  # invariant after a successful spawn; narrows the type
    out = out_b.decode(errors="replace")
    err = err_b.decode(errors="replace")
    ok = proc.returncode == 0

    # ---- Phase 2: graph (build_ast_graph.py) -----------------------------
    phases_run: list[Literal["vectors", "graph"]] = ["vectors"]
    graph_code: int | None = None
    graph_out = ""
    graph_err = ""
    builder = bundle_dir / "build_ast_graph.py"
    if ok and builder.is_file():
        try:
            graph_args = [
                sys.executable,
                str(builder),
                "--source-root",
                str(root),
                "--kuzu-path",
                resolve_kuzu_path(),
            ]
            if not quiet:
                graph_args.append("--verbose")
            gproc = await asyncio.create_subprocess_exec(
                *graph_args,
                cwd=str(root),
                env=_cocoindex_subprocess_env(root),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            # Recorded right after the spawn so a later read failure still
            # counts as a started phase.
            phases_run = ["vectors", "graph"]
            if quiet:
                gout_b, gerr_b = await gproc.communicate()
            else:
                gout_b, gerr_b = await accumulate_and_relay_subprocess_streams(gproc, relay=True)
            graph_code = gproc.returncode
            graph_out = gout_b.decode(errors="replace")
            graph_err = gerr_b.decode(errors="replace")
        except Exception as exc:
            graph_code = -1
            graph_err = f"graph builder spawn failed: {exc}"

    message: str | None = None
    if not ok:
        message = f"cocoindex exit {proc.returncode}"
    elif graph_code is not None and graph_code != 0:
        message = f"graph builder exit {graph_code}"
    return RefreshIndexOutput(
        success=ok and (graph_code is None or graph_code == 0),
        exit_code=proc.returncode,
        # Keep only the tail of each stream; a negative slice on a shorter
        # string returns the whole string.
        stdout=out[-8000:],
        stderr=err[-8000:],
        message=message,
        graph_exit_code=graph_code,
        graph_stdout=graph_out[-4000:],
        graph_stderr=graph_err[-4000:],
        phases_run=phases_run,
    )
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def create_mcp_server() -> FastMCP:
    """Create the FastMCP server and register its five tools.

    The tools are ``search``, ``find``, ``describe``, ``neighbors`` and
    ``resolve``. Each one is a thin async wrapper that forwards its arguments
    positionally to the matching ``mcp_v2.*_v2`` function on a worker thread
    via ``asyncio.to_thread``.

    Returns:
        The configured ``FastMCP`` instance (not yet running).
    """
    mcp = FastMCP("java-codebase-rag", instructions=_INSTRUCTIONS)

    # Tool parameter names, annotations and Field(...) metadata below make up
    # the tool schema exposed to clients; treat them as public interface.
    @mcp.tool(
        name="search",
        description=(
            "Ranked chunk retrieval: `query` is opaque text (natural language or code fragments); "
            "results are score-ranked, not boolean-matched. Optional `filter` uses the same NodeFilter "
            "schema as `find` but only **symbol-applicable** fields apply (strict frame). Wildcards "
            "(`*`, `?`) in prefix fields are rejected—use ranked `query` text instead. There is **no** "
            "structured DSL inside `query`; structured predicates belong in `find`. "
            "For identifier-shaped lookups (FQN, id prefix, route/client identifiers, …), use `resolve` first; "
            "use `search` for natural-language or ranked fuzzy discovery. "
            "Successful responses echo `limit`/`offset` and may include `hints` (advisory next-step strings)."
        ),
    )
    async def search(
        query: str = Field(description="Search query"),
        table: Literal["java", "sql", "yaml", "all"] = Field(
            default="java",
            description="Which content table to search. 'all' fuses java/sql/yaml results.",
        ),
        hybrid: bool = Field(
            default=False,
            description="If true, fuse FTS + vector (single-table java/sql/yaml only)",
        ),
        limit: int = Field(default=5, ge=1, le=50, description="Max hits to return"),
        offset: int = Field(default=0, ge=0, le=500, description="Skip this many hits (pagination)"),
        path_contains: str | None = Field(
            default=None,
            description="Substring match on file path (pre-filter from index)",
        ),
        filter: dict[str, Any] | str | None = Field(
            default=None,
            description=(
                "Optional NodeFilter post-filter on symbol-oriented hit rows. Unknown keys or populated fields not "
                "applicable to symbols return success=false. Prefer a JSON object; a JSON-encoded string is accepted."
            ),
        ),
    ) -> mcp_v2.SearchOutput:
        # Run the synchronous search off the event loop. The trailing None is a
        # positional argument whose meaning is defined by mcp_v2.search_v2
        # (not visible here).
        return await asyncio.to_thread(
            mcp_v2.search_v2,
            query,
            table,
            hybrid,
            limit,
            offset,
            path_contains,
            filter,
            None,
        )

    @mcp.tool(
        name="find",
        description=(
            "Exact structured listing for one node kind. Per-kind applicable fields: **symbol** — "
            "microservice, module, role, exclude_roles, annotation, capability, fqn_prefix, symbol_kind, symbol_kinds; "
            "**route** — microservice, module, http_method, path_prefix, framework; **client** — microservice, module, "
            "source_layer, client_kind, target_service, target_path_prefix, http_method; **producer** — microservice, "
            "module, source_layer, producer_kind, topic_prefix. "
            "Wildcards in prefix fields are rejected. An empty filter (`{}`) or `filter=None` means no predicate (all nodes of "
            "that kind; use pagination). Unknown keys or inapplicable populated fields return success=false. "
            "Successful responses echo `limit`/`offset` and may include `hints` (advisory next-step strings)."
        ),
    )
    async def find(
        kind: Literal["symbol", "route", "client", "producer"] = Field(
            description=(
                "Which graph table to search. 'symbol' = declarations, "
                "'route' = endpoints, 'client' = outbound HTTP clients, "
                "'producer' = outbound async producers."
            )
        ),
        # NOTE(review): the tool description mentions `filter=None`, but this
        # parameter is required (`...`) and its annotation does not include
        # None — confirm which of the two is intended.
        filter: dict[str, Any] | str = Field(
            ...,
            description=(
                "Required NodeFilter dict (extra keys forbidden). Fields must be applicable to `kind`. "
                "Prefer a JSON object; a JSON-encoded string is accepted."
            ),
        ),
        limit: int = Field(default=25, ge=1, le=500, description="Max nodes to return"),
        offset: int = Field(default=0, ge=0, le=499, description="Skip this many nodes (pagination)"),
    ) -> mcp_v2.FindOutput:
        # Trailing None: positional argument defined by mcp_v2.find_v2 (not visible here).
        return await asyncio.to_thread(mcp_v2.find_v2, kind, filter, limit, offset, None)

    @mcp.tool(
        name="describe",
        description=(
            "Full node record plus `edge_summary` (in/out counts per stored edge label, plus optional describe-time keys). Type Symbols may add "
            "composed keys DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, and DECLARES.EXPOSES (navigable on type Symbols via neighbors, out only); "
            "method Symbols may add override-axis virtual keys (OVERRIDDEN_BY, OVERRIDDEN_BY.DECLARES_CLIENT, OVERRIDDEN_BY.DECLARES_PRODUCER, "
            "OVERRIDDEN_BY.EXPOSES, plus an `OVERRIDES` map entry that merges stored `[:OVERRIDES]` counts with the dispatch-up rollup per direction). "
            "Override-axis virtual keys are navigable via neighbors on non-static method Symbol origins "
            "(out only; composed keys include via_id in attrs). The stored `OVERRIDES` relationship "
            "is also a normal edge label (e.g. direction in from declaration toward overriders). "
            "Pass `id` for any kind, or exact `fqn` for Symbol lookup (`id` wins when both are set). "
            "`describe(fqn=…)` keeps the first graph row when multiple symbols share that FQN; when an FQN may collide, "
            "prefer `resolve(identifier=…, hint_kind='symbol')` first, then `describe(id=…)` on the chosen node. "
            "Successful responses may include `hints` (advisory next-step strings)."
        ),
    )
    async def describe(
        id: str | None = Field(
            default=None,
            description=(
                "Graph node id: sym:, route:, client:, or producer: prefix "
                '(e.g. sym:com.bank.chat.core.api.ChatController#joinOperator(JoinOperatorRequest); '
                "producer: p:a1b2c3d4e5f67890 — the stored id from the graph, not a human-readable "
                "pipe key). For producers by topic, prefer resolve(identifier=<topic>, hint_kind='producer'). "
                "When set, takes precedence over fqn."
            ),
        ),
        fqn: str | None = Field(
            default=None,
            description="Exact FQN for Symbol lookup (alternative to id; Symbol kind only)",
        ),
    ) -> mcp_v2.DescribeOutput:
        # Trailing None: positional argument defined by mcp_v2.describe_v2 (not visible here).
        return await asyncio.to_thread(mcp_v2.describe_v2, id, fqn, None)

    @mcp.tool(
        name="neighbors",
        description=(
            "Graph walk: **direction** (`in` | `out`) and non-empty **edge_types** are required (stored labels for one hop; "
            "type Symbol origins may also pass composed DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, or DECLARES.EXPOSES "
            "for 2-hop member rollups; method Symbol origins may pass OVERRIDDEN_BY, OVERRIDDEN_BY.DECLARES_CLIENT, "
            "OVERRIDDEN_BY.DECLARES_PRODUCER, OVERRIDDEN_BY.EXPOSES for override-axis rollups — out only, via_id in "
            "attrs on composed keys). "
            "Optional `filter` applies to each neighbor endpoint row; populated fields must be applicable to that "
            "neighbor's kind—mixed-kind result sets fail on the first inapplicable neighbor (strict frame). "
            "Optional `edge_filter` requires edge_types=['CALLS'] only (no composed dot-keys or extra stored "
            "labels); projects the ordered CALLS stream by edge attributes (min_confidence, strategies, "
            "callee_declaring_role). Wildcards in prefix fields are rejected. Unknown filter keys return success=false. "
            "Successful responses echo `requested_edge_types` and may include `hints` (advisory next-step strings; "
            "empty results may include EDGE_SCHEMA-driven traversal hints). "
            "Each edge's `attrs.strategy` indicates resolution quality (brownfield/fallback vs primary paths)."
        ),
    )
    async def neighbors(
        ids: str | list[str] = Field(
            description="Origin symbol/route/client/producer id, or list for batch",
        ),
        direction: Literal["in", "out"] = Field(
            description="Required. 'in' = predecessors (callers), 'out' = successors (callees). No default.",
        ),
        edge_types: list[mcp_v2.NeighborEdgeType] = Field(
            description=(
                "Required non-empty list of stored edge labels (e.g. CALLS, EXPOSES, HTTP_CALLS, OVERRIDES) "
                "and/or composed DECLARES.DECLARES_* (type Symbol origin, out only) or OVERRIDDEN_BY* "
                "(non-static method Symbol origin, out only)"
            ),
        ),
        limit: int = Field(
            default=25,
            ge=1,
            le=500,
            description=(
                "Max edges after concatenating all origins (ids order; offset/limit on merged list)"
            ),
        ),
        offset: int = Field(
            default=0,
            ge=0,
            le=1000,
            description="Skip this many edges after merge (pagination)",
        ),
        filter: dict[str, Any] | str | None = Field(
            default=None,
            description=(
                "Optional NodeFilter on the neighbor node. Same applicability rules as `find` for that node's kind. "
                "Prefer a JSON object; a JSON-encoded string is accepted."
            ),
        ),
        edge_filter: dict[str, Any] | str | None = Field(
            default=None,
            description=(
                "Optional EdgeFilter on CALLS edge attributes (edge_types=['CALLS'] only). Use "
                "callee_declaring_role for callee stereotype projection — not NodeFilter.role on method neighbors. "
                "Mutually exclusive with include_unresolved. Prefer a JSON object; a JSON-encoded string is accepted."
            ),
        ),
        include_unresolved: bool = Field(
            default=False,
            description=(
                "When true with edge_types=['CALLS'] and direction='out', interleave UnresolvedCallSite "
                "rows (row_kind='unresolved_call_site') with resolved CALLS in source order. "
                "Mutually exclusive with edge_filter."
            ),
        ),
        dedup_calls: bool = Field(
            default=False,
            description=(
                "When true with edge_types=['CALLS'], collapse identical (origin, callee) CALLS to one row "
                "with call_site_count and call_site_lines; unresolved sites are not deduped."
            ),
        ),
    ) -> mcp_v2.NeighborsOutput:
        # Arguments are forwarded positionally, so their order must match
        # mcp_v2.neighbors_v2. Trailing None: defined by that function (not visible here).
        return await asyncio.to_thread(
            mcp_v2.neighbors_v2,
            ids,
            direction,
            edge_types,
            limit,
            offset,
            filter,
            edge_filter,
            include_unresolved,
            dedup_calls,
            None,
        )

    @mcp.tool(
        name="resolve",
        description=(
            "Identifier-shaped node lookup (FQN, sym:/route:/client:/producer: id, HTTP method+path, "
            "route path template, client target_service, target+path pair, or producer topic). Returns "
            "status=one (single node), many (≥2 ranked candidates with reason), or none "
            "(no match — fall back to search(query=...) for natural language or fuzzy text). "
            "Optional hint_kind narrows to symbol, route, client, or producer. "
            "Successful responses may include advisory hints (same contract as other v2 tools). "
            "Malformed empty/whitespace identifier returns success=false. "
            "Examples: resolve('com.foo.Bar', hint_kind='symbol'); "
            "resolve('GET /api/v1/customers', hint_kind='route'); "
            "resolve('the client that handles assignments') → none (use search instead)."
        ),
    )
    async def resolve(
        identifier: str = Field(
            description=(
                "Identifier-shaped node lookup (FQN, id prefix, route path, client target, producer topic, …)"
            ),
        ),
        hint_kind: Literal["symbol", "route", "client", "producer"] | None = Field(
            default=None,
            description="Optional kind constraint. Omit to search symbol, route, client, and producer.",
        ),
    ) -> mcp_v2.ResolveOutput:
        # Trailing None: positional argument defined by mcp_v2.resolve_v2 (not visible here).
        return await asyncio.to_thread(mcp_v2.resolve_v2, identifier, hint_kind, None)

    return mcp
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def main() -> None:
    """Entry point: emit legacy-environment hints, then serve MCP over stdio until it stops."""
    emit_legacy_env_hints_if_present()
    server = create_mcp_server()
    asyncio.run(server.run_stdio_async())
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
# Start the stdio server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|