polycodegraph 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. codegraph/__init__.py +10 -0
  2. codegraph/analysis/__init__.py +30 -0
  3. codegraph/analysis/_common.py +125 -0
  4. codegraph/analysis/blast_radius.py +63 -0
  5. codegraph/analysis/cycles.py +79 -0
  6. codegraph/analysis/dataflow.py +861 -0
  7. codegraph/analysis/dead_code.py +165 -0
  8. codegraph/analysis/hotspots.py +68 -0
  9. codegraph/analysis/infrastructure.py +439 -0
  10. codegraph/analysis/metrics.py +52 -0
  11. codegraph/analysis/report.py +222 -0
  12. codegraph/analysis/roles.py +323 -0
  13. codegraph/analysis/untested.py +79 -0
  14. codegraph/cli.py +1506 -0
  15. codegraph/config.py +64 -0
  16. codegraph/embed/__init__.py +35 -0
  17. codegraph/embed/chunker.py +120 -0
  18. codegraph/embed/embedder.py +113 -0
  19. codegraph/embed/query.py +181 -0
  20. codegraph/embed/store.py +360 -0
  21. codegraph/graph/__init__.py +0 -0
  22. codegraph/graph/builder.py +212 -0
  23. codegraph/graph/schema.py +69 -0
  24. codegraph/graph/store_networkx.py +55 -0
  25. codegraph/graph/store_sqlite.py +249 -0
  26. codegraph/mcp_server/__init__.py +6 -0
  27. codegraph/mcp_server/server.py +933 -0
  28. codegraph/parsers/__init__.py +0 -0
  29. codegraph/parsers/base.py +70 -0
  30. codegraph/parsers/go.py +570 -0
  31. codegraph/parsers/python.py +1707 -0
  32. codegraph/parsers/typescript.py +1397 -0
  33. codegraph/py.typed +0 -0
  34. codegraph/resolve/__init__.py +4 -0
  35. codegraph/resolve/calls.py +480 -0
  36. codegraph/review/__init__.py +31 -0
  37. codegraph/review/baseline.py +32 -0
  38. codegraph/review/differ.py +211 -0
  39. codegraph/review/hook.py +70 -0
  40. codegraph/review/risk.py +219 -0
  41. codegraph/review/rules.py +342 -0
  42. codegraph/viz/__init__.py +17 -0
  43. codegraph/viz/_style.py +45 -0
  44. codegraph/viz/dashboard.py +740 -0
  45. codegraph/viz/diagrams.py +370 -0
  46. codegraph/viz/explore.py +453 -0
  47. codegraph/viz/hld.py +683 -0
  48. codegraph/viz/html.py +115 -0
  49. codegraph/viz/mermaid.py +111 -0
  50. codegraph/viz/svg.py +77 -0
  51. codegraph/web/__init__.py +4 -0
  52. codegraph/web/server.py +165 -0
  53. codegraph/web/static/app.css +664 -0
  54. codegraph/web/static/app.js +919 -0
  55. codegraph/web/static/index.html +112 -0
  56. codegraph/web/static/views/architecture.js +1671 -0
  57. codegraph/web/static/views/graph3d.css +564 -0
  58. codegraph/web/static/views/graph3d.js +999 -0
  59. codegraph/web/static/views/graph3d_transform.js +984 -0
  60. codegraph/workspace/__init__.py +34 -0
  61. codegraph/workspace/config.py +110 -0
  62. codegraph/workspace/operations.py +294 -0
  63. polycodegraph-0.1.0.dist-info/METADATA +687 -0
  64. polycodegraph-0.1.0.dist-info/RECORD +67 -0
  65. polycodegraph-0.1.0.dist-info/WHEEL +4 -0
  66. polycodegraph-0.1.0.dist-info/entry_points.txt +2 -0
  67. polycodegraph-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,933 @@
1
+ """MCP stdio server exposing codegraph analysis tools."""
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ import json
6
+ from collections import deque
7
+ from collections.abc import Callable
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ import networkx as nx
12
+
13
+ # ---------------------------------------------------------------------------
14
+ # Graph loading — cached per-process
15
+ # ---------------------------------------------------------------------------
16
+
17
# Process-wide cache: the most recently loaded graph and the path it came from.
_CACHED_GRAPH: nx.MultiDiGraph | None = None
_CACHED_DB_PATH: Path | None = None


def _load_graph(db_path: Path | None = None) -> nx.MultiDiGraph:
    """Load (or return cached) the MultiDiGraph from *db_path*.

    If *db_path* is None, auto-resolves to ``cwd/.codegraph/graph.db``.
    A different *db_path* forces a reload.

    Args:
        db_path: Location of the SQLite graph database, or None for the
            default under the current working directory.

    Returns:
        The graph produced by ``to_digraph`` for the resolved path. The same
        object is returned on later calls with the same resolved path.
    """
    global _CACHED_GRAPH, _CACHED_DB_PATH

    resolved = db_path or (Path.cwd() / ".codegraph" / "graph.db")
    if _CACHED_GRAPH is not None and resolved == _CACHED_DB_PATH:
        return _CACHED_GRAPH

    # Imported lazily so importing this module does not pull in the store.
    from codegraph.graph.store_networkx import to_digraph
    from codegraph.graph.store_sqlite import SQLiteGraphStore

    store = SQLiteGraphStore(resolved)
    try:
        g = to_digraph(store)
    finally:
        # Always release the store, even when materialising the graph fails.
        store.close()

    # Only update the cache after a fully successful load.
    _CACHED_GRAPH = g
    _CACHED_DB_PATH = resolved
    return g
43
+
44
+
45
+ # ---------------------------------------------------------------------------
46
+ # Pure tool-handler functions (testable without MCP machinery)
47
+ # ---------------------------------------------------------------------------
48
+
49
def _node_df_metadata(attrs: dict[str, Any]) -> dict[str, Any]:
    """Pick the ``params`` / ``returns`` / ``role`` fields out of a node's metadata.

    ``params`` and ``returns`` are copied whenever the key is present (even if
    the value is None); ``role`` is copied only when present and not None.
    A missing, falsy or non-dict ``metadata`` attribute yields ``{}``.
    """
    metadata = attrs.get("metadata") or {}
    if not isinstance(metadata, dict):
        return {}

    extracted: dict[str, Any] = {
        field: metadata[field]
        for field in ("params", "returns")
        if field in metadata
    }
    role = metadata.get("role")
    if role is not None:
        extracted["role"] = role
    return extracted
62
+
63
+
64
def tool_find_symbol(
    graph: nx.MultiDiGraph,
    query: str,
    kind: str | None = None,
    limit: int = 20,
    role: str | None = None,
) -> list[dict[str, Any]]:
    """Substring match on qualname (case-insensitive).

    Args:
        graph: Graph whose nodes are scanned in iteration order.
        query: Substring looked for in each node's ``qualname`` attribute
            (falling back to the node ID when the attribute is missing).
        kind: Optional node-kind filter, compared case-insensitively.
        limit: Maximum number of hits; values ``<= 0`` yield no hits.
        role: Optional filter on ``metadata.role`` (the MCP schema allows
            HANDLER, SERVICE, COMPONENT, REPO).

    Returns:
        Up to *limit* dicts with ``qualname``, ``kind``, ``file``, ``line``
        plus any ``params`` / ``returns`` / ``role`` metadata present.
    """
    # The cap used to be checked only after appending, so a non-positive
    # limit still produced one hit.
    if limit <= 0:
        return []

    needle = query.lower()
    wanted_kind = kind.lower() if kind else None

    results: list[dict[str, Any]] = []
    for nid, attrs in graph.nodes(data=True):
        qualname = str(attrs.get("qualname") or nid)
        if needle not in qualname.lower():
            continue
        node_kind = str(attrs.get("kind") or "")
        if wanted_kind is not None and node_kind.lower() != wanted_kind:
            continue
        df_md = _node_df_metadata(attrs)
        if role is not None and df_md.get("role") != role:
            continue
        hit: dict[str, Any] = {
            "qualname": qualname,
            "kind": node_kind,
            "file": str(attrs.get("file") or ""),
            "line": int(attrs.get("line_start") or 0),
        }
        hit.update(df_md)
        results.append(hit)
        if len(results) >= limit:
            break
    return results
99
+
100
+
101
def tool_callers(
    graph: nx.MultiDiGraph,
    qualname: str,
    depth: int = 1,
) -> list[dict[str, Any]]:
    """Reverse BFS from *qualname* over reference edges (who refers to this?).

    Incoming edges are followed only when their key is a member of
    ``REFERENCE_EDGE_KINDS``.

    Args:
        graph: Graph to traverse.
        qualname: Symbol to start from; resolved through ``_resolve_node``.
        depth: Maximum number of hops away from the start node.

    Returns:
        At most 100 entries, each with ``qualname``, ``file``, ``depth`` and,
        when present, node metadata plus the edge's ``args`` / ``kwargs``.
        An empty list when *qualname* cannot be resolved.
    """
    from codegraph.analysis._common import REFERENCE_EDGE_KINDS

    max_results = 100

    # Find the node ID matching qualname
    target = _resolve_node(graph, qualname)
    if target is None:
        return []

    visited: dict[str, int] = {target: 0}
    queue: deque[str] = deque([target])
    results: list[dict[str, Any]] = []

    while queue and len(results) < max_results:
        current = queue.popleft()
        current_depth = visited[current]
        if current_depth >= depth:
            continue
        for src, _dst, key, edata in graph.in_edges(current, keys=True, data=True):
            if key not in REFERENCE_EDGE_KINDS or src in visited:
                continue
            visited[src] = current_depth + 1
            queue.append(src)
            attrs = graph.nodes.get(src) or {}
            entry: dict[str, Any] = {
                "qualname": str(attrs.get("qualname") or src),
                "file": str(attrs.get("file") or ""),
                "depth": current_depth + 1,
            }
            entry.update(_node_df_metadata(attrs))
            edge_md = edata.get("metadata") or {}
            if isinstance(edge_md, dict):
                if "args" in edge_md:
                    entry["args"] = list(edge_md.get("args") or [])
                if "kwargs" in edge_md:
                    entry["kwargs"] = dict(edge_md.get("kwargs") or {})
            results.append(entry)
            # Enforce the cap inside the edge loop too; otherwise one node
            # with very many incoming edges overshoots it.
            if len(results) >= max_results:
                break

    return results
146
+
147
+
148
def tool_callees(
    graph: nx.MultiDiGraph,
    qualname: str,
    depth: int = 1,
) -> list[dict[str, Any]]:
    """Forward BFS from *qualname* over CALLS edges (what does this call?).

    Outgoing edges are followed only when their key equals
    ``EdgeKind.CALLS.value``.

    Args:
        graph: Graph to traverse.
        qualname: Symbol to start from; resolved through ``_resolve_node``.
        depth: Maximum number of hops away from the start node.

    Returns:
        At most 100 entries, each with ``qualname``, ``file``, ``depth`` and,
        when present, node metadata plus the edge's ``args`` / ``kwargs``.
        An empty list when *qualname* cannot be resolved.
    """
    from codegraph.graph.schema import EdgeKind

    max_results = 100
    calls_key = EdgeKind.CALLS.value

    target = _resolve_node(graph, qualname)
    if target is None:
        return []

    visited: dict[str, int] = {target: 0}
    queue: deque[str] = deque([target])
    results: list[dict[str, Any]] = []

    while queue and len(results) < max_results:
        current = queue.popleft()
        current_depth = visited[current]
        if current_depth >= depth:
            continue
        for _src, dst, key, edata in graph.out_edges(current, keys=True, data=True):
            if key != calls_key or dst in visited:
                continue
            visited[dst] = current_depth + 1
            queue.append(dst)
            attrs = graph.nodes.get(dst) or {}
            entry: dict[str, Any] = {
                "qualname": str(attrs.get("qualname") or dst),
                "file": str(attrs.get("file") or ""),
                "depth": current_depth + 1,
            }
            entry.update(_node_df_metadata(attrs))
            edge_md = edata.get("metadata") or {}
            if isinstance(edge_md, dict):
                if "args" in edge_md:
                    entry["args"] = list(edge_md.get("args") or [])
                if "kwargs" in edge_md:
                    entry["kwargs"] = dict(edge_md.get("kwargs") or {})
            results.append(entry)
            # Enforce the cap inside the edge loop too; otherwise one node
            # with very many outgoing calls overshoots it.
            if len(results) >= max_results:
                break

    return results
192
+
193
+
194
def tool_blast_radius(
    graph: nx.MultiDiGraph,
    qualname: str,
    depth: int = 2,
) -> dict[str, Any]:
    """Run the ``blast_radius`` analysis for *qualname* and return it as a dict.

    The symbol is resolved through ``_resolve_node``; when that fails the raw
    *qualname* is handed to the analysis unchanged.
    """
    from codegraph.analysis.blast_radius import blast_radius

    resolved = _resolve_node(graph, qualname)
    start = qualname if resolved is None else resolved
    radius = blast_radius(graph, start, depth=depth)

    payload: dict[str, Any] = {}
    payload["target"] = radius.target
    payload["size"] = radius.size
    payload["nodes"] = radius.nodes
    payload["files"] = sorted(radius.files)
    payload["test_nodes"] = radius.test_nodes
    return payload
212
+
213
+
214
def tool_subgraph(
    graph: nx.MultiDiGraph,
    qualnames: list[str],
    depth: int = 1,
) -> dict[str, Any]:
    """Return the subgraph induced by the seeds plus *depth* hops of neighbours.

    Expansion follows both incoming and outgoing edges whose key is CALLS,
    IMPORTS or INHERITS. Seeds that ``_resolve_node`` cannot resolve are
    dropped. The result is ``{"nodes": [...], "edges": [...]}``.
    """
    from codegraph.graph.schema import EdgeKind

    followed = {
        EdgeKind.CALLS.value,
        EdgeKind.IMPORTS.value,
        EdgeKind.INHERITS.value,
    }

    resolved_ids = (_resolve_node(graph, qn) for qn in qualnames)
    seeds: set[str] = {nid for nid in resolved_ids if nid is not None}

    reached: set[str] = set()
    frontier = set(seeds)

    hops_left = depth
    while hops_left > 0:
        hops_left -= 1
        discovered: set[str] = set()
        for node in frontier:
            if node not in graph:
                continue
            # Successors reached through an allowed edge kind.
            discovered.update(
                dst
                for _src, dst, key in graph.out_edges(node, keys=True)
                if key in followed and dst not in reached
            )
            # Predecessors reached through an allowed edge kind.
            discovered.update(
                src
                for src, _dst, key in graph.in_edges(node, keys=True)
                if key in followed and src not in reached
            )
        reached |= frontier
        frontier = discovered - reached

    # The last frontier is part of the result even though it was not expanded.
    reached |= frontier
    sub = graph.subgraph(reached)

    nodes_out: list[dict[str, Any]] = [
        {
            "id": nid,
            "qualname": str(attrs.get("qualname") or nid),
            "kind": str(attrs.get("kind") or ""),
            "file": str(attrs.get("file") or ""),
        }
        for nid, attrs in sub.nodes(data=True)
    ]
    edges_out: list[dict[str, Any]] = [
        {"src": src, "dst": dst, "kind": key}
        for src, dst, key in sub.edges(keys=True)
    ]

    return {"nodes": nodes_out, "edges": edges_out}
271
+
272
+
273
def tool_dead_code(
    graph: nx.MultiDiGraph,
    limit: int = 50,
) -> list[dict[str, Any]]:
    """Serialise the first *limit* results of ``find_dead_code`` as plain dicts."""
    from codegraph.analysis.dead_code import find_dead_code

    candidates = find_dead_code(graph)[:limit]

    rows: list[dict[str, Any]] = []
    for item in candidates:
        row: dict[str, Any] = {
            "qualname": item.qualname,
            "kind": item.kind,
            "file": item.file,
            "line": item.line_start,
            "reason": item.reason,
        }
        rows.append(row)
    return rows
291
+
292
+
293
def tool_cycles(graph: nx.MultiDiGraph) -> dict[str, Any]:
    """Run ``find_cycles`` and return its import cycles, call cycles and total."""
    from codegraph.analysis.cycles import find_cycles

    def _as_dict(cycle: Any) -> dict[str, Any]:
        return {"node_ids": cycle.node_ids, "qualnames": cycle.qualnames}

    report = find_cycles(graph)
    import_cycles = [_as_dict(cycle) for cycle in report.import_cycles]
    call_cycles = [_as_dict(cycle) for cycle in report.call_cycles]
    return {
        "import_cycles": import_cycles,
        "call_cycles": call_cycles,
        "total": report.total,
    }
309
+
310
+
311
def tool_untested(
    graph: nx.MultiDiGraph,
    limit: int = 50,
) -> list[dict[str, Any]]:
    """Serialise the first *limit* results of ``find_untested`` as plain dicts."""
    from codegraph.analysis.untested import find_untested

    selected = find_untested(graph)[:limit]

    rows: list[dict[str, Any]] = []
    for item in selected:
        row: dict[str, Any] = {
            "qualname": item.qualname,
            "kind": item.kind,
            "file": item.file,
            "line": item.line_start,
            "incoming_calls": item.incoming_calls,
        }
        rows.append(row)
    return rows
329
+
330
+
331
def tool_hotspots(
    graph: nx.MultiDiGraph,
    limit: int = 20,
) -> list[dict[str, Any]]:
    """Serialise the results of ``find_hotspots(graph, limit=limit)`` as dicts.

    Each dict carries ``qualname``, ``kind``, ``file``, ``fan_in``,
    ``fan_out``, ``loc`` and ``score`` copied from the hotspot object.
    """
    from codegraph.analysis.hotspots import find_hotspots

    # Output keys are identical to the attribute names they are read from.
    exported = ("qualname", "kind", "file", "fan_in", "fan_out", "loc", "score")

    rows: list[dict[str, Any]] = []
    for spot in find_hotspots(graph, limit=limit):
        rows.append({field: getattr(spot, field) for field in exported})
    return rows
351
+
352
+
353
def tool_metrics(graph: nx.MultiDiGraph) -> dict[str, Any]:
    """Run ``compute_metrics`` and return its fields as a plain dict."""
    from codegraph.analysis.metrics import compute_metrics

    # Output keys are identical to the attribute names they are read from.
    exported = (
        "total_nodes",
        "total_edges",
        "nodes_by_kind",
        "edges_by_kind",
        "languages",
        "top_files_by_nodes",
        "unresolved_edges",
    )
    metrics = compute_metrics(graph)
    return {field: getattr(metrics, field) for field in exported}
367
+
368
+
369
+ # ---------------------------------------------------------------------------
370
+ # Helper
371
+ # ---------------------------------------------------------------------------
372
+
373
def _resolve_node(graph: nx.MultiDiGraph, qualname: str) -> str | None:
    """Return the node ID for *qualname*, or None if not found.

    Resolution order:

    1. *qualname* is itself a node ID in *graph* -> returned unchanged.
    2. Otherwise the first node (in iteration order) whose ``qualname``
       attribute equals *qualname* case-insensitively.

    No substring matching is performed. An empty *qualname* never matches by
    attribute; previously it resolved to the first node that had no
    ``qualname`` attribute at all.
    """
    if qualname in graph:
        return qualname

    wanted = qualname.lower()
    if not wanted:
        # "" would otherwise equal the "" placeholder used for nodes that
        # carry no qualname attribute.
        return None

    return next(
        (
            str(nid)
            for nid, attrs in graph.nodes(data=True)
            if str(attrs.get("qualname") or "").lower() == wanted
        ),
        None,
    )
385
+
386
+
387
+ # ---------------------------------------------------------------------------
388
+ # Tool registry — used for discovery and tests
389
+ # ---------------------------------------------------------------------------
390
+
391
# Signature shared by every registered handler: (graph, arguments) -> result.
_HandlerFn = Callable[["nx.MultiDiGraph", "dict[str, Any]"], Any]

#: Mapping of tool-name → (handler, input_schema_dict)
tool_registry: dict[str, tuple[_HandlerFn, dict[str, Any]]] = {}


def _register(
    name: str, schema: dict[str, Any]
) -> Callable[[_HandlerFn], _HandlerFn]:
    # pragma: codegraph-public-api
    """Build a decorator that files a handler in ``tool_registry`` under *name*.

    The decorated function is returned unchanged; a later registration under
    the same *name* replaces the earlier one.
    """

    def _bind(handler: _HandlerFn) -> _HandlerFn:
        tool_registry.update({name: (handler, schema)})
        return handler

    return _bind
405
+
406
+
407
@_register(
    "find_symbol",
    {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Substring to match in qualname",
            },
            "kind": {
                "type": "string",
                "description": "Filter by node kind",
            },
            "limit": {"type": "integer", "default": 20},
            "role": {
                "type": "string",
                "enum": ["HANDLER", "SERVICE", "COMPONENT", "REPO"],
                "description": "Filter by classified role (DF1.5)",
            },
        },
        "required": ["query"],
    },
)
def _handle_find_symbol(
    graph: nx.MultiDiGraph, args: dict[str, Any]
) -> Any:
    """Adapter for ``find_symbol``: unpack *args* and call ``tool_find_symbol``.

    ``query`` is required (``KeyError`` when missing); ``limit`` defaults to
    20; ``role`` is stringified only when supplied.
    """
    raw_role = args.get("role")
    query = str(args["query"])
    kind = args.get("kind")
    limit = int(args.get("limit", 20))
    role = None if raw_role is None else str(raw_role)
    return tool_find_symbol(graph, query=query, kind=kind, limit=limit, role=role)
435
+
436
+
437
@_register(
    "callers",
    {
        "type": "object",
        "properties": {
            "qualname": {"type": "string"},
            "depth": {"type": "integer", "default": 1},
        },
        "required": ["qualname"],
    },
)
def _handle_callers(graph: nx.MultiDiGraph, args: dict[str, Any]) -> Any:
    """Adapter for ``callers``: unpack *args* and call ``tool_callers``."""
    qualname = str(args["qualname"])
    depth = int(args.get("depth", 1))
    return tool_callers(graph, qualname=qualname, depth=depth)
450
+
451
+
452
@_register(
    "callees",
    {
        "type": "object",
        "properties": {
            "qualname": {"type": "string"},
            "depth": {"type": "integer", "default": 1},
        },
        "required": ["qualname"],
    },
)
def _handle_callees(graph: nx.MultiDiGraph, args: dict[str, Any]) -> Any:
    """Adapter for ``callees``: unpack *args* and call ``tool_callees``."""
    qualname = str(args["qualname"])
    depth = int(args.get("depth", 1))
    return tool_callees(graph, qualname=qualname, depth=depth)
465
+
466
+
467
@_register(
    "blast_radius",
    {
        "type": "object",
        "properties": {
            "qualname": {"type": "string"},
            "depth": {"type": "integer", "default": 2},
        },
        "required": ["qualname"],
    },
)
def _handle_blast_radius(graph: nx.MultiDiGraph, args: dict[str, Any]) -> Any:
    """Adapter for ``blast_radius``: unpack *args* and call ``tool_blast_radius``."""
    qualname = str(args["qualname"])
    depth = int(args.get("depth", 2))
    return tool_blast_radius(graph, qualname=qualname, depth=depth)
482
+
483
+
484
@_register(
    "subgraph",
    {
        "type": "object",
        "properties": {
            "qualnames": {"type": "array", "items": {"type": "string"}},
            "depth": {"type": "integer", "default": 1},
        },
        "required": ["qualnames"],
    },
)
def _handle_subgraph(graph: nx.MultiDiGraph, args: dict[str, Any]) -> Any:
    """Adapter for ``subgraph``: stringify each seed and call ``tool_subgraph``."""
    seeds = list(map(str, args["qualnames"]))
    depth = int(args.get("depth", 1))
    return tool_subgraph(graph, qualnames=seeds, depth=depth)
501
+
502
+
503
@_register(
    "dead_code",
    {
        "type": "object",
        "properties": {"limit": {"type": "integer", "default": 50}},
    },
)
def _handle_dead_code(graph: nx.MultiDiGraph, args: dict[str, Any]) -> Any:
    """Adapter for ``dead_code``: read ``limit`` (default 50) and call ``tool_dead_code``."""
    limit = int(args.get("limit", 50))
    return tool_dead_code(graph, limit=limit)
514
+
515
+
516
@_register("cycles", {"type": "object", "properties": {}})
def _handle_cycles(graph: nx.MultiDiGraph, args: dict[str, Any]) -> Any:
    """Adapter for ``cycles``; *args* is accepted but not used."""
    cycles = tool_cycles(graph)
    return cycles
522
+
523
+
524
@_register(
    "untested",
    {
        "type": "object",
        "properties": {"limit": {"type": "integer", "default": 50}},
    },
)
def _handle_untested(graph: nx.MultiDiGraph, args: dict[str, Any]) -> Any:
    """Adapter for ``untested``: read ``limit`` (default 50) and call ``tool_untested``."""
    limit = int(args.get("limit", 50))
    return tool_untested(graph, limit=limit)
535
+
536
+
537
@_register(
    "hotspots",
    {
        "type": "object",
        "properties": {"limit": {"type": "integer", "default": 20}},
    },
)
def _handle_hotspots(graph: nx.MultiDiGraph, args: dict[str, Any]) -> Any:
    """Adapter for ``hotspots``: read ``limit`` (default 20) and call ``tool_hotspots``."""
    limit = int(args.get("limit", 20))
    return tool_hotspots(graph, limit=limit)
548
+
549
+
550
@_register("metrics", {"type": "object", "properties": {}})
def _handle_metrics(graph: nx.MultiDiGraph, args: dict[str, Any]) -> Any:
    """Adapter for ``metrics``; *args* is accepted but not used."""
    metrics = tool_metrics(graph)
    return metrics
556
+
557
+
558
def tool_dataflow_routes(graph: nx.MultiDiGraph) -> list[dict[str, Any]]:
    """Return the HTTP route listing produced by ``serialize_route_edges``.

    This is a thin delegate; the shape and ordering of the entries are
    whatever ``codegraph.viz.hld.serialize_route_edges`` returns.
    NOTE(review): the previous docstring described entries as
    ``{handler_qn, method, path, framework}`` sorted by ``(path, method)``
    and ``[]`` when there are no ROUTE edges; confirm against that helper.
    """
    from codegraph.viz.hld import serialize_route_edges

    routes = serialize_route_edges(graph)
    return routes
567
+
568
+
569
@_register("dataflow_routes", {"type": "object", "properties": {}})
def _handle_dataflow_routes(
    graph: nx.MultiDiGraph, args: dict[str, Any]
) -> Any:
    """Adapter for ``dataflow_routes``; *args* is accepted but not used."""
    routes = tool_dataflow_routes(graph)
    return routes
577
+
578
+
579
+ # ---------------------------------------------------------------------------
580
+ # v0.3 — semantic + hybrid search
581
+ # ---------------------------------------------------------------------------
582
+
583
+
584
def tool_semantic_search(
    graph: nx.MultiDiGraph,
    query: str,
    k: int = 5,
    *,
    repo_root: Path | None = None,
) -> Any:
    """Run ``semantic_query`` for *query* and return its hits as dicts.

    Failures are reported in-band rather than raised: an ``{"error": ...}``
    dict is returned when the embeddings module cannot be imported, when the
    index is missing, or when the query itself fails. *graph* is not used.
    """
    try:
        from codegraph.embed.query import IndexMissingError, semantic_query
    except ImportError as import_err:
        failure: dict[str, Any] = {"error": "embeddings module unavailable"}
        failure["detail"] = str(import_err)
        return failure

    try:
        matches = semantic_query(query, k=k, repo_root=repo_root)
    except IndexMissingError as missing:
        return {"error": str(missing)}
    except Exception as exc:  # pragma: no cover
        return {"error": f"semantic search failed: {exc}"}
    else:
        return [match.as_dict() for match in matches]
606
+
607
+
608
def tool_hybrid_search(
    graph: nx.MultiDiGraph,
    query: str,
    k: int = 5,
    *,
    role: str | None = None,
    focus_qualname: str | None = None,
    repo_root: Path | None = None,
) -> Any:
    """Run ``hybrid_query`` for *query* and return its hits as dicts.

    *role*, *focus_qualname* (passed on as ``focus_qn``), *repo_root* and
    *graph* are forwarded to ``hybrid_query``. Failures are reported in-band:
    an ``{"error": ...}`` dict is returned when the embeddings module cannot
    be imported, when the index is missing, or when the query itself fails.
    """
    try:
        from codegraph.embed.query import IndexMissingError, hybrid_query
    except ImportError as import_err:
        failure: dict[str, Any] = {"error": "embeddings module unavailable"}
        failure["detail"] = str(import_err)
        return failure

    try:
        matches = hybrid_query(
            query,
            k=k,
            role=role,
            focus_qn=focus_qualname,
            repo_root=repo_root,
            graph=graph,
        )
    except IndexMissingError as missing:
        return {"error": str(missing)}
    except Exception as exc:  # pragma: no cover
        return {"error": f"hybrid search failed: {exc}"}
    else:
        return [match.as_dict(score_field="final_score") for match in matches]
639
+
640
+
641
@_register(
    "semantic_search",
    {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "Free-text query"},
            "k": {"type": "integer", "default": 5},
        },
        "required": ["query"],
    },
)
def _handle_semantic_search(graph: nx.MultiDiGraph, args: dict[str, Any]) -> Any:
    """Adapter for ``semantic_search``: unpack *args* and call ``tool_semantic_search``."""
    query = str(args["query"])
    k = int(args.get("k", 5))
    return tool_semantic_search(graph, query=query, k=k)
658
+
659
+
660
@_register(
    "hybrid_search",
    {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "Free-text query"},
            "k": {"type": "integer", "default": 5},
            "role": {
                "type": "string",
                "enum": ["HANDLER", "SERVICE", "COMPONENT", "REPO"],
                "description": "Filter to symbols with this role",
            },
            "focus_qualname": {
                "type": "string",
                "description": "Rerank hits by graph distance from this symbol",
            },
        },
        "required": ["query"],
    },
)
def _handle_hybrid_search(graph: nx.MultiDiGraph, args: dict[str, Any]) -> Any:
    """Adapter for ``hybrid_search``: unpack *args* and call ``tool_hybrid_search``.

    ``role`` and ``focus_qualname`` are stringified only when supplied.
    """
    raw_role = args.get("role")
    raw_focus = args.get("focus_qualname")
    query = str(args["query"])
    k = int(args.get("k", 5))
    role = None if raw_role is None else str(raw_role)
    focus = None if raw_focus is None else str(raw_focus)
    return tool_hybrid_search(
        graph, query=query, k=k, role=role, focus_qualname=focus
    )
690
+
691
+
692
@_register(
    "dataflow_fetches",
    {
        "type": "object",
        "properties": {
            "library": {
                "type": "string",
                "description": (
                    "Optional filter on library: "
                    "fetch | axios | swr | tanstack | apiclient"
                ),
            },
            "limit": {"type": "integer", "default": 200},
        },
    },
)
def _handle_dataflow_fetches(
    graph: nx.MultiDiGraph, args: dict[str, Any]
) -> Any:
    """Return the output of ``serialize_fetch_edges``, optionally filtered.

    A non-empty string ``library`` keeps only entries whose ``library`` field
    equals it. ``total`` counts the entries after filtering but before the
    ``limit`` (default 200) slice is applied to ``fetches``.
    """
    from codegraph.viz.hld import serialize_fetch_edges

    fetches = serialize_fetch_edges(graph)

    wanted_library = args.get("library")
    if isinstance(wanted_library, str) and wanted_library:
        fetches = [
            edge for edge in fetches if edge.get("library") == wanted_library
        ]

    cap = int(args.get("limit", 200))
    page = fetches[:cap]
    return {"fetches": page, "total": len(fetches)}
719
+
720
+
721
def tool_dataflow_trace(
    graph: nx.MultiDiGraph, entry: str, depth: int = 6
) -> dict[str, Any]:
    """Run ``codegraph.analysis.dataflow.trace`` from *entry* and return it as a dict.

    *depth* is passed on as ``max_depth``. When the trace returns None an
    ``{"error": ...}`` dict naming the entry is returned instead.
    """
    from codegraph.analysis.dataflow import trace as run_trace

    traced = run_trace(graph, entry, max_depth=depth)
    if traced is not None:
        return traced.to_dict()
    return {"error": f"entry not found in graph: {entry}"}
731
+
732
+
733
@_register(
    "dataflow_trace",
    {
        "type": "object",
        "properties": {
            "entry": {
                "type": "string",
                "description": (
                    "Function qualname (e.g. 'app.handlers.get_user') or "
                    "fetch shape ('GET /api/users/{id}')."
                ),
            },
            "depth": {"type": "integer", "default": 6},
        },
        "required": ["entry"],
    },
)
def _handle_dataflow_trace(
    graph: nx.MultiDiGraph, args: dict[str, Any]
) -> Any:
    """Adapter for ``dataflow_trace``: unpack *args* and call ``tool_dataflow_trace``."""
    entry = str(args["entry"])
    depth = int(args.get("depth", 6))
    return tool_dataflow_trace(graph, entry=entry, depth=depth)
758
+
759
+
760
+ # ---------------------------------------------------------------------------
761
+ # Workspace tools — multi-repo, ignore the single-repo `graph` parameter.
762
+ # Each handler loads its own graphs from the user-level workspace config.
763
+ # ---------------------------------------------------------------------------
764
+
765
+
766
@_register(
    "workspace_state",
    {"type": "object", "properties": {}, "required": []},
)
def _handle_workspace_state(
    graph: nx.MultiDiGraph, args: dict[str, Any]
) -> Any:
    """Load the workspace config and return ``workspace_state`` for it.

    Neither *graph* nor *args* is used.
    """
    from codegraph.workspace.config import load_workspace, resolve_workspace_path
    from codegraph.workspace.operations import workspace_state

    workspace_path = resolve_workspace_path()
    workspace = load_workspace(workspace_path)
    return workspace_state(workspace)
782
+
783
+
784
@_register(
    "workspace_diff_since",
    {
        "type": "object",
        "properties": {
            "ref": {
                "type": "string",
                "default": "main",
                "description": "Git ref to diff against (default: main).",
            },
        },
        "required": [],
    },
)
def _handle_workspace_diff_since(
    graph: nx.MultiDiGraph, args: dict[str, Any]
) -> Any:
    """Load the workspace config and return ``workspace_diff_since`` for it.

    A missing or falsy ``ref`` argument falls back to ``"main"``. *graph* is
    not used.
    """
    from codegraph.workspace.config import load_workspace, resolve_workspace_path
    from codegraph.workspace.operations import workspace_diff_since

    workspace_path = resolve_workspace_path()
    workspace = load_workspace(workspace_path)
    git_ref = str(args.get("ref") or "main")
    return workspace_diff_since(workspace, ref=git_ref)
807
+
808
+
809
@_register(
    "workspace_blast_radius",
    {
        "type": "object",
        "properties": {
            "symbol": {
                "type": "string",
                "description": "Qualname (or unambiguous substring) to look up across all repos.",
            },
            "depth": {
                "type": "integer",
                "description": "Optional max-hops; omit for full transitive radius.",
            },
        },
        "required": ["symbol"],
    },
)
def _handle_workspace_blast_radius(
    graph: nx.MultiDiGraph, args: dict[str, Any]
) -> Any:
    """Load the workspace config and return ``workspace_blast_radius`` for it.

    ``symbol`` is required; ``depth`` is converted to ``int`` when supplied
    and passed as None otherwise. *graph* is not used.
    """
    from codegraph.workspace.config import load_workspace, resolve_workspace_path
    from codegraph.workspace.operations import workspace_blast_radius

    workspace_path = resolve_workspace_path()
    workspace = load_workspace(workspace_path)

    symbol = str(args["symbol"])
    raw_depth = args.get("depth")
    if raw_depth is None:
        max_hops = None
    else:
        max_hops = int(raw_depth)
    return workspace_blast_radius(workspace, symbol=symbol, depth=max_hops)
837
+
838
+
839
+ # ---------------------------------------------------------------------------
840
+ # MCP Server
841
+ # ---------------------------------------------------------------------------
842
+
843
def _build_server(name: str, db_path: Path | None = None) -> Any:  # returns mcp.server.Server
    """Create the MCP ``Server`` and wire it to ``tool_registry``.

    Args:
        name: Server name passed to ``mcp.server.Server``.
        db_path: Graph database used for every non-workspace tool call.
            None keeps the default resolution done by ``_load_graph``
            (``cwd/.codegraph/graph.db``).

    Returns:
        The configured server with ``list_tools`` and ``call_tool`` handlers
        registered.
    """
    from mcp.server import Server
    from mcp.types import TextContent, Tool

    server: Any = Server(name)

    @server.list_tools()  # type: ignore[untyped-decorator]
    async def _list_tools() -> list[Tool]:
        return [
            Tool(name=tool_name, description=_tool_description(tool_name), inputSchema=schema)
            for tool_name, (_fn, schema) in tool_registry.items()
        ]

    @server.call_tool(validate_input=False)  # type: ignore[untyped-decorator]
    async def _call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]:
        if name not in tool_registry:
            raise ValueError(f"Unknown tool: {name}")
        handler_fn, _ = tool_registry[name]
        # Workspace tools self-load their per-repo graphs from the user-level
        # workspace config and don't need the cwd's graph. Skipping _load_graph
        # avoids materializing an empty .codegraph/graph.db in the server's cwd
        # for users who run `codegraph mcp serve` from arbitrary directories.
        if name.startswith("workspace_"):
            graph: nx.MultiDiGraph | None = None
        else:
            # Use the path the server was started with. Passing None here
            # made an explicit db_path fall back to the cwd database on
            # every call.
            graph = _load_graph(db_path)
        result = handler_fn(graph, arguments)
        return [TextContent(type="text", text=json.dumps(result, indent=2))]

    return server


def _tool_description(name: str) -> str:
    """Return the human-readable description for tool *name*.

    Unknown names fall back to the name itself.
    """
    descriptions = {
        "find_symbol": "Search for symbols by qualname substring",
        "callers": "Find callers of a symbol (reverse BFS)",
        "callees": "Find callees of a symbol (forward BFS)",
        "blast_radius": "Compute blast radius for a symbol",
        "subgraph": "Extract induced subgraph around symbols",
        "dead_code": "List unreferenced (dead) code",
        "cycles": "Detect import and call cycles",
        "untested": "List untested functions/methods",
        "hotspots": "List hotspot callables by fan-in/out/LOC",
        "metrics": "Return aggregate graph metrics",
        "semantic_search": "Free-text semantic search over the embeddings index",
        "hybrid_search": "Semantic search reranked by graph distance + role filter",
        "dataflow_routes": "List HTTP routes (DF1: FastAPI/Flask handlers)",
        "dataflow_fetches": (
            "List frontend FETCH_CALL edges (caller, method, url, library, body_keys)"
        ),
        "dataflow_trace": (
            "Trace a data flow from entry through call graph + cross-layer edges (DF4)"
        ),
        "workspace_state": (
            "Report git + graph state for every repo registered in the user's "
            "codegraph workspace (~/.codegraph/workspace.yml)"
        ),
        "workspace_diff_since": (
            "List files changed across every workspace repo since a given git ref "
            "(default: main)"
        ),
        "workspace_blast_radius": (
            "Compute blast radius for a symbol across every workspace repo "
            "(unions the per-repo results)"
        ),
    }
    return descriptions.get(name, name)


async def _serve(db_path: Path | None, server_name: str) -> None:
    """Pre-warm the graph cache (best effort) and run the MCP server on stdio.

    Args:
        db_path: Explicit graph database, or None for the default under cwd.
        server_name: Name given to the MCP server.
    """
    import contextlib

    from mcp.server.stdio import stdio_server

    # Pre-warm the graph if db exists. Failures are deliberately ignored here;
    # the load is retried when a tool is actually called.
    if db_path is not None or (Path.cwd() / ".codegraph" / "graph.db").exists():
        with contextlib.suppress(Exception):
            _load_graph(db_path)

    # Hand the same path to the server so tool calls hit the pre-warmed cache.
    server = _build_server(server_name, db_path)
    async with stdio_server() as (read_stream, write_stream):
        await server.run(
            read_stream,
            write_stream,
            server.create_initialization_options(),
        )
929
+
930
+
931
def run(db_path: Path | None = None, server_name: str = "codegraph") -> None:
    """Blocking CLI entry point: drive ``_serve`` to completion with ``asyncio.run``."""
    serve_coro = _serve(db_path, server_name)
    asyncio.run(serve_coro)