code-review-graph-codeblackwell 2.3.6.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. code_review_graph/__init__.py +20 -0
  2. code_review_graph/__main__.py +4 -0
  3. code_review_graph/analysis.py +410 -0
  4. code_review_graph/changes.py +409 -0
  5. code_review_graph/cli.py +1255 -0
  6. code_review_graph/communities.py +874 -0
  7. code_review_graph/constants.py +23 -0
  8. code_review_graph/context_savings.py +317 -0
  9. code_review_graph/custom_languages.py +322 -0
  10. code_review_graph/daemon.py +1009 -0
  11. code_review_graph/daemon_cli.py +320 -0
  12. code_review_graph/docs/LLM-OPTIMIZED-REFERENCE.md +71 -0
  13. code_review_graph/embeddings.py +1006 -0
  14. code_review_graph/enrich.py +303 -0
  15. code_review_graph/eval/__init__.py +33 -0
  16. code_review_graph/eval/benchmarks/__init__.py +1 -0
  17. code_review_graph/eval/benchmarks/agent_baseline.py +193 -0
  18. code_review_graph/eval/benchmarks/build_performance.py +60 -0
  19. code_review_graph/eval/benchmarks/flow_completeness.py +36 -0
  20. code_review_graph/eval/benchmarks/impact_accuracy.py +220 -0
  21. code_review_graph/eval/benchmarks/multi_hop_retrieval.py +125 -0
  22. code_review_graph/eval/benchmarks/search_quality.py +59 -0
  23. code_review_graph/eval/benchmarks/token_efficiency.py +143 -0
  24. code_review_graph/eval/configs/code-review-graph.yaml +50 -0
  25. code_review_graph/eval/configs/express.yaml +45 -0
  26. code_review_graph/eval/configs/fastapi.yaml +48 -0
  27. code_review_graph/eval/configs/flask.yaml +50 -0
  28. code_review_graph/eval/configs/gin.yaml +51 -0
  29. code_review_graph/eval/configs/httpx.yaml +48 -0
  30. code_review_graph/eval/reporter.py +301 -0
  31. code_review_graph/eval/runner.py +211 -0
  32. code_review_graph/eval/scorer.py +85 -0
  33. code_review_graph/eval/token_benchmark.py +182 -0
  34. code_review_graph/exports.py +409 -0
  35. code_review_graph/flows.py +698 -0
  36. code_review_graph/graph.py +1427 -0
  37. code_review_graph/graph_diff.py +122 -0
  38. code_review_graph/hints.py +384 -0
  39. code_review_graph/incremental.py +1245 -0
  40. code_review_graph/jedi_resolver.py +303 -0
  41. code_review_graph/main.py +1079 -0
  42. code_review_graph/memory.py +142 -0
  43. code_review_graph/migrations.py +284 -0
  44. code_review_graph/parser.py +6957 -0
  45. code_review_graph/postprocessing.py +134 -0
  46. code_review_graph/prompts.py +159 -0
  47. code_review_graph/refactor.py +852 -0
  48. code_review_graph/registry.py +319 -0
  49. code_review_graph/rescript_resolver.py +206 -0
  50. code_review_graph/search.py +447 -0
  51. code_review_graph/skills.py +1481 -0
  52. code_review_graph/spring_resolver.py +200 -0
  53. code_review_graph/temporal_resolver.py +199 -0
  54. code_review_graph/token_benchmark.py +125 -0
  55. code_review_graph/tools/__init__.py +156 -0
  56. code_review_graph/tools/_common.py +176 -0
  57. code_review_graph/tools/analysis_tools.py +184 -0
  58. code_review_graph/tools/build.py +541 -0
  59. code_review_graph/tools/community_tools.py +246 -0
  60. code_review_graph/tools/context.py +152 -0
  61. code_review_graph/tools/docs.py +274 -0
  62. code_review_graph/tools/flows_tools.py +176 -0
  63. code_review_graph/tools/query.py +692 -0
  64. code_review_graph/tools/refactor_tools.py +168 -0
  65. code_review_graph/tools/registry_tools.py +125 -0
  66. code_review_graph/tools/review.py +477 -0
  67. code_review_graph/tsconfig_resolver.py +257 -0
  68. code_review_graph/visualization.py +2184 -0
  69. code_review_graph/wiki.py +305 -0
  70. code_review_graph_codeblackwell-2.3.6.post1.dist-info/METADATA +718 -0
  71. code_review_graph_codeblackwell-2.3.6.post1.dist-info/RECORD +74 -0
  72. code_review_graph_codeblackwell-2.3.6.post1.dist-info/WHEEL +4 -0
  73. code_review_graph_codeblackwell-2.3.6.post1.dist-info/entry_points.txt +3 -0
  74. code_review_graph_codeblackwell-2.3.6.post1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1079 @@
1
+ """MCP server entry point for Code Review Graph.
2
+
3
+ Run as: code-review-graph serve
4
+ Communicates via stdio (standard MCP transport), or use
5
+ ``code-review-graph serve --http`` for Streamable HTTP on localhost (port 5555
6
+ by default).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import asyncio
12
+ import logging
13
+ import os
14
+ import sys
15
+ from pathlib import Path
16
+ from typing import Optional
17
+
18
+ from fastmcp import FastMCP
19
+
20
+ from .graph import GraphStore
21
+ from .incremental import find_project_root, get_db_path, start_watch_thread
22
+ from .prompts import (
23
+ architecture_map_prompt,
24
+ debug_issue_prompt,
25
+ onboard_developer_prompt,
26
+ pre_merge_check_prompt,
27
+ review_changes_prompt,
28
+ )
29
+ from .tools import (
30
+ apply_refactor_func,
31
+ build_or_update_graph,
32
+ cross_repo_search_func,
33
+ detect_changes_func,
34
+ embed_graph,
35
+ find_large_functions,
36
+ generate_wiki_func,
37
+ get_affected_flows_func,
38
+ get_architecture_overview_func,
39
+ get_bridge_nodes_func,
40
+ get_community_func,
41
+ get_docs_section,
42
+ get_flow,
43
+ get_hub_nodes_func,
44
+ get_impact_radius,
45
+ get_knowledge_gaps_func,
46
+ get_minimal_context,
47
+ get_review_context,
48
+ get_suggested_questions_func,
49
+ get_surprising_connections_func,
50
+ get_wiki_page_func,
51
+ list_communities_func,
52
+ list_flows,
53
+ list_graph_stats,
54
+ list_repos_func,
55
+ query_graph,
56
+ refactor_func,
57
+ run_postprocess,
58
+ semantic_search_nodes,
59
+ traverse_graph_func,
60
+ )
61
+
62
+ logger = logging.getLogger(__name__)
63
+
64
+ # NOTE: Thread-safe for stdio MCP (single-threaded). If adding HTTP/SSE
65
+ # transport with concurrent requests, replace with contextvars.ContextVar.
66
+ _default_repo_root: str | None = None
67
+
68
+
69
+ def _resolve_repo_root(repo_root: Optional[str]) -> Optional[str]:
70
+ """Resolve repo_root for a tool call.
71
+
72
+ Order of precedence:
73
+ 1. Explicit ``repo_root`` passed by the MCP client (highest).
74
+ 2. ``--repo`` CLI flag passed to ``code-review-graph serve``
75
+ (captured in ``_default_repo_root``).
76
+ 3. None — the underlying impl will fall back to the server's cwd.
77
+
78
+ All MCP tools that accept ``repo_root`` should use this helper so
79
+ ``serve --repo <X>`` applies consistently, including
80
+ ``get_docs_section_tool``. See: #222.
81
+ """
82
+ return repo_root if repo_root else _default_repo_root
83
+
84
+
85
# Server instance that the @mcp.tool() decorators in this module register on.
# The instruction text is split per clause; the concatenated string is unchanged.
mcp = FastMCP(
    "code-review-graph",
    instructions=(
        "Persistent incremental knowledge graph for token-efficient, context-aware code reviews. "
        "Parses your codebase with Tree-sitter, builds a structural graph, "
        "and provides smart impact analysis."
    ),
)
93
+
94
+
95
@mcp.tool()
async def build_or_update_graph_tool(
    full_rebuild: bool = False,
    repo_root: Optional[str] = None,
    base: str = "HEAD~1",
    postprocess: str = "full",
    recurse_submodules: Optional[bool] = None,
) -> dict:
    """Build or incrementally update the code knowledge graph.

    Call this first to initialize the graph, or after making changes.
    By default performs an incremental update (only changed files).
    Set full_rebuild=True to re-parse every file.

    Runs the blocking full_build / incremental_update work in a thread
    via ``asyncio.to_thread`` so the stdio event loop stays responsive.
    Without this wrapper, long builds deadlocked on Windows because
    ``ProcessPoolExecutor`` (used by parallel parsing) interacted badly
    with the sync handler blocking the only event-loop thread. See:
    #46, #136.

    Args:
        full_rebuild: If True, re-parse all files. Default: False (incremental).
        repo_root: Repository root path. Auto-detected from current directory if omitted.
        base: Git ref to diff against for incremental updates. Default: HEAD~1.
        postprocess: Post-processing level: "full" (default), "minimal" (signatures+FTS only),
            or "none" (skip all post-processing). Use "minimal" for faster builds.
        recurse_submodules: If True, include files from git submodules.
            When None (default), falls back to CRG_RECURSE_SUBMODULES env var.
    """
    # Resolve the root on the event-loop thread, then hand the blocking
    # build off to a worker thread.
    build_kwargs = {
        "full_rebuild": full_rebuild,
        "repo_root": _resolve_repo_root(repo_root),
        "base": base,
        "postprocess": postprocess,
        "recurse_submodules": recurse_submodules,
    }
    return await asyncio.to_thread(build_or_update_graph, **build_kwargs)
133
+
134
+
135
@mcp.tool()
async def run_postprocess_tool(
    flows: bool = True,
    communities: bool = True,
    fts: bool = True,
    repo_root: Optional[str] = None,
) -> dict:
    """Run post-processing on existing graph (flows, communities, FTS index).

    Use after building with postprocess="none" or "minimal", or to re-run
    expensive steps independently. Signatures are always computed.

    Offloaded to a thread via ``asyncio.to_thread`` so community
    detection on large graphs doesn't block the MCP event loop. See:
    #46, #136.

    Args:
        flows: Run flow detection. Default: True.
        communities: Run community detection. Default: True.
        fts: Rebuild FTS index. Default: True.
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Explicit repo_root wins; otherwise the server-wide default applies.
    root = _resolve_repo_root(repo_root)
    return await asyncio.to_thread(
        run_postprocess,
        flows=flows,
        communities=communities,
        fts=fts,
        repo_root=root,
    )
162
+
163
+
164
@mcp.tool()
def get_minimal_context_tool(
    task: str = "",
    changed_files: Optional[list[str]] = None,
    repo_root: Optional[str] = None,
    base: str = "HEAD~1",
) -> dict:
    """Get ultra-compact context for any task (~100 tokens). Always call this first.

    Returns graph stats, risk score, top communities/flows, and suggested
    next tools in a single compact response. Use this as the entry point
    before any other graph tool to minimize token usage.

    Args:
        task: What you are doing (e.g. "review PR #42", "debug login timeout").
        changed_files: Explicit list of changed files. Auto-detected if omitted.
        repo_root: Repository root path. Auto-detected if omitted.
        base: Git ref for diff comparison. Default: HEAD~1.
    """
    # Explicit repo_root wins; otherwise the server-wide default applies.
    root = _resolve_repo_root(repo_root)
    return get_minimal_context(
        task=task,
        changed_files=changed_files,
        repo_root=root,
        base=base,
    )
187
+
188
+
189
@mcp.tool()
def get_impact_radius_tool(
    changed_files: Optional[list[str]] = None,
    max_depth: int = 2,
    repo_root: Optional[str] = None,
    base: str = "HEAD~1",
    detail_level: str = "standard",
) -> dict:
    """Analyze the blast radius of changed files in the codebase.

    Shows which functions, classes, and files are impacted by changes.
    Auto-detects changed files from git if not specified.

    Args:
        changed_files: List of changed file paths (relative to repo root). Auto-detected if omitted.
        max_depth: Number of hops to traverse in the dependency graph. Default: 2.
        repo_root: Repository root path. Auto-detected if omitted.
        base: Git ref for auto-detecting changes. Default: HEAD~1.
        detail_level: "standard" for full output, "minimal" for compact summary. Default: standard.
    """
    # Collect the forwarded arguments once, with the root already resolved.
    impact_kwargs = {
        "changed_files": changed_files,
        "max_depth": max_depth,
        "repo_root": _resolve_repo_root(repo_root),
        "base": base,
        "detail_level": detail_level,
    }
    return get_impact_radius(**impact_kwargs)
213
+
214
+
215
@mcp.tool()
def query_graph_tool(
    pattern: str,
    target: str,
    repo_root: Optional[str] = None,
    detail_level: str = "standard",
) -> dict:
    """Run a predefined graph query to explore code relationships.

    Available patterns:
    - callers_of: Find functions that call the target
    - callees_of: Find functions called by the target
    - imports_of: Find what the target imports
    - importers_of: Find files that import the target
    - children_of: Find nodes contained in a file or class
    - tests_for: Find tests for the target
    - inheritors_of: Find classes inheriting from the target
    - file_summary: Get all nodes in a file

    Args:
        pattern: Query pattern name (see above).
        target: Node name, qualified name, or file path to query.
        repo_root: Repository root path. Auto-detected if omitted.
        detail_level: "standard" for full output, "minimal" for compact summary. Default: standard.
    """
    # Explicit repo_root wins; otherwise the server-wide default applies.
    root = _resolve_repo_root(repo_root)
    return query_graph(
        pattern=pattern,
        target=target,
        repo_root=root,
        detail_level=detail_level,
    )
244
+
245
+
246
@mcp.tool()
def get_review_context_tool(
    changed_files: Optional[list[str]] = None,
    max_depth: int = 2,
    include_source: bool = True,
    max_lines_per_file: int = 200,
    repo_root: Optional[str] = None,
    base: str = "HEAD~1",
    detail_level: str = "standard",
) -> dict:
    """Generate a focused, token-efficient review context for code changes.

    Combines impact analysis with source snippets and review guidance.
    Use this for comprehensive code reviews.

    Args:
        changed_files: Files to review. Auto-detected from git diff if omitted.
        max_depth: Impact radius depth. Default: 2.
        include_source: Include source code snippets. Default: True.
        max_lines_per_file: Max source lines per file. Default: 200.
        repo_root: Repository root path. Auto-detected if omitted.
        base: Git ref for change detection. Default: HEAD~1.
        detail_level: "standard" for full output, "minimal" for
            token-efficient summary. Default: standard.
    """
    # Collect the forwarded arguments once, with the root already resolved.
    review_kwargs = {
        "changed_files": changed_files,
        "max_depth": max_depth,
        "include_source": include_source,
        "max_lines_per_file": max_lines_per_file,
        "repo_root": _resolve_repo_root(repo_root),
        "base": base,
        "detail_level": detail_level,
    }
    return get_review_context(**review_kwargs)
276
+
277
+
278
@mcp.tool()
def semantic_search_nodes_tool(
    query: str,
    kind: Optional[str] = None,
    limit: int = 20,
    repo_root: Optional[str] = None,
    model: Optional[str] = None,
    provider: Optional[str] = None,
    detail_level: str = "standard",
) -> dict:
    """Search for code entities by name, keyword, or semantic similarity.

    Uses vector embeddings for semantic search when available (run embed_graph_tool
    first, with a provider of your choice: "local" needs sentence-transformers,
    "openai" / "google" / "minimax" need their respective env vars). Falls back
    to FTS5 / keyword matching when no matching embeddings exist for the given
    provider.

    Args:
        query: Search string to match against node names.
        kind: Optional filter: File, Class, Function, Type, or Test.
        limit: Maximum results. Default: 20.
        repo_root: Repository root path. Auto-detected if omitted.
        model: Embedding model for query vectors. Must match the model used
            during embed_graph. Falls back to CRG_EMBEDDING_MODEL env var
            (local) or CRG_OPENAI_MODEL (openai).
        provider: Embedding provider: "local" (default), "openai", "google",
            or "minimax". Must match the provider used during embed_graph.
        detail_level: "standard" for full output, "minimal" for compact summary. Default: standard.
    """
    # Explicit repo_root wins; otherwise the server-wide default applies.
    root = _resolve_repo_root(repo_root)
    return semantic_search_nodes(
        query=query,
        kind=kind,
        limit=limit,
        repo_root=root,
        model=model,
        provider=provider,
        detail_level=detail_level,
    )
312
+
313
+
314
@mcp.tool()
async def embed_graph_tool(
    repo_root: Optional[str] = None,
    model: Optional[str] = None,
    provider: Optional[str] = None,
) -> dict:
    """Compute vector embeddings for all graph nodes to enable semantic search.

    Requires: pip install code-review-graph[embeddings] (local provider only;
    cloud providers use stdlib urllib).
    Default provider: local. Default model: all-MiniLM-L6-v2.
    Override provider via `provider` param, model via `model` param or
    CRG_EMBEDDING_MODEL / CRG_OPENAI_MODEL env vars.
    Changing the model or provider re-embeds all nodes automatically.

    After running this, semantic_search_nodes_tool will use vector similarity
    instead of keyword matching for much better results.

    Runs the blocking sentence-transformers / Gemini / HTTP inference in a
    thread via ``asyncio.to_thread`` so the stdio event loop stays
    responsive — without this wrapper, embedding a large graph would
    silently hang the MCP server on Windows. See: #46, #136.

    Args:
        repo_root: Repository root path. Auto-detected if omitted.
        model: Embedding model. For local: HuggingFace ID/path; for openai:
            model ID (e.g. "text-embedding-3-small"); for google: Gemini
            model ID. Falls back to CRG_EMBEDDING_MODEL / CRG_OPENAI_MODEL
            env vars as appropriate.
        provider: "local" (default), "openai", "google", or "minimax".
            "openai" requires CRG_OPENAI_BASE_URL + CRG_OPENAI_API_KEY +
            CRG_OPENAI_MODEL env vars and accepts any OpenAI-compatible
            endpoint (real OpenAI, Azure, new-api, LiteLLM, vLLM, etc.).
    """
    # Resolve the root on the event-loop thread, then run the embedding
    # work in a worker thread.
    embed_kwargs = {
        "repo_root": _resolve_repo_root(repo_root),
        "model": model,
        "provider": provider,
    }
    return await asyncio.to_thread(embed_graph, **embed_kwargs)
354
+
355
+
356
@mcp.tool()
def list_graph_stats_tool(
    repo_root: Optional[str] = None,
) -> dict:
    """Get aggregate statistics about the code knowledge graph.

    Shows total nodes, edges, languages, files, and last update time.
    Useful for checking if the graph is built and up to date.

    Args:
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Explicit repo_root wins; otherwise the server-wide default applies.
    root = _resolve_repo_root(repo_root)
    stats = list_graph_stats(repo_root=root)
    return stats
369
+
370
+
371
@mcp.tool()
def get_docs_section_tool(
    section_name: str,
    repo_root: Optional[str] = None,
) -> dict:
    """Get a specific section from the LLM-optimized documentation reference.

    Returns only the requested section content for minimal token usage.
    Use this before answering any user question about the plugin.

    Available sections: usage, review-delta, review-pr, commands, legal,
    watch, embeddings, languages, troubleshooting.

    Args:
        section_name: The section to retrieve (e.g. "review-delta", "usage").
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Routed through the shared resolver like every other tool, so the
    # server-wide default root applies here as well.
    root = _resolve_repo_root(repo_root)
    return get_docs_section(section_name=section_name, repo_root=root)
392
+
393
+
394
@mcp.tool()
def find_large_functions_tool(
    min_lines: int = 50,
    kind: Optional[str] = None,
    file_path_pattern: Optional[str] = None,
    limit: int = 50,
    repo_root: Optional[str] = None,
) -> dict:
    """Find functions, classes, or files exceeding a line-count threshold.

    Useful for decomposition audits, code quality checks, and enforcing
    size limits during code review. Results are ordered by line count.

    Args:
        min_lines: Minimum line count to flag. Default: 50.
        kind: Optional filter: Function, Class, File, or Test.
        file_path_pattern: Filter by file path substring (e.g. "components/").
        limit: Maximum results. Default: 50.
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Collect the forwarded arguments once, with the root already resolved.
    search_kwargs = {
        "min_lines": min_lines,
        "kind": kind,
        "file_path_pattern": file_path_pattern,
        "limit": limit,
        "repo_root": _resolve_repo_root(repo_root),
    }
    return find_large_functions(**search_kwargs)
418
+
419
+
420
@mcp.tool()
def list_flows_tool(
    sort_by: str = "criticality",
    limit: int = 50,
    kind: Optional[str] = None,
    detail_level: str = "standard",
    repo_root: Optional[str] = None,
) -> dict:
    """List execution flows in the codebase, sorted by criticality.

    Each flow represents a call chain starting from an entry point
    (HTTP handler, CLI command, test function, etc.). Use this to
    understand the main execution paths through the codebase.

    Args:
        sort_by: Sort column: criticality, depth, node_count, file_count, or name.
        limit: Maximum flows to return. Default: 50.
        kind: Optional filter by entry point kind (e.g. "Test", "Function").
        detail_level: "standard" (default) returns full flow data; "minimal"
            returns only name, criticality, and node_count per flow.
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Explicit repo_root wins; otherwise the server-wide default applies.
    root = _resolve_repo_root(repo_root)
    return list_flows(
        repo_root=root,
        sort_by=sort_by,
        limit=limit,
        kind=kind,
        detail_level=detail_level,
    )
446
+
447
+
448
@mcp.tool()
def get_flow_tool(
    flow_id: Optional[int] = None,
    flow_name: Optional[str] = None,
    include_source: bool = False,
    repo_root: Optional[str] = None,
) -> dict:
    """Get detailed information about a single execution flow.

    Returns the full call path with each step's function name, file, and
    line numbers. Optionally includes source code snippets for each step.

    Provide either flow_id (from list_flows_tool) or flow_name to search by name.

    Args:
        flow_id: Database ID of the flow.
        flow_name: Name to search for (partial match). Ignored if flow_id given.
        include_source: Include source code snippets for each step. Default: False.
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Collect the forwarded arguments once, with the root already resolved.
    flow_kwargs = {
        "flow_id": flow_id,
        "flow_name": flow_name,
        "include_source": include_source,
        "repo_root": _resolve_repo_root(repo_root),
    }
    return get_flow(**flow_kwargs)
472
+
473
+
474
@mcp.tool()
def get_affected_flows_tool(
    changed_files: Optional[list[str]] = None,
    base: str = "HEAD~1",
    repo_root: Optional[str] = None,
) -> dict:
    """Find execution flows affected by changed files.

    Identifies which execution flows pass through nodes in the changed files.
    Useful during code review to understand which user-facing or critical paths
    are impacted by a change. Auto-detects changed files from git if not specified.

    Args:
        changed_files: List of changed file paths (relative to repo root). Auto-detected if omitted.
        base: Git ref for auto-detecting changes. Default: HEAD~1.
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Explicit repo_root wins; otherwise the server-wide default applies.
    root = _resolve_repo_root(repo_root)
    return get_affected_flows_func(
        changed_files=changed_files,
        base=base,
        repo_root=root,
    )
494
+
495
+
496
@mcp.tool()
def list_communities_tool(
    sort_by: str = "size",
    min_size: int = 0,
    detail_level: str = "standard",
    repo_root: Optional[str] = None,
) -> dict:
    """List detected code communities in the codebase.

    Each community represents a cluster of related code entities (functions,
    classes) detected via the Leiden algorithm or file-based grouping.
    Use this to understand the high-level structure of the codebase.

    Args:
        sort_by: Sort column: size, cohesion, or name.
        min_size: Minimum community size to include. Default: 0.
        detail_level: "standard" (default) returns full community data;
            "minimal" returns only name, size, and cohesion
            per community.
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Collect the forwarded arguments once, with the root already resolved.
    listing_kwargs = {
        "repo_root": _resolve_repo_root(repo_root),
        "sort_by": sort_by,
        "min_size": min_size,
        "detail_level": detail_level,
    }
    return list_communities_func(**listing_kwargs)
521
+
522
+
523
@mcp.tool()
def get_community_tool(
    community_name: Optional[str] = None,
    community_id: Optional[int] = None,
    include_members: bool = False,
    repo_root: Optional[str] = None,
) -> dict:
    """Get detailed information about a single code community.

    Returns community metadata including size, cohesion, dominant language,
    and member list. Optionally includes full node details for each member.

    Provide either community_id (from list_communities_tool) or community_name
    to search by name.

    Args:
        community_name: Name to search for (partial match). Ignored if community_id given.
        community_id: Database ID of the community.
        include_members: Include full member node details. Default: False.
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Explicit repo_root wins; otherwise the server-wide default applies.
    root = _resolve_repo_root(repo_root)
    return get_community_func(
        community_name=community_name,
        community_id=community_id,
        include_members=include_members,
        repo_root=root,
    )
548
+
549
+
550
@mcp.tool()
def get_architecture_overview_tool(
    repo_root: Optional[str] = None,
    detail_level: str = "minimal",
) -> dict:
    """Generate an architecture overview based on community structure.

    Builds a high-level view of the codebase architecture by analyzing
    community boundaries and cross-community coupling. Includes warnings
    for high coupling between communities.

    Args:
        repo_root: Repository root path. Auto-detected if omitted.
        detail_level: "minimal" (default) drops community member lists
            and aggregates cross-community edges to one row per
            community pair (typical reduction: 600KB -> <5KB);
            "standard" returns full per-edge detail.
    """
    # Explicit repo_root wins; otherwise the server-wide default applies.
    root = _resolve_repo_root(repo_root)
    overview = get_architecture_overview_func(repo_root=root, detail_level=detail_level)
    return overview
572
+
573
+
574
@mcp.tool()
async def detect_changes_tool(
    base: str = "HEAD~1",
    changed_files: Optional[list[str]] = None,
    include_source: bool = False,
    max_depth: int = 2,
    repo_root: Optional[str] = None,
    detail_level: str = "standard",
) -> dict:
    """Detect changes and produce risk-scored, priority-ordered review guidance.

    Primary tool for code review. Maps git diffs to affected functions,
    flows, communities, and test coverage gaps. Returns risk scores and
    prioritized review items. Replaces get_review_context for change-aware reviews.

    Offloaded to a thread via ``asyncio.to_thread`` — runs `git diff`
    subprocesses and BFS traversals that can take several seconds on
    large repos. See: #46, #136.

    Args:
        base: Git ref to diff against. Default: HEAD~1.
        changed_files: List of changed file paths (relative to repo root). Auto-detected if omitted.
        include_source: Include source code snippets for changed functions. Default: False.
        max_depth: Impact radius depth for BFS traversal. Default: 2.
        repo_root: Repository root path. Auto-detected if omitted.
        detail_level: "standard" for full output, "minimal" for
            token-efficient summary. Default: standard.
    """
    # Read the optional timeout BEFORE creating the worker coroutine. A
    # malformed CRG_TOOL_TIMEOUT (e.g. "" or "30s") used to raise ValueError
    # after the coroutine already existed, failing the call and leaving a
    # never-awaited coroutine behind. Bad values now mean "no timeout".
    raw_timeout = os.environ.get("CRG_TOOL_TIMEOUT", "0")
    try:
        tool_timeout = int(raw_timeout)
    except ValueError:
        logger.warning(
            "Ignoring invalid CRG_TOOL_TIMEOUT=%r; running without a timeout",
            raw_timeout,
        )
        tool_timeout = 0

    work = asyncio.to_thread(
        detect_changes_func,
        base=base,
        changed_files=changed_files,
        include_source=include_source,
        max_depth=max_depth,
        repo_root=_resolve_repo_root(repo_root),
        detail_level=detail_level,
    )

    # Zero or negative means the timeout is disabled.
    if tool_timeout <= 0:
        return await work

    try:
        return await asyncio.wait_for(work, timeout=tool_timeout)
    except asyncio.TimeoutError:
        # NOTE(review): the timeout only stops this handler from waiting; the
        # worker thread presumably keeps running to completion — confirm.
        message = (
            f"detect_changes_tool timed out after {tool_timeout}s. "
            "Reduce scope with CRG_MAX_CHANGED_FUNCS / CRG_MAX_TRANSITIVE_FRONTIER, "
            "or increase CRG_TOOL_TIMEOUT."
        )
        return {
            "status": "error",
            "error": message,
            "summary": message,
        }
624
+
625
+
626
@mcp.tool()
def refactor_tool(
    mode: str = "rename",
    old_name: Optional[str] = None,
    new_name: Optional[str] = None,
    kind: Optional[str] = None,
    file_pattern: Optional[str] = None,
    repo_root: Optional[str] = None,
) -> dict:
    """Graph-powered refactoring operations.

    Unified entry point for rename previews, dead code detection, and
    refactoring suggestions.

    Modes:
    - rename: Preview renaming a symbol. Returns an edit list and a refactor_id
      to pass to apply_refactor_tool. Requires old_name and new_name.
    - dead_code: Find unreferenced functions/classes (no callers, tests, or
      importers, and not entry points).
    - suggest: Get community-driven refactoring suggestions (move misplaced
      functions, remove dead code).

    Args:
        mode: Operation mode: "rename", "dead_code", or "suggest".
        old_name: (rename) Current symbol name to rename.
        new_name: (rename) Desired new name for the symbol.
        kind: (dead_code) Optional filter: Function or Class.
        file_pattern: (dead_code) Filter by file path substring.
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Collect the forwarded arguments once, with the root already resolved.
    refactor_kwargs = {
        "mode": mode,
        "old_name": old_name,
        "new_name": new_name,
        "kind": kind,
        "file_pattern": file_pattern,
        "repo_root": _resolve_repo_root(repo_root),
    }
    return refactor_func(**refactor_kwargs)
660
+
661
+
662
@mcp.tool()
def apply_refactor_tool(
    refactor_id: str,
    repo_root: Optional[str] = None,
    dry_run: bool = False,
) -> dict:
    """Apply a previously previewed refactoring to source files.

    Takes a refactor_id from a prior refactor_tool(mode="rename") call and
    applies the exact string replacements to the target files. Previews
    expire after 10 minutes.

    Security: All edit paths are validated to be within the repo root.
    Only exact string replacements are performed (no regex, no eval).

    Args:
        refactor_id: The refactor ID from refactor_tool's response.
        repo_root: Repository root path. Auto-detected if omitted.
        dry_run: If True, return a unified diff of what would change
            without touching any files. The refactor_id remains valid so
            the same preview can be applied in a follow-up call without
            dry_run. Use this for a human-in-the-loop review before
            committing changes to disk. See: #176
    """
    # Explicit repo_root wins; otherwise the server-wide default applies.
    root = _resolve_repo_root(repo_root)
    return apply_refactor_func(
        refactor_id=refactor_id,
        repo_root=root,
        dry_run=dry_run,
    )
690
+
691
+
692
@mcp.tool()
async def generate_wiki_tool(
    repo_root: Optional[str] = None,
    force: bool = False,
) -> dict:
    """Generate a markdown wiki from the code community structure.

    Creates a wiki page for each detected community and an index page.
    Pages are written to .code-review-graph/wiki/ inside the repository.
    Only regenerates pages whose content has changed unless force=True.

    Offloaded to a thread via ``asyncio.to_thread`` — on large graphs
    the page-generation loop touches every community and issues many
    SQLite reads, which would block the MCP event loop. See: #46, #136.

    Args:
        repo_root: Repository root path. Auto-detected if omitted.
        force: If True, regenerate all pages even if content unchanged. Default: False.
    """
    # Resolve the root on the event-loop thread, then generate the pages
    # in a worker thread.
    wiki_kwargs = {
        "repo_root": _resolve_repo_root(repo_root),
        "force": force,
    }
    return await asyncio.to_thread(generate_wiki_func, **wiki_kwargs)
716
+
717
+
718
@mcp.tool()
def get_wiki_page_tool(
    community_name: str,
    repo_root: Optional[str] = None,
) -> dict:
    """Retrieve a specific wiki page by community name.

    Returns the markdown content of the wiki page for the given community.
    The wiki must have been generated first via generate_wiki_tool.

    Args:
        community_name: Community name to look up.
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Explicit repo_root wins; otherwise the server-wide default applies.
    root = _resolve_repo_root(repo_root)
    page = get_wiki_page_func(community_name=community_name, repo_root=root)
    return page
735
+
736
+
737
@mcp.tool()
def get_hub_nodes_tool(top_n: int = 10, repo_root: Optional[str] = None) -> dict:
    """Find the most connected nodes in the codebase (architectural hotspots).

    Hub nodes have the highest total degree (in + out edges). Changes to
    them have disproportionate blast radius. Excludes File nodes.

    Args:
        top_n: Number of top hubs to return. Default: 10.
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Resolve the root up front so the delegate call is a plain pass-through.
    resolved_root = _resolve_repo_root(repo_root)
    return get_hub_nodes_func(repo_root=resolved_root, top_n=top_n)
754
+
755
+
756
@mcp.tool()
def get_bridge_nodes_tool(top_n: int = 10, repo_root: Optional[str] = None) -> dict:
    """Find architectural chokepoints via betweenness centrality.

    Bridge nodes sit on shortest paths between many node pairs.
    If they break, multiple code regions lose connectivity.
    Uses sampling approximation for graphs > 5000 nodes.

    Args:
        top_n: Number of top bridges to return. Default: 10.
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Resolve the root up front so the delegate call is a plain pass-through.
    resolved_root = _resolve_repo_root(repo_root)
    return get_bridge_nodes_func(repo_root=resolved_root, top_n=top_n)
774
+
775
+
776
@mcp.tool()
def get_knowledge_gaps_tool(repo_root: Optional[str] = None) -> dict:
    """Identify structural weaknesses in the codebase graph.

    Finds isolated nodes (disconnected), thin communities (< 3 members),
    untested hotspots (high-degree nodes without test coverage), and
    single-file communities.

    Args:
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Resolve the root up front so the delegate call is a plain pass-through.
    resolved_root = _resolve_repo_root(repo_root)
    return get_knowledge_gaps_func(repo_root=resolved_root)
792
+
793
+
794
@mcp.tool()
def get_surprising_connections_tool(top_n: int = 15, repo_root: Optional[str] = None) -> dict:
    """Find unexpected architectural coupling via composite surprise scoring.

    Scores edges by: cross-community (+0.3), cross-language (+0.2),
    peripheral-to-hub (+0.2), cross-test-boundary (+0.15), and
    unusual edge kinds (+0.15).

    Args:
        top_n: Number of top surprises to return. Default: 15.
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Resolve the root up front so the delegate call is a plain pass-through.
    resolved_root = _resolve_repo_root(repo_root)
    return get_surprising_connections_func(repo_root=resolved_root, top_n=top_n)
812
+
813
+
814
@mcp.tool()
def get_suggested_questions_tool(repo_root: Optional[str] = None) -> dict:
    """Auto-generate review questions from graph analysis.

    Produces prioritized questions about: bridge nodes needing tests,
    untested hub nodes, surprising cross-community coupling, thin
    communities, and untested hotspots.

    Args:
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Resolve the root up front so the delegate call is a plain pass-through.
    resolved_root = _resolve_repo_root(repo_root)
    return get_suggested_questions_func(repo_root=resolved_root)
830
+
831
+
832
@mcp.tool()
def traverse_graph_tool(
    query: str,
    mode: str = "bfs",
    depth: int = 3,
    token_budget: int = 2000,
    repo_root: Optional[str] = None,
) -> dict:
    """BFS/DFS traversal from best-matching node with token budget.

    Free-form graph exploration: finds the node best matching your
    query, then traverses outward via BFS or DFS up to the given
    depth, collecting connected nodes within the token budget.

    Args:
        query: Search string to find the starting node.
        mode: Traversal mode: "bfs" (breadth-first) or "dfs"
            (depth-first). Default: bfs.
        depth: Max traversal depth (1-6). Default: 3.
        token_budget: Approximate token limit for results.
            Default: 2000.
        repo_root: Repository root path. Auto-detected if omitted.
    """
    # Unlike the sibling tools, a falsy resolved root is passed on as an
    # empty string rather than as-is.
    start_root = _resolve_repo_root(repo_root) or ""
    return traverse_graph_func(
        query=query,
        mode=mode,
        depth=depth,
        token_budget=token_budget,
        repo_root=start_root,
    )
860
+
861
+
862
@mcp.tool()
def list_repos_tool() -> dict:
    """List all registered repositories in the multi-repo registry.

    Returns the list of repos registered at ~/.code-review-graph/registry.json.
    Use the CLI 'register' command to add repos.
    """
    # Takes no arguments: the delegate is called as-is.
    registry_listing = list_repos_func()
    return registry_listing
870
+
871
+
872
@mcp.tool()
def cross_repo_search_tool(query: str, kind: Optional[str] = None, limit: int = 20) -> dict:
    """Search for code entities across all registered repositories.

    Runs hybrid search on each registered repo's graph database and merges
    the results by score. Register repos first with the CLI 'register' command.

    Args:
        query: Search string to match against node names.
        kind: Optional filter: File, Class, Function, Type, or Test.
        limit: Maximum results per repo. Default: 20.
    """
    # No repo_root here: the search is not tied to a single repository.
    search_kwargs = {"query": query, "kind": kind, "limit": limit}
    return cross_repo_search_func(**search_kwargs)
889
+
890
+
891
@mcp.prompt()
def review_changes(base: str = "HEAD~1") -> list[dict]:
    """Pre-commit review workflow using detect_changes, affected_flows, and test gaps.

    Produces a structured code review with risk levels and actionable findings.

    Args:
        base: Git ref to diff against. Default: HEAD~1.
    """
    # The prompt content is built entirely by the helper.
    prompt_messages = review_changes_prompt(base=base)
    return prompt_messages
901
+
902
+
903
@mcp.prompt()
def architecture_map() -> list[dict]:
    """Architecture documentation using communities, flows, and Mermaid diagrams.

    Generates a comprehensive architecture map with module summaries and coupling warnings.
    """
    # The prompt content is built entirely by the helper.
    prompt_messages = architecture_map_prompt()
    return prompt_messages
910
+
911
+
912
@mcp.prompt()
def debug_issue(description: str = "") -> list[dict]:
    """Guided debugging using search, flow tracing, and recent changes.

    Systematic debugging workflow that traces execution paths and identifies root causes.

    Args:
        description: Description of the issue to debug.
    """
    # The prompt content is built entirely by the helper.
    prompt_messages = debug_issue_prompt(description=description)
    return prompt_messages
922
+
923
+
924
@mcp.prompt()
def onboard_developer() -> list[dict]:
    """New developer orientation using stats, architecture, and critical flows.

    Creates an onboarding guide covering codebase structure, key modules, and patterns.
    """
    # The prompt content is built entirely by the helper.
    prompt_messages = onboard_developer_prompt()
    return prompt_messages
931
+
932
+
933
@mcp.prompt()
def pre_merge_check(base: str = "HEAD~1") -> list[dict]:
    """PR readiness check with risk scoring, test gaps, and dead code detection.

    Produces a merge readiness report with risk assessment and recommendations.

    Args:
        base: Git ref to diff against. Default: HEAD~1.
    """
    # The prompt content is built entirely by the helper.
    prompt_messages = pre_merge_check_prompt(base=base)
    return prompt_messages
943
+
944
+
945
def _apply_tool_filter(tools: str | None = None) -> None:
    """Remove tools not listed in the allow-list.

    Accepts a comma-separated string of tool names to keep. When set,
    every registered MCP tool whose name is **not** in the list is
    removed via ``mcp.local_provider.remove_tool()``.

    The allow-list can be supplied in two ways (first match wins):

    1. ``tools`` argument (from ``serve --tools ...``).
    2. ``CRG_TOOLS`` environment variable.

    When neither is set (or the list contains only blanks/commas), all
    tools remain available.

    Allow-list entries that match no registered tool are reported with a
    warning, so a typo does not silently strip tools from the server.

    This is useful for token-constrained environments: CRG exposes 28+
    tools by default (~8k description tokens per LLM turn). Filtering
    to a working set of 5-10 tools can reduce overhead by 70-85%.

    Example::

        # via CLI
        code-review-graph serve --tools query_graph_tool,semantic_search_nodes_tool

        # via env var
        CRG_TOOLS=query_graph_tool,semantic_search_nodes_tool

    Args:
        tools: Comma-separated tool names to keep, or ``None`` to fall
            back to the ``CRG_TOOLS`` environment variable.
    """
    import asyncio
    import concurrent.futures
    import logging
    import os

    raw = tools or os.environ.get("CRG_TOOLS")
    if not raw:
        return
    allowed = {t.strip() for t in raw.split(",") if t.strip()}
    if not allowed:
        return

    # FastMCP >=3 exposes tool enumeration via the async ``list_tools``
    # method. Earlier code relied on ``mcp._tool_manager._tools``, a
    # private attribute that was removed in fastmcp>=3.0.
    def _fetch_tool_names() -> list[str]:
        return [t.name for t in asyncio.run(mcp.list_tools())]

    # ``_apply_tool_filter`` is typically called from ``main()`` before the
    # MCP event loop starts, but tests may invoke it from within a running
    # event loop — in that case ``asyncio.run`` raises ``RuntimeError``, so
    # the coroutine is run on a dedicated short-lived loop in a worker
    # thread instead.
    def _list_tool_names() -> list[str]:
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            return _fetch_tool_names()
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(_fetch_tool_names).result()

    registered = _list_tool_names()

    # Surface typos: a misspelled name would otherwise just drop the
    # intended tool without any hint as to why.
    unknown = allowed.difference(registered)
    if unknown:
        logging.getLogger(__name__).warning(
            "Tool allow-list contains unknown tool name(s): %s",
            ", ".join(sorted(unknown)),
        )

    for name in registered:
        if name not in allowed:
            mcp.local_provider.remove_tool(name)
1005
+
1006
+
1007
+
1008
def main(
    repo_root: str | None = None,
    tools: str | None = None,
    auto_watch: bool = False,
    *,
    transport: str = "stdio",
    host: str | None = None,
    port: int | None = None,
) -> None:
    """Run the MCP server (stdio or HTTP).

    On Windows, Python 3.8+ defaults to ``ProactorEventLoop``, which
    interacts poorly with ``concurrent.futures.ProcessPoolExecutor``
    (used by ``full_build``) over a stdio MCP transport — the combination
    produces silent hangs on ``build_or_update_graph_tool`` and
    ``embed_graph_tool``. Switching to ``WindowsSelectorEventLoopPolicy``
    before fastmcp starts its loop avoids the deadlock.
    See: #46, #136

    Args:
        repo_root: Default repository root for all tool calls.
        tools: Comma-separated list of tool names to expose.
            Falls back to ``CRG_TOOLS`` env var. When unset, all
            tools are available.
        auto_watch: Start filesystem watcher in a background daemon thread
            while the MCP server runs.
        transport: ``"stdio"`` (default) or ``"streamable-http"`` for local HTTP.
        host: Bind address when using HTTP (required for HTTP; set by CLI).
        port: Port when using HTTP (required for HTTP; set by CLI).

    Raises:
        ValueError: If ``transport`` is not a supported value, or if
            ``"streamable-http"`` is requested without ``host`` and ``port``.
    """
    global _default_repo_root
    root = Path(repo_root) if repo_root else find_project_root()
    _default_repo_root = str(root)
    _apply_tool_filter(tools)

    watch_store: GraphStore | None = None
    # The ``try`` starts before the store can be opened so that a failure in
    # any later setup step (watcher start, event-loop policy, embedding
    # pre-warm) still closes it, not only a failure inside ``mcp.run``.
    try:
        if auto_watch:
            watch_store = GraphStore(get_db_path(root))
            thread = start_watch_thread(root, watch_store, daemon=True)
            if thread is None:
                logger.warning("Auto-watch was requested but could not be started")

        if sys.platform == "win32":
            asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
            # Pre-warm sentence-transformers on the main thread before fastmcp's
            # event loop starts. Lazy-loading ``torch`` + tokenizers inside an
            # executor worker thread deadlocks ``semantic_search_nodes_tool`` on
            # Windows stdio MCP (DLL init / OpenMP thread-pool registration grabs
            # locks the loop needs). #385 added ``asyncio.to_thread`` to peer
            # tools but cannot fix this case — the dangerous initialization has
            # to happen on the main thread before any worker thread is spawned.
            from .embeddings import prewarm_local_embeddings
            prewarm_local_embeddings()

        if transport == "stdio":
            # Stdio MCP must keep stdout strictly JSON-RPC. FastMCP's banner/update
            # notices corrupt the handshake stream on clients like Codex CLI.
            mcp.run(transport="stdio", show_banner=False)
        elif transport == "streamable-http":
            if host is None or port is None:
                raise ValueError("streamable-http transport requires host and port")
            mcp.run(transport="streamable-http", host=host, port=port)
        else:
            raise ValueError(f"unsupported transport: {transport!r}")
    finally:
        if watch_store is not None:
            watch_store.close()
1076
+
1077
+
1078
# Script entry point: start the server with all defaults when this module is
# executed directly.
if __name__ == "__main__":
    main()