codevira 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. codevira-1.6.0.dist-info/LICENSE +21 -0
  2. codevira-1.6.0.dist-info/METADATA +477 -0
  3. codevira-1.6.0.dist-info/RECORD +58 -0
  4. codevira-1.6.0.dist-info/WHEEL +5 -0
  5. codevira-1.6.0.dist-info/entry_points.txt +2 -0
  6. codevira-1.6.0.dist-info/top_level.txt +2 -0
  7. indexer/__init__.py +1 -0
  8. indexer/chunker.py +428 -0
  9. indexer/global_db.py +197 -0
  10. indexer/graph_generator.py +380 -0
  11. indexer/index_codebase.py +588 -0
  12. indexer/outcome_tracker.py +172 -0
  13. indexer/rule_learner.py +186 -0
  14. indexer/sqlite_graph.py +640 -0
  15. indexer/treesitter_parser.py +423 -0
  16. mcp_server/__init__.py +1 -0
  17. mcp_server/__main__.py +20 -0
  18. mcp_server/auto_init.py +257 -0
  19. mcp_server/cli.py +622 -0
  20. mcp_server/crash_logger.py +236 -0
  21. mcp_server/data/__init__.py +1 -0
  22. mcp_server/data/agents/builder.md +84 -0
  23. mcp_server/data/agents/developer.md +111 -0
  24. mcp_server/data/agents/documenter.md +138 -0
  25. mcp_server/data/agents/orchestrator.md +96 -0
  26. mcp_server/data/agents/planner.md +106 -0
  27. mcp_server/data/agents/reviewer.md +82 -0
  28. mcp_server/data/agents/tester.md +83 -0
  29. mcp_server/data/config.example.yaml +33 -0
  30. mcp_server/data/rules/coding-standards.md +48 -0
  31. mcp_server/data/rules/engineering-excellence.md +28 -0
  32. mcp_server/data/rules/git-cicd-governance.md +32 -0
  33. mcp_server/data/rules/git_commits.md +130 -0
  34. mcp_server/data/rules/incremental-updates.md +5 -0
  35. mcp_server/data/rules/master_rule.md +187 -0
  36. mcp_server/data/rules/multi-language.md +19 -0
  37. mcp_server/data/rules/persistence.md +21 -0
  38. mcp_server/data/rules/resilience-observability.md +17 -0
  39. mcp_server/data/rules/smoke-testing.md +48 -0
  40. mcp_server/data/rules/testing-standards.md +23 -0
  41. mcp_server/detect.py +284 -0
  42. mcp_server/gitignore.py +284 -0
  43. mcp_server/global_sync.py +187 -0
  44. mcp_server/http_server.py +341 -0
  45. mcp_server/ide_inject.py +444 -0
  46. mcp_server/launchd.py +156 -0
  47. mcp_server/migrate.py +215 -0
  48. mcp_server/paths.py +256 -0
  49. mcp_server/prompts.py +136 -0
  50. mcp_server/server.py +1049 -0
  51. mcp_server/tools/__init__.py +0 -0
  52. mcp_server/tools/changesets.py +223 -0
  53. mcp_server/tools/code_reader.py +335 -0
  54. mcp_server/tools/graph.py +637 -0
  55. mcp_server/tools/learning.py +238 -0
  56. mcp_server/tools/playbook.py +89 -0
  57. mcp_server/tools/roadmap.py +599 -0
  58. mcp_server/tools/search.py +145 -0
@@ -0,0 +1,637 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import subprocess
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from mcp_server.paths import get_data_dir, get_project_root
10
+ from indexer.sqlite_graph import SQLiteGraph
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
def _graph_dir() -> Path:
    """Return the ``graph`` subdirectory of the data directory."""
    data_root = get_data_dir()
    return data_root / "graph"
16
+
17
def _index_dir() -> Path:
    """Return the ``codeindex`` subdirectory of the data directory."""
    data_root = get_data_dir()
    return data_root / "codeindex"
19
+
20
def _get_db() -> SQLiteGraph:
    """Open a ``SQLiteGraph`` on ``graph.db`` inside the graph directory.

    The caller is responsible for calling ``close()`` on the returned object.
    """
    return SQLiteGraph(_graph_dir() / "graph.db")
23
+
24
def _last_indexed_file() -> Path:
    """Return the path of the ``.last_indexed`` marker inside the index directory."""
    index_root = _index_dir()
    return index_root / ".last_indexed"
26
+
27
def _get_index_timestamp() -> float | None:
    """Return the number stored in the ``.last_indexed`` marker file.

    Returns:
        The marker's content parsed as a float, or ``None`` when the marker
        does not exist or its content cannot be parsed.
    """
    marker = _last_indexed_file()
    if not marker.exists():
        return None
    try:
        raw = marker.read_text()
        return float(raw.strip())
    except ValueError:
        return None
35
+
36
def _get_file_mtime(file_path: str) -> float | None:
    """Return the modification time of ``file_path`` resolved against the project root.

    Returns ``None`` when the resolved path does not exist.
    """
    target = get_project_root() / file_path
    return target.stat().st_mtime if target.exists() else None
41
+
42
def _check_staleness(file_path: str) -> dict[str, Any]:
    """Compare a file's mtime with the last index timestamp.

    Args:
        file_path: Path relative to the project root.

    Returns:
        A dict with ``stale`` (bool), ``reason`` (human-readable explanation),
        and ``last_indexed`` / ``file_mtime`` as ISO-8601 strings, or ``None``
        when the corresponding timestamp is unavailable.
    """
    from datetime import datetime

    index_ts = _get_index_timestamp()
    file_mtime = _get_file_mtime(file_path)

    if file_mtime is None:
        stale, reason = True, "File does not exist on disk."
    elif index_ts is None:
        stale, reason = True, "Project index missing (.last_indexed not found)."
    elif file_mtime > index_ts:
        stale, reason = True, "File modified AFTER last index build."
    else:
        stale, reason = False, "File is tracked and up-to-date with index."

    def _iso(ts: float | None) -> str | None:
        # Test against None explicitly: a timestamp of 0.0 is a valid value
        # and must not be reported as "unavailable".
        return None if ts is None else datetime.fromtimestamp(ts).isoformat()

    return {
        "stale": stale,
        "reason": reason,
        "last_indexed": _iso(index_ts),
        "file_mtime": _iso(file_mtime),
    }
66
+
67
def list_nodes(layer: str | None = None, do_not_revert: bool | None = None, stability: str | None = None) -> dict[str, Any]:
    """List file nodes from the graph, optionally filtered.

    Args:
        layer: Passed through to ``list_file_nodes`` as a filter.
        do_not_revert: Passed through to ``list_file_nodes`` as a filter.
        stability: Passed through to ``list_file_nodes`` as a filter.

    Returns:
        A dict with ``count``, ``nodes`` and a ``hint``. Each node's ``stale``
        is ``True``/``False`` when both the index timestamp and the file's
        mtime are known, otherwise ``None``.
    """
    db = _get_db()
    try:
        nodes = db.list_file_nodes(layer=layer, stability=stability, do_not_revert=do_not_revert)
    finally:
        # Always release the handle, even when the query raises.
        db.close()

    index_ts = _get_index_timestamp()
    result = []

    for n in nodes:
        fp = n['file_path']
        stale = None
        if index_ts is not None:
            file_mtime = _get_file_mtime(fp)
            if file_mtime is not None:
                stale = file_mtime > index_ts

        result.append({
            "file_path": fp,
            "role": n.get('role'),
            "layer": n.get('layer'),
            "stability": n.get('stability'),
            "do_not_revert": bool(n.get('do_not_revert')),
            "stale": stale,
        })

    return {
        "count": len(result),
        "nodes": result,
        "hint": "Use get_node(file_path) to read the rules and dependencies for a specific file.",
    }
97
+
98
def add_node(file_path: str, role: str, layer: str, stability: str = "medium", node_type: str = "file", key_functions: list[str] | None = None, connects_to: list[dict] | None = None, rules: list[str] | None = None, do_not_revert: bool = False, tests: list[str] | None = None) -> dict[str, str]:
    """Add a file node to the graph database.

    The node id is ``file:<file_path>``. ``key_functions``, ``connects_to``
    (stored under ``dependencies``) and ``rules`` are JSON-encoded when
    non-empty and stored as ``None`` otherwise.

    Args:
        tests: Accepted for interface compatibility; not stored by this
            function.

    Returns:
        A dict with a ``status`` message.
    """
    db = _get_db()
    try:
        db.add_node(
            node_id=f"file:{file_path}",
            kind="file",
            name=Path(file_path).name,
            file_path=file_path,
            role=role,
            layer=layer,
            stability=stability,
            type=node_type,
            key_functions=json.dumps(key_functions) if key_functions else None,
            dependencies=json.dumps(connects_to) if connects_to else None,
            rules=json.dumps(rules) if rules else None,
            do_not_revert=do_not_revert,
        )
    finally:
        # Always release the handle, even when the insert raises.
        db.close()
    return {"status": f"Graph node added for '{file_path}'"}
119
+
120
def update_node(file_path: str, changes: dict[str, Any]) -> dict[str, str]:
    """Apply metadata changes to an existing file node.

    Handling per key in ``changes``:

    * ``rules`` / ``key_functions``: when both the stored value and the new
      value are lists, they are merged with duplicates removed (first
      occurrence wins, order preserved); otherwise the new value replaces
      the stored one.
    * ``connects_to``: replaces the stored ``dependencies`` when both the
      stored and new values are lists; otherwise it is ignored.
    * any other key: passed through unchanged.

    Returns:
        ``{"status": ...}`` on success, or ``{"error": ...}`` when no node
        exists for ``file_path``.
    """
    db = _get_db()
    try:
        node = db.get_node_by_path(file_path)
        if not node:
            return {"error": f"Node '{file_path}' not found."}

        updates = {}
        for key, val in changes.items():
            if key in ("rules", "key_functions"):
                existing = json.loads(node.get(key) or "[]")
                if isinstance(existing, list) and isinstance(val, list):
                    # Order-preserving de-duplication: deterministic output
                    # and no hashability requirement on the items.
                    merged = []
                    for item in existing + val:
                        if item not in merged:
                            merged.append(item)
                    updates[key] = json.dumps(merged)
                else:
                    updates[key] = json.dumps(val)
            elif key == "connects_to":
                existing = json.loads(node.get("dependencies") or "[]")
                if isinstance(existing, list) and isinstance(val, list):
                    updates["dependencies"] = json.dumps(val)
            else:
                updates[key] = val

        db.update_node_metadata(node["id"], **updates)
    finally:
        # Always release the handle, including when JSON parsing or the
        # update itself raises.
        db.close()
    return {"status": f"Updated node '{file_path}'"}
145
+
146
def get_node(file_path: str) -> dict[str, Any]:
    """Return the graph node for a file together with its index freshness.

    An exact path lookup is tried first. If it fails, a substring match over
    all file nodes is used, but only when exactly one node matches.

    Returns:
        On success: ``found=True`` with the resolved ``file_path``, the
        ``node`` (``rules``, ``key_functions`` and ``dependencies`` decoded
        from JSON when possible) and ``index_status``.
        On failure: ``found=False`` with a message and hint. When auto-init
        reports it is still running, the result also carries
        ``status="initializing"`` and the progress data.
    """
    db = _get_db()
    try:
        node = db.get_node_by_path(file_path)
        if not node:
            candidates = [n for n in db.list_file_nodes() if file_path in n['file_path']]
            if len(candidates) == 1:
                node = candidates[0]
                file_path = node['file_path']
    finally:
        # Always release the handle, even when a lookup raises.
        db.close()

    if not node:
        # v1.6: Check if auto-init is running
        hint = "Use refresh_graph(['path/to/file']) to auto-generate a graph node for new files."
        try:
            from mcp_server.auto_init import get_init_progress
            prog = get_init_progress()
            if prog["status"] in ("initializing", "indexing"):
                return {
                    "found": False,
                    "status": "initializing",
                    "file_path": file_path,
                    "message": "Graph is being built in the background. Try again in a few seconds.",
                    "indexing_progress": prog,
                    "hint": hint,
                }
        except Exception:
            # Best effort: the progress check must never break the lookup,
            # but leave a trace for debugging instead of dropping it silently.
            logger.debug("Could not read auto-init progress", exc_info=True)
        return {
            "found": False,
            "message": f"File '{file_path}' not found in the context graph.",
            "hint": hint,
        }

    staleness = _check_staleness(file_path)

    res_node = dict(node)
    for k in ("rules", "key_functions", "dependencies"):
        if res_node.get(k):
            try:
                res_node[k] = json.loads(res_node[k])
            except (json.JSONDecodeError, TypeError, ValueError):
                # Leave the raw stored value in place when it is not valid JSON.
                pass

    return {
        "found": True,
        "file_path": file_path,
        "node": res_node,
        "index_status": staleness,
        "hint": "Next: call get_signature(file_path) to see all public symbols and line ranges.",
    }
198
+
199
def get_impact(file_path: str) -> dict[str, Any]:
    """Report which files are in the blast radius of ``file_path``.

    An exact path lookup is tried first. If it fails, a substring match over
    all file nodes is used, but only when exactly one node matches. The blast
    radius is queried with ``max_depth=3``.

    Returns:
        ``found=False`` with a message when no node is resolved; otherwise
        ``found=True`` with ``target_file``, ``blast_radius`` (a count) and
        ``affected_files``, each path listed once and excluding the target.
    """
    db = _get_db()
    try:
        node = db.get_node_by_path(file_path)
        if not node:
            candidates = [n for n in db.list_file_nodes() if file_path in n['file_path']]
            if len(candidates) == 1:
                node = candidates[0]

        if not node:
            return {
                "found": False,
                "message": f"No graph node for '{file_path}'. Impact analysis unavailable.",
            }

        file_path = node['file_path']
        blast_radius = db.get_blast_radius(node['id'], max_depth=3)
    finally:
        # Always release the handle, even when a query raises.
        db.close()

    affected = []
    # Seeding with the target itself excludes it and gives O(1) duplicate checks.
    seen = {file_path}
    for r in blast_radius:
        path = r['file_path']
        if path in seen:
            continue
        seen.add(path)
        affected.append({
            "file": path,
            "role": r.get('role', 'Unknown'),
            "stability": r.get('stability', 'medium'),
            "do_not_revert": bool(r.get('do_not_revert')),
        })

    return {
        "found": True,
        "target_file": file_path,
        "blast_radius": len(affected),
        "affected_files": affected,
    }
237
+
238
def export_graph(format: str = "mermaid", scope: str | None = None) -> dict[str, Any]:
    """Render the file dependency graph as Mermaid or DOT text.

    Args:
        format: ``"mermaid"`` or ``"dot"``.
        scope: Optional path prefix. When given, only nodes whose
            ``file_path`` starts with it are kept, along with edges that
            touch at least one kept node.

    Returns:
        A dict with ``format``, ``node_count``, ``edge_count`` and
        ``output``, or a dict with ``error`` for an unknown format.
    """
    db = _get_db()
    try:
        file_nodes = db.list_file_nodes()
        all_edges = db.get_all_edges()

        # Narrow to the requested scope, if any.
        if scope:
            file_nodes = [node for node in file_nodes if node["file_path"].startswith(scope)]
            in_scope = {f"file:{node['file_path']}" for node in file_nodes}
            all_edges = [
                edge for edge in all_edges
                if edge["source_id"] in in_scope or edge["target_id"] in in_scope
            ]

        if format not in ("mermaid", "dot"):
            return {"error": f"Unknown format '{format}'. Use 'mermaid' or 'dot'."}

        render = _to_mermaid if format == "mermaid" else _to_dot
        rendered = render(file_nodes, all_edges)

        return {
            "format": format,
            "node_count": len(file_nodes),
            "edge_count": len(all_edges),
            "output": rendered,
        }
    finally:
        db.close()
266
+
267
+
268
+ def _to_mermaid(nodes: list[dict], edges: list[dict]) -> str:
269
+ lines = ["graph LR"]
270
+ # Create safe node IDs for Mermaid
271
+ id_map = {}
272
+ for n in nodes:
273
+ safe_id = n["file_path"].replace("/", "_").replace(".", "_").replace("-", "_")
274
+ id_map[f"file:{n['file_path']}"] = safe_id
275
+ label = Path(n["file_path"]).name
276
+ stability = n.get("stability", "medium")
277
+ style = ""
278
+ if stability == "high":
279
+ style = ":::high"
280
+ elif stability == "low":
281
+ style = ":::low"
282
+ lines.append(f" {safe_id}[\"{label}\"]{style}")
283
+
284
+ for e in edges:
285
+ src = id_map.get(e["source_id"])
286
+ tgt = id_map.get(e["target_id"])
287
+ if src and tgt:
288
+ lines.append(f" {src} --> {tgt}")
289
+
290
+ return "\n".join(lines)
291
+
292
+
293
+ def _to_dot(nodes: list[dict], edges: list[dict]) -> str:
294
+ lines = ["digraph codevira {", " rankdir=LR;", " node [shape=box, fontsize=10];"]
295
+ id_map = {}
296
+ for n in nodes:
297
+ safe_id = n["file_path"].replace("/", "_").replace(".", "_").replace("-", "_")
298
+ id_map[f"file:{n['file_path']}"] = safe_id
299
+ label = Path(n["file_path"]).name
300
+ color = {"high": "green", "medium": "yellow", "low": "red"}.get(n.get("stability", "medium"), "white")
301
+ lines.append(f' {safe_id} [label="{label}", fillcolor={color}, style=filled];')
302
+
303
+ for e in edges:
304
+ src = id_map.get(e["source_id"])
305
+ tgt = id_map.get(e["target_id"])
306
+ if src and tgt:
307
+ lines.append(f" {src} -> {tgt};")
308
+
309
+ lines.append("}")
310
+ return "\n".join(lines)
311
+
312
+
313
def get_graph_diff(base_ref: str = "main", head_ref: str = "HEAD") -> dict[str, Any]:
    """Show which graph nodes changed between two git refs and their blast radius.

    The changed-file list comes from ``git diff --name-only``, first with the
    three-dot form ``base...head`` and, if that fails, with the two refs as
    separate arguments.

    Returns:
        A dict with per-file entries (``in_graph``, ``stability``,
        ``do_not_revert``, ``blast_radius``, up to five ``affected`` paths),
        totals, and up to twenty ``union_affected`` paths in sorted order.
        A dict with ``error`` and ``detail`` is returned when git cannot be
        run or both diff forms fail.
    """
    root = get_project_root()

    diff_output = None
    last_error = None
    # Three-dot form first; fall back to a plain two-ref diff when it fails
    # (e.g. when there is no common ancestor).
    for ref_args in ([f"{base_ref}...{head_ref}"], [base_ref, head_ref]):
        try:
            diff_output = subprocess.check_output(
                ["git", "-C", str(root), "diff", "--name-only", *ref_args],
                stderr=subprocess.DEVNULL,
            ).decode("utf-8").strip()
            break
        except subprocess.CalledProcessError as e:
            last_error = e
        except OSError as e:
            # git is missing or not executable: report it instead of crashing.
            return {
                "error": f"Could not compute diff between {base_ref} and {head_ref}",
                "detail": f"git could not be executed: {e}",
            }

    if diff_output is None:
        return {
            "error": f"Could not compute diff between {base_ref} and {head_ref}",
            "detail": f"git exit code {last_error.returncode}. Ensure both refs exist.",
        }

    if not diff_output:
        return {"changed_files": [], "total_blast_radius": 0, "hint": "No files changed."}

    changed_files = [f for f in diff_output.split("\n") if f.strip()]

    db = _get_db()
    try:
        result_files = []
        all_affected = set()

        for fp in changed_files:
            node = db.get_node_by_path(fp)
            if node:
                blast = db.get_blast_radius(node["id"], max_depth=3)
                affected_paths = [r["file_path"] for r in blast if r["file_path"] != fp]
                all_affected.update(affected_paths)
                result_files.append({
                    "file_path": fp,
                    "in_graph": True,
                    "stability": node.get("stability", "medium"),
                    "do_not_revert": bool(node.get("do_not_revert")),
                    "blast_radius": len(affected_paths),
                    "affected": affected_paths[:5],  # Top 5 for brevity
                })
            else:
                result_files.append({
                    "file_path": fp,
                    "in_graph": False,
                    "stability": "unknown",
                    "do_not_revert": False,
                    "blast_radius": 0,
                    "affected": [],
                })

        return {
            "base_ref": base_ref,
            "head_ref": head_ref,
            "changed_files": result_files,
            "total_changed": len(changed_files),
            "total_blast_radius": len(all_affected),
            # Sort before truncating so the same inputs always give the same 20.
            "union_affected": sorted(all_affected)[:20],
        }
    finally:
        db.close()
379
+
380
+
381
def refresh_graph(file_paths: list[str] | None = None) -> dict[str, Any]:
    """Regenerate graph nodes for the project in the SQLite graph database.

    Args:
        file_paths: Accepted for interface compatibility. It is not passed to
            the generator, which is always invoked for the whole project root.

    Returns:
        A dict with a ``status`` message and a ``hint``.
    """
    from indexer.graph_generator import generate_graph_sqlite

    root = get_project_root()
    db_path = str(_graph_dir() / "graph.db")
    # The previous implementation walked the entire project tree to build a
    # file list that was never used; that wasted scan has been removed.
    generate_graph_sqlite(str(root), db_path)

    return {
        "status": "Generated graph nodes in SQLite DB.",
        "hint": "Call get_node(file_path) to read the new graph stub.",
    }
404
+
405
+
406
+ # ---------------------------------------------------------------------------
407
+ # v1.5: query_graph — callers/callees/tests/dependents
408
+ # ---------------------------------------------------------------------------
409
+
410
def query_graph(file_path: str, symbol: str | None = None,
                query_type: str = "callees") -> dict[str, Any]:
    """
    Query the call graph.

    query_type: 'callers' | 'callees' | 'tests' | 'dependents' | 'symbols'

    * ``symbols`` lists the symbols of ``file_path``; ``symbol`` is ignored.
    * ``callers`` / ``callees`` require ``symbol``.
    * ``tests`` / ``dependents`` work at file level, so ``symbol`` is
      optional. If one is given it must still exist in the file.

    Returns a result dict for the query, or a dict with ``error`` (and
    sometimes ``hint``) for an unknown query type, a missing required symbol,
    or a symbol that cannot be found.
    """
    symbol_queries = ("callers", "callees")
    file_queries = ("tests", "dependents")
    # Validate the query type first so a typo is reported as such rather than
    # as a missing symbol.
    if query_type != "symbols" and query_type not in symbol_queries + file_queries:
        return {"error": f"Unknown query_type: {query_type}. Use: callers, callees, tests, dependents, symbols"}

    db = _get_db()
    try:
        node_id = f"file:{file_path}"

        if query_type == "symbols":
            # List all symbols in a file
            symbols = db.get_symbols_for_file(node_id)
            return {
                "file_path": file_path,
                "query_type": "symbols",
                "results": [
                    {"name": s["name"], "kind": s["kind"], "signature": s["signature"],
                     "start_line": s["start_line"], "end_line": s["end_line"],
                     "is_public": bool(s["is_public"])}
                    for s in symbols
                ],
                "count": len(symbols),
            }

        sym = None
        if symbol:
            sym = db.find_symbol(symbol, file_path)
            if not sym:
                return {"error": f"Symbol '{symbol}' not found in {file_path}",
                        "hint": "Call query_graph with query_type='symbols' to list available symbols."}
        elif query_type in symbol_queries:
            return {"error": "symbol is required for callers/callees queries"}

        if query_type in symbol_queries:
            fetch = db.get_callers if query_type == "callers" else db.get_callees
            related = fetch(sym["id"])
            return {
                "file_path": file_path, "symbol": symbol, "query_type": query_type,
                "results": [{"name": c["name"], "kind": c["kind"],
                             "file": c["file_node_id"].replace("file:", "")}
                            for c in related],
                "count": len(related),
            }

        if query_type == "tests":
            # Importers of this file whose path contains "test".
            edges = db.conn.execute(
                "SELECT source_id FROM edges WHERE target_id = ? AND kind = 'imports'",
                (node_id,),
            ).fetchall()
            test_files = []
            for e in edges:
                src = e["source_id"].replace("file:", "")
                if "test" in src.lower():
                    test_files.append(src)
            return {
                "file_path": file_path, "symbol": symbol, "query_type": "tests",
                "test_files": test_files,
                "count": len(test_files),
            }

        # query_type == "dependents": blast radius of the file, two levels deep.
        blast = db.get_blast_radius(node_id, max_depth=2)
        return {
            "file_path": file_path, "symbol": symbol, "query_type": "dependents",
            "results": [{"file": r["file_path"]} for r in blast],
            "count": len(blast),
        }
    finally:
        db.close()
498
+
499
+
500
+ # ---------------------------------------------------------------------------
501
+ # v1.5: analyze_changes — function-level risk-scored change analysis
502
+ # ---------------------------------------------------------------------------
503
+
504
def analyze_changes(base_ref: str = "main", head_ref: str = "HEAD") -> dict[str, Any]:
    """
    Enhanced change analysis with function-level risk scoring.
    Maps git diff to affected functions, callers, and test coverage gaps.

    The changed-file list comes from ``git diff --name-only``, first with the
    three-dot form ``base...head`` and, if that fails, with the two refs as
    separate arguments. For every symbol of every changed file:

    * ``high``: public, at least 3 callers, and no importing test file
    * ``medium``: public with at least 1 caller
    * ``low``: everything else

    A file counts as tested when some file importing it has "test" in its
    path. Returns a summary dict (``details`` capped at 30 entries,
    ``test_gaps`` at 10), or a dict with ``error`` when the diff cannot be
    computed.
    """
    root = get_project_root()

    # Get changed files
    diff_output = None
    last_error = None
    for ref_args in ([f"{base_ref}...{head_ref}"], [base_ref, head_ref]):
        try:
            diff_output = subprocess.check_output(
                ["git", "-C", str(root), "diff", "--name-only", *ref_args],
                stderr=subprocess.DEVNULL,
            ).decode("utf-8").strip()
            break
        except subprocess.CalledProcessError as e:
            last_error = e
        except OSError as e:
            # git is missing or not executable: report it instead of crashing.
            return {"error": f"Could not compute diff: {e}"}

    if diff_output is None:
        return {"error": f"Could not compute diff: {last_error}"}

    if not diff_output:
        return {"changes": [], "summary": "No changes detected."}

    changed_files = [f for f in diff_output.split("\n") if f.strip()]

    db = _get_db()
    try:
        results = []
        total_risk = {"high": 0, "medium": 0, "low": 0}
        test_gaps = []

        for fp in changed_files:
            node_id = f"file:{fp}"
            symbols = db.get_symbols_for_file(node_id)

            # Check if any test files cover this file
            test_edges = db.conn.execute(
                "SELECT source_id FROM edges WHERE target_id = ? AND kind = 'imports'",
                (node_id,),
            ).fetchall()
            test_files = [e["source_id"].replace("file:", "") for e in test_edges
                          if "test" in e["source_id"].lower()]
            has_tests = len(test_files) > 0

            for sym in symbols:
                callers = db.get_callers(sym["id"])
                caller_count = len(callers)

                # Risk scoring
                is_public = bool(sym.get("is_public"))
                if is_public and caller_count >= 3 and not has_tests:
                    risk = "high"
                elif is_public and caller_count >= 1:
                    risk = "medium"
                else:
                    risk = "low"

                total_risk[risk] += 1

                if is_public and not has_tests:
                    test_gaps.append({"file": fp, "symbol": sym["name"], "callers": caller_count})

                results.append({
                    "file": fp,
                    "symbol": sym["name"],
                    "kind": sym["kind"],
                    "risk": risk,
                    "caller_count": caller_count,
                    "has_tests": has_tests,
                    "callers": [c["name"] for c in callers[:5]],
                })

        return {
            "base_ref": base_ref,
            "head_ref": head_ref,
            "changed_files": len(changed_files),
            "functions_analyzed": len(results),
            "risk_summary": total_risk,
            "test_gaps": test_gaps[:10],
            "details": results[:30],  # Top 30 for token efficiency
        }
    finally:
        db.close()
590
+
591
+
592
+ # ---------------------------------------------------------------------------
593
+ # v1.5: find_hotspots — complexity and risk hotspots
594
+ # ---------------------------------------------------------------------------
595
+
596
def find_hotspots(threshold: int = 50) -> dict[str, Any]:
    """
    Report complexity hotspots from the graph database.

    Three lists, each capped at ten entries:
    functions with at least ``threshold`` lines, symbols with at least
    3 callers, and edge sources with at least 5 outgoing edges.
    """
    db = _get_db()
    try:
        # Functions at or above the line-count threshold
        big_functions = db.find_hotspot_functions(min_lines=threshold)

        # Symbols with many callers (risky to change)
        popular_symbols = db.find_high_fan_in(min_callers=3)

        # Sources with many outgoing edges (fragile)
        fan_out_rows = db.conn.execute('''
            SELECT source_id, COUNT(target_id) as dep_count
            FROM edges
            GROUP BY source_id
            HAVING dep_count >= 5
            ORDER BY dep_count DESC
            LIMIT 10
        ''').fetchall()

        large_functions = []
        for fn in big_functions[:10]:
            large_functions.append({
                "file": fn.get("full_path", ""),
                "name": fn["name"],
                "lines": fn["line_count"],
                "kind": fn["kind"],
            })

        high_fan_in = []
        for entry in popular_symbols[:10]:
            high_fan_in.append({
                "name": entry["name"],
                "kind": entry["kind"],
                "callers": entry["caller_count"],
                "file": entry["file_node_id"].replace("file:", ""),
            })

        high_fan_out = []
        for row in fan_out_rows:
            high_fan_out.append({
                "file": row["source_id"].replace("file:", ""),
                "dependencies": row["dep_count"],
            })

        return {
            "large_functions": large_functions,
            "high_fan_in": high_fan_in,
            "high_fan_out": high_fan_out,
            "threshold": threshold,
        }
    finally:
        db.close()