cerebro-code-memory 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cerebro/__init__.py +3 -0
- cerebro/callgraph.py +38 -0
- cerebro/cli.py +348 -0
- cerebro/config.py +136 -0
- cerebro/db.py +245 -0
- cerebro/docaudit.py +174 -0
- cerebro/embeddings.py +175 -0
- cerebro/gitsync.py +124 -0
- cerebro/graph.py +77 -0
- cerebro/indexer.py +854 -0
- cerebro/insights.py +217 -0
- cerebro/notes.py +70 -0
- cerebro/server.py +382 -0
- cerebro/summaries.py +66 -0
- cerebro/summarizer.py +109 -0
- cerebro/tsconfig.py +159 -0
- cerebro/views.py +52 -0
- cerebro/viz.py +374 -0
- cerebro_code_memory-0.1.0.dist-info/METADATA +160 -0
- cerebro_code_memory-0.1.0.dist-info/RECORD +23 -0
- cerebro_code_memory-0.1.0.dist-info/WHEEL +4 -0
- cerebro_code_memory-0.1.0.dist-info/entry_points.txt +11 -0
- cerebro_code_memory-0.1.0.dist-info/licenses/LICENSE +21 -0
cerebro/insights.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"""Architecture insights derived from the dependency graph (no new analysis):
|
|
2
|
+
|
|
3
|
+
- impact(path): the transitive blast radius — everything that (directly or
|
|
4
|
+
indirectly) imports `path`, so you know what a change can break.
|
|
5
|
+
- cycles(): circular-import groups (strongly connected components > 1), a smell.
|
|
6
|
+
- orphans(): code files nobody imports — dead-code candidates (minus the obvious
|
|
7
|
+
framework entrypoints, which are loaded by convention, not by import).
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
from collections import deque
|
|
13
|
+
|
|
14
|
+
import networkx as nx
|
|
15
|
+
|
|
16
|
+
from . import graph as graphmod
|
|
17
|
+
|
|
18
|
+
# Path suffixes that mark a file as an entrypoint; looks_entrypoint() tests
# them against the whole path with str.endswith.
_ENTRY_SUFFIXES = (
    ".module.ts", ".controller.ts", ".config.ts", ".config.js", ".config.mjs",
    ".config.cjs", ".d.ts", ".stories.tsx", ".stories.ts",
)
# Exact basenames (the text after the last "/") that mark a file as an entrypoint.
_ENTRY_BASENAMES = {
    "main.ts", "index.ts", "index.js", "index.tsx", "main.js",
    # Next.js root conventions: invoked by the framework, never imported.
    "middleware.ts", "middleware.js", "instrumentation.ts", "instrumentation.js",
    # Dart/Flutter: lib/main.dart is the app entry (runApp), invoked by the
    # runtime, never imported. Flavor variants (main_dev.dart, ...) match below.
    "main.dart",
}
# Directory segments; a path containing any of them (after a "/" is prepended,
# so a leading directory matches too) is treated as an entrypoint.
_ENTRY_SEGMENTS = (
    "/pages/", "/app/", "/migrations/", "/seeders/", "/seeds/", "/scripts/",
    "/test/", "/tests/", "/__tests__/", "/e2e/",
    # Dart conventions: bin/ holds executables (each with a main()), and
    # integration_test/ is run by the Flutter test harness, not imported.
    "/bin/", "/integration_test/",
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def looks_entrypoint(path: str) -> bool:
    """Return True when `path` matches one of the entrypoint conventions.

    A path qualifies when its basename is in _ENTRY_BASENAMES, the path ends
    with one of _ENTRY_SUFFIXES, the basename is a Dart flavor entry
    (main_*.dart) or a test/spec/e2e file, or the path passes through one of
    the _ENTRY_SEGMENTS directories.
    """
    filename = path.rpartition("/")[2]

    if filename in _ENTRY_BASENAMES:
        return True
    if path.endswith(_ENTRY_SUFFIXES):
        return True

    # Dart flavor entrypoints (main_dev.dart, main_production.dart, ...).
    if filename.startswith("main_") and filename.endswith(".dart"):
        return True

    # JS uses foo.test.ts / foo.spec.ts; Dart uses foo_test.dart.
    if filename.endswith("_test.dart"):
        return True
    for marker in (".spec.", ".test.", ".e2e."):
        if marker in filename:
            return True

    # Prefix a "/" so a segment at the very start of the path ("tests/x.ts")
    # is matched the same way as one in the middle.
    rooted = "/" + path
    for segment in _ENTRY_SEGMENTS:
        if segment in rooted:
            return True
    return False
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def impact(conn, path: str, limit: int = 300) -> dict | None:
    """Compute the transitive set of files that import `path`.

    Walks the dependency graph breadth-first along reversed edges, recording
    for every importer the number of hops from `path`.

    Returns:
        None when `path` is not a node of the graph; otherwise a dict with
        "path", "total" (number of importers found), "direct" (sorted
        importers at distance 1), "by_distance" (distance -> count, ascending)
        and "all" (importers ordered by distance then name, cut to `limit`).
    """
    g = graphmod.build_graph(conn)
    if path not in g:
        return None

    reversed_view = g.reverse(copy=False)  # edge v->u means "u imports v"
    depth = {path: 0}
    frontier = deque([path])
    while frontier:
        current = frontier.popleft()
        next_depth = depth[current] + 1
        for importer in reversed_view.successors(current):
            if importer in depth:
                continue
            depth[importer] = next_depth
            frontier.append(importer)
    # The start node was only a seed; it is not one of its own importers.
    del depth[path]

    histogram: dict[int, int] = {}
    for hops in depth.values():
        histogram[hops] = histogram.get(hops, 0) + 1

    ranked = sorted(depth, key=lambda node: (depth[node], node))
    nearest = sorted(node for node in depth if depth[node] == 1)
    return {
        "path": path,
        "total": len(depth),
        "direct": nearest,
        "by_distance": dict(sorted(histogram.items())),
        "all": ranked[:limit],
    }
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def is_barrel(path: str) -> bool:
    """Return True when the basename of `path` is an ``index.*`` JS/TS file."""
    filename = path.rpartition("/")[2]
    if not filename.startswith("index."):
        return False
    extension = filename.rpartition(".")[2]
    return extension in {"ts", "tsx", "js", "jsx", "mjs", "cjs"}
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def cycles(conn, ignore_barrels: bool = True, max_report: int | None = 50) -> dict:
    """Report circular-import groups, tightest (shortest) first.

    Barrel files (index.*) re-export everything and create huge artificial
    cycles, so by default they're removed first; what's left are genuine
    module cycles.

    Type-only edges (TS `import type`) are excluded: they're erased at compile
    time, so a cycle made purely of them never exists at runtime.

    Args:
        conn: Database connection handed to graph.build_graph.
        ignore_barrels: Drop every node for which is_barrel() is true before
            searching for cycles.
        max_report: Maximum number of groups returned under "cycles"; None
            returns every group.

    Returns:
        {"cycles": [...], "barrels_ignored": int, "total": int}. Each cycle
        entry carries "size" (files in the strongly connected component),
        "length" (edges in the sample cycle), "cycle" (closed chain of paths)
        and "members" (the sorted component). "total" counts every group
        found, before the `max_report` cap is applied.
    """
    g = graphmod.build_graph(conn, exclude_kinds=("type",))
    barrels = 0
    if ignore_barrels:
        drop = [n for n in g.nodes if is_barrel(n)]
        barrels = len(drop)
        g = g.copy()  # remove nodes from a copy, not from the graph we were given
        g.remove_nodes_from(drop)

    out = []
    for comp in nx.strongly_connected_components(g):
        if len(comp) < 2:
            continue
        members = sorted(comp)
        try:
            cyc = nx.find_cycle(g.subgraph(comp))
        except nx.NetworkXNoCycle:
            # Only the documented "no cycle" signal is tolerated; any other
            # error is a real failure and must surface.
            chain = members
        else:
            # Close the loop by repeating the first node at the end.
            chain = [u for u, _ in cyc] + [cyc[0][0]]
        out.append(
            {"size": len(comp), "length": len(chain) - 1, "cycle": chain, "members": members}
        )
    out.sort(key=lambda c: (c["length"], c["size"]))  # tightest first
    return {"cycles": out[:max_report], "barrels_ignored": barrels, "total": len(out)}
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def cycle_members(conn) -> set[str]:
    """Set of files participating in any (barrel-free) cycle — for graph overlays.

    Asks cycles() for the uncapped report: with its default cap only the 50
    tightest groups come back, which would silently leave the members of every
    further cycle out of the overlay.
    """
    # cycles() slices its result with `out[:max_report]`; a None bound keeps
    # every group.
    report = cycles(conn, max_report=None)
    members: set[str] = set()
    for group in report["cycles"]:
        members.update(group["members"])
    return members
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def script_entrypoints(conn) -> set[str]:
    """Files invoked by package.json scripts, recorded at index time. Loaded by
    tooling rather than imported, so they're entrypoints, not dead code.

    Reads the JSON stored under meta key 'script_entrypoints'. Returns an empty
    set when the row is missing, empty, not valid JSON, or not a JSON list;
    non-string list items are ignored. (A bare JSON string or object would
    otherwise be exploded into single characters or keys by ``set(...)``.)
    """
    row = conn.execute("SELECT value FROM meta WHERE key='script_entrypoints'").fetchone()
    if not row or not row["value"]:
        return set()
    try:
        decoded = json.loads(row["value"])
    except (ValueError, TypeError):
        return set()
    if not isinstance(decoded, list):
        return set()
    return {entry for entry in decoded if isinstance(entry, str)}
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# Methods invoked by a framework via interface/lifecycle contract, never by name
# in code — so "referenced nowhere" doesn't mean dead. Skipped in dead_symbols,
# which consults this set only for symbols whose kind is "method".
_FRAMEWORK_METHODS = {
    # NestJS
    "use", "canActivate", "intercept", "transform", "catch",
    "onModuleInit", "onModuleDestroy", "onApplicationBootstrap",
    "onApplicationShutdown", "beforeApplicationShutdown",
    # Angular
    "ngOnInit", "ngOnDestroy", "ngOnChanges", "ngAfterViewInit",
    "ngAfterViewChecked", "ngDoCheck", "ngAfterContentInit",
    # React class lifecycle
    "render", "componentDidMount", "componentWillUnmount", "componentDidUpdate",
    "shouldComponentUpdate", "getDerivedStateFromProps", "getSnapshotBeforeUpdate",
    # Flutter widget/state lifecycle — overridden, invoked by the framework
    "build", "createState", "initState", "dispose", "didChangeDependencies",
    "didUpdateWidget", "deactivate", "reassemble", "createElement",
    "didChangeAppLifecycleState",
    # flutter_bloc / Cubit contract overrides; Equatable's props
    "onChange", "onTransition", "onError", "close", "props",
}
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _project_of(path: str) -> str:
|
|
162
|
+
"""The independent repo a file belongs to — its top-level directory. Cerebro
|
|
163
|
+
indexes a workspace of separate projects (each its own git repo) under one
|
|
164
|
+
root, with zero import edges crossing between them."""
|
|
165
|
+
return path.split("/", 1)[0]
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def dead_symbols(conn, prefix: str | None = None, limit: int = 300) -> dict:
    """List symbols whose name is never referenced inside their own project.

    These are unused-export candidates living in otherwise-live files — dead
    code that orphans() cannot see because the file itself is imported.

    Matching is per project (see _project_of), not global: a same-named symbol
    referenced in a different project does NOT count as a use. Name uses come
    from the `refs` table. The result is a heuristic lead, not a delete list:
    same-named symbols within one project still mask each other, and dynamic
    access (obj['x'], string DI) can produce false positives.

    Skipped entirely: files with no recorded language, files outside `prefix`,
    entrypoint files (looks_entrypoint or package.json scripts), and methods
    named in _FRAMEWORK_METHODS.

    Returns:
        {"dead": first `limit` candidates as {"path", "name", "kind", "line"}
        dicts ordered by file then line, "total": number of candidates found}.
    """
    scripts = script_entrypoints(conn)
    langs = {row["path"]: row["lang"] for row in conn.execute("SELECT path, lang FROM files")}

    # name -> set of projects that reference it.
    projects_using: dict[str, set[str]] = {}
    for ref in conn.execute("SELECT name, path FROM refs"):
        projects_using.setdefault(ref["name"], set()).add(_project_of(ref["path"]))

    candidates = []
    symbol_rows = conn.execute(
        "SELECT file_path AS path, kind, name, line FROM symbols ORDER BY file_path, line"
    )
    for sym in symbol_rows:
        path, kind, name = sym["path"], sym["kind"], sym["name"]
        if langs.get(path) is None or (prefix and not path.startswith(prefix)):
            continue
        if looks_entrypoint(path) or path in scripts:
            continue
        if kind == "method" and name in _FRAMEWORK_METHODS:
            continue  # invoked by the framework via contract, not by name
        if _project_of(path) in projects_using.get(name, ()):
            continue  # referenced somewhere in its own project -> alive
        candidates.append({"path": path, "name": name, "kind": kind, "line": sym["line"]})

    return {"dead": candidates[:limit], "total": len(candidates)}
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def orphans(conn, prefix: str | None = None) -> dict:
    """Split the files that nothing imports into dead candidates and entrypoints.

    Only graph nodes with a recorded language (and, when given, a path starting
    with `prefix`) are considered. A node with no incoming edge is an
    entrypoint when looks_entrypoint() accepts it or a package.json script
    invokes it; otherwise it is a dead-code candidate.

    Returns:
        {"dead": sorted paths, "entrypoints": sorted paths}.
    """
    g = graphmod.build_graph(conn)
    langs = {row["path"]: row["lang"] for row in conn.execute("SELECT path, lang FROM files")}
    scripts = script_entrypoints(conn)

    unreferenced, conventional = [], []
    for node in g.nodes:
        if langs.get(node) is None:
            continue
        if prefix and not node.startswith(prefix):
            continue
        if g.in_degree(node) != 0:
            continue
        if looks_entrypoint(node) or node in scripts:
            conventional.append(node)
        else:
            unreferenced.append(node)
    return {"dead": sorted(unreferenced), "entrypoints": sorted(conventional)}
|
cerebro/notes.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Decision log (plan layer 4): cross-session memory of the *why*.
|
|
2
|
+
|
|
3
|
+
The structural index recovers WHAT exists for free (tree-sitter). What a new chat
|
|
4
|
+
can never recover by reading code is the WHY: decisions, domain rules, and gotchas
|
|
5
|
+
("QR_MANUAL = merchant confirms payment by hand", "Seller was refactored to
|
|
6
|
+
Organization"). Sessions persist these with cerebro_note and retrieve them with
|
|
7
|
+
cerebro_recall. Stored in the `notes` table; indexed in `fts` under kind='note'.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import sqlite3
|
|
12
|
+
from datetime import datetime, timezone
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string, to the second."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat(timespec="seconds")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def add(conn, topic: str | None, content: str) -> int:
    """Store a note, index it for full-text search, commit, and return its id.

    An empty `topic` is stored as NULL. The note is mirrored into the `fts`
    table under kind='note' with path ``note:<id>`` and the text
    ``<topic>\\n<content>``.
    """
    note_row = (topic or None, content, now_iso())
    cursor = conn.execute(
        "INSERT INTO notes(topic, content, created_at) VALUES(?,?,?)",
        note_row,
    )
    note_id = cursor.lastrowid

    fts_key = f"note:{note_id}"
    fts_text = f"{topic or ''}\n{content}"
    conn.execute(
        "INSERT INTO fts(path, kind, text) VALUES(?, 'note', ?)",
        (fts_key, fts_text),
    )
    conn.commit()
    return note_id
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def list_recent(conn, limit: int = 10):
    """Return up to `limit` rows from `notes`, highest id first."""
    cursor = conn.execute("SELECT * FROM notes ORDER BY id DESC LIMIT ?", (limit,))
    return cursor.fetchall()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def recall(conn, query: str = "", limit: int = 10):
    """Return notes matching `query`, or the most recent notes when it is blank.

    The full-text index is tried first (rows of kind='note', best rank first).
    When FTS rejects the query syntax or finds nothing, a literal substring
    search over topic and content is used instead, newest first.

    Args:
        conn: Connection whose rows support access by column name.
        query: Search text; blank or whitespace-only means "most recent".
        limit: Maximum number of notes returned.

    Returns:
        A list of `notes` rows in match order; empty when nothing matches.
    """
    if not query.strip():
        return list_recent(conn, limit)

    ids: list[int] = []
    try:
        rows = conn.execute(
            "SELECT path FROM fts WHERE fts MATCH ? AND kind='note' "
            "ORDER BY rank LIMIT ?",
            (query, limit),
        ).fetchall()
        ids = [int(r["path"].split(":", 1)[1]) for r in rows]
    except sqlite3.OperationalError:
        pass
    if not ids:  # FTS syntax rejected the query, or no hits — fall back to LIKE
        # Escape LIKE metacharacters so the query is matched literally:
        # an unescaped "_" in "QR_MANUAL" would match any character.
        literal = query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        like = f"%{literal}%"
        rows = conn.execute(
            "SELECT id FROM notes WHERE topic LIKE ? ESCAPE '\\' "
            "OR content LIKE ? ESCAPE '\\' "
            "ORDER BY id DESC LIMIT ?",
            (like, like, limit),
        ).fetchall()
        ids = [r["id"] for r in rows]
    if not ids:
        return []
    placeholders = ",".join("?" * len(ids))
    notes = conn.execute(
        f"SELECT * FROM notes WHERE id IN ({placeholders})", ids
    ).fetchall()
    order = {nid: i for i, nid in enumerate(ids)}  # preserve match ranking
    return sorted(notes, key=lambda r: order.get(r["id"], 1 << 30))
|
cerebro/server.py
ADDED
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
"""Cerebro MCP server.
|
|
2
|
+
|
|
3
|
+
Exposes the persistent code-knowledge brain as MCP tools so any chat session can
|
|
4
|
+
*query* what was already understood instead of re-reading folders. Output is kept
|
|
5
|
+
deliberately compact (token-cheap) — that is the whole point of the project.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import posixpath
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from mcp.server.fastmcp import FastMCP
|
|
14
|
+
|
|
15
|
+
from . import config as cfg
|
|
16
|
+
from . import callgraph, db, embeddings, gitsync, graph, indexer, insights, notes, summaries, views
|
|
17
|
+
|
|
18
|
+
mcp = FastMCP("cerebro")

# Process-wide singletons, filled in on the first _ctx() call.
_CONFIG: cfg.Config | None = None
_CONN = None


def _ctx():
    """Return the shared ``(config, connection)`` pair, creating each on first use."""
    global _CONFIG, _CONN
    if _CONFIG is None:
        _CONFIG = cfg.Config.load()
    config = _CONFIG
    if _CONN is None:
        _CONN = db.connect(config.db_path)
    return config, _CONN
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
_DEP_CAP = 15 # max dependency paths listed before collapsing to "(+N more)"
|
|
34
|
+
_SYM_CAP = 40 # max symbols listed for one file
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _join_capped(items, n: int = _DEP_CAP) -> str:
|
|
38
|
+
items = list(items)
|
|
39
|
+
if len(items) <= n:
|
|
40
|
+
return ", ".join(items)
|
|
41
|
+
return ", ".join(items[:n]) + f", … (+{len(items) - n} more)"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _empty_index_hint(conn) -> str | None:
|
|
45
|
+
n = conn.execute("SELECT COUNT(*) AS n FROM files").fetchone()["n"]
|
|
46
|
+
if n == 0:
|
|
47
|
+
return "Index is empty. Run cerebro_reindex() first to build the map."
|
|
48
|
+
return None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _resolve_path(config, conn, path: str) -> str:
|
|
52
|
+
"""Normalize whatever path form the model passes (absolute, ./relative, or
|
|
53
|
+
already repo-relative) to the canonical repo-relative key the brain is indexed
|
|
54
|
+
by. Without this, a summary recorded under an absolute path is invisible to a
|
|
55
|
+
later cerebro_get using a relative one — which silently breaks cross-session
|
|
56
|
+
persistence."""
|
|
57
|
+
p = Path(path)
|
|
58
|
+
if p.is_absolute():
|
|
59
|
+
try:
|
|
60
|
+
norm = p.resolve().relative_to(config.root.resolve()).as_posix()
|
|
61
|
+
except ValueError:
|
|
62
|
+
norm = path # outside the indexed root; leave as given
|
|
63
|
+
else:
|
|
64
|
+
norm = posixpath.normpath(path.lstrip("/"))
|
|
65
|
+
if conn.execute("SELECT 1 FROM files WHERE path=?", (norm,)).fetchone():
|
|
66
|
+
return norm
|
|
67
|
+
# Fallback: a unique indexed file whose path ends with what was given.
|
|
68
|
+
rows = conn.execute(
|
|
69
|
+
"SELECT path FROM files WHERE path LIKE ? ESCAPE '\\'",
|
|
70
|
+
("%/" + norm.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_"),),
|
|
71
|
+
).fetchall()
|
|
72
|
+
if len(rows) == 1:
|
|
73
|
+
return rows[0]["path"]
|
|
74
|
+
return norm
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@mcp.tool()
def cerebro_map(top: int = 30) -> str:
    """Cheap whole-project overview: file/language counts and the most important
    modules ranked by dependency centrality (PageRank). Call this FIRST in a new
    session instead of exploring folders."""
    # The docstring above is kept verbatim: it is what the tool decorator sees.
    config, conn = _ctx()
    overview = views.map_text(conn, config.root, top)
    return overview
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@mcp.tool()
def cerebro_get(path: str) -> str:
    """Everything Cerebro knows about a file WITHOUT reading it: cached summary
    (with staleness flag), defined symbols, and dependency edges."""
    config, conn = _ctx()
    path = _resolve_path(config, conn, path)
    file_row = conn.execute("SELECT * FROM files WHERE path=?", (path,)).fetchone()
    if file_row is None:
        return f"'{path}' is not in the index. It may be new — run cerebro_reindex()."

    report = [f"# {path} ({file_row['lang'] or 'other'})"]

    # Summary section: hash the file as it is on disk now (if it still exists)
    # so summaries.get can compare it with the hash stored with the summary.
    on_disk = config.root / path
    live_hash = None
    if on_disk.exists():
        live_hash = indexer.file_hash(on_disk)
    cached = summaries.get(conn, path, current_hash=live_hash)
    if not cached:
        report.append("\nSummary: (none yet — record one with cerebro_record)")
    else:
        stale_flag = " ⚠ STALE (file changed since summary)" if cached["stale"] else ""
        report.append(f"\nSummary:{stale_flag}\n{cached['summary_en']}")

    # Symbols section, capped at _SYM_CAP entries.
    symbols = db.symbols_for(conn, path)
    if symbols:
        report.append("\nSymbols:")
        for sym in symbols[:_SYM_CAP]:
            report.append(
                f" L{sym['line']:<5} {sym['kind']:<8} {sym['signature'] or sym['name']}"
            )
        hidden = len(symbols) - _SYM_CAP
        if hidden > 0:
            report.append(f" … (+{hidden} more symbols)")

    # Dependency edges in both directions.
    outgoing = graph.dependencies(conn, path)
    incoming = graph.dependents(conn, path)
    if outgoing:
        report.append("\nImports (depends on): " + _join_capped(outgoing))
    if incoming:
        report.append("Imported by (impact if changed): " + _join_capped(incoming))
    return "\n".join(report)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@mcp.tool()
def cerebro_search(query: str, limit: int = 15) -> str:
    """Find relevant files. When the semantic index is built it ranks by meaning
    (intent), so phrase queries naturally ("where do we validate stock at
    checkout?"); it also includes keyword/symbol matches. Returns paths to
    cerebro_get()."""
    config, conn = _ctx()
    empty_hint = _empty_index_hint(conn)
    if empty_hint:
        return empty_hint

    semantic_hits = embeddings.search(config, conn, query, limit=limit)
    results = []
    semantic_paths = set()

    # Semantic hits come first; a hit without a symbol name shows a snippet of
    # the file's recorded summary instead.
    for path, name, line, score in semantic_hits:
        semantic_paths.add(path)
        if not name:
            summary = summaries.get(conn, path)
            snip = f" — {summary['summary_en'][:90]}" if summary else ""
            results.append(f"~{score:.2f} {path}{snip}")
            continue
        loc = f"{path}:{line}" if line else path
        results.append(f"~{score:.2f} {loc} — {name}")

    # Keyword hits follow, skipping files the semantic pass already listed.
    for hit in db.search(conn, query, limit=limit):
        if hit["path"] not in semantic_paths:
            results.append(f"[{hit['kind']}] {hit['path']} — {hit['snip']}")

    if not results:
        return f"No matches for '{query}'."
    header = "(semantic + keyword)\n" if semantic_hits else ""
    return header + "\n".join(results[:limit])
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@mcp.tool()
def cerebro_record(path: str, summary: str, model: str = "") -> str:
    """Leave a trace: store your English understanding of a file so future
    sessions reuse it instead of re-analyzing. Write 1-3 dense sentences in
    English describing what the file does and its role."""
    config, conn = _ctx()
    path = _resolve_path(config, conn, path)
    outcome = summaries.record(conn, path, summary, model or None)
    if outcome["indexed"]:
        return f"Recorded summary for '{path}'."
    # The summary was stored, but the file itself is not in the index yet.
    return (
        f"Recorded summary for '{path}', but it is not in the index yet "
        "(staleness tracking disabled until cerebro_reindex())."
    )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
@mcp.tool()
def cerebro_stale() -> str:
    """What the index no longer trusts: files changed/added/deleted on disk since
    the last reindex, plus summaries whose source file has changed."""
    config, conn = _ctx()
    disk = indexer.disk_state(config)
    delta = indexer.diff(conn, disk)
    stale_sum = summaries.stale_summaries(conn)

    # (heading, paths) pairs in the order they are reported.
    sections = (
        ("Changed on disk:", delta["changed"]),
        ("New (not indexed):", delta["new"]),
        ("Deleted (still in index):", delta["deleted"]),
        ("Summaries now stale:", stale_sum),
    )
    if not any(paths for _, paths in sections):
        return "Everything is fresh. No reindex needed."

    parts = [
        heading + "\n " + "\n ".join(paths)
        for heading, paths in sections
        if paths
    ]
    parts.append("\nRun cerebro_reindex() to refresh the structural index.")
    return "\n\n".join(parts)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@mcp.tool()
def cerebro_sync() -> str:
    """Catch changes made outside Claude Code (branch switch, git pull, edits in the
    raw editor) and reindex only the affected files. Works across nested repos."""
    config, conn = _ctx()
    outcome = gitsync.sync(config, conn)
    if outcome.get("git"):
        changed, repos = outcome["changed"], outcome["repos"]
        return f"Git sync: {changed} changed files reindexed across {repos} repo(s)."
    return "No git repo found under the root — nothing to sync."
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
@mcp.tool()
def cerebro_reindex(paths: list[str] | None = None) -> str:
    """Refresh the static index (symbols, dependency edges, hashes). Only
    changed/new/deleted files are reprocessed. Pass `paths` to limit scope."""
    config, conn = _ctx()
    stats = indexer.reindex(config, conn, paths=paths)
    breakdown = (
        f"new={stats['new']}, changed={stats['changed']}, deleted={stats['deleted']}"
    )
    return (
        f"Reindexed. {stats['indexed']} files processed "
        f"({breakdown}), {stats['total_files']} total."
    )
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
@mcp.tool()
def cerebro_note(content: str, topic: str = "") -> str:
    """Record a decision, domain rule, or gotcha — the *why* that reading code can
    never recover (e.g. 'QR_MANUAL = merchant confirms payment by hand', 'Seller was
    refactored to Organization'). Future sessions retrieve it with cerebro_recall.
    Keep `content` to 1-3 sentences; `topic` is an optional short tag."""
    _, conn = _ctx()
    # An empty topic is handed on as None rather than as an empty string.
    note_id = notes.add(conn, topic or None, content)
    suffix = f" on '{topic}'." if topic else "."
    return f"Recorded note #{note_id}" + suffix
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
@mcp.tool()
def cerebro_recall(query: str = "", limit: int = 10) -> str:
    """Recall decisions/rules/gotchas recorded by past sessions BEFORE re-deriving
    them. Pass a query to search by meaning of topic/content, or leave empty for the
    most recent notes."""
    # Only the connection is needed; rendering is delegated to views.recall_text.
    conn = _ctx()[1]
    recalled = views.recall_text(conn, query, limit)
    return recalled
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
@mcp.tool()
def cerebro_impact(path: str) -> str:
    """Transitive blast radius: every file that directly OR indirectly imports
    `path`. Use before changing a widely-used file to see what could break."""
    config, conn = _ctx()
    path = _resolve_path(config, conn, path)
    r = insights.impact(conn, path)
    if r is None:
        return f"'{path}' is not in the index."
    if r["total"] == 0:
        return f"Nothing imports '{path}' — changing it has no in-repo dependents."
    spread = ", ".join(f"{n} at depth {d}" for d, n in r["by_distance"].items())
    out = [
        f"Changing '{path}' transitively affects {r['total']} files ({spread}).",
        "\nDirect importers (" + str(len(r["direct"])) + "): " + _join_capped(r["direct"], 20),
    ]
    # Build the lookup set once; constructing it inside the comprehension
    # rebuilt it for every candidate path.
    direct = set(r["direct"])
    deeper = [p for p in r["all"] if p not in direct]
    if deeper:
        out.append("Further downstream: " + _join_capped(deeper, 20))
    return "\n".join(out)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
@mcp.tool()
def cerebro_cycles() -> str:
    """Find circular import groups (files that mutually depend on each other) — an
    architecture smell worth breaking. Returns each cycle's members."""
    _, conn = _ctx()
    report = insights.cycles(conn)
    groups = report["cycles"]

    note = ""
    if report["barrels_ignored"]:
        note = f" (barrel/index files ignored: {report['barrels_ignored']})"
    if not groups:
        return f"No genuine circular import cycles found. 🎉{note}"

    out = [f"{report['total']} circular dependency group(s){note}, tightest first:"]
    for position, group in enumerate(groups, 1):
        # A group whose sample cycle visits every member is a plain cycle;
        # anything larger is reported as a tangle.
        if group["size"] == group["length"]:
            tag = f"{group['length']}-file cycle"
        else:
            tag = f"{group['size']}-file tangle"
        out.append(f"\n{position}. {tag}:")
        out.append(" " + " → ".join(group["cycle"]))
    return "\n".join(out)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
@mcp.tool()
def cerebro_callers(name: str) -> str:
    """Find every call site of a function / method / class by NAME across the repo
    (symbol-level call graph; name-resolved, so it may include same-named symbols).
    Use to see who actually uses a symbol before you change it."""
    _, conn = _ctx()
    found = callgraph.callers(conn, name)
    total = found["count"]
    definitions = found["defined_in"]

    if not definitions and total == 0:
        return f"No symbol named '{name}' found in the index."

    out = []
    if definitions:
        out.append(f"'{name}' defined in: " + _join_capped(definitions, 10))
    if total == 0:
        out.append("No call sites found in the repo.")
        return "\n".join(out)

    out.append(f"\n{total} call site(s):")
    for path, sym, line in found["sites"][:60]:
        out.append(f" {sym or '(module scope)'} @ {path}:{line}")
    if total > 60:
        out.append(f" … (+{total - 60} more)")
    return "\n".join(out)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
@mcp.tool()
def cerebro_calls(path: str) -> str:
    """List the internal functions/methods a file calls — its outgoing call edges
    (name-resolved). External library calls are omitted."""
    config, conn = _ctx()
    path = _resolve_path(config, conn, path)
    found = callgraph.calls_from(conn, path)
    total = found["count"]
    if total == 0:
        return f"No internal calls found from '{path}'."

    out = [f"'{path}' makes {total} internal call(s):"]
    for sym, dst, line in found["calls"][:80]:
        out.append(f" {sym or '(module scope)'} → {dst}() L{line}")
    if total > 80:
        out.append(f" … (+{total - 80} more)")
    return "\n".join(out)
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
@mcp.tool()
def cerebro_orphans(prefix: str = "") -> str:
    """List code files that nothing imports — dead-code candidates. Framework
    entrypoints (modules, controllers, pages, configs, tests) are listed
    separately since they're loaded by convention, not by import."""
    _, conn = _ctx()
    found = insights.orphans(conn, prefix or None)
    dead, entrypoints = found["dead"], found["entrypoints"]
    if not (dead or entrypoints):
        return "No orphan files found in scope."

    out = []
    if not dead:
        out.append("No dead-code candidates — every non-entrypoint file is imported.")
    else:
        out.append(f"Dead-code candidates ({len(dead)}) — imported by nothing, not entrypoints:")
        out.extend(f" {p}" for p in dead[:60])
        if len(dead) > 60:
            out.append(f" … (+{len(dead) - 60} more)")
    out.append(f"\n(Plus {len(entrypoints)} unimported framework entrypoints, excluded.)")
    return "\n".join(out)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
@mcp.tool()
def cerebro_dead_symbols(prefix: str = "") -> str:
    """List unused-export candidates: functions/classes/methods whose name is
    referenced nowhere in the indexed code, inside files that ARE imported (the
    symbol-level dead code that cerebro_orphans, which works per-file, can't see).
    Heuristic — confirm before deleting: dynamic access (obj['x'], string-based
    DI) and reflection can make a used symbol look dead."""
    _, conn = _ctx()
    found = insights.dead_symbols(conn, prefix or None)
    candidates, total = found["dead"], found["total"]
    if not candidates:
        return "No unused-export candidates found in scope."

    out = [f"Unused-export candidates ({total}) — defined but referenced nowhere:"]
    out.extend(
        f" {item['path']}:{item['line']} {item['kind']} {item['name']}"
        for item in candidates[:60]
    )
    if total > 60:
        out.append(f" … (+{total - 60} more)")
    out.append("\n(Heuristic: verify — dynamic/reflective access can hide a real use.)")
    return "\n".join(out)
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def map_main():
    """`cerebro-map` entry point: write the project map to stdout (read-only).

    Used by the Claude Code session-start hook to inject the overview into a
    new session.
    """
    overview = cerebro_map()
    print(overview)
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def recall_main():
    """`cerebro-recall` entry point: write recalled notes to stdout.

    The command-line arguments, joined with spaces, form the query; with no
    arguments the most recent notes are shown. Used by the session-start hook
    to surface decisions in a new session.
    """
    import sys

    query = " ".join(sys.argv[1:])
    print(cerebro_recall(query))
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def main():
    """Run the MCP server, creating the shared context first when CEREBRO_ROOT is set."""
    # Allow pointing the server at a specific repo via env without changing cwd.
    root_override = os.environ.get("CEREBRO_ROOT")
    if root_override:
        _ctx()
    mcp.run()


if __name__ == "__main__":
    main()
|