cerebro-code-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cerebro/summaries.py ADDED
@@ -0,0 +1,66 @@
1
+ """Cached English summaries (plan layer 2) and summary-staleness.
2
+
3
+ A summary is tied to the file version it described via `source_hash`. When the
4
+ file's current hash differs, the summary is flagged stale so a session knows to
5
+ re-read just that file instead of trusting an outdated trace.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from datetime import datetime, timezone
10
+
11
+
12
def now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string, truncated to seconds."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat(timespec="seconds")
14
+
15
+
16
def record(conn, path: str, summary: str, model: str | None = None) -> dict:
    """Store (or overwrite) the summary for `path` and refresh its search row.

    The hash currently stored for `path` in `files` is saved alongside the
    summary as `source_hash`; it is NULL when the file has no `files` row.
    The matching `fts` row of kind 'summary' is replaced, and the connection
    is committed before returning.

    Returns `{"path": path, "indexed": bool}`, where `indexed` tells whether a
    `files` row (and therefore a source hash) existed for the path.
    """
    indexed_row = conn.execute(
        "SELECT hash FROM files WHERE path=?", (path,)
    ).fetchone()
    source_hash = None if not indexed_row else indexed_row["hash"]

    upsert_sql = """INSERT INTO summaries(path, summary_en, model, source_hash, updated_at)
           VALUES(?,?,?,?,?)
           ON CONFLICT(path) DO UPDATE SET
             summary_en=excluded.summary_en, model=excluded.model,
             source_hash=excluded.source_hash, updated_at=excluded.updated_at"""
    conn.execute(upsert_sql, (path, summary, model, source_hash, now_iso()))

    # Replace the searchable copy of the summary: drop the old row, add the new.
    conn.execute("DELETE FROM fts WHERE path=? AND kind='summary'", (path,))
    conn.execute(
        "INSERT INTO fts(path, kind, text) VALUES(?, 'summary', ?)", (path, summary)
    )
    conn.commit()

    return {"path": path, "indexed": source_hash is not None}
33
+
34
+
35
def get(conn, path: str, current_hash: str | None = None) -> dict | None:
    """Fetch the cached summary for `path`, or None when none is stored.

    `current_hash` should be the live on-disk hash when the caller has it, so
    staleness is judged against disk. When omitted, the hash stored in `files`
    is used instead, which only reflects changes picked up by a reindex.

    The summary is reported stale only when both the recorded `source_hash`
    and the comparison hash are present and they differ.
    """
    cached = conn.execute(
        "SELECT * FROM summaries WHERE path=?", (path,)
    ).fetchone()
    if not cached:
        return None

    if current_hash is None:
        # Fall back to the last-indexed hash; stays None if the file is unindexed.
        indexed = conn.execute(
            "SELECT hash FROM files WHERE path=?", (path,)
        ).fetchone()
        if indexed:
            current_hash = indexed["hash"]

    recorded_hash = cached["source_hash"]
    is_stale = (
        bool(recorded_hash) and bool(current_hash) and current_hash != recorded_hash
    )

    return {
        "path": path,
        "summary_en": cached["summary_en"],
        "model": cached["model"],
        "updated_at": cached["updated_at"],
        "stale": is_stale,
    }
57
+
58
+
59
def stale_summaries(conn) -> list[str]:
    """List, sorted by path, the summarized files whose indexed hash has moved on.

    Only paths present in both `summaries` and `files` are considered, and
    summaries recorded without a source hash are ignored.
    """
    query = """SELECT s.path FROM summaries s JOIN files f ON f.path = s.path
           WHERE s.source_hash IS NOT NULL AND s.source_hash != f.hash
           ORDER BY s.path"""
    return [hit["path"] for hit in conn.execute(query)]
cerebro/summarizer.py ADDED
@@ -0,0 +1,109 @@
1
+ """Batch summary generation (plan layer 2, warmed proactively).
2
+
3
+ Generates English summaries for the most central files so even a first-time query
4
+ is cheap, instead of waiting for sessions to fill them in lazily. Uses headless
5
+ `claude -p` so it needs no API key — it rides the user's existing Claude Code auth.
6
+ A cheap model (Haiku by default) keeps the one-time cost low.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import concurrent.futures as cf
11
+ import os
12
+ import shutil
13
+ import subprocess
14
+
15
+ from . import config as cfg
16
+ from . import db, graph, summaries
17
+
18
# Prompt header placed ahead of each file's contents when asking for a summary.
INSTRUCTION = (
    "You are summarizing a source file for a code-navigation index. "
    "In 1-3 dense sentences, in English, describe what this file does and its "
    "role in the system (key responsibilities, important types/functions, how it "
    "fits in). "
    "Output ONLY the summary text — no preamble, no markdown, no bullet points."
)
# Upper bound on how many characters of a file are embedded in the prompt.
MAX_CHARS = 16_000
# Model name used when the caller does not choose one.
DEFAULT_MODEL = "claude-haiku-4-5"
26
+
27
+
28
def _claude_bin() -> str:
    """Pick the `claude` executable to run.

    Preference order: a non-empty CEREBRO_CLAUDE environment variable, then
    whatever `claude` resolves to on PATH, then the bare name "claude".
    """
    override = os.environ.get("CEREBRO_CLAUDE")
    if override:
        return override
    discovered = shutil.which("claude")
    return discovered if discovered else "claude"
30
+
31
+
32
def summarize_one(config, rel: str, model: str) -> str | None:
    """Generate a summary for one file via `claude -p`. Returns None on failure.

    The file at `config.root / rel` is read as UTF-8 (undecodable bytes are
    dropped) and truncated to MAX_CHARS characters. The prompt is passed to the
    subprocess on stdin.

    Args:
        config: object exposing `root`, the directory `rel` is resolved against.
        rel: path of the file to summarize, relative to `config.root`.
        model: value passed to the CLI's `--model` flag.

    Returns:
        The stripped stdout of the command, or None if the file cannot be read,
        the command cannot be run or times out (180 s), it exits non-zero, or
        it prints nothing.
    """
    abs_path = config.root / rel
    try:
        content = abs_path.read_text(encoding="utf-8", errors="ignore")[:MAX_CHARS]
    except OSError:
        return None
    prompt = f"{INSTRUCTION}\n\nFile path: {rel}\n\n```\n{content}\n```\n"
    try:
        out = subprocess.run(
            [_claude_bin(), "-p", "--model", model],
            input=prompt,
            capture_output=True,
            # Pin UTF-8 rather than the locale codec: the prompt embeds UTF-8
            # source text, which a narrower locale encoding (e.g. cp1252)
            # cannot always encode, and that used to fail the whole file.
            encoding="utf-8",
            errors="replace",
            timeout=180,
        )
    except Exception:
        # Deliberately broad: this runs in worker threads and the caller
        # treats None as "no summary", so one failure must not abort a batch.
        return None
    if out.returncode != 0:
        return None
    return out.stdout.strip() or None
53
+
54
+
55
def select_central_missing(conn, limit: int, prefix: str | None = None) -> list[str]:
    """Top files by dependency centrality that have no summary yet.

    Walks `graph.rank(conn)` in the order it yields paths and keeps those that
    have no row in `summaries`, for which `cfg.Config.lang_for(path)` is not
    None (presumably: a recognised source language — confirm in config), and
    that start with `prefix` when one is given.

    Args:
        conn: database connection used for the `summaries` lookup and ranking.
        limit: maximum number of paths to return; zero or negative yields [].
        prefix: optional plain string prefix the path must start with.

    Returns:
        At most `limit` paths, in ranking order.
    """
    # Guard first: the in-loop check only runs after an append, so without
    # this a limit of 0 (or less) would still return one path.
    if limit <= 0:
        return []
    have = {r["path"] for r in conn.execute("SELECT path FROM summaries")}
    out: list[str] = []
    for path, _score in graph.rank(conn):
        if path in have or cfg.Config.lang_for(path) is None:
            continue
        if prefix and not path.startswith(prefix):
            continue
        out.append(path)
        if len(out) >= limit:
            break
    return out
68
+
69
+
70
def run(config, conn, rels: list[str], model: str = DEFAULT_MODEL, workers: int = 4) -> dict:
    """Summarize `rels` concurrently, then store the results one at a time.

    Generation is fanned out over a thread pool (each task runs
    `summarize_one`); writes happen afterwards on the calling thread so only
    one writer touches the sqlite connection. Files that yield no summary are
    skipped.

    Returns `{"requested": <len(rels)>, "summarized": <number stored>}`.
    """
    produced: dict[str, str] = {}
    with cf.ThreadPoolExecutor(max_workers=workers) as pool:
        pending = {}
        for rel in rels:
            pending[pool.submit(summarize_one, config, rel, model)] = rel
        for done in cf.as_completed(pending):
            text = done.result()
            if not text:
                continue
            produced[pending[done]] = text

    # Serial write phase.
    for rel in produced:
        summaries.record(conn, rel, produced[rel], model=model)

    return {"requested": len(rels), "summarized": len(produced)}
83
+
84
+
85
def main():  # `cerebro-summarize` entry point
    """Parse CLI flags, summarize the selected files, and print a JSON report."""
    import argparse
    import json

    parser = argparse.ArgumentParser(
        description="Pre-generate Cerebro summaries via claude -p"
    )
    parser.add_argument("--limit", type=int, default=20, help="max files to summarize")
    parser.add_argument("--model", default=DEFAULT_MODEL)
    parser.add_argument(
        "--prefix", default=None, help="only files under this path prefix"
    )
    parser.add_argument("--workers", type=int, default=4)
    opts = parser.parse_args()

    config = cfg.Config.load()
    conn = db.connect(config.db_path)
    targets = select_central_missing(conn, opts.limit, opts.prefix)

    if targets:
        report = run(config, conn, targets, model=opts.model, workers=opts.workers)
        report.update(model=opts.model, root=str(config.root))
        payload = json.dumps(report, indent=2)
    else:
        # Nothing to do: emit a compact one-line note instead of a full report.
        payload = json.dumps({"summarized": 0, "note": "nothing missing in scope"})
    print(payload)
106
+
107
+
108
# Also runnable directly as a script, not only through the console entry point.
if __name__ == "__main__":
    main()
cerebro/tsconfig.py ADDED
@@ -0,0 +1,159 @@
1
+ """tsconfig / jsconfig path-alias resolution.
2
+
3
+ Next.js and NestJS projects import via aliases like `@/components/Button` instead
4
+ of relative paths. Those are declared in `compilerOptions.paths` (relative to
5
+ `baseUrl`). Without expanding them, the dependency graph misses most edges in
6
+ alias-heavy frontends. This module parses those configs (tolerating JSONC and a
7
+ single level of `extends`) and expands an aliased import to candidate repo-relative
8
+ module paths. The indexer then resolves a candidate to a real file.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import posixpath
14
+ import re
15
+ from dataclasses import dataclass
16
+ from pathlib import Path
17
+
18
+
19
@dataclass
class AliasConfig:
    """Path-alias settings taken from one tsconfig/jsconfig file.

    Directory fields are POSIX-style and relative to the repository root.
    """

    # Directory containing the config file ("" means the repo root).
    dir: str
    # Directory that `paths` targets resolve from.
    base_url: str
    # Alias pattern -> target patterns, e.g. {"@/*": ["./*"]}.
    patterns: dict[str, list[str]]
24
+
25
+
26
def _strip_comments(text: str) -> str:
    """Drop `//` line comments and `/* */` block comments from JSONC text.

    Double-quoted string literals (including backslash escapes) are copied
    through untouched, so values such as "@/*" or "**/*.ts" are never read as
    comment markers. A line comment stops before the newline, which is kept;
    an unterminated block comment swallows the rest of the input.
    """
    kept = []
    pos, end = 0, len(text)
    inside_string = False
    while pos < end:
        cur = text[pos]
        nxt = text[pos + 1] if pos + 1 < end else ""
        if inside_string:
            if cur == "\\" and nxt:
                # Keep the escape pair together so an escaped quote cannot
                # close the string.
                kept.append(cur + nxt)
                pos += 2
            else:
                kept.append(cur)
                inside_string = cur != '"'
                pos += 1
        elif cur == '"':
            inside_string = True
            kept.append(cur)
            pos += 1
        elif cur == "/" and nxt == "/":
            pos += 2
            while pos < end and text[pos] not in "\n\r":
                pos += 1
        elif cur == "/" and nxt == "*":
            close = text.find("*/", pos + 2)
            pos = end if close == -1 else close + 2
        else:
            kept.append(cur)
            pos += 1
    return "".join(kept)
64
+
65
+
66
# Matches either a whole double-quoted string literal (consumed so its contents
# are skipped) or a comma followed only by whitespace and a closing `}` / `]`.
_TRAILING_COMMA_RE = re.compile(r'"(?:\\.|[^"\\])*"|,(?=\s*[}\]])', re.DOTALL)


def _loads_jsonc(text: str):
    """Parse JSON that may contain comments and trailing commas.

    Comments are removed with `_strip_comments`, then commas that directly
    precede a closing brace or bracket are dropped. Commas inside string
    literals are left alone, so a value like "x,]" survives intact.

    Returns whatever `json.loads` produces; raises its errors on invalid input.
    """
    text = _strip_comments(text)
    text = _TRAILING_COMMA_RE.sub(
        # Only a bare-comma match is removed; string literals are put back as-is.
        lambda m: "" if m.group(0) == "," else m.group(0),
        text,
    )
    return json.loads(text)
70
+
71
+
72
def _resolve_extends(base_dir: Path, ext: str) -> Path | None:
    """Turn an `extends` value into an existing config path, or None.

    Only path-like values are followed (./base, ../tsconfig.base.json); a bare
    name with no slash is treated as a package reference and skipped. When the
    value does not end in `.json`, that suffix is appended before checking.
    """
    looks_like_path = ext.startswith(".") or "/" in ext
    if not looks_like_path:
        return None
    target = base_dir / ext
    if target.suffix != ".json":
        target = base_dir / f"{ext}.json"
    if target.exists():
        return target
    return None
80
+
81
+
82
def _rebase_dir(rel: str, from_dir: Path, to_dir: Path) -> str:
    """Re-express `rel` (relative to `from_dir`) as a posix path relative to `to_dir`."""
    target = posixpath.join(from_dir.as_posix(), rel)
    return posixpath.relpath(target, to_dir.as_posix())


def _read_with_extends(abs_path: Path, seen: set) -> dict:
    """Read `baseUrl` and `paths` from a config file, following `extends`.

    Args:
        abs_path: the tsconfig/jsconfig file to read.
        seen: resolved paths already visited; guards against `extends` cycles
            and is mutated by this call.

    Returns:
        `{"baseUrl": str | None, "paths": dict | None}`, or `{}` when the file
        was already visited, cannot be read or parsed, or is not a JSON
        object. A returned `baseUrl` is always relative to `abs_path`'s own
        directory, including when it was inherited from a parent config.
    """
    rp = abs_path.resolve()
    if rp in seen:
        return {}
    seen.add(rp)
    try:
        data = _loads_jsonc(abs_path.read_text(encoding="utf-8", errors="ignore"))
    except Exception:
        return {}
    # Valid JSON is not necessarily an object; `[]` or `null` carries no options.
    if not isinstance(data, dict):
        return {}
    co = data.get("compilerOptions")
    if not isinstance(co, dict):
        co = {}
    base_url = co.get("baseUrl")
    paths = co.get("paths")
    # Ignore wrongly-typed values so callers only ever see str/dict or None.
    result = {
        "baseUrl": base_url if isinstance(base_url, str) else None,
        "paths": paths if isinstance(paths, dict) else None,
    }
    ext = data.get("extends")
    if isinstance(ext, str) and (result["baseUrl"] is None or result["paths"] is None):
        parent = _resolve_extends(abs_path.parent, ext)
        if parent is not None:
            pdata = _read_with_extends(parent, seen)
            own_paths = result["paths"] is not None
            if not own_paths:
                result["paths"] = pdata.get("paths")
            if result["baseUrl"] is None:
                parent_base = pdata.get("baseUrl")
                if parent_base is None and not own_paths and result["paths"] is not None:
                    # Inherited `paths` with no baseUrl anywhere resolve from
                    # the directory of the config that declared them.
                    parent_base = "."
                if parent_base is not None:
                    # Relative paths in an extended config are relative to that
                    # config, so translate into this file's directory before
                    # the caller joins it with this file's location.
                    result["baseUrl"] = _rebase_dir(
                        parent_base, parent.parent, abs_path.parent
                    )
    return result
102
+
103
+
104
def load_alias_configs(config) -> list[AliasConfig]:
    """Scan the repo for tsconfig.json / jsconfig.json files that declare paths.

    Args:
        config: object whose `iter_files()` yields `(rel, abs_path)` pairs;
            `rel` is used as a posix-style repo-relative path.

    Returns:
        One `AliasConfig` per config file with a non-empty `paths` mapping, in
        iteration order. Files with missing, empty, or wrongly-typed `paths`
        are skipped.
    """
    out: list[AliasConfig] = []
    for rel, abs_path in config.iter_files():
        if posixpath.basename(rel) not in ("tsconfig.json", "jsconfig.json"):
            continue
        merged = _read_with_extends(abs_path, set())
        raw_paths = merged.get("paths")
        # A hand-edited or fixture config may hold a non-object `paths`;
        # skip it instead of failing on `.items()`.
        if not isinstance(raw_paths, dict) or not raw_paths:
            continue
        patterns: dict[str, list[str]] = {}
        for alias, targets in raw_paths.items():
            if not isinstance(targets, list):
                targets = [targets]
            # Keep only string targets: expansion calls str methods on them.
            patterns[alias] = [t for t in targets if isinstance(t, str)]
        cfg_dir = posixpath.dirname(rel)
        base_url = merged.get("baseUrl")
        if not isinstance(base_url, str) or not base_url:
            base_url = "."
        base = posixpath.normpath(posixpath.join(cfg_dir, base_url))
        out.append(AliasConfig(dir=cfg_dir, base_url=base, patterns=patterns))
    return out
122
+
123
+
124
def _nearest(configs: list[AliasConfig], importer_rel: str) -> AliasConfig | None:
    """Pick the config whose directory most closely encloses `importer_rel`.

    A config applies when the importer path starts with its directory plus
    "/" (a root-level config, dir == "", applies to everything). Among the
    applicable ones the longest directory wins; ties go to the earliest in
    `configs`. Returns None when nothing applies.
    """

    def covers(candidate) -> bool:
        scope = f"{candidate.dir}/" if candidate.dir else ""
        return importer_rel.startswith(scope)

    applicable = [c for c in configs if covers(c)]
    # max() keeps the first of equally long candidates.
    return max(applicable, key=lambda c: len(c.dir), default=None)
132
+
133
+
134
def _match(pattern: str, name: str) -> str | None:
    """Match `name` against an alias `pattern` and return what `*` captured.

    Only the first `*` in the pattern acts as a wildcard. A pattern without
    `*` must equal `name` exactly and then yields ''. Returns None when there
    is no match.
    """
    head, star, tail = pattern.partition("*")
    if not star:
        return "" if name == pattern else None
    # The name must be long enough for the prefix and suffix not to overlap.
    if len(name) < len(head) + len(tail):
        return None
    if not (name.startswith(head) and name.endswith(tail)):
        return None
    return name[len(head): len(name) - len(tail)]
143
+
144
+
145
def expand(import_str: str, importer_rel: str, configs: list[AliasConfig]) -> list[str]:
    """List candidate repo-relative module paths for an aliased import.

    The config nearest to `importer_rel` is consulted; every pattern in it
    that matches `import_str` contributes one candidate per target, with the
    first `*` of the target replaced by the captured text and the result
    normalised against the config's base directory. Candidates carry no
    extension; the indexer resolves them against the known file set. Returns
    [] when no config applies or nothing matches.
    """
    owner = _nearest(configs, importer_rel)
    if owner is None:
        return []
    candidates: list[str] = []
    for pattern, targets in owner.patterns.items():
        captured = _match(pattern, import_str)
        if captured is not None:
            candidates.extend(
                posixpath.normpath(
                    posixpath.join(owner.base_url, target.replace("*", captured, 1))
                )
                for target in targets
            )
    return candidates
cerebro/views.py ADDED
@@ -0,0 +1,52 @@
1
+ """FastMCP-free renderers for the read tools.
2
+
3
+ The MCP SDK (FastMCP → pydantic/starlette/uvicorn) costs ~230ms to import and is
4
+ only needed to *serve* MCP. The CLI and the SessionStart hook just need the text
5
+ these tools produce, so the rendering logic lives here — importable without pulling
6
+ in the server module — and `server.py` delegates to it.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from . import db, graph, notes, summaries
11
+
12
+
13
def map_text(conn, root, top: int = 30) -> str:
    """Render the repository map as plain text.

    The output has a header (file count, per-language counts, last reindex
    time), then the `top` highest-ranked files from `graph.rank`, each with
    the first 90 characters of its cached summary when one exists (marked
    STALE if flagged), and a closing line counting files without a summary.
    An empty index returns a short hint instead.
    """
    file_count = conn.execute("SELECT COUNT(*) AS n FROM files").fetchone()["n"]
    if file_count == 0:
        return "Index is empty. Run cerebro_reindex() first to build the map."

    lang_parts = [f"{r['lang']}:{r['n']}" for r in db.lang_counts(conn)]
    reindex_row = conn.execute(
        "SELECT value FROM meta WHERE key='last_reindex'"
    ).fetchone()
    reindexed_at = reindex_row["value"] if reindex_row else "n/a"

    lines = [
        f"# Cerebro map — {root}",
        f"{file_count} files | {', '.join(lang_parts)} | last reindex: {reindexed_at}",
        "",
        f"## Top {top} modules by centrality (most depended-upon):",
    ]

    for path, score in graph.rank(conn, top=top):
        cached = summaries.get(conn, path)
        suffix = ""
        if cached:
            stale_mark = " (STALE)" if cached["stale"] else ""
            suffix = f" — {cached['summary_en'][:90]}{stale_mark}"
        lines.append(f" {score:.3f} {path}{suffix}")

    unsummarized = conn.execute(
        "SELECT COUNT(*) AS n FROM files f "
        "LEFT JOIN summaries s ON s.path=f.path WHERE s.path IS NULL"
    ).fetchone()["n"]
    lines += [
        "",
        f"{unsummarized} files have no summary yet. As you learn a file, call "
        f"cerebro_record(path, summary) so future sessions skip re-reading it.",
    ]
    return "\n".join(lines)
42
+
43
+
44
def recall_text(conn, query: str = "", limit: int = 10) -> str:
    """Render the notes returned by `notes.recall` as text, one entry per note.

    Each entry shows the note id, its topic in brackets when set, the creation
    time, and the content on the following line. With no results, a message
    is returned that depends on whether a query was given.
    """
    hits = notes.recall(conn, query, limit=limit)
    if not hits:
        if query:
            return f"No notes match '{query}'."
        return "No notes recorded yet."

    def render(note) -> str:
        label = f"#{note['id']}"
        if note["topic"]:
            label += f" [{note['topic']}]"
        return f"{label} ({note['created_at']})\n {note['content']}"

    return "\n".join(render(note) for note in hits)