cerebro-code-memory 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cerebro/__init__.py +3 -0
- cerebro/callgraph.py +38 -0
- cerebro/cli.py +348 -0
- cerebro/config.py +136 -0
- cerebro/db.py +245 -0
- cerebro/docaudit.py +174 -0
- cerebro/embeddings.py +175 -0
- cerebro/gitsync.py +124 -0
- cerebro/graph.py +77 -0
- cerebro/indexer.py +854 -0
- cerebro/insights.py +217 -0
- cerebro/notes.py +70 -0
- cerebro/server.py +382 -0
- cerebro/summaries.py +66 -0
- cerebro/summarizer.py +109 -0
- cerebro/tsconfig.py +159 -0
- cerebro/views.py +52 -0
- cerebro/viz.py +374 -0
- cerebro_code_memory-0.1.0.dist-info/METADATA +160 -0
- cerebro_code_memory-0.1.0.dist-info/RECORD +23 -0
- cerebro_code_memory-0.1.0.dist-info/WHEEL +4 -0
- cerebro_code_memory-0.1.0.dist-info/entry_points.txt +11 -0
- cerebro_code_memory-0.1.0.dist-info/licenses/LICENSE +21 -0
cerebro/gitsync.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Git-aware freshness.
|
|
2
|
+
|
|
3
|
+
The post-edit hook only catches edits made *through* Claude Code. Branch switches,
|
|
4
|
+
`git pull`, rebases, and edits in the raw editor go unnoticed until a manual reindex.
|
|
5
|
+
This diffs git state since the last sync and incrementally reindexes only the changed
|
|
6
|
+
files. It handles a single repo at the root OR several nested repos (e.g. a folder of
|
|
7
|
+
sub-app repos, like a Fenix-style multi-repo workspace).
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import subprocess
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from . import indexer
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _git(repo_abs: Path, *args: str, timeout: int = 15) -> str | None:
    """Run ``git -C <repo_abs> <args...>`` and return its stdout, or None on failure.

    Best-effort by design: a missing ``git`` binary, a timeout, undecodable
    output or a non-zero exit status all yield None so the caller can simply
    skip the repo instead of crashing.

    Only *trailing* whitespace is removed from the output. Leading whitespace
    is significant: ``git status --porcelain`` lines are column-based and start
    with a blank status column for unstaged changes (e.g. `` M path``), so
    stripping the front of the output would shift the first line's columns.
    """
    try:
        out = subprocess.run(
            ["git", "-C", str(repo_abs), *args],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # OSError: git missing or not executable. SubprocessError: timeout.
        # ValueError: undecodable output or an argument containing a NUL byte.
        return None
    if out.returncode != 0:
        return None
    return out.stdout.rstrip()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def find_git_repos(config, max_depth: int = 3) -> list[str]:
    """List git repositories under ``config.root`` as root-relative POSIX paths.

    If the root itself contains ``.git`` the answer is ``['']`` and nothing
    else is searched. Otherwise the tree is walked top-down: directories at
    ``max_depth`` levels below the root (or deeper) are neither inspected nor
    descended into, sub-directories for which ``config.is_ignored`` is true
    are pruned, and the walk stops descending as soon as a repo is found.
    """
    base = config.root
    if (base / ".git").exists():
        return [""]

    found: list[str] = []
    for current, subdirs, _ in os.walk(base):
        here = Path(current)
        rel_here = here.relative_to(base)
        if len(rel_here.parts) >= max_depth:
            # Too deep: prune in place so os.walk does not go any further.
            subdirs.clear()
            continue
        kept = [name for name in subdirs if not config.is_ignored(here / name)]
        if (here / ".git").exists():
            found.append(rel_here.as_posix())
            kept = []  # a repo is one unit — nested repos are not searched for
        subdirs[:] = kept
    return found
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _porcelain_paths(status: str) -> set[str]:
    """Extract the affected paths from ``git status --porcelain`` (v1) output.

    Each line has the shape ``XY PATH``: two status columns, one space, then
    the path. Renames and copies are reported as ``old -> new``; only the new
    path is returned for them. Surrounding double quotes (which git adds around
    unusual file names) are removed; backslash escapes inside are left as-is.

    The parser tolerates a line whose leading blank status column has been
    lost (for example because the whole output was ``.strip()``-ed, turning
    `` M path`` into ``M path``): if the third character is not the separator
    space, the path is taken to start one column earlier.
    """
    paths: set[str] = set()
    for line in status.splitlines():
        if len(line) < 3:
            continue  # too short to hold a status code and a path
        start = 3 if line[2] == " " else 2
        code, path = line[:2], line[start:]
        # Only rename/copy entries use the "old -> new" form. Splitting other
        # entries would truncate a file whose own name contains " -> ".
        if " -> " in path and ("R" in code or "C" in code):
            path = path.split(" -> ", 1)[1]
        path = path.strip().strip('"')
        if path:
            paths.add(path)
    return paths
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def sync(config, conn) -> dict:
    """Bring the index up to date with the git state of every repo under the root.

    Per repo: read the current HEAD, compare it with the HEAD stored in the
    `meta` table, and collect the files named by `git diff --name-only` between
    the two plus everything `git status --porcelain` reports. Paths are made
    root-relative, ignored ones are dropped, and the remainder is reindexed in
    one batch. Each repo's current HEAD is then stored and committed. On a
    first run there is no stored HEAD, so no commit diff is taken and the run
    mainly records the baseline.

    Returns ``{"git": False, "changed": 0}`` when no repo is found, otherwise
    ``{"git": True, "repos": <repos with a readable HEAD>, "changed": <paths reindexed>}``.
    """
    repo_dirs = find_git_repos(config)
    if not repo_dirs:
        return {"git": False, "changed": 0}

    pending: set[str] = set()
    heads: dict[str, str] = {}

    for repo_rel in repo_dirs:
        repo_abs = config.root / repo_rel if repo_rel else config.root
        current = _git(repo_abs, "rev-parse", "HEAD")
        if current is None:
            # HEAD could not be read (git failed) — skip this repo.
            continue

        meta_key = f"git_head:{repo_rel}"
        stored = conn.execute("SELECT value FROM meta WHERE key=?", (meta_key,)).fetchone()
        previous = stored["value"] if stored else None

        touched: set[str] = set()
        if previous and previous != current:
            committed = _git(repo_abs, "diff", "--name-only", previous, current)
            if committed:
                touched.update(committed.splitlines())
        dirty = _git(repo_abs, "status", "--porcelain")
        if dirty:
            touched |= _porcelain_paths(dirty)

        for name in touched:
            # git paths are repo-relative; the index wants root-relative ones.
            candidate = f"{repo_rel}/{name}" if repo_rel else name
            if config.is_ignored(config.root / candidate):
                continue
            pending.add(candidate)
        heads[meta_key] = current

    ordered = sorted(pending)
    if ordered:
        indexer.reindex_paths(config, conn, ordered)

    upsert = (
        "INSERT INTO meta(key,value) VALUES(?,?) "
        "ON CONFLICT(key) DO UPDATE SET value=excluded.value"
    )
    for meta_key, current in heads.items():
        conn.execute(upsert, (meta_key, current))
    conn.commit()
    return {"git": True, "repos": len(heads), "changed": len(ordered)}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def main():  # `cerebro-sync` entry point
    """Load the config, run one git sync pass and print the result as JSON."""
    import json

    from . import config as cfg
    from . import db

    settings = cfg.Config.load()
    connection = db.connect(settings.db_path)
    report = sync(settings, connection)
    # Include the root so the output says which workspace was synced.
    report.update(root=str(settings.root))
    print(json.dumps(report))
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# Run a sync pass when this module is executed rather than imported.
if __name__ == "__main__":
    main()
|
cerebro/graph.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Dependency-graph analysis over the `edges` table.
|
|
2
|
+
|
|
3
|
+
An edge src -> dst means "src imports dst". PageRank therefore scores
|
|
4
|
+
widely-imported files (shared utilities, core modules) highest, which is what we
|
|
5
|
+
want to surface first in the map. NOT shortest-path / Dijkstra — code knowledge
|
|
6
|
+
is a relevance problem, not a routing one.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import networkx as nx
|
|
11
|
+
|
|
12
|
+
from . import db
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def build_graph(conn, exclude_kinds: tuple[str, ...] = ()) -> nx.DiGraph:
    """Assemble the import digraph from the `files` and `edges` tables.

    Every row of `files` becomes a node, so files without any import edge are
    still present. Every row of `edges` becomes a directed edge
    src_path -> dst_path. Edges whose `kind` is listed in `exclude_kinds` are
    left out — e.g. pass ('type',) to ignore TS type-only imports, which are
    elided at runtime and therefore only form cosmetic cycles.
    """
    graph = nx.DiGraph()
    graph.add_nodes_from(record["path"] for record in conn.execute("SELECT path FROM files"))

    if not exclude_kinds:
        edge_rows = conn.execute("SELECT src_path, dst_path FROM edges")
    else:
        # One "?" placeholder per excluded kind; the values are bound, not interpolated.
        placeholders = ",".join("?" for _ in exclude_kinds)
        edge_rows = conn.execute(
            f"SELECT src_path, dst_path FROM edges WHERE kind NOT IN ({placeholders})", exclude_kinds
        )
    graph.add_edges_from((record["src_path"], record["dst_path"]) for record in edge_rows)
    return graph
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _pagerank(g, damping: float = 0.85, iters: int = 50, tol: float = 1e-6):
    """PageRank by plain power iteration, without any numeric dependency.

    Implemented by hand because networkx's own pagerank pulls in scipy, which
    is a heavy requirement for this little math.

    Every node starts at 1/n. Each round a node receives an equal share of the
    score of every node pointing at it; the score held by nodes with no
    outgoing edges is spread uniformly over all nodes. Iteration stops after
    `iters` rounds or once the summed absolute change drops below `tol`.
    Returns a {node: score} dict, empty for an empty graph.
    """
    vertices = list(g.nodes)
    total = len(vertices)
    if not total:
        return {}

    fanout = {v: g.out_degree(v) for v in vertices}
    sinks = [v for v in vertices if fanout[v] == 0]
    scores = dict.fromkeys(vertices, 1.0 / total)

    for _ in range(iters):
        sink_mass = sum(scores[v] for v in sinks)
        updated = {}
        for v in vertices:
            received = sum(scores[u] / fanout[u] for u in g.predecessors(v) if fanout[u])
            updated[v] = (1 - damping) / total + damping * (received + sink_mass / total)
        shift = sum(abs(updated[v] - scores[v]) for v in vertices)
        scores = updated
        if shift < tol:
            break
    return scores
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def rank(conn, top: int | None = None):
    """Score every file with PageRank and list them most-important first.

    Returns [(path, score), ...] in descending score order. A truthy `top`
    limits the result to `ranked[:top]`; None or 0 returns the full list.
    """
    scores = _pagerank(build_graph(conn))
    ordered = list(scores.items())
    ordered.sort(key=lambda pair: pair[1], reverse=True)
    if not top:
        return ordered
    return ordered[:top]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def dependents(conn, path: str) -> list[str]:
    """Files that import `path` (impact analysis: change `path`, these may break).

    Reads the one-hop neighbours straight from the `edges` table rather than
    building the whole import graph for a single lookup. Returns the distinct
    importing paths in sorted order, or [] when nothing imports `path`
    (including when `path` is unknown).
    """
    rows = conn.execute("SELECT src_path FROM edges WHERE dst_path=?", (path,))
    # A set collapses duplicate edges (e.g. the same import recorded under two kinds).
    return sorted({row[0] for row in rows})
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def dependencies(conn, path: str) -> list[str]:
    """Files that `path` imports.

    Reads the one-hop neighbours straight from the `edges` table rather than
    building the whole import graph for a single lookup. Returns the distinct
    imported paths in sorted order, or [] when `path` imports nothing
    (including when `path` is unknown).
    """
    rows = conn.execute("SELECT dst_path FROM edges WHERE src_path=?", (path,))
    # A set collapses duplicate edges (e.g. the same import recorded under two kinds).
    return sorted({row[0] for row in rows})
|