cerebro-code-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cerebro/gitsync.py ADDED
@@ -0,0 +1,124 @@
1
+ """Git-aware freshness.
2
+
3
+ The post-edit hook only catches edits made *through* Claude Code. Branch switches,
4
+ `git pull`, rebases, and edits in the raw editor go unnoticed until a manual reindex.
5
+ This diffs git state since the last sync and incrementally reindexes only the changed
6
+ files. It handles a single repo at the root OR several nested repos (e.g. a folder of
7
+ sub-app repos, like a Fenix-style multi-repo workspace).
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ import subprocess
13
+ from pathlib import Path
14
+
15
+ from . import indexer
16
+
17
+
18
+ def _git(repo_abs: Path, *args: str, timeout: int = 15) -> str | None:
19
+ try:
20
+ out = subprocess.run(
21
+ ["git", "-C", str(repo_abs), *args],
22
+ capture_output=True,
23
+ text=True,
24
+ timeout=timeout,
25
+ )
26
+ if out.returncode == 0:
27
+ return out.stdout.strip()
28
+ except Exception:
29
+ pass
30
+ return None
31
+
32
+
33
def find_git_repos(config, max_depth: int = 3) -> list[str]:
    """Locate git repositories at or below ``config.root``.

    Returns POSIX-style paths relative to the root. When the root itself
    contains ``.git`` the answer is ``['']`` and nothing else is searched.
    Otherwise the tree is walked; directories whose depth below the root is
    ``max_depth`` or more are neither inspected nor descended into, and
    sub-directories for which ``config.is_ignored`` is true are pruned.
    """
    root = config.root
    if (root / ".git").exists():
        return [""]

    found: list[str] = []
    for current, subdirs, _ in os.walk(root):
        here = Path(current)
        rel_here = here.relative_to(root)
        if len(rel_here.parts) >= max_depth:
            # Too deep: stop the walk from going any further down this branch.
            subdirs.clear()
            continue
        subdirs[:] = [name for name in subdirs if not config.is_ignored(here / name)]
        if (here / ".git").exists():
            found.append(rel_here.as_posix())
            # A repo is treated as a single unit, so nested repos are not searched for.
            subdirs.clear()
    return found
50
+
51
+
52
+ def _porcelain_paths(status: str) -> set[str]:
53
+ paths = set()
54
+ for line in status.splitlines():
55
+ path = line[3:]
56
+ if " -> " in path: # rename: "old -> new"
57
+ path = path.split(" -> ", 1)[1]
58
+ path = path.strip().strip('"')
59
+ if path:
60
+ paths.add(path)
61
+ return paths
62
+
63
+
64
def sync(config, conn) -> dict:
    """Reindex files changed via git since the last sync, across all repos under
    the root.

    For every repo found by ``find_git_repos`` the current HEAD is compared with
    the one stored in the ``meta`` table under ``git_head:<repo_rel>``. Files
    named by ``git diff --name-only <old> <head>`` plus everything reported by
    ``git status --porcelain`` are collected, made root-relative, filtered
    through ``config.is_ignored`` and handed to ``indexer.reindex_paths`` in one
    call. On the first run there is no stored HEAD, so no diff is taken, but
    working-tree changes from ``git status`` are still reindexed. The new HEADs
    are then written back to ``meta`` and the connection is committed.

    Repos whose HEAD cannot be resolved are skipped and not counted.

    Returns:
        ``{"git": False, "changed": 0}`` when no repo is found, otherwise
        ``{"git": True, "repos": <repos synced>, "changed": <paths reindexed>}``.
    """
    repos = find_git_repos(config)
    if not repos:
        return {"git": False, "changed": 0}

    changed_root_rel: set[str] = set()
    new_heads: dict[str, str] = {}
    for repo_rel in repos:
        repo_abs = config.root / repo_rel if repo_rel else config.root
        head = _git(repo_abs, "rev-parse", "HEAD")
        if head is None:
            continue
        key = f"git_head:{repo_rel}"
        row = conn.execute("SELECT value FROM meta WHERE key=?", (key,)).fetchone()
        old = row["value"] if row else None

        files: set[str] = set()
        if old and old != head:
            # NOTE(review): if this diff fails (e.g. the old commit is gone) the
            # baseline below still advances and those committed changes are
            # not reindexed — confirm that is acceptable.
            diff = _git(repo_abs, "diff", "--name-only", old, head)
            if diff:
                files.update(diff.splitlines())
        # --untracked-files=all lists every file inside a new untracked
        # directory; the default mode collapses it to a single "dir/" entry,
        # which is not a file path the indexer can be pointed at.
        status = _git(repo_abs, "status", "--porcelain", "--untracked-files=all")
        if status:
            files.update(_porcelain_paths(status))

        for f in files:
            rel = f"{repo_rel}/{f}" if repo_rel else f
            if not config.is_ignored(config.root / rel):
                changed_root_rel.add(rel)
        new_heads[key] = head

    rels = sorted(changed_root_rel)
    if rels:
        indexer.reindex_paths(config, conn, rels)
    for key, head in new_heads.items():
        conn.execute(
            "INSERT INTO meta(key,value) VALUES(?,?) "
            "ON CONFLICT(key) DO UPDATE SET value=excluded.value",
            (key, head),
        )
    conn.commit()
    return {"git": True, "repos": len(new_heads), "changed": len(rels)}
108
+
109
+
110
def main():  # `cerebro-sync` entry point
    """Load the configuration, run one sync pass, and print the result as JSON.

    The printed object is whatever ``sync`` returned with a ``root`` key added
    holding the configured root as a string.
    """
    import json

    from . import config as cfg
    from . import db

    config = cfg.Config.load()
    conn = db.connect(config.db_path)
    report = {**sync(config, conn), "root": str(config.root)}
    print(json.dumps(report))


if __name__ == "__main__":
    main()
cerebro/graph.py ADDED
@@ -0,0 +1,77 @@
1
+ """Dependency-graph analysis over the `edges` table.
2
+
3
+ An edge src -> dst means "src imports dst". PageRank therefore scores
4
+ widely-imported files (shared utilities, core modules) highest, which is what we
5
+ want to surface first in the map. NOT shortest-path / Dijkstra — code knowledge
6
+ is a relevance problem, not a routing one.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import networkx as nx
11
+
12
+ from . import db
13
+
14
+
15
def build_graph(conn, exclude_kinds: tuple[str, ...] = ()) -> nx.DiGraph:
    """Assemble the import digraph from the ``files`` and ``edges`` tables.

    Every row of ``files`` becomes a node and every selected row of ``edges``
    becomes an edge ``src_path -> dst_path``. Supplying ``exclude_kinds`` (for
    instance ``('type',)`` to ignore TS type-only imports, which vanish at
    runtime) leaves out edges of those kinds — handy for cycle detection, where
    a type-only cycle is cosmetic rather than a real startup-order problem.
    """
    graph = nx.DiGraph()
    graph.add_nodes_from(row["path"] for row in conn.execute("SELECT path FROM files"))

    if not exclude_kinds:
        edge_rows = conn.execute("SELECT src_path, dst_path FROM edges")
    else:
        # One "?" placeholder per excluded kind keeps the query parameterized.
        ph = ",".join("?" * len(exclude_kinds))
        edge_rows = conn.execute(
            f"SELECT src_path, dst_path FROM edges WHERE kind NOT IN ({ph})", exclude_kinds
        )

    graph.add_edges_from((row["src_path"], row["dst_path"]) for row in edge_rows)
    return graph
32
+
33
+
34
+ def _pagerank(g, damping: float = 0.85, iters: int = 50, tol: float = 1e-6):
35
+ """Pure-Python power-iteration PageRank.
36
+
37
+ Kept dependency-light on purpose: networkx's own pagerank requires scipy,
38
+ which would be a heavy add for ~15 lines of standard math. Dangling nodes
39
+ (no outgoing edges) redistribute their rank uniformly.
40
+ """
41
+ nodes = list(g.nodes)
42
+ n = len(nodes)
43
+ if n == 0:
44
+ return {}
45
+ out_deg = {v: g.out_degree(v) for v in nodes}
46
+ rank = {v: 1.0 / n for v in nodes}
47
+ for _ in range(iters):
48
+ dangling = sum(rank[v] for v in nodes if out_deg[v] == 0)
49
+ nxt = {}
50
+ for v in nodes:
51
+ inflow = sum(rank[u] / out_deg[u] for u in g.predecessors(v) if out_deg[u])
52
+ nxt[v] = (1 - damping) / n + damping * (inflow + dangling / n)
53
+ delta = sum(abs(nxt[v] - rank[v]) for v in nodes)
54
+ rank = nxt
55
+ if delta < tol:
56
+ break
57
+ return rank
58
+
59
+
60
def rank(conn, top: int | None = None):
    """List ``(path, score)`` pairs, most important first.

    Scores come from ``_pagerank`` over the full import graph. A falsy ``top``
    (``None`` or ``0``) returns every entry; otherwise the list is sliced to
    ``[:top]``.
    """
    scores = _pagerank(build_graph(conn))
    # The sort is stable, so tied scores keep the graph's node order.
    by_score = sorted(scores, key=scores.__getitem__, reverse=True)
    ordered = [(path, scores[path]) for path in by_score]
    if not top:
        return ordered
    return ordered[:top]
66
+
67
+
68
def dependents(conn, path: str) -> list[str]:
    """Files that import `path` (impact analysis: change `path`, these may break).

    Reads the ``edges`` table directly (an edge ``src -> dst`` means "src
    imports dst") instead of materializing the whole import graph for a
    one-hop lookup.

    Args:
        conn: Database connection whose rows support access by column name.
        path: The imported file to look up.

    Returns:
        Sorted, de-duplicated importer paths; empty when nothing imports
        ``path`` or it is unknown.
    """
    rows = conn.execute("SELECT src_path FROM edges WHERE dst_path = ?", (path,))
    # A set collapses parallel edges of different kinds between the same pair.
    return sorted({row["src_path"] for row in rows})
72
+
73
+
74
def dependencies(conn, path: str) -> list[str]:
    """Files that `path` imports.

    Reads the ``edges`` table directly (an edge ``src -> dst`` means "src
    imports dst") instead of materializing the whole import graph for a
    one-hop lookup.

    Args:
        conn: Database connection whose rows support access by column name.
        path: The importing file to look up.

    Returns:
        Sorted, de-duplicated imported paths; empty when ``path`` imports
        nothing or is unknown.
    """
    rows = conn.execute("SELECT dst_path FROM edges WHERE src_path = ?", (path,))
    # A set collapses parallel edges of different kinds between the same pair.
    return sorted({row["dst_path"] for row in rows})