second-brain-graph 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
second_brain/gate.py ADDED
@@ -0,0 +1,79 @@
1
+ """The anti-drift gate: refuses to call the brain "fine" while it is stale or broken.
2
+
3
+ Three signals, by severity:
4
+ - **broken** (error) — a reference points inside the project but the target is missing.
5
+ - **stale** (error) — files changed/added/removed since the last build (rebuild needed).
6
+ - **orphans** (info) — file nodes not connected by any import/reference edge (possibly
7
+ forgotten, but often legitimately standalone, so they never fail the gate).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass, field
13
+
14
+ from second_brain.freshness import diff_manifest
15
+ from second_brain.model import EdgeType, Graph
16
+
17
+
18
@dataclass
class GateReport:
    """Outcome of one gate evaluation.

    ``broken`` holds ``(source_id, target)`` pairs, ``stale`` maps a change kind
    (``"added"`` / ``"removed"`` / ``"changed"``) to the affected paths, and ``orphans``
    lists file node ids without any import/reference edge. Only ``broken`` and ``stale``
    can fail the gate; ``orphans`` is informational.
    """

    broken: list[tuple[str, str]] = field(default_factory=list)
    stale: dict[str, list[str]] = field(
        default_factory=lambda: {"added": [], "removed": [], "changed": []}
    )
    orphans: list[str] = field(default_factory=list)

    @property
    def stale_count(self) -> int:
        """Total number of stale paths across every change kind present in ``stale``."""
        return sum(len(v) for v in self.stale.values())

    @property
    def ok(self) -> bool:
        """True when there is nothing that must be fixed (broken/stale)."""
        return not self.broken and self.stale_count == 0

    def summary(self) -> str:
        """Return a short multi-line, human-readable report.

        The first three lines are the totals; they are followed by at most 20 individual
        broken references and, when the list was cut, a final line saying how many were
        left out.
        """
        # .get() rather than indexing: stale_count/ok accept any mapping, so the summary
        # must not raise KeyError when one of the three kinds is absent.
        added = self.stale.get("added", [])
        removed = self.stale.get("removed", [])
        changed = self.stale.get("changed", [])
        lines = [
            f"broken references: {len(self.broken)}",
            f"stale files: {self.stale_count} (+{len(added)} / -{len(removed)} / ~{len(changed)})",
            f"orphans: {len(self.orphans)} (info)",
        ]
        shown_limit = 20
        for src, tgt in self.broken[:shown_limit]:
            lines.append(f" [broken] {src} -> {tgt}")
        hidden = len(self.broken) - shown_limit
        if hidden > 0:
            # Make the truncation visible instead of silently dropping entries.
            lines.append(f" ... and {hidden} more")
        return "\n".join(lines)
45
+
46
+
47
def find_broken(graph: Graph) -> list[tuple[str, str]]:
    """Collect every ``(node id, target)`` pair recorded under a node's ``broken_refs`` meta.

    Nodes without that meta key contribute nothing. The result is sorted.
    """
    return sorted(
        (node.id, target)
        for node in graph.nodes.values()
        for target in node.meta.get("broken_refs", [])
    )
53
+
54
+
55
def find_orphans(graph: Graph) -> list[str]:
    """Return the sorted ids of file nodes that no import/reference edge touches.

    A node counts as a file when its ``path`` is set. Edges of any other type (such as
    area membership) do not make a node connected.
    """
    linking_types = (EdgeType.IMPORTS, EdgeType.REFERENCES)
    touched: set[str] = set()
    for edge in graph.edges:
        if edge.type in linking_types:
            touched.update((edge.source, edge.target))
    orphan_ids = [
        node.id
        for node in graph.nodes.values()
        if node.path is not None and node.id not in touched
    ]
    orphan_ids.sort()
    return orphan_ids
66
+
67
+
68
def evaluate(
    graph: Graph,
    old_manifest: dict[str, str] | None,
    new_manifest: dict[str, str],
) -> GateReport:
    """Run all three gate checks and bundle the results into a :class:`GateReport`.

    When ``old_manifest`` is None there is no baseline to compare against, so the report
    carries empty added/removed/changed lists instead of a manifest diff.
    """
    if old_manifest is None:
        stale = {"added": [], "removed": [], "changed": []}
    else:
        stale = diff_manifest(old_manifest, new_manifest)
    broken = find_broken(graph)
    orphans = find_orphans(graph)
    return GateReport(broken=broken, stale=stale, orphans=orphans)
second_brain/ignore.py ADDED
@@ -0,0 +1,76 @@
1
+ """What the indexer skips: derived/vendor directories, noise files, and user patterns.
2
+
3
+ A project may add a ``.secondbrainignore`` file at its root with one glob pattern per line
4
+ (``#`` comments allowed). Patterns are matched against the POSIX relative path.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import fnmatch
10
+ import os
11
+ from pathlib import Path
12
+
13
# Directory names that are pruned from the walk.
DEFAULT_IGNORE_DIRS: frozenset[str] = frozenset(
    (
        # version control
        ".git",
        ".hg",
        ".svn",
        # this tool's own output
        ".secondbrain",
        # caches
        "__pycache__",
        ".pytest_cache",
        ".ruff_cache",
        ".mypy_cache",
        ".cache",
        # environments and vendored dependencies
        ".venv",
        "venv",
        "env",
        "node_modules",
        "site-packages",
        # build artefacts
        "dist",
        "build",
        ".eggs",
        ".tox",
        # editor settings
        ".idea",
        ".vscode",
        "graphify-out",
    )
)

# File names skipped on an exact match.
DEFAULT_IGNORE_FILES: frozenset[str] = frozenset(
    (
        ".DS_Store",
        "Thumbs.db",
        ".secondbrainignore",
        "package-lock.json",
        "poetry.lock",
        "yarn.lock",
    )
)

# Binary / noise extensions skipped entirely (not useful as knowledge nodes).
DEFAULT_IGNORE_EXTS: frozenset[str] = frozenset(
    (
        # compiled Python
        ".pyc", ".pyo", ".pyd",
        # images
        ".png", ".jpg", ".jpeg", ".gif", ".ico", ".svg", ".webp", ".bmp",
        # fonts
        ".woff", ".woff2", ".ttf", ".eot",
        # archives
        ".zip", ".gz", ".tar", ".7z", ".rar",
        # native binaries and objects
        ".exe", ".dll", ".so", ".dylib", ".bin", ".o", ".a",
        ".lock",
    )
)
43
+
44
+
45
def load_ignore_patterns(root: Path) -> list[str]:
    """Read user glob patterns from ``<root>/.secondbrainignore`` (may be empty).

    Blank lines and lines whose first non-blank character is ``#`` are dropped; every
    other line is returned stripped, in file order. A missing file yields ``[]``.
    """
    f = root / ".secondbrainignore"
    if not f.is_file():
        return []
    # "utf-8-sig" instead of "utf-8": editors that save with a byte-order mark would
    # otherwise glue U+FEFF onto the first line, turning a leading comment into a pattern
    # or making the first pattern unmatchable. Files without a BOM decode unchanged.
    text = f.read_text(encoding="utf-8-sig", errors="ignore")
    stripped = (line.strip() for line in text.splitlines())
    return [s for s in stripped if s and not s.startswith("#")]
56
+
57
+
58
def is_ignored_dir(name: str) -> bool:
    """True for a default-ignored directory name or any ``*.egg-info`` directory."""
    if name.endswith(".egg-info"):
        return True
    return name in DEFAULT_IGNORE_DIRS
60
+
61
+
62
def is_ignored_file(rel_posix: str, name: str, patterns: list[str]) -> bool:
    """Decide whether a file is skipped.

    A file is skipped when its name is a default-ignored name, when its lower-cased
    extension is a default-ignored extension, or when any user pattern matches either the
    POSIX relative path or the bare file name.
    """
    if name in DEFAULT_IGNORE_FILES:
        return True
    # os.path.splitext reports a leading-dot name such as ".lock" as having no extension,
    # which keeps dotfiles from being classified by their leading dot.
    _, suffix = os.path.splitext(name)
    if suffix.lower() in DEFAULT_IGNORE_EXTS:
        return True
    # fnmatchcase keeps matching case-sensitive on every platform, so one ignore file
    # selects the same set of files on Windows and POSIX.
    return any(
        fnmatch.fnmatchcase(rel_posix, pattern) or fnmatch.fnmatchcase(name, pattern)
        for pattern in patterns
    )
@@ -0,0 +1,295 @@
1
+ """Build the project graph: typed file nodes, area clustering, and typed edges.
2
+
3
+ Edges produced in v1:
4
+ - ``imports`` — code -> code (Python via ``ast``, JS/TS relative specifiers best-effort)
5
+ - ``references`` — doc -> file (markdown link / wikilink / plain path-in-prose)
6
+ - ``belongs_to`` — file -> area (the top-level directory)
7
+
8
+ Reference resolution is conservative: a reference is recorded as *broken* only when it
9
+ points inside the project root and cannot be found, so the anti-drift gate stays trustworthy.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import os
15
+ import posixpath
16
+ from pathlib import Path
17
+
18
+ from second_brain.classify import classify
19
+ from second_brain.ignore import (
20
+ DEFAULT_IGNORE_DIRS,
21
+ is_ignored_dir,
22
+ is_ignored_file,
23
+ load_ignore_patterns,
24
+ )
25
+ from second_brain.model import Edge, EdgeType, Graph, Node, NodeType
26
+ from second_brain.pycode import PyImport, js_imports, python_imports
27
+ from second_brain.references import extract_references_tagged
28
+
29
+ _TEXT_EXTS = {
30
+ ".md", ".markdown", ".rst", ".txt", ".py", ".js", ".ts", ".tsx", ".jsx", ".mjs", ".cjs",
31
+ ".toml", ".ini", ".cfg", ".conf", ".yaml", ".yml", ".json", ".jsonl", ".xml",
32
+ ".html", ".htm", ".css", ".sql", ".ps1", ".psm1", ".sh", ".bash", ".go", ".rs",
33
+ ".java", ".c", ".cc", ".cpp", ".h", ".hpp", ".rb", ".php", ".cs",
34
+ }
35
+ _JS_EXTS = {".js", ".ts", ".tsx", ".jsx", ".mjs", ".cjs"}
36
+ # Documentation references (links, wikilinks, path-in-prose) are extracted ONLY from
37
+ # documents. In source code, filename-looking strings are data, not references — scanning
38
+ # them produces noise, so we rely on import edges (ast) for code instead.
39
+ _DOC_REF_EXTS = {".md", ".markdown", ".rst", ".txt", ".html", ".htm"}
40
+ _JS_RESOLVE_ORDER = ("", ".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs", "/index.ts", "/index.js")
41
+ _MAX_READ_BYTES = 5_000_000
42
+
43
+ AREA_ROOT = "(root)"
44
+
45
+
46
+ def _ext(name: str) -> str:
47
+ # os.path.splitext treats leading-dot names (".gitignore") as extensionless.
48
+ return os.path.splitext(name)[1].lower()
49
+
50
+
51
def iter_files(root: Path, patterns: list[str]) -> list[str]:
    """List the indexable files below ``root`` as sorted POSIX relative paths.

    Ignored directories are pruned in place so ``os.walk`` never descends into them, and
    ``os.walk`` itself does not follow directory symlinks. A file whose path cannot be made
    relative to ``root`` is skipped rather than aborting the walk.
    """
    found: list[str] = []
    for current, subdirs, files in os.walk(root):
        # Slice assignment mutates the list os.walk holds, which is what prunes the walk.
        subdirs[:] = [sub for sub in subdirs if not is_ignored_dir(sub)]
        here = Path(current)
        for filename in files:
            try:
                relative = (here / filename).relative_to(root).as_posix()
            except ValueError:
                continue
            if not is_ignored_file(relative, filename, patterns):
                found.append(relative)
    found.sort()
    return found
69
+
70
+
71
def _read_text(path: Path) -> str | None:
    """Return the file's text, or None when it should not or cannot be read.

    None is returned for a non-empty extension outside ``_TEXT_EXTS``, for a file larger
    than ``_MAX_READ_BYTES``, and for any ``OSError``. Undecodable bytes are dropped.
    """
    suffix = _ext(path.name)
    if suffix and suffix not in _TEXT_EXTS:
        return None
    try:
        oversized = path.stat().st_size > _MAX_READ_BYTES
        return None if oversized else path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        return None
81
+
82
+
83
def _top_area(rel: str) -> str:
    """Return the first path segment of ``rel``, or ``AREA_ROOT`` when it has no ``/``."""
    head, slash, _ = rel.partition("/")
    return head if slash else AREA_ROOT
85
+
86
+
87
+ def _python_module_map(py_files: list[str]) -> dict[str, str]:
88
+ """Map dotted module/package -> file rel path for internal import resolution.
89
+
90
+ A package (``__init__.py``) wins over a same-named module if both exist (matching
91
+ Python's own resolution), regardless of iteration order.
92
+ """
93
+ mods: dict[str, str] = {}
94
+ pkgs: dict[str, str] = {}
95
+ for rel in py_files:
96
+ parts = rel[:-3].split("/") # drop ".py"
97
+ if parts[-1] == "__init__":
98
+ pkg = ".".join(parts[:-1])
99
+ if pkg:
100
+ pkgs[pkg] = rel
101
+ else:
102
+ mods[".".join(parts)] = rel
103
+ return {**mods, **pkgs}
104
+
105
+
106
+ def _resolve_py(imp: PyImport, from_rel: str, module_map: dict[str, str]) -> list[str]:
107
+ """Resolve one Python import to internal file rel paths (may be empty)."""
108
+ mod_parts = from_rel[:-3].split("/")
109
+ pkg_parts = mod_parts[:-1] # package of the importing module
110
+ cands: list[str] = []
111
+ if imp.level == 0:
112
+ if not imp.module:
113
+ return []
114
+ cands.append(imp.module)
115
+ for n in imp.names:
116
+ cands.append(f"{imp.module}.{n}")
117
+ else:
118
+ keep = len(pkg_parts) - (imp.level - 1)
119
+ if keep < 0:
120
+ return [] # relative import reaches above the project root: not resolvable
121
+ base = pkg_parts[:keep]
122
+ if imp.module:
123
+ cand_parts = base + imp.module.split(".")
124
+ cands.append(".".join(cand_parts))
125
+ for n in imp.names:
126
+ cands.append(".".join(cand_parts + [n]))
127
+ else:
128
+ for n in imp.names:
129
+ cands.append(".".join(base + [n]))
130
+ out: list[str] = []
131
+ for c in cands:
132
+ tgt = module_map.get(c)
133
+ if tgt and tgt != from_rel and tgt not in out:
134
+ out.append(tgt)
135
+ return out
136
+
137
+
138
def _resolve_js(spec: str, from_rel: str, node_ids: set[str]) -> str | None:
    """Resolve a JS/TS import specifier against the importing file's directory.

    The normalised path is tried with each suffix of ``_JS_RESOLVE_ORDER`` in turn; the
    first candidate that is an indexed node other than the importing file is returned,
    otherwise None.
    """
    stem = posixpath.normpath(posixpath.join(posixpath.dirname(from_rel), spec))
    candidates = (stem + suffix for suffix in _JS_RESOLVE_ORDER)
    return next((c for c in candidates if c in node_ids and c != from_rel), None)
145
+
146
+
147
+ def _is_external(target: str) -> bool:
148
+ """True for URLs, Windows drive-letter paths, and UNC paths (cleaned form uses '/')."""
149
+ if "://" in target:
150
+ return True
151
+ if len(target) >= 2 and target[1] == ":" and target[0].isalpha():
152
+ return True # e.g. C:/Users/...
153
+ return target.startswith("//") # UNC (\\server\share -> //server/share after cleaning)
154
+
155
+
156
def _resolve_ref(
    target: str,
    kind: str,
    from_rel: str,
    node_ids: set[str],
    basename_index: dict[str, list[str]],
    stem_index: dict[str, list[str]],
) -> tuple[str | None, bool]:
    """Resolve a documentation reference to a project file.

    Returns ``(resolved_id, is_broken)``.

    Resolution order: the target relative to the referencing file, the target relative to
    the project root, then a unique match by base name, then a unique match by stem. A
    reference that resolves to the referencing file itself yields ``(None, False)``.

    An unresolved reference is *broken* only when it is an intentional link/wikilink to a
    project-internal file the index could have contained. It is NOT broken when:

    - it is a plain prose mention (``kind == "path"``) or an external target;
    - it escapes the project root;
    - any path segment is an ignored directory, or the file itself is excluded by the
      default ignore rules (such files are never indexed, so their absence from the index
      says nothing about whether they exist);
    - it is a bare wikilink without ``/`` or ``.`` (a concept, not a file).
    """
    if not target:
        return None, False
    cand_file = posixpath.normpath(posixpath.join(posixpath.dirname(from_rel), target))
    cand_root = posixpath.normpath(target.lstrip("/"))
    for cand in (cand_file, cand_root):
        if cand in node_ids:
            return (cand if cand != from_rel else None), False
    base = target.rsplit("/", 1)[-1]
    stem = base.rsplit(".", 1)[0] if "." in base else base
    for index, key in ((basename_index, base), (stem_index, stem)):
        ids = [i for i in index.get(key, []) if i != from_rel]
        if len(ids) == 1:
            return ids[0], False

    # Unresolved: decide whether it is genuinely a broken link.
    if kind == "path" or _is_external(target):
        return None, False
    for cand in (cand_root, cand_file):
        # ".." on its own escapes the root just like any "../..." path does.
        if cand == ".." or cand.startswith("../"):
            return None, False
    for cand in (cand_root, cand_file):
        segments = cand.split("/")
        # is_ignored_dir at every depth mirrors how the walk prunes directories, so nested
        # "node_modules" and "*.egg-info" are covered, not just a top-level name.
        if any(is_ignored_dir(segment) for segment in segments):
            return None, False
        # No user patterns here: only the built-in name/extension rules are applied.
        if is_ignored_file(cand, segments[-1], []):
            return None, False
    if kind == "wikilink" and not ("/" in target or "." in base):
        return None, False  # bare [[Concept]] is a concept link, not a file
    return None, True
195
+
196
+
197
def _describe(node: Node, area: str, inbound: int) -> str:
    """Build the one-line description shown for a node.

    Area nodes are described by their label alone. Every other node gets its type, where
    it lives, and the inbound edge count when that count is non-zero.
    """
    if node.type is NodeType.AREA:
        return f"area '{node.label}'"
    location = "root" if area == AREA_ROOT else f"area '{area}'"
    suffix = f" · {inbound} inbound" if inbound else ""
    return f"{node.type.value} · {location}{suffix}"
203
+
204
+
205
def _add_file_nodes(g: Graph, root_p: Path, rels: list[str]) -> None:
    """Phase 1: add one node per file, one node per area, and the membership edges."""
    areas: set[str] = set()
    for rel in rels:
        ntype = classify(rel)
        label = rel.rsplit("/", 1)[-1]
        node = Node(id=rel, type=ntype, label=label, path=rel)
        try:
            node.meta["size"] = (root_p / rel).stat().st_size
        except OSError:
            pass  # best-effort: a file that cannot be stat'ed simply has no "size" entry
        g.add_node(node)
        areas.add(_top_area(rel))
    for area in sorted(areas):
        g.add_node(Node(id=f"area:{area}", type=NodeType.AREA, label=area))
    for rel in rels:
        g.add_edge(Edge(rel, f"area:{_top_area(rel)}", EdgeType.BELONGS_TO))


def _build_name_indexes(rels: list[str]) -> tuple[dict[str, list[str]], dict[str, list[str]]]:
    """Phase 2: index files by base name and by stem (base name minus its last suffix)."""
    basename_index: dict[str, list[str]] = {}
    stem_index: dict[str, list[str]] = {}
    for rel in rels:
        base = rel.rsplit("/", 1)[-1]
        basename_index.setdefault(base, []).append(rel)
        stem = base.rsplit(".", 1)[0] if "." in base else base
        stem_index.setdefault(stem, []).append(rel)
    return basename_index, stem_index


def _add_content_edges(
    g: Graph,
    root_p: Path,
    rels: list[str],
    node_ids: set[str],
    basename_index: dict[str, list[str]],
    stem_index: dict[str, list[str]],
    module_map: dict[str, str],
) -> None:
    """Phase 3: read code and documents, adding import and reference edges.

    Files that are neither code nor documents are never opened. Broken references found
    in a document are stored on its node under ``meta["broken_refs"]``.
    """
    for rel in rels:
        ext = _ext(rel)
        is_python = ext == ".py"
        is_js = ext in _JS_EXTS
        is_doc = ext in _DOC_REF_EXTS
        if not (is_python or is_js or is_doc):
            continue
        text = _read_text(root_p / rel)
        if text is None:
            continue
        if is_python:
            for imp in python_imports(text):
                for tgt in _resolve_py(imp, rel, module_map):
                    g.add_edge(Edge(rel, tgt, EdgeType.IMPORTS))
        elif is_js:
            for spec in js_imports(text):
                tgt = _resolve_js(spec, rel, node_ids)
                if tgt:
                    g.add_edge(Edge(rel, tgt, EdgeType.IMPORTS))
        if is_doc:
            broken: list[str] = []
            for target, kind in extract_references_tagged(text):
                resolved, is_broken = _resolve_ref(
                    target, kind, rel, node_ids, basename_index, stem_index
                )
                if resolved:
                    g.add_edge(Edge(rel, resolved, EdgeType.REFERENCES))
                elif is_broken:
                    broken.append(target)
            if broken:
                g.nodes[rel].meta["broken_refs"] = broken


def _set_descriptions(g: Graph) -> None:
    """Phase 4: describe every node, including its inbound import/reference count."""
    inbound: dict[str, int] = {}
    for e in g.edges:
        if e.type in (EdgeType.REFERENCES, EdgeType.IMPORTS):
            inbound[e.target] = inbound.get(e.target, 0) + 1
    for node in g.nodes.values():
        area = _top_area(node.path) if node.path else AREA_ROOT
        node.description = _describe(node, area, inbound.get(node.id, 0))


def build_graph(
    root: str | os.PathLike[str],
    *,
    project: str | None = None,
    _rels: list[str] | None = None,
) -> Graph:
    """Index the project at ``root`` and return its graph. Never modifies the project.

    ``project`` names the graph and defaults to the root directory's name. ``_rels`` lets a
    caller pass a precomputed file list to avoid walking the tree twice (see
    :func:`second_brain.freshness.index`).

    Raises ``NotADirectoryError`` when ``root`` is not a directory.
    """
    root_p = Path(root).resolve()
    if not root_p.is_dir():
        raise NotADirectoryError(f"not a directory: {root_p}")

    rels = _rels if _rels is not None else iter_files(root_p, load_ignore_patterns(root_p))
    g = Graph(project=project or root_p.name)

    # 1. File nodes + areas.
    _add_file_nodes(g, root_p, rels)

    # 2. Indexes for resolution. node_ids is taken after phase 1, so it holds area ids too.
    node_ids = set(g.nodes.keys())
    basename_index, stem_index = _build_name_indexes(rels)
    module_map = _python_module_map([r for r in rels if _ext(r) == ".py"])

    # 3. Edges from file contents. Only code (imports) and docs (references) are ever READ;
    # data/config/binaries are never opened - the graph needs only their type/size/area.
    _add_content_edges(g, root_p, rels, node_ids, basename_index, stem_index, module_map)

    # 4. Descriptions (after edges, so we can include inbound counts).
    _set_descriptions(g)

    return g
@@ -0,0 +1,91 @@
1
+ """Optional MCP server exposing Second Brain's low-token queries to AI assistants.
2
+
3
+ This is the piece that lets an assistant *query* the project instead of re-reading it. It is
4
+ an OPTIONAL extra so the core stays dependency-free:
5
+
6
+ pip install second-brain[mcp]
7
+ second-brain-mcp [PROJECT_PATH] # defaults to the current directory
8
+
9
+ Read-only on your sources. Exposes a handful of small, budgeted tools (map / find /
10
+ neighbors / subgraph / health) over stdio.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import os
16
+ import sys
17
+ from typing import Any
18
+
19
+ from second_brain import gate, query, store
20
+ from second_brain.freshness import build_manifest, index
21
+ from second_brain.model import Graph
22
+
23
+ try: # pragma: no cover - import-guard
24
+ from mcp.server.fastmcp import FastMCP
25
+ except ImportError:
26
+ # Stay importable without the optional extra (linters, autodoc, test collection): keep
27
+ # FastMCP as None and fail with a friendly message only when the server is actually run.
28
+ FastMCP = None
29
+
30
+ _NO_MCP = "The MCP server needs the optional 'mcp' extra: pip install second-brain[mcp]"
31
+
32
+
33
def _graph(project: str) -> Graph:
    """Return the stored graph for ``project``, indexing the project when none is usable."""
    stored = store.load_graph(project)
    if stored:
        return stored
    return index(project)[0]
35
+
36
+
37
def build_server(project: str):
    """Create the FastMCP server whose tools all operate on ``project``.

    Only the server object and its tool registrations are created here; graph and manifest
    access happens inside the tools when they are called. Raises ``ImportError`` carrying
    an installation hint when the optional ``mcp`` extra is not installed.
    """
    if FastMCP is None:  # pragma: no cover - exercised only without the extra installed
        raise ImportError(_NO_MCP)
    mcp_server = FastMCP("second-brain")

    @mcp_server.tool()
    def project_map() -> dict[str, Any]:
        """Compact project digest: areas with file counts/sizes/types, type and edge
        tallies, the most-connected files, and orphan/broken counts. Cheap to load first."""
        graph = _graph(project)
        return query.project_map(graph)

    @mcp_server.tool()
    def find(text: str) -> list[dict[str, Any]]:
        """Find files/nodes whose name or path contains ``text`` (case-insensitive)."""
        graph = _graph(project)
        return query.find(graph, text)

    @mcp_server.tool()
    def neighbors(node_id: str) -> dict[str, Any]:
        """A node and its incoming/outgoing connections (imports, references, area membership)."""
        found = query.neighbors(_graph(project), node_id)
        if found is None:
            # Distinct error for an unknown node, so an assistant can tell "no edges" from
            # "no node".
            return {"error": "node not found", "id": node_id}
        return found

    @mcp_server.tool()
    def subgraph(node_id: str, hops: int = 1) -> dict[str, Any]:
        """A small subgraph (nodes + edges) around ``node_id`` within ``hops``."""
        graph = _graph(project)
        return query.subgraph(graph, node_id, hops=hops)

    @mcp_server.tool()
    def health() -> dict[str, Any]:
        """Anti-drift status: broken references, stale files vs the last build, orphan count."""
        graph = store.load_graph(project)
        baseline = store.load_manifest(project)
        if graph is None or baseline is None:
            return {"status": "no-baseline", "hint": "run 'second-brain build' first"}
        report = gate.evaluate(graph, baseline, build_manifest(project))
        return {
            "ok": report.ok,
            "broken": report.broken,
            "stale": report.stale,
            "orphans": len(report.orphans),
        }

    return mcp_server
78
+
79
+
80
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: serve the given project, or the current directory.

    ``argv`` defaults to ``sys.argv[1:]``; its first element, when present, is the project
    path. Returns 2 after printing a hint to stderr when the optional ``mcp`` extra is
    missing, and 0 once the server's ``run()`` returns.
    """
    cli_args = argv if argv is not None else sys.argv[1:]
    project = cli_args[0] if cli_args else os.getcwd()
    if FastMCP is None:
        print(_NO_MCP, file=sys.stderr)
        return 2
    server = build_server(project)
    server.run()
    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())