PyPI - second-brain-graph - Versions diffs - 0.1.0__py3-none-any.whl - Mend

second-brain-graph 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

second_brain/__init__.py +21 -0
second_brain/__main__.py +8 -0
second_brain/assess.py +211 -0
second_brain/classify.py +87 -0
second_brain/cli.py +185 -0
second_brain/freshness.py +121 -0
second_brain/gate.py +79 -0
second_brain/ignore.py +76 -0
second_brain/indexer.py +295 -0
second_brain/mcp_server.py +91 -0
second_brain/model.py +213 -0
second_brain/operational.py +103 -0
second_brain/py.typed +0 -0
second_brain/pycode.py +61 -0
second_brain/query.py +177 -0
second_brain/references.py +80 -0
second_brain/store.py +66 -0
second_brain/ui/3d-force-graph.min.js +5 -0
second_brain/ui/template.html +341 -0
second_brain/viewer.py +57 -0
second_brain_graph-0.1.0.dist-info/METADATA +258 -0
second_brain_graph-0.1.0.dist-info/RECORD +26 -0
second_brain_graph-0.1.0.dist-info/WHEEL +5 -0
second_brain_graph-0.1.0.dist-info/entry_points.txt +3 -0
second_brain_graph-0.1.0.dist-info/licenses/LICENSE +21 -0
second_brain_graph-0.1.0.dist-info/top_level.txt +1 -0

second_brain/gate.py ADDED Viewed

@@ -0,0 +1,79 @@
+"""The anti-drift gate: refuses to call the brain "fine" while it is stale or broken.
+Three signals, by severity:
+- **broken** (error)   — a reference points inside the project but the target is missing.
+- **stale**  (error)   — files changed/added/removed since the last build (rebuild needed).
+- **orphans** (info)    — file nodes not connected by any import/reference edge (possibly
+  forgotten, but often legitimately standalone, so they never fail the gate).
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from second_brain.freshness import diff_manifest
+from second_brain.model import EdgeType, Graph
+@dataclass
+class GateReport:
+    """Result of one gate evaluation: broken references, stale files, and orphan nodes.
+    ``broken`` holds ``(source_id, target)`` pairs, ``stale`` maps the buckets
+    ``"added"`` / ``"removed"`` / ``"changed"`` to path lists, ``orphans`` holds node ids.
+    """
+    broken: list[tuple[str, str]] = field(default_factory=list)
+    stale: dict[str, list[str]] = field(
+        default_factory=lambda: {"added": [], "removed": [], "changed": []}
+    )
+    orphans: list[str] = field(default_factory=list)
+    @property
+    def stale_count(self) -> int:
+        """Total number of paths across every bucket of ``stale``."""
+        total = 0
+        for bucket in self.stale.values():
+            total += len(bucket)
+        return total
+    @property
+    def ok(self) -> bool:
+        """True when there is nothing that must be fixed (broken/stale)."""
+        # Orphans are informational only, so they never influence the verdict.
+        return not (self.broken or self.stale_count)
+    def summary(self) -> str:
+        """Render a multi-line text report: three tally lines, then up to 20 broken pairs."""
+        n_added = len(self.stale["added"])
+        n_removed = len(self.stale["removed"])
+        n_changed = len(self.stale["changed"])
+        tallies = [
+            f"broken references: {len(self.broken)}",
+            f"stale files: {self.stale_count} (+{n_added} / -{n_removed} / ~{n_changed})",
+            f"orphans: {len(self.orphans)} (info)",
+        ]
+        # Only the first 20 broken pairs are listed to keep the report short.
+        details = [f"  [broken] {src} -> {tgt}" for src, tgt in self.broken[:20]]
+        return "\n".join(tallies + details)
+def find_broken(graph: Graph) -> list[tuple[str, str]]:
+    """Collect ``(node_id, target)`` for every entry in a node's ``broken_refs`` meta, sorted."""
+    pairs = [
+        (node.id, target)
+        for node in graph.nodes.values()
+        for target in node.meta.get("broken_refs", [])
+    ]
+    pairs.sort()
+    return pairs
+def find_orphans(graph: Graph) -> list[str]:
+    """File nodes with no import/reference edge (area membership does not count).
+    A node counts as a file when its ``path`` is not None; the result is sorted by id.
+    """
+    linking_types = (EdgeType.IMPORTS, EdgeType.REFERENCES)
+    linked = {
+        endpoint
+        for edge in graph.edges
+        if edge.type in linking_types
+        for endpoint in (edge.source, edge.target)
+    }
+    orphan_ids = [
+        node.id
+        for node in graph.nodes.values()
+        if not (node.path is None or node.id in linked)
+    ]
+    orphan_ids.sort()
+    return orphan_ids
+def evaluate(
+    graph: Graph,
+    old_manifest: dict[str, str] | None,
+    new_manifest: dict[str, str],
+) -> GateReport:
+    """Evaluate the gate. ``old_manifest`` None means "no baseline" -> no stale reported."""
+    if old_manifest is None:
+        # Without a baseline there is nothing to compare against: report empty buckets.
+        drift = {"added": [], "removed": [], "changed": []}
+    else:
+        drift = diff_manifest(old_manifest, new_manifest)
+    return GateReport(broken=find_broken(graph), stale=drift, orphans=find_orphans(graph))

second_brain/ignore.py ADDED Viewed

@@ -0,0 +1,76 @@
+"""What the indexer skips: derived/vendor directories, noise files, and user patterns.
+A project may add a ``.secondbrainignore`` file at its root with one glob pattern per line
+(``#`` comments allowed). Patterns are matched against the POSIX relative path.
+"""
+from __future__ import annotations
+import fnmatch
+import os
+from pathlib import Path
+# Directory names never walked into.
+# Compared against the bare directory name (not the relative path) by ``is_ignored_dir``,
+# which additionally skips any name ending in ".egg-info".
+DEFAULT_IGNORE_DIRS: frozenset[str] = frozenset(
+    {
+        # version-control metadata
+        ".git", ".hg", ".svn",
+        # presumably this tool's own output directory - confirm against the store module
+        ".secondbrain",
+        # interpreter / tool caches
+        "__pycache__", ".pytest_cache", ".ruff_cache", ".mypy_cache", ".cache",
+        # virtual environments and installed third-party code
+        ".venv", "venv", "env", "node_modules", "site-packages",
+        # build and packaging artefacts
+        "dist", "build", ".eggs", ".tox",
+        # editor settings
+        ".idea", ".vscode",
+        # NOTE(review): looks like the output directory of another graph tool - confirm
+        "graphify-out",
+    }
+)
+# Exact file names skipped.
+# Compared against the file's base name in ``is_ignored_file``; the lock files listed here
+# are also covered by the ".lock" extension below, except "package-lock.json".
+DEFAULT_IGNORE_FILES: frozenset[str] = frozenset(
+    {".DS_Store", "Thumbs.db", ".secondbrainignore", "package-lock.json", "poetry.lock",
+     "yarn.lock"}
+)
+# Binary / noise extensions skipped entirely (not useful as knowledge nodes).
+# Compared against the lower-cased ``os.path.splitext`` extension in ``is_ignored_file``,
+# so entries must be lower-case and include the leading dot.
+DEFAULT_IGNORE_EXTS: frozenset[str] = frozenset(
+    {
+        # compiled Python
+        ".pyc", ".pyo", ".pyd",
+        # images
+        ".png", ".jpg", ".jpeg", ".gif", ".ico", ".svg", ".webp", ".bmp",
+        # fonts
+        ".woff", ".woff2", ".ttf", ".eot",
+        # archives
+        ".zip", ".gz", ".tar", ".7z", ".rar",
+        # native binaries and object files
+        ".exe", ".dll", ".so", ".dylib", ".bin", ".o", ".a",
+        # dependency lock files
+        ".lock",
+    }
+)
+def load_ignore_patterns(root: Path) -> list[str]:
+    """Read user glob patterns from ``<root>/.secondbrainignore`` (may be empty).
+    Blank lines and lines whose first non-blank character is ``#`` are dropped; every other
+    line is returned stripped of surrounding whitespace, in file order. A missing file (or a
+    path that is not a regular file) yields ``[]``. Undecodable bytes are ignored.
+    """
+    f = root / ".secondbrainignore"
+    if not f.is_file():
+        return []
+    # "utf-8-sig" rather than plain "utf-8": a byte-order mark written by some Windows editors
+    # would otherwise stay glued to the first line, hiding a leading "#" comment or corrupting
+    # the first pattern. Files without a BOM decode exactly as they did before.
+    text = f.read_text(encoding="utf-8-sig", errors="ignore")
+    stripped = (line.strip() for line in text.splitlines())
+    return [s for s in stripped if s and not s.startswith("#")]
+def is_ignored_dir(name: str) -> bool:
+    """True when a directory with this bare name must not be walked into."""
+    if name.endswith(".egg-info"):
+        return True
+    return name in DEFAULT_IGNORE_DIRS
+def is_ignored_file(rel_posix: str, name: str, patterns: list[str]) -> bool:
+    """True if a file should be skipped (default names/extensions or a user pattern).
+    ``rel_posix`` is the POSIX relative path, ``name`` the base name; each user pattern is
+    tried against both.
+    """
+    # splitext (not rsplit) so dotfiles like ".lock" are extensionless, consistent with the
+    # rest of the codebase (classify/indexer) and not mis-classified by their leading dot.
+    suffix = os.path.splitext(name)[1].lower()
+    if name in DEFAULT_IGNORE_FILES or suffix in DEFAULT_IGNORE_EXTS:
+        return True
+    # fnmatchcase (not fnmatch) so matching is case-sensitive and identical on Windows and
+    # POSIX - otherwise the same .secondbrainignore would index a different set per platform.
+    return any(
+        fnmatch.fnmatchcase(candidate, pat)
+        for pat in patterns
+        for candidate in (rel_posix, name)
+    )

second_brain/indexer.py ADDED Viewed

@@ -0,0 +1,295 @@
+"""Build the project graph: typed file nodes, area clustering, and typed edges.
+Edges produced in v1:
+- ``imports``    — code -> code (Python via ``ast``, JS/TS relative specifiers best-effort)
+- ``references`` — doc -> file (markdown link / wikilink / plain path-in-prose)
+- ``belongs_to`` — file -> area (the top-level directory)
+Reference resolution is conservative: a reference is recorded as *broken* only when it
+points inside the project root and cannot be found, so the anti-drift gate stays trustworthy.
+"""
+from __future__ import annotations
+import os
+import posixpath
+from pathlib import Path
+from second_brain.classify import classify
+from second_brain.ignore import (
+    DEFAULT_IGNORE_DIRS,
+    is_ignored_dir,
+    is_ignored_file,
+    load_ignore_patterns,
+)
+from second_brain.model import Edge, EdgeType, Graph, Node, NodeType
+from second_brain.pycode import PyImport, js_imports, python_imports
+from second_brain.references import extract_references_tagged
+# Extensions ``_read_text`` is willing to open; a file with any other non-empty extension is
+# not read. Entries are lower-case because ``_ext`` lower-cases before comparing.
+_TEXT_EXTS = {
+    ".md", ".markdown", ".rst", ".txt", ".py", ".js", ".ts", ".tsx", ".jsx", ".mjs", ".cjs",
+    ".toml", ".ini", ".cfg", ".conf", ".yaml", ".yml", ".json", ".jsonl", ".xml",
+    ".html", ".htm", ".css", ".sql", ".ps1", ".psm1", ".sh", ".bash", ".go", ".rs",
+    ".java", ".c", ".cc", ".cpp", ".h", ".hpp", ".rb", ".php", ".cs",
+}
+# Extensions whose imports are extracted with ``js_imports`` in ``build_graph``.
+_JS_EXTS = {".js", ".ts", ".tsx", ".jsx", ".mjs", ".cjs"}
+# Documentation references (links, wikilinks, path-in-prose) are extracted ONLY from
+# documents. In source code, filename-looking strings are data, not references — scanning
+# them produces noise, so we rely on import edges (ast) for code instead.
+_DOC_REF_EXTS = {".md", ".markdown", ".rst", ".txt", ".html", ".htm"}
+# Suffixes ``_resolve_js`` appends to an import specifier, tried in this order; the empty
+# suffix comes first so a specifier that already names an indexed file wins.
+_JS_RESOLVE_ORDER = ("", ".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs", "/index.ts", "/index.js")
+# Files larger than this many bytes are skipped by ``_read_text``.
+_MAX_READ_BYTES = 5_000_000
+# Area label used by ``_top_area`` for files that sit directly under the project root.
+AREA_ROOT = "(root)"
+def _ext(name: str) -> str:
+    """Return the lower-cased extension of ``name`` including its dot, or "" if none."""
+    # os.path.splitext treats leading-dot names (".gitignore") as extensionless.
+    _, suffix = os.path.splitext(name)
+    return suffix.lower()
+def iter_files(root: Path, patterns: list[str]) -> list[str]:
+    """Return sorted POSIX relative paths of indexable files under ``root``.
+    ``os.walk`` does not follow directory symlinks (loop-safe). An entry that cannot be
+    expressed relative to ``root`` (exotic symlink/junction) is skipped, never aborting.
+    """
+    found: list[str] = []
+    for current, subdirs, names in os.walk(root):
+        # Prune in place: os.walk only descends into what is left in this list.
+        subdirs[:] = [d for d in subdirs if not is_ignored_dir(d)]
+        here = Path(current)
+        for name in names:
+            try:
+                rel = (here / name).relative_to(root).as_posix()
+            except ValueError:
+                continue
+            if not is_ignored_file(rel, name, patterns):
+                found.append(rel)
+    found.sort()
+    return found
+def _read_text(path: Path) -> str | None:
+    """Return the file's text, or None when it should not or cannot be read.
+    None is returned for a non-empty extension outside ``_TEXT_EXTS``, for a file larger
+    than ``_MAX_READ_BYTES``, and on any ``OSError``. Undecodable bytes are dropped.
+    """
+    suffix = _ext(path.name)
+    readable_kind = not suffix or suffix in _TEXT_EXTS
+    if not readable_kind:
+        return None
+    try:
+        too_big = path.stat().st_size > _MAX_READ_BYTES
+        return None if too_big else path.read_text(encoding="utf-8", errors="ignore")
+    except OSError:
+        return None
+def _top_area(rel: str) -> str:
+    """Return the first path segment of ``rel``, or ``AREA_ROOT`` when it has no "/"."""
+    head, slash, _ = rel.partition("/")
+    return head if slash else AREA_ROOT
+def _python_module_map(py_files: list[str]) -> dict[str, str]:
+    """Map dotted module/package -> file rel path for internal import resolution.
+    A package (``__init__.py``) wins over a same-named module if both exist (matching
+    Python's own resolution), regardless of iteration order.
+    """
+    plain: dict[str, str] = {}
+    packages: dict[str, str] = {}
+    for rel in py_files:
+        *parents, leaf = rel[:-3].split("/")  # drop ".py"
+        if leaf == "__init__":
+            dotted = ".".join(parents)
+            # A root-level __init__.py has no package name and is left out.
+            if dotted:
+                packages[dotted] = rel
+            continue
+        plain[".".join((*parents, leaf))] = rel
+    # Packages are applied last so they override same-named modules.
+    merged = dict(plain)
+    merged.update(packages)
+    return merged
+def _resolve_py(imp: PyImport, from_rel: str, module_map: dict[str, str]) -> list[str]:
+    """Resolve one Python import to internal file rel paths (may be empty).
+    Candidates are the imported module itself and ``module.name`` for each imported name;
+    relative imports are anchored at the importing file's package. The importing file
+    itself and duplicates are never returned.
+    """
+    package = from_rel[:-3].split("/")[:-1]  # package of the importing module
+    if imp.level == 0:
+        if not imp.module:
+            return []
+        dotted = [imp.module, *(f"{imp.module}.{name}" for name in imp.names)]
+    else:
+        depth = len(package) - (imp.level - 1)
+        if depth < 0:
+            return []  # relative import reaches above the project root: not resolvable
+        anchor = package[:depth]
+        if imp.module:
+            stem = anchor + imp.module.split(".")
+            dotted = [".".join(stem), *(".".join([*stem, name]) for name in imp.names)]
+        else:
+            dotted = [".".join([*anchor, name]) for name in imp.names]
+    resolved: list[str] = []
+    for key in dotted:
+        hit = module_map.get(key)
+        if not hit or hit == from_rel or hit in resolved:
+            continue
+        resolved.append(hit)
+    return resolved
+def _resolve_js(spec: str, from_rel: str, node_ids: set[str]) -> str | None:
+    """Resolve a JS/TS import specifier relative to the importing file, or return None.
+    Each suffix in ``_JS_RESOLVE_ORDER`` is appended in turn; the first candidate that is
+    an indexed node other than the importing file wins.
+    """
+    importer_dir = posixpath.dirname(from_rel)
+    stem = posixpath.normpath(posixpath.join(importer_dir, spec))
+    candidates = (stem + suffix for suffix in _JS_RESOLVE_ORDER)
+    return next((c for c in candidates if c in node_ids and c != from_rel), None)
+def _is_external(target: str) -> bool:
+    """True for URLs, scheme-only URIs, Windows drive-letter paths, and UNC paths.
+    ``target`` is expected in cleaned form (forward slashes). Anything that returns True
+    here points outside the project and must never be reported as a broken reference.
+    """
+    if "://" in target:
+        return True
+    if len(target) >= 2 and target[1] == ":" and target[0].isalpha():
+        return True  # e.g. C:/Users/...
+    # URIs that carry no "//" after the scheme (e.g. "mailto:someone@example.com") are just as
+    # external as http links; without this check they would be treated as project paths.
+    if target.lower().startswith(("mailto:", "tel:", "sms:", "data:", "javascript:")):
+        return True
+    return target.startswith("//")  # UNC (\\server\share -> //server/share after cleaning)
+def _resolve_ref(
+    target: str,
+    kind: str,
+    from_rel: str,
+    node_ids: set[str],
+    basename_index: dict[str, list[str]],
+    stem_index: dict[str, list[str]],
+) -> tuple[str | None, bool]:
+    """Resolve a documentation reference to a project file.
+    Returns ``(resolved_id, is_broken)``. A reference is *broken* only when it is an
+    intentional link/wikilink that targets a project-internal file which does not exist.
+    Plain prose mentions (``kind == "path"``) are never broken (often library names or
+    examples); external URLs/absolute paths and references into ignored dirs are skipped.
+    Resolution order: exact path relative to the referencing file, exact path relative to
+    the project root, then a unique match by base name, then a unique match by stem.
+    A self-reference resolves to ``(None, False)``.
+    """
+    if not target:
+        return None, False
+    cand_file = posixpath.normpath(posixpath.join(posixpath.dirname(from_rel), target))
+    cand_root = posixpath.normpath(target.lstrip("/"))
+    for cand in (cand_file, cand_root):
+        if cand in node_ids:
+            return (cand if cand != from_rel else None), False
+    base = target.rsplit("/", 1)[-1]
+    stem = base.rsplit(".", 1)[0] if "." in base else base
+    for index, key in ((basename_index, base), (stem_index, stem)):
+        if not key:
+            # Dotfile targets (".env") have an empty stem and targets ending in "/" have an
+            # empty base name. The stem index files every dotfile under "", so looking that
+            # key up would "resolve" a missing ".env" to an unrelated dotfile.
+            continue
+        ids = [i for i in index.get(key, []) if i != from_rel]
+        if len(ids) == 1:
+            return ids[0], False
+    # Unresolved: decide whether it is genuinely a broken link.
+    if kind == "path" or _is_external(target):
+        return None, False
+    if cand_root.split("/", 1)[0] in DEFAULT_IGNORE_DIRS:
+        return None, False
+    if cand_root.startswith("../") or cand_file.startswith("../"):
+        return None, False  # escapes the project root
+    if kind == "wikilink" and not ("/" in target or "." in base):
+        return None, False  # bare [[Concept]] is a concept link, not a file
+    return None, True
+def _describe(node: Node, area: str, inbound: int) -> str:
+    """Build the one-line description for a node.
+    Area nodes read ``area '<label>'``; every other node reads ``<type> · <where>``,
+    followed by the inbound-edge count when it is non-zero.
+    """
+    if node.type is NodeType.AREA:
+        return f"area '{node.label}'"
+    location = "root" if area == AREA_ROOT else f"area '{area}'"
+    suffix = f" · {inbound} inbound" if inbound else ""
+    return f"{node.type.value} · {location}{suffix}"
+def build_graph(
+    root: str | os.PathLike[str],
+    *,
+    project: str | None = None,
+    _rels: list[str] | None = None,
+) -> Graph:
+    """Index the project at ``root`` and return its graph. Never modifies the project.
+    ``_rels`` lets a caller pass a precomputed file list to avoid walking the tree twice
+    (see :func:`second_brain.freshness.index`).
+    ``project`` names the graph and defaults to the resolved root directory's name.
+    Raises ``NotADirectoryError`` when ``root`` does not resolve to a directory.
+    """
+    root_p = Path(root).resolve()
+    if not root_p.is_dir():
+        raise NotADirectoryError(f"not a directory: {root_p}")
+    # Walk the tree only when the caller did not supply the file list.
+    rels = _rels if _rels is not None else iter_files(root_p, load_ignore_patterns(root_p))
+    g = Graph(project=project or root_p.name)
+    # 1. File nodes + areas.
+    areas: set[str] = set()
+    for rel in rels:
+        ntype = classify(rel)
+        label = rel.rsplit("/", 1)[-1]
+        # The relative POSIX path doubles as the node id.
+        node = Node(id=rel, type=ntype, label=label, path=rel)
+        try:
+            node.meta["size"] = (root_p / rel).stat().st_size
+        except OSError:
+            # Best effort: a file that cannot be stat'ed simply has no "size" entry.
+            pass
+        g.add_node(node)
+        areas.add(_top_area(rel))
+    # Area nodes get an "area:" id prefix and no path.
+    for area in sorted(areas):
+        g.add_node(Node(id=f"area:{area}", type=NodeType.AREA, label=area))
+    for rel in rels:
+        g.add_edge(Edge(rel, f"area:{_top_area(rel)}", EdgeType.BELONGS_TO))
+    # 2. Indexes for resolution.
+    node_ids = set(g.nodes.keys())
+    basename_index: dict[str, list[str]] = {}
+    stem_index: dict[str, list[str]] = {}
+    for rel in rels:
+        base = rel.rsplit("/", 1)[-1]
+        basename_index.setdefault(base, []).append(rel)
+        # Stem = base name up to its last dot; a dotfile such as ".gitignore" gets stem "".
+        stem = base.rsplit(".", 1)[0] if "." in base else base
+        stem_index.setdefault(stem, []).append(rel)
+    py_files = [r for r in rels if _ext(r) == ".py"]
+    module_map = _python_module_map(py_files)
+    # 3. Edges from file contents. Only code (imports) and docs (references) are ever READ;
+    #    data/config/binaries are never opened - the graph needs only their type/size/area.
+    #    This is what keeps indexing light on data-heavy projects (no reading huge JSON/CSV/logs).
+    for rel in rels:
+        ext = _ext(rel)
+        is_code = ext == ".py" or ext in _JS_EXTS
+        is_doc = ext in _DOC_REF_EXTS
+        if not (is_code or is_doc):
+            continue
+        text = _read_text(root_p / rel)
+        if text is None:
+            # Too large or unreadable: the node stays in the graph without content edges.
+            continue
+        if ext == ".py":
+            for imp in python_imports(text):
+                for tgt in _resolve_py(imp, rel, module_map):
+                    g.add_edge(Edge(rel, tgt, EdgeType.IMPORTS))
+        elif ext in _JS_EXTS:
+            for spec in js_imports(text):
+                tgt = _resolve_js(spec, rel, node_ids)
+                if tgt:
+                    g.add_edge(Edge(rel, tgt, EdgeType.IMPORTS))
+        if ext in _DOC_REF_EXTS:
+            # Unresolved-and-broken targets are collected per document; the gate reads them
+            # back from the node's "broken_refs" meta entry.
+            broken: list[str] = []
+            for target, kind in extract_references_tagged(text):
+                resolved, is_broken = _resolve_ref(
+                    target, kind, rel, node_ids, basename_index, stem_index
+                )
+                if resolved:
+                    g.add_edge(Edge(rel, resolved, EdgeType.REFERENCES))
+                elif is_broken:
+                    broken.append(target)
+            if broken:
+                g.nodes[rel].meta["broken_refs"] = broken
+    # 4. Descriptions (after edges, so we can include inbound counts).
+    inbound: dict[str, int] = {}
+    for e in g.edges:
+        # Area membership edges are not counted as inbound links.
+        if e.type in (EdgeType.REFERENCES, EdgeType.IMPORTS):
+            inbound[e.target] = inbound.get(e.target, 0) + 1
+    for node in g.nodes.values():
+        area = _top_area(node.path) if node.path else AREA_ROOT
+        node.description = _describe(node, area, inbound.get(node.id, 0))
+    return g

second_brain/mcp_server.py ADDED Viewed

@@ -0,0 +1,91 @@
+"""Optional MCP server exposing Second Brain\'s low-token queries to AI assistants.
+This is the piece that lets an assistant *query* the project instead of re-reading it. It is
+an OPTIONAL extra so the core stays dependency-free:
+    pip install second-brain-graph[mcp]
+    second-brain-mcp [PROJECT_PATH]      # defaults to the current directory
+Read-only on your sources. Exposes a handful of small, budgeted tools (map / find /
+neighbors / subgraph / health) over stdio.
+"""
+from __future__ import annotations
+import os
+import sys
+from typing import Any
+from second_brain import gate, query, store
+from second_brain.freshness import build_manifest, index
+from second_brain.model import Graph
+try:  # pragma: no cover - import-guard
+    from mcp.server.fastmcp import FastMCP
+except ImportError:
+    # Stay importable without the optional extra (linters, autodoc, test collection): keep
+    # FastMCP as None and fail with a friendly message only when the server is actually run.
+    FastMCP = None
+# The install hint must name the published distribution ("second-brain-graph"); the import
+# package is "second_brain", but "pip install second-brain[mcp]" would fetch a different project.
+_NO_MCP = "The MCP server needs the optional 'mcp' extra: pip install second-brain-graph[mcp]"
+def _graph(project: str) -> Graph:
+    """Return the stored graph for ``project``, indexing it when the store yields nothing."""
+    stored = store.load_graph(project)
+    if stored:
+        return stored
+    # index() returns a sequence whose first item is the graph.
+    return index(project)[0]
+def build_server(project: str):
+    """Construct the FastMCP server bound to ``project`` (no I/O until a tool is called)."""
+    if FastMCP is None:  # pragma: no cover - exercised only without the extra installed
+        raise ImportError(_NO_MCP)
+    server = FastMCP("second-brain")
+    # Each tool below calls _graph(project) on every invocation, so the graph is fetched per
+    # call rather than captured once when the server is built.
+    # NOTE(review): the tool docstrings are presumably surfaced to the assistant as tool
+    # descriptions by FastMCP - treat them as user-facing text and confirm before rewording.
+    @server.tool()
+    def project_map() -> dict[str, Any]:
+        """Compact project digest: areas with file counts/sizes/types, type and edge
+        tallies, the most-connected files, and orphan/broken counts. Cheap to load first."""
+        return query.project_map(_graph(project))
+    @server.tool()
+    def find(text: str) -> list[dict[str, Any]]:
+        """Find files/nodes whose name or path contains ``text`` (case-insensitive)."""
+        return query.find(_graph(project), text)
+    @server.tool()
+    def neighbors(node_id: str) -> dict[str, Any]:
+        """A node and its incoming/outgoing connections (imports, references, area membership)."""
+        # Distinct error for an unknown node, so an assistant can tell "no edges" from "no node".
+        res = query.neighbors(_graph(project), node_id)
+        return res if res is not None else {"error": "node not found", "id": node_id}
+    @server.tool()
+    def subgraph(node_id: str, hops: int = 1) -> dict[str, Any]:
+        """A small subgraph (nodes + edges) around ``node_id`` within ``hops``."""
+        # ``hops`` is passed through unchanged; any bounding is left to query.subgraph.
+        return query.subgraph(_graph(project), node_id, hops=hops)
+    @server.tool()
+    def health() -> dict[str, Any]:
+        """Anti-drift status: broken references, stale files vs the last build, orphan count."""
+        # Unlike the other tools, health never indexes on demand: it needs both a stored
+        # graph and a stored manifest to compare against, and reports "no-baseline" otherwise.
+        g = store.load_graph(project)
+        old = store.load_manifest(project)
+        if g is None or old is None:
+            return {"status": "no-baseline", "hint": "run 'second-brain build' first"}
+        # The stored manifest is compared with a manifest built now from the project.
+        rep = gate.evaluate(g, old, build_manifest(project))
+        # Only the orphan count is returned; broken pairs and stale buckets are returned in full.
+        return {"ok": rep.ok, "broken": rep.broken, "stale": rep.stale,
+                "orphans": len(rep.orphans)}
+    return server
+def main(argv: list[str] | None = None) -> int:
+    """Run the MCP server for the project named by the first argument (default: cwd).
+    ``argv`` defaults to ``sys.argv[1:]``. Returns 0 after the server stops, or 2 (with a
+    message on stderr) when the optional ``mcp`` dependency is not installed.
+    """
+    cli_args = argv if argv is not None else sys.argv[1:]
+    if cli_args:
+        project = cli_args[0]
+    else:
+        project = os.getcwd()
+    if FastMCP is not None:
+        build_server(project).run()
+        return 0
+    print(_NO_MCP, file=sys.stderr)
+    return 2
+# Script entry point: the integer returned by main() becomes the process exit status.
+if __name__ == "__main__":  # pragma: no cover
+    raise SystemExit(main())