PyPI - graphcoding - Versions diffs - 0.1.0__py3-none-any.whl - Mend

graphcoding 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

graphcoding/__init__.py +6 -0
graphcoding/cli.py +382 -0
graphcoding/drift.py +83 -0
graphcoding/health.py +97 -0
graphcoding/hooks.py +65 -0
graphcoding/scan.py +254 -0
graphcoding/store.py +202 -0
graphcoding/sync.py +86 -0
graphcoding-0.1.0.dist-info/METADATA +199 -0
graphcoding-0.1.0.dist-info/RECORD +14 -0
graphcoding-0.1.0.dist-info/WHEEL +5 -0
graphcoding-0.1.0.dist-info/entry_points.txt +2 -0
graphcoding-0.1.0.dist-info/licenses/LICENSE +21 -0
graphcoding-0.1.0.dist-info/top_level.txt +1 -0

graphcoding/scan.py ADDED Viewed

@@ -0,0 +1,254 @@
+"""Scanner — turn source files into graph nodes and import edges.
+Two jobs:
+  * scan_repo():  full sweep — migrate an existing repo onto the graph.
+  * scan_file():  one file — used by sync after every change.
+Python is parsed with ast (imports, top-level defs, module docstring).
+JS/TS/JSX/TSX/Vue/Svelte use regex import extraction with relative-path
+resolution. Everything else gets a file node with language + first-comment
+summary. Deliberately lightweight: the graph's value is the design layer
+(summaries, planned nodes, cross-cutting edges an import scan can't see),
+not perfect static analysis.
+"""
+from __future__ import annotations
+import ast
+import os
+import re
+import subprocess
+from .store import Graph, Node
+# File extension -> language label recorded on each node. An extension that
+# is missing here produces an empty language string (see language_of).
+LANG = {
+    ".py": "python", ".ts": "typescript", ".tsx": "typescript",
+    ".js": "javascript", ".jsx": "javascript", ".go": "go", ".rs": "rust",
+    ".rb": "ruby", ".java": "java", ".kt": "kotlin", ".c": "c", ".h": "c",
+    ".cpp": "cpp", ".hpp": "cpp", ".cs": "csharp", ".php": "php",
+    ".swift": "swift", ".css": "css", ".scss": "css", ".sql": "sql",
+    ".sh": "shell", ".html": "html", ".vue": "vue", ".svelte": "svelte",
+    ".json": "json", ".yaml": "yaml", ".yml": "yaml", ".toml": "toml",
+    ".md": "markdown",
+}
+# Substrings of a file's basename that mark it as a test file (see is_test).
+TEST_MARKERS = (".test.", ".spec.", "_test.")
+# Captures the quoted module specifier of `import ... from '...'`, a bare
+# side-effect `import '...'`, `export ... from '...'` and `require('...')`.
+JS_IMPORT_RE = re.compile(
+    r"""(?:import\s+(?:[\w${},*\s]+\s+from\s+)?|export\s+[\w${},*\s]+\s+from\s+|require\()\s*['"]([^'"]+)['"]""")
+# Captures the identifier of an exported function, class or const declaration,
+# optionally preceded by `default` and/or `async`.
+JS_EXPORT_RE = re.compile(
+    r"""export\s+(?:default\s+)?(?:async\s+)?(?:function|class|const)\s+([A-Za-z_$][\w$]*)""")
+def language_of(path: str) -> str:
+    return LANG.get(os.path.splitext(path)[1], "")
+def is_test(path: str) -> bool:
+    base = os.path.basename(path)
+    return base.startswith("test_") or any(m in base for m in TEST_MARKERS) \
+        or "/tests/" in "/" + path or "/__tests__/" in "/" + path
+def trackable(path: str, cfg: dict) -> bool:
+    segs = path.split("/")
+    if any(s in cfg["ignore_segments"] for s in segs):
+        return False
+    if cfg.get("ignore_tests") and is_test(path):
+        return False
+    return os.path.splitext(path)[1] in cfg["track_extensions"]
+def tracked_files(root: str, cfg: dict) -> list[str]:
+    """git ls-files when possible (respects .gitignore); os.walk fallback."""
+    try:
+        # cached + untracked-but-not-ignored: the graph should see WIP files too
+        out = subprocess.run(
+            ["git", "-C", root, "ls-files", "--cached", "--others",
+             "--exclude-standard"],
+            capture_output=True, text=True, check=True).stdout
+        # a file deleted from the worktree is still in the index — drop it
+        files = [p for p in out.splitlines()
+                 if os.path.exists(os.path.join(root, p))]
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        files = []
+        for dirpath, dirnames, filenames in os.walk(root):
+            rel = os.path.relpath(dirpath, root)
+            dirnames[:] = [d for d in dirnames
+                           if d not in cfg["ignore_segments"] and not d.startswith(".")]
+            for fn in filenames:
+                files.append(os.path.normpath(os.path.join(rel, fn)).replace(os.sep, "/"))
+    return sorted(p for p in files if trackable(p, cfg))
+# -- per-language extraction --------------------------------------------------
+def _py_extract(root: str, path: str, src: str):
+    """Returns (summary, imports, symbols). Never raises on bad source."""
+    try:
+        tree = ast.parse(src)
+    except SyntaxError:
+        return "", [], []
+    doc = ast.get_docstring(tree) or ""
+    summary = doc.strip().splitlines()[0] if doc.strip() else ""
+    modules = []
+    for stmt in ast.walk(tree):
+        if isinstance(stmt, ast.Import):
+            modules.extend(a.name for a in stmt.names)
+        elif isinstance(stmt, ast.ImportFrom):
+            prefix = "." * stmt.level
+            base = prefix + (stmt.module or "")
+            modules.append(base)
+            # `from pkg import mod` — each name may itself be a module file
+            for a in stmt.names:
+                modules.append((base + "." + a.name) if stmt.module
+                               else prefix + a.name)
+    imports = []
+    for m in modules:
+        target = _py_resolve(root, path, m)
+        if target:
+            imports.append(target)
+    symbols = []
+    for stmt in tree.body:
+        if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)):
+            symbols.append((stmt.name, "CodeFunction", ast.get_docstring(stmt) or ""))
+        elif isinstance(stmt, ast.ClassDef):
+            symbols.append((stmt.name, "CodeClass", ast.get_docstring(stmt) or ""))
+    return summary, sorted(set(imports)), symbols
+def _py_resolve(root: str, path: str, module: str) -> str | None:
+    """Map a python module string to a repo-relative file, if it lives in-repo."""
+    if module.startswith("."):
+        level = len(module) - len(module.lstrip("."))
+        base = os.path.dirname(path)
+        for _ in range(level - 1):
+            base = os.path.dirname(base)
+        tail = module.lstrip(".")
+        parts = ([base] if base else []) + (tail.split(".") if tail else [])
+        cand = "/".join(p for p in parts if p)
+    else:
+        cand = module.replace(".", "/")
+    for suffix in (".py", "/__init__.py"):
+        rel = cand + suffix
+        if os.path.exists(os.path.join(root, rel)):
+            return rel
+    # src-layout: src/<pkg>/...
+    for prefix in ("src/",):
+        for suffix in (".py", "/__init__.py"):
+            rel = prefix + cand + suffix
+            if os.path.exists(os.path.join(root, rel)):
+                return rel
+    return None
+def _js_resolve(root: str, path: str, spec: str) -> str | None:
+    """Resolve a relative (or @/ aliased) JS/TS import to a repo file."""
+    if spec.startswith("@/"):
+        base_dir = "src" if os.path.isdir(os.path.join(root, "src")) else ""
+        cand = os.path.normpath(os.path.join(base_dir, spec[2:]))
+    elif spec.startswith("."):
+        cand = os.path.normpath(os.path.join(os.path.dirname(path), spec))
+    else:
+        return None  # external package
+    cand = cand.replace(os.sep, "/")
+    exts = ["", ".ts", ".tsx", ".js", ".jsx", ".vue", ".svelte", ".css", ".json"]
+    for ext in exts:
+        rel = cand + ext
+        if os.path.isfile(os.path.join(root, rel)):
+            return rel
+    for ext in (".ts", ".tsx", ".js", ".jsx"):
+        rel = cand + "/index" + ext
+        if os.path.isfile(os.path.join(root, rel)):
+            return rel
+    return None
+def _first_comment(src: str) -> str:
+    """First comment or heading line — a serviceable auto-summary."""
+    for line in src.splitlines()[:15]:
+        s = line.strip()
+        for prefix in ("#", "//", "/*", "*", "--", "<!--"):
+            if s.startswith(prefix):
+                text = s.lstrip("#/*-<!– ").rstrip("*/->").strip()
+                if len(text) > 8 and not text.lower().startswith(("eslint", "ts-", "noqa", "prettier")):
+                    return text
+    return ""
+def node_type_for(path: str, src: str = "") -> str:
+    base = os.path.basename(path)
+    ext = os.path.splitext(path)[1]
+    if ext in (".json", ".yaml", ".yml", ".toml"):
+        return "ConfigFile"
+    if ext == ".md":
+        return "Doc"
+    if path.endswith(".d.ts"):
+        return "TypeDef"
+    if ext in (".tsx", ".jsx") and base[:1].isupper():
+        return "Component"
+    if re.match(r"^use[A-Z]", base) and ext in (".ts", ".tsx", ".js", ".jsx"):
+        return "Hook"
+    return "CodeFile"
+def scan_file(root: str, path: str, cfg: dict) -> tuple[Node, list[Node]]:
+    """Build the node (+ optional symbol sub-nodes) for one file."""
+    full = os.path.join(root, path)
+    try:
+        with open(full, encoding="utf-8", errors="replace") as f:
+            src = f.read()
+    except OSError:
+        src = ""
+    lang = language_of(path)
+    summary, imports, symbols = "", [], []
+    if lang == "python":
+        summary, imports, symbols = _py_extract(root, path, src)
+    elif lang in ("typescript", "javascript", "vue", "svelte"):
+        for spec in JS_IMPORT_RE.findall(src):
+            t = _js_resolve(root, path, spec)
+            if t:
+                imports.append(t)
+        imports = sorted(set(imports))
+        if cfg.get("scan_symbols"):
+            symbols = [(m, "CodeFunction", "") for m in JS_EXPORT_RE.findall(src)]
+    if not summary:
+        summary = _first_comment(src)
+    node = Node(name=path, type=node_type_for(path, src), status="ok",
+                language=lang, summary=summary,
+                edges=[{"to": t, "type": "IMPORTS"} for t in imports if t != path])
+    subs = []
+    if cfg.get("scan_symbols") and symbols:
+        for sname, stype, sdoc in symbols:
+            if sname.startswith("_"):
+                continue
+            ssum = sdoc.strip().splitlines()[0] if sdoc.strip() else ""
+            subs.append(Node(name=f"{path}::{sname}", type=stype, status="ok",
+                             language=lang, summary=ssum))
+            node.add_edge(f"{path}::{sname}", "CONTAINS")
+    return node, subs
+def scan_repo(root: str, cfg: dict, graph: Graph) -> dict:
+    """Full sweep. Preserves human-written summaries and planned/delete marks."""
+    files = tracked_files(root, cfg)
+    added = updated = 0
+    for path in files:
+        node, subs = scan_file(root, path, cfg)
+        old = graph.nodes.get(path)
+        if old:
+            # never clobber intent: keep richer summary and lifecycle statuses
+            if old.summary and not node.summary:
+                node.summary = old.summary
+            if len(old.summary) > len(node.summary):
+                node.summary = old.summary
+            if old.status == "to-be-deleted":
+                node.status = old.status
+            updated += 1
+        else:
+            added += 1
+        graph.nodes[path] = node
+        for s in subs:
+            prev = graph.nodes.get(s.name)
+            if prev and len(prev.summary) > len(s.summary):
+                s.summary = prev.summary
+            graph.nodes[s.name] = s
+    return {"files": len(files), "added": added, "updated": updated}

graphcoding/store.py ADDED Viewed

@@ -0,0 +1,202 @@
+"""Graph store — the in-repo knowledge graph.
+The graph lives at .graphcoding/graph.jsonl: one JSON object per line, sorted
+by node name. Sorted JSONL keeps diffs small and merges sane — a node edit
+touches one line, and two branches adding different nodes rarely conflict.
+Node shape:
+    {
+      "name":     "src/app.py",          # repo-relative path, or "path::Symbol"
+      "type":     "CodeFile",            # see NODE_TYPES
+      "status":   "ok",                  # ok | planned | needs-analysis | to-be-deleted
+      "language": "python",
+      "summary":  "One line: what this file is for.",
+      "edges":    [{"to": "src/db.py", "type": "IMPORTS"}, ...]
+    }
+Edges are stored on the source node. Edge targets may name nodes that do not
+exist yet — a link to a planned node is work to do, by design.
+"""
+from __future__ import annotations
+import json
+import os
+from dataclasses import dataclass, field
+# Known values for Node.type. Nothing in this module validates against it.
+NODE_TYPES = [
+    "CodeFile", "CodeFunction", "CodeClass", "CodeModule",
+    "Component", "Hook", "TypeDef", "ServiceDef", "ConfigFile", "Doc",
+]
+# Known values for an edge's "type" field. Not validated in this module.
+EDGE_TYPES = [
+    "IMPORTS", "CALLS", "CONTAINS", "INHERITS", "IMPLEMENTS",
+    "REFERENCES", "DEPENDS_ON", "RELATED_TO",
+]
+# Known values for Node.status. Not validated in this module.
+STATUSES = ["ok", "planned", "needs-analysis", "to-be-deleted"]
+# On-disk layout: <root>/.graphcoding/graph.jsonl and <root>/.graphcoding/config.json
+GRAPH_DIR = ".graphcoding"
+GRAPH_FILE = "graph.jsonl"
+CONFIG_FILE = "config.json"
+# Settings used when the repo has no config file; load_config() overlays the
+# file's top-level keys on top of these.
+DEFAULT_CONFIG = {
+    # file extensions (with the leading dot) that get a node
+    "track_extensions": [
+        ".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".rs", ".rb", ".java",
+        ".kt", ".c", ".h", ".cpp", ".hpp", ".cs", ".php", ".swift",
+        ".css", ".scss", ".sql", ".sh", ".html", ".vue", ".svelte",
+        ".json", ".yaml", ".yml", ".toml", ".md",
+    ],
+    # a path is skipped when any of its segments equals one of these
+    "ignore_segments": [
+        "node_modules", ".git", ".venv", "venv", "dist", "build", "target",
+        "__pycache__", ".next", ".nuxt", "coverage", "vendor", ".graphcoding",
+    ],
+    # presumably read by the scanner to skip test files and to decide whether
+    # to emit symbol sub-nodes — verify against the scanner module
+    "ignore_tests": True,
+    "scan_symbols": False,
+}
+@dataclass
+class Node:
+    name: str
+    type: str = "CodeFile"
+    status: str = "ok"
+    language: str = ""
+    summary: str = ""
+    edges: list = field(default_factory=list)
+    def to_dict(self) -> dict:
+        d = {"name": self.name, "type": self.type, "status": self.status}
+        if self.language:
+            d["language"] = self.language
+        if self.summary:
+            d["summary"] = self.summary
+        if self.edges:
+            d["edges"] = sorted(self.edges, key=lambda e: (e["type"], e["to"]))
+        return d
+    @classmethod
+    def from_dict(cls, d: dict) -> "Node":
+        return cls(
+            name=d["name"],
+            type=d.get("type", "CodeFile"),
+            status=d.get("status", "ok"),
+            language=d.get("language", ""),
+            summary=d.get("summary", ""),
+            edges=list(d.get("edges", [])),
+        )
+    def add_edge(self, to: str, etype: str) -> bool:
+        for e in self.edges:
+            if e["to"] == to and e["type"] == etype:
+                return False
+        self.edges.append({"to": to, "type": etype})
+        return True
+class Graph:
+    """The whole graph, loaded in memory; save() rewrites the sorted JSONL."""
+    def __init__(self, root: str):
+        self.root = root
+        self.path = os.path.join(root, GRAPH_DIR, GRAPH_FILE)
+        self.nodes: dict[str, Node] = {}
+    # -- persistence -----------------------------------------------------
+    @classmethod
+    def load(cls, root: str) -> "Graph":
+        g = cls(root)
+        if os.path.exists(g.path):
+            with open(g.path, encoding="utf-8") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line:
+                        continue
+                    n = Node.from_dict(json.loads(line))
+                    g.nodes[n.name] = n
+        return g
+    def save(self) -> None:
+        os.makedirs(os.path.dirname(self.path), exist_ok=True)
+        with open(self.path, "w", encoding="utf-8") as f:
+            for name in sorted(self.nodes):
+                f.write(json.dumps(self.nodes[name].to_dict(),
+                                   ensure_ascii=False, sort_keys=True) + "\n")
+    # -- ops ---------------------------------------------------------------
+    def upsert(self, node: Node) -> Node:
+        existing = self.nodes.get(node.name)
+        if existing:
+            existing.type = node.type or existing.type
+            existing.language = node.language or existing.language
+            if node.summary:
+                existing.summary = node.summary
+            existing.status = node.status
+            if node.edges:
+                for e in node.edges:
+                    existing.add_edge(e["to"], e["type"])
+            return existing
+        self.nodes[node.name] = node
+        return node
+    def delete(self, name: str) -> bool:
+        """Remove a node and every edge pointing at it."""
+        found = self.nodes.pop(name, None) is not None
+        for n in self.nodes.values():
+            n.edges = [e for e in n.edges if e["to"] != name]
+        return found
+    def incoming(self, name: str) -> list[tuple[str, str]]:
+        """Who points at this node — the blast radius. [(source, edge_type)]"""
+        out = []
+        for n in self.nodes.values():
+            for e in n.edges:
+                if e["to"] == name:
+                    out.append((n.name, e["type"]))
+        return sorted(out)
+    def file_nodes(self) -> dict[str, Node]:
+        """Nodes that represent files (no ::symbol suffix)."""
+        return {k: v for k, v in self.nodes.items() if "::" not in k}
+    def with_status(self, status: str) -> list[Node]:
+        return sorted((n for n in self.nodes.values() if n.status == status),
+                      key=lambda n: n.name)
+    def search(self, terms: list[str], limit: int = 20) -> list[tuple[float, Node]]:
+        """Rank nodes by token overlap across name + summary. No server needed."""
+        terms = [t.lower() for t in terms if t]
+        scored = []
+        for n in self.nodes.values():
+            hay = (n.name + " " + n.summary).lower()
+            score = sum(2.0 if t in n.name.lower() else 1.0
+                        for t in terms if t in hay)
+            if score > 0:
+                scored.append((score, n))
+        scored.sort(key=lambda s: (-s[0], s[1].name))
+        return scored[:limit]
+# -- config -----------------------------------------------------------------
+def config_path(root: str) -> str:
+    return os.path.join(root, GRAPH_DIR, CONFIG_FILE)
+def load_config(root: str) -> dict:
+    cfg = dict(DEFAULT_CONFIG)
+    p = config_path(root)
+    if os.path.exists(p):
+        with open(p, encoding="utf-8") as f:
+            cfg.update(json.load(f))
+    return cfg
+def find_root(start: str | None = None) -> str | None:
+    """Walk up from start (or cwd) to the directory containing .graphcoding/."""
+    d = os.path.abspath(start or os.getcwd())
+    while True:
+        if os.path.isdir(os.path.join(d, GRAPH_DIR)):
+            return d
+        parent = os.path.dirname(d)
+        if parent == d:
+            return None
+        d = parent

graphcoding/sync.py ADDED Viewed

@@ -0,0 +1,86 @@
+"""Sync — reconcile the graph with a set of changed files.
+Sources of the change set:
+  --staged        files staged right now (pre-commit)
+  --commit REF    files changed in a commit (post-commit, default HEAD)
+  --files a b c   explicit list
+  (none)          every drifting file from a fresh drift report
+Rules:
+  * added/modified file  -> rescan; planned becomes ok (the design was built);
+                            human summaries survive unless the file's own
+                            docstring/comment is richer
+  * deleted file         -> node removed, along with edges pointing at it
+  * to-be-deleted + gone -> node removed (deletion completed)
+"""
+from __future__ import annotations
+import os
+import subprocess
+from .drift import compute_drift
+from .scan import scan_file, trackable
+from .store import Graph
+def _git_changed(root: str, staged: bool, commit: str | None) -> list[tuple[str, str]]:
+    if staged:
+        cmd = ["git", "-C", root, "diff", "--cached", "--name-status"]
+    else:
+        ref = commit or "HEAD"
+        cmd = ["git", "-C", root, "diff", "--name-status", f"{ref}~1..{ref}"]
+    try:
+        out = subprocess.run(cmd, capture_output=True, text=True, check=True).stdout
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        return []
+    changes = []
+    for line in out.splitlines():
+        parts = line.split("\t")
+        if len(parts) >= 2:
+            changes.append((parts[-1], parts[0][0]))  # renames: new path wins
+    return changes
+def sync(root: str, cfg: dict, graph: Graph,
+         staged: bool = False, commit: str | None = None,
+         files: list[str] | None = None) -> dict:
+    if files:
+        changes = [(f, "D" if not os.path.exists(os.path.join(root, f)) else "M")
+                   for f in files]
+    elif staged or commit:
+        changes = _git_changed(root, staged, commit)
+    else:
+        rep = compute_drift(root, cfg, graph)
+        changes = ([(p, "M") for p in rep["missing_node"] + rep["built_not_synced"]]
+                   + [(p, "D") for p in rep["ghost_node"] + rep["not_deleted"]])
+        # not_deleted files still exist; deleting the node is wrong — the FILE
+        # should go. Surface them instead of silently "fixing" the graph.
+        changes = [(p, s) for p, s in changes if p not in rep["not_deleted"]]
+    upserted, removed, skipped = [], [], []
+    for path, st in changes:
+        if not trackable(path, cfg):
+            continue
+        if st == "D":
+            if os.path.exists(os.path.join(root, path)):
+                skipped.append(path)  # marked deleted in git but still on disk
+                continue
+            # drop the file node and its symbol sub-nodes
+            for name in [n for n in graph.nodes
+                         if n == path or n.startswith(path + "::")]:
+                graph.delete(name)
+            removed.append(path)
+        else:
+            node, subs = scan_file(root, path, cfg)
+            old = graph.nodes.get(path)
+            if old and len(old.summary) > len(node.summary):
+                node.summary = old.summary
+            graph.nodes[path] = node
+            for s in subs:
+                prev = graph.nodes.get(s.name)
+                if prev and len(prev.summary) > len(s.summary):
+                    s.summary = prev.summary
+                graph.nodes[s.name] = s
+            upserted.append(path)
+    graph.save()
+    return {"upserted": upserted, "removed": removed, "skipped": skipped}