graphify-0.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
graphify/__init__.py ADDED
@@ -0,0 +1,27 @@
1
+ """graphify — extract · build · cluster · analyze · report."""
2
+
3
+
4
def __getattr__(name):
    """Resolve public API names lazily on first attribute access (PEP 562).

    Imports are deferred so `graphify install` works before the heavy
    dependencies used by extract/build/cluster are installed.
    """
    lazy = {
        "extract": ("graphify.extract", "extract"),
        "collect_files": ("graphify.extract", "collect_files"),
        "build_from_json": ("graphify.build", "build_from_json"),
        "cluster": ("graphify.cluster", "cluster"),
        "score_all": ("graphify.cluster", "score_all"),
        "cohesion_score": ("graphify.cluster", "cohesion_score"),
        "god_nodes": ("graphify.analyze", "god_nodes"),
        "surprising_connections": ("graphify.analyze", "surprising_connections"),
        "suggest_questions": ("graphify.analyze", "suggest_questions"),
        "generate": ("graphify.report", "generate"),
        "to_json": ("graphify.export", "to_json"),
        "to_html": ("graphify.export", "to_html"),
        "to_svg": ("graphify.export", "to_svg"),
        "to_canvas": ("graphify.export", "to_canvas"),
    }
    try:
        mod_name, attr = lazy[name]
    except KeyError:
        raise AttributeError(f"module 'graphify' has no attribute {name!r}") from None
    import importlib
    return getattr(importlib.import_module(mod_name), attr)
graphify/__main__.py ADDED
@@ -0,0 +1,89 @@
1
+ """graphify CLI — `graphify install` sets up the Claude Code skill."""
2
+ from __future__ import annotations
3
+ import json
4
+ import shutil
5
+ import sys
6
+ from pathlib import Path
7
+
8
# Text appended to ~/.claude/CLAUDE.md so Claude Code advertises the skill
# and routes the `/graphify` slash command to it. The leading "\n" separates
# it from existing CLAUDE.md content when appended.
_SKILL_REGISTRATION = (
    "\n# graphify\n"
    "- **graphify** (`~/.claude/skills/graphify/SKILL.md`) "
    "— any input to knowledge graph. Trigger: `/graphify`\n"
    "When the user types `/graphify`, invoke the Skill tool "
    "with `skill: \"graphify\"` before doing anything else.\n"
)
15
+
16
+
17
def _bundled_skill() -> Path:
    """Return the path of the skill.md file shipped inside this package."""
    package_dir = Path(__file__).parent
    return package_dir / "skill.md"
20
+
21
+
22
def install() -> None:
    """Install the graphify skill for Claude Code.

    Copies the bundled skill.md to ~/.claude/skills/graphify/SKILL.md and
    registers the skill in ~/.claude/CLAUDE.md (creating that file when it
    does not exist). Exits with status 1 if the bundled skill.md is missing
    from the installed package.
    """
    skill_src = _bundled_skill()
    if not skill_src.exists():
        print("error: skill.md not found in package — reinstall graphify", file=sys.stderr)
        sys.exit(1)

    # Copy skill to ~/.claude/skills/graphify/SKILL.md
    skill_dst = Path.home() / ".claude" / "skills" / "graphify" / "SKILL.md"
    skill_dst.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(skill_src, skill_dst)
    print(f" skill installed → {skill_dst}")

    # Register in ~/.claude/CLAUDE.md
    claude_md = Path.home() / ".claude" / "CLAUDE.md"
    if claude_md.exists():
        content = claude_md.read_text()
        # NOTE(review): any mention of "graphify" anywhere in CLAUDE.md
        # suppresses registration — a coarse idempotence guard; confirm
        # this is intended before tightening.
        if "graphify" in content:
            # Fixed: was an f-string with no placeholders (ruff F541).
            print(" CLAUDE.md → already registered (no change)")
        else:
            claude_md.write_text(content.rstrip() + _SKILL_REGISTRATION)
            print(f" CLAUDE.md → skill registered in {claude_md}")
    else:
        claude_md.parent.mkdir(parents=True, exist_ok=True)
        claude_md.write_text(_SKILL_REGISTRATION.lstrip())
        print(f" CLAUDE.md → created at {claude_md}")

    print()
    print("Done. Open Claude Code in any directory and type:")
    print()
    print(" /graphify .")
    print()
53
+
54
+
55
def main() -> None:
    """CLI entry point: dispatch `install`, `benchmark`, or print usage."""
    argv = sys.argv
    if len(argv) < 2 or argv[1] in ("-h", "--help"):
        for line in (
            "Usage: graphify <command>",
            "",
            "Commands:",
            " install copy skill to ~/.claude/skills/ and register in CLAUDE.md",
            " benchmark [graph.json] measure token reduction vs naive full-corpus approach",
            "",
        ):
            print(line)
        return

    command = argv[1]
    if command == "install":
        install()
    elif command == "benchmark":
        from graphify.benchmark import run_benchmark, print_benchmark
        graph_path = argv[2] if len(argv) > 2 else ".graphify/graph.json"
        # Reuse the corpus word count from a prior detect run when available.
        corpus_words = None
        detect_file = Path(".graphify_detect.json")
        if detect_file.exists():
            try:
                corpus_words = json.loads(detect_file.read_text()).get("total_words")
            except Exception:
                # Best-effort: a corrupt detect file just means no corpus size.
                pass
        print_benchmark(run_benchmark(graph_path, corpus_words=corpus_words))
    else:
        print(f"error: unknown command '{command}'", file=sys.stderr)
        print("Run 'graphify --help' for usage.", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
graphify/analyze.py ADDED
@@ -0,0 +1,429 @@
1
+ """Graph analysis: god nodes (most connected), surprising connections (cross-community), suggested questions."""
2
+ from __future__ import annotations
3
+ import networkx as nx
4
+
5
+
6
def _is_file_node(G: nx.Graph, node_id: str) -> bool:
    """
    True for synthetic nodes created by the AST extractor:

    - file-level hubs whose label is a filename with a code extension
      (e.g. 'client', 'models'),
    - method stubs labeled '.method_name()',
    - module-level function stubs 'name()' with at most one edge
      (structurally isolated by construction, not a real knowledge gap).

    Such nodes are excluded from god nodes, surprising connections, and
    knowledge-gap reporting.
    """
    label = G.nodes[node_id].get("label", "")
    if not label:
        return False
    code_extensions = ("py", "ts", "js", "go", "rs", "java", "rb", "cpp", "c", "h")
    # File-level hub: final dot-segment is a known code extension.
    if label.rsplit(".", 1)[-1] in code_extensions:
        return True
    # Method stub from the AST extractor: '.method_name()'
    if label.startswith(".") and label.endswith("()"):
        return True
    # Module-level function stub tied in by at most a single contains edge.
    return label.endswith("()") and G.degree(node_id) <= 1
28
+
29
+
30
def god_nodes(G: nx.Graph, top_n: int = 10) -> list[dict]:
    """Return the top_n most-connected real entities — the core abstractions.

    File-level hubs and concept nodes are skipped: they accumulate
    import/contains edges mechanically and don't represent meaningful
    architectural abstractions.
    """
    ranked = sorted(dict(G.degree()).items(), key=lambda pair: pair[1], reverse=True)
    picks: list[dict] = []
    for node_id, edge_count in ranked:
        if _is_file_node(G, node_id) or _is_concept_node(G, node_id):
            continue
        picks.append({
            "id": node_id,
            "label": G.nodes[node_id].get("label", node_id),
            "edges": edge_count,
        })
        if len(picks) >= top_n:
            break
    return picks
50
+
51
+
52
def surprising_connections(
    G: nx.Graph,
    communities: dict[int, list[str]] | None = None,
    top_n: int = 5,
) -> list[dict]:
    """
    Find connections that are genuinely surprising — not obvious from file
    structure.

    Multi-file corpora get cross-file edges between real entities (sorted
    AMBIGUOUS → INFERRED → EXTRACTED); single-source corpora get
    cross-community / high-betweenness bridge edges instead. Concept nodes
    are excluded throughout: injected annotations are intentional, not
    discovered.
    """
    # Count distinct non-empty source files to decide which strategy applies.
    distinct_sources: set[str] = set()
    for _, attrs in G.nodes(data=True):
        src = attrs.get("source_file", "")
        if src:
            distinct_sources.add(src)

    comm = communities or {}
    if len(distinct_sources) > 1:
        return _cross_file_surprises(G, comm, top_n)
    return _cross_community_surprises(G, comm, top_n)
82
+
83
+
84
def _is_concept_node(G: nx.Graph, node_id: str) -> bool:
    """
    True for manually-injected semantic concept nodes, as opposed to real
    entities found in source code.

    Signals: an empty source_file, or a source_file whose final path
    segment carries no extension (so it doesn't look like a real file).
    """
    source = G.nodes[node_id].get("source_file", "")
    if not source:
        return True
    basename = source.split("/")[-1]
    return "." not in basename
101
+
102
+
103
def _cross_file_surprises(G: nx.Graph, communities: dict[int, list[str]], top_n: int) -> list[dict]:
    """
    Cross-file edges between real code/doc entities, sorted
    AMBIGUOUS → INFERRED → EXTRACTED.

    Concept nodes, file hub nodes, and structural scaffolding edges
    (imports / imports_from / contains / method) are skipped. When no
    semantic cross-file edge exists (a pure AST corpus), falls back to
    cross-community bridge edges.
    """
    structural = ("imports", "imports_from", "contains", "method")
    rank = {"AMBIGUOUS": 0, "INFERRED": 1, "EXTRACTED": 2}
    found: list[dict] = []

    for u, v, data in G.edges(data=True):
        relation = data.get("relation", "")
        # Structural scaffolding is mechanical, not an insight.
        if relation in structural:
            continue
        if _is_concept_node(G, u) or _is_concept_node(G, v):
            continue
        if _is_file_node(G, u) or _is_file_node(G, v):
            continue

        file_u = G.nodes[u].get("source_file", "")
        file_v = G.nodes[v].get("source_file", "")
        if not file_u or not file_v or file_u == file_v:
            continue

        # _src/_tgt (when present) preserve the original edge direction,
        # which an undirected graph would otherwise lose.
        src_id = data.get("_src", u)
        tgt_id = data.get("_tgt", v)
        found.append({
            "source": G.nodes[src_id].get("label", src_id),
            "target": G.nodes[tgt_id].get("label", tgt_id),
            "source_files": [
                G.nodes[src_id].get("source_file", ""),
                G.nodes[tgt_id].get("source_file", ""),
            ],
            "confidence": data.get("confidence", "EXTRACTED"),
            "relation": relation,
        })

    if not found:
        # Fallback: surface structural bridge edges instead.
        return _cross_community_surprises(G, communities, top_n)

    found.sort(key=lambda item: rank.get(item["confidence"], 3))
    return found[:top_n]
149
+
150
+
151
def _cross_community_surprises(
    G: nx.Graph,
    communities: dict[int, list[str]],
    top_n: int,
) -> list[dict]:
    """
    For single-source corpora: find edges that bridge different communities.
    These are surprising because Leiden grouped everything else tightly —
    these edges cut across the natural structure.

    Falls back to high-betweenness edges if no community info is provided.
    """
    if not communities:
        # No community info — use edge betweenness centrality
        if G.number_of_edges() == 0:
            return []
        betweenness = nx.edge_betweenness_centrality(G)
        top_edges = sorted(betweenness.items(), key=lambda x: x[1], reverse=True)[:top_n]
        result = []
        for (u, v), score in top_edges:
            data = G.edges[u, v]
            result.append({
                "source": G.nodes[u].get("label", u),
                "target": G.nodes[v].get("label", v),
                "source_files": [
                    G.nodes[u].get("source_file", ""),
                    G.nodes[v].get("source_file", ""),
                ],
                "confidence": data.get("confidence", "EXTRACTED"),
                "relation": data.get("relation", ""),
                "note": f"Bridges graph structure (betweenness={score:.3f})",
            })
        return result

    # Build node → community map
    node_community = {n: cid for cid, nodes in communities.items() for n in nodes}

    surprises = []
    for u, v, data in G.edges(data=True):
        cid_u = node_community.get(u)
        cid_v = node_community.get(v)
        # Same community (or unassigned node) — not a bridge.
        if cid_u is None or cid_v is None or cid_u == cid_v:
            continue
        # Skip file hub nodes and plain structural edges
        if _is_file_node(G, u) or _is_file_node(G, v):
            continue
        relation = data.get("relation", "")
        if relation in ("imports", "imports_from", "contains", "method"):
            continue
        # This edge crosses community boundaries — interesting
        confidence = data.get("confidence", "EXTRACTED")
        # _src/_tgt (when present) preserve the original edge direction,
        # which an undirected nx.Graph would otherwise lose.
        src_id = data.get("_src", u)
        tgt_id = data.get("_tgt", v)
        surprises.append({
            "source": G.nodes[src_id].get("label", src_id),
            "target": G.nodes[tgt_id].get("label", tgt_id),
            "source_files": [
                G.nodes[src_id].get("source_file", ""),
                G.nodes[tgt_id].get("source_file", ""),
            ],
            "confidence": confidence,
            "relation": relation,
            "note": f"Bridges community {cid_u} → community {cid_v}",
            # Unordered community pair, consumed (popped) by the dedup pass below.
            "_pair": tuple(sorted([cid_u, cid_v])),
        })

    # Sort: AMBIGUOUS first, then INFERRED, then EXTRACTED
    order = {"AMBIGUOUS": 0, "INFERRED": 1, "EXTRACTED": 2}
    surprises.sort(key=lambda x: order.get(x["confidence"], 3))

    # Deduplicate by community pair — one representative edge per (A→B) boundary.
    # Without this, a single high-betweenness god node dominates all results.
    seen_pairs: set[tuple] = set()
    deduped = []
    for s in surprises:
        pair = s.pop("_pair")
        if pair not in seen_pairs:
            seen_pairs.add(pair)
            deduped.append(s)
    return deduped[:top_n]
231
+
232
+
233
def suggest_questions(
    G: nx.Graph,
    communities: dict[int, list[str]],
    community_labels: dict[int, str],
    top_n: int = 7,
) -> list[dict]:
    """
    Generate questions the graph is uniquely positioned to answer.
    Based on: AMBIGUOUS edges, bridge nodes, underexplored god nodes, isolated nodes.
    Each question has a 'type', 'question', and 'why' field.

    Question sources, appended in priority order (the final list is
    truncated to top_n, so earlier sources win):
      1. AMBIGUOUS edges          → resolve an uncertain relationship
      2. high-betweenness nodes   → explain a cross-community bridge
      3. INFERRED edges on hubs   → verify model-reasoned connections
      4. degree ≤ 1 real nodes    → find missing links / doc gaps
      5. low-cohesion communities → question a cluster's coherence
    """
    questions = []
    node_community = {n: cid for cid, nodes in communities.items() for n in nodes}

    # 1. AMBIGUOUS edges → unresolved relationship questions
    for u, v, data in G.edges(data=True):
        if data.get("confidence") == "AMBIGUOUS":
            ul = G.nodes[u].get("label", u)
            vl = G.nodes[v].get("label", v)
            relation = data.get("relation", "related to")
            questions.append({
                "type": "ambiguous_edge",
                "question": f"What is the exact relationship between `{ul}` and `{vl}`?",
                "why": f"Edge tagged AMBIGUOUS (relation: {relation}) — confidence is low.",
            })

    # 2. Bridge nodes (high betweenness) → cross-cutting concern questions
    if G.number_of_edges() > 0:
        betweenness = nx.betweenness_centrality(G)
        # Top bridge nodes that are NOT file-level hubs
        bridges = sorted(
            [(n, s) for n, s in betweenness.items()
             if not _is_file_node(G, n) and not _is_concept_node(G, n) and s > 0],
            key=lambda x: x[1],
            reverse=True,
        )[:3]
        for node_id, score in bridges:
            label = G.nodes[node_id].get("label", node_id)
            cid = node_community.get(node_id)
            comm_label = community_labels.get(cid, f"Community {cid}") if cid is not None else "unknown"
            neighbors = list(G.neighbors(node_id))
            # Communities on the far side of the bridge (own community excluded).
            neighbor_comms = {node_community.get(n) for n in neighbors if node_community.get(n) != cid}
            if neighbor_comms:
                other_labels = [community_labels.get(c, f"Community {c}") for c in neighbor_comms]
                questions.append({
                    "type": "bridge_node",
                    "question": f"Why does `{label}` connect `{comm_label}` to {', '.join(f'`{l}`' for l in other_labels)}?",
                    "why": f"High betweenness centrality ({score:.3f}) — this node is a cross-community bridge.",
                })

    # 3. God nodes with many INFERRED edges → verification questions
    degree = dict(G.degree())
    top_nodes = sorted(
        [(n, d) for n, d in degree.items() if not _is_file_node(G, n)],
        key=lambda x: x[1],
        reverse=True,
    )[:5]
    for node_id, _ in top_nodes:
        inferred = [
            (u, v, d) for u, v, d in G.edges(node_id, data=True)
            if d.get("confidence") == "INFERRED"
        ]
        # Only worth asking when at least two edges need verification
        # (the question template also cites two example neighbors).
        if len(inferred) >= 2:
            label = G.nodes[node_id].get("label", node_id)
            # Use _src/_tgt to get the correct direction; fall back to v (the other node)
            others = []
            for u, v, d in inferred[:2]:
                src_id = d.get("_src", u)
                tgt_id = d.get("_tgt", v)
                other_id = tgt_id if src_id == node_id else src_id
                others.append(G.nodes[other_id].get("label", other_id))
            questions.append({
                "type": "verify_inferred",
                "question": f"Are the {len(inferred)} inferred relationships involving `{label}` (e.g. with `{others[0]}` and `{others[1]}`) actually correct?",
                "why": f"`{label}` has {len(inferred)} INFERRED edges — model-reasoned connections that need verification.",
            })

    # 4. Isolated or weakly-connected nodes → exploration questions
    isolated = [
        n for n in G.nodes()
        if G.degree(n) <= 1 and not _is_file_node(G, n) and not _is_concept_node(G, n)
    ]
    if isolated:
        labels = [G.nodes[n].get("label", n) for n in isolated[:3]]
        questions.append({
            "type": "isolated_nodes",
            "question": f"What connects {', '.join(f'`{l}`' for l in labels)} to the rest of the system?",
            "why": f"{len(isolated)} weakly-connected nodes found — possible documentation gaps or missing edges.",
        })

    # 5. Low-cohesion communities → structural questions
    # NOTE(review): local import — presumably avoids an import cycle with
    # graphify.cluster; confirm before hoisting to module level.
    from .cluster import cohesion_score
    for cid, nodes in communities.items():
        score = cohesion_score(G, nodes)
        # Thresholds: flag only reasonably large (≥5 nodes), weakly knit (<0.15) groups.
        if score < 0.15 and len(nodes) >= 5:
            label = community_labels.get(cid, f"Community {cid}")
            questions.append({
                "type": "low_cohesion",
                "question": f"Should `{label}` be split into smaller, more focused modules?",
                "why": f"Cohesion score {score} — nodes in this community are weakly interconnected.",
            })

    if not questions:
        # Sentinel entry so callers always receive at least one item.
        return [{
            "type": "no_signal",
            "question": None,
            "why": (
                "Not enough signal to generate questions. "
                "This usually means the corpus has no AMBIGUOUS edges, no bridge nodes, "
                "no INFERRED relationships, and all communities are tightly cohesive. "
                "Add more files or run with --mode deep to extract richer edges."
            ),
        }]

    return questions[:top_n]
348
+
349
+
350
def graph_diff(G_old: nx.Graph, G_new: nx.Graph) -> dict:
    """Compare two graph snapshots and return what changed.

    Edges are keyed by (source, target, relation), so a relation change
    appears as one removal plus one addition.

    Returns:
        {
            "new_nodes": [{"id": ..., "label": ...}],
            "removed_nodes": [{"id": ..., "label": ...}],
            "new_edges": [{"source": ..., "target": ..., "relation": ..., "confidence": ...}],
            "removed_edges": [...],
            "summary": "3 new nodes, 5 new edges, 1 node removed"
        }
    """
    prev_ids = set(G_old.nodes())
    curr_ids = set(G_new.nodes())

    def _node_entries(G: nx.Graph, ids) -> list[dict]:
        # One {"id", "label"} record per node id, label defaulting to the id.
        return [{"id": n, "label": G.nodes[n].get("label", n)} for n in ids]

    new_nodes_list = _node_entries(G_new, curr_ids - prev_ids)
    removed_nodes_list = _node_entries(G_old, prev_ids - curr_ids)

    def _keys(G: nx.Graph) -> set:
        return {(u, v, d.get("relation", "")) for u, v, d in G.edges(data=True)}

    def _edge_entries(G: nx.Graph, known: set) -> list[dict]:
        # Records for every edge of G whose key is NOT already in `known`.
        entries = []
        for u, v, d in G.edges(data=True):
            if (u, v, d.get("relation", "")) in known:
                continue
            entries.append({
                "source": u,
                "target": v,
                "relation": d.get("relation", ""),
                "confidence": d.get("confidence", ""),
            })
        return entries

    new_edges_list = _edge_entries(G_new, _keys(G_old))
    removed_edges_list = _edge_entries(G_old, _keys(G_new))

    def _plural(count: int, noun: str) -> str:
        return f"{count} {noun}{'s' if count != 1 else ''}"

    parts = []
    if new_nodes_list:
        parts.append(_plural(len(new_nodes_list), "new node"))
    if new_edges_list:
        parts.append(_plural(len(new_edges_list), "new edge"))
    if removed_nodes_list:
        parts.append(_plural(len(removed_nodes_list), "node") + " removed")
    if removed_edges_list:
        parts.append(_plural(len(removed_edges_list), "edge") + " removed")
    summary = ", ".join(parts) if parts else "no changes"

    return {
        "new_nodes": new_nodes_list,
        "removed_nodes": removed_nodes_list,
        "new_edges": new_edges_list,
        "removed_edges": removed_edges_list,
        "summary": summary,
    }
graphify/benchmark.py ADDED
@@ -0,0 +1,126 @@
1
+ """Token-reduction benchmark — measures how much context graphify saves vs naive full-corpus approach."""
2
+ from __future__ import annotations
3
+ import json
4
+ from pathlib import Path
5
+ import networkx as nx
6
+ from networkx.readwrite import json_graph
7
+
8
+
9
_CHARS_PER_TOKEN = 4  # standard approximation


def _estimate_tokens(text: str) -> int:
    """Approximate token count for *text*: ~4 chars per token, minimum 1."""
    approx = len(text) // _CHARS_PER_TOKEN
    return max(1, approx)
14
+
15
+
16
def _query_subgraph_tokens(G: nx.Graph, question: str, depth: int = 3) -> int:
    """
    Estimate the token cost of answering *question* from the graph.

    Picks up to 3 nodes whose labels best match the question's words
    (words of length > 2 only), expands `depth` BFS hops from them, and
    measures the serialized NODE/EDGE context. Returns 0 when no node
    label matches at all.
    """
    terms = [word.lower() for word in question.split() if len(word) > 2]
    matches = []
    for node_id, attrs in G.nodes(data=True):
        label = attrs.get("label", "").lower()
        hits = sum(term in label for term in terms)
        if hits:
            matches.append((hits, node_id))
    matches.sort(reverse=True)
    seeds = [node_id for _, node_id in matches[:3]]
    if not seeds:
        return 0

    # Breadth-first expansion, recording the traversal edges as we go.
    visited: set[str] = set(seeds)
    frontier = set(seeds)
    edges_seen: list[tuple] = []
    for _ in range(depth):
        next_frontier: set[str] = set()
        for node in frontier:
            for nb in G.neighbors(node):
                if nb not in visited:
                    next_frontier.add(nb)
                    edges_seen.append((node, nb))
        visited.update(next_frontier)
        frontier = next_frontier

    # Serialize the subgraph the way a query would present it.
    context = []
    for node_id in visited:
        attrs = G.nodes[node_id]
        context.append(f"NODE {attrs.get('label', node_id)} src={attrs.get('source_file', '')} loc={attrs.get('source_location', '')}")
    for u, v in edges_seen:
        if u in visited and v in visited:
            attrs = G.edges[u, v]
            context.append(f"EDGE {G.nodes[u].get('label', u)} --{attrs.get('relation', '')}--> {G.nodes[v].get('label', v)}")

    return _estimate_tokens("\n".join(context))
53
+
54
+
55
# Default benchmark questions: generic enough to match common node labels
# (auth, entry point, errors, data/api layers, core abstractions) in most
# corpora. run_benchmark() silently skips any question with zero matches.
_SAMPLE_QUESTIONS = [
    "how does authentication work",
    "what is the main entry point",
    "how are errors handled",
    "what connects the data layer to the api",
    "what are the core abstractions",
]
62
+
63
+
64
def run_benchmark(
    graph_path: str = ".graphify/graph.json",
    corpus_words: int | None = None,
    questions: list[str] | None = None,
) -> dict:
    """Measure token reduction: corpus tokens vs graphify query tokens.

    Args:
        graph_path: path to the built graph (node-link JSON)
        corpus_words: total word count from detect() output; if None, estimated from graph
        questions: list of questions to benchmark; defaults to _SAMPLE_QUESTIONS

    Returns dict with: corpus_tokens, corpus_words, nodes, edges,
    avg_query_tokens, reduction_ratio, per_question — or {"error": ...}
    when no question matches any node.
    """
    payload = json.loads(Path(graph_path).read_text())
    G = json_graph.node_link_graph(payload, edges="links")

    if corpus_words is None:
        # Rough estimate: each node label is ~3 words, plus source context
        corpus_words = 50 * G.number_of_nodes()

    # words → tokens (100 words ≈ 133 tokens)
    corpus_tokens = corpus_words * 100 // 75

    per_question = []
    for question in (questions or _SAMPLE_QUESTIONS):
        cost = _query_subgraph_tokens(G, question)
        if cost > 0:
            per_question.append({
                "question": question,
                "query_tokens": cost,
                "reduction": round(corpus_tokens / cost, 1),
            })

    if not per_question:
        return {"error": "No matching nodes found for sample questions. Build the graph first."}

    avg_query_tokens = sum(item["query_tokens"] for item in per_question) // len(per_question)
    reduction_ratio = round(corpus_tokens / avg_query_tokens, 1) if avg_query_tokens > 0 else 0

    return {
        "corpus_tokens": corpus_tokens,
        "corpus_words": corpus_words,
        "nodes": G.number_of_nodes(),
        "edges": G.number_of_edges(),
        "avg_query_tokens": avg_query_tokens,
        "reduction_ratio": reduction_ratio,
        "per_question": per_question,
    }
109
+
110
+
111
def print_benchmark(result: dict) -> None:
    """Print a human-readable report for a run_benchmark() result dict.

    Prints a single error line when the result carries an "error" key;
    otherwise prints the corpus/graph sizes, average query cost, overall
    reduction ratio, and one line per benchmarked question.
    """
    if "error" in result:
        print(f"Benchmark error: {result['error']}")
        return

    # Fixed: the title and "Per question" lines were f-strings with no
    # placeholders (ruff F541); the rule line needed no f-string either.
    print("\ngraphify token reduction benchmark")
    print("─" * 50)
    print(f" Corpus: {result['corpus_words']:,} words → ~{result['corpus_tokens']:,} tokens (naive)")
    print(f" Graph: {result['nodes']:,} nodes, {result['edges']:,} edges")
    print(f" Avg query cost: ~{result['avg_query_tokens']:,} tokens")
    print(f" Reduction: {result['reduction_ratio']}x fewer tokens per query")
    print("\n Per question:")
    for p in result["per_question"]:
        # Truncate long questions so each row stays on one line.
        print(f" [{p['reduction']}x] {p['question'][:55]}")
    print()