PyPI - ontosight-codegraph - Versions diffs - 0.1.0__py3-none-any.whl - Mend

ontosight-codegraph 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

ontosight_codegraph/__init__.py +38 -0
ontosight_codegraph/cli.py +96 -0
ontosight_codegraph/query.py +59 -0
ontosight_codegraph/store.py +589 -0
ontosight_codegraph/topology.py +262 -0
ontosight_codegraph/topology_display.py +102 -0
ontosight_codegraph/view.py +267 -0
ontosight_codegraph-0.1.0.dist-info/METADATA +71 -0
ontosight_codegraph-0.1.0.dist-info/RECORD +11 -0
ontosight_codegraph-0.1.0.dist-info/WHEEL +4 -0
ontosight_codegraph-0.1.0.dist-info/entry_points.txt +2 -0

ontosight_codegraph/topology.py ADDED Viewed

@@ -0,0 +1,262 @@
+"""Graph topology analysis — rank nodes by connectivity and structural importance."""
+from __future__ import annotations
+from dataclasses import asdict, dataclass
+from typing import Any, Callable, List, Optional, Sequence, Tuple, TypeVar
+# Generic placeholders for caller-supplied node and edge objects; the ranking
+# functions only read them through the extractor callables they are given.
+NodeSchema = TypeVar("NodeSchema")
+EdgeSchema = TypeVar("EdgeSchema")
+# Tier labels stored in NodeRanking.tier (assigned by _assign_tiers).
+TIER_CRITICAL = "critical"
+TIER_HIGH = "high"
+TIER_MEDIUM = "medium"
+TIER_LOW = "low"
+# Metric names accepted by the ``metric`` argument of the ranking functions;
+# anything else makes _rank_nodes raise ValueError.
+VALID_METRICS = frozenset({"degree", "betweenness", "composite"})
+def _ensure_networkx():
+    """Lazy import networkx; optional for betweenness and articulation."""
+    try:
+        import networkx as nx
+        return nx
+    except ImportError:
+        return None
+def _normalize(values: dict[str, float]) -> dict[str, float]:
+    if not values:
+        return {}
+    lo = min(values.values())
+    hi = max(values.values())
+    if hi == lo:
+        return {k: 1.0 if hi > 0 else 0.0 for k in values}
+    return {k: (v - lo) / (hi - lo) for k, v in values.items()}
+def _assign_tiers(
+    node_ids: list[str],
+    importance: dict[str, float],
+    articulation: set[str],
+) -> dict[str, str]:
+    if not node_ids:
+        return {}
+    sorted_ids = sorted(node_ids, key=lambda nid: importance.get(nid, 0.0), reverse=True)
+    n = len(sorted_ids)
+    critical_cutoff = max(1, int(n * 0.1))
+    high_cutoff = max(critical_cutoff + 1, int(n * 0.25))
+    tiers: dict[str, str] = {}
+    for i, nid in enumerate(sorted_ids):
+        if nid in articulation or i < critical_cutoff:
+            tiers[nid] = TIER_CRITICAL
+        elif i < high_cutoff:
+            tiers[nid] = TIER_HIGH
+        elif i < max(high_cutoff + 1, int(n * 0.6)):
+            tiers[nid] = TIER_MEDIUM
+        else:
+            tiers[nid] = TIER_LOW
+    return tiers
+@dataclass(frozen=True)
+class NodeRanking:
+    """Ranked node with topology metrics (immutable record)."""
+    # Node identifier, stringified by the ranking code.
+    node_id: str
+    # Display label; falls back to the node id when no label extractor is given.
+    label: str
+    # Count of non-self-loop edge endpoints touching this node (see _compute_degrees).
+    degree: int
+    # Betweenness centrality rounded to 6 decimals; 0.0 when it was not computed.
+    betweenness: float
+    # True when the node was reported as an articulation point of the graph.
+    is_articulation: bool
+    # Final score for the selected metric, rounded to 6 decimals.
+    importance: float
+    # One of the TIER_* labels.
+    tier: str
+    def model_dump(self) -> dict[str, Any]:
+        """Return all fields as a plain dict (via ``dataclasses.asdict``)."""
+        return asdict(self)
+def _build_undirected_graph(
+    node_ids: set[str],
+    edges: list[tuple[str, str]],
+) -> Any:
+    nx = _ensure_networkx()
+    if nx is None:
+        return None
+    g = nx.Graph()
+    g.add_nodes_from(node_ids)
+    for a, b in edges:
+        if a in node_ids and b in node_ids and a != b:
+            g.add_edge(a, b)
+    return g
+def _compute_degrees(
+    node_ids: set[str],
+    edges: list[tuple[str, str]],
+) -> dict[str, int]:
+    degree: dict[str, int] = {nid: 0 for nid in node_ids}
+    for a, b in edges:
+        if a not in node_ids or b not in node_ids:
+            continue
+        if a == b:
+            continue
+        degree[a] += 1
+        degree[b] += 1
+    return degree
+def _rank_nodes(
+    node_list: Sequence[NodeSchema],
+    edge_pairs: list[tuple[str, str]],
+    *,
+    node_id_extractor: Callable[[NodeSchema], str],
+    node_label_extractor: Optional[Callable[[NodeSchema], str]],
+    top_k: int = 10,
+    metric: str = "composite",
+    include_betweenness: bool = True,
+) -> list[NodeRanking]:
+    if metric not in VALID_METRICS:
+        raise ValueError(f"metric must be one of {sorted(VALID_METRICS)}, got {metric!r}")
+    label_fn = node_label_extractor or (lambda n: str(node_id_extractor(n)))
+    node_ids: set[str] = set()
+    labels: dict[str, str] = {}
+    for node in node_list:
+        nid = str(node_id_extractor(node))
+        node_ids.add(nid)
+        labels[nid] = str(label_fn(node))
+    if not node_ids:
+        return []
+    degree = _compute_degrees(node_ids, edge_pairs)
+    nx_graph = _build_undirected_graph(node_ids, edge_pairs) if include_betweenness else None
+    betweenness: dict[str, float] = {nid: 0.0 for nid in node_ids}
+    articulation: set[str] = set()
+    if nx_graph is not None and nx_graph.number_of_edges() > 0:
+        nx = _ensure_networkx()
+        assert nx is not None
+        raw_betweenness = nx.betweenness_centrality(nx_graph, normalized=True)
+        betweenness = {nid: float(raw_betweenness.get(nid, 0.0)) for nid in node_ids}
+        articulation = set(nx.articulation_points(nx_graph))
+    norm_degree = _normalize({nid: float(degree[nid]) for nid in node_ids})
+    norm_betweenness = _normalize(betweenness)
+    importance: dict[str, float] = {}
+    for nid in node_ids:
+        if metric == "degree":
+            importance[nid] = norm_degree[nid]
+        elif metric == "betweenness":
+            importance[nid] = norm_betweenness[nid]
+        else:
+            if nx_graph is not None and nx_graph.number_of_edges() > 0:
+                importance[nid] = 0.6 * norm_degree[nid] + 0.4 * norm_betweenness[nid]
+            else:
+                importance[nid] = norm_degree[nid]
+    tiers = _assign_tiers(list(node_ids), importance, articulation)
+    rankings = [
+        NodeRanking(
+            node_id=nid,
+            label=labels[nid],
+            degree=degree[nid],
+            betweenness=round(betweenness[nid], 6),
+            is_articulation=nid in articulation,
+            importance=round(importance[nid], 6),
+            tier=tiers[nid],
+        )
+        for nid in node_ids
+    ]
+    rankings.sort(
+        key=lambda r: (r.importance, r.degree, r.betweenness, r.label),
+        reverse=True,
+    )
+    if top_k > 0:
+        rankings = rankings[:top_k]
+    return rankings
+def rank_graph_nodes(
+    node_list: Sequence[NodeSchema],
+    edge_list: Sequence[EdgeSchema],
+    *,
+    node_id_extractor: Callable[[NodeSchema], str],
+    node_ids_in_edge_extractor: Callable[[EdgeSchema], Tuple[str, str]],
+    node_label_extractor: Optional[Callable[[NodeSchema], str]] = None,
+    top_k: int = 10,
+    metric: str = "composite",
+    include_betweenness: bool = True,
+) -> list[NodeRanking]:
+    """Rank nodes in a pairwise graph by topology metrics."""
+    edge_pairs: list[tuple[str, str]] = []
+    for edge in edge_list:
+        source_id, target_id = node_ids_in_edge_extractor(edge)
+        edge_pairs.append((str(source_id), str(target_id)))
+    return _rank_nodes(
+        node_list,
+        edge_pairs,
+        node_id_extractor=node_id_extractor,
+        node_label_extractor=node_label_extractor,
+        top_k=top_k,
+        metric=metric,
+        include_betweenness=include_betweenness,
+    )
+def rank_hypergraph_nodes(
+    node_list: Sequence[NodeSchema],
+    edge_list: Sequence[EdgeSchema],
+    *,
+    node_id_extractor: Callable[[NodeSchema], str],
+    nodes_in_edge_extractor: Callable[[EdgeSchema], Sequence[str]],
+    node_label_extractor: Optional[Callable[[NodeSchema], str]] = None,
+    top_k: int = 10,
+    metric: str = "composite",
+    include_betweenness: bool = True,
+) -> list[NodeRanking]:
+    """Rank nodes in a hypergraph by hyperedge participation (clique-expanded for betweenness)."""
+    edge_pairs: list[tuple[str, str]] = []
+    for edge in edge_list:
+        members = [str(m) for m in nodes_in_edge_extractor(edge)]
+        unique_members = list(dict.fromkeys(members))
+        for i, a in enumerate(unique_members):
+            for b in unique_members[i + 1 :]:
+                edge_pairs.append((a, b))
+    return _rank_nodes(
+        node_list,
+        edge_pairs,
+        node_id_extractor=node_id_extractor,
+        node_label_extractor=node_label_extractor,
+        top_k=top_k,
+        metric=metric,
+        include_betweenness=include_betweenness,
+    )
+def topology_summary(rankings: list[NodeRanking], total_nodes: int) -> dict[str, Any]:
+    """Aggregate KPIs for CLI / MCP display."""
+    if total_nodes == 0:
+        return {
+            "total_nodes": 0,
+            "avg_degree": 0.0,
+            "articulation_count": 0,
+            "critical_count": 0,
+        }
+    all_degrees = [r.degree for r in rankings]
+    avg_degree = sum(all_degrees) / total_nodes if total_nodes else 0.0
+    return {
+        "total_nodes": total_nodes,
+        "avg_degree": round(avg_degree, 2),
+        "articulation_count": sum(1 for r in rankings if r.is_articulation),
+        "critical_count": sum(1 for r in rankings if r.tier == TIER_CRITICAL),
+    }

ontosight_codegraph/topology_display.py ADDED Viewed

@@ -0,0 +1,102 @@
+"""Rich terminal display for graph topology rankings."""
+from __future__ import annotations
+from typing import Any, Optional
+from rich.console import Console
+from rich.table import Table
+# Rich style string per tier label, used to colour the "Tier" column of the
+# table; a tier with no entry here is printed without styling.
+_TIER_STYLES = {
+    "critical": "bold #F59E0B",
+    "high": "#1E40AF",
+    "medium": "dim",
+    "low": "dim italic",
+}
+def _summary_from_rows(rankings: list[dict[str, Any]], total_nodes: int) -> dict[str, Any]:
+    if total_nodes == 0:
+        return {
+            "total_nodes": 0,
+            "avg_degree": 0.0,
+            "articulation_count": 0,
+            "critical_count": 0,
+        }
+    return {
+        "total_nodes": total_nodes,
+        "avg_degree": round(
+            sum(r.get("degree", 0) for r in rankings) / total_nodes,
+            2,
+        ),
+        "articulation_count": sum(1 for r in rankings if r.get("is_articulation")),
+        "critical_count": sum(1 for r in rankings if r.get("tier") == "critical"),
+    }
+def print_topology_table(
+    rankings: list[dict[str, Any]],
+    *,
+    total_nodes: int,
+    metric: str = "composite",
+    console: Optional[Console] = None,
+) -> None:
+    """Print a data-dense critical-nodes table to the terminal."""
+    if not rankings:
+        return
+    out = console or Console()
+    summary = _summary_from_rows(rankings, total_nodes)
+    out.print(
+        f"[dim]Topology[/dim]  nodes={summary['total_nodes']}  "
+        f"avg_degree={summary['avg_degree']}  "
+        f"articulation={summary['articulation_count']}  "
+        f"critical={summary['critical_count']}  "
+        f"metric={metric}"
+    )
+    table = Table(title="Critical / Hub Nodes", show_header=True, header_style="bold")
+    table.add_column("#", justify="right", style="dim", width=4)
+    table.add_column("Node", style="cyan", max_width=40, overflow="ellipsis")
+    table.add_column("Degree", justify="right")
+    table.add_column("Between.", justify="right")
+    table.add_column("Importance", justify="right")
+    table.add_column("Tier")
+    for i, row in enumerate(rankings, start=1):
+        tier = row.get("tier", "low")
+        style = _TIER_STYLES.get(tier, "")
+        articulation = " [dim]cut[/dim]" if row.get("is_articulation") else ""
+        table.add_row(
+            str(i),
+            str(row.get("label", row.get("node_id", ""))) + articulation,
+            str(row.get("degree", 0)),
+            f"{row.get('betweenness', 0):.3f}",
+            f"{row.get('importance', 0):.3f}",
+            f"[{style}]{tier}[/{style}]" if style else tier,
+        )
+    out.print(table)
+def rankings_to_json_payload(
+    rankings: list[dict[str, Any]],
+    *,
+    total_nodes: int,
+    metric: str = "composite",
+    all_rankings: Optional[list[dict[str, Any]]] = None,
+) -> dict[str, Any]:
+    """Serialize rankings for ``he analyze --json`` and MCP."""
+    full = all_rankings if all_rankings is not None else rankings
+    return {
+        "metric": metric,
+        "total_nodes": total_nodes,
+        "summary": _summary_from_rows(full, total_nodes),
+        "rankings": rankings,
+        "critical_node_ids": [
+            r["node_id"]
+            for r in full
+            if r.get("tier") in ("critical", "high")
+        ],
+    }

ontosight_codegraph/view.py ADDED Viewed

@@ -0,0 +1,267 @@
+"""Launch OntoSight visualization for a CodeGraph call subgraph."""
+from __future__ import annotations
+import logging
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+from ontosight import view_graph
+from ontosight.core.storage import GraphStorage
+from ontosight.server.state import global_state
+from ontosight_codegraph.store import (
+    CodeCallEdge,
+    CodeSymbolNode,
+    SubgraphResult,
+    load_call_subgraph,
+    make_search_callback,
+)
+from ontosight_codegraph.topology import (
+    TIER_CRITICAL,
+    TIER_HIGH,
+    rank_graph_nodes,
+)
+# Module-level logger named after this module; show_codegraph logs to it.
+logger = logging.getLogger(__name__)
+def _node_label(node: CodeSymbolNode) -> str:
+    return f"{node.name} ({node.kind})"
+def _topology_view_context(
+    nodes: List[CodeSymbolNode],
+    edges: List[CodeCallEdge],
+    *,
+    top_k: int = 10,
+    highlight_critical: bool = True,
+) -> dict:
+    all_rankings = rank_graph_nodes(
+        nodes,
+        edges,
+        node_id_extractor=lambda n: n.id,
+        node_ids_in_edge_extractor=lambda e: (e.source_id, e.target_id),
+        node_label_extractor=_node_label,
+        top_k=0,
+        metric="composite",
+    )
+    display_rankings = all_rankings[:top_k] if top_k > 0 else all_rankings
+    critical_ids = [
+        r.node_id for r in all_rankings if r.tier in (TIER_CRITICAL, TIER_HIGH)
+    ]
+    if not highlight_critical:
+        critical_ids = []
+    return {
+        "node_rankings": [r.model_dump() for r in display_rankings],
+        "critical_node_ids": critical_ids,
+        "topology_metric": "composite",
+        "topology_total_nodes": len(nodes),
+    }
+def _build_view_payload(
+    graph_export: Dict[str, Any],
+    *,
+    rankings: List[dict],
+    meta: Dict[str, Any],
+    truncated: bool,
+    filter_summary: str,
+    languages: List[str],
+) -> Dict[str, Any]:
+    return {
+        "nodes": graph_export.get("nodes", []),
+        "edges": graph_export.get("edges", []),
+        "rankings": rankings,
+        "meta": meta,
+        "truncated": truncated,
+        "filter_summary": filter_summary,
+        "languages": languages,
+    }
+def apply_subgraph_to_view(
+    result: SubgraphResult,
+    *,
+    max_nodes: int,
+    top_k_critical: int = 10,
+    highlight_critical: bool = True,
+) -> Dict[str, Any]:
+    """Rebuild GraphStorage and visualization metadata from a subgraph result.
+    Args:
+        result: Subgraph whose nodes, edges, languages, filter summary and
+            truncation flag are read.
+        max_nodes: Only used in the "Truncated at N nodes" note.
+        top_k_critical: Number of rankings exported for display.
+        highlight_critical: When false, no critical node ids are exported.
+    Returns:
+        The payload built by ``_build_view_payload`` from the exported graph,
+        rankings and metadata.
+    Raises:
+        ValueError: If ``result.nodes`` is empty.
+    """
+    if not result.nodes:
+        raise ValueError(
+            "No symbols found for the given filters. "
+            "Try a broader path, symbol, or task."
+        )
+    topology_context = _topology_view_context(
+        result.nodes,
+        result.edges,
+        top_k=top_k_critical,
+        highlight_critical=highlight_critical,
+    )
+    languages = ", ".join(result.languages) if result.languages else "unknown"
+    meta_stats = {
+        "Nodes": len(result.nodes),
+        "Edges": len(result.edges),
+        "Source": "CodeGraph",
+        "Filter": result.filter_summary,
+        "Languages": languages,
+    }
+    if result.truncated:
+        meta_stats["Note"] = f"Truncated at {max_nodes} nodes"
+    storage = GraphStorage(
+        node_list=result.nodes,
+        edge_list=result.edges,
+        node_id_extractor=lambda n: n.id,
+        node_ids_in_edge_extractor=lambda e: (e.source_id, e.target_id),
+        edge_label_extractor=lambda _: "calls",
+        node_label_extractor=_node_label,
+        node_rankings=topology_context.get("node_rankings"),
+        critical_node_ids=topology_context.get("critical_node_ids"),
+    )
+    # Install the new storage on the shared global state before reading stats.
+    global_state.set_storage(storage)
+    stats = storage.get_stats()
+    # NOTE(review): meta_stats is spread last, so its "Nodes"/"Edges" values
+    # (len of the result lists) replace the storage stats written just above
+    # them -- confirm that this override is intended.
+    meta_data = {
+        "Nodes": stats["total_nodes"],
+        "Edges": stats["total_edges"],
+        "Average Node Degree": stats["avg_degree"],
+        "Average Edge Degree": 2,
+        **meta_stats,
+    }
+    # Publish metadata and topology results under the same keys that
+    # show_codegraph places in its viewer context.
+    global_state.set_visualization_data("meta_data", meta_data)
+    global_state.set_visualization_data("source", "codegraph")
+    global_state.set_visualization_data("filter_summary", result.filter_summary)
+    global_state.set_visualization_data("codegraph_languages", result.languages)
+    global_state.set_visualization_data("codegraph_meta_stats", meta_stats)
+    global_state.set_visualization_data(
+        "node_rankings", topology_context.get("node_rankings", [])
+    )
+    global_state.set_visualization_data(
+        "critical_node_ids", topology_context.get("critical_node_ids", [])
+    )
+    global_state.set_visualization_data(
+        "topology_metric", topology_context.get("topology_metric", "composite")
+    )
+    global_state.set_visualization_data(
+        "topology_total_nodes",
+        topology_context.get("topology_total_nodes", len(result.nodes)),
+    )
+    graph_export = storage.export_full_graph()
+    return _build_view_payload(
+        graph_export,
+        rankings=topology_context.get("node_rankings", []),
+        meta=meta_data,
+        truncated=result.truncated,
+        filter_summary=result.filter_summary,
+        languages=result.languages,
+    )
+def show_codegraph(
+    project_path: Path,
+    *,
+    path_filter: Optional[str] = None,
+    symbol: Optional[str] = None,
+    task: Optional[str] = None,
+    hops: int = 2,
+    max_nodes: int = 200,
+    top_k_critical: int = 10,
+    highlight_critical: bool = True,
+    print_topology_summary: bool = True,
+) -> SubgraphResult:
+    """Visualize a CodeGraph call subgraph in OntoSight.
+    Loads the subgraph for ``project_path`` with the given filters, ranks its
+    nodes, optionally prints the topology table, then calls ``view_graph``
+    with the nodes, edges, search/query callbacks and a context dict.
+    Args:
+        project_path: Project directory passed to the loader and callbacks.
+        path_filter: Forwarded to the loader and both callbacks.
+        symbol: Forwarded to the loader.
+        task: Forwarded to the loader.
+        hops: Forwarded to the loader; also the query callback's default.
+        max_nodes: Forwarded to the loader; also the query callback's default
+            and the number shown in the truncation note.
+        top_k_critical: Number of rankings exported and printed.
+        highlight_critical: When false, no critical node ids are exported.
+        print_topology_summary: Print the rankings table before opening the viewer.
+    Returns:
+        The loaded ``SubgraphResult``.
+    Raises:
+        ValueError: If the loaded subgraph has no nodes.
+    """
+    result = load_call_subgraph(
+        project_path,
+        path_filter=path_filter,
+        symbol=symbol,
+        task=task,
+        hops=hops,
+        max_nodes=max_nodes,
+    )
+    if not result.nodes:
+        raise ValueError(
+            "No symbols found for the given filters. "
+            "Try a broader --path, --symbol, or --task."
+        )
+    topology_context = _topology_view_context(
+        result.nodes,
+        result.edges,
+        top_k=top_k_critical,
+        highlight_critical=highlight_critical,
+    )
+    if print_topology_summary and topology_context.get("node_rankings"):
+        # Imported lazily, only when the table is actually printed.
+        from ontosight_codegraph.topology_display import print_topology_table
+        print_topology_table(
+            topology_context["node_rankings"],
+            total_nodes=topology_context.get("topology_total_nodes", len(result.nodes)),
+            metric=topology_context.get("topology_metric", "composite"),
+        )
+    languages = ", ".join(result.languages) if result.languages else "unknown"
+    meta_stats = {
+        "Nodes": len(result.nodes),
+        "Edges": len(result.edges),
+        "Source": "CodeGraph",
+        "Filter": result.filter_summary,
+        "Languages": languages,
+    }
+    if result.truncated:
+        meta_stats["Note"] = f"Truncated at {max_nodes} nodes"
+    # Viewer context: topology results plus the CodeGraph settings used for
+    # this load (project path, hops, max nodes, path filter).
+    context = {
+        **topology_context,
+        "source": "codegraph",
+        "filter_summary": result.filter_summary,
+        "codegraph_languages": result.languages,
+        "codegraph_meta_stats": meta_stats,
+        "codegraph_truncated": result.truncated,
+        "codegraph_project_path": str(project_path.resolve()),
+        "codegraph_default_hops": hops,
+        "codegraph_default_max_nodes": max_nodes,
+        "codegraph_default_path_filter": path_filter,
+    }
+    search_callback = make_search_callback(project_path, path_filter)
+    # Function-scope import; presumably avoids a circular import with
+    # ontosight_codegraph.query -- TODO confirm.
+    from ontosight_codegraph.query import make_query_callback
+    query_callback = make_query_callback(
+        project_path,
+        default_path_filter=path_filter,
+        default_hops=hops,
+        default_max_nodes=max_nodes,
+        top_k_critical=top_k_critical,
+        highlight_critical=highlight_critical,
+    )
+    logger.info(
+        "Opening CodeGraph viewer: nodes=%d edges=%d filter=%s",
+        len(result.nodes),
+        len(result.edges),
+        result.filter_summary,
+    )
+    view_graph(
+        node_list=result.nodes,
+        edge_list=result.edges,
+        node_schema=CodeSymbolNode,
+        edge_schema=CodeCallEdge,
+        node_id_extractor=lambda n: n.id,
+        node_ids_in_edge_extractor=lambda e: (e.source_id, e.target_id),
+        node_label_extractor=_node_label,
+        edge_label_extractor=lambda _: "calls",
+        on_search=search_callback,
+        on_chat=None,
+        context=context,
+        callbacks={"codegraph_query": query_callback},
+    )
+    return result

ontosight_codegraph-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,71 @@
+Metadata-Version: 2.4
+Name: ontosight-codegraph
+Version: 0.1.0
+Summary: Visualize CodeGraph call subgraphs in OntoSight
+Project-URL: Homepage, https://github.com/yifanfeng97/hyper-extract
+Project-URL: Repository, https://github.com/yifanfeng97/hyper-extract
+Author-email: Yifan Feng <evanfeng97@gmail.com>
+License: Apache-2.0
+Keywords: call-graph,codegraph,ontosight,visualization
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.11
+Requires-Dist: networkx>=3.0
+Requires-Dist: ontosight>=0.2.0
+Requires-Dist: pydantic>=2.0
+Requires-Dist: rich>=13.7.0
+Requires-Dist: typer>=0.13.0
+Provides-Extra: dev
+Requires-Dist: pytest>=9.0.0; extra == 'dev'
+Description-Content-Type: text/markdown
+# OntoSight CodeGraph
+Read a local [CodeGraph](https://github.com/colbymchenry/codegraph) index (`.codegraph/codegraph.db`) and visualize call subgraphs in [OntoSight](https://pypi.org/project/ontosight/).
+## Install
+```bash
+pip install ontosight-codegraph
+# or
+uvx ontosight-codegraph .
+```
+## Usage
+```bash
+# Ensure index exists
+npx @colbymchenry/codegraph init -i
+# Auto-seed from highest fan-in symbols
+ontosight-codegraph .
+# Seed around a symbol
+ontosight-codegraph . --symbol view_graph --path vendor/ontosight/
+# Task-scoped subgraph
+ontosight-codegraph . --task "auth flow" --hops 2 --max-nodes 200
+```
+## npm wrapper
+For a zero-Python-install workflow (uses `uvx` under the hood; auto-runs CodeGraph init when the index is missing):
+```bash
+npx @royalsolution/ontosight .
+```
+See [`packages/ontosight/`](../ontosight/) and [`packages/ontosight/AGENTS.md`](../ontosight/AGENTS.md) for npm package details and **AI agent usage**.
+## Publish (maintainers)
+```bash
+cd packages/ontosight-codegraph
+python -m build
+twine check dist/*
+twine upload dist/*
+```