codebeacon-0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebeacon/__init__.py +1 -0
- codebeacon/__main__.py +3 -0
- codebeacon/cache.py +136 -0
- codebeacon/cli.py +391 -0
- codebeacon/common/__init__.py +0 -0
- codebeacon/common/filters.py +170 -0
- codebeacon/common/symbols.py +121 -0
- codebeacon/common/types.py +98 -0
- codebeacon/config.py +144 -0
- codebeacon/contextmap/__init__.py +0 -0
- codebeacon/contextmap/generator.py +602 -0
- codebeacon/discover/__init__.py +0 -0
- codebeacon/discover/detector.py +388 -0
- codebeacon/discover/scanner.py +192 -0
- codebeacon/export/__init__.py +0 -0
- codebeacon/export/mcp.py +515 -0
- codebeacon/export/obsidian.py +812 -0
- codebeacon/extract/__init__.py +22 -0
- codebeacon/extract/base.py +372 -0
- codebeacon/extract/components.py +357 -0
- codebeacon/extract/dependencies.py +140 -0
- codebeacon/extract/entities.py +575 -0
- codebeacon/extract/queries/README.md +116 -0
- codebeacon/extract/queries/actix.scm +115 -0
- codebeacon/extract/queries/angular.scm +155 -0
- codebeacon/extract/queries/aspnet.scm +159 -0
- codebeacon/extract/queries/django.scm +122 -0
- codebeacon/extract/queries/express.scm +124 -0
- codebeacon/extract/queries/fastapi.scm +152 -0
- codebeacon/extract/queries/flask.scm +120 -0
- codebeacon/extract/queries/gin.scm +142 -0
- codebeacon/extract/queries/ktor.scm +144 -0
- codebeacon/extract/queries/laravel.scm +172 -0
- codebeacon/extract/queries/nestjs.scm +183 -0
- codebeacon/extract/queries/rails.scm +114 -0
- codebeacon/extract/queries/react.scm +111 -0
- codebeacon/extract/queries/spring_boot.scm +204 -0
- codebeacon/extract/queries/svelte.scm +73 -0
- codebeacon/extract/queries/vapor.scm +130 -0
- codebeacon/extract/queries/vue.scm +123 -0
- codebeacon/extract/routes.py +910 -0
- codebeacon/extract/semantic.py +280 -0
- codebeacon/extract/services.py +597 -0
- codebeacon/graph/__init__.py +1 -0
- codebeacon/graph/analyze.py +281 -0
- codebeacon/graph/build.py +320 -0
- codebeacon/graph/cluster.py +160 -0
- codebeacon/graph/enrich.py +206 -0
- codebeacon/skill/SKILL.md +127 -0
- codebeacon/wave.py +292 -0
- codebeacon/wiki/__init__.py +0 -0
- codebeacon/wiki/generator.py +376 -0
- codebeacon/wiki/index.py +95 -0
- codebeacon/wiki/templates.py +467 -0
- codebeacon-0.1.2.dist-info/METADATA +319 -0
- codebeacon-0.1.2.dist-info/RECORD +59 -0
- codebeacon-0.1.2.dist-info/WHEEL +4 -0
- codebeacon-0.1.2.dist-info/entry_points.txt +2 -0
- codebeacon-0.1.2.dist-info/licenses/LICENSE +21 -0
codebeacon/graph/analyze.py
@@ -0,0 +1,281 @@

```python
"""Graph analysis: god nodes, surprising connections, hub files, cohesion scoring.

These metrics help users understand their codebase structure at a glance.

Public API:
    god_nodes(G, top_n, min_degree) → list[GodNode]
    surprising_connections(G, communities) → list[SurprisingConnection]
    hub_files(G, top_n) → list[HubFile]
    analyze(G, communities, cohesion_scores) → GraphReport
    report_to_markdown(report) → str
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Optional

import networkx as nx


# ── Data classes ──────────────────────────────────────────────────────────────

@dataclass
class GodNode:
    """A node with unusually high degree (hub / bottleneck)."""
    node_id: str
    label: str
    type: str
    in_degree: int
    out_degree: int
    degree: int
    centrality: float
    source_file: str


@dataclass
class SurprisingConnection:
    """A cross-community edge that may indicate unexpected coupling."""
    source_id: str
    source_label: str
    target_id: str
    target_label: str
    relation: str
    src_community: int
    tgt_community: int
    source_file: str


@dataclass
class HubFile:
    """A source file imported by many other files (potential God file)."""
    file_path: str
    import_count: int
    node_count: int


@dataclass
class GraphReport:
    """Complete analysis report for a built graph."""
    node_count: int = 0
    edge_count: int = 0
    community_count: int = 0
    god_nodes: list[GodNode] = field(default_factory=list)
    surprising_connections: list[SurprisingConnection] = field(default_factory=list)
    hub_files: list[HubFile] = field(default_factory=list)
    cohesion_scores: dict[int, float] = field(default_factory=dict)
    isolated_nodes: int = 0
    density: float = 0.0


# ── Analysis functions ────────────────────────────────────────────────────────

def god_nodes(
    G: nx.DiGraph,
    top_n: int = 20,
    min_degree: int = 5,
) -> list[GodNode]:
    """Find nodes with the highest degree (potential god classes / bottlenecks).

    Args:
        G: the knowledge graph
        top_n: return at most this many nodes
        min_degree: minimum total degree to qualify

    Returns:
        List of GodNode sorted by degree descending.
    """
    centrality = nx.degree_centrality(G)

    results: list[GodNode] = []
    for node_id, data in G.nodes(data=True):
        deg = G.degree(node_id)
        if deg < min_degree:
            continue
        results.append(GodNode(
            node_id=node_id,
            label=data.get("label", node_id),
            type=data.get("type", "unknown"),
            in_degree=G.in_degree(node_id),
            out_degree=G.out_degree(node_id),
            degree=deg,
            centrality=centrality.get(node_id, 0.0),
            source_file=data.get("source_file", ""),
        ))

    results.sort(key=lambda n: n.degree, reverse=True)
    return results[:top_n]
```
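A minimal sketch of calling `god_nodes` on a hand-built toy graph (the node IDs, labels, and `calls` relation here are illustrative; real graphs come from `build.py`):

```python
import networkx as nx
from codebeacon.graph.analyze import god_nodes

G = nx.DiGraph()
G.add_node("app::UserService", label="UserService", type="class", source_file="user.py")
for i in range(6):
    caller = f"app::Caller{i}"
    G.add_node(caller, label=f"Caller{i}", type="class", source_file="callers.py")
    G.add_edge(caller, "app::UserService", relation="calls")

top = god_nodes(G, top_n=5, min_degree=5)
# Only UserService qualifies: degree 6 >= min_degree; each Caller has degree 1.
print(top[0].label, top[0].in_degree, top[0].out_degree)  # UserService 6 0
```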
```python
def surprising_connections(
    G: nx.DiGraph,
    communities: dict[str, int],
    top_n: int = 20,
) -> list[SurprisingConnection]:
    """Find cross-community edges that may indicate unexpected coupling.

    Expected cross-service relations (calls_api, shares_db_entity) are excluded
    because they are intentional architectural connections.

    Args:
        G: the knowledge graph
        communities: node_id → community_id mapping from cluster.py
        top_n: return at most this many connections

    Returns:
        List of SurprisingConnection sorted by relation type (most surprising first).
    """
    # Relations that are expected to cross communities
    expected_relations = frozenset({"calls_api", "shares_db_entity"})
    # Priority: lower = more surprising
    priority = {"injects": 0, "calls": 1, "imports": 2, "imports_from": 3}

    results: list[SurprisingConnection] = []

    for src, tgt, edge_data in G.edges(data=True):
        relation = edge_data.get("relation", "")
        if relation in expected_relations:
            continue

        src_community = communities.get(src, -1)
        tgt_community = communities.get(tgt, -1)

        if src_community < 0 or tgt_community < 0:
            continue
        if src_community == tgt_community:
            continue

        src_data = G.nodes.get(src, {})
        tgt_data = G.nodes.get(tgt, {})

        results.append(SurprisingConnection(
            source_id=src,
            source_label=src_data.get("label", src),
            target_id=tgt,
            target_label=tgt_data.get("label", tgt),
            relation=relation,
            src_community=src_community,
            tgt_community=tgt_community,
            source_file=edge_data.get("source_file", ""),
        ))

    results.sort(key=lambda c: (priority.get(c.relation, 99), c.source_label))
    return results[:top_n]
```
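The priority table treats dependency-injection edges as the strongest coupling signal. A self-contained sketch with toy IDs and a hand-written community mapping:

```python
import networkx as nx
from codebeacon.graph.analyze import surprising_connections

G = nx.DiGraph()
G.add_node("app::OrderService", label="OrderService")
G.add_node("app::AuthHelper", label="AuthHelper")
G.add_edge("app::OrderService", "app::AuthHelper", relation="injects")

communities = {"app::OrderService": 0, "app::AuthHelper": 1}
conns = surprising_connections(G, communities)
print(conns[0].relation)  # "injects" — priority 0, ranked most surprising
```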
```python
def hub_files(
    G: nx.DiGraph,
    top_n: int = 20,
) -> list[HubFile]:
    """Find source files imported by many other files.

    Args:
        G: the knowledge graph
        top_n: return at most this many files

    Returns:
        List of HubFile sorted by import_count descending.
    """
    file_imports: dict[str, int] = {}
    file_nodes: dict[str, int] = {}

    for _node_id, data in G.nodes(data=True):
        sf = data.get("source_file", "")
        if sf:
            file_nodes[sf] = file_nodes.get(sf, 0) + 1

    for _src, _tgt, edge_data in G.edges(data=True):
        if edge_data.get("relation") not in ("imports", "imports_from"):
            continue
        sf = edge_data.get("source_file", "")
        if sf:
            file_imports[sf] = file_imports.get(sf, 0) + 1

    results = [
        HubFile(
            file_path=fp,
            import_count=cnt,
            node_count=file_nodes.get(fp, 0),
        )
        for fp, cnt in file_imports.items()
    ]
    results.sort(key=lambda h: h.import_count, reverse=True)
    return results[:top_n]


def analyze(
    G: nx.DiGraph,
    communities: Optional[dict[str, int]] = None,
    cohesion_scores: Optional[dict[int, float]] = None,
) -> GraphReport:
    """Run all analyses and return a unified GraphReport.

    Args:
        G: built knowledge graph (output of build.py + optional enrich.py)
        communities: optional community mapping from cluster.py
        cohesion_scores: optional per-community cohesion scores from cluster.score_all()
    """
    report = GraphReport(
        node_count=G.number_of_nodes(),
        edge_count=G.number_of_edges(),
        community_count=len(set(communities.values())) if communities else 0,
        cohesion_scores=cohesion_scores or {},
        density=nx.density(G),
        isolated_nodes=sum(1 for n in G.nodes() if G.degree(n) == 0),
    )

    report.god_nodes = god_nodes(G)
    report.hub_files = hub_files(G)

    if communities:
        report.surprising_connections = surprising_connections(G, communities)

    return report


def report_to_markdown(report: GraphReport) -> str:
    """Render a GraphReport as a Markdown string."""
    lines = [
        "# CodeBeacon Graph Report",
        "",
        "## Statistics",
        f"- Nodes: {report.node_count}",
        f"- Edges: {report.edge_count}",
        f"- Communities: {report.community_count}",
        f"- Graph density: {report.density:.4f}",
        f"- Isolated nodes: {report.isolated_nodes}",
        "",
    ]

    if report.god_nodes:
        lines += ["## God Nodes (High Coupling)", ""]
        lines.append(f"{'Node':<40} {'Type':<12} {'Degree':>6} {'Centrality':>10}")
        lines.append("-" * 72)
        for gn in report.god_nodes[:10]:
            lines.append(
                f"{gn.label:<40} {gn.type:<12} {gn.degree:>6} {gn.centrality:>10.4f}"
            )
        lines.append("")

    if report.surprising_connections:
        lines += ["## Surprising Connections (Cross-Community Coupling)", ""]
        for sc in report.surprising_connections[:10]:
            lines.append(
                f"- [{sc.relation}] {sc.source_label} (C{sc.src_community})"
                f" → {sc.target_label} (C{sc.tgt_community})"
            )
        lines.append("")

    if report.hub_files:
        lines += ["## Hub Files (Most Imported)", ""]
        for hf in report.hub_files[:10]:
            lines.append(f"- {hf.file_path} ({hf.import_count} imports)")
        lines.append("")

    if report.cohesion_scores:
        lines += ["## Community Cohesion Scores", ""]
        for cid, score in sorted(report.cohesion_scores.items()):
            lines.append(f"- Community {cid}: {score:.3f}")
        lines.append("")

    return "\n".join(lines)
```
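The module composes into a short workflow; a minimal end-to-end sketch on a toy graph, with no community mapping supplied:

```python
import networkx as nx
from codebeacon.graph.analyze import analyze, report_to_markdown

G = nx.DiGraph()
G.add_edge("app::A", "app::B", relation="calls")

report = analyze(G)  # communities omitted → surprising-connections pass is skipped
md = report_to_markdown(report)
assert md.startswith("# CodeBeacon Graph Report")
```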
codebeacon/graph/build.py
@@ -0,0 +1,320 @@

```python
"""Graph build: merge WaveResults → symbol resolve → filter → NetworkX DiGraph.

This is Pass 2 of the two-pass extraction pipeline.

Input:  list[WaveResult] from wave.auto_wave()
Output: networkx.DiGraph with annotated node and edge attributes

Pipeline:
    1. Convert WaveResult data → Node / Edge objects
    2. Build SymbolTable from all nodes across all projects
    3. Resolve UnresolvedRefs → Edges (interface→impl, direct class match)
    4. Apply filters: build artifacts, cross-language imports, cross-service false edges
    5. Construct NetworkX DiGraph (node attrs as flat key=value, not nested dicts)
"""

from __future__ import annotations

from typing import Any

import networkx as nx

from codebeacon.common.types import Edge, Node, UnresolvedRef
from codebeacon.common.symbols import SymbolTable
from codebeacon.common.filters import (
    filter_build_artifacts,
    filter_cross_language,
    filter_cross_service,
)
from codebeacon.wave import WaveResult


def build_graph(
    wave_results: list[WaveResult],
    apply_filters: bool = True,
) -> nx.DiGraph:
    """Build a NetworkX DiGraph from one or more WaveResults.

    Args:
        wave_results: list of WaveResult objects (one per project)
        apply_filters: whether to run build-artifact, cross-language,
            and cross-service filters (default: True)

    Returns:
        Annotated nx.DiGraph ready for enrichment, clustering, and analysis.
    """
    all_nodes: list[Node] = []
    all_edges: list[Edge] = []
    all_unresolved: list[UnresolvedRef] = []
    # node_id → project name, used by cross-service filter
    service_roots: dict[str, str] = {}

    for wave in wave_results:
        project_name = wave.project.name
        _ingest_wave(wave, project_name, all_nodes, all_edges, all_unresolved, service_roots)

    # Remap import edges: file_path → raw_import ➜ node_id → node_id
    all_edges = _remap_import_edges(all_nodes, all_edges)

    # Pass 2: resolve DI references
    symbol_table = SymbolTable()
    symbol_table.build(all_nodes)

    resolved_edges, _ = symbol_table.resolve_all(all_unresolved)
    all_edges.extend(resolved_edges)

    # Filter pass
    if apply_filters:
        all_nodes, all_edges = filter_build_artifacts(all_nodes, all_edges)
        node_dict = {n.id: n for n in all_nodes}
        all_edges = filter_cross_language(all_edges, node_dict)
        all_edges = filter_cross_service(all_edges, node_dict, service_roots)
    else:
        node_dict = {n.id: n for n in all_nodes}

    # Construct NetworkX DiGraph
    return _build_nx_graph(all_nodes, all_edges, node_dict)
```
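A sketch wiring the full pipeline together. The module docstring says the input comes from `wave.auto_wave()`, but the exact signature of `auto_wave` is an assumption here, so treat that call as illustrative:

```python
from codebeacon.wave import auto_wave            # exists per the docstring; signature assumed
from codebeacon.graph.build import build_graph
from codebeacon.graph.analyze import analyze, report_to_markdown

wave_results = auto_wave("path/to/monorepo")     # hypothetical call shape
G = build_graph(wave_results)                    # filters on by default
print(report_to_markdown(analyze(G)))
```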
```python
# ── Wave ingestion ────────────────────────────────────────────────────────────

def _ingest_wave(
    wave: WaveResult,
    project_name: str,
    all_nodes: list[Node],
    all_edges: list[Edge],
    all_unresolved: list[UnresolvedRef],
    service_roots: dict[str, str],
) -> None:
    """Convert one WaveResult's extraction data into Node/Edge/UnresolvedRef objects."""

    # Routes → route nodes
    for route in wave.routes:
        node_id = f"{project_name}::{route.handler}::route::{route.method}::{route.path}"
        node = Node(
            id=node_id,
            label=f"{route.handler} [{route.method} {route.path}]",
            type="route",
            source_file=route.source_file,
            line=route.line,
            metadata={
                "method": route.method,
                "path": route.path,
                "prefix": route.prefix,
                "framework": route.framework,
                "tags": route.tags,
                "project": project_name,
            },
        )
        all_nodes.append(node)
        service_roots[node_id] = project_name

    # Services → class nodes + unresolved DI refs
    for svc in wave.services:
        node_id = f"{project_name}::{svc.class_name}"
        node = Node(
            id=node_id,
            label=svc.class_name,
            type="class",
            source_file=svc.source_file,
            line=svc.line,
            metadata={
                "methods": svc.methods,
                "dependencies": svc.dependencies,
                "annotations": svc.annotations,
                "framework": svc.framework,
                "project": project_name,
            },
        )
        all_nodes.append(node)
        service_roots[node_id] = project_name

        # Each declared dependency becomes an UnresolvedRef
        for dep_name in svc.dependencies:
            all_unresolved.append(UnresolvedRef(
                source_node_id=node_id,
                ref_type="depends",
                ref_name=dep_name,
                framework=svc.framework,
            ))

    # Entities → entity nodes
    for ent in wave.entities:
        node_id = f"{project_name}::{ent.name}"
        node = Node(
            id=node_id,
            label=ent.name,
            type="entity",
            source_file=ent.source_file,
            line=ent.line,
            metadata={
                "table_name": ent.table_name,
                "fields": ent.fields,
                "relations": ent.relations,
                "framework": ent.framework,
                "project": project_name,
            },
        )
        all_nodes.append(node)
        service_roots[node_id] = project_name

    # Components → component nodes
    for comp in wave.components:
        node_id = f"{project_name}::{comp.name}"
        node = Node(
            id=node_id,
            label=comp.name,
            type="component",
            source_file=comp.source_file,
            line=comp.line,
            metadata={
                "props": comp.props,
                "hooks": comp.hooks,
                "is_page": comp.is_page,
                "route_path": comp.route_path,
                "framework": comp.framework,
                "project": project_name,
            },
        )
        all_nodes.append(node)
        service_roots[node_id] = project_name

    # Import edges from Pass 1
    all_edges.extend(wave.import_edges)
    # Remaining unresolved refs from Pass 1 (e.g. @Autowired)
    all_unresolved.extend(wave.unresolved)
```
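For orientation, the id/label scheme above composes like this (project, handler, method, and path values are illustrative only):

```python
project, handler, method, path = "shop-api", "get_user", "GET", "/users/{id}"
node_id = f"{project}::{handler}::route::{method}::{path}"
label = f"{handler} [{method} {path}]"
# node_id → "shop-api::get_user::route::GET::/users/{id}"
# label   → "get_user [GET /users/{id}]"
```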
```python
# ── Import edge remapping ────────────────────────────────────────────────────

def _remap_import_edges(all_nodes: list[Node], all_edges: list[Edge]) -> list[Edge]:
    """Remap import edges from file_path → raw_import to node_id → node_id.

    dependencies.py emits Edge(source=file_path, target=raw_import_string).
    Graph nodes use IDs like "project::ClassName". This function bridges the
    two by building reverse maps and resolving both sides.
    """
    # source_file → [node_id, ...]
    file_to_nodes: dict[str, list[str]] = {}
    # label (class/component name) → [node_id, ...]
    label_to_nodes: dict[str, list[str]] = {}

    for node in all_nodes:
        file_to_nodes.setdefault(node.source_file, []).append(node.id)
        label_to_nodes.setdefault(node.label, []).append(node.id)

    remapped: list[Edge] = []
    non_import: list[Edge] = []

    for edge in all_edges:
        if edge.relation != "imports_from":
            non_import.append(edge)
            continue

        # Resolve source: file_path → node_ids in that file
        source_ids = file_to_nodes.get(edge.source, [])
        if not source_ids:
            continue

        # Resolve target: raw import string → node_id via label matching
        target_label = _import_to_label(edge.target)
        target_ids = label_to_nodes.get(target_label, [])
        if not target_ids:
            continue

        for src_id in source_ids:
            src_project = src_id.split("::")[0] if "::" in src_id else ""
            # Prefer same-project target
            target_id = target_ids[0]
            for tid in target_ids:
                if tid.startswith(src_project + "::"):
                    target_id = tid
                    break
            if src_id != target_id:
                remapped.append(Edge(
                    source=src_id,
                    target=target_id,
                    relation=edge.relation,
                    confidence=edge.confidence,
                    confidence_score=edge.confidence_score,
                    source_file=edge.source_file,
                ))

    return non_import + remapped


def _import_to_label(raw_import: str) -> str:
    """Extract a class/component name from a raw import string.

    Examples:
        "@/components/Button"          → "Button"
        "com.example.service.UserSvc"  → "UserSvc"
        "../auth/AuthService"          → "AuthService"
        "./UserPage"                   → "UserPage"
    """
    # Java-style package: no slashes, dots as separators
    if "." in raw_import and "/" not in raw_import:
        return raw_import.rsplit(".", 1)[-1]
    # Path-style: take last segment
    name = raw_import.rsplit("/", 1)[-1]
    # Strip file extension
    if "." in name:
        name = name.rsplit(".", 1)[0]
    return name
```
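The docstring examples double as a quick check. The last assertion (an extension-stripping case) is an extra illustration, not from the original docstring; note `_import_to_label` is a private helper:

```python
from codebeacon.graph.build import _import_to_label

assert _import_to_label("@/components/Button") == "Button"
assert _import_to_label("com.example.service.UserSvc") == "UserSvc"
assert _import_to_label("../auth/AuthService") == "AuthService"
assert _import_to_label("./UserPage") == "UserPage"
assert _import_to_label("utils/date.ts") == "date"   # extension stripped
```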
```python
# ── NetworkX construction ─────────────────────────────────────────────────────

def _build_nx_graph(
    nodes: list[Node],
    edges: list[Edge],
    node_dict: dict[str, Node],
) -> nx.DiGraph:
    G = nx.DiGraph()

    for node in nodes:
        attrs = _node_attrs(node)
        G.add_node(node.id, **attrs)

    for edge in edges:
        if edge.source not in G:
            continue
        if edge.target not in G:
            # Add external stub for unresolved targets
            G.add_node(
                edge.target,
                label=edge.target,
                type="external",
                source_file="",
                line=0,
                project="",
            )
        G.add_edge(
            edge.source,
            edge.target,
            relation=edge.relation,
            confidence=edge.confidence,
            confidence_score=edge.confidence_score,
            source_file=edge.source_file,
        )

    return G


def _node_attrs(node: Node) -> dict[str, Any]:
    """Flatten a Node into a NetworkX attribute dict (no nested dicts)."""
    attrs: dict[str, Any] = {
        "label": node.label,
        "type": node.type,
        "source_file": node.source_file,
        "line": node.line,
    }
    # Flatten metadata as top-level keys. List/dict values are kept as-is:
    # NetworkX handles them fine in memory, but serialisers such as the
    # GraphML writer reject them — stringify before export if needed.
    for k, v in (node.metadata or {}).items():
        attrs[k] = v
    return attrs
```
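A companion sketch for the export caveat above: before writing GraphML, copy the graph and JSON-encode non-scalar attribute values, since NetworkX's GraphML writer rejects lists and dicts as data values. `graphml_safe` is a hypothetical helper, not part of the package:

```python
import json
import networkx as nx

def graphml_safe(G: nx.DiGraph) -> nx.DiGraph:
    """Return a copy of G whose list/dict attribute values are JSON-encoded."""
    H = G.copy()
    for _, data in H.nodes(data=True):
        for k, v in list(data.items()):
            if isinstance(v, (list, dict)):
                data[k] = json.dumps(v)
    for _, _, data in H.edges(data=True):
        for k, v in list(data.items()):
            if isinstance(v, (list, dict)):
                data[k] = json.dumps(v)
    return H

# nx.write_graphml(graphml_safe(G), "graph.graphml")
```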