commiter_cli-0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. commiter/__init__.py +3 -0
  2. commiter/adapters/__init__.py +0 -0
  3. commiter/adapters/base.py +96 -0
  4. commiter/adapters/django_rest.py +247 -0
  5. commiter/adapters/express.py +204 -0
  6. commiter/adapters/fastapi.py +170 -0
  7. commiter/adapters/flask.py +169 -0
  8. commiter/adapters/nextjs.py +180 -0
  9. commiter/adapters/prisma.py +76 -0
  10. commiter/adapters/raw_sql.py +191 -0
  11. commiter/adapters/react.py +129 -0
  12. commiter/adapters/sqlalchemy.py +99 -0
  13. commiter/adapters/supabase.py +68 -0
  14. commiter/auth.py +130 -0
  15. commiter/cli.py +667 -0
  16. commiter/correlator.py +208 -0
  17. commiter/extractors/__init__.py +0 -0
  18. commiter/extractors/api_calls.py +91 -0
  19. commiter/extractors/api_endpoints.py +354 -0
  20. commiter/extractors/backend_files.py +33 -0
  21. commiter/extractors/base.py +40 -0
  22. commiter/extractors/db_operations.py +69 -0
  23. commiter/extractors/dependencies.py +219 -0
  24. commiter/generic_resolver.py +204 -0
  25. commiter/handler_index.py +97 -0
  26. commiter/lib.py +63 -0
  27. commiter/middleware_index.py +350 -0
  28. commiter/models.py +117 -0
  29. commiter/parser.py +1283 -0
  30. commiter/prefix_index.py +211 -0
  31. commiter/report/__init__.py +0 -0
  32. commiter/report/ai.py +120 -0
  33. commiter/report/api_guide.py +217 -0
  34. commiter/report/architecture.py +930 -0
  35. commiter/report/console.py +254 -0
  36. commiter/report/json_output.py +122 -0
  37. commiter/report/markdown.py +163 -0
  38. commiter/scanner.py +383 -0
  39. commiter/type_index.py +304 -0
  40. commiter/uploader.py +46 -0
  41. commiter/utils/__init__.py +0 -0
  42. commiter/utils/env_reader.py +78 -0
  43. commiter/utils/file_classifier.py +187 -0
  44. commiter/utils/path_helpers.py +73 -0
  45. commiter/utils/tsconfig_resolver.py +281 -0
  46. commiter/wrapper_index.py +288 -0
  47. commiter_cli-0.3.0.dist-info/METADATA +14 -0
  48. commiter_cli-0.3.0.dist-info/RECORD +96 -0
  49. commiter_cli-0.3.0.dist-info/WHEEL +5 -0
  50. commiter_cli-0.3.0.dist-info/entry_points.txt +2 -0
  51. commiter_cli-0.3.0.dist-info/top_level.txt +2 -0
  52. tests/__init__.py +0 -0
  53. tests/fixtures/arch_backend/app.py +22 -0
  54. tests/fixtures/arch_backend/middleware/__init__.py +0 -0
  55. tests/fixtures/arch_backend/middleware/rate_limit.py +4 -0
  56. tests/fixtures/arch_backend/routes/__init__.py +0 -0
  57. tests/fixtures/arch_backend/routes/analytics.py +20 -0
  58. tests/fixtures/arch_backend/routes/auth.py +29 -0
  59. tests/fixtures/arch_backend/routes/projects.py +60 -0
  60. tests/fixtures/arch_backend/routes/users.py +55 -0
  61. tests/fixtures/arch_monorepo/apps/api/app.py +30 -0
  62. tests/fixtures/arch_monorepo/apps/api/middleware/__init__.py +0 -0
  63. tests/fixtures/arch_monorepo/apps/api/middleware/auth.py +17 -0
  64. tests/fixtures/arch_monorepo/apps/api/middleware/rate_limit.py +10 -0
  65. tests/fixtures/arch_monorepo/apps/api/routes/__init__.py +0 -0
  66. tests/fixtures/arch_monorepo/apps/api/routes/auth.py +46 -0
  67. tests/fixtures/arch_monorepo/apps/api/routes/invites.py +30 -0
  68. tests/fixtures/arch_monorepo/apps/api/routes/notifications.py +25 -0
  69. tests/fixtures/arch_monorepo/apps/api/routes/projects.py +80 -0
  70. tests/fixtures/arch_monorepo/apps/api/routes/tasks.py +91 -0
  71. tests/fixtures/arch_monorepo/apps/api/routes/users.py +48 -0
  72. tests/fixtures/arch_monorepo/apps/api/services/__init__.py +0 -0
  73. tests/fixtures/arch_monorepo/apps/api/services/email.py +11 -0
  74. tests/fixtures/backend_b/app.py +17 -0
  75. tests/fixtures/fastapi_app/app.py +48 -0
  76. tests/fixtures/fastapi_crossfile/routes.py +18 -0
  77. tests/fixtures/fastapi_crossfile/schemas.py +21 -0
  78. tests/fixtures/flask_app/app.py +33 -0
  79. tests/fixtures/flask_blueprint/app.py +7 -0
  80. tests/fixtures/flask_blueprint/routes/items.py +13 -0
  81. tests/fixtures/flask_blueprint/routes/users.py +20 -0
  82. tests/fixtures/middleware_test_flask/routes/public.py +8 -0
  83. tests/fixtures/middleware_test_flask/routes/users.py +26 -0
  84. tests/fixtures/python_deep_imports/app/__init__.py +0 -0
  85. tests/fixtures/python_deep_imports/app/api/__init__.py +0 -0
  86. tests/fixtures/python_deep_imports/app/api/health.py +11 -0
  87. tests/fixtures/python_deep_imports/app/api/v1/__init__.py +0 -0
  88. tests/fixtures/python_deep_imports/app/api/v1/items.py +18 -0
  89. tests/fixtures/python_deep_imports/app/api/v1/users.py +27 -0
  90. tests/fixtures/python_deep_imports/app/schemas/__init__.py +0 -0
  91. tests/fixtures/python_deep_imports/app/schemas/item.py +13 -0
  92. tests/fixtures/python_deep_imports/app/schemas/user.py +15 -0
  93. tests/fixtures/python_deep_imports/app/shared/__init__.py +0 -0
  94. tests/fixtures/python_deep_imports/app/shared/models.py +7 -0
  95. tests/fixtures/raw_sql_test/app.py +54 -0
  96. tests/test_architecture.py +757 -0
@@ -0,0 +1,930 @@
1
+ """Generate architecture graph JSON for the interactive frontend diagram."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import json
7
+ import os
8
+ import re
9
+ import subprocess
10
+ from pathlib import Path
11
+
12
+ from commiter.models import (
13
+ APICall,
14
+ APIEndpoint,
15
+ DBOperation,
16
+ FileClassification,
17
+ FileRole,
18
+ RepoDocumentation,
19
+ )
20
+
21
+ # Avoid circular import — ScanResult is only used for type hints.
22
+ from typing import TYPE_CHECKING
23
+ if TYPE_CHECKING:
24
+ from commiter.scanner import ScanResult
25
+ from commiter.middleware_index import MiddlewareIndex
26
+ from commiter.type_index import TypeIndex
27
+
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # Public API
31
+ # ---------------------------------------------------------------------------
32
+
33
+ def _get_repo_full_name(repo_root: str) -> str | None:
34
+ """Extract the GitHub owner/repo from the git remote URL.
35
+
36
+ Handles both SSH (git@github.com:owner/repo.git) and HTTPS
37
+ (https://github.com/owner/repo.git) formats.
38
+ Returns None if not a git repo or no remote configured.
39
+ """
40
+ try:
41
+ result = subprocess.run(
42
+ ["git", "-C", repo_root, "remote", "get-url", "origin"],
43
+ capture_output=True, text=True, timeout=5,
44
+ )
45
+ if result.returncode != 0:
46
+ return None
47
+ url = result.stdout.strip()
48
+
49
+ # SSH: git@github.com:owner/repo.git
50
+ m = re.match(r"git@[^:]+:(.+?)(?:\.git)?$", url)
51
+ if m:
52
+ return m.group(1)
53
+
54
+ # HTTPS: https://github.com/owner/repo.git
55
+ m = re.match(r"https?://[^/]+/(.+?)(?:\.git)?$", url)
56
+ if m:
57
+ return m.group(1)
58
+
59
+ return None
60
+ except Exception:
61
+ return None
62
+
63
+
64
def generate_architecture(scan_results: list[ScanResult]) -> str:
    """Build the full architecture payload and return it as a JSON string."""
    multi_repo = len(scan_results) > 1

    # Nodes and file trees are built per repo; edges need every node at once.
    all_nodes: list[dict] = []
    all_file_trees: list[dict] = []
    for sr in scan_results:
        repo_prefix = sr.doc.repo_name if multi_repo else ""
        all_nodes += _build_nodes(sr.doc, sr.file_list, sr.doc.repo_path, repo_prefix)
        all_file_trees += _build_file_tree(sr.file_list, sr.doc.repo_path)

    # Edges need the full node list to resolve cross-repo references.
    all_edges = _build_edges([sr.doc for sr in scan_results], all_nodes, scan_results)

    # Layout is handled by the frontend (dagre) — the CLI emits x:0, y:0,
    # so _compute_layout is intentionally not called here.
    all_analysis = _build_node_analysis(all_nodes, scan_results, all_edges)
    node_hashes = _compute_node_hashes(all_nodes, all_analysis)

    # Git remote full names let the backend correlate commit history.
    repo_full_names = [
        name
        for name in (_get_repo_full_name(sr.doc.repo_path) for sr in scan_results)
        if name
    ]

    payload = {
        "nodes": all_nodes,
        "edges": all_edges,
        "fileTree": all_file_trees,
        "nodeAnalysis": all_analysis,
        "nodeHashes": node_hashes,
        "repoFullNames": repo_full_names,
    }
    return json.dumps(payload, indent=2, default=str)
112
+
113
+
114
+ # ---------------------------------------------------------------------------
115
+ # Node building
116
+ # ---------------------------------------------------------------------------
117
+
118
# Regexes / lookup tables used by _classify_node to bucket files into
# architecture node types.

# Next.js-style pages: app-router "page.tsx" files, or files directly under
# a /pages/ directory (excluding /pages/api/ API routes).
_PAGE_PATTERNS = re.compile(
    r"(^|/)page\.(tsx?|jsx?)$"
    r"|/pages/(?!api/)[^/]+\.(tsx?|jsx?)$"
)
# Any path containing "provider" (case-insensitive) counts as a React provider.
_PROVIDER_PATTERN = re.compile(r"provider", re.IGNORECASE)
# Directories whose JS/TS files form the API client / wrapper layer.
_API_CLIENT_DIRS = {"lib/api", "services/api", "utils/api", "lib/services"}
# Conventional backend route directory names.
# NOTE(review): not referenced anywhere in this module's visible code —
# possibly used elsewhere or dead; confirm before removing.
_BACKEND_ROUTE_DIRS = {"routes", "controllers", "handlers", "views", "endpoints", "api"}
# Paths that look like database client modules.
_DB_CLIENT_PATTERNS = re.compile(r"supabase|prisma|database|db", re.IGNORECASE)
# Config files that still deserve an "infra" service node.
_INFRA_FILES = {"Dockerfile", "docker-compose.yml", "docker-compose.yaml", ".env", "vercel.json", "fly.toml"}

# Files that get a node on the canvas but should not be connected with edges.
# Useful for context files (env vars, config) that the AI chat can reference
# but don't represent meaningful architectural connections.
ISOLATED_NODE_FILES = {".env", ".env.local", ".env.production", ".env.development"}
132
+
133
+
134
+ def _slugify(text: str) -> str:
135
+ """Convert a label to a URL-safe node ID slug."""
136
+ s = re.sub(r"[^a-z0-9]+", "-", text.lower()).strip("-")
137
+ return s or "node"
138
+
139
+
140
+ def _label_from_path(rel_path: str) -> str:
141
+ """Derive a human-friendly label from a relative file path."""
142
+ p = Path(rel_path)
143
+ # For page.tsx files use the parent directory name
144
+ if re.match(r"^page\.", p.name) or re.match(r"^route\.", p.name):
145
+ return p.parent.name.replace("-", " ").replace("_", " ").title()
146
+ return p.stem.replace("-", " ").replace("_", " ").title()
147
+
148
+
149
def _classify_node(fc: FileClassification, rel_path: str) -> tuple[str, str] | None:
    """Return (node_type, category) for a file, or None to skip it."""
    role = fc.role
    name = Path(rel_path).name

    # Package markers, tests, and migrations never become nodes.
    if name == "__init__.py":
        return None
    if role in (FileRole.TEST, FileRole.MIGRATION):
        return None

    # Config files are skipped unless they describe infrastructure.
    if role == FileRole.CONFIG:
        return ("service", "infra") if name in _INFRA_FILES else None

    rp = rel_path.replace("\\", "/")

    # Frontend pages win over every other frontend bucket.
    if _PAGE_PATTERNS.search(rp):
        return ("page", "frontend")

    # Remaining frontend files: provider if the path says so, else component.
    if role == FileRole.FRONTEND:
        if _PROVIDER_PATTERN.search(rp):
            return ("provider", "frontend")
        return ("component", "frontend")

    # API client / wrapper layer (JS/TS files in lib/api etc.).
    if fc.language in ("javascript", "typescript", "tsx") and any(
        api_dir in rp for api_dir in _API_CLIENT_DIRS
    ):
        return ("api", "backend")

    # Backend route files — the caller groups these into one service node.
    if role == FileRole.BACKEND:
        return ("service", "backend")

    # DB client files (path looks like a database module).
    if _DB_CLIENT_PATTERNS.search(rp):
        return ("database", "data")

    return None
192
+
193
+
194
def _build_nodes(
    doc: RepoDocumentation,
    file_list: list[str],
    repo_root: str,
    repo_prefix: str,
) -> list[dict]:
    """Assemble architecture nodes using smart grouping.

    Individual files become one node each; backend route files are merged
    into a single service node, and one database node is created per ORM.
    """
    nodes: list[dict] = []
    used_ids: set[str] = set()

    def _rel(path: str) -> str:
        # Repo-relative path with forward slashes (Windows-safe).
        return os.path.relpath(path, repo_root).replace("\\", "/")

    def _node(node_id: str, label: str, node_type: str, category: str, files: list[str]) -> dict:
        # Key order matters for the serialized JSON — keep it stable.
        return {
            "id": node_id,
            "label": label,
            "type": node_type,
            "category": category,
            "x": 0,
            "y": 0,
            "description": "",
            "files": files,
        }

    # file_path -> classification for quick lookup.
    fc_map: dict[str, FileClassification] = {
        fc.file_path: fc for fc in doc.file_classifications
    }
    # Files that declare endpoints are backend regardless of classifier result.
    endpoint_files: set[str] = {ep.file_path for ep in doc.endpoints}

    backend_route_files: list[str] = []

    for abs_path in file_list:
        fc = fc_map.get(abs_path)
        if fc is None:
            continue
        rel = _rel(abs_path)

        classification = _classify_node(fc, rel)
        if classification is None and abs_path in endpoint_files:
            classification = ("service", "backend")
        if classification is None:
            continue

        node_type, category = classification
        if (node_type, category) == ("service", "backend"):
            # Grouped into one service node after the loop.
            backend_route_files.append(abs_path)
            continue

        label = _label_from_path(rel)
        node = _node(
            _make_id(node_type, label, repo_prefix, used_ids),
            label, node_type, category, [rel],
        )
        if Path(rel).name in ISOLATED_NODE_FILES:
            node["isolated"] = True
        nodes.append(node)

    # --- Grouped backend service node ---
    if backend_route_files:
        # Label from the primary framework when known.
        fw = doc.frameworks[0] if doc.frameworks else "Backend"
        label = f"{fw.title()} Backend"
        nodes.append(_node(
            _make_id("service", label, repo_prefix, used_ids),
            label, "service", "backend",
            [_rel(f) for f in backend_route_files],
        ))

    # --- Database nodes (one per ORM detected) ---
    db_orms: dict[str, list[str]] = {}
    for op in doc.db_operations:
        db_orms.setdefault(op.orm_library, []).append(op.file_path)

    for orm, file_paths in db_orms.items():
        label = "Database" if orm == "raw_sql" else orm.title()
        nodes.append(_node(
            _make_id("database", label, repo_prefix, used_ids),
            label, "database", "data",
            sorted({_rel(f) for f in file_paths}),
        ))

    return nodes
299
+
300
+
301
def _make_id(node_type: str, label: str, repo_prefix: str, used: set[str]) -> str:
    """Generate a unique node ID, suffixing -2, -3, ... on collision.

    The chosen ID is recorded in *used* so later calls stay unique.
    """
    parts = [node_type, _slugify(label)]
    if repo_prefix:
        parts.insert(0, _slugify(repo_prefix))
    base = "-".join(parts)

    candidate = base
    n = 1
    while candidate in used:
        n += 1
        candidate = f"{base}-{n}"
    used.add(candidate)
    return candidate
313
+
314
+
315
+ # ---------------------------------------------------------------------------
316
+ # Edge building
317
+ # ---------------------------------------------------------------------------
318
+
319
+ def _find_node_for_file(nodes: list[dict], rel_path: str) -> dict | None:
320
+ """Find the node that owns a given relative file path."""
321
+ for node in nodes:
322
+ if rel_path in node["files"]:
323
+ return node
324
+ return None
325
+
326
+
327
+ def _find_node_for_abs_file(nodes: list[dict], abs_path: str, repo_root: str) -> dict | None:
328
+ """Find the node that owns a given absolute file path."""
329
+ rel = os.path.relpath(abs_path, repo_root).replace("\\", "/")
330
+ return _find_node_for_file(nodes, rel)
331
+
332
+
333
+ def _find_api_gateway(call: APICall, nodes: list[dict]) -> dict | None:
334
+ """If a call was traced through a wrapper in an api-type node, return that node.
335
+
336
+ Detects patterns like 'apiClient() in client.ts:5' from the traced_from field,
337
+ then finds the api-type node whose files contain that wrapper file.
338
+ """
339
+ traced = call.traced_from
340
+ if not traced or " in " not in traced:
341
+ return None
342
+ file_part = traced.split(" in ", 1)[1].split(":")[0] # "client.ts"
343
+ for node in nodes:
344
+ if node["type"] != "api":
345
+ continue
346
+ for f in node["files"]:
347
+ if f.endswith(file_part) or f.endswith("/" + file_part):
348
+ return node
349
+ return None
350
+
351
+
352
def _build_edges(
    docs: list[RepoDocumentation],
    nodes: list[dict],
    scan_results: list[ScanResult],
) -> list[dict]:
    """Build edges from extracted relationships.

    Runs four passes over the scan results, in order:
      1. API edges   — frontend api_calls matched to backend endpoints.
      2. Data edges  — files with DB operations linked to database nodes.
      3. Auth edges  — auth-guarded endpoints linked to an auth provider node.
      4. Import edges — render/dependency edges from per-file import scanning.

    Edges touching isolated nodes are suppressed, self-loops are dropped,
    and (from, to, type) triples are deduplicated.

    NOTE(review): the *docs* parameter is never read in this body — every
    pass iterates scan_results instead; presumably kept for interface
    compatibility. Confirm with callers before removing.
    """
    edges: list[dict] = []
    edge_counter = 0
    seen_edges: set[tuple[str, str, str]] = set()  # (from, to, type) dedup
    # Nodes flagged isolated (e.g. .env files) never participate in edges.
    isolated_ids = {n["id"] for n in nodes if n.get("isolated")}

    def _clean_edge_label(label: str) -> str:
        """Strip protocol, host, and port from URL labels to keep just the path."""
        if not label:
            return label
        cleaned = re.sub(r"^https?://[^/]+", "", label)
        # Remove leading env var placeholders like :NEXT_PUBLIC_API_URL
        cleaned = re.sub(r"^:[A-Z_]+", "", cleaned)
        # Fall back to the original label if cleaning stripped everything.
        return cleaned.strip() or label

    def _add_edge(from_id: str, to_id: str, edge_type: str, label: str = "") -> None:
        # Central choke point: filtering, dedup, id assignment all happen here.
        nonlocal edge_counter
        if from_id in isolated_ids or to_id in isolated_ids:
            return
        key = (from_id, to_id, edge_type)
        if key in seen_edges or from_id == to_id:
            return
        seen_edges.add(key)
        edge_counter += 1
        label = _clean_edge_label(label)
        edges.append({
            "id": f"e-{edge_counter}",
            "from": from_id,
            "to": to_id,
            "type": edge_type,
            # Omit the label key entirely when empty.
            **({"label": label} if label else {}),
        })

    # Build a lookup: endpoint route -> node_id (for correlating calls)
    ep_route_to_node: dict[str, str] = {}
    for sr in scan_results:
        doc = sr.doc
        for ep in doc.endpoints:
            rel = os.path.relpath(ep.file_path, doc.repo_path).replace("\\", "/")
            node = _find_node_for_file(nodes, rel)
            if node:
                # Key format must match service_relationships.target_endpoint,
                # e.g. "GET /users/:id".
                key = f"{ep.http_method.upper()} {ep.route_pattern}"
                ep_route_to_node[key] = node["id"]

    # --- API edges: frontend call → backend endpoint ---
    for sr in scan_results:
        doc = sr.doc
        for call in doc.api_calls:
            rel = os.path.relpath(call.file_path, doc.repo_path).replace("\\", "/")
            source_node = _find_node_for_file(nodes, rel)
            if not source_node:
                continue

            # Try to find matching endpoint via service_relationships
            target_node_id = None
            for srel in doc.service_relationships:
                # startswith — presumably source_file may carry a ":line"
                # suffix beyond the bare path; verify against the correlator.
                if srel.source_file.startswith(call.file_path) and srel.connection_type == "api_call":
                    target_key = srel.target_endpoint
                    target_node_id = ep_route_to_node.get(target_key)
                    break

            # Fallback: try matching by URL pattern within same repo
            if not target_node_id:
                for ep in doc.endpoints:
                    if _url_matches(call.url_pattern, ep.route_pattern):
                        ep_rel = os.path.relpath(ep.file_path, doc.repo_path).replace("\\", "/")
                        target = _find_node_for_file(nodes, ep_rel)
                        if target:
                            target_node_id = target["id"]
                            break

            # Also try cross-repo endpoints
            if not target_node_id:
                for other_sr in scan_results:
                    if other_sr is sr:
                        continue
                    for ep in other_sr.doc.endpoints:
                        if _url_matches(call.url_pattern, ep.route_pattern):
                            ep_rel = os.path.relpath(ep.file_path, other_sr.doc.repo_path).replace("\\", "/")
                            target = _find_node_for_file(nodes, ep_rel)
                            if target:
                                target_node_id = target["id"]
                                break
                    if target_node_id:
                        break

            if target_node_id:
                # Route through API gateway node if the call was traced through a wrapper:
                # source -> gateway (labelled), gateway -> endpoint (unlabelled).
                gateway = _find_api_gateway(call, nodes)
                if gateway and gateway["id"] != source_node["id"]:
                    _add_edge(source_node["id"], gateway["id"], "api", call.url_pattern)
                    _add_edge(gateway["id"], target_node_id, "api", "")
                else:
                    _add_edge(source_node["id"], target_node_id, "api", call.url_pattern)

    # --- Data edges: node with DB operations → database node ---
    db_nodes = [n for n in nodes if n["type"] == "database"]
    for sr in scan_results:
        doc = sr.doc
        for op in doc.db_operations:
            rel = os.path.relpath(op.file_path, doc.repo_path).replace("\\", "/")
            source_node = _find_node_for_file(nodes, rel)
            if not source_node:
                continue
            # Find the database node for this ORM; "Database" is the
            # catch-all label used for raw SQL (see _build_nodes).
            for db_node in db_nodes:
                if op.orm_library.lower() in db_node["label"].lower() or db_node["label"] == "Database":
                    _add_edge(source_node["id"], db_node["id"], "data", op.table_name)
                    break

    # --- Auth edges: service node → auth provider (heuristic) ---
    # First provider node whose label mentions "auth" wins.
    auth_provider = None
    for n in nodes:
        if n["type"] == "provider" and re.search(r"auth", n["label"], re.IGNORECASE):
            auth_provider = n
            break

    if auth_provider:
        for sr in scan_results:
            for ep in sr.doc.endpoints:
                # Guarded endpoint: explicit auth decorator, or any middleware
                # whose name smells like authentication.
                if ep.auth_decorators or any(
                    re.search(r"auth|jwt|login|protect", mw, re.IGNORECASE)
                    for mw in ep.middleware
                ):
                    rel = os.path.relpath(ep.file_path, sr.doc.repo_path).replace("\\", "/")
                    source_node = _find_node_for_file(nodes, rel)
                    if source_node:
                        _add_edge(source_node["id"], auth_provider["id"], "auth", "requires auth")

    # --- Dependency / render edges: page/component imports → other nodes ---
    # NOTE(review): every node is probed against every repo root; files from
    # other repos are filtered only by the os.path.isfile() check below.
    for sr in scan_results:
        doc = sr.doc
        alias_resolver = sr.alias_resolver
        for node in nodes:
            for rel_file in node["files"]:
                abs_path = os.path.join(doc.repo_path, rel_file)
                if not os.path.isfile(abs_path):
                    continue
                imports = _extract_imports_simple(abs_path)
                for imp_path in imports:
                    target = _resolve_import_to_node(
                        imp_path, rel_file, doc.repo_path, nodes,
                        alias_resolver=alias_resolver, caller_abs=abs_path,
                    )
                    if target and target["id"] != node["id"]:
                        # UI-layer imports render as "render" edges; everything
                        # else is a generic dependency.
                        edge_type = "render" if node["type"] in ("page", "component") and target["type"] in ("component", "provider") else "dependency"
                        _add_edge(node["id"], target["id"], edge_type, "")

    return edges
506
+
507
+
508
+ def _url_matches(call_url: str, route_pattern: str) -> bool:
509
+ """Quick check if a frontend URL could match a backend route."""
510
+ # Normalize both to comparable segments
511
+ call_clean = re.sub(r"\$\{[^}]+\}", ":param", call_url)
512
+ call_clean = re.sub(r"^https?://[^/]+", "", call_clean).strip("/")
513
+ # Normalize :varName params (from template literal resolution) to :param
514
+ call_clean = re.sub(r":(\w+)", ":param", call_clean)
515
+
516
+ route_clean = re.sub(r"<(?:\w+:)?(\w+)>", r":param", route_pattern)
517
+ route_clean = re.sub(r"\{(\w+)\}", r":param", route_clean)
518
+ route_clean = re.sub(r"\[(\w+)\]", r":param", route_clean).strip("/")
519
+
520
+ if not call_clean or not route_clean:
521
+ return False
522
+
523
+ # Check if one ends with the other (frontend may include /api prefix)
524
+ return call_clean.endswith(route_clean) or route_clean.endswith(call_clean) or call_clean == route_clean
525
+
526
+
527
+ def _extract_imports_simple(file_path: str) -> list[str]:
528
+ """Extract import paths from a file using simple regex (no AST needed)."""
529
+ try:
530
+ with open(file_path, "r", encoding="utf-8", errors="replace") as f:
531
+ source = f.read(50_000) # cap read size
532
+ except OSError:
533
+ return []
534
+
535
+ imports = []
536
+ # JS/TS: import ... from "path"
537
+ for m in re.finditer(r'''(?:import|from)\s+.*?["']([^"']+)["']''', source):
538
+ imports.append(m.group(1))
539
+ # Python: from path import ... / import path
540
+ for m in re.finditer(r"^(?:from\s+([\w.]+)|import\s+([\w.]+))", source, re.MULTILINE):
541
+ imports.append(m.group(1) or m.group(2))
542
+ return imports
543
+
544
+
545
+ def _resolve_import_to_node(
546
+ import_path: str,
547
+ source_rel: str,
548
+ repo_root: str,
549
+ nodes: list[dict],
550
+ alias_resolver: object | None = None,
551
+ caller_abs: str = "",
552
+ ) -> dict | None:
553
+ """Try to resolve an import path to a node by matching against node file lists."""
554
+ # Try tsconfig alias resolution first (handles @/, ~/, etc.)
555
+ if alias_resolver and not import_path.startswith("."):
556
+ resolved_abs = alias_resolver.resolve(import_path, caller_abs)
557
+ if resolved_abs:
558
+ resolved_rel = os.path.relpath(resolved_abs, repo_root).replace("\\", "/")
559
+ for node in nodes:
560
+ for nf in node["files"]:
561
+ if nf.replace("\\", "/") == resolved_rel:
562
+ return node
563
+ return None
564
+
565
+ if not import_path.startswith("."):
566
+ # Non-relative imports without alias resolver — skip
567
+ return None
568
+
569
+ source_dir = str(Path(source_rel).parent)
570
+ resolved = os.path.normpath(os.path.join(source_dir, import_path)).replace("\\", "/")
571
+
572
+ # Try common extensions
573
+ candidates = [resolved]
574
+ for ext in (".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.tsx", "/index.js"):
575
+ candidates.append(resolved + ext)
576
+
577
+ for node in nodes:
578
+ for node_file in node["files"]:
579
+ nf = node_file.replace("\\", "/")
580
+ for candidate in candidates:
581
+ if nf == candidate or nf.startswith(candidate + "/"):
582
+ return node
583
+ return None
584
+
585
+
586
+ # ---------------------------------------------------------------------------
587
+ # Per-node hashing (for incremental cache)
588
+ # ---------------------------------------------------------------------------
589
+
590
+ def _compute_node_hashes(
591
+ nodes: list[dict],
592
+ node_analysis: dict[str, dict],
593
+ ) -> dict[str, str]:
594
+ """Compute a SHA-256 hash per node based on its structural data.
595
+
596
+ The hash covers the node's identity, files, and analysis content (endpoints,
597
+ rules, data sources) but NOT descriptions — those are what the AI generates.
598
+ A changed hash means the node needs re-enrichment.
599
+ """
600
+ hashes: dict[str, str] = {}
601
+ for node in nodes:
602
+ node_data: dict = {
603
+ "id": node["id"],
604
+ "type": node["type"],
605
+ "category": node["category"],
606
+ "files": sorted(node["files"]),
607
+ }
608
+ analysis = node_analysis.get(node["id"], {})
609
+ node_data["endpoints"] = [
610
+ {"method": ep["method"], "path": ep["path"], "category": ep.get("category", "")}
611
+ for ep in analysis.get("endpoints", [])
612
+ ]
613
+ node_data["rules"] = [r["name"] for r in analysis.get("rules", [])]
614
+ node_data["dataUsed"] = [d["source"] for d in analysis.get("dataUsed", [])]
615
+
616
+ content = json.dumps(node_data, sort_keys=True)
617
+ hashes[node["id"]] = hashlib.sha256(content.encode()).hexdigest()
618
+ return hashes
619
+
620
+
621
+ # ---------------------------------------------------------------------------
622
+ # Layout
623
+ # ---------------------------------------------------------------------------
624
+
625
+ _LAYER_ORDER = [
626
+ ("frontend", "page"),
627
+ ("frontend", "component"),
628
+ ("frontend", "provider"),
629
+ ("backend", "api"),
630
+ ("backend", "service"),
631
+ ("infra", "service"),
632
+ ("data", "database"),
633
+ ]
634
+
635
+ _LAYER_Y = {
636
+ 0: 80,
637
+ 1: 280,
638
+ 2: 280, # components + providers share a row
639
+ 3: 480,
640
+ 4: 680,
641
+ 5: 680, # infra shares row with backend services
642
+ 6: 880,
643
+ }
644
+
645
+
646
+ def _compute_layout(nodes: list[dict]) -> None:
647
+ """Assign x/y coordinates using a simple layered layout."""
648
+ layers: dict[int, list[dict]] = {}
649
+ for node in nodes:
650
+ key = (node["category"], node["type"])
651
+ layer_idx = None
652
+ for i, (cat, ntype) in enumerate(_LAYER_ORDER):
653
+ if key == (cat, ntype):
654
+ layer_idx = i
655
+ break
656
+ if layer_idx is None:
657
+ layer_idx = 4 # default to backend service layer
658
+
659
+ layers.setdefault(layer_idx, []).append(node)
660
+
661
+ for layer_idx, layer_nodes in layers.items():
662
+ y = _LAYER_Y.get(layer_idx, 400)
663
+ count = len(layer_nodes)
664
+ spacing = 280 if count <= 5 else max(160, 1400 // count)
665
+ start_x = 120
666
+
667
+ for i, node in enumerate(layer_nodes):
668
+ node["x"] = start_x + i * spacing
669
+ node["y"] = y
670
+
671
+
672
+ # ---------------------------------------------------------------------------
673
+ # File tree
674
+ # ---------------------------------------------------------------------------
675
+
676
+ def _build_file_tree(file_list: list[str], repo_root: str) -> list[dict]:
677
+ """Convert a flat file list into a nested FileTreeNode structure."""
678
+ tree: dict = {}
679
+
680
+ for abs_path in file_list:
681
+ rel = os.path.relpath(abs_path, repo_root).replace("\\", "/")
682
+ parts = rel.split("/")
683
+ current = tree
684
+ for i, part in enumerate(parts):
685
+ if part not in current:
686
+ current[part] = {} if i < len(parts) - 1 else None
687
+ if current[part] is not None:
688
+ current = current[part]
689
+
690
+ return _dict_to_tree(tree, "")
691
+
692
+
693
+ def _dict_to_tree(d: dict, prefix: str) -> list[dict]:
694
+ """Recursively convert a nested dict into FileTreeNode list."""
695
+ result = []
696
+ for name in sorted(d.keys()):
697
+ path = f"{prefix}/{name}".lstrip("/") if prefix else name
698
+ if d[name] is None:
699
+ # File
700
+ result.append({"name": name, "type": "file", "path": path})
701
+ else:
702
+ # Folder
703
+ children = _dict_to_tree(d[name], path)
704
+ result.append({"name": name, "type": "folder", "path": path, "children": children})
705
+ return result
706
+
707
+
708
+ # ---------------------------------------------------------------------------
709
+ # Node analysis
710
+ # ---------------------------------------------------------------------------
711
+
712
+ def _build_node_analysis(
713
+ nodes: list[dict],
714
+ scan_results: list[ScanResult],
715
+ edges: list[dict],
716
+ ) -> dict[str, dict]:
717
+ """Assemble NodeAnalysis data for each node."""
718
+ analysis: dict[str, dict] = {}
719
+
720
+ # Build reverse lookup: node_id -> list of edges targeting it
721
+ edges_by_source: dict[str, list[dict]] = {}
722
+ for e in edges:
723
+ edges_by_source.setdefault(e["from"], []).append(e)
724
+
725
+ # Flatten all docs and build lookup helpers
726
+ all_endpoints: list[tuple[RepoDocumentation, ScanResult, APIEndpoint]] = []
727
+ all_calls: list[tuple[RepoDocumentation, APICall]] = []
728
+ all_db_ops: list[tuple[RepoDocumentation, DBOperation]] = []
729
+
730
+ for sr in scan_results:
731
+ doc = sr.doc
732
+ for ep in doc.endpoints:
733
+ all_endpoints.append((doc, sr, ep))
734
+ for call in doc.api_calls:
735
+ all_calls.append((doc, call))
736
+ for op in doc.db_operations:
737
+ all_db_ops.append((doc, op))
738
+
739
+ # Build lookup: endpoint route_pattern -> list of frontend node labels that call it
740
+ ep_used_by: dict[str, list[str]] = {}
741
+ for doc, call in all_calls:
742
+ call_rel = os.path.relpath(call.file_path, doc.repo_path).replace("\\", "/")
743
+ caller_node = _find_node_for_file(nodes, call_rel)
744
+ if not caller_node:
745
+ continue
746
+ for _, _, ep in all_endpoints:
747
+ if _url_matches(call.url_pattern, ep.route_pattern):
748
+ if caller_node["label"] not in ep_used_by.get(ep.route_pattern, []):
749
+ ep_used_by.setdefault(ep.route_pattern, []).append(caller_node["label"])
750
+
751
+ for node in nodes:
752
+ node_files = set(node["files"])
753
+
754
+ # --- dataUsed: API calls made by this node's files ---
755
+ data_used = []
756
+ seen_sources: set[str] = set()
757
+ for doc, call in all_calls:
758
+ call_rel = os.path.relpath(call.file_path, doc.repo_path).replace("\\", "/")
759
+ if call_rel in node_files:
760
+ source = f"{call.http_method} {call.url_pattern} via {call.client_library}"
761
+ if source in seen_sources:
762
+ continue
763
+ seen_sources.add(source)
764
+ url_name = call.url_pattern.rstrip("/").rsplit("/", 1)[-1] or call.url_pattern
765
+ data_used.append({
766
+ "name": url_name.replace("-", " ").replace("_", " ").title(),
767
+ "source": source,
768
+ })
769
+
770
+ # --- endpoints: for service/api nodes ---
771
+ endpoints_data = None
772
+ if node["type"] in ("service", "api"):
773
+ endpoints_data = []
774
+ for doc, sr, ep in all_endpoints:
775
+ ep_rel = os.path.relpath(ep.file_path, doc.repo_path).replace("\\", "/")
776
+ if ep_rel in node_files:
777
+ # Derive category from route path
778
+ category = _category_from_route(ep.route_pattern)
779
+
780
+ # Resolve request/response shapes via type index
781
+ req_shape = _resolve_type_shape(ep.request_body_type, ep.request_body_fields, sr.type_index)
782
+ res_shape = _resolve_type_shape(ep.response_type, ep.response_fields, sr.type_index)
783
+
784
+ # DB operations for this endpoint
785
+ db_ops = []
786
+ for table in ep.db_tables:
787
+ # Find matching operation type
788
+ op_types = set()
789
+ for d, op in all_db_ops:
790
+ if op.table_name == table:
791
+ op_types.add(op.operation_type.upper())
792
+ for op_type in sorted(op_types) or ["SELECT"]:
793
+ db_ops.append({
794
+ "type": op_type,
795
+ "table": table,
796
+ "description": "",
797
+ })
798
+
799
+ used_by = ep_used_by.get(ep.route_pattern, [])
800
+
801
+ ep_dict: dict = {
802
+ "method": ep.http_method.upper(),
803
+ "path": ep.route_pattern,
804
+ "description": "",
805
+ "category": category,
806
+ "usedBy": used_by,
807
+ }
808
+ if req_shape:
809
+ ep_dict["requestShape"] = req_shape
810
+ if res_shape:
811
+ ep_dict["responseShape"] = res_shape
812
+ if db_ops:
813
+ ep_dict["dbOperations"] = db_ops
814
+
815
+ endpoints_data.append(ep_dict)
816
+
817
+ # --- rules: middleware/decorators for service nodes ---
818
+ rules_data = None
819
+ if node["type"] in ("service",) and endpoints_data:
820
+ rules_data = _build_rules(node, scan_results)
821
+
822
+ entry: dict = {
823
+ "userFacing": "",
824
+ "dataUsed": data_used,
825
+ "commits": [],
826
+ "issues": [],
827
+ }
828
+ if endpoints_data:
829
+ entry["endpoints"] = endpoints_data
830
+ if rules_data:
831
+ entry["rules"] = rules_data
832
+
833
+ analysis[node["id"]] = entry
834
+
835
+ return analysis
836
+
837
+
838
+ def _category_from_route(route: str) -> str:
839
+ """Derive a category label from a route pattern's first meaningful segment."""
840
+ parts = [p for p in route.strip("/").split("/") if p and not re.match(r"^(v\d+|api)$", p)]
841
+ if parts:
842
+ return parts[0].replace("-", " ").replace("_", " ").title()
843
+ return "General"
844
+
845
+
846
+ def _resolve_type_shape(
847
+ type_name: str | None,
848
+ flat_fields: list[str],
849
+ type_index: TypeIndex,
850
+ ) -> dict | None:
851
+ """Resolve a type name to a {field: type} dict via the type index."""
852
+ if type_name:
853
+ typedef = type_index.resolve(type_name, None, "")
854
+ if typedef and typedef.fields:
855
+ shape = {}
856
+ for field in typedef.fields:
857
+ field_type = field.type_str
858
+ if field.optional:
859
+ field_type += "?"
860
+ shape[field.name] = field_type
861
+ return shape
862
+
863
+ # Fallback: use flat field list
864
+ if flat_fields:
865
+ return {f.split(":")[0].strip(): f.split(":", 1)[1].strip() if ":" in f else "unknown" for f in flat_fields}
866
+
867
+ return None
868
+
869
+
870
def _build_rules(node: dict, scan_results: list[ScanResult]) -> list[dict]:
    """Extract middleware/decorator rules for a service node.

    Walks every endpoint belonging to the node's files and collects its
    auth decorators and middleware into rule dicts. Each distinct name
    yields one rule; subsequent endpoints using the same name only extend
    that rule's "appliedTo" route list.

    Args:
        node: Architecture node dict with a "files" list of repo-relative paths.
        scan_results: Scan results whose docs carry the endpoints to inspect.

    Returns:
        List of rule dicts with "name", "type", "appliedTo", "description"
        and "implementation" (file:line of the first endpoint seen).
    """
    rules: list[dict] = []
    seen_names: set[str] = set()
    # Loop-invariant: hoisted out of the scan-result loop.
    node_files = set(node["files"])

    def _record(name: str, route: str, implementation: str) -> None:
        """Register *name* as a new rule, or add *route* to its existing one."""
        if name not in seen_names:
            seen_names.add(name)
            rules.append({
                "name": name,
                "type": _classify_rule_type(name),
                "appliedTo": [route],
                "description": "",
                "implementation": implementation,
            })
            return
        for r in rules:
            if r["name"] == name and route not in r["appliedTo"]:
                r["appliedTo"].append(route)

    for sr in scan_results:
        doc = sr.doc

        # Collect unique middleware/decorators from endpoints in this node.
        for ep in doc.endpoints:
            ep_rel = os.path.relpath(ep.file_path, doc.repo_path).replace("\\", "/")
            if ep_rel not in node_files:
                continue
            implementation = f"{ep_rel}:{ep.line}"
            # Auth decorators first, then middleware (original ordering).
            for name in (*ep.auth_decorators, *ep.middleware):
                _record(name, ep.route_pattern, implementation)

    return rules
919
+
920
+
921
+ def _classify_rule_type(name: str) -> str:
922
+ """Classify a middleware/decorator name into a rule type."""
923
+ lower = name.lower()
924
+ if re.search(r"auth|jwt|login|protect|require_auth|token", lower):
925
+ return "guard"
926
+ if re.search(r"valid|sanitiz|schema|check", lower):
927
+ return "validator"
928
+ if name.startswith("@"):
929
+ return "decorator"
930
+ return "middleware"