PyPI - polycodegraph - Versions diffs - 0.1.0__py3-none-any.whl - Mend

polycodegraph 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

codegraph/__init__.py +10 -0
codegraph/analysis/__init__.py +30 -0
codegraph/analysis/_common.py +125 -0
codegraph/analysis/blast_radius.py +63 -0
codegraph/analysis/cycles.py +79 -0
codegraph/analysis/dataflow.py +861 -0
codegraph/analysis/dead_code.py +165 -0
codegraph/analysis/hotspots.py +68 -0
codegraph/analysis/infrastructure.py +439 -0
codegraph/analysis/metrics.py +52 -0
codegraph/analysis/report.py +222 -0
codegraph/analysis/roles.py +323 -0
codegraph/analysis/untested.py +79 -0
codegraph/cli.py +1506 -0
codegraph/config.py +64 -0
codegraph/embed/__init__.py +35 -0
codegraph/embed/chunker.py +120 -0
codegraph/embed/embedder.py +113 -0
codegraph/embed/query.py +181 -0
codegraph/embed/store.py +360 -0
codegraph/graph/__init__.py +0 -0
codegraph/graph/builder.py +212 -0
codegraph/graph/schema.py +69 -0
codegraph/graph/store_networkx.py +55 -0
codegraph/graph/store_sqlite.py +249 -0
codegraph/mcp_server/__init__.py +6 -0
codegraph/mcp_server/server.py +933 -0
codegraph/parsers/__init__.py +0 -0
codegraph/parsers/base.py +70 -0
codegraph/parsers/go.py +570 -0
codegraph/parsers/python.py +1707 -0
codegraph/parsers/typescript.py +1397 -0
codegraph/py.typed +0 -0
codegraph/resolve/__init__.py +4 -0
codegraph/resolve/calls.py +480 -0
codegraph/review/__init__.py +31 -0
codegraph/review/baseline.py +32 -0
codegraph/review/differ.py +211 -0
codegraph/review/hook.py +70 -0
codegraph/review/risk.py +219 -0
codegraph/review/rules.py +342 -0
codegraph/viz/__init__.py +17 -0
codegraph/viz/_style.py +45 -0
codegraph/viz/dashboard.py +740 -0
codegraph/viz/diagrams.py +370 -0
codegraph/viz/explore.py +453 -0
codegraph/viz/hld.py +683 -0
codegraph/viz/html.py +115 -0
codegraph/viz/mermaid.py +111 -0
codegraph/viz/svg.py +77 -0
codegraph/web/__init__.py +4 -0
codegraph/web/server.py +165 -0
codegraph/web/static/app.css +664 -0
codegraph/web/static/app.js +919 -0
codegraph/web/static/index.html +112 -0
codegraph/web/static/views/architecture.js +1671 -0
codegraph/web/static/views/graph3d.css +564 -0
codegraph/web/static/views/graph3d.js +999 -0
codegraph/web/static/views/graph3d_transform.js +984 -0
codegraph/workspace/__init__.py +34 -0
codegraph/workspace/config.py +110 -0
codegraph/workspace/operations.py +294 -0
polycodegraph-0.1.0.dist-info/METADATA +687 -0
polycodegraph-0.1.0.dist-info/RECORD +67 -0
polycodegraph-0.1.0.dist-info/WHEEL +4 -0
polycodegraph-0.1.0.dist-info/entry_points.txt +2 -0
polycodegraph-0.1.0.dist-info/licenses/LICENSE +21 -0

codegraph/analysis/dataflow.py ADDED Viewed

@@ -0,0 +1,861 @@
+"""End-to-end data-flow tracing across the structural / behavioural / dataflow
+graph layers.
+This module exposes two complementary functions:
+* :func:`match_route` — given a frontend ``FETCH_CALL`` URL + method, find the
+  qualname of the backend handler whose ``ROUTE`` edge matches.
+* :func:`trace` — given an entry symbol (function qualname or ``url:METHOD path``
+  shape), walk the call graph + cross-layer edges to produce an ordered
+  :class:`DataFlow`. Implemented by DF4.
+The dataclasses are stable contract — never modify the shapes here without
+coordination.
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass, field
+from typing import Any
+import networkx as nx
+from codegraph.graph.schema import EdgeKind, NodeKind
@dataclass
class FlowHop:
    """A single node visited while tracing a data-flow.

    ``layer`` is one of ``frontend`` / ``backend`` / ``db`` so that consumers
    can group hops into lanes. ``confidence`` describes how certain the match
    that produced this hop is: 1.0 for direct call-graph edges, lower for
    fuzzy URL matches.
    """

    layer: str  # "frontend" | "backend" | "db"
    qualname: str
    file: str = ""
    line: int = 0
    method: str | None = None  # HTTP verb, when applicable
    path: str | None = None  # URL path, when applicable
    args: list[str] = field(default_factory=list)
    kwargs: dict[str, str] = field(default_factory=dict)
    role: str | None = None  # HANDLER / SERVICE / COMPONENT / REPO if known
    confidence: float = 1.0

    # pragma: codegraph-public-api
    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of this hop.

        ``args`` and ``kwargs`` are shallow-copied. ``method``, ``path`` and
        ``role`` are only included when they are not ``None``.
        """
        payload: dict[str, Any] = {
            "layer": self.layer,
            "qualname": self.qualname,
            "file": self.file,
            "line": self.line,
            "args": list(self.args),
            "kwargs": dict(self.kwargs),
            "confidence": self.confidence,
        }
        # Optional fields are appended in a fixed order after the mandatory ones.
        for optional in ("method", "path", "role"):
            value = getattr(self, optional)
            if value is not None:
                payload[optional] = value
        return payload
@dataclass
class DataFlow:
    """An ordered chain of :class:`FlowHop` objects for one trace.

    ``confidence`` is meant to hold the minimum confidence across the hops:
    a chain is only as trustworthy as its weakest match.
    """

    entry: str
    hops: list[FlowHop] = field(default_factory=list)
    confidence: float = 1.0

    # pragma: codegraph-public-api
    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view, serialising each hop via ``to_dict``."""
        serialised_hops = []
        for hop in self.hops:
            serialised_hops.append(hop.to_dict())
        return {
            "entry": self.entry,
            "hops": serialised_hops,
            "confidence": self.confidence,
        }
# Matches a *whole* path segment that stands in for a variable value:
#   * ``{...}``   — brace-wrapped
#   * ``${...}``  — dollar-brace-wrapped
#   * ``:name``   — colon followed by an identifier
#   * an integer, optionally with a leading minus sign
_PLACEHOLDER_RE = re.compile(r"^(\{[^}]*\}|\$\{[^}]*\}|:[A-Za-z_][A-Za-z0-9_]*|-?\d+)$")
def _strip_query_fragment(path: str) -> str:
    """Return ``path`` without its ``?query`` / ``#fragment`` tail.

    Everything from the first ``?`` onwards is cut, then everything from the
    first ``#`` in what remains. Trailing slashes are removed as long as the
    remaining text is longer than one character, so a bare ``/`` is kept.
    """
    bare = path.partition("?")[0].partition("#")[0]
    if len(bare) > 1 and bare.endswith("/"):
        bare = bare.rstrip("/")
    return bare
def _segments(path: str) -> list[str]:
    """Break a path into its non-empty ``/``-separated pieces.

    Query string and fragment are removed first, so ``/api/users/{id}?x=1``
    yields ``['api', 'users', '{id}']``.
    """
    pieces = _strip_query_fragment(path).split("/")
    # Leading, trailing and doubled slashes produce empty strings; drop them.
    return list(filter(None, pieces))
def _is_placeholder(seg: str) -> bool:
    """Tell whether a path segment is a placeholder rather than a literal.

    A segment counts as a placeholder when ``_PLACEHOLDER_RE`` matches it:
    an integer, ``{...}``, ``${...}``, or a ``:name`` segment.
    """
    return _PLACEHOLDER_RE.match(seg) is not None
def _normalise_path(path: str) -> list[str]:
    """Split ``path`` into segments, replacing each placeholder by ``{*}``.

    Using one marker for every placeholder syntax lets two paths be compared
    segment-by-segment regardless of how each side spells its variables.
    """
    normalised: list[str] = []
    for segment in _segments(path):
        if _is_placeholder(segment):
            normalised.append("{*}")
        else:
            normalised.append(segment)
    return normalised
def _path_specificity(segs: list[str]) -> int:
    """Count the literal (non-``{*}``) segments of a normalised path.

    A higher count means a more concrete path; the number is used as a
    tie-breaker when several routes match the same fetch equally well.
    """
    return sum(segment != "{*}" for segment in segs)
def _route_candidates(graph: nx.MultiDiGraph) -> list[tuple[str, str, str]]:
    """Collect ``(handler_qualname, method, path)`` for every ROUTE edge.

    The edge's source node is taken to be the handler; its ``qualname``
    attribute is used, falling back to the node id when absent. ``method``
    and ``path`` are read from the edge's ``metadata`` dict, and the method
    is upper-cased. Edges with non-dict metadata, or with an empty method or
    path, are skipped.
    """
    route_key = EdgeKind.ROUTE.value
    found: list[tuple[str, str, str]] = []
    for handler_id, _target, edge_key, payload in graph.edges(keys=True, data=True):
        if edge_key != route_key:
            continue
        meta = payload.get("metadata") or {}
        if not isinstance(meta, dict):
            continue
        verb = str(meta.get("method") or "").upper()
        route_path = str(meta.get("path") or "")
        if verb and route_path:
            node_attrs = graph.nodes.get(handler_id) or {}
            handler_qn = str(node_attrs.get("qualname") or handler_id)
            found.append((handler_qn, verb, route_path))
    return found
def _handler_param_names(graph: nx.MultiDiGraph, handler_qn: str) -> list[str]:
    """Return the parameter names of the handler, for body-key scoring.

    The first FUNCTION or METHOD node whose ``qualname`` equals
    ``handler_qn`` is used. Names come from the dict entries of
    ``metadata.params``; leading ``*`` characters are stripped and ``self`` /
    ``cls`` are left out. Returns ``[]`` when no such node exists.
    """
    callable_kinds = (NodeKind.FUNCTION.value, NodeKind.METHOD.value)
    for _node_id, node in graph.nodes(data=True):
        if str(node.get("qualname") or "") != handler_qn:
            continue
        if str(node.get("kind") or "") not in callable_kinds:
            continue
        meta = node.get("metadata") or {}
        declared = (meta.get("params") or []) if isinstance(meta, dict) else []
        bare_names = (
            str(param.get("name") or "").lstrip("*")
            for param in declared
            if isinstance(param, dict)
        )
        return [n for n in bare_names if n and n not in ("self", "cls")]
    return []
def match_route(
    graph: nx.MultiDiGraph,
    fetch_url: str,
    fetch_method: str = "GET",
    *,
    body_keys: list[str] | None = None,
) -> tuple[str, float] | None:
    """Find the backend ROUTE handler that best matches a frontend fetch.

    Returns ``(handler_qualname, confidence)``, or ``None`` when no route
    with the same HTTP method matches.

    Scoring:
      * **1.0** — normalised segments are equal and neither side contains a
        placeholder
      * **0.9** — normalised segments are equal but at least one side
        contains a placeholder
      * **0.5** — the route's segments are a non-empty prefix of the fetch's
      * **+0.05** — added when any of ``body_keys`` is also a handler
        parameter name; the result is capped at 1.0 / 0.95 / 0.7 for the
        three base scores above respectively

    Query strings, fragments and trailing slashes are ignored; the method is
    compared case-insensitively (a falsy ``fetch_method`` means ``GET``).
    Among equally scored routes the one with more literal segments wins;
    if that ties too, the earliest candidate is kept.
    """
    verb = (fetch_method or "GET").upper()
    wanted = _normalise_path(fetch_url)
    fetch_has_placeholder = any(_is_placeholder(s) for s in _segments(fetch_url))

    winner: tuple[str, float, int] | None = None  # (qualname, score, specificity)
    for handler_qn, route_method, route_path in _route_candidates(graph):
        if route_method != verb:
            continue
        offered = _normalise_path(route_path)
        if wanted == offered:
            route_has_placeholder = any(
                _is_placeholder(s) for s in _segments(route_path)
            )
            score = 0.9 if (fetch_has_placeholder or route_has_placeholder) else 1.0
        elif offered and wanted[: len(offered)] == offered:
            # Last-resort fuzzy match: the route is a strict path prefix.
            score = 0.5
        else:
            continue
        specificity = _path_specificity(offered)

        # Body-key bonus: any overlap with handler params nudges the score up.
        if body_keys and set(body_keys) & set(_handler_param_names(graph, handler_qn)):
            if score >= 1.0:
                ceiling = 1.0
            elif score >= 0.9:
                ceiling = 0.95
            else:
                ceiling = 0.7
            score = min(ceiling, score + 0.05)

        # Higher score wins; specificity breaks ties; first seen wins otherwise.
        if winner is None or (score, specificity) > (winner[1], winner[2]):
            winner = (handler_qn, score, specificity)

    if winner is None:
        return None
    return (winner[0], winner[1])
# File suffixes that mark a node as belonging to the frontend layer.
_FRONTEND_EXTS = (".tsx", ".jsx")
# Recognises a URL-shaped trace entry: an optional ``url:`` prefix, an HTTP
# verb (or WEBSOCKET) as group 1, whitespace, then a non-whitespace path as
# group 2. Matching is case-insensitive.
_FETCH_ENTRY_RE = re.compile(
    r"^\s*(?:url:\s*)?(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS|TRACE|WEBSOCKET)\s+(\S+)\s*$",
    re.IGNORECASE,
)
def _resolve_entry_node(
    graph: nx.MultiDiGraph, entry: str
) -> str | None:
    """Return the id (as ``str``) of the node whose qualname is ``entry``.

    Surrounding whitespace in ``entry`` is ignored. An exact match anywhere
    in the graph takes priority; otherwise the first node whose qualname
    matches case-insensitively is returned. ``None`` means no node matched.
    """
    wanted = entry.strip()
    wanted_lower = wanted.lower()
    loose_hit: str | None = None
    for node_id, node in graph.nodes(data=True):
        qualname = str(node.get("qualname") or "")
        if qualname == wanted:
            return str(node_id)
        # Remember only the first case-insensitive hit as the fallback.
        if loose_hit is None and qualname.lower() == wanted_lower:
            loose_hit = str(node_id)
    return loose_hit
def _layer_for(attrs: dict[str, Any]) -> str:
    """Derive the layer label (``db`` / ``frontend`` / ``backend``) of a node.

    ``metadata.role`` decides first: ``REPO`` means ``db`` and ``COMPONENT``
    means ``frontend``. Otherwise a file name ending in one of
    ``_FRONTEND_EXTS`` (compared in lower case) means ``frontend``;
    everything else is ``backend``.
    """
    meta = attrs.get("metadata") or {}
    role = str(meta.get("role") or "") if isinstance(meta, dict) else ""
    if role == "REPO":
        return "db"
    if role == "COMPONENT":
        return "frontend"
    lowered_file = str(attrs.get("file") or "").lower()
    return "frontend" if lowered_file.endswith(_FRONTEND_EXTS) else "backend"
def _hop_from_node(
    graph: nx.MultiDiGraph,
    node_id: str,
    *,
    args: list[str] | None = None,
    kwargs: dict[str, str] | None = None,
    confidence: float = 1.0,
) -> FlowHop:
    """Build a :class:`FlowHop` from the attributes of graph node ``node_id``.

    A node missing from the graph is treated as having no attributes, in
    which case the hop's qualname falls back to ``node_id``. ``args`` and
    ``kwargs`` are copied onto the hop; ``method`` and ``path`` are left
    unset for the caller to fill in.
    """
    node = graph.nodes.get(node_id) or {}
    meta = node.get("metadata") or {}
    role_value = meta.get("role") if isinstance(meta, dict) else None
    return FlowHop(
        layer=_layer_for(node),
        qualname=str(node.get("qualname") or node_id),
        file=str(node.get("file") or ""),
        line=int(node.get("line_start") or 0),
        args=list(args or []),
        kwargs=dict(kwargs or {}),
        role=str(role_value) if role_value else None,
        confidence=confidence,
    )
def _outgoing_calls(
    graph: nx.MultiDiGraph, node_id: str
) -> list[tuple[str, dict[str, Any]]]:
    """List ``(target_id, edge_metadata)`` for the node's outgoing CALLS edges.

    Targets that resolve to real nodes come first, followed by unresolved
    sentinels (node id or qualname starting with ``unresolved::``); within
    each group the graph's edge order is kept. The trace walker follows the
    first unvisited callee, so this ordering makes real next-hops win over
    decorator stubs and unbound name references. Non-dict edge metadata is
    replaced by ``{}``.
    """
    resolved: list[tuple[str, dict[str, Any]]] = []
    unresolved: list[tuple[str, dict[str, Any]]] = []
    for _src, target, edge_key, payload in graph.out_edges(
        node_id, keys=True, data=True
    ):
        if edge_key != EdgeKind.CALLS.value:
            continue
        meta = payload.get("metadata") or {}
        target_id = str(target)
        target_attrs = graph.nodes.get(target_id) or {}
        target_qn = str(target_attrs.get("qualname") or target_id)
        is_sentinel = target_qn.startswith("unresolved::") or target_id.startswith(
            "unresolved::"
        )
        bucket = unresolved if is_sentinel else resolved
        bucket.append((target_id, meta if isinstance(meta, dict) else {}))
    return resolved + unresolved
def _outgoing_data_edges(
    graph: nx.MultiDiGraph, node_id: str
) -> list[tuple[str, str]]:
    """List ``(target_id, edge_kind)`` for outgoing READS_FROM / WRITES_TO edges."""
    data_kinds = (EdgeKind.READS_FROM.value, EdgeKind.WRITES_TO.value)
    return [
        (str(target), str(edge_key))
        for _src, target, edge_key in graph.out_edges(node_id, keys=True)
        if edge_key in data_kinds
    ]
def _outgoing_fetches(
    graph: nx.MultiDiGraph, node_id: str
) -> list[dict[str, Any]]:
    """Collect the metadata dicts of the node's outgoing FETCH_CALL edges.

    Missing metadata becomes ``{}``; metadata that is not a dict is dropped.
    """
    candidates = (
        edge[3].get("metadata") or {}
        for edge in graph.out_edges(node_id, keys=True, data=True)
        if edge[2] == EdgeKind.FETCH_CALL.value
    )
    return [meta for meta in candidates if isinstance(meta, dict)]
def _edge_args(meta: dict[str, Any]) -> tuple[list[str], dict[str, str]]:
    """Extract stringified ``(args, kwargs)`` from a CALLS edge metadata dict.

    ``args`` is honoured only when it is a list and ``kwargs`` only when it
    is a dict; any other shape (or a missing key) yields an empty container.
    """
    positional = meta.get("args") or []
    named = meta.get("kwargs") or {}
    args = list(map(str, positional)) if isinstance(positional, list) else []
    kwargs = (
        {str(key): str(value) for key, value in named.items()}
        if isinstance(named, dict)
        else {}
    )
    return args, kwargs
def trace(
    graph: nx.MultiDiGraph,
    entry: str,
    *,
    max_depth: int = 6,
) -> DataFlow | None:
    """Trace a data-flow starting from ``entry``.

    ``entry`` may be:
      * a fully-qualified symbol name — walk forwards over CALLS edges
      * ``"METHOD /path"`` (or ``"url:METHOD /path"``) — find a backend handler
        via :func:`match_route` and walk from there

    Returns ``None`` when the start node cannot be resolved in the graph.
    A URL-shaped entry for which :func:`match_route` finds no route returns
    an empty :class:`DataFlow` with ``confidence=0.0`` instead.

    The walk follows a single chain, not a tree: at each step at most one
    successor is chosen. Hop construction:
      * each visited node becomes a :class:`FlowHop`
      * args/kwargs come from the *incoming* CALLS edge that brought us here
      * outgoing FETCH_CALL edges are tried first; the first one whose
        :func:`match_route` handler is resolvable and unvisited becomes the
        next hop, carrying the match confidence
      * otherwise the first unvisited CALLS target becomes the next hop
      * only when neither applies are READS_FROM / WRITES_TO edges emitted
        as trailing ``layer=db`` hops, after which the walk stops
      * cycle detection: already-visited nodes are skipped silently
      * stop after ``max_depth`` outgoing hops (db hops are not emitted if
        the depth limit ends the walk)

    The returned ``confidence`` is the minimum over all hops.
    """
    # ---- Resolve the starting node --------------------------------------
    fetch_match = _FETCH_ENTRY_RE.match(entry)
    start_node: str | None = None
    initial_confidence = 1.0
    # The entry hop has no incoming CALLS edge, so it carries no args/kwargs.
    initial_args: list[str] = []
    initial_kwargs: dict[str, str] = {}
    initial_method: str | None = None
    initial_path: str | None = None
    if fetch_match:
        method = fetch_match.group(1).upper()
        path = fetch_match.group(2)
        result = match_route(graph, path, method)
        if result is None:
            # URL-shaped entry with no matching route: empty flow, not None.
            return DataFlow(entry=entry, hops=[], confidence=0.0)
        handler_qn, conf = result
        start_node = _resolve_entry_node(graph, handler_qn)
        initial_confidence = conf
        initial_method = method
        initial_path = path
    else:
        start_node = _resolve_entry_node(graph, entry)
    if start_node is None:
        return None
    # ---- Walk the graph -------------------------------------------------
    hops: list[FlowHop] = []
    visited: set[str] = set()
    first_hop = _hop_from_node(
        graph,
        start_node,
        args=initial_args,
        kwargs=initial_kwargs,
        confidence=initial_confidence,
    )
    if initial_method is not None:
        first_hop.method = initial_method
    if initial_path is not None:
        first_hop.path = initial_path
    hops.append(first_hop)
    visited.add(start_node)
    current = start_node
    # One entry per emitted hop; the minimum becomes the chain confidence.
    confidences: list[float] = [initial_confidence]
    depth = 0
    while depth < max_depth:
        # 1. Cross-layer fetch transition (if any)
        fetches = _outgoing_fetches(graph, current)
        consumed_via_fetch = False
        for fmeta in fetches:
            url = str(fmeta.get("url") or "")
            method = str(fmeta.get("method") or "GET")
            body_keys_raw = fmeta.get("body_keys") or []
            body_keys = (
                [str(k) for k in body_keys_raw]
                if isinstance(body_keys_raw, list)
                else None
            )
            result = match_route(graph, url, method, body_keys=body_keys)
            if result is None:
                continue
            handler_qn, conf = result
            handler_node = _resolve_entry_node(graph, handler_qn)
            # Skip fetches whose handler is unknown or would close a cycle.
            if handler_node is None or handler_node in visited:
                continue
            hop = _hop_from_node(
                graph,
                handler_node,
                args=[],
                kwargs={},
                confidence=conf,
            )
            # The handler hop records the verb and URL exactly as the fetch
            # edge stored them.
            hop.method = method
            hop.path = url
            hops.append(hop)
            visited.add(handler_node)
            confidences.append(conf)
            current = handler_node
            consumed_via_fetch = True
            break  # follow one fetch per hop
        if consumed_via_fetch:
            depth += 1
            continue
        # 2. Standard CALLS traversal — pick the first outgoing edge
        #    (in _outgoing_calls order) whose target has not been visited
        callees = _outgoing_calls(graph, current)
        next_step: tuple[str, list[str], dict[str, str]] | None = None
        for dst, meta in callees:
            if dst in visited:
                continue
            args, kwargs = _edge_args(meta)
            next_step = (dst, args, kwargs)
            break
        if next_step is not None:
            dst, args, kwargs = next_step
            hop = _hop_from_node(
                graph,
                dst,
                args=args,
                kwargs=kwargs,
                confidence=1.0,
            )
            hops.append(hop)
            visited.add(dst)
            confidences.append(1.0)
            current = dst
            depth += 1
            continue
        # 3. Terminal data edges (READS_FROM / WRITES_TO) — emit and stop
        for dst, kind in _outgoing_data_edges(graph, current):
            if dst in visited:
                continue
            db_attrs = graph.nodes.get(dst) or {}
            db_qn = str(db_attrs.get("qualname") or dst)
            # db hops are built directly: layer and role are fixed here
            # rather than derived from the target node's metadata.
            db_hop = FlowHop(
                layer="db",
                qualname=db_qn,
                file=str(db_attrs.get("file") or ""),
                line=int(db_attrs.get("line_start") or 0),
                role="REPO",
                confidence=1.0,
            )
            # Record which edge kind produced the hop (read vs. write).
            db_hop.kwargs = {"op": kind}
            hops.append(db_hop)
            visited.add(dst)
            confidences.append(1.0)
        break
    final_conf = min(confidences) if confidences else 0.0
    return DataFlow(entry=entry, hops=hops, confidence=final_conf)
def shape_hops_for_handler(
    graph: nx.MultiDiGraph,
    handler_qn: str,
    *,
    method: str = "",
    path: str = "",
    max_depth: int = 6,
) -> dict[str, Any]:
    """Build the per-handler ``dataflow`` payload for the HLD view.

    Output shape (matches the v0.3 unified-trace contract)::

        {
          "hops": [
            {"kind": "FETCH_CALL"|"ROUTE"|"CALL"|"READS_FROM"|"WRITES_TO",
             "qualname": str, "file": str, "line": int,
             "args": [str, ...], "role": str | None,
             "body_keys": [str, ...]   # FETCH_CALL only
            },
            ...
          ],
          "confidence": float,
        }

    Per-hop confidence is not reported; only the chain confidence from
    :func:`trace` is. Every hop additionally receives an ``arg_flow`` dict.

    Steps:

    * run :func:`trace` from ``handler_qn``; an empty ``handler_qn`` or an
      empty/failed trace yields ``{"hops": [], "confidence": 0.0}``
    * put frontend ``FETCH_CALL`` callers that resolve to this handler first
    * drop hops whose qualname starts with ``unresolved::``
    * label the first remaining hop ``ROUTE``, later ones ``CALL``, and
      db-layer hops ``READS_FROM`` / ``WRITES_TO``
    * stamp ``method`` / ``path`` (when given) onto the ROUTE hop and, if it
      has no args, seed them from the handler's own parameter names
    """
    if not handler_qn:
        return {"hops": [], "confidence": 0.0}
    flow = trace(graph, handler_qn, max_depth=max_depth)
    if flow is None or not flow.hops:
        return {"hops": [], "confidence": 0.0}

    # Frontend callers lead the chain so it has a real entry point when the
    # repo contains one.
    shaped: list[dict[str, Any]] = list(
        _frontend_fetch_hops_for_handler(graph, handler_qn)
    )

    # Unresolved sentinels (e.g. decorator call sites the resolver never
    # bound) are dropped and do not consume a position index.
    resolved_hops = [
        hop for hop in flow.hops if not hop.qualname.startswith("unresolved::")
    ]
    for position, hop in enumerate(resolved_hops):
        kind = _classify_hop_kind(hop, position)
        is_db_hop = kind in ("READS_FROM", "WRITES_TO")
        shaped.append(
            {
                "kind": kind,
                "qualname": hop.qualname,
                "file": hop.file,
                "line": hop.line,
                "args": list(hop.args),
                # trace() hard-codes role="REPO" on db hops; re-read the node
                # so unannotated targets surface as null per the contract.
                "role": _node_role(graph, hop.qualname) if is_db_hop else hop.role,
            }
        )

    # The ROUTE hop may not be first when FETCH_CALL hops were prepended.
    # trace() cannot supply args for the entry hop (no incoming CALLS edge),
    # so the handler's own signature seeds them when they are empty.
    handler_params = _handler_param_names_for_arg_flow(graph, handler_qn)
    route_hop = next((h for h in shaped if h.get("kind") == "ROUTE"), None)
    if route_hop is not None:
        if method:
            route_hop["method"] = method
        if path:
            route_hop["path"] = path
        if not route_hop.get("args") and handler_params:
            route_hop["args"] = list(handler_params)

    # Every hop maps the same starting-key set to its local arg name (or None).
    starting_keys = _starting_keys_from_hops(shaped)
    for shaped_hop in shaped:
        local_args = shaped_hop.get("args") or []
        if isinstance(local_args, list):
            local_names = [str(a) for a in local_args]
        else:
            local_names = []
        shaped_hop["arg_flow"] = _compute_arg_flow(starting_keys, local_names)
    return {"hops": shaped, "confidence": float(flow.confidence)}
def _node_role(graph: nx.MultiDiGraph, qualname: str) -> str | None:
    """Return ``metadata.role`` of the first node with this qualname.

    ``None`` is returned for an empty ``qualname``, when no node matches, or
    when the first matching node has no (truthy) role. Only the first match
    is inspected.
    """
    if not qualname:
        return None
    for _node_id, node in graph.nodes(data=True):
        if str(node.get("qualname") or "") == qualname:
            meta = node.get("metadata") or {}
            role = meta.get("role") if isinstance(meta, dict) else None
            return str(role) if role else None
    return None
def _handler_param_names_for_arg_flow(
    graph: nx.MultiDiGraph, qualname: str
) -> list[str]:
    """Return the handler's parameter names, used to seed arg-flow keys.

    Names are read from ``metadata.params`` of the first node whose qualname
    matches. The :func:`trace` walker has no incoming CALLS edge for the
    entry hop, so the ROUTE hop's args are taken from the node itself.
    ``self`` / ``cls`` and empty names are skipped; malformed metadata or a
    missing node gives ``[]``.
    """
    if not qualname:
        return []
    node = next(
        (
            attrs
            for _node_id, attrs in graph.nodes(data=True)
            if str(attrs.get("qualname") or "") == qualname
        ),
        None,
    )
    if node is None:
        return []
    meta = node.get("metadata") or {}
    params = (meta.get("params") or []) if isinstance(meta, dict) else None
    if not isinstance(params, list):
        return []
    names = (str(p.get("name") or "") for p in params if isinstance(p, dict))
    return [name for name in names if name and name not in ("self", "cls")]
# Zero-width position between a lowercase letter or digit and an uppercase letter.
_CAMEL_BOUNDARY_RE = re.compile(r"(?<=[a-z0-9])(?=[A-Z])")


def _normalise_arg_name(name: str) -> str:
    """Reduce an argument name to a canonical form for cross-hop matching.

    Processing order:

    1. strip surrounding whitespace, then surrounding quote characters
    2. strip leading and trailing underscores
    3. split at camelCase boundaries and at underscores
    4. join the non-empty tokens and lowercase the result

    Examples::

        userId       -> "userid"
        user_id      -> "userid"
        _user_id     -> "userid"
        UserID       -> "userid"
        userid       -> "userid"

    An empty name, or one that is empty after stripping, gives ``""``.
    """
    if not name:
        return ""
    unquoted = name.strip().strip("'\"`")
    core = unquoted.strip("_") if unquoted else ""
    if not core:
        return ""
    # Mark camelCase boundaries with "_" so one split handles both styles.
    pieces = _CAMEL_BOUNDARY_RE.sub("_", core).split("_")
    return "".join(filter(None, pieces)).lower()
def _compute_arg_flow(
    starting_keys: list[str], hop_args: list[str]
) -> dict[str, str | None]:
    """Pair every starting key with the hop arg that normalises to the same name.

    The result has one entry per starting key, in its original spelling. The
    value is the first arg of this hop whose normalised name equals the
    key's, or ``None`` when there is none (or the key normalises to the
    empty string). Because the key set does not depend on the hop, consumers
    can render a stable column count across a chain.
    """
    flow: dict[str, str | None] = {}
    if not starting_keys:
        return flow
    # normalised name -> first hop arg carrying it; empty names never match.
    first_arg_by_name: dict[str, str] = {}
    for arg in hop_args:
        normalised = _normalise_arg_name(arg)
        if normalised:
            first_arg_by_name.setdefault(normalised, arg)
    for key in starting_keys:
        flow[key] = first_arg_by_name.get(_normalise_arg_name(key))
    return flow
def _starting_keys_from_hops(hops: list[dict[str, Any]]) -> list[str]:
    """Choose the starting keys for arg-flow propagation.

    * With a FETCH_CALL hop present, the first such hop supplies its
      ``body_keys`` followed by its ``args``.
    * Without one, the first hop's ``args`` are used.
    * Sources that are not lists are ignored.

    Each key is stringified, stripped of surrounding whitespace and quote
    characters, and de-duplicated keeping first-seen order. An empty result
    means ``arg_flow`` will be ``{}`` for every hop.
    """
    if not hops:
        return []
    fetch_hop = next((h for h in hops if h.get("kind") == "FETCH_CALL"), None)
    if fetch_hop is not None:
        sources = [fetch_hop.get("body_keys") or [], fetch_hop.get("args") or []]
    else:
        sources = [hops[0].get("args") or []]

    # A dict preserves insertion order, giving ordered de-duplication.
    ordered: dict[str, None] = {}
    for source in sources:
        if not isinstance(source, list):
            continue
        for item in source:
            cleaned = str(item).strip().strip("'\"`")
            if cleaned:
                ordered.setdefault(cleaned, None)
    return list(ordered)
def _classify_hop_kind(hop: FlowHop, index: int) -> str:
    """Translate a :class:`FlowHop` into one of the contract ``kind`` strings.

    Hops outside the db layer are ``ROUTE`` at index 0 and ``CALL``
    elsewhere. db-layer hops are ``WRITES_TO`` when their ``op`` kwarg says
    so, and ``READS_FROM`` in every other case.
    """
    if hop.layer != "db":
        return "ROUTE" if index == 0 else "CALL"
    op = hop.kwargs.get("op") if hop.kwargs else ""
    is_write = op in (EdgeKind.WRITES_TO.value, "WRITES_TO")
    return "WRITES_TO" if is_write else "READS_FROM"
def _frontend_fetch_hops_for_handler(
    graph: nx.MultiDiGraph, handler_qn: str
) -> list[dict[str, Any]]:
    """Build FETCH_CALL hop dicts for callers whose fetch resolves to ``handler_qn``.

    Every FETCH_CALL edge is run through :func:`match_route`; edges whose
    best match is this handler contribute one hop for their source node. At
    most one hop is kept per caller qualname (the first edge seen), callers
    without a qualname are skipped, and the result is sorted by qualname and
    then URL.
    """
    hop_by_caller: dict[str, dict[str, Any]] = {}
    for caller_id, _target, edge_key, payload in graph.edges(keys=True, data=True):
        if edge_key != EdgeKind.FETCH_CALL.value:
            continue
        meta = payload.get("metadata") or {}
        if not isinstance(meta, dict):
            continue
        url = str(meta.get("url") or "")
        verb = str(meta.get("method") or "GET")
        raw_keys = meta.get("body_keys") or []
        body_keys = [str(k) for k in raw_keys] if isinstance(raw_keys, list) else []
        matched = match_route(graph, url, verb, body_keys=body_keys or None)
        if matched is None or matched[0] != handler_qn:
            continue
        caller = graph.nodes.get(caller_id) or {}
        caller_qn = str(caller.get("qualname") or "")
        if not caller_qn or caller_qn in hop_by_caller:
            continue
        caller_meta = caller.get("metadata") or {}
        role = caller_meta.get("role") if isinstance(caller_meta, dict) else None
        hop_by_caller[caller_qn] = {
            "kind": "FETCH_CALL",
            "qualname": caller_qn,
            "file": str(caller.get("file") or ""),
            "line": int(caller.get("line_start") or 0),
            "args": [],
            "role": str(role) if role else None,
            "body_keys": body_keys,
            "method": verb,
            "url": url,
        }
    return sorted(
        hop_by_caller.values(),
        key=lambda hop: (str(hop["qualname"]), str(hop.get("url") or "")),
    )
# Names exported by ``from ... import *``: the two dataclasses and the three
# entry-point functions. Underscore-prefixed helpers are intentionally omitted.
__all__ = [
    "DataFlow",
    "FlowHop",
    "match_route",
    "shape_hops_for_handler",
    "trace",
]