PyPI - codedebrief - Versions diffs - 0.11.0__py3-none-any.whl - Mend

codedebrief 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

codedebrief/__init__.py +12 -0
codedebrief/analysis/__init__.py +16 -0
codedebrief/analysis/common.py +527 -0
codedebrief/analysis/discovery.py +100 -0
codedebrief/analysis/languages/__init__.py +6 -0
codedebrief/analysis/languages/_common.py +68 -0
codedebrief/analysis/languages/c.py +96 -0
codedebrief/analysis/languages/cpp.py +146 -0
codedebrief/analysis/languages/csharp.py +137 -0
codedebrief/analysis/languages/go.py +157 -0
codedebrief/analysis/languages/java.py +158 -0
codedebrief/analysis/languages/php.py +83 -0
codedebrief/analysis/languages/ruby.py +75 -0
codedebrief/analysis/languages/rust.py +96 -0
codedebrief/analysis/project.py +373 -0
codedebrief/analysis/python.py +939 -0
codedebrief/analysis/registry.py +320 -0
codedebrief/analysis/treesitter.py +884 -0
codedebrief/analysis/typescript.py +1019 -0
codedebrief/artifacts.py +49 -0
codedebrief/cli.py +585 -0
codedebrief/config.py +226 -0
codedebrief/doctor.py +175 -0
codedebrief/install.py +441 -0
codedebrief/mcp_server.py +2720 -0
codedebrief/model.py +189 -0
codedebrief/py.typed +1 -0
codedebrief/quality.py +392 -0
codedebrief/query.py +641 -0
codedebrief/render/__init__.py +6 -0
codedebrief/render/assets/generated/codedebrief-viewer-runtime.iife.js +10 -0
codedebrief/render/assets/panels.js +462 -0
codedebrief/render/assets/shell.js +1649 -0
codedebrief/render/assets/styles.css +1715 -0
codedebrief/render/assets/tree.js +616 -0
codedebrief/render/html.py +191 -0
codedebrief/render/markdown.py +153 -0
codedebrief/render/payload.py +326 -0
codedebrief/render/snapshot.py +769 -0
codedebrief/schema/codedebrief.schema.json +449 -0
codedebrief/util.py +65 -0
codedebrief/validation.py +214 -0
codedebrief-0.11.0.dist-info/METADATA +426 -0
codedebrief-0.11.0.dist-info/RECORD +48 -0
codedebrief-0.11.0.dist-info/WHEEL +4 -0
codedebrief-0.11.0.dist-info/entry_points.txt +2 -0
codedebrief-0.11.0.dist-info/licenses/LICENSE +176 -0
codedebrief-0.11.0.dist-info/licenses/NOTICE +9 -0

codedebrief/render/html.py ADDED Viewed

@@ -0,0 +1,191 @@
+from __future__ import annotations
+import json
+import re
+from pathlib import Path
+from codedebrief.model import ProjectModel
+from codedebrief.render.payload import build_payload
+# A literal ``</script`` anywhere inside an inlined ``<script>`` body terminates the script
+# element early in the HTML parser, corrupting the page (it does not matter that it sits in
+# a JS string or comment). Neutralize ONLY that exact sequence -- ``<\/script`` is identical
+# to ``</script`` inside any JS string/regex/comment, so this is behavior-preserving, and it
+# leaves other ``</...`` markup (e.g. ``</filter>`` inside an innerHTML template) untouched.
+# The tag name is captured (group 1) because matching is case-insensitive: the replacement
+# re-emits the captured text so ``</SCRIPT`` keeps its original letter case.
+_SCRIPT_CLOSE = re.compile(r"</(script)", re.IGNORECASE)
+def _asset(name: str) -> str:
+    """Return the UTF-8 text of the bundled file ``name`` under the sibling ``assets`` directory."""
+    assets_dir = Path(__file__).parent / "assets"
+    asset_path = assets_dir / name
+    return asset_path.read_text(encoding="utf-8")
+def _inline_js(name: str) -> str:
+    """Load the asset ``name`` and rewrite every ``</script`` as ``<\\/script`` for safe inlining."""
+    script_text = _asset(name)
+    return _SCRIPT_CLOSE.sub(r"<\\/\1", script_text)
+def _optional_inline_js(name: str) -> str:
+    """Return the inline-safe text of the asset ``name``, or ``""`` when it does not exist.
+    Delegates to ``_inline_js`` so the ``</script`` neutralization lives in one place, and
+    reads first / handles absence afterwards instead of checking ``exists()`` up front, so
+    a file that disappears between the check and the read cannot raise.
+    """
+    try:
+        return _inline_js(name)
+    except (FileNotFoundError, NotADirectoryError):
+        # The asset (or one of its parent directories) is absent: treat it as optional.
+        return ""
+def render_html(model: ProjectModel, source_root: Path | None = None) -> str:
+    """Render ``model`` as a single self-contained HTML page.
+    The payload from ``build_payload(model, source_root)`` is embedded as JSON in the
+    ``codedebrief-data`` script element, and the stylesheet and scripts from the ``assets``
+    directory are inlined into ``_HTML_TEMPLATE``. Returns the complete page text.
+    """
+    payload_data = build_payload(model, source_root)
+    # Escape EVERY ``<`` (not only ``</``). Inside a <script> element the HTML tokenizer also
+    # reacts to ``<!--`` followed by ``<script``: after that sequence the real closing tag no
+    # longer ends the element. Embedded source snippets can contain such text. ``\u003c`` is
+    # a plain JSON string escape, so ``JSON.parse`` yields exactly the same data.
+    payload = json.dumps(payload_data, ensure_ascii=False).replace("<", "\\u003c")
+    substitutions = {
+        "__STYLES__": _asset("styles.css"),
+        "__SHELL_JS__": _inline_js("shell.js"),
+        "__TREE_JS__": _inline_js("tree.js"),
+        "__PANELS_JS__": _inline_js("panels.js"),
+        "__VIEWER_RUNTIME_JS__": _optional_inline_js(
+            "generated/codedebrief-viewer-runtime.iife.js"
+        ),
+        "__CODEDEBRIEF_DATA__": payload,
+    }
+    # Substitute in ONE pass over the template. Chained ``str.replace`` calls re-scan text
+    # that was already inlined, so an asset that merely mentions a placeholder token would
+    # have it replaced too. A callable replacement also avoids backslash-template handling.
+    placeholder = re.compile("|".join(re.escape(token) for token in substitutions))
+    return placeholder.sub(lambda match: substitutions[match.group(0)], _HTML_TEMPLATE)
+_HTML_TEMPLATE = r"""<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>CodeDebrief</title>
+  <link rel="icon" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 26 40'%3E%3Ccircle cx='13' cy='7.5' r='5.5' fill='%232f63ef'/%3E%3Cline x1='13' y1='17.5' x2='13' y2='21' stroke='%237458dc' stroke-width='3' stroke-linecap='round'/%3E%3Cpolygon points='13,25 19.5,31 13,37 6.5,31' fill='%23df9a12'/%3E%3C/svg%3E">
+  <style>__STYLES__</style>
+</head>
+<body>
+  <div class="shell">
+    <header>
+      <div class="brand">
+        <div class="brand-mark" aria-hidden="true">
+          <svg viewBox="0 0 26 40" xmlns="http://www.w3.org/2000/svg">
+            <circle class="logo-node" cx="13" cy="7.5" r="5.5"></circle>
+            <line class="logo-link" x1="13" y1="17.5" x2="13" y2="21"></line>
+            <polygon class="logo-decision" points="13,25 19.5,31 13,37 6.5,31"></polygon>
+          </svg>
+        </div>
+        <div><h1>CodeDebrief</h1></div>
+      </div>
+      <div class="flow-heading">
+        <div class="eyebrow" id="flowKind">No flow selected</div>
+        <h2 id="flowTitle">Analyze a project to begin</h2>
+      </div>
+      <div class="metrics">
+        <div class="metric"><strong id="flowCount">0</strong> <span>flows</span></div>
+        <div class="metric"><strong id="entryCount">0</strong> <span>entries</span></div>
+      </div>
+    </header>
+    <aside class="left-rail" id="leftRail">
+      <div class="rail-inner">
+        <div class="rail-head codebase-head">
+          <div class="rail-head-row">
+            <h2 class="rail-title">Codebase</h2>
+          </div>
+          <input class="filter" id="globalSearch" type="search" placeholder="Find path, symbol, or flow" aria-label="Find path, symbol, or flow">
+          <select class="filter compact-filter" id="langFilter" aria-label="Filter by language" style="display:none"></select>
+        </div>
+        <div class="tree" id="tree" role="tree" aria-label="Directory tree"></div>
+        <div class="legend">
+          <span>Action</span><span class="decision">Decision</span>
+          <span class="call">Subflow</span><span class="outcome">Outcome</span>
+        </div>
+      </div>
+      <div class="rail-resizer rail-resizer-left" id="leftRailResizer" role="separator" tabindex="0" aria-label="Resize codebase sidebar" aria-orientation="vertical" aria-valuemin="240" aria-valuemax="560" aria-valuenow="312" title="Resize codebase sidebar"></div>
+    </aside>
+    <main>
+      <nav id="breadcrumb" class="breadcrumb" aria-label="Canvas level"></nav>
+      <div class="canvas-toolbar" aria-label="Canvas controls">
+        <div class="tool-group" aria-label="Panels">
+          <button class="tool" id="menuButton" title="Toggle codebase tree" aria-label="Toggle codebase tree">&#9776;</button>
+          <button class="tool detail-tool" id="detailButton" title="Show source and details" aria-label="Toggle source and details" aria-pressed="false">i</button>
+        </div>
+        <div class="tool-group" aria-label="Graph viewport">
+          <button class="tool reset-tool command-tool" id="resetView" title="Collapse all expanded sections and return to the codebase root" aria-label="Collapse all expanded sections and return to the codebase root">RESET</button>
+          <button class="tool expand-tool command-tool" id="expandView" title="Expand all scopes and flows in the current graph" aria-label="Expand all scopes and flows in the current graph">EXPAND</button>
+          <button class="tool" id="fitView" title="Fit current flowchart" aria-label="Fit current flowchart">&#8982;</button>
+          <button class="tool" id="zoomOut" title="Zoom out" aria-label="Zoom out">&minus;</button>
+          <button class="tool" id="zoomIn" title="Zoom in" aria-label="Zoom in">+</button>
+        </div>
+        <div class="tool-group" aria-label="Output">
+          <button class="tool export-tool" id="exportPng" title="Export current flowchart as PNG" aria-label="Export current flowchart as PNG">PNG</button>
+          <button class="tool export-tool" id="exportJpg" title="Export current flowchart as JPG" aria-label="Export current flowchart as JPG">JPG</button>
+          <button class="tool" id="fullscreenToggle" data-action="fullscreen" title="Full screen (Esc to exit)" aria-label="Toggle full-screen canvas" aria-pressed="false">&#9974;</button>
+        </div>
+      </div>
+      <div id="typedViewerHost" class="typed-viewer-host" hidden aria-label="Framework-backed flowchart"></div>
+      <div class="empty" id="emptyState"><p>No matching flow was found.</p></div>
+    </main>
+    <aside class="right-rail" id="rightRail">
+      <div class="rail-resizer rail-resizer-right" id="rightRailResizer" role="separator" tabindex="0" aria-label="Resize details sidebar" aria-orientation="vertical" aria-valuemin="280" aria-valuemax="640" aria-valuenow="336" title="Resize details sidebar"></div>
+      <div class="rail-inner">
+        <div class="detail-drawer-head">
+          <span>Details</span>
+          <div class="panel-stack-tools" aria-label="Details sections">
+            <button class="panel-stack-control" id="detailsCollapseAll" type="button" title="Collapse all detail sections" aria-label="Collapse all detail sections">-</button>
+            <button class="panel-stack-control" id="detailsExpandAll" type="button" title="Expand all detail sections" aria-label="Expand all detail sections">+</button>
+          </div>
+          <button class="panel-close" id="detailsClose" type="button" title="Hide source and details" aria-label="Hide source and details">&times;</button>
+        </div>
+        <section class="panel panel-quality" id="qualityPanel" aria-label="Analysis health" data-collapsible-panel data-panel-state="quality">
+          <div class="panel-head" data-panel-heading>
+            <button class="panel-collapse-toggle" id="qualityPanelToggle" type="button" data-panel-toggle aria-expanded="true" aria-controls="quality" title="Collapse Analysis health" aria-label="Collapse Analysis health"><span class="panel-chevron" aria-hidden="true"></span></button>
+            <h2 class="rail-title">Analysis health</h2>
+            <span class="panel-count" id="qualityCount" aria-hidden="true"></span>
+          </div>
+          <div class="panel-body quality-scroll" id="quality" role="region" aria-label="Analysis health metrics"></div>
+        </section>
+        <section class="panel panel-source" id="sourcePanel" aria-label="Source" data-collapsible-panel data-panel-state="source" hidden>
+          <div class="panel-head" data-panel-heading>
+            <button class="panel-collapse-toggle" id="sourcePanelToggle" type="button" data-panel-toggle aria-expanded="true" aria-controls="source" title="Collapse Source" aria-label="Collapse Source"><span class="panel-chevron" aria-hidden="true"></span></button>
+            <h2 class="rail-title">Source</h2>
+            <span class="panel-file" id="sourceFile"></span>
+          </div>
+          <div class="panel-body source-scroll" id="source" role="region" aria-label="Source code">
+            <p class="panel-empty">Select a flow or node to view its source.</p>
+          </div>
+        </section>
+      </div>
+    </aside>
+  </div>
+  <!-- Visually-hidden polite live region: panels.js announces source/detail changes
+       on each selection so screen-reader users are notified when the panels re-render. -->
+  <div id="panelStatus" class="sr-only" role="status" aria-live="polite"></div>
+  <script id="codedebrief-data" type="application/json">__CODEDEBRIEF_DATA__</script>
+  <script>__SHELL_JS__</script>
+  <script>__TREE_JS__</script>
+  <script>__PANELS_JS__</script>
+  <script>__VIEWER_RUNTIME_JS__</script>
+  <script>
+    (function () {
+      const runtime = window.CodeDebriefViewer;
+      const host = document.getElementById("typedViewerHost");
+      const data = document.getElementById("codedebrief-data");
+      if (!runtime || !host || !data || !runtime.mountStandaloneCodeDebriefViewer) {
+        document.body.dataset.runtime = "unavailable";
+        return;
+      }
+      document.body.dataset.runtime = "react";
+      host.hidden = false;
+      try {
+        const payload = JSON.parse(data.textContent || "{}");
+        window.codedebriefTypedViewer = runtime.mountStandaloneCodeDebriefViewer(host, payload);
+        if (window.CodeDebrief && window.CodeDebrief.syncShellFromHash) window.CodeDebrief.syncShellFromHash();
+      } catch (error) {
+        host.hidden = true;
+        document.body.dataset.runtime = "unavailable";
+        console.error("Unable to start React viewer runtime", error);
+      }
+    })();
+  </script>
+</body>
+</html>
+"""

codedebrief/render/markdown.py ADDED Viewed

@@ -0,0 +1,153 @@
+from __future__ import annotations
+import re
+from enum import Enum
+from urllib.parse import quote
+from codedebrief.model import Flow, FlowNode, NodeKind, ProjectModel
+def render_markdown(model: ProjectModel) -> str:
+    """Render ``model`` as a Markdown report containing Mermaid flowcharts.
+    Sections, in order: a header with generation facts, an optional scope summary (when
+    ``model.metadata["scopes"]`` is non-empty), the project map, one section per entry-point
+    flow, and -- only when at least one exists -- the referenced subflows: flows that are
+    not entry points, are not marked ``metadata["test"]``, and have a non-empty ``called_by``.
+    The result always ends with exactly one trailing newline.
+    """
+    entrypoints = [flow for flow in model.flows if flow.is_entrypoint]
+    lines = [
+        "# CodeDebrief Decision Flows",
+        "",
+        "> Generated from source code. Do not edit this file manually.",
+        "",
+        f"- **Generated:** {_code_span(model.generated_at)}",
+        f"- **Source root:** {_code_span(model.root)}",
+        f"- **Flows:** {len(model.flows)}",
+        f"- **Entry points:** {len(entrypoints)}",
+    ]
+    scopes = model.metadata.get("scopes", {})
+    if scopes:
+        lines.append(
+            "- **Scopes:** "
+            + " · ".join(f"{_md_inline(name)} ({count})" for name, count in scopes.items())
+        )
+    lines.extend(["", "## Project Map", ""])
+    lines.extend(_project_map(model, entrypoints))
+    lines.extend(["", "## Entry Point Flows", ""])
+    for flow in entrypoints:
+        lines.extend(_flow_section(flow))
+    # Filter on ``called_by`` up front so the heading is not emitted above an empty section
+    # when the model has subflows but none of them is referenced by another flow.
+    referenced = [
+        flow
+        for flow in model.flows
+        if not flow.is_entrypoint and not flow.metadata.get("test") and flow.called_by
+    ]
+    if referenced:
+        lines.extend(["", "## Referenced Subflows", ""])
+        for flow in referenced:
+            lines.extend(_flow_section(flow))
+    return "\n".join(lines).rstrip() + "\n"
+def _project_map(model: ProjectModel, entrypoints: list[Flow]) -> list[str]:
+    """Build the Mermaid overview: every entry point plus the flows each one calls directly.
+    Returns the lines of a fenced ``mermaid`` block, or a single explanatory line when there
+    are no entry points. Call targets that are not flows of ``model`` are skipped; each node
+    is declared once and each (caller, callee) edge is drawn once.
+    """
+    if not entrypoints:
+        return ["No entry points were detected."]
+    def declare(flow: Flow) -> str:
+        return f'  {_mermaid_id(flow.id)}["{_escape(flow.name)}"]'
+    flows_by_id = {flow.id: flow for flow in model.flows}
+    diagram = ["```mermaid", "flowchart TD"]
+    diagram.extend(declare(entry) for entry in entrypoints)
+    declared: set[str] = {entry.id for entry in entrypoints}
+    drawn: set[tuple[str, str]] = set()
+    for entry in entrypoints:
+        for callee_id in entry.calls:
+            callee = flows_by_id.get(callee_id)
+            if callee is None or (entry.id, callee.id) in drawn:
+                continue
+            drawn.add((entry.id, callee.id))
+            if callee.id not in declared:
+                diagram.append(declare(callee))
+                declared.add(callee.id)
+            diagram.append(f"  {_mermaid_id(entry.id)} --> {_mermaid_id(callee.id)}")
+    diagram.append("```")
+    return diagram
+def _flow_section(flow: Flow) -> list[str]:
+    """Return the Markdown lines for one flow: heading, facts line, and its Mermaid chart.
+    The returned list ends with an empty string so consecutive sections are separated.
+    """
+    origin = _source_reference(flow.location.path, flow.location.start_line)
+    heading = f"### {_md_inline(flow.name)}"
+    facts = " · ".join(
+        [
+            _code_span(flow.entry_kind),
+            _code_span(flow.language),
+            _code_span(flow.framework),
+            origin,
+        ]
+    )
+    section = [heading, "", facts, "", "```mermaid", "flowchart TD"]
+    section.extend(f"  {_render_node(node)}" for node in flow.nodes)
+    for edge in flow.edges:
+        # Only labelled edges carry the quoted ``|"..."|`` annotation.
+        arrow = f'-->|"{_escape(edge.label)}"|' if edge.label else "-->"
+        section.append(f"  {_mermaid_id(edge.source)} {arrow} {_mermaid_id(edge.target)}")
+    section += ["```", ""]
+    return section
+def _enum_value(value: object) -> str:
+    """Return ``str(value.value)`` for an ``Enum`` member and ``str(value)`` for anything else."""
+    raw = value.value if isinstance(value, Enum) else value
+    return str(raw)
+def _render_node(node: FlowNode) -> str:
+    """Return the Mermaid declaration of ``node``, using a bracket shape chosen by its kind."""
+    node_id = _mermaid_id(node.id)
+    label = _escape(node.label)
+    kind = node.kind
+    # Pick the bracket pair that wraps the quoted label; anything unlisted is a plain box.
+    if kind is NodeKind.DECISION:
+        opener, closer = "{", "}"
+    elif kind is NodeKind.CALL:
+        opener, closer = "[[", "]]"
+    elif kind is NodeKind.ERROR:
+        opener, closer = "{{", "}}"
+    elif kind in {NodeKind.ENTRY, NodeKind.TERMINAL}:
+        opener, closer = "([", "])"
+    else:
+        opener, closer = "[", "]"
+    return f'{node_id}{opener}"{label}"{closer}'
+def _mermaid_id(value: str) -> str:
+    """Turn ``value`` into a Mermaid identifier: ``m`` + the text with non-alphanumerics as ``_``."""
+    sanitized = [char if char.isalnum() else "_" for char in value]
+    return "m" + "".join(sanitized)
+def _escape(value: str) -> str:
+    # Make a source-derived label safe inside a Mermaid quoted label: double backslashes,
+    # turn the double quote and angle brackets into entities, and flatten newlines, so the
+    # text can neither break out of the quotes nor inject markup.
+    replacements = {
+        "\\": "\\\\",
+        '"': "&quot;",
+        "<": "&lt;",
+        ">": "&gt;",
+        "\n": " ",
+    }
+    return value.translate(str.maketrans(replacements))
+# Characters with inline meaning in Markdown (links, emphasis, code spans, table cells, raw
+# HTML, headings, strikethrough). Each is backslash-escaped in source-derived prose.
+_MD_INLINE_SPECIAL = re.compile(r"([\\`*_\[\]()<>|#~])")
+def _md_inline(value: str) -> str:
+    """Return ``value`` on one line with every Markdown inline metacharacter backslash-escaped."""
+    single_line = re.sub(r"\s+", " ", value)
+    return _MD_INLINE_SPECIAL.sub(lambda match: "\\" + match.group(1), single_line)
+def _code_span(value: str) -> str:
+    """Wrap ``value`` in backticks, after turning backticks into ``'`` and newlines into spaces."""
+    safe = value.translate({ord("`"): "'", ord("\n"): " "})
+    return f"`{safe}`"
+def _source_reference(path: str, line: int) -> str:
+    # ``path`` comes from the analyzed source tree, so treat it as untrusted text. The
+    # visible part goes through the code-span helper (no backtick breakout); the link
+    # target is percent-encoded, keeping only ``/``, so ``)``, ``<``, ``>`` or a space in
+    # a file name cannot close the link or inject markup.
+    visible = _code_span(f"{path}:{line}")
+    target = quote(f"../{path}", safe="/")
+    anchor = f"#L{line}"
+    return f"[{visible}]({target}{anchor})"

codedebrief/render/payload.py ADDED Viewed

@@ -0,0 +1,326 @@
+from __future__ import annotations
+from pathlib import Path
+from typing import Any
+from codedebrief.model import FileRecord, Flow, ProjectModel
+from codedebrief.util import metadata_scope_names
+def build_payload(model: ProjectModel, source_root: Path | None = None) -> dict[str, Any]:
+    """Build the dict embedded in the HTML viewer from ``model``.
+    Starts from ``model.to_dict()``, overrides ``root`` with ``source_root`` when one is
+    given, and adds the derived ``tree``, ``scopes``, ``languages``, ``scope_edges`` and
+    ``source_files`` entries.
+    """
+    payload = model.to_dict()
+    if source_root is not None:
+        payload["root"] = str(source_root)
+    payload["tree"] = build_tree(model.files, model.flows)
+    scope_index = build_scope_index(model.flows)
+    payload.update(
+        scopes=scope_index,
+        languages=build_language_index(model.flows),
+        scope_edges=build_scope_edges(model.flows, scope_index),
+    )
+    # Each file's lines are stored once under ``source_files``; the call also adds a
+    # ``source`` reference to every flow dict in ``payload["flows"]`` (in-place mutation).
+    payload["source_files"] = attach_source_snippets(payload["flows"], source_root)
+    return payload
+# Per-flow cap on embedded source. A flow over a huge function (e.g. a 1000-line handler)
+# must not embed every line: keep at most this many HEAD lines and mark the tail elided, so
+# the panel can show an "N more lines" marker. The cap bounds what a single flow contributes
+# regardless of function size while keeping the head (where the entry/decisions live) intact.
+MAX_SNIPPET_LINES = 200
+def attach_source_snippets(
+    flows: list[dict[str, Any]], source_root: Path | None
+) -> dict[str, dict[str, Any]]:
+    """Attach a lightweight source reference to each flow and return the shared file store.
+    Mutates ``flows`` in place: every flow dict gets ``flow["source"]``, which is either
+    ``None`` (no source available) or a reference ``{"path", "start_line", "end_line",
+    "elided"?}`` into the returned ``source_files`` map, where
+    ``source_files[path] = {"start_line": int, "lines": [str, ...]}``.
+    Every flow in ``flows`` with a usable location takes part; this function applies no
+    test-flow filter of its own.
+    Bounding:
+    * **Per-flow cap.** A flow spanning more than :data:`MAX_SNIPPET_LINES` lines contributes
+      only its first ``MAX_SNIPPET_LINES`` lines; its reference carries ``"elided": True`` and
+      ``end_line`` stays the clamped but uncapped end, so a consumer can tell how many lines
+      were dropped.
+    * **One slice per file.** Each file is read at most once and stored once, as a single
+      contiguous slice running from the lowest ``start_line`` to the highest capped end among
+      its flows. Lines lying between two far-apart flows of the same file are therefore
+      embedded as well.
+    Tolerance: with ``source_root=None`` every flow gets ``None`` and ``{}`` is returned. A
+    flow gets ``flow["source"] = None`` when its location lacks a path or integer line
+    numbers, when the path resolves outside ``source_root``, when reading fails with
+    ``OSError`` / ``UnicodeDecodeError`` / ``ValueError`` (missing, unreadable, or not valid
+    UTF-8), or when the clamped range is empty.
+    ``flows`` is the JSON-serializable dict form (post ``model.to_dict()``); the reference is
+    added to each dict so it rides along in the embedded payload.
+    """
+    if source_root is None:
+        for flow in flows:
+            flow["source"] = None
+        return {}
+    root = source_root
+    root_resolved = root.resolve()
+    # path -> list[str] of the file's lines (newline-stripped), or None when unreadable.
+    file_cache: dict[str, list[str] | None] = {}
+    def lines_for(path: str) -> list[str] | None:
+        # Memoized reader: returns the cached result (including a cached None) on repeat calls.
+        if path in file_cache:
+            return file_cache[path]
+        result: list[str] | None
+        try:
+            # Resolve under the source root and guard against path escapes (a flow whose
+            # location.path is absolute or climbs out of the tree gets no snippet).
+            target = (root / path).resolve()
+            if root_resolved != target and root_resolved not in target.parents:
+                result = None
+            else:
+                result = target.read_text(encoding="utf-8").splitlines()
+        except (OSError, UnicodeDecodeError, ValueError):
+            result = None
+        file_cache[path] = result
+        return result
+    # First pass: resolve each flow's (clamped, capped) reference and remember the line
+    # range each file must cover. ``needed[path]`` = (min start, max capped end) over its
+    # flows, i.e. one bounding span per file rather than a set of disjoint ranges.
+    needed: dict[str, tuple[int, int]] = {}
+    for flow in flows:
+        location = flow.get("location") or {}
+        path = location.get("path")
+        start = location.get("start_line")
+        end = location.get("end_line")
+        if not path or not isinstance(start, int) or not isinstance(end, int):
+            flow["source"] = None
+            continue
+        file_lines = lines_for(path)
+        if file_lines is None:
+            flow["source"] = None
+            continue
+        # Lines are 1-based and inclusive; clamp to the file so an out-of-range end never
+        # over-reads and a degenerate range still yields whatever overlaps the file.
+        lo = max(1, start)
+        hi = min(len(file_lines), end)
+        if hi < lo:
+            flow["source"] = None
+            continue
+        # Per-flow cap: keep at most MAX_SNIPPET_LINES head lines; mark the rest elided.
+        capped_hi = min(hi, lo + MAX_SNIPPET_LINES - 1)
+        elided = capped_hi < hi
+        # The reference keeps the uncapped ``hi``; only the file store uses ``capped_hi``.
+        ref: dict[str, Any] = {"path": path, "start_line": lo, "end_line": hi}
+        if elided:
+            ref["elided"] = True
+        flow["source"] = ref
+        # The file store only needs the capped (embedded) range for each flow.
+        prev = needed.get(path)
+        if prev is None:
+            needed[path] = (lo, capped_hi)
+        else:
+            needed[path] = (min(prev[0], lo), max(prev[1], capped_hi))
+    # Second pass: embed each file once, as the slice covering its bounding span. The flow
+    # references are expected to be sliced out of this store by the consumer.
+    source_files: dict[str, dict[str, Any]] = {}
+    for path, (lo, hi) in needed.items():
+        file_lines = lines_for(path)
+        if file_lines is None:
+            continue
+        # ``lo`` is 1-based and ``hi`` inclusive, hence the ``lo - 1 : hi`` slice.
+        source_files[path] = {"start_line": lo, "lines": file_lines[lo - 1 : hi]}
+    return source_files
+def _is_test_flow(flow: Flow) -> bool:
+    """Return ``True`` when ``flow.metadata`` carries a truthy ``"test"`` entry."""
+    marker = flow.metadata.get("test")
+    return True if marker else False
+def build_tree(files: list[FileRecord], flows: list[Flow]) -> dict[str, Any]:
+    """Fold the paths of non-test flows into a nested directory/file tree.
+    Every node is ``{name, path, type, children, flow_ids}``. File leaves list the ids of
+    the non-test flows attributed to them -- first the ids recorded on the matching
+    ``FileRecord``, then any flow whose ``location.path`` is that file -- without
+    duplicates; directories carry ``[]``. A flow whose file is absent from ``files`` still
+    gets a leaf. Files and directories left without flows are pruned, and children are
+    ordered directories first, then by name.
+    """
+    eligible = [flow for flow in flows if not _is_test_flow(flow)]
+    eligible_ids = {flow.id for flow in eligible}
+    ids_by_path: dict[str, list[str]] = {}
+    claimed: dict[str, set[str]] = {}
+    def claim(path: str, flow_id: str) -> None:
+        # Record ``flow_id`` under ``path`` once, preserving first-seen order.
+        taken = claimed.setdefault(path, set())
+        if flow_id in taken:
+            return
+        taken.add(flow_id)
+        ids_by_path.setdefault(path, []).append(flow_id)
+    # Ids listed on the file records come first, restricted to non-test flows.
+    for record in files:
+        for flow_id in record.flow_ids:
+            if flow_id in eligible_ids:
+                claim(record.path, flow_id)
+    # Then every non-test flow is attributed to its own location, so none is dropped.
+    for flow in eligible:
+        claim(flow.location.path, flow.id)
+    root = _new_node("", "", "dir")
+    children_index: dict[str, dict[str, dict[str, Any]]] = {"": {}}
+    for path, flow_ids in ids_by_path.items():
+        _insert_path(root, path, flow_ids, children_index)
+    _prune_empty(root)
+    _sort_children(root)
+    return root
+def build_language_index(flows: list[Flow]) -> list[str]:
+    """Return the sorted distinct non-empty ``flow.language`` values of the non-test flows."""
+    distinct: set[str] = set()
+    for flow in flows:
+        if _is_test_flow(flow):
+            continue
+        if flow.language:
+            distinct.add(flow.language)
+    return sorted(distinct)
+def build_scope_index(flows: list[Flow]) -> dict[str, list[str]]:
+    """Map each scope name to the ids of the non-test flows that belong to it.
+    A flow's scopes are whatever ``metadata_scope_names(flow.metadata)`` returns; when that
+    is empty the scope falls back to the first segment of ``flow.location.path``. A flow
+    listed under several scopes appears under each of them. Test flows are skipped, so a
+    scope holding only test flows never appears in the result.
+    """
+    grouped: dict[str, list[str]] = {}
+    for flow in flows:
+        if _is_test_flow(flow):
+            continue
+        memberships = metadata_scope_names(flow.metadata) or [
+            _top_level_segment(flow.location.path)
+        ]
+        for scope_name in memberships:
+            grouped.setdefault(scope_name, []).append(flow.id)
+    return grouped
+def build_scope_edges(flows: list[Flow], scope_index: dict[str, list[str]]) -> list[dict[str, Any]]:
+    """Aggregate calls between different scopes into ``[{from, to, count}]`` records.
+    For each non-test flow and each of its call targets that is a non-test flow in
+    ``flows``, one call is counted for every (source scope, target scope) pair taken from
+    the two flows' memberships in ``scope_index``, skipping pairs where both scopes are the
+    same. A flow in several scopes is therefore counted once per membership. The records
+    are returned sorted by (from, to).
+    """
+    flows_by_id = {flow.id: flow for flow in flows}
+    # Invert the scope index: flow id -> the scopes listing it.
+    memberships: dict[str, list[str]] = {}
+    for scope, ids in scope_index.items():
+        for flow_id in ids:
+            memberships.setdefault(flow_id, []).append(scope)
+    tally: dict[tuple[str, str], int] = {}
+    for caller in flows:
+        if _is_test_flow(caller):
+            continue
+        caller_scopes = memberships.get(caller.id, [])
+        if not caller_scopes:
+            continue
+        for callee_id in caller.calls:
+            callee = flows_by_id.get(callee_id)
+            # Unresolved/external targets and calls into test flows are not counted.
+            if callee is None or _is_test_flow(callee):
+                continue
+            callee_scopes = memberships.get(callee_id, [])
+            crossings = (
+                (src, dst) for src in caller_scopes for dst in callee_scopes if src != dst
+            )
+            for pair in crossings:
+                tally[pair] = tally.get(pair, 0) + 1
+    edges: list[dict[str, Any]] = []
+    for (src, dst), count in sorted(tally.items()):
+        edges.append({"from": src, "to": dst, "count": count})
+    return edges
+def _top_level_segment(path: str) -> str:
+    """Return the first non-empty ``/``-separated segment of ``path``, or ``path`` if none."""
+    for segment in path.split("/"):
+        if segment:
+            return segment
+    return path
+def _new_node(name: str, path: str, node_type: str) -> dict[str, Any]:
+    """Create a tree node with fresh, empty ``children`` and ``flow_ids`` lists."""
+    return dict(name=name, path=path, type=node_type, children=[], flow_ids=[])
+def _insert_path(
+    root: dict[str, Any],
+    path: str,
+    flow_ids: list[str],
+    children_index: dict[str, dict[str, dict[str, Any]]],
+) -> None:
+    """Insert ``path`` below ``root``, creating missing nodes, and attach ``flow_ids`` to its leaf.
+    ``children_index`` maps a node path to ``{child name: child node}`` and is updated as
+    nodes are created. The last segment becomes a ``file`` node, the others ``dir`` nodes;
+    an existing node is reused as is. A path without any non-empty segment is ignored.
+    """
+    parts = [part for part in path.split("/") if part]
+    if not parts:
+        return
+    last_depth = len(parts) - 1
+    current = root
+    current_path = ""
+    for depth, part in enumerate(parts):
+        bucket = children_index.setdefault(current_path, {})
+        child_path = f"{current_path}/{part}" if current_path else part
+        existing = bucket.get(part)
+        if existing is None:
+            existing = _new_node(part, child_path, "file" if depth == last_depth else "dir")
+            current["children"].append(existing)
+            bucket[part] = existing
+            children_index.setdefault(child_path, {})
+        current = existing
+        current_path = child_path
+    # ``current`` is the leaf now; add only the ids it does not hold yet.
+    attached = current["flow_ids"]
+    for flow_id in flow_ids:
+        if flow_id not in attached:
+            attached.append(flow_id)
+def _prune_empty(node: dict[str, Any]) -> None:
+    """Remove, in place, file children without flow ids and directories left without children.
+    Works depth-first, so a directory whose whole subtree is pruned disappears as well.
+    ``node`` itself is never removed; only its ``children`` list is replaced.
+    """
+    survivors: list[dict[str, Any]] = []
+    for child in node["children"]:
+        if child["type"] != "dir":
+            keep = bool(child["flow_ids"])
+        else:
+            _prune_empty(child)
+            keep = bool(child["children"])
+        if keep:
+            survivors.append(child)
+    node["children"] = survivors
+def _sort_children(node: dict[str, Any]) -> None:
+    """Sort the tree in place, recursively: directories before files, each group by name."""
+    def rank(child: dict[str, Any]) -> tuple[int, str]:
+        return (0 if child["type"] == "dir" else 1, child["name"])
+    node["children"].sort(key=rank)
+    for child in node["children"]:
+        _sort_children(child)