codedebrief 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. codedebrief/__init__.py +12 -0
  2. codedebrief/analysis/__init__.py +16 -0
  3. codedebrief/analysis/common.py +527 -0
  4. codedebrief/analysis/discovery.py +100 -0
  5. codedebrief/analysis/languages/__init__.py +6 -0
  6. codedebrief/analysis/languages/_common.py +68 -0
  7. codedebrief/analysis/languages/c.py +96 -0
  8. codedebrief/analysis/languages/cpp.py +146 -0
  9. codedebrief/analysis/languages/csharp.py +137 -0
  10. codedebrief/analysis/languages/go.py +157 -0
  11. codedebrief/analysis/languages/java.py +158 -0
  12. codedebrief/analysis/languages/php.py +83 -0
  13. codedebrief/analysis/languages/ruby.py +75 -0
  14. codedebrief/analysis/languages/rust.py +96 -0
  15. codedebrief/analysis/project.py +373 -0
  16. codedebrief/analysis/python.py +939 -0
  17. codedebrief/analysis/registry.py +320 -0
  18. codedebrief/analysis/treesitter.py +884 -0
  19. codedebrief/analysis/typescript.py +1019 -0
  20. codedebrief/artifacts.py +49 -0
  21. codedebrief/cli.py +585 -0
  22. codedebrief/config.py +226 -0
  23. codedebrief/doctor.py +175 -0
  24. codedebrief/install.py +441 -0
  25. codedebrief/mcp_server.py +2720 -0
  26. codedebrief/model.py +189 -0
  27. codedebrief/py.typed +1 -0
  28. codedebrief/quality.py +392 -0
  29. codedebrief/query.py +641 -0
  30. codedebrief/render/__init__.py +6 -0
  31. codedebrief/render/assets/generated/codedebrief-viewer-runtime.iife.js +10 -0
  32. codedebrief/render/assets/panels.js +462 -0
  33. codedebrief/render/assets/shell.js +1649 -0
  34. codedebrief/render/assets/styles.css +1715 -0
  35. codedebrief/render/assets/tree.js +616 -0
  36. codedebrief/render/html.py +191 -0
  37. codedebrief/render/markdown.py +153 -0
  38. codedebrief/render/payload.py +326 -0
  39. codedebrief/render/snapshot.py +769 -0
  40. codedebrief/schema/codedebrief.schema.json +449 -0
  41. codedebrief/util.py +65 -0
  42. codedebrief/validation.py +214 -0
  43. codedebrief-0.11.0.dist-info/METADATA +426 -0
  44. codedebrief-0.11.0.dist-info/RECORD +48 -0
  45. codedebrief-0.11.0.dist-info/WHEEL +4 -0
  46. codedebrief-0.11.0.dist-info/entry_points.txt +2 -0
  47. codedebrief-0.11.0.dist-info/licenses/LICENSE +176 -0
  48. codedebrief-0.11.0.dist-info/licenses/NOTICE +9 -0
@@ -0,0 +1,191 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ from pathlib import Path
6
+
7
+ from codedebrief.model import ProjectModel
8
+ from codedebrief.render.payload import build_payload
9
+
10
# Matches the start of a script end tag (``</script``) in any letter case.
#
# A literal ``</script`` anywhere inside an inlined ``<script>`` body terminates the script
# element early in the HTML parser, corrupting the page (it does not matter that it sits in
# a JS string or comment). Neutralize ONLY that exact sequence -- ``<\/script`` is identical
# to ``</script`` inside any JS string/regex/comment, so this is behavior-preserving, and it
# leaves other ``</...`` markup (e.g. ``</filter>`` inside an innerHTML template) untouched.
# The tag name is captured as group 1 so the substitution can re-emit it with the letter
# case it had in the asset.
_SCRIPT_CLOSE = re.compile(r"</(script)", re.IGNORECASE)
16
+
17
+
18
+ def _asset(name: str) -> str:
19
+ return (Path(__file__).parent / "assets" / name).read_text(encoding="utf-8")
20
+
21
+
22
def _inline_js(name: str) -> str:
    """Load a JS asset and rewrite every ``</script`` as ``<\\/script`` for safe inlining."""
    script_text = _asset(name)
    neutralized = _SCRIPT_CLOSE.sub(r"<\\/\1", script_text)
    return neutralized
24
+
25
+
26
def _optional_inline_js(name: str) -> str:
    """Inline a JS asset if it exists; return an empty string when the file is absent.

    The text gets the same ``</script`` neutralization as ``_inline_js``.
    """
    candidate = Path(__file__).parent / "assets" / name
    if candidate.exists():
        return _SCRIPT_CLOSE.sub(r"<\\/\1", candidate.read_text(encoding="utf-8"))
    # Optional asset not shipped: contribute nothing to the page.
    return ""
31
+
32
+
33
def render_html(model: ProjectModel, source_root: Path | None = None) -> str:
    """Render ``model`` as one self-contained HTML page.

    The page is ``_HTML_TEMPLATE`` with its placeholders replaced by the stylesheet, the
    inlined JS assets, and the JSON payload produced by ``build_payload``.

    Args:
        model: The analysed project to render.
        source_root: Forwarded to ``build_payload``; ``None`` by default.

    Returns:
        The complete HTML document as a string.
    """
    payload_data = build_payload(model, source_root)
    # The payload is inlined into ``<script type="application/json">`` and can contain
    # arbitrary source text. Escaping only ``</`` is not enough: ``<!--`` followed by
    # ``<script`` switches the HTML tokenizer into its "double escaped" script state, in
    # which the element's real ``</script>`` no longer closes it and the rest of the page
    # is swallowed. ``<`` can only occur inside JSON strings, so replacing every one with
    # the JSON escape ``\u003c`` removes all three sequences and still parses to exactly
    # the same data.
    payload = json.dumps(payload_data, ensure_ascii=False).replace("<", "\\u003c")
    css = _asset("styles.css")
    js = _inline_js("shell.js")
    tree_js = _inline_js("tree.js")
    panels_js = _inline_js("panels.js")
    viewer_runtime_js = _optional_inline_js("generated/codedebrief-viewer-runtime.iife.js")
    # The payload is substituted last, so placeholder-like text inside it is never
    # expanded by an earlier ``replace``.
    return (
        _HTML_TEMPLATE.replace("__STYLES__", css)
        .replace("__SHELL_JS__", js)
        .replace("__TREE_JS__", tree_js)
        .replace("__PANELS_JS__", panels_js)
        .replace("__VIEWER_RUNTIME_JS__", viewer_runtime_js)
        .replace("__CODEDEBRIEF_DATA__", payload)
    )
49
+
50
+
51
+ _HTML_TEMPLATE = r"""<!doctype html>
52
+ <html lang="en">
53
+ <head>
54
+ <meta charset="utf-8">
55
+ <meta name="viewport" content="width=device-width, initial-scale=1">
56
+ <title>CodeDebrief</title>
57
+ <link rel="icon" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 26 40'%3E%3Ccircle cx='13' cy='7.5' r='5.5' fill='%232f63ef'/%3E%3Cline x1='13' y1='17.5' x2='13' y2='21' stroke='%237458dc' stroke-width='3' stroke-linecap='round'/%3E%3Cpolygon points='13,25 19.5,31 13,37 6.5,31' fill='%23df9a12'/%3E%3C/svg%3E">
58
+ <style>__STYLES__</style>
59
+ </head>
60
+ <body>
61
+ <div class="shell">
62
+ <header>
63
+ <div class="brand">
64
+ <div class="brand-mark" aria-hidden="true">
65
+ <svg viewBox="0 0 26 40" xmlns="http://www.w3.org/2000/svg">
66
+ <circle class="logo-node" cx="13" cy="7.5" r="5.5"></circle>
67
+ <line class="logo-link" x1="13" y1="17.5" x2="13" y2="21"></line>
68
+ <polygon class="logo-decision" points="13,25 19.5,31 13,37 6.5,31"></polygon>
69
+ </svg>
70
+ </div>
71
+ <div><h1>CodeDebrief</h1></div>
72
+ </div>
73
+ <div class="flow-heading">
74
+ <div class="eyebrow" id="flowKind">No flow selected</div>
75
+ <h2 id="flowTitle">Analyze a project to begin</h2>
76
+ </div>
77
+ <div class="metrics">
78
+ <div class="metric"><strong id="flowCount">0</strong> <span>flows</span></div>
79
+ <div class="metric"><strong id="entryCount">0</strong> <span>entries</span></div>
80
+ </div>
81
+ </header>
82
+
83
+ <aside class="left-rail" id="leftRail">
84
+ <div class="rail-inner">
85
+ <div class="rail-head codebase-head">
86
+ <div class="rail-head-row">
87
+ <h2 class="rail-title">Codebase</h2>
88
+ </div>
89
+ <input class="filter" id="globalSearch" type="search" placeholder="Find path, symbol, or flow" aria-label="Find path, symbol, or flow">
90
+ <select class="filter compact-filter" id="langFilter" aria-label="Filter by language" style="display:none"></select>
91
+ </div>
92
+ <div class="tree" id="tree" role="tree" aria-label="Directory tree"></div>
93
+ <div class="legend">
94
+ <span>Action</span><span class="decision">Decision</span>
95
+ <span class="call">Subflow</span><span class="outcome">Outcome</span>
96
+ </div>
97
+ </div>
98
+ <div class="rail-resizer rail-resizer-left" id="leftRailResizer" role="separator" tabindex="0" aria-label="Resize codebase sidebar" aria-orientation="vertical" aria-valuemin="240" aria-valuemax="560" aria-valuenow="312" title="Resize codebase sidebar"></div>
99
+ </aside>
100
+
101
+ <main>
102
+ <nav id="breadcrumb" class="breadcrumb" aria-label="Canvas level"></nav>
103
+ <div class="canvas-toolbar" aria-label="Canvas controls">
104
+ <div class="tool-group" aria-label="Panels">
105
+ <button class="tool" id="menuButton" title="Toggle codebase tree" aria-label="Toggle codebase tree">&#9776;</button>
106
+ <button class="tool detail-tool" id="detailButton" title="Show source and details" aria-label="Toggle source and details" aria-pressed="false">i</button>
107
+ </div>
108
+ <div class="tool-group" aria-label="Graph viewport">
109
+ <button class="tool reset-tool command-tool" id="resetView" title="Collapse all expanded sections and return to the codebase root" aria-label="Collapse all expanded sections and return to the codebase root">RESET</button>
110
+ <button class="tool expand-tool command-tool" id="expandView" title="Expand all scopes and flows in the current graph" aria-label="Expand all scopes and flows in the current graph">EXPAND</button>
111
+ <button class="tool" id="fitView" title="Fit current flowchart" aria-label="Fit current flowchart">&#8982;</button>
112
+ <button class="tool" id="zoomOut" title="Zoom out" aria-label="Zoom out">&minus;</button>
113
+ <button class="tool" id="zoomIn" title="Zoom in" aria-label="Zoom in">+</button>
114
+ </div>
115
+ <div class="tool-group" aria-label="Output">
116
+ <button class="tool export-tool" id="exportPng" title="Export current flowchart as PNG" aria-label="Export current flowchart as PNG">PNG</button>
117
+ <button class="tool export-tool" id="exportJpg" title="Export current flowchart as JPG" aria-label="Export current flowchart as JPG">JPG</button>
118
+ <button class="tool" id="fullscreenToggle" data-action="fullscreen" title="Full screen (Esc to exit)" aria-label="Toggle full-screen canvas" aria-pressed="false">&#9974;</button>
119
+ </div>
120
+ </div>
121
+ <div id="typedViewerHost" class="typed-viewer-host" hidden aria-label="Framework-backed flowchart"></div>
122
+ <div class="empty" id="emptyState"><p>No matching flow was found.</p></div>
123
+ </main>
124
+
125
+ <aside class="right-rail" id="rightRail">
126
+ <div class="rail-resizer rail-resizer-right" id="rightRailResizer" role="separator" tabindex="0" aria-label="Resize details sidebar" aria-orientation="vertical" aria-valuemin="280" aria-valuemax="640" aria-valuenow="336" title="Resize details sidebar"></div>
127
+ <div class="rail-inner">
128
+ <div class="detail-drawer-head">
129
+ <span>Details</span>
130
+ <div class="panel-stack-tools" aria-label="Details sections">
131
+ <button class="panel-stack-control" id="detailsCollapseAll" type="button" title="Collapse all detail sections" aria-label="Collapse all detail sections">-</button>
132
+ <button class="panel-stack-control" id="detailsExpandAll" type="button" title="Expand all detail sections" aria-label="Expand all detail sections">+</button>
133
+ </div>
134
+ <button class="panel-close" id="detailsClose" type="button" title="Hide source and details" aria-label="Hide source and details">&times;</button>
135
+ </div>
136
+ <section class="panel panel-quality" id="qualityPanel" aria-label="Analysis health" data-collapsible-panel data-panel-state="quality">
137
+ <div class="panel-head" data-panel-heading>
138
+ <button class="panel-collapse-toggle" id="qualityPanelToggle" type="button" data-panel-toggle aria-expanded="true" aria-controls="quality" title="Collapse Analysis health" aria-label="Collapse Analysis health"><span class="panel-chevron" aria-hidden="true"></span></button>
139
+ <h2 class="rail-title">Analysis health</h2>
140
+ <span class="panel-count" id="qualityCount" aria-hidden="true"></span>
141
+ </div>
142
+ <div class="panel-body quality-scroll" id="quality" role="region" aria-label="Analysis health metrics"></div>
143
+ </section>
144
+ <section class="panel panel-source" id="sourcePanel" aria-label="Source" data-collapsible-panel data-panel-state="source" hidden>
145
+ <div class="panel-head" data-panel-heading>
146
+ <button class="panel-collapse-toggle" id="sourcePanelToggle" type="button" data-panel-toggle aria-expanded="true" aria-controls="source" title="Collapse Source" aria-label="Collapse Source"><span class="panel-chevron" aria-hidden="true"></span></button>
147
+ <h2 class="rail-title">Source</h2>
148
+ <span class="panel-file" id="sourceFile"></span>
149
+ </div>
150
+ <div class="panel-body source-scroll" id="source" role="region" aria-label="Source code">
151
+ <p class="panel-empty">Select a flow or node to view its source.</p>
152
+ </div>
153
+ </section>
154
+ </div>
155
+ </aside>
156
+ </div>
157
+
158
+ <!-- Visually-hidden polite live region: panels.js announces source/detail changes
159
+ on each selection so screen-reader users are notified when the panels re-render. -->
160
+ <div id="panelStatus" class="sr-only" role="status" aria-live="polite"></div>
161
+
162
+ <script id="codedebrief-data" type="application/json">__CODEDEBRIEF_DATA__</script>
163
+ <script>__SHELL_JS__</script>
164
+ <script>__TREE_JS__</script>
165
+ <script>__PANELS_JS__</script>
166
+ <script>__VIEWER_RUNTIME_JS__</script>
167
+ <script>
168
+ (function () {
169
+ const runtime = window.CodeDebriefViewer;
170
+ const host = document.getElementById("typedViewerHost");
171
+ const data = document.getElementById("codedebrief-data");
172
+ if (!runtime || !host || !data || !runtime.mountStandaloneCodeDebriefViewer) {
173
+ document.body.dataset.runtime = "unavailable";
174
+ return;
175
+ }
176
+ document.body.dataset.runtime = "react";
177
+ host.hidden = false;
178
+ try {
179
+ const payload = JSON.parse(data.textContent || "{}");
180
+ window.codedebriefTypedViewer = runtime.mountStandaloneCodeDebriefViewer(host, payload);
181
+ if (window.CodeDebrief && window.CodeDebrief.syncShellFromHash) window.CodeDebrief.syncShellFromHash();
182
+ } catch (error) {
183
+ host.hidden = true;
184
+ document.body.dataset.runtime = "unavailable";
185
+ console.error("Unable to start React viewer runtime", error);
186
+ }
187
+ })();
188
+ </script>
189
+ </body>
190
+ </html>
191
+ """
@@ -0,0 +1,153 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from enum import Enum
5
+ from urllib.parse import quote
6
+
7
+ from codedebrief.model import Flow, FlowNode, NodeKind, ProjectModel
8
+
9
+
10
def render_markdown(model: ProjectModel) -> str:
    """Render ``model`` as a Markdown report of its decision flows.

    The report has a header with summary bullets, a "Project Map" Mermaid diagram, one
    section per entry-point flow, and -- when any exist -- one section per referenced
    subflow (a non-entry, non-test flow that has at least one caller).

    Args:
        model: The analysed project to render.

    Returns:
        The Markdown text, ending in exactly one newline.
    """
    entrypoints = [flow for flow in model.flows if flow.is_entrypoint]
    lines = [
        "# CodeDebrief Decision Flows",
        "",
        "> Generated from source code. Do not edit this file manually.",
        "",
        f"- **Generated:** {_code_span(model.generated_at)}",
        f"- **Source root:** {_code_span(model.root)}",
        f"- **Flows:** {len(model.flows)}",
        f"- **Entry points:** {len(entrypoints)}",
    ]
    scopes = model.metadata.get("scopes", {})
    if scopes:
        lines.append(
            "- **Scopes:** "
            + " · ".join(f"{_md_inline(name)} ({count})" for name, count in scopes.items())
        )
    lines.extend(["", "## Project Map", ""])
    lines.extend(_project_map(model, entrypoints))

    lines.extend(["", "## Entry Point Flows", ""])
    for flow in entrypoints:
        lines.extend(_flow_section(flow))

    # Filter on ``called_by`` BEFORE deciding whether to print the heading, so a project
    # whose non-entry flows are all unreferenced does not get an empty section.
    subflows = [
        flow
        for flow in model.flows
        if not flow.is_entrypoint and not flow.metadata.get("test") and flow.called_by
    ]
    if subflows:
        lines.extend(["", "## Referenced Subflows", ""])
        for flow in subflows:
            lines.extend(_flow_section(flow))

    return "\n".join(lines).rstrip() + "\n"
45
+
46
+
47
def _project_map(model: ProjectModel, entrypoints: list[Flow]) -> list[str]:
    """Build the Mermaid "project map": entry points and the flows they call directly.

    Returns a one-line notice when there are no entry points. Otherwise returns the lines
    of a fenced ``mermaid`` block that declares every entry point, then each resolvable
    call target (declared once) together with one edge per distinct caller/target pair.
    Call ids that do not match a flow in the model are ignored.
    """
    if not entrypoints:
        return ["No entry points were detected."]

    flows_by_id = {flow.id: flow for flow in model.flows}
    out = ["```mermaid", "flowchart TD"]

    declared: set[str] = set()
    for entry in entrypoints:
        out.append(f' {_mermaid_id(entry.id)}["{_escape(entry.name)}"]')
        declared.add(entry.id)

    drawn: set[tuple[str, str]] = set()
    for entry in entrypoints:
        for callee_id in entry.calls:
            callee = flows_by_id.get(callee_id)
            if callee is None:
                # Unresolved / external call target: nothing to draw.
                continue
            edge = (entry.id, callee.id)
            if edge in drawn:
                continue
            drawn.add(edge)
            if callee.id not in declared:
                out.append(f' {_mermaid_id(callee.id)}["{_escape(callee.name)}"]')
                declared.add(callee.id)
            out.append(f" {_mermaid_id(entry.id)} --> {_mermaid_id(callee.id)}")

    out.append("```")
    return out
72
+
73
+
74
def _flow_section(flow: Flow) -> list[str]:
    """Return the Markdown lines for one flow: heading, metadata line, Mermaid flowchart.

    The section ends with an empty string so consecutive sections are separated by a
    blank line once joined.
    """
    source = _source_reference(flow.location.path, flow.location.start_line)
    summary = (
        f"{_code_span(flow.entry_kind)} · {_code_span(flow.language)} · "
        f"{_code_span(flow.framework)} · {source}"
    )
    section = [f"### {_md_inline(flow.name)}", "", summary, "", "```mermaid", "flowchart TD"]

    section.extend(f" {_render_node(node)}" for node in flow.nodes)

    for edge in flow.edges:
        # Only labelled edges get the quoted |"..."| annotation.
        if edge.label:
            label = f'|"{_escape(edge.label)}"|'
        else:
            label = ""
        section.append(f" {_mermaid_id(edge.source)} -->{label} {_mermaid_id(edge.target)}")

    section.extend(["```", ""])
    return section
93
+
94
+
95
+ def _enum_value(value: object) -> str:
96
+ if isinstance(value, Enum):
97
+ return str(value.value)
98
+ return str(value)
99
+
100
+
101
def _render_node(node: FlowNode) -> str:
    """Return the Mermaid declaration for ``node``, with a shape chosen by its kind.

    Decision, call, error and entry/terminal nodes each get their own bracket pair
    around the quoted label; every other kind uses plain square brackets.
    """
    if node.kind is NodeKind.DECISION:
        opener, closer = "{", "}"
    elif node.kind is NodeKind.CALL:
        opener, closer = "[[", "]]"
    elif node.kind is NodeKind.ERROR:
        opener, closer = "{{", "}}"
    elif node.kind in {NodeKind.ENTRY, NodeKind.TERMINAL}:
        opener, closer = "([", "])"
    else:
        opener, closer = "[", "]"

    node_id = _mermaid_id(node.id)
    label = _escape(node.label)
    return f'{node_id}{opener}"{label}"{closer}'
113
+
114
+
115
+ def _mermaid_id(value: str) -> str:
116
+ return "m" + "".join(character if character.isalnum() else "_" for character in value)
117
+
118
+
119
+ def _escape(value: str) -> str:
120
+ # Mermaid quoted labels: neutralize quote breakout and HTML so a source-derived
121
+ # label can't malform the diagram or smuggle markup.
122
+ return (
123
+ value.replace("\\", "\\\\")
124
+ .replace('"', "&quot;")
125
+ .replace("<", "&lt;")
126
+ .replace(">", "&gt;")
127
+ .replace("\n", " ")
128
+ )
129
+
130
+
131
+ # Inline metacharacters that could turn source-derived text into a live link, emphasis,
132
+ # code span, table cell, or raw HTML in a committed report.
133
+ _MD_INLINE_SPECIAL = re.compile(r"([\\`*_\[\]()<>|#~])")
134
+
135
+
136
+ def _md_inline(value: str) -> str:
137
+ """Escape a source-derived string for safe inline Markdown prose."""
138
+ collapsed = re.sub(r"\s+", " ", value)
139
+ return _MD_INLINE_SPECIAL.sub(r"\\\1", collapsed)
140
+
141
+
142
+ def _code_span(value: str) -> str:
143
+ """Render a label as an inline code span, neutralizing backtick breakout."""
144
+ return "`" + value.replace("`", "'").replace("\n", " ") + "`"
145
+
146
+
147
def _source_reference(path: str, line: int) -> str:
    """Return a Markdown link to ``path`` at ``line``, shown as an inline-code ``path:line``.

    The path is source-derived (a file name an attacker could choose), so both halves of
    the link are neutralized: the visible text goes through ``_code_span`` to stop
    backtick breakout, and the destination is percent-encoded (keeping ``/``) so a ``)``,
    ``<``, ``>``, or space cannot close the link or smuggle markup.
    """
    link_target = quote(f"../{path}", safe="/")
    visible = _code_span(f"{path}:{line}")
    return f"[{visible}]({link_target}#L{line})"
@@ -0,0 +1,326 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ from codedebrief.model import FileRecord, Flow, ProjectModel
7
+ from codedebrief.util import metadata_scope_names
8
+
9
+
10
def build_payload(model: ProjectModel, source_root: Path | None = None) -> dict[str, Any]:
    """Assemble the dict that the HTML viewer embeds as its data payload.

    Starts from ``model.to_dict()``, overrides ``root`` when ``source_root`` is given, and
    adds the derived ``tree``, ``scopes``, ``languages``, ``scope_edges`` and
    ``source_files`` entries.
    """
    payload = model.to_dict()
    if source_root is not None:
        payload["root"] = str(source_root)

    payload["tree"] = build_tree(model.files, model.flows)
    scope_index = build_scope_index(model.flows)
    payload["scopes"] = scope_index
    payload["languages"] = build_language_index(model.flows)
    payload["scope_edges"] = build_scope_edges(model.flows, scope_index)

    # Source lines are stored once per file under "source_files"; as a side effect every
    # flow dict in payload["flows"] gains a lightweight "source" reference into that
    # store, so the viewer's source panel can show real code offline.
    payload["source_files"] = attach_source_snippets(payload["flows"], source_root)
    return payload
24
+
25
+
26
# Per-flow cap on embedded source. A flow over a huge function (e.g. a 1000-line handler)
# must not embed every line: keep at most this many HEAD lines and mark the tail elided, so
# the panel can show an "N more lines" marker while the head (where the entry/decisions
# live) stays intact.
MAX_SNIPPET_LINES = 200


def _split_source_lines(text: str) -> list[str]:
    """Split ``text`` into lines on ``"\\n"`` only, without the line terminators.

    ``str.splitlines()`` is deliberately avoided: it also breaks on form feed, vertical
    tab, FS/GS/RS, NEL, U+2028 and U+2029, which would shift every later line away from
    the 1-based line numbers recorded in flow locations. A single trailing newline does
    not produce an extra empty last line, and empty text yields ``[]``.
    """
    lines = text.split("\n")
    if lines[-1] == "":
        lines.pop()
    return lines


def attach_source_snippets(
    flows: list[dict[str, Any]], source_root: Path | None
) -> dict[str, dict[str, Any]]:
    """Attach a lightweight source reference to each flow and return the shared file store.

    For every flow, ``flow["source"]`` becomes either ``None`` (no source available) or a
    reference ``{"path", "start_line", "end_line", "elided"?}`` into the returned
    ``source_files`` map. ``source_files[path] = {"start_line": int, "lines": [str, ...]}``
    embeds each file ONCE, as a single contiguous slice, so a file with many flows is not
    embedded once per flow.

    Bounding:

    * **Per-flow cap.** A flow spanning more than :data:`MAX_SNIPPET_LINES` lines
      contributes only its first ``MAX_SNIPPET_LINES`` lines to the store; its reference
      carries ``"elided": True`` and its ``end_line`` stays the clamped, uncapped end so
      the panel can show how many lines were dropped.
    * **One slice per file.** The stored slice runs from the smallest start line to the
      largest capped end line over the file's flows. It is a bounding range, so lines
      lying between two flows of the same file are included as well.

    The function is tolerant: a flow whose location is incomplete, or whose file is
    missing, outside ``source_root``, not valid UTF-8, or otherwise unreadable gets
    ``flow["source"] = None``. Each file is read at most once.

    Args:
        flows: Flow dicts (the JSON-serializable form); each is mutated in place.
        source_root: Directory that flow paths are relative to, or ``None`` to attach
            no source at all.

    Returns:
        The ``source_files`` map described above (empty when ``source_root`` is ``None``).
    """
    if source_root is None:
        for flow in flows:
            flow["source"] = None
        return {}

    root = source_root
    root_resolved = root.resolve()
    # path -> the file's lines (terminators stripped), or None when unreadable.
    file_cache: dict[str, list[str] | None] = {}

    def lines_for(path: str) -> list[str] | None:
        if path in file_cache:
            return file_cache[path]
        result: list[str] | None
        try:
            # Resolve under the source root and guard against path escapes (a flow whose
            # location.path is absolute or climbs out of the tree gets no snippet).
            target = (root / path).resolve()
            if root_resolved != target and root_resolved not in target.parents:
                result = None
            else:
                result = _split_source_lines(target.read_text(encoding="utf-8"))
        except (OSError, UnicodeDecodeError, ValueError):
            result = None
        file_cache[path] = result
        return result

    # First pass: resolve each flow's (clamped, capped) reference and remember the line
    # range each file must cover. ``needed[path]`` = (min start, max capped end).
    needed: dict[str, tuple[int, int]] = {}
    for flow in flows:
        location = flow.get("location") or {}
        path = location.get("path")
        start = location.get("start_line")
        end = location.get("end_line")
        if (
            not path
            # A non-string path cannot be joined to the root (and may be unhashable).
            or not isinstance(path, str)
            or not isinstance(start, int)
            or not isinstance(end, int)
        ):
            flow["source"] = None
            continue
        file_lines = lines_for(path)
        if file_lines is None:
            flow["source"] = None
            continue
        # Lines are 1-based and inclusive; clamp to the file so an out-of-range end never
        # over-reads and a degenerate range still yields whatever overlaps the file.
        lo = max(1, start)
        hi = min(len(file_lines), end)
        if hi < lo:
            flow["source"] = None
            continue
        # Per-flow cap: keep at most MAX_SNIPPET_LINES head lines; mark the rest elided.
        capped_hi = min(hi, lo + MAX_SNIPPET_LINES - 1)
        ref: dict[str, Any] = {"path": path, "start_line": lo, "end_line": hi}
        if capped_hi < hi:
            ref["elided"] = True
        flow["source"] = ref
        # The file store only needs the capped (embedded) range for each flow.
        prev = needed.get(path)
        if prev is None:
            needed[path] = (lo, capped_hi)
        else:
            needed[path] = (min(prev[0], lo), max(prev[1], capped_hi))

    # Second pass: embed each file once, covering the bounding range computed above.
    source_files: dict[str, dict[str, Any]] = {}
    for path, (lo, hi) in needed.items():
        file_lines = lines_for(path)
        if file_lines is None:
            continue
        source_files[path] = {"start_line": lo, "lines": file_lines[lo - 1 : hi]}
    return source_files
134
+
135
+
136
+ def _is_test_flow(flow: Flow) -> bool:
137
+ """Whether a flow is a test flow, mirroring the old left rail's ``!flow.metadata.test``."""
138
+ return bool(flow.metadata.get("test"))
139
+
140
+
141
def build_tree(files: list[FileRecord], flows: list[Flow]) -> dict[str, Any]:
    """Fold file paths into a nested directory/file tree of non-test flows.

    Every node has the shape ``{name, path, type, children, flow_ids}``. File leaves list
    the ids of the flows attributed to that file -- first the ids the file record itself
    lists, then any flow whose ``location.path`` is that file; directories always carry
    ``[]``. A flow whose file is missing from ``files`` still gets a leaf, so no flow is
    dropped. Children are ordered deterministically: directories before files, each
    group sorted by name.

    Test flows are excluded (the old left rail hid them via ``!flow.metadata.test``): a
    file left with no flows is dropped, and so is a directory with no surviving
    descendants, so counts are not inflated.
    """
    visible = [flow for flow in flows if not _is_test_flow(flow)]
    visible_ids = {flow.id for flow in visible}

    ids_by_path: dict[str, list[str]] = {}
    claimed: dict[str, set[str]] = {}

    # Pass 1: ids listed on the file records, restricted to non-test flows.
    for record in files:
        already = claimed.setdefault(record.path, set())
        fresh: list[str] = []
        for flow_id in record.flow_ids:
            if flow_id not in visible_ids or flow_id in already:
                continue
            fresh.append(flow_id)
            already.add(flow_id)
        if fresh:
            ids_by_path.setdefault(record.path, []).extend(fresh)

    # Pass 2: make sure each non-test flow appears under its own location path.
    for flow in visible:
        location_path = flow.location.path
        bucket = ids_by_path.setdefault(location_path, [])
        already = claimed.setdefault(location_path, set())
        if flow.id in already:
            continue
        bucket.append(flow.id)
        already.add(flow.id)

    root = _new_node("", "", "dir")
    children_index: dict[str, dict[str, dict[str, Any]]] = {"": {}}
    for file_path, flow_ids in ids_by_path.items():
        _insert_path(root, file_path, flow_ids, children_index)
    _prune_empty(root)
    _sort_children(root)
    return root
185
+
186
+
187
def build_language_index(flows: list[Flow]) -> list[str]:
    """Return the distinct, non-empty ``flow.language`` values of non-test flows, sorted.

    Powers the viewer's language dropdown for polyglot repos. Test flows are skipped so a
    language that only appears in tests does not surface a filter option.
    """
    seen: set[str] = set()
    for flow in flows:
        if _is_test_flow(flow):
            continue
        if flow.language:
            seen.add(flow.language)
    return sorted(seen)
195
+
196
+
197
def build_scope_index(flows: list[Flow]) -> dict[str, list[str]]:
    """Group the ids of non-test flows by scope name.

    A flow's scopes come from ``metadata_scope_names(flow.metadata)``. When that yields
    nothing, the scope is inferred as the top-level segment of ``flow.location.path``, so
    the index works with no scopes declared; scope names are never hard-coded. A flow
    with several scopes is listed under each of them.

    Test flows are skipped with the same predicate the other payload builders use, so
    scope counts agree with the directory tree, and a scope that would hold only test
    flows never appears.
    """
    grouped: dict[str, list[str]] = {}
    for flow in flows:
        if _is_test_flow(flow):
            continue
        names = metadata_scope_names(flow.metadata) or [_top_level_segment(flow.location.path)]
        for name in names:
            grouped.setdefault(name, []).append(flow.id)
    return grouped
220
+
221
+
222
def build_scope_edges(flows: list[Flow], scope_index: dict[str, list[str]]) -> list[dict[str, Any]]:
    """Aggregate cross-scope calls into ``[{from, to, count}]`` edges.

    For each non-test flow and each of its call targets that resolves to a non-test flow
    in ``flows``, one call is counted for every (source scope, target scope) pair drawn
    from the two flows' memberships in ``scope_index``. A flow listed under several
    scopes therefore has its calls counted once per membership. Pairs within the same
    scope, calls to ids not present in ``flows``, and calls into test flows are dropped.
    The result is sorted by ``(from, to)`` so the payload is deterministic.
    """
    flows_by_id = {flow.id: flow for flow in flows}

    # Invert the scope index: flow id -> every scope that lists it.
    memberships: dict[str, list[str]] = {}
    for scope_name, member_ids in scope_index.items():
        for member_id in member_ids:
            memberships.setdefault(member_id, []).append(scope_name)

    tally: dict[tuple[str, str], int] = {}
    for caller in flows:
        if _is_test_flow(caller):
            continue
        caller_scopes = memberships.get(caller.id, [])
        if not caller_scopes:
            continue
        for callee_id in caller.calls:
            callee = flows_by_id.get(callee_id)
            # Skip unresolved/external targets and never count a call into a test flow.
            if callee is None or _is_test_flow(callee):
                continue
            callee_scopes = memberships.get(callee_id, [])
            for src in caller_scopes:
                for dst in callee_scopes:
                    if src == dst:
                        continue
                    key = (src, dst)
                    tally[key] = tally.get(key, 0) + 1

    edges: list[dict[str, Any]] = []
    for (src, dst), count in sorted(tally.items()):
        edges.append({"from": src, "to": dst, "count": count})
    return edges
264
+
265
+
266
+ def _top_level_segment(path: str) -> str:
267
+ """The first path segment, or the file's own name for a root-level file."""
268
+ parts = [part for part in path.split("/") if part]
269
+ return parts[0] if parts else path
270
+
271
+
272
+ def _new_node(name: str, path: str, node_type: str) -> dict[str, Any]:
273
+ return {"name": name, "path": path, "type": node_type, "children": [], "flow_ids": []}
274
+
275
+
276
def _insert_path(
    root: dict[str, Any],
    path: str,
    flow_ids: list[str],
    children_index: dict[str, dict[str, dict[str, Any]]],
) -> None:
    """Insert ``path`` into the tree under ``root`` and attach ``flow_ids`` to its leaf.

    Missing nodes are created along the way: the last segment as a ``file``, every
    earlier one as a ``dir``. ``children_index`` maps a node path to its children by
    name, so existing nodes are found without scanning ``children`` lists. A path with
    no non-empty segment is ignored.
    """
    parts = [part for part in path.split("/") if part]
    if not parts:
        return

    last = len(parts) - 1
    cursor = root
    cursor_path = ""
    for depth, part in enumerate(parts):
        child_path = f"{cursor_path}/{part}" if cursor_path else part
        level = children_index.setdefault(cursor_path, {})
        child = level.get(part)
        if child is None:
            kind = "file" if depth == last else "dir"
            child = _new_node(part, child_path, kind)
            cursor["children"].append(child)
            level[part] = child
            children_index.setdefault(child_path, {})
        cursor, cursor_path = child, child_path

    # ``cursor`` is now the leaf; add only the ids it does not already hold.
    leaf_ids = cursor["flow_ids"]
    for flow_id in flow_ids:
        if flow_id not in leaf_ids:
            leaf_ids.append(flow_id)
304
+
305
+
306
+ def _prune_empty(node: dict[str, Any]) -> None:
307
+ """Drop file leaves with no flow ids and directories with no surviving descendants.
308
+
309
+ Recurses depth-first so a directory whose children all get pruned is itself dropped.
310
+ The root is never removed by this (callers keep it), only its empty subtrees.
311
+ """
312
+ kept: list[dict[str, Any]] = []
313
+ for child in node["children"]:
314
+ if child["type"] == "dir":
315
+ _prune_empty(child)
316
+ if child["children"]:
317
+ kept.append(child)
318
+ elif child["flow_ids"]:
319
+ kept.append(child)
320
+ node["children"] = kept
321
+
322
+
323
+ def _sort_children(node: dict[str, Any]) -> None:
324
+ node["children"].sort(key=lambda c: (c["type"] != "dir", c["name"]))
325
+ for child in node["children"]:
326
+ _sort_children(child)