codedebrief 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. codedebrief/__init__.py +12 -0
  2. codedebrief/analysis/__init__.py +16 -0
  3. codedebrief/analysis/common.py +527 -0
  4. codedebrief/analysis/discovery.py +100 -0
  5. codedebrief/analysis/languages/__init__.py +6 -0
  6. codedebrief/analysis/languages/_common.py +68 -0
  7. codedebrief/analysis/languages/c.py +96 -0
  8. codedebrief/analysis/languages/cpp.py +146 -0
  9. codedebrief/analysis/languages/csharp.py +137 -0
  10. codedebrief/analysis/languages/go.py +157 -0
  11. codedebrief/analysis/languages/java.py +158 -0
  12. codedebrief/analysis/languages/php.py +83 -0
  13. codedebrief/analysis/languages/ruby.py +75 -0
  14. codedebrief/analysis/languages/rust.py +96 -0
  15. codedebrief/analysis/project.py +373 -0
  16. codedebrief/analysis/python.py +939 -0
  17. codedebrief/analysis/registry.py +320 -0
  18. codedebrief/analysis/treesitter.py +884 -0
  19. codedebrief/analysis/typescript.py +1019 -0
  20. codedebrief/artifacts.py +49 -0
  21. codedebrief/cli.py +585 -0
  22. codedebrief/config.py +226 -0
  23. codedebrief/doctor.py +175 -0
  24. codedebrief/install.py +441 -0
  25. codedebrief/mcp_server.py +2720 -0
  26. codedebrief/model.py +189 -0
  27. codedebrief/py.typed +1 -0
  28. codedebrief/quality.py +392 -0
  29. codedebrief/query.py +641 -0
  30. codedebrief/render/__init__.py +6 -0
  31. codedebrief/render/assets/generated/codedebrief-viewer-runtime.iife.js +10 -0
  32. codedebrief/render/assets/panels.js +462 -0
  33. codedebrief/render/assets/shell.js +1649 -0
  34. codedebrief/render/assets/styles.css +1715 -0
  35. codedebrief/render/assets/tree.js +616 -0
  36. codedebrief/render/html.py +191 -0
  37. codedebrief/render/markdown.py +153 -0
  38. codedebrief/render/payload.py +326 -0
  39. codedebrief/render/snapshot.py +769 -0
  40. codedebrief/schema/codedebrief.schema.json +449 -0
  41. codedebrief/util.py +65 -0
  42. codedebrief/validation.py +214 -0
  43. codedebrief-0.11.0.dist-info/METADATA +426 -0
  44. codedebrief-0.11.0.dist-info/RECORD +48 -0
  45. codedebrief-0.11.0.dist-info/WHEEL +4 -0
  46. codedebrief-0.11.0.dist-info/entry_points.txt +2 -0
  47. codedebrief-0.11.0.dist-info/licenses/LICENSE +176 -0
  48. codedebrief-0.11.0.dist-info/licenses/NOTICE +9 -0
codedebrief/model.py ADDED
@@ -0,0 +1,189 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import asdict, dataclass, field
4
+ from datetime import datetime, timezone
5
+ from enum import Enum
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+
10
class Evidence(str, Enum):
    """Evidence level recorded on flow nodes and edges.

    Members are also ``str`` instances, so each one compares equal to its
    own string value and can be rebuilt from it with ``Evidence(value)``.
    """

    VERIFIED = "VERIFIED"
    INFERRED = "INFERRED"
    POTENTIAL_GAP = "POTENTIAL_GAP"
14
+
15
+
16
class NodeKind(str, Enum):
    """Closed set of node categories used by flow graphs.

    Members are also ``str`` instances, so each one compares equal to its
    lower-case string value and can be rebuilt with ``NodeKind(value)``.
    """

    ENTRY = "entry"
    ACTION = "action"
    DECISION = "decision"
    CALL = "call"
    TERMINAL = "terminal"
    ERROR = "error"
23
+
24
+
25
+ @dataclass(slots=True)
26
+ class SourceLocation:
27
+ path: str
28
+ start_line: int
29
+ end_line: int
30
+
31
+
32
@dataclass(slots=True)
class FlowNode:
    """One node of a flow graph."""

    # Required fields.
    id: str
    kind: NodeKind
    label: str
    location: SourceLocation
    # Optional fields; evidence defaults to VERIFIED and detail to "".
    evidence: Evidence = Evidence.VERIFIED
    detail: str = ""
    # Free-form extras; every instance gets its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)
41
+
42
+
43
@dataclass(slots=True)
class FlowEdge:
    """One directed edge of a flow graph, from ``source`` to ``target``."""

    # Required fields.
    id: str
    source: str
    target: str
    # Optional fields; label defaults to "" and evidence to VERIFIED.
    label: str = ""
    evidence: Evidence = Evidence.VERIFIED
50
+
51
+
52
+ @dataclass(slots=True)
53
+ class Flow:
54
+ id: str
55
+ name: str
56
+ symbol: str
57
+ language: str
58
+ framework: str
59
+ entry_kind: str
60
+ is_entrypoint: bool
61
+ location: SourceLocation
62
+ nodes: list[FlowNode] = field(default_factory=list)
63
+ edges: list[FlowEdge] = field(default_factory=list)
64
+ calls: list[str] = field(default_factory=list)
65
+ called_by: list[str] = field(default_factory=list)
66
+ tests: list[str] = field(default_factory=list)
67
+ metadata: dict[str, Any] = field(default_factory=dict)
68
+
69
+
70
+ @dataclass(slots=True)
71
+ class FileRecord:
72
+ path: str
73
+ language: str
74
+ sha256: str
75
+ flow_ids: list[str] = field(default_factory=list)
76
+ dependencies: list[str] = field(default_factory=list)
77
+
78
+
79
+ @dataclass(slots=True)
80
+ class FileAnalysis:
81
+ path: str
82
+ language: str
83
+ sha256: str
84
+ flows: list[Flow] = field(default_factory=list)
85
+ enums: dict[str, list[str]] = field(default_factory=dict)
86
+ constants: dict[str, bool] = field(default_factory=dict)
87
+ dependencies: list[str] = field(default_factory=list)
88
+
89
+ def to_dict(self) -> dict[str, Any]:
90
+ return asdict(self)
91
+
92
+ @classmethod
93
+ def from_dict(cls, data: dict[str, Any]) -> FileAnalysis:
94
+ return cls(
95
+ path=data["path"],
96
+ language=data["language"],
97
+ sha256=data["sha256"],
98
+ flows=[_flow_from_dict(item) for item in data.get("flows", [])],
99
+ enums=data.get("enums", {}),
100
+ constants=data.get("constants", {}),
101
+ dependencies=data.get("dependencies", []),
102
+ )
103
+
104
+
105
+ @dataclass(slots=True)
106
+ class ProjectModel:
107
+ schema_version: str
108
+ generated_at: str
109
+ root: str
110
+ flows: list[Flow] = field(default_factory=list)
111
+ files: list[FileRecord] = field(default_factory=list)
112
+ metadata: dict[str, Any] = field(default_factory=dict)
113
+
114
+ @classmethod
115
+ def empty(cls, root: Path) -> ProjectModel:
116
+ return cls(
117
+ schema_version="2.0",
118
+ generated_at=datetime.now(timezone.utc).isoformat(),
119
+ root=str(root.resolve()),
120
+ )
121
+
122
+ def to_dict(self) -> dict[str, Any]:
123
+ return asdict(self)
124
+
125
+ @classmethod
126
+ def from_dict(cls, data: dict[str, Any]) -> ProjectModel:
127
+ # Loading a committed `codedebrief.json` deserializes untrusted JSON, so a malformed
128
+ # shape must surface as a clean ValueError, not a raw KeyError / TypeError traceback
129
+ # leaking to the CLI or the MCP transport.
130
+ if not isinstance(data, dict):
131
+ raise ValueError("malformed codedebrief.json: expected a JSON object at the top level")
132
+ try:
133
+ flows = [_flow_from_dict(item) for item in data.get("flows", [])]
134
+ files = [FileRecord(**item) for item in data.get("files", [])]
135
+ return cls(
136
+ schema_version=data["schema_version"],
137
+ generated_at=data["generated_at"],
138
+ root=data["root"],
139
+ flows=flows,
140
+ files=files,
141
+ metadata=data.get("metadata", {}),
142
+ )
143
+ except (KeyError, TypeError, ValueError) as error:
144
+ raise ValueError(f"malformed codedebrief.json: {error}") from error
145
+
146
+
147
def _location_from_dict(data: dict[str, Any]) -> SourceLocation:
    """Build a ``SourceLocation`` by passing the mapping's items as keyword arguments."""
    location = SourceLocation(**data)
    return location
149
+
150
+
151
def _node_from_dict(data: dict[str, Any]) -> FlowNode:
    """Build a ``FlowNode`` from its dict form.

    ``id``, ``kind``, ``label`` and ``location`` are required. ``evidence``
    defaults to VERIFIED, ``detail`` to "" and ``metadata`` to an empty dict.
    """
    # Values are read in field order so a malformed mapping fails on the
    # first offending key.
    node_id = data["id"]
    kind = NodeKind(data["kind"])
    label = data["label"]
    location = _location_from_dict(data["location"])
    evidence = Evidence(data.get("evidence", Evidence.VERIFIED.value))
    detail = data.get("detail", "")
    metadata = data.get("metadata", {})
    return FlowNode(
        id=node_id,
        kind=kind,
        label=label,
        location=location,
        evidence=evidence,
        detail=detail,
        metadata=metadata,
    )
161
+
162
+
163
def _edge_from_dict(data: dict[str, Any]) -> FlowEdge:
    """Build a ``FlowEdge`` from its dict form.

    ``id``, ``source`` and ``target`` are required. ``label`` defaults to ""
    and ``evidence`` to VERIFIED.
    """
    edge_id = data["id"]
    source = data["source"]
    target = data["target"]
    label = data.get("label", "")
    evidence = Evidence(data.get("evidence", Evidence.VERIFIED.value))
    return FlowEdge(id=edge_id, source=source, target=target, label=label, evidence=evidence)
171
+
172
+
173
def _flow_from_dict(data: dict[str, Any]) -> Flow:
    """Build a ``Flow`` (including its nodes and edges) from its dict form.

    ``id``, ``name``, ``symbol``, ``language`` and ``location`` are required.
    ``framework`` defaults to "generic", ``entry_kind`` to "function",
    ``is_entrypoint`` to False, and every collection to an empty container.
    """
    # Values are read in field order so a malformed mapping fails on the
    # first offending key.
    flow_id = data["id"]
    name = data["name"]
    symbol = data["symbol"]
    language = data["language"]
    framework = data.get("framework", "generic")
    entry_kind = data.get("entry_kind", "function")
    is_entrypoint = data.get("is_entrypoint", False)
    location = _location_from_dict(data["location"])
    nodes = [_node_from_dict(raw_node) for raw_node in data.get("nodes", [])]
    edges = [_edge_from_dict(raw_edge) for raw_edge in data.get("edges", [])]
    return Flow(
        id=flow_id,
        name=name,
        symbol=symbol,
        language=language,
        framework=framework,
        entry_kind=entry_kind,
        is_entrypoint=is_entrypoint,
        location=location,
        nodes=nodes,
        edges=edges,
        calls=data.get("calls", []),
        called_by=data.get("called_by", []),
        tests=data.get("tests", []),
        metadata=data.get("metadata", {}),
    )
codedebrief/py.typed ADDED
@@ -0,0 +1 @@
1
+
codedebrief/quality.py ADDED
@@ -0,0 +1,392 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import Counter
4
+ from typing import Any
5
+
6
+ from codedebrief.model import Flow, FlowNode, NodeKind, ProjectModel
7
+
8
# Labels that `_generic_label` treats as generic once the node label has been
# lower-cased and had its whitespace collapsed.
GENERIC_LABELS = {"call", "return", "raise", "action", "branch", "condition", "unknown"}
# Lower-cased `link_confidence` values counted as low-confidence call links.
LOW_CONFIDENCE = {"low", "none"}
# Non-test flows with at least this many nodes are listed as "huge".
HUGE_FLOW_NODE_THRESHOLD = 60
# Edge-to-node ratio at or above which `dense_graph_warning` is set.
DENSE_EDGE_RATIO_THRESHOLD = 2.6
20
+
21
+
22
def model_quality(model: ProjectModel) -> dict[str, Any]:
    """Deterministic analyzer-quality metrics derived from one persisted model.

    Returns a dict with the sections ``files``, ``flows``, ``calls``,
    ``languages``, ``labels``, ``source_locations`` and ``graph``. Call, label,
    source-location and graph totals cover every flow in the model; the
    ``flows`` section (apart from ``total`` and ``per_file``) and the huge-flow
    list cover only flows whose metadata has no truthy ``test`` entry. Sample
    lists are capped at 20 entries.
    """
    non_test_flows = [flow for flow in model.flows if not flow.metadata.get("test")]
    call_nodes = [node for flow in model.flows for node in flow.nodes if node.kind is NodeKind.CALL]
    resolved = [node for node in call_nodes if node.metadata.get("target_flow")]
    # `or []` instead of a `.get` default: the default only applies when the key
    # is missing, so a persisted `"call_candidates": null` would otherwise reach
    # `len(None)`. This matches the null-safe truthiness test used for
    # `unresolved` below.
    ambiguous = [
        node for node in call_nodes if len(node.metadata.get("call_candidates") or []) > 1
    ]
    unresolved = [
        node
        for node in call_nodes
        if not node.metadata.get("target_flow") and not node.metadata.get("call_candidates")
    ]
    low_confidence = [
        node
        for node in call_nodes
        if str(node.metadata.get("link_confidence", "")).lower() in LOW_CONFIDENCE
    ]
    node_count = sum(len(flow.nodes) for flow in model.flows)
    edge_count = sum(len(flow.edges) for flow in model.flows)
    generic_labels = _generic_label_nodes(model.flows)
    source_locations = _source_location_nodes(model.flows)
    skipped_files = _skipped_files(model)
    parse_error_files = _parse_error_files(model.flows)
    huge_flows = [
        {
            "flow_id": flow.id,
            "name": flow.name,
            "nodes": len(flow.nodes),
            "source": f"{flow.location.path}:{flow.location.start_line}",
        }
        for flow in non_test_flows
        if len(flow.nodes) >= HUGE_FLOW_NODE_THRESHOLD
    ]
    # Guard the division: a model without nodes has ratio 0.0.
    edge_ratio = round(edge_count / node_count, 2) if node_count else 0.0
    return {
        "files": {
            "total": len(model.files),
            "by_language": dict(Counter(record.language for record in model.files)),
            "empty": sum(1 for record in model.files if not record.flow_ids),
            "skipped": {
                "total": len(skipped_files),
                "by_reason": dict(Counter(item["reason"] for item in skipped_files)),
                "sample": skipped_files[:20],
            },
            "parse_errors": {
                "total": len(parse_error_files),
                "by_language": dict(Counter(item["language"] for item in parse_error_files)),
                "sample": parse_error_files[:20],
            },
        },
        "flows": {
            "total": len(model.flows),
            "non_test": len(non_test_flows),
            "entrypoints": sum(flow.is_entrypoint for flow in non_test_flows),
            "by_language": dict(Counter(flow.language for flow in non_test_flows)),
            "by_entry_kind": dict(Counter(flow.entry_kind for flow in non_test_flows)),
            "per_file": _flow_distribution(model.flows),
            "huge": huge_flows[:20],
        },
        "calls": {
            "total": len(call_nodes),
            "resolved": len(resolved),
            "unresolved": len(unresolved),
            "ambiguous": len(ambiguous),
            "low_confidence": len(low_confidence),
            "resolution_rate": _ratio(len(resolved), len(call_nodes)),
        },
        "languages": _language_depth(
            model,
            non_test_flows=non_test_flows,
            resolved_calls=resolved,
            unresolved_calls=unresolved,
            generic_labels=generic_labels,
            skipped_files=skipped_files,
            parse_error_files=parse_error_files,
        ),
        "labels": {
            "generic_nodes": len(generic_labels),
            "generic_ratio": _ratio(len(generic_labels), node_count),
            "sample": generic_labels[:20],
        },
        "source_locations": {
            "nodes_with_source": len(source_locations),
            "coverage": _ratio(len(source_locations), node_count),
        },
        "graph": {
            "nodes": node_count,
            "edges": edge_count,
            "edge_to_node_ratio": edge_ratio,
            "dense_graph_warning": edge_ratio >= DENSE_EDGE_RATIO_THRESHOLD,
        },
    }
113
+
114
+
115
def render_quality(quality: dict[str, Any]) -> str:
    """Format a quality report dict as a newline-joined text summary.

    ``files``, ``flows``, ``calls``, ``labels``, ``source_locations`` and
    ``graph`` are required top-level keys; ``languages`` and
    ``files["parse_errors"]`` are optional. Each sample section prints at most
    five entries.
    """
    files = quality["files"]
    flows = quality["flows"]
    calls = quality["calls"]
    labels = quality["labels"]
    source = quality["source_locations"]
    graph = quality["graph"]

    # A missing or non-dict `languages` section is treated as empty.
    languages = quality.get("languages", {})
    if isinstance(languages, dict):
        language_depth = languages.get("depth", {})
        attention = languages.get("attention", [])
    else:
        language_depth, attention = {}, []

    report = ["Analysis quality:"]
    report.append(f"- Files: {files['total']} ({_format_counts(files['by_language'])})")
    report.append(f"- Skipped files: {files['skipped']['total']}")
    report.append(f"- Parse warnings: {files.get('parse_errors', {}).get('total', 0)}")
    report.append(
        f"- Flows: {flows['total']} total, {flows['entrypoints']} entrypoints "
        f"({_format_counts(flows['by_language'])})"
    )
    report.append(
        f"- Calls: {calls['resolved']}/{calls['total']} resolved "
        f"({calls['resolution_rate']:.0%}); {calls['unresolved']} unresolved, "
        f"{calls['ambiguous']} ambiguous, {calls['low_confidence']} low-confidence"
    )
    report.append(
        f"- Labels: {labels['generic_nodes']} generic nodes ({labels['generic_ratio']:.0%})"
    )
    report.append(
        f"- Source coverage: {source['nodes_with_source']} nodes ({source['coverage']:.0%})"
    )
    report.append(
        f"- Graph density: {graph['edges']} edges / {graph['nodes']} nodes "
        f"({graph['edge_to_node_ratio']})"
    )

    if language_depth:
        report.append(f"- Language depth: {len(language_depth)} observed language(s)")
        for language, metrics in sorted(language_depth.items())[:5]:
            report.append(_format_language_depth(language, metrics))
    if attention:
        report.append("- Language attention signals:")
        for item in attention[:5]:
            report.append(f" - {item['language']}: {', '.join(item['signals'])}")
    if flows["huge"]:
        report.append("- Huge flows:")
        for item in flows["huge"][:5]:
            report.append(f" - {item['name']} ({item['nodes']} nodes, {item['source']})")
    if files["skipped"]["sample"]:
        report.append("- Skipped file samples:")
        for item in files["skipped"]["sample"][:5]:
            report.append(f" - {item['path']} ({item['reason']})")
    parse_errors = files.get("parse_errors", {})
    if isinstance(parse_errors, dict) and parse_errors.get("sample"):
        report.append("- Parse warning samples:")
        for item in parse_errors["sample"][:5]:
            report.append(f" - {item['path']}:{item['line']} ({item['reason']})")
    if labels["sample"]:
        report.append("- Generic label samples:")
        for item in labels["sample"][:5]:
            report.append(f" - {item['label']} ({item['source']})")
    if graph["dense_graph_warning"]:
        report.append("- Warning: graph edge density is high; inspect layout and call resolution.")
    return "\n".join(report)
175
+
176
+
177
def _flow_distribution(flows: list[Flow]) -> dict[str, Any]:
    """Return min / max / average flows per file, keyed by each flow's location path.

    Only files that contain at least one flow are counted. With no flows at
    all the result is all zeros. The average is rounded to two decimals.
    """
    per_file = Counter(flow.location.path for flow in flows)
    if not per_file:
        return {"min": 0, "max": 0, "avg": 0.0}
    sizes = list(per_file.values())
    return {
        "min": min(sizes),
        "max": max(sizes),
        "avg": round(sum(sizes) / len(sizes), 2),
    }
187
+
188
+
189
def _language_depth(
    model: ProjectModel,
    *,
    non_test_flows: list[Flow],
    resolved_calls: list[FlowNode],
    unresolved_calls: list[FlowNode],
    generic_labels: list[dict[str, Any]],
    skipped_files: list[dict[str, str]],
    parse_error_files: list[dict[str, Any]],
) -> dict[str, Any]:
    """Break the quality metrics down per language.

    Args:
        model: Source of the file records and ``language_capabilities`` metadata.
        non_test_flows: Flows that every flow- and node-level count is based on.
        resolved_calls: Call nodes already classified as resolved.
        unresolved_calls: Call nodes already classified as unresolved.
        generic_labels: Rows describing generic-label nodes; each row's
            ``language`` is used, and ``flow_id`` (when present) ties the row
            to its flow.
        skipped_files: Skipped-file rows carrying a ``language`` key.
        parse_error_files: Parse-error rows carrying a ``language`` key.

    Returns:
        ``{"depth": {language: metrics}, "attention": [...]}`` where
        ``attention`` lists, in language order, every language that has at
        least one attention signal.
    """
    file_counts = Counter(record.language for record in model.files)
    files_with_flows = Counter(
        record.language for record in model.files if getattr(record, "flow_ids", [])
    )
    flow_counts = Counter(flow.language for flow in non_test_flows)
    entrypoint_counts = Counter(flow.language for flow in non_test_flows if flow.is_entrypoint)
    decision_counts = Counter(
        flow.language
        for flow in non_test_flows
        for node in flow.nodes
        if node.kind is NodeKind.DECISION
    )
    # Membership is by object identity: the caller passes the very same node
    # objects that hang off the flows.
    resolved_ids = {id(node) for node in resolved_calls}
    unresolved_ids = {id(node) for node in unresolved_calls}
    call_counts: Counter[str] = Counter()
    resolved_counts: Counter[str] = Counter()
    unresolved_counts: Counter[str] = Counter()
    source_counts: Counter[str] = Counter()
    for flow in non_test_flows:
        for node in flow.nodes:
            if node.kind is NodeKind.CALL:
                call_counts[flow.language] += 1
                if id(node) in resolved_ids:
                    resolved_counts[flow.language] += 1
                if id(node) in unresolved_ids:
                    unresolved_counts[flow.language] += 1
            if node.location.path and node.location.start_line > 0 and node.location.end_line > 0:
                source_counts[flow.language] += 1
    # The generic-label rows may describe nodes of flows that are not in
    # `non_test_flows`, while the `nodes` denominator below counts only those
    # flows. Restrict the numerator to the same flows so `generic_ratio` stays
    # a true fraction. Rows without a `flow_id` are still counted.
    non_test_flow_ids = {flow.id for flow in non_test_flows}
    generic_counts = Counter(
        _sample_language(item)
        for item in generic_labels
        if "flow_id" not in item or item["flow_id"] in non_test_flow_ids
    )
    node_counts = Counter(flow.language for flow in non_test_flows for _node in flow.nodes)
    skipped_counts = Counter(item.get("language", "") for item in skipped_files)
    parse_error_counts = Counter(item.get("language", "") for item in parse_error_files)
    capabilities = model.metadata.get("language_capabilities", {})
    # Every language seen anywhere, minus the "" placeholder for "unknown".
    languages = sorted(
        {
            *file_counts.keys(),
            *flow_counts.keys(),
            *skipped_counts.keys(),
            *parse_error_counts.keys(),
        }
        - {""}
    )
    depth: dict[str, dict[str, Any]] = {}
    attention: list[dict[str, Any]] = []
    for language in languages:
        files = file_counts[language]
        flows = flow_counts[language]
        calls = call_counts[language]
        resolved = resolved_counts[language]
        skipped = skipped_counts[language]
        parse_errors = parse_error_counts[language]
        nodes = node_counts[language]
        metrics = {
            "files": files,
            "files_with_flows": files_with_flows[language],
            "flow_file_coverage": _ratio(files_with_flows[language], files),
            "flows": flows,
            "entrypoints": entrypoint_counts[language],
            "decisions": decision_counts[language],
            "calls": calls,
            "resolved_calls": resolved,
            "unresolved_calls": unresolved_counts[language],
            "call_resolution_rate": _ratio(resolved, calls),
            "generic_nodes": generic_counts[language],
            "generic_ratio": _ratio(generic_counts[language], nodes),
            "source_coverage": _ratio(source_counts[language], nodes),
            "skipped_files": skipped,
            "parse_error_files": parse_errors,
            "capability": capabilities.get(language, {}),
        }
        signals = _language_attention_signals(metrics)
        if signals:
            attention.append({"language": language, "signals": signals})
        depth[language] = metrics
    return {"depth": depth, "attention": attention}
274
+
275
+
276
def _language_attention_signals(metrics: dict[str, Any]) -> list[str]:
    """Return the names of the attention signals raised by one language's metrics.

    Signals are reported in a fixed order. ``parse_error_files`` is the only
    optional key; every other key used here must be present.
    """
    # (signal name, triggered?) pairs, evaluated top to bottom.
    checks = (
        ("skipped_files", metrics["skipped_files"]),
        ("parse_errors", metrics.get("parse_error_files")),
        ("no_flow_files", metrics["files"] and not metrics["files_with_flows"]),
        ("low_call_resolution", metrics["calls"] and metrics["call_resolution_rate"] < 0.5),
        ("generic_labels", metrics["generic_ratio"] >= 0.2),
        ("low_source_coverage", metrics["flows"] and metrics["source_coverage"] < 0.9),
    )
    return [name for name, triggered in checks if triggered]
291
+
292
+
293
def _sample_language(item: dict[str, Any]) -> str:
    """Return the row's ``language`` value as a string ("" when the key is absent)."""
    language = item.get("language", "")
    return str(language)
295
+
296
+
297
def _format_language_depth(language: str, metrics: dict[str, Any]) -> str:
    """Format one language's file, flow, decision and call counts as a report sub-item."""
    counts = (
        f"{metrics['files']} files, {metrics['flows']} flows, "
        f"{metrics['decisions']} decisions, {metrics['resolved_calls']}/{metrics['calls']} "
        "calls resolved"
    )
    return f" - {language}: " + counts
303
+
304
+
305
def _generic_label_nodes(flows: list[Flow]) -> list[dict[str, Any]]:
    """Return one row per node whose label ``_generic_label`` flags as generic.

    Rows keep flow order, then node order, and record the flow id, node id,
    label, the flow's language and a ``path:start_line`` source string.
    """
    return [
        {
            "flow_id": flow.id,
            "node_id": node.id,
            "label": node.label,
            "language": flow.language,
            "source": f"{node.location.path}:{node.location.start_line}",
        }
        for flow in flows
        for node in flow.nodes
        if _generic_label(node)
    ]
321
+
322
+
323
def _generic_label(node: FlowNode) -> bool:
    """Tell whether a node's label is too generic to be informative.

    The label is lower-cased and its whitespace collapsed before checking.
    It is generic when it is one of ``GENERIC_LABELS``, when a CALL node's
    label is "call" plus at most one more word, or when an ACTION node's
    label is "do work", "handle" or "process".
    """
    normalized = " ".join(node.label.lower().split())
    if normalized in GENERIC_LABELS:
        return True
    if node.kind is NodeKind.CALL and normalized.startswith("call "):
        word_count = len(normalized.split())
        return word_count <= 2
    if node.kind is NodeKind.ACTION:
        return normalized in {"do work", "handle", "process"}
    return False
330
+
331
+
332
def _source_location_nodes(flows: list[Flow]) -> list[FlowNode]:
    """Return every node that has a non-empty path and positive start and end lines."""
    located: list[FlowNode] = []
    for flow in flows:
        for node in flow.nodes:
            if not node.location.path:
                continue
            if node.location.start_line > 0 and node.location.end_line > 0:
                located.append(node)
    return located
339
+
340
+
341
def _parse_error_files(flows: list[Flow]) -> list[dict[str, Any]]:
    """Collect one parse-error row per file from the flows' ``parse_error`` metadata.

    A flow contributes when ``flow.metadata["parse_error"]`` is a dict. Missing
    or falsy entries fall back to the flow's own path, language and start line,
    and to "ERROR" / "tree-sitter parse warning" for ``kind`` and ``reason``.
    The first flow seen for a path wins; rows are returned sorted by path.
    """
    by_path: dict[str, dict[str, Any]] = {}
    for flow in flows:
        parse_error = flow.metadata.get("parse_error")
        if not isinstance(parse_error, dict):
            continue
        path = str(parse_error.get("path") or flow.location.path)
        if not path or path in by_path:
            # Nothing to key the row on, or an earlier flow already reported this file.
            continue
        # The metadata comes from a persisted model, so `line` may be any JSON
        # value; an unusable one must not abort the whole quality report.
        try:
            line = int(parse_error.get("line") or flow.location.start_line)
        except (TypeError, ValueError, OverflowError):
            line = flow.location.start_line
        by_path[path] = {
            "path": path,
            "language": str(parse_error.get("language") or flow.language),
            "line": line,
            "kind": str(parse_error.get("kind") or "ERROR"),
            "reason": str(parse_error.get("reason") or "tree-sitter parse warning"),
        }
    return [by_path[path] for path in sorted(by_path)]
361
+
362
+
363
def _skipped_files(model: ProjectModel) -> list[dict[str, str]]:
    """Return the well-formed rows of ``model.metadata["skipped_files"]``.

    A row is kept only when it is a dict whose ``path`` and ``reason`` are both
    strings; a non-string ``language`` becomes "". Anything other than a list
    under ``skipped_files`` yields an empty result.
    """
    rows = model.metadata.get("skipped_files", [])
    if not isinstance(rows, list):
        return []
    return [
        {
            "path": item["path"],
            "language": item["language"] if isinstance(item.get("language"), str) else "",
            "reason": item["reason"],
        }
        for item in rows
        if isinstance(item, dict)
        and isinstance(item.get("path"), str)
        and isinstance(item.get("reason"), str)
    ]
383
+
384
+
385
def _ratio(numerator: int, denominator: int) -> float:
    """Return ``numerator / denominator`` rounded to four decimals, or 0.0 for a zero denominator."""
    if not denominator:
        return 0.0
    return round(numerator / denominator, 4)
387
+
388
+
389
def _format_counts(counts: dict[str, int]) -> str:
    """Render counts as ``key=value`` pairs sorted by key, or "none" when empty."""
    if not counts:
        return "none"
    pairs = [f"{key}={value}" for key, value in sorted(counts.items())]
    return ", ".join(pairs)