bareagent-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. bareagent/__init__.py +10 -0
  2. bareagent/concurrency/__init__.py +6 -0
  3. bareagent/concurrency/background.py +97 -0
  4. bareagent/concurrency/notification.py +61 -0
  5. bareagent/concurrency/scheduler.py +136 -0
  6. bareagent/config.toml +299 -0
  7. bareagent/core/__init__.py +1 -0
  8. bareagent/core/config_paths.py +49 -0
  9. bareagent/core/context.py +127 -0
  10. bareagent/core/fileutil.py +103 -0
  11. bareagent/core/goal.py +214 -0
  12. bareagent/core/handlers/__init__.py +1 -0
  13. bareagent/core/handlers/bash.py +79 -0
  14. bareagent/core/handlers/file_edit.py +47 -0
  15. bareagent/core/handlers/file_read.py +270 -0
  16. bareagent/core/handlers/file_write.py +34 -0
  17. bareagent/core/handlers/glob_search.py +30 -0
  18. bareagent/core/handlers/goal.py +60 -0
  19. bareagent/core/handlers/grep_search.py +52 -0
  20. bareagent/core/handlers/memory.py +71 -0
  21. bareagent/core/handlers/plan.py +106 -0
  22. bareagent/core/handlers/search_utils.py +77 -0
  23. bareagent/core/handlers/skill.py +87 -0
  24. bareagent/core/handlers/subagent_send.py +70 -0
  25. bareagent/core/handlers/web_fetch.py +126 -0
  26. bareagent/core/handlers/web_search.py +165 -0
  27. bareagent/core/handlers/workflow.py +190 -0
  28. bareagent/core/loop.py +535 -0
  29. bareagent/core/retry.py +131 -0
  30. bareagent/core/sandbox.py +27 -0
  31. bareagent/core/schema.py +21 -0
  32. bareagent/core/tools.py +779 -0
  33. bareagent/core/workflow.py +517 -0
  34. bareagent/core/workflow_registry.py +219 -0
  35. bareagent/debug/__init__.py +0 -0
  36. bareagent/debug/interaction_log.py +263 -0
  37. bareagent/debug/viewer.html +1750 -0
  38. bareagent/debug/web_viewer.py +157 -0
  39. bareagent/hooks/__init__.py +32 -0
  40. bareagent/hooks/config.py +118 -0
  41. bareagent/hooks/engine.py +197 -0
  42. bareagent/hooks/errors.py +14 -0
  43. bareagent/hooks/events.py +22 -0
  44. bareagent/lsp/__init__.py +63 -0
  45. bareagent/lsp/config.py +134 -0
  46. bareagent/lsp/coord.py +118 -0
  47. bareagent/lsp/diagnostics.py +240 -0
  48. bareagent/lsp/errors.py +24 -0
  49. bareagent/lsp/manager.py +866 -0
  50. bareagent/lsp/tools.py +629 -0
  51. bareagent/lsp/workspace_edit.py +305 -0
  52. bareagent/main.py +4205 -0
  53. bareagent/mcp/__init__.py +69 -0
  54. bareagent/mcp/_sse.py +69 -0
  55. bareagent/mcp/client.py +341 -0
  56. bareagent/mcp/config.py +169 -0
  57. bareagent/mcp/errors.py +32 -0
  58. bareagent/mcp/manager.py +318 -0
  59. bareagent/mcp/protocol.py +187 -0
  60. bareagent/mcp/registry.py +557 -0
  61. bareagent/mcp/transport/__init__.py +15 -0
  62. bareagent/mcp/transport/base.py +149 -0
  63. bareagent/mcp/transport/http_legacy.py +192 -0
  64. bareagent/mcp/transport/http_streamable.py +217 -0
  65. bareagent/mcp/transport/stdio.py +202 -0
  66. bareagent/memory/__init__.py +1 -0
  67. bareagent/memory/compact.py +203 -0
  68. bareagent/memory/conversation_io.py +226 -0
  69. bareagent/memory/embedding.py +194 -0
  70. bareagent/memory/persistent.py +515 -0
  71. bareagent/memory/token_counter.py +67 -0
  72. bareagent/memory/token_tracker.py +262 -0
  73. bareagent/memory/transcript.py +100 -0
  74. bareagent/permission/__init__.py +1 -0
  75. bareagent/permission/guard.py +329 -0
  76. bareagent/permission/rules.py +19 -0
  77. bareagent/planning/__init__.py +19 -0
  78. bareagent/planning/agent_types.py +169 -0
  79. bareagent/planning/skill_gen.py +141 -0
  80. bareagent/planning/skill_store.py +173 -0
  81. bareagent/planning/skills.py +146 -0
  82. bareagent/planning/subagent.py +355 -0
  83. bareagent/planning/subagent_registry.py +77 -0
  84. bareagent/planning/tasks.py +348 -0
  85. bareagent/planning/todo.py +153 -0
  86. bareagent/planning/worktree.py +122 -0
  87. bareagent/provider/__init__.py +1 -0
  88. bareagent/provider/anthropic.py +348 -0
  89. bareagent/provider/base.py +136 -0
  90. bareagent/provider/factory.py +130 -0
  91. bareagent/provider/openai.py +881 -0
  92. bareagent/provider/presets.py +72 -0
  93. bareagent/provider/setup.py +356 -0
  94. bareagent/skills/.gitkeep +1 -0
  95. bareagent/skills/code-review/SKILL.md +68 -0
  96. bareagent/skills/git/SKILL.md +68 -0
  97. bareagent/skills/test/SKILL.md +70 -0
  98. bareagent/team/__init__.py +17 -0
  99. bareagent/team/autonomous.py +193 -0
  100. bareagent/team/mailbox.py +239 -0
  101. bareagent/team/manager.py +155 -0
  102. bareagent/team/protocols.py +129 -0
  103. bareagent/tracing/__init__.py +12 -0
  104. bareagent/tracing/_api.py +92 -0
  105. bareagent/tracing/_proxy.py +60 -0
  106. bareagent/tracing/composite.py +115 -0
  107. bareagent/tracing/json_file.py +115 -0
  108. bareagent/tracing/langfuse.py +139 -0
  109. bareagent/tracing/otel.py +107 -0
  110. bareagent/tracing/setup.py +85 -0
  111. bareagent/ui/__init__.py +24 -0
  112. bareagent/ui/console.py +167 -0
  113. bareagent/ui/prompt.py +78 -0
  114. bareagent/ui/protocol.py +24 -0
  115. bareagent/ui/stream.py +66 -0
  116. bareagent/ui/theme.py +240 -0
  117. bareagent_cli-0.1.0.dist-info/METADATA +331 -0
  118. bareagent_cli-0.1.0.dist-info/RECORD +121 -0
  119. bareagent_cli-0.1.0.dist-info/WHEEL +4 -0
  120. bareagent_cli-0.1.0.dist-info/entry_points.txt +2 -0
  121. bareagent_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,517 @@
1
+ """Deterministic workflow orchestration: run a static DAG of subagent nodes.
2
+
3
+ Pure logic with no LLM / loop / threading / SDK dependencies, so DAG parsing,
4
+ validation (cycle detection / dangling deps / limits), the ready-set scheduler,
5
+ result threading, and summary formatting are all unit-testable with injected
6
+ callbacks (mirrors the ``src/core/goal.py`` and ``src/core/retry.py`` pure-module
7
+ pattern).
8
+
9
+ Division of labor (see task 06-06-workflow-deterministic-orchestration):
10
+ - This module owns the *control flow*: which nodes are ready to run given what
11
+ has completed, how a failed node skips its transitive dependents, how upstream
12
+ results are threaded into a downstream prompt, and how the final summary reads
13
+ (``run_workflow`` + the pure helpers).
14
+ - The REPL (``main.py``) owns the side-effecting parts: executing one node as an
15
+ isolated ``run_subagent`` call and running a batch of ready nodes concurrently
16
+ on a thread pool. These are injected into :func:`run_workflow` as the
17
+ ``execute_node`` / ``map_concurrent`` callbacks.
18
+
19
+ The LLM authors the DAG on the fly via the isolated ``workflow`` tool (declarative
20
+ nodes + ``depends_on`` edges, NOT executable code), keeping orchestration
21
+ deterministic and free of any code-``exec`` sandbox. Loops / conditionals /
22
+ dynamic fan-out are intentionally out of scope (static DAG); the model can issue
23
+ another ``workflow`` call from the main loop when it needs to branch.
24
+
25
+ Execution is *layered*: each iteration runs the whole current ready set
26
+ concurrently and waits for it before recomputing the next ready set. A node
27
+ therefore waits for all of its layer peers, not just its own dependencies -- a
28
+ known MVP simplification (a continuous as-completed scheduler is a later
29
+ extension). Failure semantics are *fail-soft* (decision (b)): a node whose
30
+ executor raises becomes ``FAILED``; its transitive dependents become ``SKIPPED``;
31
+ independent branches keep running.
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import re
37
+ from collections.abc import Callable
38
+ from dataclasses import dataclass, field, replace
39
+ from enum import Enum
40
+ from typing import Any
41
+
42
# Upper bound on how many nodes a single workflow may declare. Guards against
# an LLM emitting a pathologically large DAG that floods the thread pool.
# Used as the default ``max_nodes`` of ``validate_workflow``; override via
# ``[workflow] max_nodes``.
DEFAULT_MAX_NODES = 20
# Upper bound on concurrently-running nodes. Kept conservative because each
# node is a full subagent that may itself spawn work. Override via
# ``[workflow] max_concurrency``.
# NOTE(review): not referenced by the code in this module; presumably consumed
# by the caller that builds ``map_concurrent`` -- confirm.
DEFAULT_MAX_CONCURRENCY = 8

# Matches ``{{ node_id }}`` placeholders (optional whitespace inside the
# braces) in a node prompt for upstream-result substitution. Group 1 is the
# node id, restricted to letters, digits, ``_``, ``.`` and ``-``.
_PLACEHOLDER = re.compile(r"\{\{\s*([A-Za-z0-9_.\-]+)\s*\}\}")

# Sentinel embedded in a node's ``error`` when it was skipped because the run
# hit its token budget. ``format_summary`` does a substring match on it to
# surface a budget note, so the writer (``run_workflow``) and the reader must
# keep using this exact text.
_BUDGET_EXHAUSTED_REASON = "token budget exhausted"
59
+
60
+
61
class WorkflowError(Exception):
    """Signal that workflow input is structurally unusable.

    Reserved for a payload whose overall shape is wrong (it is not an object
    carrying a ``nodes`` array). Problems inside individual nodes are reported
    as validation messages instead of being raised.
    """
63
+
64
+
65
class NodeStatus(Enum):
    """Lifecycle state of a workflow node."""

    # Not yet executed; the only non-terminal state.
    PENDING = "pending"
    # Executor returned normally.
    DONE = "done"
    # Executor raised.
    FAILED = "failed"
    # Never executed (failed/skipped upstream dependency, or budget exhausted).
    SKIPPED = "skipped"
70
+
71
+
72
+ @dataclass(slots=True)
73
+ class WorkflowNode:
74
+ """One unit of work in the DAG: an isolated subagent task.
75
+
76
+ ``depends_on`` lists the ids whose outputs must complete (DONE) before this
77
+ node runs. ``phase`` / ``label`` are organizational metadata surfaced in the
78
+ summary; they do not affect scheduling.
79
+ """
80
+
81
+ id: str
82
+ prompt: str
83
+ agent_type: str | None = None
84
+ depends_on: list[str] = field(default_factory=list)
85
+ phase: str | None = None
86
+ label: str | None = None
87
+
88
+
89
+ @dataclass(slots=True)
90
+ class NodeResult:
91
+ """Terminal (or in-flight PENDING) state of a node after the scheduler runs.
92
+
93
+ ``reused`` marks a result carried over from a prior run by :func:`compute_resume_plan`
94
+ (the node did not execute this run, so it consumed no token budget); the
95
+ status is still ``DONE`` so downstream scheduling is unaffected.
96
+ """
97
+
98
+ id: str
99
+ status: NodeStatus
100
+ output: str = ""
101
+ error: str = ""
102
+ reused: bool = False
103
+
104
+
105
+ @dataclass(slots=True)
106
+ class WorkflowSpec:
107
+ """A parsed (not yet validated) workflow DAG."""
108
+
109
+ nodes: list[WorkflowNode]
110
+
111
+
112
def _coerce_node(raw: Any) -> WorkflowNode:
    """Coerce one raw node mapping into a :class:`WorkflowNode` defensively.

    A non-dict entry becomes an empty-id node so :func:`validate_workflow`
    reports it rather than crashing here. Missing optional fields fall back to
    ``None`` / ``[]``.

    The node ``id`` and every ``depends_on`` entry go through the same
    normalization, so a numeric id such as ``0`` is kept as ``"0"`` (it used to
    collapse to a blank id while a dependency on ``0`` became ``"0"``), and a
    ``None`` / empty entry in ``depends_on`` is dropped instead of turning into
    a phantom dependency named ``"None"``.

    Args:
        raw: One element of the tool input's ``nodes`` array.

    Returns:
        The coerced node; its ``id`` is ``""`` when none could be derived.
    """
    if not isinstance(raw, dict):
        return WorkflowNode(id="", prompt="")

    def _as_id(value: Any) -> str:
        # Real numbers (but not bools) are legitimate ids even when falsy.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return str(value)
        # Everything else keeps the lenient "falsy means missing" rule.
        return str(value or "").strip()

    def _opt_str(key: str) -> str | None:
        value = raw.get(key)
        if isinstance(value, str) and value.strip():
            return value.strip()
        return None

    depends_raw = raw.get("depends_on")
    if isinstance(depends_raw, list):
        depends_on = [dep for dep in map(_as_id, depends_raw) if dep]
    elif isinstance(depends_raw, str) and depends_raw.strip():
        # A bare string is accepted as a single dependency.
        depends_on = [depends_raw.strip()]
    else:
        depends_on = []

    return WorkflowNode(
        id=_as_id(raw.get("id")),
        prompt=str(raw.get("prompt", "") or ""),
        agent_type=_opt_str("agent_type"),
        depends_on=depends_on,
        phase=_opt_str("phase"),
        label=_opt_str("label"),
    )
144
+
145
+
146
def parse_workflow(tool_input: Any) -> WorkflowSpec:
    """Turn a ``workflow`` tool payload into a :class:`WorkflowSpec`.

    Only the outer shape is enforced here. Each element of ``nodes`` is coerced
    leniently; semantic problems (empty id, dangling dependency, cycle) are
    left for :func:`validate_workflow` to report.

    Raises:
        WorkflowError: If ``tool_input`` is not a dict, or its ``nodes`` entry
            is not a list.
    """
    if not isinstance(tool_input, dict):
        raise WorkflowError("workflow input must be an object with a 'nodes' array.")
    entries = tool_input.get("nodes")
    if isinstance(entries, list):
        return WorkflowSpec(nodes=list(map(_coerce_node, entries)))
    raise WorkflowError("workflow 'nodes' must be an array.")
159
+
160
+
161
def _find_cycle(nodes: list[WorkflowNode], valid_ids: set[str]) -> list[str] | None:
    """Return a node-id cycle path (``a -> b -> a``) if the DAG has one, else None.

    Only edges to ids in ``valid_ids`` are considered (dangling deps are
    reported separately), and self-loops are ignored here (also reported
    separately). Nodes with an empty id are left out of the graph.

    The depth-first search uses an explicit stack rather than recursion, so a
    very long dependency chain cannot raise ``RecursionError``. This matters
    because validation still reaches this function when the node count exceeds
    the configured limit. Traversal order matches a recursive DFS: nodes in
    declaration order, dependencies in ``depends_on`` order.

    Args:
        nodes: Nodes to inspect; only ``id`` and ``depends_on`` are read.
        valid_ids: Ids that count as existing edge targets.

    Returns:
        The first cycle found, as a path whose first and last ids are equal, or
        ``None`` when the graph is acyclic.
    """
    graph: dict[str, list[str]] = {
        n.id: [d for d in n.depends_on if d in valid_ids and d != n.id] for n in nodes if n.id
    }
    # WHITE = unvisited, GRAY = on the current DFS path, BLACK = fully explored.
    WHITE, GRAY, BLACK = 0, 1, 2
    color = dict.fromkeys(graph, WHITE)

    for root in graph:
        if color[root] != WHITE:
            continue
        color[root] = GRAY
        path = [root]
        # One partially-consumed dependency iterator per node on ``path``.
        stack = [iter(graph[root])]
        while stack:
            for dep in stack[-1]:
                # An id in valid_ids but absent from the graph has no outgoing
                # edges, so it is treated as already explored.
                dep_color = color.get(dep, BLACK)
                if dep_color == GRAY:
                    return path[path.index(dep) :] + [dep]
                if dep_color == WHITE:
                    color[dep] = GRAY
                    path.append(dep)
                    stack.append(iter(graph[dep]))
                    break  # descend; resume this iterator after ``dep`` finishes
            else:
                # Dependencies exhausted: this node is fully explored.
                color[path.pop()] = BLACK
                stack.pop()
    return None
195
+
196
+
197
def validate_workflow(spec: WorkflowSpec, *, max_nodes: int = DEFAULT_MAX_NODES) -> list[str]:
    """Collect human-readable validation problems; an empty list means valid.

    Checks, in reporting order: at least one node (the sole error when
    violated), node count within ``max_nodes`` (a non-positive limit disables
    the check), non-empty and unique ids, then per node a non-empty prompt, no
    self-dependency and no reference to an unknown id, and finally that the
    dependency graph is acyclic.
    """
    all_nodes = spec.nodes
    if not all_nodes:
        return ["workflow must contain at least one node."]

    problems: list[str] = []
    count = len(all_nodes)
    if 0 < max_nodes < count:
        problems.append(f"workflow has {count} nodes, exceeding the limit of {max_nodes}.")

    # First pass: gather the known ids, noting blanks and repeats.
    known_ids: set[str] = set()
    repeated: set[str] = set()
    blank_seen = False
    for node in all_nodes:
        if not node.id:
            blank_seen = True
            continue
        if node.id in known_ids:
            repeated.add(node.id)
        else:
            known_ids.add(node.id)
    if blank_seen:
        problems.append("every node must have a non-empty 'id'.")
    problems.extend(f"duplicate node id: {dup!r}." for dup in sorted(repeated))

    # Second pass: per-node prompt and edge checks.
    for node in all_nodes:
        if not node.prompt.strip():
            problems.append(f"node {node.id!r} has an empty 'prompt'.")
        for dep in node.depends_on:
            if dep == node.id:
                problems.append(f"node {node.id!r} cannot depend on itself.")
            elif dep not in known_ids:
                problems.append(f"node {node.id!r} depends on unknown node {dep!r}.")

    loop = _find_cycle(all_nodes, known_ids)
    if loop is not None:
        problems.append("workflow has a dependency cycle: " + " -> ".join(loop))
    return problems
240
+
241
+
242
def compute_ready(spec: WorkflowSpec, results: dict[str, NodeResult]) -> list[WorkflowNode]:
    """List the nodes that can run now, in declaration order.

    A node qualifies when it is still PENDING and every one of its
    dependencies is DONE (a node without dependencies qualifies immediately).
    """

    def _runnable(node: WorkflowNode) -> bool:
        if results[node.id].status is not NodeStatus.PENDING:
            return False
        return all(results[dep].status is NodeStatus.DONE for dep in node.depends_on)

    return [node for node in spec.nodes if _runnable(node)]
251
+
252
+
253
def propagate_skips(spec: WorkflowSpec, results: dict[str, NodeResult]) -> set[str]:
    """Turn every PENDING node with a FAILED/SKIPPED dependency into SKIPPED.

    ``results`` is updated in place. Sweeps repeat until one makes no change,
    so a skip cascades through transitive dependents. Expects a validated spec
    (every ``depends_on`` id is a key of ``results``).

    Returns:
        The ids that this call newly marked SKIPPED.
    """
    blocking = (NodeStatus.FAILED, NodeStatus.SKIPPED)
    skipped_now: set[str] = set()
    while True:
        progressed = False
        for node in spec.nodes:
            if results[node.id].status is not NodeStatus.PENDING:
                continue
            blocked = False
            for dep in node.depends_on:
                if results[dep].status in blocking:
                    blocked = True
                    break
            if not blocked:
                continue
            results[node.id] = NodeResult(
                id=node.id,
                status=NodeStatus.SKIPPED,
                error="upstream dependency failed or was skipped",
            )
            skipped_now.add(node.id)
            progressed = True
        if not progressed:
            return skipped_now
279
+
280
+
281
def compute_resume_plan(
    spec: WorkflowSpec,
    prior_spec: WorkflowSpec,
    prior_results: dict[str, NodeResult],
) -> dict[str, NodeResult]:
    """Select the prior results that can be carried over when resuming ``spec``.

    A node qualifies only when both conditions hold:

    * it is a direct cache hit -- ``prior_spec`` had the same ``id`` with an
      identical raw (pre-substitution) ``prompt``, and the node finished
      ``DONE`` last time;
    * every dependency qualifies as well, recursively. If anything upstream has
      to run again, the downstream output is stale even though the downstream
      prompt did not change.

    The returned mapping holds copies flagged ``reused=True`` (status stays
    ``DONE``) for the caller to seed into the scheduler. ``spec`` is expected
    to be acyclic; the memo is pre-filled with ``False`` before recursing so
    that a cycle could not recurse forever.

    NOTE(review): only the prompt text is compared. A changed ``agent_type`` or
    ``depends_on`` list does not invalidate a hit -- confirm this is intended.
    """
    old_prompts = {n.id: n.prompt for n in prior_spec.nodes if n.id}
    current = {n.id: n for n in spec.nodes if n.id}
    verdicts: dict[str, bool] = {}

    def _is_reusable(node_id: str) -> bool:
        if node_id in verdicts:
            return verdicts[node_id]
        # Pessimistic placeholder: doubles as the re-entrancy guard and as the
        # final answer for every early exit below.
        verdicts[node_id] = False
        node = current.get(node_id)
        if node is None:
            return False
        if node_id not in old_prompts or old_prompts[node_id] != node.prompt:
            return False
        earlier = prior_results.get(node_id)
        if earlier is None or earlier.status is not NodeStatus.DONE:
            return False
        verdict = all(_is_reusable(dep) for dep in node.depends_on)
        verdicts[node_id] = verdict
        return verdict

    return {
        node.id: replace(prior_results[node.id], reused=True)
        for node in spec.nodes
        if node.id and _is_reusable(node.id)
    }
331
+
332
+
333
def build_node_prompt(node: WorkflowNode, upstream: dict[str, NodeResult]) -> str:
    """Render a node's prompt with its upstream results threaded in.

    Each ``{{dep_id}}`` placeholder whose id is a key of ``upstream`` is
    replaced by that result's output text; any other placeholder is left as
    written. Upstream results that no placeholder consumed are appended under a
    "# Upstream results" heading, so the node always sees everything it
    depends on.
    """
    consumed: set[str] = set()

    def _expand(match: re.Match[str]) -> str:
        dep_id = match.group(1)
        hit = upstream.get(dep_id)
        if hit is None:
            # Not an available upstream result: keep the placeholder text.
            return match.group(0)
        consumed.add(dep_id)
        return hit.output

    rendered = _PLACEHOLDER.sub(_expand, node.prompt)

    leftovers = [res for dep_id, res in upstream.items() if dep_id not in consumed]
    if not leftovers:
        return rendered

    sections = [rendered.rstrip(), "", "# Upstream results"]
    sections.extend(
        f'\n<result from="{res.id}">\n{res.output.strip()}\n</result>' for res in leftovers
    )
    return "\n".join(sections)
360
+
361
+
362
def format_summary(spec: WorkflowSpec, results: dict[str, NodeResult]) -> str:
    """Build the structured, aggregated workflow report handed back to the LLM.

    The report is a headline with done / failed / skipped counts (plus notes
    about reused nodes and an exhausted token budget when they apply), followed
    by one titled section per node in declaration order. Sections are separated
    by blank lines.
    """
    ordered = [(node, results[node.id]) for node in spec.nodes]

    done = failed = skipped = reused = 0
    budget_hit = False
    for _node, res in ordered:
        state = res.status
        if state is NodeStatus.DONE:
            done += 1
        elif state is NodeStatus.FAILED:
            failed += 1
        elif state is NodeStatus.SKIPPED:
            skipped += 1
            # The budget sentinel only needs to be found once.
            if not budget_hit and _BUDGET_EXHAUSTED_REASON in res.error:
                budget_hit = True
        if res.reused:
            reused += 1

    pieces = [
        f"Workflow finished: {done} done, {failed} failed, {skipped} skipped "
        f"(of {len(spec.nodes)} nodes)."
    ]
    if reused:
        pieces.append(f" {reused} reused from a prior run.")
    if budget_hit:
        pieces.append(" Stopped early: token budget exhausted.")

    blocks = ["".join(pieces)]
    for node, res in ordered:
        tag = "reused" if res.reused else res.status.value
        heading = f"## [{tag}] {node.id}"
        if node.phase:
            heading += f" (phase: {node.phase})"
        if node.label:
            heading += f" - {node.label}"
        blocks.append(heading)

        if res.status is NodeStatus.DONE:
            body = res.output.strip() or "(no output)"
        elif res.status is NodeStatus.FAILED:
            body = f"Error: {res.error}"
        else:
            body = res.error or "Skipped."
        blocks.append(body)
    return "\n\n".join(blocks)
399
+
400
+
401
def _make_node_thunk(
    node: WorkflowNode,
    results: dict[str, NodeResult],
    execute_node: Callable[[WorkflowNode, dict[str, NodeResult]], Any],
) -> Callable[[], NodeResult]:
    """Wrap one node in a zero-argument callable that reports failure as data.

    The node's dependency results are captured when the thunk is built (the
    scheduler only builds thunks for nodes whose deps are DONE). An exception
    raised by ``execute_node`` is converted into a FAILED result carrying
    ``"<ExceptionType>: <message>"``, so one failing node does not abort the
    rest of its batch.
    """
    snapshot = {dep_id: results[dep_id] for dep_id in node.depends_on}

    def _thunk() -> NodeResult:
        try:
            produced = execute_node(node, snapshot)
        except Exception as exc:  # noqa: BLE001 - any node failure is fail-soft
            failure = f"{type(exc).__name__}: {exc}"
            return NodeResult(id=node.id, status=NodeStatus.FAILED, error=failure)
        else:
            return NodeResult(id=node.id, status=NodeStatus.DONE, output=str(produced))

    return _thunk
426
+
427
+
428
def run_workflow(
    spec: WorkflowSpec,
    *,
    execute_node: Callable[[WorkflowNode, dict[str, NodeResult]], Any],
    map_concurrent: Callable[[list[Callable[[], NodeResult]]], list[NodeResult]],
    on_progress: Callable[[str], None] | None = None,
    on_node_status: Callable[[str, NodeResult], None] | None = None,
    reused_results: dict[str, NodeResult] | None = None,
    token_budget: int = 0,
    tokens_spent: Callable[[], int] | None = None,
) -> dict[str, NodeResult]:
    """Drive a validated DAG to completion, returning each node's terminal result.

    - ``execute_node(node, upstream)`` runs one node and returns its output text;
      raising marks the node FAILED (its dependents become SKIPPED).
    - ``map_concurrent(thunks)`` runs a batch of ready-node thunks (which never
      raise) concurrently and returns their results in order.
    - ``on_progress`` receives human-readable progress lines.
    - ``on_node_status(node_id, result)`` fires whenever a node reaches a terminal
      state (reused-seed, executed, or skipped). Unlike ``on_progress`` it carries
      structured data, not prose. Skip notifications are delivered in spec
      (declaration) order so the callback sequence is reproducible from run to
      run.
    - ``reused_results`` seeds DONE results carried over from a prior run (see
      :func:`compute_resume_plan`); those nodes are not executed.
    - ``token_budget`` (>0) caps the run: before launching each layer, if
      ``tokens_spent()`` has reached the budget, every remaining PENDING node is
      marked SKIPPED ("token budget exhausted") and the run stops. The check
      happens at layer boundaries only, so a layer that has started always
      finishes. ``0`` (or a missing ``tokens_spent``) means unlimited.

    Must be called on a spec that passed :func:`validate_workflow` (acyclic, all
    deps resolved), otherwise the scheduler could stall.

    Returns:
        A mapping from node id to that node's final :class:`NodeResult`.
    """
    results = {node.id: NodeResult(id=node.id, status=NodeStatus.PENDING) for node in spec.nodes}

    def emit(message: str) -> None:
        if on_progress is not None:
            on_progress(message)

    def set_result(result: NodeResult) -> None:
        results[result.id] = result
        if on_node_status is not None:
            on_node_status(result.id, result)

    # Seed reused results from a prior run before scheduling so the scheduler
    # treats them as already DONE (skips execution, threads their output into
    # dependents).
    if reused_results:
        reused_ids = [n.id for n in spec.nodes if n.id in reused_results]
        for node_id in reused_ids:
            set_result(reused_results[node_id])
        if reused_ids:
            emit(f"Reusing {len(reused_ids)} node(s) from a prior run: " + ", ".join(reused_ids))

    def _spent_if_exhausted() -> int | None:
        """Return the spend reading when the budget is used up, else ``None``.

        ``tokens_spent`` is read once, so the decision and the number shown in
        the skip reason always agree.
        """
        if token_budget <= 0 or tokens_spent is None:
            return None
        spent_now = tokens_spent()
        return spent_now if spent_now >= token_budget else None

    while True:
        # Report cascaded skips in declaration order. Iterating the returned
        # set directly would follow string-hash order, which changes between
        # interpreter runs.
        skipped_now = propagate_skips(spec, results)
        for node in spec.nodes:
            if node.id not in skipped_now:
                continue
            skipped_now.discard(node.id)  # report each id at most once
            result = results[node.id]
            emit(f"  skipped: {node.id} ({result.error})")
            if on_node_status is not None:
                on_node_status(node.id, result)

        spent = _spent_if_exhausted()
        if spent is not None:
            reason = f"{_BUDGET_EXHAUSTED_REASON} ({spent} >= {token_budget} tokens)"
            pending = [n.id for n in spec.nodes if results[n.id].status is NodeStatus.PENDING]
            if pending:
                emit(
                    f"Token budget exhausted ({spent} >= {token_budget}); "
                    "skipping remaining node(s)."
                )
            for node_id in pending:
                set_result(NodeResult(id=node_id, status=NodeStatus.SKIPPED, error=reason))
            break

        ready = compute_ready(spec, results)
        if not ready:
            break
        emit("Running " + str(len(ready)) + " node(s): " + ", ".join(n.id for n in ready))
        batch = map_concurrent([_make_node_thunk(node, results, execute_node) for node in ready])
        for result in batch:
            set_result(result)
            if result.status is NodeStatus.DONE:
                emit(f"  done: {result.id}")
            else:
                emit(f"  {result.status.value}: {result.id} ({result.error})")
    return results