bareagent-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bareagent/__init__.py +10 -0
- bareagent/concurrency/__init__.py +6 -0
- bareagent/concurrency/background.py +97 -0
- bareagent/concurrency/notification.py +61 -0
- bareagent/concurrency/scheduler.py +136 -0
- bareagent/config.toml +299 -0
- bareagent/core/__init__.py +1 -0
- bareagent/core/config_paths.py +49 -0
- bareagent/core/context.py +127 -0
- bareagent/core/fileutil.py +103 -0
- bareagent/core/goal.py +214 -0
- bareagent/core/handlers/__init__.py +1 -0
- bareagent/core/handlers/bash.py +79 -0
- bareagent/core/handlers/file_edit.py +47 -0
- bareagent/core/handlers/file_read.py +270 -0
- bareagent/core/handlers/file_write.py +34 -0
- bareagent/core/handlers/glob_search.py +30 -0
- bareagent/core/handlers/goal.py +60 -0
- bareagent/core/handlers/grep_search.py +52 -0
- bareagent/core/handlers/memory.py +71 -0
- bareagent/core/handlers/plan.py +106 -0
- bareagent/core/handlers/search_utils.py +77 -0
- bareagent/core/handlers/skill.py +87 -0
- bareagent/core/handlers/subagent_send.py +70 -0
- bareagent/core/handlers/web_fetch.py +126 -0
- bareagent/core/handlers/web_search.py +165 -0
- bareagent/core/handlers/workflow.py +190 -0
- bareagent/core/loop.py +535 -0
- bareagent/core/retry.py +131 -0
- bareagent/core/sandbox.py +27 -0
- bareagent/core/schema.py +21 -0
- bareagent/core/tools.py +779 -0
- bareagent/core/workflow.py +517 -0
- bareagent/core/workflow_registry.py +219 -0
- bareagent/debug/__init__.py +0 -0
- bareagent/debug/interaction_log.py +263 -0
- bareagent/debug/viewer.html +1750 -0
- bareagent/debug/web_viewer.py +157 -0
- bareagent/hooks/__init__.py +32 -0
- bareagent/hooks/config.py +118 -0
- bareagent/hooks/engine.py +197 -0
- bareagent/hooks/errors.py +14 -0
- bareagent/hooks/events.py +22 -0
- bareagent/lsp/__init__.py +63 -0
- bareagent/lsp/config.py +134 -0
- bareagent/lsp/coord.py +118 -0
- bareagent/lsp/diagnostics.py +240 -0
- bareagent/lsp/errors.py +24 -0
- bareagent/lsp/manager.py +866 -0
- bareagent/lsp/tools.py +629 -0
- bareagent/lsp/workspace_edit.py +305 -0
- bareagent/main.py +4205 -0
- bareagent/mcp/__init__.py +69 -0
- bareagent/mcp/_sse.py +69 -0
- bareagent/mcp/client.py +341 -0
- bareagent/mcp/config.py +169 -0
- bareagent/mcp/errors.py +32 -0
- bareagent/mcp/manager.py +318 -0
- bareagent/mcp/protocol.py +187 -0
- bareagent/mcp/registry.py +557 -0
- bareagent/mcp/transport/__init__.py +15 -0
- bareagent/mcp/transport/base.py +149 -0
- bareagent/mcp/transport/http_legacy.py +192 -0
- bareagent/mcp/transport/http_streamable.py +217 -0
- bareagent/mcp/transport/stdio.py +202 -0
- bareagent/memory/__init__.py +1 -0
- bareagent/memory/compact.py +203 -0
- bareagent/memory/conversation_io.py +226 -0
- bareagent/memory/embedding.py +194 -0
- bareagent/memory/persistent.py +515 -0
- bareagent/memory/token_counter.py +67 -0
- bareagent/memory/token_tracker.py +262 -0
- bareagent/memory/transcript.py +100 -0
- bareagent/permission/__init__.py +1 -0
- bareagent/permission/guard.py +329 -0
- bareagent/permission/rules.py +19 -0
- bareagent/planning/__init__.py +19 -0
- bareagent/planning/agent_types.py +169 -0
- bareagent/planning/skill_gen.py +141 -0
- bareagent/planning/skill_store.py +173 -0
- bareagent/planning/skills.py +146 -0
- bareagent/planning/subagent.py +355 -0
- bareagent/planning/subagent_registry.py +77 -0
- bareagent/planning/tasks.py +348 -0
- bareagent/planning/todo.py +153 -0
- bareagent/planning/worktree.py +122 -0
- bareagent/provider/__init__.py +1 -0
- bareagent/provider/anthropic.py +348 -0
- bareagent/provider/base.py +136 -0
- bareagent/provider/factory.py +130 -0
- bareagent/provider/openai.py +881 -0
- bareagent/provider/presets.py +72 -0
- bareagent/provider/setup.py +356 -0
- bareagent/skills/.gitkeep +1 -0
- bareagent/skills/code-review/SKILL.md +68 -0
- bareagent/skills/git/SKILL.md +68 -0
- bareagent/skills/test/SKILL.md +70 -0
- bareagent/team/__init__.py +17 -0
- bareagent/team/autonomous.py +193 -0
- bareagent/team/mailbox.py +239 -0
- bareagent/team/manager.py +155 -0
- bareagent/team/protocols.py +129 -0
- bareagent/tracing/__init__.py +12 -0
- bareagent/tracing/_api.py +92 -0
- bareagent/tracing/_proxy.py +60 -0
- bareagent/tracing/composite.py +115 -0
- bareagent/tracing/json_file.py +115 -0
- bareagent/tracing/langfuse.py +139 -0
- bareagent/tracing/otel.py +107 -0
- bareagent/tracing/setup.py +85 -0
- bareagent/ui/__init__.py +24 -0
- bareagent/ui/console.py +167 -0
- bareagent/ui/prompt.py +78 -0
- bareagent/ui/protocol.py +24 -0
- bareagent/ui/stream.py +66 -0
- bareagent/ui/theme.py +240 -0
- bareagent_cli-0.1.0.dist-info/METADATA +331 -0
- bareagent_cli-0.1.0.dist-info/RECORD +121 -0
- bareagent_cli-0.1.0.dist-info/WHEEL +4 -0
- bareagent_cli-0.1.0.dist-info/entry_points.txt +2 -0
- bareagent_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,517 @@
|
|
|
1
|
+
"""Deterministic workflow orchestration: run a static DAG of subagent nodes.
|
|
2
|
+
|
|
3
|
+
Pure logic with no LLM / loop / threading / SDK dependencies, so DAG parsing,
|
|
4
|
+
validation (cycle detection / dangling deps / limits), the ready-set scheduler,
|
|
5
|
+
result threading, and summary formatting are all unit-testable with injected
|
|
6
|
+
callbacks (mirrors the ``src/core/goal.py`` and ``src/core/retry.py`` pure-module
|
|
7
|
+
pattern).
|
|
8
|
+
|
|
9
|
+
Division of labor (see task 06-06-workflow-deterministic-orchestration):
|
|
10
|
+
- This module owns the *control flow*: which nodes are ready to run given what
|
|
11
|
+
has completed, how a failed node skips its transitive dependents, how upstream
|
|
12
|
+
results are threaded into a downstream prompt, and how the final summary reads
|
|
13
|
+
(``run_workflow`` + the pure helpers).
|
|
14
|
+
- The REPL (``main.py``) owns the side-effecting parts: executing one node as an
|
|
15
|
+
isolated ``run_subagent`` call and running a batch of ready nodes concurrently
|
|
16
|
+
on a thread pool. These are injected into :func:`run_workflow` as the
|
|
17
|
+
``execute_node`` / ``map_concurrent`` callbacks.
|
|
18
|
+
|
|
19
|
+
The LLM authors the DAG on the fly via the isolated ``workflow`` tool (declarative
|
|
20
|
+
nodes + ``depends_on`` edges, NOT executable code), keeping orchestration
|
|
21
|
+
deterministic and free of any code-``exec`` sandbox. Loops / conditionals /
|
|
22
|
+
dynamic fan-out are intentionally out of scope (static DAG); the model can issue
|
|
23
|
+
another ``workflow`` call from the main loop when it needs to branch.
|
|
24
|
+
|
|
25
|
+
Execution is *layered*: each iteration runs the whole current ready set
|
|
26
|
+
concurrently and waits for it before recomputing the next ready set. A node
|
|
27
|
+
therefore waits for all of its layer peers, not just its own dependencies -- a
|
|
28
|
+
known MVP simplification (a continuous as-completed scheduler is a later
|
|
29
|
+
extension). Failure semantics are *fail-soft* (decision (b)): a node whose
|
|
30
|
+
executor raises becomes ``FAILED``; its transitive dependents become ``SKIPPED``;
|
|
31
|
+
independent branches keep running.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
import re
|
|
37
|
+
from collections.abc import Callable
|
|
38
|
+
from dataclasses import dataclass, field, replace
|
|
39
|
+
from enum import Enum
|
|
40
|
+
from typing import Any
|
|
41
|
+
|
|
42
|
+
# Default ceiling on how many nodes one workflow may declare; guards against an
# LLM emitting a pathologically large DAG that floods the thread pool. Override
# via ``[workflow] max_nodes``. Used as the default ``max_nodes`` of
# ``validate_workflow``, where a non-positive value disables the check.
DEFAULT_MAX_NODES = 20
# Default cap on concurrently-running nodes. Conservative because each node is a
# full subagent that may itself spawn work. Override via ``[workflow]
# max_concurrency``.
# NOTE(review): not referenced by the code visible in this module; presumably
# consumed by whoever builds the ``map_concurrent`` callback -- confirm.
DEFAULT_MAX_CONCURRENCY = 8

# Matches ``{{ node_id }}`` placeholders in a node prompt for upstream-result
# substitution. Whitespace inside the braces is tolerated and group 1 captures
# the id, which is restricted to a safe identifier-ish charset (ASCII letters,
# digits, ``_``, ``.`` and ``-``).
_PLACEHOLDER = re.compile(r"\{\{\s*([A-Za-z0-9_.\-]+)\s*\}\}")

# Sentinel embedded in a node's ``error`` when it was skipped because the run hit
# its token budget. ``format_summary`` matches this (substring test) to surface a
# budget note, so it must stay in sync between the writer (``run_workflow``) and
# the reader.
_BUDGET_EXHAUSTED_REASON = "token budget exhausted"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class WorkflowError(Exception):
    """Raised when workflow input is structurally unusable (not a node list).

    ``parse_workflow`` raises it when the tool input is not a mapping or its
    ``nodes`` entry is not a list. Per-node problems are not raised; they are
    returned as messages by ``validate_workflow``.
    """
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class NodeStatus(Enum):
    """Lifecycle state of one workflow node as tracked by the scheduler."""

    # Initial state assigned by ``run_workflow``; the node has not run yet.
    PENDING = "pending"
    # The executor returned normally (or the result was reused from a prior run).
    DONE = "done"
    # The executor raised; dependents of this node get skipped.
    FAILED = "failed"
    # Never executed: an upstream node failed/was skipped, or the token budget ran out.
    SKIPPED = "skipped"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass(slots=True)
class WorkflowNode:
    """One unit of work in the DAG: an isolated subagent task.

    ``depends_on`` lists the ids whose outputs must complete (DONE) before this
    node runs. ``phase`` / ``label`` are organizational metadata surfaced in the
    summary; they do not affect scheduling.
    """

    # Identifier other nodes use in ``depends_on`` and in ``{{ id }}`` prompt placeholders.
    id: str
    # Raw prompt text; upstream outputs are threaded in later by ``build_node_prompt``.
    prompt: str
    # Optional agent type; this module only carries it on the node for the injected executor.
    agent_type: str | None = None
    # Ids of the nodes that must be DONE before this one becomes ready.
    depends_on: list[str] = field(default_factory=list)
    # Optional grouping shown as "(phase: ...)" in the summary title.
    phase: str | None = None
    # Optional human-readable label appended to the summary title.
    label: str | None = None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass(slots=True)
class NodeResult:
    """Terminal (or in-flight PENDING) state of a node after the scheduler runs.

    ``reused`` marks a result carried over from a prior run by :func:`compute_resume_plan`
    (the node did not execute this run, so it consumed no token budget); the
    status is still ``DONE`` so downstream scheduling is unaffected.
    """

    # Id of the node this result belongs to.
    id: str
    # Current lifecycle state of the node.
    status: NodeStatus
    # Stringified executor output; empty unless the node produced one.
    output: str = ""
    # Failure message (FAILED) or skip reason (SKIPPED); empty otherwise.
    error: str = ""
    # True when the result was copied from a prior run instead of being executed.
    reused: bool = False
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@dataclass(slots=True)
class WorkflowSpec:
    """A parsed (not yet validated) workflow DAG."""

    # Nodes in declaration order; scheduling and summary output iterate in this order.
    nodes: list[WorkflowNode]
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _coerce_node(raw: Any) -> WorkflowNode:
    """Turn one raw node entry into a :class:`WorkflowNode` without raising.

    Anything that is not a dict collapses to a node with an empty id and prompt,
    leaving it to :func:`validate_workflow` to report the problem. Optional
    string fields that are absent, non-string, or blank become ``None``;
    ``depends_on`` accepts either a list (entries are stringified and stripped,
    blanks dropped) or a single non-blank string, and otherwise becomes ``[]``.
    """
    if not isinstance(raw, dict):
        return WorkflowNode(id="", prompt="")

    def _clean_optional(field_name: str) -> str | None:
        # Only non-blank strings survive; everything else reads as "not provided".
        candidate = raw.get(field_name)
        if not isinstance(candidate, str):
            return None
        trimmed = candidate.strip()
        return trimmed or None

    declared = raw.get("depends_on")
    dependencies: list[str] = []
    if isinstance(declared, list):
        for entry in declared:
            text = str(entry).strip()
            if text:
                dependencies.append(text)
    elif isinstance(declared, str):
        single = declared.strip()
        if single:
            dependencies.append(single)

    # Falsy ids/prompts (None, "", 0, ...) normalise to the empty string.
    node_id = str(raw.get("id", "") or "").strip()
    prompt_text = str(raw.get("prompt", "") or "")
    return WorkflowNode(
        id=node_id,
        prompt=prompt_text,
        agent_type=_clean_optional("agent_type"),
        depends_on=dependencies,
        phase=_clean_optional("phase"),
        label=_clean_optional("label"),
    )
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def parse_workflow(tool_input: Any) -> WorkflowSpec:
    """Build a :class:`WorkflowSpec` from the raw input of the ``workflow`` tool.

    Only a structurally unusable payload raises :class:`WorkflowError`: the
    input must be a dict and its ``nodes`` entry must be a list. Individual
    nodes are coerced leniently; semantic issues (empty id, dangling dep,
    cycle) are left for :func:`validate_workflow` to report.
    """
    if not isinstance(tool_input, dict):
        raise WorkflowError("workflow input must be an object with a 'nodes' array.")

    declared_nodes = tool_input.get("nodes")
    if isinstance(declared_nodes, list):
        return WorkflowSpec(nodes=list(map(_coerce_node, declared_nodes)))
    raise WorkflowError("workflow 'nodes' must be an array.")
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _find_cycle(nodes: list[WorkflowNode], valid_ids: set[str]) -> list[str] | None:
    """Return a node-id cycle path (``a -> b -> a``) if the DAG has one, else None.

    Only edges to existing ids are considered (dangling deps are reported
    separately), and self-loops are ignored here (also reported separately).
    Nodes with an empty id are left out of the graph entirely.

    The depth-first search is iterative (explicit stack) rather than recursive,
    so a long dependency chain cannot raise ``RecursionError``; that matters
    because the node-count limit can be disabled by the caller. The returned
    path starts and ends with the same id and follows ``depends_on`` edges.
    """
    graph: dict[str, list[str]] = {
        n.id: [d for d in n.depends_on if d in valid_ids and d != n.id] for n in nodes if n.id
    }
    # Classic three-colour DFS: WHITE = unvisited, GRAY = on the current path,
    # BLACK = fully explored (cannot be part of a new cycle).
    WHITE, GRAY, BLACK = 0, 1, 2
    color = dict.fromkeys(graph, WHITE)

    for root in graph:
        if color[root] != WHITE:
            continue
        color[root] = GRAY
        path: list[str] = [root]
        # One edge iterator per node on ``path``; the top one belongs to path[-1].
        pending_edges = [iter(graph[root])]
        while pending_edges:
            for dep in pending_edges[-1]:
                # Ids that are valid but absent from ``graph`` count as explored.
                dep_color = color.get(dep, BLACK)
                if dep_color == GRAY:
                    # Back edge: the cycle is the path suffix starting at ``dep``.
                    return path[path.index(dep) :] + [dep]
                if dep_color == WHITE:
                    color[dep] = GRAY
                    path.append(dep)
                    pending_edges.append(iter(graph[dep]))
                    break  # descend; resume this node's remaining edges later
            else:
                # Every edge of path[-1] is explored: retire it and backtrack.
                pending_edges.pop()
                color[path.pop()] = BLACK
    return None
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def validate_workflow(spec: WorkflowSpec, *, max_nodes: int = DEFAULT_MAX_NODES) -> list[str]:
    """Collect human-readable validation problems for ``spec`` (empty list == valid).

    Verified, in reporting order: the workflow is non-empty (otherwise that is
    the only error returned), the node count respects ``max_nodes`` (checked only
    when ``max_nodes`` is positive), ids are non-empty and unique, every prompt is
    non-blank, no node depends on itself, every ``depends_on`` id names an
    existing node, and the dependency graph is acyclic.
    """
    declared = spec.nodes
    if not declared:
        return ["workflow must contain at least one node."]

    problems: list[str] = []
    total = len(declared)
    if 0 < max_nodes < total:
        problems.append(f"workflow has {total} nodes, exceeding the limit of {max_nodes}.")

    # Count each non-empty id once so duplicates and the known-id set fall out together.
    id_counts: dict[str, int] = {}
    blank_seen = False
    for node in declared:
        if node.id:
            id_counts[node.id] = id_counts.get(node.id, 0) + 1
        else:
            blank_seen = True
    known_ids = set(id_counts)

    if blank_seen:
        problems.append("every node must have a non-empty 'id'.")
    repeated = sorted(node_id for node_id, count in id_counts.items() if count > 1)
    problems.extend(f"duplicate node id: {dup!r}." for dup in repeated)

    for node in declared:
        if not node.prompt.strip():
            problems.append(f"node {node.id!r} has an empty 'prompt'.")
        for dep in node.depends_on:
            if dep == node.id:
                problems.append(f"node {node.id!r} cannot depend on itself.")
            elif dep not in known_ids:
                problems.append(f"node {node.id!r} depends on unknown node {dep!r}.")

    loop = _find_cycle(declared, known_ids)
    if loop is not None:
        problems.append("workflow has a dependency cycle: " + " -> ".join(loop))
    return problems
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def compute_ready(spec: WorkflowSpec, results: dict[str, NodeResult]) -> list[WorkflowNode]:
    """List, in spec order, the PENDING nodes whose dependencies are all DONE.

    A node without dependencies is ready as soon as it is PENDING. ``results``
    must hold an entry for every node id and every dependency id.
    """

    def _runnable(candidate: WorkflowNode) -> bool:
        if results[candidate.id].status is not NodeStatus.PENDING:
            return False
        for dep_id in candidate.depends_on:
            if results[dep_id].status is not NodeStatus.DONE:
                return False
        return True

    return [candidate for candidate in spec.nodes if _runnable(candidate)]
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def propagate_skips(spec: WorkflowSpec, results: dict[str, NodeResult]) -> set[str]:
    """Turn every PENDING node that has a FAILED/SKIPPED dependency into SKIPPED.

    ``results`` is updated in place. Passes over the spec repeat until one makes
    no change, so a skip cascades to transitive dependents regardless of
    declaration order. Returns the ids skipped by this call. Assumes a validated
    spec (all ``depends_on`` ids exist in ``results``).
    """
    blocking = (NodeStatus.FAILED, NodeStatus.SKIPPED)
    cascaded: set[str] = set()
    while True:
        progressed = False
        for node in spec.nodes:
            if results[node.id].status is not NodeStatus.PENDING:
                continue
            blocked = any(results[dep].status in blocking for dep in node.depends_on)
            if not blocked:
                continue
            results[node.id] = NodeResult(
                id=node.id,
                status=NodeStatus.SKIPPED,
                error="upstream dependency failed or was skipped",
            )
            cascaded.add(node.id)
            progressed = True
        if not progressed:
            return cascaded
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def compute_resume_plan(
    spec: WorkflowSpec,
    prior_spec: WorkflowSpec,
    prior_results: dict[str, NodeResult],
) -> dict[str, NodeResult]:
    """Select the prior results that can be carried over when resuming ``spec``.

    A node qualifies only when both conditions hold:

    - it is a *direct cache hit*: ``prior_spec`` had the same ``id`` with an
      identical raw (pre-substitution) ``prompt`` and its prior result is ``DONE``;
    - every dependency qualifies too, recursively. If anything upstream has to
      re-run (changed prompt, FAILED/SKIPPED last time, or new), the downstream
      output is considered stale even though its own prompt did not change.

    The returned mapping holds copies flagged ``reused=True`` (status remains
    ``DONE``) for the caller to seed into the scheduler. ``spec`` is expected to
    be acyclic; the memo still stops re-entrancy should that not hold.
    """
    prior_prompts = {node.id: node.prompt for node in prior_spec.nodes if node.id}
    nodes_by_id = {node.id: node for node in spec.nodes if node.id}
    verdicts: dict[str, bool] = {}

    def _can_reuse(node_id: str) -> bool:
        if node_id in verdicts:
            return verdicts[node_id]
        # Pessimistic placeholder: a node reached again while still being
        # evaluated (i.e. through a cycle) reads as "not reusable" and unwinds.
        verdicts[node_id] = False
        current = nodes_by_id.get(node_id)
        if current is None:
            return False
        if node_id not in prior_prompts or prior_prompts[node_id] != current.prompt:
            return False
        previous = prior_results.get(node_id)
        if previous is None or previous.status is not NodeStatus.DONE:
            return False
        for dep_id in current.depends_on:
            if not _can_reuse(dep_id):
                return False
        verdicts[node_id] = True
        return True

    return {
        node.id: replace(prior_results[node.id], reused=True)
        for node in spec.nodes
        if node.id and _can_reuse(node.id)
    }
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def build_node_prompt(node: WorkflowNode, upstream: dict[str, NodeResult]) -> str:
    """Render the prompt for ``node`` with its upstream outputs threaded in.

    Each ``{{dep_id}}`` placeholder whose id is a key of ``upstream`` is replaced
    by that result's output text; placeholders naming anything else are left
    exactly as written. Upstream results that no placeholder referenced are
    appended, each wrapped in a ``<result from="...">`` element, under an
    "# Upstream results" heading so the node still sees everything it depends on.
    """
    referenced: set[str] = set()

    def _fill(found: re.Match[str]) -> str:
        dep_id = found.group(1)
        dep_result = upstream.get(dep_id)
        if dep_result is None:
            # Not an upstream id: keep the placeholder text verbatim.
            return found.group(0)
        referenced.add(dep_id)
        return dep_result.output

    rendered = _PLACEHOLDER.sub(_fill, node.prompt)

    unreferenced = [outcome for dep_id, outcome in upstream.items() if dep_id not in referenced]
    if not unreferenced:
        return rendered

    sections = [rendered.rstrip(), "", "# Upstream results"]
    sections.extend(
        f'\n<result from="{outcome.id}">\n{outcome.output.strip()}\n</result>'
        for outcome in unreferenced
    )
    return "\n".join(sections)
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def format_summary(spec: WorkflowSpec, results: dict[str, NodeResult]) -> str:
    """Produce the aggregated, structured workflow report handed back to the LLM.

    The first block is a headline with done/failed/skipped counts, plus notes
    when results were reused or the token budget stopped the run. It is followed
    by one titled section per node in spec order: the output for DONE nodes, the
    error for FAILED nodes, and the skip reason (or "Skipped.") for the rest.
    Blocks are separated by blank lines.
    """
    outcomes = [results[node.id] for node in spec.nodes]

    def _tally(wanted: NodeStatus) -> int:
        return sum(1 for outcome in outcomes if outcome.status is wanted)

    done = _tally(NodeStatus.DONE)
    failed = _tally(NodeStatus.FAILED)
    skipped = _tally(NodeStatus.SKIPPED)
    reused = sum(1 for outcome in outcomes if outcome.reused)
    # The budget sentinel is written into the skip reason by ``run_workflow``.
    budget_hit = any(
        outcome.status is NodeStatus.SKIPPED and _BUDGET_EXHAUSTED_REASON in outcome.error
        for outcome in outcomes
    )

    headline_parts = [
        f"Workflow finished: {done} done, {failed} failed, {skipped} skipped "
        f"(of {len(spec.nodes)} nodes)."
    ]
    if reused:
        headline_parts.append(f" {reused} reused from a prior run.")
    if budget_hit:
        headline_parts.append(" Stopped early: token budget exhausted.")

    sections = ["".join(headline_parts)]
    for node, outcome in zip(spec.nodes, outcomes):
        marker = outcome.status.value if not outcome.reused else "reused"
        heading = f"## [{marker}] {node.id}"
        if node.phase:
            heading += f" (phase: {node.phase})"
        if node.label:
            heading += f" - {node.label}"
        sections.append(heading)

        if outcome.status is NodeStatus.DONE:
            body = outcome.output.strip() or "(no output)"
        elif outcome.status is NodeStatus.FAILED:
            body = f"Error: {outcome.error}"
        else:
            body = outcome.error or "Skipped."
        sections.append(body)
    return "\n\n".join(sections)
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _make_node_thunk(
    node: WorkflowNode,
    results: dict[str, NodeResult],
    execute_node: Callable[[WorkflowNode, dict[str, NodeResult]], Any],
) -> Callable[[], NodeResult]:
    """Wrap one node's execution in a zero-argument callable returning a result.

    The dependency results are captured when the thunk is built (the node is
    ready, so its deps are already DONE), not when it runs. An exception raised
    by ``execute_node`` is converted into a FAILED result carrying
    ``"<ExceptionType>: <message>"`` -- failure is data for the scheduler to
    propagate, not control flow that aborts the batch. A normal return becomes a
    DONE result whose output is the stringified return value.
    """
    deps_snapshot = {dep_id: results[dep_id] for dep_id in node.depends_on}

    def _run_once() -> NodeResult:
        try:
            produced = execute_node(node, deps_snapshot)
        except Exception as failure:  # noqa: BLE001 - any node failure is fail-soft
            message = f"{type(failure).__name__}: {failure}"
            return NodeResult(id=node.id, status=NodeStatus.FAILED, error=message)
        else:
            return NodeResult(id=node.id, status=NodeStatus.DONE, output=str(produced))

    return _run_once
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
def run_workflow(
    spec: WorkflowSpec,
    *,
    execute_node: Callable[[WorkflowNode, dict[str, NodeResult]], Any],
    map_concurrent: Callable[[list[Callable[[], NodeResult]]], list[NodeResult]],
    on_progress: Callable[[str], None] | None = None,
    on_node_status: Callable[[str, NodeResult], None] | None = None,
    reused_results: dict[str, NodeResult] | None = None,
    token_budget: int = 0,
    tokens_spent: Callable[[], int] | None = None,
) -> dict[str, NodeResult]:
    """Drive a validated DAG to completion, returning each node's terminal result.

    - ``execute_node(node, upstream)`` runs one node and returns its output text;
      raising marks the node FAILED (its dependents become SKIPPED).
    - ``map_concurrent(thunks)`` runs a batch of ready-node thunks concurrently
      and returns their results in order. Tests inject a synchronous map;
      ``main.py`` injects a thread-pool-backed one.
    - ``on_progress`` receives human-readable progress lines (main thread only).
    - ``on_node_status(node_id, result)`` fires whenever a node reaches a terminal
      state (reused-seed, executed, or skipped). ``main.py`` wires this to a
      locked registry update so the ``/workflows`` panel shows live progress;
      tests can assert the transition sequence. Unlike ``on_progress`` it carries
      structured data, not prose.
    - ``reused_results`` seeds DONE results carried over from a prior run (see
      :func:`compute_resume_plan`); those nodes are not executed and consume no
      budget. Entries whose id is not in ``spec`` are ignored.
    - ``token_budget`` (>0) caps the run: before launching each layer, if
      ``tokens_spent()`` has reached the budget, every remaining PENDING node is
      marked SKIPPED ("token budget exhausted") and the run stops. Already-running
      layers finish (the check is at layer boundaries, not mid-node). ``0`` (or a
      missing ``tokens_spent``) means unlimited -- the legacy behavior.

    Notifications are deterministic: nodes skipped in the same pass are reported
    in spec order, and ``tokens_spent()`` is read once per layer so the figure in
    the skip reason is the one that triggered the stop.

    Must be called on a spec that passed :func:`validate_workflow` (acyclic, all
    deps resolved), otherwise the scheduler could stall.
    """
    results = {node.id: NodeResult(id=node.id, status=NodeStatus.PENDING) for node in spec.nodes}

    def emit(message: str) -> None:
        if on_progress is not None:
            on_progress(message)

    def set_result(result: NodeResult) -> None:
        results[result.id] = result
        if on_node_status is not None:
            on_node_status(result.id, result)

    # Seed reused results from a prior run before scheduling so the scheduler
    # treats them as already DONE (skips execution, threads their output into
    # dependents).
    if reused_results:
        reused_ids = [n.id for n in spec.nodes if n.id in reused_results]
        for node_id in reused_ids:
            set_result(reused_results[node_id])
        if reused_ids:
            emit(f"Reusing {len(reused_ids)} node(s) from a prior run: " + ", ".join(reused_ids))

    while True:
        # ``propagate_skips`` already stored the SKIPPED results; only the
        # notifications remain. Walk the spec instead of the returned set so the
        # order does not depend on string-hash randomization.
        newly_skipped = propagate_skips(spec, results)
        if newly_skipped:
            for node in spec.nodes:
                if node.id not in newly_skipped:
                    continue
                result = results[node.id]
                emit(f"  skipped: {node.id} ({result.error})")
                if on_node_status is not None:
                    on_node_status(node.id, result)

        # Budget gate at the layer boundary; ``None`` means the budget is off.
        spent = tokens_spent() if (token_budget > 0 and tokens_spent is not None) else None
        if spent is not None and spent >= token_budget:
            reason = f"{_BUDGET_EXHAUSTED_REASON} ({spent} >= {token_budget} tokens)"
            pending = [n.id for n in spec.nodes if results[n.id].status is NodeStatus.PENDING]
            if pending:
                emit(
                    f"Token budget exhausted ({spent} >= {token_budget}); "
                    "skipping remaining node(s)."
                )
            for node_id in pending:
                set_result(NodeResult(id=node_id, status=NodeStatus.SKIPPED, error=reason))
            break

        ready = compute_ready(spec, results)
        if not ready:
            break
        emit(f"Running {len(ready)} node(s): " + ", ".join(n.id for n in ready))
        # Layered execution: the whole ready set finishes before the next one is computed.
        batch = map_concurrent([_make_node_thunk(node, results, execute_node) for node in ready])
        for result in batch:
            set_result(result)
            if result.status is NodeStatus.DONE:
                emit(f"  done: {result.id}")
            else:
                emit(f"  {result.status.value}: {result.id} ({result.error})")
    return results
|