@tagma/sdk 0.6.12 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/README.md +56 -15
  2. package/dist/bootstrap.d.ts +6 -6
  3. package/dist/bootstrap.d.ts.map +1 -1
  4. package/dist/bootstrap.js +5 -6
  5. package/dist/bootstrap.js.map +1 -1
  6. package/dist/config.d.ts +8 -0
  7. package/dist/config.d.ts.map +1 -0
  8. package/dist/config.js +5 -0
  9. package/dist/config.js.map +1 -0
  10. package/dist/core/dataflow.d.ts +23 -0
  11. package/dist/core/dataflow.d.ts.map +1 -0
  12. package/dist/core/dataflow.js +63 -0
  13. package/dist/core/dataflow.js.map +1 -0
  14. package/dist/core/log-prune.d.ts +16 -0
  15. package/dist/core/log-prune.d.ts.map +1 -0
  16. package/dist/core/log-prune.js +34 -0
  17. package/dist/core/log-prune.js.map +1 -0
  18. package/dist/core/preflight.d.ts +13 -0
  19. package/dist/core/preflight.d.ts.map +1 -0
  20. package/dist/core/preflight.js +61 -0
  21. package/dist/core/preflight.js.map +1 -0
  22. package/dist/core/run-context.d.ts +52 -0
  23. package/dist/core/run-context.d.ts.map +1 -0
  24. package/dist/core/run-context.js +156 -0
  25. package/dist/core/run-context.js.map +1 -0
  26. package/dist/core/run-state.d.ts +25 -0
  27. package/dist/core/run-state.d.ts.map +1 -0
  28. package/dist/core/run-state.js +93 -0
  29. package/dist/core/run-state.js.map +1 -0
  30. package/dist/core/scheduler.d.ts +13 -0
  31. package/dist/core/scheduler.d.ts.map +1 -0
  32. package/dist/core/scheduler.js +35 -0
  33. package/dist/core/scheduler.js.map +1 -0
  34. package/dist/core/task-executor.d.ts +13 -0
  35. package/dist/core/task-executor.d.ts.map +1 -0
  36. package/dist/core/task-executor.js +639 -0
  37. package/dist/core/task-executor.js.map +1 -0
  38. package/dist/core/trigger-errors.d.ts +9 -0
  39. package/dist/core/trigger-errors.d.ts.map +1 -0
  40. package/dist/core/trigger-errors.js +15 -0
  41. package/dist/core/trigger-errors.js.map +1 -0
  42. package/dist/engine.d.ts +6 -14
  43. package/dist/engine.d.ts.map +1 -1
  44. package/dist/engine.js +68 -1035
  45. package/dist/engine.js.map +1 -1
  46. package/dist/index.d.ts +9 -0
  47. package/dist/index.d.ts.map +1 -0
  48. package/dist/index.js +6 -0
  49. package/dist/index.js.map +1 -0
  50. package/dist/pipeline-definition.d.ts +3 -0
  51. package/dist/pipeline-definition.d.ts.map +1 -0
  52. package/dist/pipeline-definition.js +4 -0
  53. package/dist/pipeline-definition.js.map +1 -0
  54. package/dist/pipeline-runner.d.ts +2 -1
  55. package/dist/pipeline-runner.d.ts.map +1 -1
  56. package/dist/pipeline-runner.js +2 -2
  57. package/dist/pipeline-runner.js.map +1 -1
  58. package/dist/plugins.d.ts +5 -0
  59. package/dist/plugins.d.ts.map +1 -0
  60. package/dist/plugins.js +3 -0
  61. package/dist/plugins.js.map +1 -0
  62. package/dist/registry.d.ts +3 -19
  63. package/dist/registry.d.ts.map +1 -1
  64. package/dist/registry.js +7 -35
  65. package/dist/registry.js.map +1 -1
  66. package/dist/tagma.d.ts +24 -0
  67. package/dist/tagma.d.ts.map +1 -0
  68. package/dist/tagma.js +23 -0
  69. package/dist/tagma.js.map +1 -0
  70. package/dist/utils-api.d.ts +2 -0
  71. package/dist/utils-api.d.ts.map +1 -0
  72. package/dist/utils-api.js +2 -0
  73. package/dist/utils-api.js.map +1 -0
  74. package/dist/yaml.d.ts +4 -0
  75. package/dist/yaml.d.ts.map +1 -0
  76. package/dist/yaml.js +3 -0
  77. package/dist/yaml.js.map +1 -0
  78. package/package.json +52 -7
  79. package/src/bootstrap.ts +6 -6
  80. package/src/config.ts +26 -0
  81. package/src/core/dataflow.test.ts +167 -0
  82. package/src/core/dataflow.ts +118 -0
  83. package/src/core/log-prune.test.ts +58 -0
  84. package/src/core/log-prune.ts +43 -0
  85. package/src/core/preflight.test.ts +49 -0
  86. package/src/core/preflight.ts +89 -0
  87. package/src/core/run-context.test.ts +244 -0
  88. package/src/core/run-context.ts +207 -0
  89. package/src/core/run-state.test.ts +98 -0
  90. package/src/core/run-state.ts +122 -0
  91. package/src/core/scheduler.test.ts +83 -0
  92. package/src/core/scheduler.ts +42 -0
  93. package/src/core/task-executor.ts +803 -0
  94. package/src/core/trigger-errors.ts +15 -0
  95. package/src/engine.ts +80 -1248
  96. package/src/index.ts +28 -0
  97. package/src/pipeline-definition.ts +5 -0
  98. package/src/pipeline-runner.ts +3 -2
  99. package/src/plugin-registry.test.ts +7 -10
  100. package/src/plugins.ts +18 -0
  101. package/src/registry.ts +7 -49
  102. package/src/tagma.test.ts +84 -0
  103. package/src/tagma.ts +47 -0
  104. package/src/utils-api.ts +8 -0
  105. package/src/yaml.ts +11 -0
  106. package/dist/sdk.d.ts +0 -32
  107. package/dist/sdk.d.ts.map +0 -1
  108. package/dist/sdk.js +0 -41
  109. package/dist/sdk.js.map +0 -1
  110. package/src/sdk.ts +0 -151
package/src/engine.ts CHANGED
@@ -1,77 +1,38 @@
1
1
  import { resolve } from 'path';
2
- import { readdir, rm } from 'fs/promises';
3
2
  import type {
4
3
  PipelineConfig,
5
4
  TaskConfig,
6
5
  TaskState,
7
- TaskStatus,
8
- TaskResult,
9
- DriverPlugin,
10
- TriggerPlugin,
11
- CompletionPlugin,
12
- MiddlewarePlugin,
13
- MiddlewareContext,
14
- DriverContext,
15
- OnFailure,
16
- PromptDocument,
17
- Permissions,
18
- AbortReason,
19
6
  RunEventPayload,
20
7
  RunTaskState,
21
8
  } from './types';
22
- import { buildDag, type Dag } from './dag';
23
- import { defaultRegistry, type PluginRegistry } from './registry';
24
- import { runSpawn, runCommand } from './runner';
9
+ import { buildDag } from './dag';
10
+ import type { PluginRegistry } from './registry';
25
11
  import { parseDuration, nowISO, generateRunId } from './utils';
26
- import {
27
- promptDocumentFromString,
28
- serializePromptDocument,
29
- prependContext,
30
- renderInputsBlock,
31
- renderOutputSchemaBlock,
32
- } from './prompt-doc';
33
- import {
34
- extractTaskBindingOutputs,
35
- extractTaskOutputs,
36
- inferPromptPorts,
37
- resolveTaskBindingInputs,
38
- resolveTaskInputs,
39
- substituteInputs,
40
- } from './ports';
41
- import type { UpstreamBindingData } from './ports';
42
- import type { TaskPorts } from './types';
43
12
  import {
44
13
  executeHook,
45
14
  buildPipelineStartContext,
46
- buildTaskContext,
47
15
  buildPipelineCompleteContext,
48
16
  buildPipelineErrorContext,
49
17
  type PipelineInfo,
50
- type TrackInfo,
51
- type TaskInfo,
52
18
  } from './hooks';
53
- import { Logger, tailLines, clip } from './logger';
19
+ import { Logger } from './logger';
54
20
  import { InMemoryApprovalGateway, type ApprovalGateway } from './approval';
55
-
56
- // ═══ A7: Typed trigger errors ═══
57
- // Replace string-matching on error messages with structured error types so
58
- // coincidental substrings don't cause misclassification.
59
-
60
- export class TriggerBlockedError extends Error {
61
- readonly code = 'TRIGGER_BLOCKED' as const;
62
- constructor(message: string) {
63
- super(message);
64
- this.name = 'TriggerBlockedError';
65
- }
66
- }
67
-
68
- export class TriggerTimeoutError extends Error {
69
- readonly code = 'TRIGGER_TIMEOUT' as const;
70
- constructor(message: string) {
71
- super(message);
72
- this.name = 'TriggerTimeoutError';
73
- }
74
- }
21
+ import {
22
+ freezeStates,
23
+ summarizeStates,
24
+ toRunTaskState,
25
+ } from './core/run-state';
26
+ import { preflight } from './core/preflight';
27
+ import { pruneLogDirs } from './core/log-prune';
28
+ import { RunContext } from './core/run-context';
29
+ import {
30
+ allTasksTerminal,
31
+ findLaunchableTasks,
32
+ skipNonTerminalTasks,
33
+ } from './core/scheduler';
34
+ import { executeTask } from './core/task-executor';
35
+ export { TriggerBlockedError, TriggerTimeoutError } from './core/trigger-errors';
75
36
 
76
37
  function isPromptTaskConfig(
77
38
  task: TaskConfig,
@@ -79,85 +40,6 @@ function isPromptTaskConfig(
79
40
  return task.prompt !== undefined && task.command === undefined;
80
41
  }
81
42
 
82
- function isCommandTaskConfig(
83
- task: TaskConfig,
84
- ): task is TaskConfig & { readonly command: string; readonly prompt?: undefined } {
85
- return task.command !== undefined && task.prompt === undefined;
86
- }
87
-
88
- // ═══ Preflight Validation ═══
89
-
90
- function preflight(config: PipelineConfig, dag: Dag, registry: PluginRegistry): void {
91
- const errors: string[] = [];
92
-
93
- for (const [, node] of dag.nodes) {
94
- const task = node.task;
95
- const track = node.track;
96
- const driverName = task.driver ?? track.driver ?? config.driver ?? 'opencode';
97
-
98
- // Pure command tasks don't use a driver — skip driver registration check.
99
- const isCommandOnly = isCommandTaskConfig(task);
100
-
101
- if (!isCommandOnly && !registry.hasHandler('drivers', driverName)) {
102
- errors.push(`Task "${node.taskId}": driver "${driverName}" not registered`);
103
- }
104
-
105
- if (task.trigger && !registry.hasHandler('triggers', task.trigger.type)) {
106
- errors.push(`Task "${node.taskId}": trigger type "${task.trigger.type}" not registered`);
107
- }
108
-
109
- if (task.completion && !registry.hasHandler('completions', task.completion.type)) {
110
- errors.push(
111
- `Task "${node.taskId}": completion type "${task.completion.type}" not registered`,
112
- );
113
- }
114
-
115
- const mws = task.middlewares ?? track.middlewares ?? [];
116
- for (const mw of mws) {
117
- if (!registry.hasHandler('middlewares', mw.type)) {
118
- errors.push(`Task "${node.taskId}": middleware type "${mw.type}" not registered`);
119
- }
120
- }
121
-
122
- if (task.continue_from && registry.hasHandler('drivers', driverName)) {
123
- const driver = registry.getHandler<DriverPlugin>('drivers', driverName);
124
- if (!driver.capabilities.sessionResume) {
125
- // buildDag has already qualified `continue_from` and stored the result
126
- // on the node; preflight runs after buildDag, so the upstream id is
127
- // always available here without re-resolving.
128
- const upstreamId = node.resolvedContinueFrom;
129
- if (upstreamId) {
130
- const upstream = dag.nodes.get(upstreamId);
131
- if (upstream) {
132
- // A handoff is possible via session resume (already ruled out above),
133
- // OR in-memory text injection through normalizedMap
134
- // (when the upstream driver implements parseResult and returns normalizedOutput).
135
- const upstreamDriverName =
136
- upstream.task.driver ?? upstream.track.driver ?? config.driver ?? 'opencode';
137
- const upstreamDriver = registry.hasHandler('drivers', upstreamDriverName)
138
- ? registry.getHandler<DriverPlugin>('drivers', upstreamDriverName)
139
- : null;
140
- const canNormalize = typeof upstreamDriver?.parseResult === 'function';
141
-
142
- if (!canNormalize) {
143
- errors.push(
144
- `Task "${node.taskId}" uses continue_from: "${task.continue_from}", ` +
145
- `but upstream task "${upstreamId}" its driver ` +
146
- `does not implement parseResult for text-injection handoff. ` +
147
- `Use a driver with parseResult, or remove continue_from.`,
148
- );
149
- }
150
- }
151
- }
152
- }
153
- }
154
- }
155
-
156
- if (errors.length > 0) {
157
- throw new Error(`Preflight validation failed:\n - ${errors.join('\n - ')}`);
158
- }
159
- }
160
-
161
43
  // ═══ Engine ═══
162
44
 
163
45
  export interface EngineResult {
@@ -186,50 +68,6 @@ export interface EngineResult {
186
68
  // into @tagma/types directly.
187
69
  export type { RunEventPayload } from './types';
188
70
 
189
- // ═══ Helpers ═══
190
-
191
- /**
192
- * Project the engine's internal TaskState onto the wire RunTaskState
193
- * shape. `logs` / `totalLogCount` default to empty — they are populated
194
- * on the server side from streamed `task_log` events, not from state.
195
- */
196
- function toRunTaskState(
197
- taskId: string,
198
- trackId: string,
199
- taskName: string,
200
- state: TaskState,
201
- ): RunTaskState {
202
- const result = state.result;
203
- const cfg = state.config;
204
- return {
205
- taskId,
206
- trackId,
207
- taskName,
208
- status: state.status,
209
- startedAt: state.startedAt,
210
- finishedAt: state.finishedAt,
211
- durationMs: result?.durationMs ?? null,
212
- exitCode: result?.exitCode ?? null,
213
- stdout: result?.stdout ?? '',
214
- stderr: result?.stderr ?? '',
215
- stdoutPath: result?.stdoutPath ?? null,
216
- stderrPath: result?.stderrPath ?? null,
217
- stdoutBytes: result?.stdoutBytes ?? null,
218
- stderrBytes: result?.stderrBytes ?? null,
219
- sessionId: result?.sessionId ?? null,
220
- normalizedOutput: result?.normalizedOutput ?? null,
221
- resolvedDriver: cfg.driver ?? null,
222
- resolvedModel: cfg.model ?? null,
223
- resolvedPermissions: (cfg.permissions as Permissions | undefined) ?? null,
224
- // Ports not yet wired through the engine's event surface. Null placeholder
225
- // keeps the wire type honest until the ports extraction pass lands.
226
- outputs: result?.outputs ?? null,
227
- inputs: null,
228
- logs: [],
229
- totalLogCount: 0,
230
- };
231
- }
232
-
233
71
  export interface RunPipelineOptions {
234
72
  readonly approvalGateway?: ApprovalGateway;
235
73
  /**
@@ -262,11 +100,10 @@ export interface RunPipelineOptions {
262
100
  readonly skipPluginLoading?: boolean;
263
101
  /**
264
102
  * Plugin registry to resolve drivers/triggers/completions/middlewares from.
265
- * Defaults to the process-wide `defaultRegistry`. Multi-tenant hosts pass a
266
- * per-workspace registry so concurrent runs in different workspaces see
267
- * isolated handler sets.
103
+ * Callers pass a per-instance or per-workspace registry so concurrent runs
104
+ * do not share handler state.
268
105
  */
269
- readonly registry?: PluginRegistry;
106
+ readonly registry: PluginRegistry;
270
107
  }
271
108
 
272
109
  // Poll interval when no tasks are in-flight but non-terminal tasks remain
@@ -281,11 +118,16 @@ const MAX_NORMALIZED_BYTES = 1_000_000;
281
118
  export async function runPipeline(
282
119
  config: PipelineConfig,
283
120
  workDir: string,
284
- options: RunPipelineOptions = {},
121
+ options: RunPipelineOptions,
285
122
  ): Promise<EngineResult> {
286
123
  const approvalGateway = options.approvalGateway ?? new InMemoryApprovalGateway();
287
124
  const maxLogRuns = options.maxLogRuns ?? 20;
288
- const registry = options.registry ?? defaultRegistry;
125
+ const registry = options.registry;
126
+ if (!registry) {
127
+ throw new Error(
128
+ 'runPipeline requires options.registry. Use createTagma().run(...) for the public SDK API.',
129
+ );
130
+ }
289
131
 
290
132
  // Load any plugins declared in the pipeline config before preflight so that
291
133
  // drivers, completions, and middlewares referenced in YAML are registered.
@@ -339,18 +181,18 @@ export async function runPipeline(
339
181
  }
340
182
  log.quiet('');
341
183
 
342
- // Initialize states (before hook, so we can return them even if blocked)
343
- const states = new Map<string, TaskState>();
344
- for (const [id, node] of dag.nodes) {
345
- states.set(id, {
346
- config: node.task,
347
- trackConfig: node.track,
348
- status: 'idle',
349
- result: null,
350
- startedAt: null,
351
- finishedAt: null,
352
- });
353
- }
184
+ // Per-run state container. Constructed before the pipeline_start hook
185
+ // so the early-return path (blocked pipeline) can call freezeStates on
186
+ // the populated idle-state map. The constructor has no side effects —
187
+ // no listeners installed, no events emitted.
188
+ const ctx = new RunContext({
189
+ runId,
190
+ dag,
191
+ config,
192
+ workDir,
193
+ pipelineInfo,
194
+ onEvent: options.onEvent,
195
+ });
354
196
 
355
197
  // Pipeline start hook (gate). Runs BEFORE the engine emits run_start so
356
198
  // a blocked pipeline produces zero wire events (the server treats the
@@ -382,12 +224,12 @@ export async function runPipeline(
382
224
  timeout: 0,
383
225
  blocked: 0,
384
226
  },
385
- states: freezeStates(states),
227
+ states: freezeStates(ctx.states),
386
228
  };
387
229
  }
388
230
 
389
231
  // Pipeline approved — transition all tasks to waiting.
390
- for (const [, state] of states) {
232
+ for (const [, state] of ctx.states) {
391
233
  state.status = 'waiting';
392
234
  }
393
235
  // Emit run_start with a wire-shape snapshot so SSE subscribers can
@@ -396,66 +238,35 @@ export async function runPipeline(
396
238
  // the engine owns the lifecycle boundary.
397
239
  const runStartTasks: RunTaskState[] = [];
398
240
  for (const [id, node] of dag.nodes) {
399
- const s = states.get(id)!;
241
+ const s = ctx.states.get(id)!;
400
242
  runStartTasks.push(toRunTaskState(id, node.track.id, node.task.name ?? id, s));
401
243
  }
402
- emit({ type: 'run_start', runId, tasks: runStartTasks });
244
+ ctx.emit({ type: 'run_start', runId, tasks: runStartTasks });
403
245
 
404
- const sessionMap = new Map<string, string>();
405
- const normalizedMap = new Map<string, string>();
406
- // Published structured outputs keyed by fully-qualified task id.
407
- // Includes lightweight task.outputs and strict ports.outputs.
408
- const outputValuesMap = new Map<string, Readonly<Record<string, unknown>>>();
409
- // Full upstream result data for lightweight input bindings such as
410
- // `taskId.stdout` and `taskId.outputs.name`.
411
- const bindingDataMap = new Map<string, UpstreamBindingData>();
412
- // Resolved port inputs keyed by fully-qualified task id. Written once,
413
- // just before a task runs, so every subsequent task_update event can
414
- // echo them to the UI without re-resolving.
415
- const resolvedInputsMap = new Map<string, Readonly<Record<string, unknown>>>();
416
- // Reverse adjacency: for each task, list the direct-downstream task ids
417
- // (tasks whose `depends_on` includes this one after DAG qualification).
418
- // Computed once up front so Prompt-task port inference — which needs
419
- // "what Commands directly consume me?" — is O(1) instead of O(tasks)
420
- // per Prompt start. `dag.nodes` only exposes forward edges via
421
- // `dependsOn`, so we build this locally.
422
- const directDownstreams = new Map<string, string[]>();
423
- for (const [id] of dag.nodes) directDownstreams.set(id, []);
424
- for (const [id, node] of dag.nodes) {
425
- for (const upstream of node.dependsOn) {
426
- const list = directDownstreams.get(upstream);
427
- if (list) list.push(id);
428
- }
429
- }
430
-
431
- // Pipeline timeout + abort reason tracking.
432
- //
433
- // `abortReason` replaces the previous `pipelineAborted: boolean`: it
434
- // carries the concrete cause (timeout / stop_all / external) through
435
- // to run_end and the pipeline_error hook so downstream consumers can
436
- // distinguish them without scraping message strings.
246
+ // Pipeline timeout. `ctx.abortReason` carries the concrete cause
247
+ // (timeout / stop_all / external) through to run_end and the
248
+ // pipeline_error hook so downstream consumers can distinguish them
249
+ // without scraping message strings.
437
250
  const pipelineTimeoutMs = config.timeout ? parseDuration(config.timeout) : 0;
438
- let abortReason: AbortReason | null = null;
439
- const abortController = new AbortController();
440
251
  let pipelineTimer: ReturnType<typeof setTimeout> | null = null;
441
252
 
442
253
  if (pipelineTimeoutMs > 0) {
443
254
  pipelineTimer = setTimeout(() => {
444
- if (abortReason === null) abortReason = 'timeout';
445
- abortController.abort();
255
+ if (ctx.abortReason === null) ctx.abortReason = 'timeout';
256
+ ctx.abortController.abort();
446
257
  }, pipelineTimeoutMs);
447
258
  }
448
259
 
449
260
  // When the pipeline is aborted (timeout, stop_all, external), drain
450
261
  // all pending approvals so waiting triggers unblock immediately.
451
- abortController.signal.addEventListener('abort', () => {
262
+ ctx.abortController.signal.addEventListener('abort', () => {
452
263
  approvalGateway.abortAll('pipeline aborted');
453
264
  });
454
265
 
455
266
  // Wire external cancel signal into the internal abort controller.
456
267
  const externalAbortHandler = () => {
457
- if (abortReason === null) abortReason = 'external';
458
- abortController.abort();
268
+ if (ctx.abortReason === null) ctx.abortReason = 'external';
269
+ ctx.abortController.abort();
459
270
  };
460
271
  if (options.signal) {
461
272
  if (options.signal.aborted) {
@@ -470,7 +281,7 @@ export async function runPipeline(
470
281
  // updates. The server no longer needs its own gateway subscription.
471
282
  const unsubscribeApprovals = approvalGateway.subscribe((ev) => {
472
283
  if (ev.type === 'requested') {
473
- emit({
284
+ ctx.emit({
474
285
  type: 'approval_request',
475
286
  runId,
476
287
  request: {
@@ -492,7 +303,7 @@ export async function runPipeline(
492
303
  : ev.type === 'expired'
493
304
  ? 'timeout'
494
305
  : 'aborted';
495
- emit({
306
+ ctx.emit({
496
307
  type: 'approval_resolved',
497
308
  runId,
498
309
  requestId: ev.request.id,
@@ -501,901 +312,7 @@ export async function runPipeline(
501
312
  }
502
313
  });
503
314
 
504
- // ── Helpers ──
505
-
506
- function emit(event: RunEventPayload): void {
507
- options.onEvent?.(event);
508
- }
509
-
510
- function setTaskStatus(taskId: string, newStatus: TaskStatus): void {
511
- const state = states.get(taskId)!;
512
- // Terminal lock: once a task reaches a terminal state it must not be
513
- // re-transitioned. This prevents stop_all from marking running tasks as
514
- // skipped and then having their in-flight processTask promise overwrite
515
- // that with success/failed, producing an invalid double transition.
516
- if (isTerminal(state.status)) return;
517
- state.status = newStatus;
518
- const result = state.result;
519
- const cfg = state.config;
520
- emit({
521
- type: 'task_update',
522
- runId,
523
- taskId,
524
- status: newStatus,
525
- startedAt: state.startedAt ?? undefined,
526
- finishedAt: state.finishedAt ?? undefined,
527
- durationMs: result?.durationMs,
528
- exitCode: result?.exitCode,
529
- stdout: result?.stdout,
530
- stderr: result?.stderr,
531
- stdoutPath: result?.stdoutPath ?? null,
532
- stderrPath: result?.stderrPath ?? null,
533
- stdoutBytes: result?.stdoutBytes ?? null,
534
- stderrBytes: result?.stderrBytes ?? null,
535
- sessionId: result?.sessionId ?? null,
536
- normalizedOutput: result?.normalizedOutput ?? null,
537
- inputs: resolvedInputsMap.get(taskId) ?? null,
538
- outputs: outputValuesMap.get(taskId) ?? null,
539
- resolvedDriver: cfg.driver ?? null,
540
- resolvedModel: cfg.model ?? null,
541
- resolvedPermissions: (cfg.permissions as Permissions | undefined) ?? null,
542
- });
543
- }
544
-
545
- function getOnFailure(taskId: string): OnFailure {
546
- return dag.nodes.get(taskId)?.track.on_failure ?? 'skip_downstream';
547
- }
548
-
549
- function isDependencySatisfied(depId: string): 'satisfied' | 'unsatisfied' | 'skip' {
550
- const depState = states.get(depId);
551
- if (!depState) return 'skip';
552
- switch (depState.status) {
553
- case 'success':
554
- return 'satisfied';
555
- case 'skipped':
556
- return 'skip';
557
- case 'failed':
558
- case 'timeout':
559
- case 'blocked':
560
- return getOnFailure(depId) === 'ignore' ? 'satisfied' : 'skip';
561
- default:
562
- return 'unsatisfied';
563
- }
564
- }
565
-
566
- /**
567
- * H3: "stop_all" historically only stopped tasks within the same track,
568
- * which contradicted both its name and user expectations. It now stops
569
- * the **entire pipeline**:
570
- * - In-flight tasks are signalled via the shared abort controller so
571
- * drivers / runner.ts can cancel cooperatively (returning
572
- * `failureKind: 'timeout'`).
573
- * - Still-waiting tasks across every track are immediately marked
574
- * skipped so the run completes promptly.
575
- * The terminal lock in setTaskStatus prevents any later re-transition
576
- * should a completed running task try to overwrite the skipped state.
577
- */
578
- function applyStopAll(_failedTrackId: string): void {
579
- if (abortReason === null) abortReason = 'stop_all';
580
- abortController.abort();
581
- for (const [id, state] of states) {
582
- if (state.status === 'waiting') {
583
- state.finishedAt = nowISO();
584
- setTaskStatus(id, 'skipped');
585
- }
586
- }
587
- }
588
-
589
- function buildTaskInfoObj(taskId: string): TaskInfo {
590
- const state = states.get(taskId)!;
591
- return {
592
- id: taskId,
593
- name: state.config.name,
594
- type: isPromptTaskConfig(state.config) ? 'ai' : 'command',
595
- status: state.status,
596
- exit_code: state.result?.exitCode ?? null,
597
- duration_ms: state.result?.durationMs ?? null,
598
- stderr_path: state.result?.stderrPath ?? null,
599
- session_id: state.result?.sessionId ?? null,
600
- started_at: state.startedAt,
601
- finished_at: state.finishedAt,
602
- };
603
- }
604
-
605
- function trackInfoOf(taskId: string): TrackInfo {
606
- const node = dag.nodes.get(taskId)!;
607
- return { id: node.track.id, name: node.track.name };
608
- }
609
-
610
- async function fireHook(taskId: string, event: 'task_success' | 'task_failure'): Promise<void> {
611
- await executeHook(
612
- config.hooks,
613
- event,
614
- buildTaskContext(event, pipelineInfo, trackInfoOf(taskId), buildTaskInfoObj(taskId)),
615
- workDir,
616
- abortController.signal,
617
- );
618
- }
619
-
620
315
  // ── Process a single task ──
621
-
622
- async function processTask(taskId: string): Promise<void> {
623
- const state = states.get(taskId)!;
624
- const node = dag.nodes.get(taskId)!;
625
- const task = node.task;
626
- const track = node.track;
627
-
628
- log.section(`Task ${taskId}`, taskId);
629
- log.debug(
630
- `[task:${taskId}]`,
631
- `type=${isPromptTaskConfig(task) ? 'ai' : 'cmd'} track=${track.id} deps=[${node.dependsOn.join(', ') || '(root)'}]`,
632
- );
633
-
634
- // 1. Check dependencies
635
- for (const depId of node.dependsOn) {
636
- const result = isDependencySatisfied(depId);
637
- if (result === 'skip') {
638
- const depStatus = states.get(depId)?.status ?? 'unknown';
639
- log.debug(`[task:${taskId}]`, `skipped (upstream "${depId}" status=${depStatus})`);
640
- state.finishedAt = nowISO();
641
- setTaskStatus(taskId, 'skipped');
642
- return;
643
- }
644
- if (result === 'unsatisfied') return; // still waiting
645
- }
646
-
647
- // 2. Check trigger
648
- if (task.trigger) {
649
- log.debug(
650
- `[task:${taskId}]`,
651
- `trigger wait: type=${task.trigger.type} ${JSON.stringify(task.trigger)}`,
652
- );
653
- try {
654
- const triggerPlugin = registry.getHandler<TriggerPlugin>('triggers', task.trigger.type);
655
- // R6: race the plugin's watch() against the pipeline's abort signal
656
- // AND the task-level timeout. Third-party triggers may forget to
657
- // wire up ctx.signal — without the abort race, an aborted pipeline
658
- // would hang forever waiting for the plugin's watch promise to
659
- // resolve. And without the timeout race, a buggy watch() that never
660
- // settles would ignore the user's `task.timeout` (which the spawn
661
- // path at step 4 already honours) — a task could wedge the whole
662
- // pipeline until pipeline-level timeout fires (or forever, if none
663
- // is set). Honouring task.timeout here makes the two stages
664
- // symmetric. The cleanup paths in finally never run on the orphaned
665
- // plugin promise (it's allowed to leak a watcher; the pipeline is
666
- // being torn down anyway).
667
- const triggerTimeoutMs = task.timeout ? parseDuration(task.timeout) : 0;
668
- await new Promise<unknown>((resolve, reject) => {
669
- let settled = false;
670
- let timer: ReturnType<typeof setTimeout> | null = null;
671
- const onAbort = () => {
672
- if (settled) return;
673
- settled = true;
674
- if (timer !== null) clearTimeout(timer);
675
- reject(new Error('Pipeline aborted'));
676
- };
677
- if (abortController.signal.aborted) {
678
- onAbort();
679
- return;
680
- }
681
- abortController.signal.addEventListener('abort', onAbort, { once: true });
682
- if (triggerTimeoutMs > 0) {
683
- timer = setTimeout(() => {
684
- if (settled) return;
685
- settled = true;
686
- abortController.signal.removeEventListener('abort', onAbort);
687
- reject(
688
- new TriggerTimeoutError(
689
- `Trigger "${task.trigger!.type}" did not settle within ${task.timeout} (task-level timeout)`,
690
- ),
691
- );
692
- }, triggerTimeoutMs);
693
- }
694
- triggerPlugin
695
- .watch(task.trigger as Record<string, unknown>, {
696
- taskId: node.taskId,
697
- trackId: track.id,
698
- workDir: task.cwd ?? workDir,
699
- signal: abortController.signal,
700
- approvalGateway,
701
- })
702
- .then(
703
- (v) => {
704
- if (settled) return;
705
- settled = true;
706
- if (timer !== null) clearTimeout(timer);
707
- abortController.signal.removeEventListener('abort', onAbort);
708
- resolve(v);
709
- },
710
- (e) => {
711
- if (settled) return;
712
- settled = true;
713
- if (timer !== null) clearTimeout(timer);
714
- abortController.signal.removeEventListener('abort', onAbort);
715
- reject(e);
716
- },
717
- );
718
- });
719
- log.debug(`[task:${taskId}]`, `trigger fired`);
720
- } catch (err: unknown) {
721
- // If pipeline was aborted while we were still waiting for the trigger,
722
- // this task never entered running state → skipped, not timeout.
723
- state.finishedAt = nowISO();
724
- if (abortReason !== null) {
725
- setTaskStatus(taskId, 'skipped');
726
- } else if (err instanceof TriggerBlockedError) {
727
- setTaskStatus(taskId, 'blocked'); // user/policy rejection
728
- } else if (err instanceof TriggerTimeoutError) {
729
- setTaskStatus(taskId, 'timeout'); // genuine trigger wait timeout
730
- } else {
731
- // A7 fallback: also check message strings for backward-compat with
732
- // third-party trigger plugins that don't throw typed errors yet.
733
- const msg = err instanceof Error ? err.message : String(err);
734
- if (msg.includes('rejected') || msg.includes('denied')) {
735
- setTaskStatus(taskId, 'blocked');
736
- } else if (msg.includes('timeout')) {
737
- setTaskStatus(taskId, 'timeout');
738
- } else {
739
- setTaskStatus(taskId, 'failed'); // plugin error, watcher crash, etc.
740
- }
741
- }
742
- try {
743
- await fireHook(taskId, 'task_failure');
744
- } catch (hookErr) {
745
- log.error(
746
- `[task:${taskId}]`,
747
- `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
748
- );
749
- }
750
- return;
751
- }
752
- }
753
-
754
- // 3. task_start hook (gate)
755
- const hookResult = await executeHook(
756
- config.hooks,
757
- 'task_start',
758
- buildTaskContext('task_start', pipelineInfo, trackInfoOf(taskId), buildTaskInfoObj(taskId)),
759
- workDir,
760
- abortController.signal,
761
- );
762
- if (hookResult.exitCode !== 0 || config.hooks?.task_start) {
763
- log.debug(
764
- `[task:${taskId}]`,
765
- `task_start hook exit=${hookResult.exitCode} allowed=${hookResult.allowed}`,
766
- );
767
- }
768
- if (!hookResult.allowed) {
769
- state.finishedAt = nowISO();
770
- setTaskStatus(taskId, 'blocked');
771
- try {
772
- await fireHook(taskId, 'task_failure');
773
- } catch (hookErr) {
774
- log.error(
775
- `[task:${taskId}]`,
776
- `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
777
- );
778
- }
779
- return;
780
- }
781
-
782
- // 3.5. Resolve port inputs from upstream outputs. This is the last
783
- // gate before execution: missing-required inputs block the task
784
- // without ever spawning a process, so the caller sees a clear
785
- // "blocked: missing input X" rather than a cryptic runtime error
786
- // from a command that expanded a placeholder to the empty string.
787
- // Resolution runs even for tasks that declare no ports — the call
788
- // is cheap and returns `{kind: 'ready', inputs: {}}` in that case,
789
- // which downstream code handles uniformly.
790
- //
791
- // Prompt Tasks have no declared ports — their I/O contract is
792
- // inferred from direct-neighbor Command Tasks (see ports.ts:
793
- // `inferPromptPorts`). We synthesize a `TaskPorts` object and
794
- // feed it into the same resolve/substitute/render/extract
795
- // pipeline the Command path uses. Collisions that a Prompt can't
796
- // disambiguate (same input name on two upstreams, incompatible
797
- // downstream output types) block the task with a clear message.
798
- const isPromptTask = isPromptTaskConfig(task);
799
- let effectivePorts: TaskPorts | undefined = task.ports;
800
- let promptInferenceBlockReason: string | null = null;
801
-
802
- if (isPromptTask) {
803
- const inference = inferPromptPorts({
804
- upstreams: node.dependsOn.map((upstreamId) => {
805
- const upstream = dag.nodes.get(upstreamId);
806
- const isUpstreamCommand = upstream ? isCommandTaskConfig(upstream.task) : false;
807
- return {
808
- taskId: upstreamId,
809
- outputs: isUpstreamCommand ? upstream?.task.ports?.outputs : undefined,
810
- };
811
- }),
812
- downstreams: (directDownstreams.get(taskId) ?? []).map((downstreamId) => {
813
- const downstream = dag.nodes.get(downstreamId);
814
- const isDownstreamCommand = downstream ? isCommandTaskConfig(downstream.task) : false;
815
- return {
816
- taskId: downstreamId,
817
- inputs: isDownstreamCommand ? downstream?.task.ports?.inputs : undefined,
818
- };
819
- }),
820
- });
821
- effectivePorts = inference.ports;
822
- if (inference.inputConflicts.length > 0 || inference.outputConflicts.length > 0) {
823
- const lines: string[] = [];
824
- for (const c of inference.inputConflicts) lines.push(c.reason);
825
- for (const c of inference.outputConflicts) lines.push(c.reason);
826
- promptInferenceBlockReason = lines.join('\n');
827
- }
828
- }
829
-
830
- if (promptInferenceBlockReason !== null) {
831
- log.error(
832
- `[task:${taskId}]`,
833
- `blocked — prompt port inference failed:\n${promptInferenceBlockReason}`,
834
- );
835
- state.result = {
836
- exitCode: -1,
837
- stdout: '',
838
- stderr: `[engine] prompt port inference failed:\n${promptInferenceBlockReason}`,
839
- stdoutPath: null,
840
- stderrPath: null,
841
- durationMs: 0,
842
- sessionId: null,
843
- normalizedOutput: null,
844
- failureKind: 'spawn_error',
845
- outputs: null,
846
- };
847
- state.finishedAt = nowISO();
848
- setTaskStatus(taskId, 'blocked');
849
- try {
850
- await fireHook(taskId, 'task_failure');
851
- } catch (hookErr) {
852
- log.error(
853
- `[task:${taskId}]`,
854
- `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
855
- );
856
- }
857
- if (getOnFailure(taskId) === 'stop_all') applyStopAll(node.track.id);
858
- return;
859
- }
860
-
861
- const bindingResolution = resolveTaskBindingInputs(task, bindingDataMap, node.dependsOn);
862
- if (bindingResolution.kind === 'blocked') {
863
- log.error(
864
- `[task:${taskId}]`,
865
- `blocked — cannot resolve task input bindings:\n${bindingResolution.reason}`,
866
- );
867
- state.result = {
868
- exitCode: -1,
869
- stdout: '',
870
- stderr: `[engine] task input binding resolution failed:\n${bindingResolution.reason}`,
871
- stdoutPath: null,
872
- stderrPath: null,
873
- durationMs: 0,
874
- sessionId: null,
875
- normalizedOutput: null,
876
- failureKind: 'spawn_error',
877
- outputs: null,
878
- };
879
- state.finishedAt = nowISO();
880
- setTaskStatus(taskId, 'blocked');
881
- try {
882
- await fireHook(taskId, 'task_failure');
883
- } catch (hookErr) {
884
- log.error(
885
- `[task:${taskId}]`,
886
- `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
887
- );
888
- }
889
- if (getOnFailure(taskId) === 'stop_all') applyStopAll(node.track.id);
890
- return;
891
- }
892
- if (bindingResolution.missingOptional.length > 0) {
893
- log.debug(
894
- `[task:${taskId}]`,
895
- `optional input bindings unresolved (empty in placeholders): ${bindingResolution.missingOptional.join(', ')}`,
896
- );
897
- }
898
-
899
- // Feed effective ports into `resolveTaskInputs` by shallow-cloning
900
- // the task. Prompt tasks get the inferred ports; Command tasks are
901
- // unchanged (effectivePorts === task.ports).
902
- const taskForResolve: TaskConfig =
903
- effectivePorts === task.ports ? task : { ...task, ports: effectivePorts };
904
- const inputResolution = resolveTaskInputs(taskForResolve, outputValuesMap, node.dependsOn);
905
- if (inputResolution.kind === 'blocked') {
906
- log.error(
907
- `[task:${taskId}]`,
908
- `blocked — cannot resolve port inputs:\n${inputResolution.reason}`,
909
- );
910
- state.result = {
911
- exitCode: -1,
912
- stdout: '',
913
- stderr: `[engine] port input resolution failed:\n${inputResolution.reason}`,
914
- stdoutPath: null,
915
- stderrPath: null,
916
- durationMs: 0,
917
- sessionId: null,
918
- normalizedOutput: null,
919
- failureKind: 'spawn_error',
920
- outputs: null,
921
- };
922
- state.finishedAt = nowISO();
923
- setTaskStatus(taskId, 'blocked');
924
- try {
925
- await fireHook(taskId, 'task_failure');
926
- } catch (hookErr) {
927
- log.error(
928
- `[task:${taskId}]`,
929
- `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
930
- );
931
- }
932
- if (getOnFailure(taskId) === 'stop_all') applyStopAll(node.track.id);
933
- return;
934
- }
935
- const resolvedInputs = { ...bindingResolution.inputs, ...inputResolution.inputs };
936
- resolvedInputsMap.set(taskId, resolvedInputs);
937
- if (inputResolution.missingOptional.length > 0) {
938
- log.debug(
939
- `[task:${taskId}]`,
940
- `optional inputs unresolved (empty in placeholders): ${inputResolution.missingOptional.join(', ')}`,
941
- );
942
- }
943
- if (effectivePorts?.inputs && effectivePorts.inputs.length > 0) {
944
- log.debug(
945
- `[task:${taskId}]`,
946
- `resolved inputs: ${JSON.stringify(resolvedInputs)}` +
947
- (isPromptTask ? ' (inferred from upstream Commands)' : ''),
948
- );
949
- }
950
-
951
- // 4. Mark running — set startedAt before emitting so subscribers see a
952
- // complete task_update (startedAt non-null) on the status transition.
953
- state.startedAt = nowISO();
954
- setTaskStatus(taskId, 'running');
955
- log.info(
956
- `[task:${taskId}]`,
957
- isCommandTaskConfig(task) ? `running: ${task.command}` : `running (driver task)`,
958
- );
959
-
960
- // File-only: resolved config for this task
961
- const resolvedDriver = task.driver ?? track.driver ?? config.driver ?? 'opencode';
962
- const resolvedModel = task.model ?? track.model ?? config.model ?? '(default)';
963
- const resolvedPerms = task.permissions ?? track.permissions ?? '(default)';
964
- const resolvedCwd = task.cwd ?? track.cwd ?? workDir;
965
- log.debug(
966
- `[task:${taskId}]`,
967
- `resolved: driver=${resolvedDriver} model=${resolvedModel} cwd=${resolvedCwd}`,
968
- );
969
- log.debug(`[task:${taskId}]`, `permissions: ${JSON.stringify(resolvedPerms)}`);
970
- if (task.continue_from) {
971
- log.debug(`[task:${taskId}]`, `continue_from: "${task.continue_from}"`);
972
- }
973
- if (task.timeout) {
974
- log.debug(`[task:${taskId}]`, `timeout: ${task.timeout}`);
975
- }
976
-
977
- try {
978
- let result: TaskResult;
979
- const timeoutMs = task.timeout ? parseDuration(task.timeout) : undefined;
980
-
981
- // Stream child stdout/stderr directly to disk in the logger's run dir
982
- // and keep only a bounded tail in the returned TaskResult. Filenames
983
- // mirror the existing `.stderr` naming — dots in task ids are replaced
984
- // so hierarchical ids (e.g. `track1.task2`) map cleanly to a flat dir.
985
- const fsSafeTaskId = taskId.replace(/\./g, '_');
986
- const stdoutPath = resolve(log.dir, `${fsSafeTaskId}.stdout`);
987
- const stderrPath = resolve(log.dir, `${fsSafeTaskId}.stderr`);
988
- const runOpts = {
989
- timeoutMs,
990
- signal: abortController.signal,
991
- stdoutPath,
992
- stderrPath,
993
- };
994
-
995
- if (isCommandTaskConfig(task)) {
996
- // Substitute `{{inputs.X}}` placeholders into the command
997
- // string. Tasks with no declared inputs always produce the same
998
- // string back (no placeholders to match). Unresolved references
999
- // render empty — validate-raw flags undeclared references as
1000
- // errors, so the only way to land here with an unresolved is an
1001
- // optional input that had no upstream producer and no default,
1002
- // which we surface in the log.
1003
- const { text: expandedCommand, unresolved } = substituteInputs(
1004
- task.command,
1005
- resolvedInputs,
1006
- );
1007
- if (unresolved.length > 0) {
1008
- log.debug(
1009
- `[task:${taskId}]`,
1010
- `command placeholders rendered empty: ${unresolved.join(', ')}`,
1011
- );
1012
- }
1013
- log.debug(`[task:${taskId}]`, `command: ${expandedCommand}`);
1014
- result = await runCommand(expandedCommand, task.cwd ?? workDir, runOpts);
1015
- } else {
1016
- // AI task: apply middleware chain against a structured PromptDocument.
1017
- const driverName = task.driver ?? track.driver ?? config.driver ?? 'opencode';
1018
- const driver = registry.getHandler<DriverPlugin>('drivers', driverName);
1019
-
1020
- // Substitute placeholders in the user-authored prompt before
1021
- // wrapping into a PromptDocument so middlewares see the
1022
- // already-resolved task text.
1023
- const { text: expandedPrompt, unresolved } = substituteInputs(
1024
- task.prompt!,
1025
- resolvedInputs,
1026
- );
1027
- if (unresolved.length > 0) {
1028
- log.debug(
1029
- `[task:${taskId}]`,
1030
- `prompt placeholders rendered empty: ${unresolved.join(', ')}`,
1031
- );
1032
- }
1033
- const originalLen = expandedPrompt.length;
1034
- let doc: PromptDocument = promptDocumentFromString(expandedPrompt);
1035
- // Prepend port-related context blocks so the model sees them
1036
- // before any middleware-added retrieval / memory blocks. Order
1037
- // matters: [Output Format] first (sets the deliverable), then
1038
- // [Inputs] (the concrete data to operate on). Empty blocks are
1039
- // filtered out — tasks without ports get no extra blocks at all.
1040
- const outputFormatBlock = renderOutputSchemaBlock(effectivePorts?.outputs);
1041
- if (outputFormatBlock) {
1042
- doc = prependContext(doc, outputFormatBlock);
1043
- }
1044
- const inputsBlock = renderInputsBlock(effectivePorts?.inputs, resolvedInputs);
1045
- if (inputsBlock) {
1046
- doc = prependContext(doc, inputsBlock);
1047
- }
1048
- const mws = task.middlewares !== undefined ? task.middlewares : track.middlewares;
1049
- if (mws && mws.length > 0) {
1050
- log.debug(
1051
- `[task:${taskId}]`,
1052
- `middleware chain: ${mws.map((m) => m.type).join(' → ')}`,
1053
- );
1054
- const mwCtx: MiddlewareContext = {
1055
- task,
1056
- track,
1057
- workDir: task.cwd ?? workDir,
1058
- };
1059
- for (const mwConfig of mws) {
1060
- const mwPlugin = registry.getHandler<MiddlewarePlugin>('middlewares', mwConfig.type);
1061
- const beforeBlocks = doc.contexts.length;
1062
- const beforeLen = serializePromptDocument(doc).length;
1063
-
1064
- // Prefer the structured API. Fall back to the legacy
1065
- // `enhance(string) → string` path so v0.x plugins keep
1066
- // working — that fallback loses context structure (the
1067
- // middleware's output becomes the new task body) but never
1068
- // silently drops content.
1069
- if (typeof mwPlugin.enhanceDoc === 'function') {
1070
- const next = await mwPlugin.enhanceDoc(
1071
- doc,
1072
- mwConfig as Record<string, unknown>,
1073
- mwCtx,
1074
- );
1075
- if (
1076
- !next ||
1077
- typeof next !== 'object' ||
1078
- !Array.isArray((next as PromptDocument).contexts) ||
1079
- typeof (next as PromptDocument).task !== 'string'
1080
- ) {
1081
- throw new Error(
1082
- `middleware "${mwConfig.type}".enhanceDoc() returned a malformed PromptDocument`,
1083
- );
1084
- }
1085
- doc = next as PromptDocument;
1086
- } else if (typeof mwPlugin.enhance === 'function') {
1087
- const asString = serializePromptDocument(doc);
1088
- const next = await mwPlugin.enhance(
1089
- asString,
1090
- mwConfig as Record<string, unknown>,
1091
- mwCtx,
1092
- );
1093
- // R3: a middleware that returns undefined / null / a non-string
1094
- // would silently corrupt the prompt. Fail loud.
1095
- if (typeof next !== 'string') {
1096
- throw new Error(
1097
- `middleware "${mwConfig.type}".enhance() returned ${next === null ? 'null' : typeof next}, expected string`,
1098
- );
1099
- }
1100
- // Legacy fallback: collapse the returned string into a
1101
- // fresh doc. Earlier structure is folded into the string
1102
- // (serializePromptDocument just ran), so bytes the driver
1103
- // sees match the old string pipeline.
1104
- doc = { contexts: [], task: next };
1105
- } else {
1106
- throw new Error(
1107
- `middleware "${mwConfig.type}" provides neither enhanceDoc nor enhance`,
1108
- );
1109
- }
1110
- const afterLen = serializePromptDocument(doc).length;
1111
- const addedBlocks = doc.contexts.length - beforeBlocks;
1112
- log.debug(
1113
- `[task:${taskId}]`,
1114
- ` ${mwConfig.type}: ${beforeLen} → ${afterLen} chars` +
1115
- (addedBlocks > 0
1116
- ? ` (+${addedBlocks} context block${addedBlocks > 1 ? 's' : ''})`
1117
- : ''),
1118
- );
1119
- }
1120
- }
1121
- const prompt = serializePromptDocument(doc);
1122
- log.debug(
1123
- `[task:${taskId}]`,
1124
- `prompt: ${originalLen} chars (final: ${prompt.length} chars, ${doc.contexts.length} block${doc.contexts.length === 1 ? '' : 's'})`,
1125
- );
1126
- log.quiet(`--- prompt (final) ---\n${clip(prompt)}\n--- end prompt ---`, taskId);
1127
-
1128
- // H1: hand the driver a continue_from that has already been
1129
- // qualified by dag.ts. Without this, drivers like codex/opencode/
1130
- // claude-code look up maps directly with
1131
- // the user's raw (possibly bare) string, which races whenever two
1132
- // tracks share a task name. dag.ts has the only authoritative
1133
- // resolver, so we use its precomputed answer here.
1134
- // Drivers key sessionMap/normalizedMap by fully-qualified id. buildDag
1135
- // guarantees `resolvedContinueFrom` is set for every task that has a
1136
- // `continue_from`, so if we see the bare form here something upstream
1137
- // is broken — fail loud instead of silently miskeying the lookup.
1138
- if (task.continue_from && !node.resolvedContinueFrom) {
1139
- throw new Error(
1140
- `Internal: task "${taskId}" has continue_from "${task.continue_from}" ` +
1141
- `but no resolvedContinueFrom. buildDag should have qualified it.`,
1142
- );
1143
- }
1144
- const enrichedTask: TaskConfig = {
1145
- ...task,
1146
- prompt,
1147
- continue_from: node.resolvedContinueFrom,
1148
- // Hand the driver the EFFECTIVE port schema rather than the
1149
- // raw task.ports. For Prompt tasks this is the one inferred
1150
- // from neighbor Commands; Command tasks are unchanged.
1151
- // Drivers that introspect ports (e.g. to annotate a system
1152
- // prompt with the I/O contract) otherwise saw `undefined`
1153
- // for every prompt and had no way to know the contract.
1154
- ports: effectivePorts,
1155
- };
1156
- const driverCtx: DriverContext = {
1157
- sessionMap,
1158
- normalizedMap,
1159
- workDir: task.cwd ?? workDir,
1160
- // Structured view for drivers that want fine-grained control
1161
- // over serialization (e.g. inserting [Previous Output] between
1162
- // contexts and task). Drivers that read task.prompt see the
1163
- // default serialization and need no changes.
1164
- promptDoc: doc,
1165
- // Ports feature: resolved input values keyed by port name,
1166
- // already coerced to the declared port type. Drivers that
1167
- // need to re-substitute placeholders inside a custom envelope
1168
- // can read this and call `substituteInputs`; most drivers can
1169
- // ignore it because the engine has already expanded
1170
- // `{{inputs.X}}` into `task.prompt` upstream.
1171
- inputs: resolvedInputs,
1172
- };
1173
- const spec = await driver.buildCommand(enrichedTask, track, driverCtx);
1174
- log.debug(`[task:${taskId}]`, `driver=${driverName}`);
1175
- log.debug(`[task:${taskId}]`, `spawn args: ${JSON.stringify(spec.args)}`);
1176
- if (spec.cwd) log.debug(`[task:${taskId}]`, `spawn cwd: ${spec.cwd}`);
1177
- if (spec.env)
1178
- log.debug(
1179
- `[task:${taskId}]`,
1180
- `spawn env overrides: ${Object.keys(spec.env).join(', ')}`,
1181
- );
1182
- if (spec.stdin) log.debug(`[task:${taskId}]`, `spawn stdin: ${spec.stdin.length} chars`);
1183
- result = await runSpawn(spec, driver, runOpts);
1184
- }
1185
-
1186
- // 6. Determine terminal status (without emitting yet — result must be complete first)
1187
- // H2: branch on failureKind so spawn errors no longer masquerade as
1188
- // timeouts. Old runners that don't set failureKind still work — we
1189
- // fall back to the historical `exitCode === -1 → timeout` heuristic so
1190
- // pre-existing third-party drivers don't regress.
1191
- let terminalStatus: TaskStatus;
1192
- const kind = result.failureKind;
1193
- if (kind === 'timeout') {
1194
- terminalStatus = 'timeout';
1195
- } else if (kind === 'spawn_error') {
1196
- terminalStatus = 'failed';
1197
- } else if (kind === undefined && result.exitCode === -1) {
1198
- // Legacy path: pre-H2 driver returned -1 with no kind. Treat as
1199
- // timeout for backward compatibility (the previous behaviour).
1200
- terminalStatus = 'timeout';
1201
- } else if (result.exitCode !== 0) {
1202
- terminalStatus = 'failed';
1203
- } else if (task.completion) {
1204
- const plugin = registry.getHandler<CompletionPlugin>('completions', task.completion.type);
1205
- const completionCtx = { workDir: task.cwd ?? workDir, signal: abortController.signal };
1206
- const passed = await plugin.check(
1207
- task.completion as Record<string, unknown>,
1208
- result,
1209
- completionCtx,
1210
- );
1211
- // R4: strict boolean check. Truthy strings/numbers used to be coerced
1212
- // to success — a check returning "ok" would let a failing task pass.
1213
- if (typeof passed !== 'boolean') {
1214
- throw new Error(
1215
- `completion "${task.completion.type}".check() returned ${passed === null ? 'null' : typeof passed}, expected boolean`,
1216
- );
1217
- }
1218
- terminalStatus = passed ? 'success' : 'failed';
1219
- } else {
1220
- terminalStatus = 'success';
1221
- }
1222
-
1223
- // Extract declared outputs from the task's output stream. Only
1224
- // meaningful on success — a failed task's output is whatever the
1225
- // child happened to emit before exiting, and downstream tasks
1226
- // shouldn't receive partial data.
1227
- let extractedOutputs: Readonly<Record<string, unknown>> | null = null;
1228
- if (terminalStatus === 'success') {
1229
- const looseExtraction = extractTaskBindingOutputs(
1230
- task.outputs,
1231
- result.stdout,
1232
- result.stderr,
1233
- result.normalizedOutput,
1234
- );
1235
- if (task.outputs && Object.keys(task.outputs).length > 0) {
1236
- extractedOutputs = looseExtraction.outputs;
1237
- log.debug(
1238
- `[task:${taskId}]`,
1239
- `extracted binding outputs: ${JSON.stringify(looseExtraction.outputs)}`,
1240
- );
1241
- if (looseExtraction.diagnostic) {
1242
- log.debug(`[task:${taskId}]`, looseExtraction.diagnostic);
1243
- }
1244
- }
1245
-
1246
- // Prompt tasks use inferred ports (from direct-downstream Command
1247
- // inputs); Command tasks use their declared ports. Either way,
1248
- // `extractTaskOutputs` is a no-op when there are no declared
1249
- // outputs to pull, so pre-ports tasks pay nothing for this call.
1250
- const extraction = extractTaskOutputs(
1251
- effectivePorts,
1252
- result.stdout,
1253
- result.normalizedOutput,
1254
- );
1255
- if (effectivePorts?.outputs && effectivePorts.outputs.length > 0) {
1256
- extractedOutputs = { ...(extractedOutputs ?? {}), ...extraction.outputs };
1257
- log.debug(
1258
- `[task:${taskId}]`,
1259
- `extracted outputs: ${JSON.stringify(extraction.outputs)}` +
1260
- (isPromptTask ? ' (inferred from downstream Commands)' : ''),
1261
- );
1262
- if (extraction.diagnostic) {
1263
- log.error(`[task:${taskId}]`, extraction.diagnostic);
1264
- const note = `\n[engine] ${extraction.diagnostic}`;
1265
- result = { ...result, stderr: result.stderr + note };
1266
- }
1267
- }
1268
- }
1269
- // Attach outputs to the result (null when task has no declared
1270
- // outputs or extraction failed entirely). Consumers of TaskResult
1271
- // — hooks, wire events, test assertions — all go through this
1272
- // one field rather than re-running extraction.
1273
- result = { ...result, outputs: extractedOutputs };
1274
- if (extractedOutputs !== null) {
1275
- outputValuesMap.set(taskId, extractedOutputs);
1276
- }
1277
- bindingDataMap.set(taskId, {
1278
- outputs: extractedOutputs,
1279
- stdout: result.stdout,
1280
- stderr: result.stderr,
1281
- normalizedOutput: result.normalizedOutput,
1282
- exitCode: result.exitCode,
1283
- });
1284
-
1285
- // Store normalized text separately (in-memory) for continue_from handoff.
1286
- // R15: clip oversized values so a runaway parseResult can't accumulate
1287
- // hundreds of MB across tasks.
1288
- if (result.normalizedOutput !== null) {
1289
- const clipped =
1290
- result.normalizedOutput.length > MAX_NORMALIZED_BYTES
1291
- ? result.normalizedOutput.slice(0, MAX_NORMALIZED_BYTES) +
1292
- `\n[…clipped at ${MAX_NORMALIZED_BYTES} bytes]`
1293
- : result.normalizedOutput;
1294
- normalizedMap.set(taskId, clipped);
1295
- }
1296
-
1297
- // Note: stderr is already persisted by runner.ts as it streams; the
1298
- // old "write full string after the fact" block is gone — that's what
1299
- // the streaming rewrite fixed (unbounded in-memory buffering).
1300
-
1301
- if (result.sessionId) {
1302
- // H1: qualified-only key.
1303
- sessionMap.set(taskId, result.sessionId);
1304
- }
1305
-
1306
- // Set result and finishedAt before emitting terminal status so listeners see complete state
1307
- state.result = result;
1308
- state.finishedAt = nowISO();
1309
- setTaskStatus(taskId, terminalStatus);
1310
-
1311
- // Log task outcome with relevant details
1312
- const durSec = (result.durationMs / 1000).toFixed(1);
1313
- if (terminalStatus === 'success') {
1314
- log.info(`[task:${taskId}]`, `success (${durSec}s)`);
1315
- } else {
1316
- log.error(
1317
- `[task:${taskId}]`,
1318
- `${terminalStatus} exit=${result.exitCode} duration=${durSec}s`,
1319
- );
1320
- if (result.stderr) {
1321
- const tail = tailLines(result.stderr, 10);
1322
- log.error(`[task:${taskId}]`, `stderr tail:\n${tail}`);
1323
- }
1324
- }
1325
-
1326
- // File-only: byte counts (prefer full totals from the runner over the
1327
- // bounded tail length so oversized outputs show their real size) +
1328
- // paths to the on-disk full copies.
1329
- const stdoutSize = result.stdoutBytes ?? result.stdout.length;
1330
- const stderrSize = result.stderrBytes ?? result.stderr.length;
1331
- log.debug(`[task:${taskId}]`, `stdout: ${stdoutSize} bytes, stderr: ${stderrSize} bytes`);
1332
- if (result.sessionId) {
1333
- log.debug(`[task:${taskId}]`, `sessionId: ${result.sessionId}`);
1334
- }
1335
- if (result.stdoutPath) {
1336
- log.debug(`[task:${taskId}]`, `wrote stdout: ${result.stdoutPath}`);
1337
- }
1338
- if (result.stderrPath) {
1339
- log.debug(`[task:${taskId}]`, `wrote stderr: ${result.stderrPath}`);
1340
- }
1341
- if (result.stdout) {
1342
- log.quiet(
1343
- `--- stdout (${taskId}) ---\n${clip(result.stdout)}\n--- end stdout ---`,
1344
- taskId,
1345
- );
1346
- }
1347
- if (result.stderr) {
1348
- log.quiet(
1349
- `--- stderr (${taskId}) ---\n${clip(result.stderr)}\n--- end stderr ---`,
1350
- taskId,
1351
- );
1352
- }
1353
- if (task.completion) {
1354
- log.debug(
1355
- `[task:${taskId}]`,
1356
- `completion check: type=${task.completion.type} result=${terminalStatus}`,
1357
- );
1358
- }
1359
- } catch (err: unknown) {
1360
- const errMsg = err instanceof Error ? (err.stack ?? err.message) : String(err);
1361
- log.error(`[task:${taskId}]`, `failed before execution: ${errMsg}`);
1362
- state.result = {
1363
- exitCode: -1,
1364
- stdout: '',
1365
- stderr: errMsg,
1366
- stdoutPath: null,
1367
- stderrPath: null,
1368
- stdoutBytes: 0,
1369
- stderrBytes: errMsg.length,
1370
- durationMs: 0,
1371
- sessionId: null,
1372
- normalizedOutput: null,
1373
- // H2: Engine-level pre-execution errors (driver throw, middleware
1374
- // throw, getHandler 404) classify as spawn_error — the process never
1375
- // ran, so calling them "timeout" was actively misleading.
1376
- failureKind: 'spawn_error',
1377
- };
1378
- state.finishedAt = nowISO();
1379
- setTaskStatus(taskId, 'failed');
1380
- }
1381
-
1382
- // 7. Fire hooks
1383
- const finalStatus: TaskStatus = state.status;
1384
- try {
1385
- await fireHook(taskId, finalStatus === 'success' ? 'task_success' : 'task_failure');
1386
- } catch (hookErr) {
1387
- log.error(
1388
- `[task:${taskId}]`,
1389
- `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
1390
- );
1391
- }
1392
-
1393
- // 8. Handle stop_all for failure states
1394
- if (finalStatus !== 'success' && getOnFailure(taskId) === 'stop_all') {
1395
- applyStopAll(node.track.id);
1396
- }
1397
- }
1398
-
1399
316
  // ── Event loop ──
1400
317
  // Each task is launched as soon as ALL its deps reach a terminal state.
1401
318
  // We track in-flight tasks in `running` so a task completing mid-batch
@@ -1403,21 +320,21 @@ export async function runPipeline(
1403
320
  const running = new Map<string, Promise<void>>();
1404
321
 
1405
322
  try {
1406
- while (abortReason === null) {
323
+ while (ctx.abortReason === null) {
1407
324
  // Launch every task whose deps are all terminal and that isn't already in-flight
1408
- for (const [id, state] of states) {
1409
- if (state.status !== 'waiting' || running.has(id)) continue;
1410
- const node = dag.nodes.get(id)!;
1411
- const allDepsTerminal =
1412
- node.dependsOn.length === 0 ||
1413
- node.dependsOn.every((d) => isTerminal(states.get(d)!.status));
1414
- if (!allDepsTerminal) continue;
1415
- const p = processTask(id).finally(() => running.delete(id));
325
+ for (const id of findLaunchableTasks(ctx, new Set(running.keys()))) {
326
+ const p = executeTask({
327
+ taskId: id,
328
+ ctx,
329
+ registry,
330
+ log,
331
+ approvalGateway,
332
+ }).finally(() => running.delete(id));
1416
333
  running.set(id, p);
1417
334
  }
1418
335
 
1419
336
  // All tasks terminal — done
1420
- if ([...states.values()].every((s) => isTerminal(s.status))) break;
337
+ if (allTasksTerminal(ctx)) break;
1421
338
 
1422
339
  if (running.size === 0) {
1423
340
  // Nothing in-flight but non-terminal tasks exist (e.g. trigger-wait states
@@ -1429,18 +346,13 @@ export async function runPipeline(
1429
346
  }
1430
347
  }
1431
348
 
1432
- if (abortReason !== null) {
349
+ if (ctx.abortReason !== null) {
1433
350
  // Wait for in-flight tasks to honour the abort signal before marking states.
1434
351
  if (running.size > 0) await Promise.allSettled(running.values());
1435
- for (const [id, state] of states) {
1436
- if (!isTerminal(state.status)) {
1437
- // By the time allSettled resolves, processTask's try/finally has already
1438
- // set running tasks to success/failed/timeout. The only non-terminal
1439
- // statuses remaining here are waiting/idle tasks that were never started.
1440
- state.finishedAt = nowISO();
1441
- setTaskStatus(id, 'skipped');
1442
- }
1443
- }
352
+ // By the time allSettled resolves, processTask's try/finally has already
353
+ // set running tasks to success/failed/timeout. The only non-terminal
354
+ // statuses remaining here are waiting/idle tasks that were never started.
355
+ skipNonTerminalTasks(ctx);
1444
356
  }
1445
357
  } finally {
1446
358
  if (pipelineTimer) clearTimeout(pipelineTimer);
@@ -1459,42 +371,22 @@ export async function runPipeline(
1459
371
  }
1460
372
 
1461
373
  // ── Summary ──
1462
- const summary = { total: 0, success: 0, failed: 0, skipped: 0, timeout: 0, blocked: 0 };
1463
- for (const [, state] of states) {
1464
- summary.total++;
1465
- switch (state.status) {
1466
- case 'success':
1467
- summary.success++;
1468
- break;
1469
- case 'failed':
1470
- summary.failed++;
1471
- break;
1472
- case 'skipped':
1473
- summary.skipped++;
1474
- break;
1475
- case 'timeout':
1476
- summary.timeout++;
1477
- break;
1478
- case 'blocked':
1479
- summary.blocked++;
1480
- break;
1481
- }
1482
- }
374
+ const summary = summarizeStates(ctx.states);
1483
375
 
1484
376
  const finishedAt = nowISO();
1485
377
  const durationMs = new Date(finishedAt).getTime() - new Date(startedAt).getTime();
1486
378
 
1487
- if (abortReason !== null) {
379
+ if (ctx.abortReason !== null) {
1488
380
  const reasonText =
1489
- abortReason === 'timeout'
381
+ ctx.abortReason === 'timeout'
1490
382
  ? 'Pipeline timeout exceeded'
1491
- : abortReason === 'stop_all'
383
+ : ctx.abortReason === 'stop_all'
1492
384
  ? 'Pipeline stopped (on_failure: stop_all)'
1493
385
  : 'Pipeline aborted by host';
1494
386
  await executeHook(
1495
387
  config.hooks,
1496
388
  'pipeline_error',
1497
- buildPipelineErrorContext(pipelineInfo, reasonText, undefined, abortReason),
389
+ buildPipelineErrorContext(pipelineInfo, reasonText, undefined, ctx.abortReason),
1498
390
  workDir,
1499
391
  );
1500
392
  } else {
@@ -1510,14 +402,14 @@ export async function runPipeline(
1510
402
  }
1511
403
 
1512
404
  const allSuccess =
1513
- abortReason === null &&
405
+ ctx.abortReason === null &&
1514
406
  summary.failed === 0 &&
1515
407
  summary.timeout === 0 &&
1516
408
  summary.blocked === 0;
1517
409
 
1518
410
  log.section('Pipeline summary');
1519
411
  log.quiet(
1520
- `status: ${abortReason !== null ? `aborted (${abortReason})` : 'completed'}`,
412
+ `status: ${ctx.abortReason !== null ? `aborted (${ctx.abortReason})` : 'completed'}`,
1521
413
  );
1522
414
  log.quiet(`duration: ${(durationMs / 1000).toFixed(1)}s`);
1523
415
  log.quiet(
@@ -1527,7 +419,7 @@ export async function runPipeline(
1527
419
  );
1528
420
  log.quiet('');
1529
421
  log.quiet('per-task:');
1530
- for (const [id, state] of states) {
422
+ for (const [id, state] of ctx.states) {
1531
423
  const dur =
1532
424
  state.result?.durationMs != null ? `${(state.result.durationMs / 1000).toFixed(1)}s` : '-';
1533
425
  const exit = state.result?.exitCode ?? '-';
@@ -1542,8 +434,8 @@ export async function runPipeline(
1542
434
  log.info('[pipeline]', `Duration: ${(durationMs / 1000).toFixed(1)}s`);
1543
435
  log.info('[pipeline]', `Log: ${log.path}`);
1544
436
 
1545
- emit({ type: 'run_end', runId, success: allSuccess, abortReason });
1546
- return { success: allSuccess, runId, logPath: log.path, summary, states: freezeStates(states) };
437
+ ctx.emit({ type: 'run_end', runId, success: allSuccess, abortReason: ctx.abortReason });
438
+ return { success: allSuccess, runId, logPath: log.path, summary, states: freezeStates(ctx.states) };
1547
439
  } finally {
1548
440
  // Close the persistent log file handle before pruning.
1549
441
  log.close();
@@ -1555,64 +447,4 @@ export async function runPipeline(
1555
447
  }
1556
448
  }
1557
449
 
1558
- /**
1559
- * Delete the oldest subdirectories under `logsDir`, keeping only the most recent `keep`
1560
- * total runs (including the currently-live run identified by `excludeRunId`).
1561
- * Directories are sorted lexicographically; because runIds are prefixed with a base-36
1562
- * timestamp, lexicographic order equals chronological order.
1563
- *
1564
- * `excludeRunId` is always skipped from deletion even if it would otherwise be pruned —
1565
- * this prevents a concurrent run from removing a live log directory that is still in use.
1566
- *
1567
- * D10: The live run occupies one slot out of `keep`, so the maximum number of
1568
- * *historical* dirs to retain is `keep - 1`. Without this adjustment the function
1569
- * kept `keep` historical dirs plus 1 live dir = `keep + 1` total on disk.
1570
- */
1571
- async function pruneLogDirs(logsDir: string, keep: number, excludeRunId: string): Promise<void> {
1572
- let entries: string[];
1573
- try {
1574
- entries = await readdir(logsDir);
1575
- } catch {
1576
- return; // logsDir doesn't exist yet — nothing to prune
1577
- }
1578
-
1579
- // Only consider directories that look like run IDs (run_<...>), excluding the live run.
1580
- const runDirs = entries.filter((e) => e.startsWith('run_') && e !== excludeRunId).sort();
1581
- // keep - 1 historical slots (1 slot is reserved for the live excludeRunId).
1582
- const historyKeep = Math.max(0, keep - 1);
1583
- const toDelete = runDirs.slice(0, Math.max(0, runDirs.length - historyKeep));
1584
-
1585
- await Promise.all(
1586
- toDelete.map((dir) =>
1587
- rm(resolve(logsDir, dir), { recursive: true, force: true }).catch(() => {
1588
- // Ignore deletion errors — stale dirs are better than a crash
1589
- }),
1590
- ),
1591
- );
1592
- }
1593
450
 
1594
- function isTerminal(status: TaskStatus): boolean {
1595
- return (
1596
- status === 'success' ||
1597
- status === 'failed' ||
1598
- status === 'timeout' ||
1599
- status === 'skipped' ||
1600
- status === 'blocked'
1601
- );
1602
- }
1603
-
1604
- /** Return a deep-copied, caller-safe snapshot of the states map. */
1605
- function freezeStates(states: Map<string, TaskState>): ReadonlyMap<string, TaskState> {
1606
- const copy = new Map<string, TaskState>();
1607
- for (const [id, s] of states) {
1608
- copy.set(id, {
1609
- config: { ...s.config },
1610
- trackConfig: { ...s.trackConfig },
1611
- status: s.status,
1612
- result: s.result ? { ...s.result } : null,
1613
- startedAt: s.startedAt,
1614
- finishedAt: s.finishedAt,
1615
- });
1616
- }
1617
- return copy;
1618
- }