@tagma/sdk 0.4.12 → 0.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/README.md +572 -566
  2. package/dist/adapters/websocket-approval.d.ts.map +1 -1
  3. package/dist/adapters/websocket-approval.js +3 -1
  4. package/dist/adapters/websocket-approval.js.map +1 -1
  5. package/dist/approval.d.ts.map +1 -1
  6. package/dist/approval.js.map +1 -1
  7. package/dist/completions/exit-code.d.ts.map +1 -1
  8. package/dist/completions/exit-code.js.map +1 -1
  9. package/dist/completions/file-exists.d.ts.map +1 -1
  10. package/dist/completions/file-exists.js.map +1 -1
  11. package/dist/completions/output-check.js +2 -7
  12. package/dist/completions/output-check.js.map +1 -1
  13. package/dist/config-ops.d.ts.map +1 -1
  14. package/dist/config-ops.js +24 -26
  15. package/dist/config-ops.js.map +1 -1
  16. package/dist/dag.d.ts.map +1 -1
  17. package/dist/dag.js +1 -1
  18. package/dist/dag.js.map +1 -1
  19. package/dist/drivers/claude-code.d.ts.map +1 -1
  20. package/dist/drivers/claude-code.js +10 -5
  21. package/dist/drivers/claude-code.js.map +1 -1
  22. package/dist/engine.d.ts.map +1 -1
  23. package/dist/engine.js +54 -27
  24. package/dist/engine.js.map +1 -1
  25. package/dist/hooks.d.ts.map +1 -1
  26. package/dist/hooks.js +1 -3
  27. package/dist/hooks.js.map +1 -1
  28. package/dist/logger.d.ts.map +1 -1
  29. package/dist/logger.js +4 -2
  30. package/dist/logger.js.map +1 -1
  31. package/dist/pipeline-runner.d.ts.map +1 -1
  32. package/dist/pipeline-runner.js +10 -4
  33. package/dist/pipeline-runner.js.map +1 -1
  34. package/dist/registry.d.ts +11 -1
  35. package/dist/registry.d.ts.map +1 -1
  36. package/dist/registry.js +28 -3
  37. package/dist/registry.js.map +1 -1
  38. package/dist/runner.d.ts.map +1 -1
  39. package/dist/runner.js +18 -13
  40. package/dist/runner.js.map +1 -1
  41. package/dist/schema.d.ts.map +1 -1
  42. package/dist/schema.js +14 -14
  43. package/dist/schema.js.map +1 -1
  44. package/dist/schema.test.js +5 -1
  45. package/dist/schema.test.js.map +1 -1
  46. package/dist/sdk.d.ts +2 -2
  47. package/dist/sdk.d.ts.map +1 -1
  48. package/dist/sdk.js +1 -1
  49. package/dist/sdk.js.map +1 -1
  50. package/dist/triggers/file.d.ts.map +1 -1
  51. package/dist/triggers/file.js +11 -4
  52. package/dist/triggers/file.js.map +1 -1
  53. package/dist/triggers/manual.d.ts.map +1 -1
  54. package/dist/triggers/manual.js +2 -1
  55. package/dist/triggers/manual.js.map +1 -1
  56. package/dist/utils.d.ts.map +1 -1
  57. package/dist/utils.js +13 -6
  58. package/dist/utils.js.map +1 -1
  59. package/dist/validate-raw.d.ts.map +1 -1
  60. package/dist/validate-raw.js +40 -11
  61. package/dist/validate-raw.js.map +1 -1
  62. package/package.json +2 -2
  63. package/scripts/preinstall.js +1 -1
  64. package/src/adapters/stdin-approval.ts +106 -106
  65. package/src/adapters/websocket-approval.ts +224 -220
  66. package/src/approval.ts +131 -125
  67. package/src/bootstrap.ts +37 -37
  68. package/src/completions/exit-code.ts +34 -30
  69. package/src/completions/file-exists.ts +66 -60
  70. package/src/completions/output-check.ts +86 -86
  71. package/src/config-ops.ts +307 -322
  72. package/src/dag.ts +234 -228
  73. package/src/drivers/claude-code.ts +250 -240
  74. package/src/engine.ts +1098 -935
  75. package/src/hooks.ts +187 -179
  76. package/src/logger.ts +182 -178
  77. package/src/middlewares/static-context.ts +45 -45
  78. package/src/pipeline-runner.ts +156 -150
  79. package/src/registry.ts +51 -23
  80. package/src/runner.ts +395 -397
  81. package/src/schema.test.ts +5 -1
  82. package/src/schema.ts +338 -328
  83. package/src/sdk.ts +91 -81
  84. package/src/triggers/file.ts +33 -14
  85. package/src/triggers/manual.ts +86 -81
  86. package/src/types.ts +18 -18
  87. package/src/utils.ts +202 -191
  88. package/src/validate-raw.ts +442 -409
package/src/engine.ts CHANGED
@@ -1,935 +1,1098 @@
1
- import { resolve } from 'path';
2
- import { readdir, rm } from 'fs/promises';
3
- import type {
4
- PipelineConfig, TaskConfig, TaskState, TaskStatus,
5
- TaskResult, DriverPlugin, TriggerPlugin, CompletionPlugin,
6
- MiddlewarePlugin, MiddlewareContext, DriverContext,
7
- OnFailure,
8
- } from './types';
9
- import { buildDag, type Dag } from './dag';
10
- import { getHandler, hasHandler, loadPlugins } from './registry';
11
- import { runSpawn, runCommand } from './runner';
12
- import { parseDuration, nowISO, generateRunId } from './utils';
13
- import {
14
- executeHook,
15
- buildPipelineStartContext, buildTaskContext,
16
- buildPipelineCompleteContext, buildPipelineErrorContext,
17
- type PipelineInfo, type TrackInfo, type TaskInfo,
18
- } from './hooks';
19
- import { Logger, tailLines, clip, type LogLevel } from './logger';
20
- import { InMemoryApprovalGateway, type ApprovalGateway } from './approval';
21
-
22
- // ═══ A7: Typed trigger errors ═══
23
- // Replace string-matching on error messages with structured error types so
24
- // coincidental substrings don't cause misclassification.
25
-
26
- export class TriggerBlockedError extends Error {
27
- readonly code = 'TRIGGER_BLOCKED' as const;
28
- constructor(message: string) {
29
- super(message);
30
- this.name = 'TriggerBlockedError';
31
- }
32
- }
33
-
34
- export class TriggerTimeoutError extends Error {
35
- readonly code = 'TRIGGER_TIMEOUT' as const;
36
- constructor(message: string) {
37
- super(message);
38
- this.name = 'TriggerTimeoutError';
39
- }
40
- }
41
-
42
- // ═══ Preflight Validation ═══
43
-
44
- function preflight(config: PipelineConfig, dag: Dag): void {
45
- const errors: string[] = [];
46
-
47
- for (const [, node] of dag.nodes) {
48
- const task = node.task;
49
- const track = node.track;
50
- const driverName = task.driver ?? track.driver ?? config.driver ?? 'claude-code';
51
-
52
- // Pure command tasks don't use a driver — skip driver registration check.
53
- const isCommandOnly = task.command && !task.prompt;
54
-
55
- if (!isCommandOnly && !hasHandler('drivers', driverName)) {
56
- errors.push(`Task "${node.taskId}": driver "${driverName}" not registered`);
57
- }
58
-
59
- if (task.trigger && !hasHandler('triggers', task.trigger.type)) {
60
- errors.push(`Task "${node.taskId}": trigger type "${task.trigger.type}" not registered`);
61
- }
62
-
63
- if (task.completion && !hasHandler('completions', task.completion.type)) {
64
- errors.push(`Task "${node.taskId}": completion type "${task.completion.type}" not registered`);
65
- }
66
-
67
- const mws = task.middlewares ?? track.middlewares ?? [];
68
- for (const mw of mws) {
69
- if (!hasHandler('middlewares', mw.type)) {
70
- errors.push(`Task "${node.taskId}": middleware type "${mw.type}" not registered`);
71
- }
72
- }
73
-
74
- if (task.continue_from && hasHandler('drivers', driverName)) {
75
- const driver = getHandler<DriverPlugin>('drivers', driverName);
76
- if (!driver.capabilities.sessionResume) {
77
- const upstreamId = resolveRefInDag(dag, task.continue_from, track.id);
78
- if (upstreamId) {
79
- const upstream = dag.nodes.get(upstreamId);
80
- if (upstream) {
81
- // A handoff is possible via session resume (already ruled out above),
82
- // OR in-memory text injection through normalizedMap
83
- // (when the upstream driver implements parseResult and returns normalizedOutput).
84
- const upstreamDriverName = upstream.task.driver ?? upstream.track.driver
85
- ?? config.driver ?? 'claude-code';
86
- const upstreamDriver = hasHandler('drivers', upstreamDriverName)
87
- ? getHandler<DriverPlugin>('drivers', upstreamDriverName)
88
- : null;
89
- const canNormalize = typeof upstreamDriver?.parseResult === 'function';
90
-
91
- if (!canNormalize) {
92
- errors.push(
93
- `Task "${node.taskId}" uses continue_from: "${task.continue_from}", ` +
94
- `but upstream task "${upstreamId}" its driver ` +
95
- `does not implement parseResult for text-injection handoff. ` +
96
- `Use a driver with parseResult, or remove continue_from.`
97
- );
98
- }
99
- }
100
- }
101
- }
102
- }
103
- }
104
-
105
- if (errors.length > 0) {
106
- throw new Error(`Preflight validation failed:\n - ${errors.join('\n - ')}`);
107
- }
108
- }
109
-
110
- function resolveRefInDag(dag: Dag, ref: string, fromTrackId: string): string | null {
111
- // Already fully qualified
112
- if (dag.nodes.has(ref)) return ref;
113
- // Same-track match (preferred)
114
- const sameTrack = `${fromTrackId}.${ref}`;
115
- if (dag.nodes.has(sameTrack)) return sameTrack;
116
- // Cross-track bare name lookup — must be unambiguous (aligned with buildDag's resolveRef)
117
- let match: string | null = null;
118
- for (const [id] of dag.nodes) {
119
- if (id.endsWith(`.${ref}`)) {
120
- if (match !== null) {
121
- // Ambiguous: multiple tasks share the bare name across tracks
122
- return null;
123
- }
124
- match = id;
125
- }
126
- }
127
- return match;
128
- }
129
-
130
- // ═══ Engine ═══
131
-
132
- export interface EngineResult {
133
- readonly success: boolean;
134
- readonly runId: string;
135
- readonly logPath: string;
136
- readonly summary: {
137
- total: number; success: number; failed: number;
138
- skipped: number; timeout: number; blocked: number;
139
- };
140
- readonly states: ReadonlyMap<string, TaskState>;
141
- }
142
-
143
- // ═══ Pipeline Events ═══
144
-
145
- export type PipelineEvent =
146
- | { readonly type: 'task_status_change'; readonly taskId: string; readonly status: TaskStatus; readonly prevStatus: TaskStatus; readonly runId: string; readonly state: TaskState }
147
- | { readonly type: 'pipeline_start'; readonly runId: string; readonly states: ReadonlyMap<string, TaskState> }
148
- | { readonly type: 'pipeline_end'; readonly runId: string; readonly success: boolean }
149
- /**
150
- * Fine-grained log line emitted alongside every write to pipeline.log.
151
- * Consumers use this to stream the full run process into UIs without
152
- * tailing the log file. `taskId` is non-null for task-scoped lines and
153
- * null for pipeline-wide messages (e.g. configuration dumps, DAG
154
- * topology, pipeline start/end).
155
- */
156
- | { readonly type: 'task_log'; readonly runId: string; readonly taskId: string | null; readonly level: LogLevel; readonly timestamp: string; readonly text: string };
157
-
158
- export interface RunPipelineOptions {
159
- readonly approvalGateway?: ApprovalGateway;
160
- /**
161
- * Maximum number of per-run log directories to retain under `<workDir>/.tagma/logs/`.
162
- * Oldest directories are deleted after each run. Defaults to 20. Set to 0 to disable cleanup.
163
- */
164
- readonly maxLogRuns?: number;
165
- /**
166
- * Caller-supplied run ID. When provided the engine uses this instead of
167
- * generating its own via `generateRunId()`, keeping the editor and SDK
168
- * log directories aligned on the same ID.
169
- */
170
- readonly runId?: string;
171
- /**
172
- * External AbortSignal — aborting it cancels the pipeline immediately.
173
- * Equivalent to the pipeline timeout firing, but caller-controlled.
174
- */
175
- readonly signal?: AbortSignal;
176
- /**
177
- * Called on every pipeline/task status transition.
178
- * Use for real-time UI updates (e.g. updating a visual workflow graph).
179
- */
180
- readonly onEvent?: (event: PipelineEvent) => void;
181
- /**
182
- * Skip the engine's built-in `loadPlugins(config.plugins)` call.
183
- * Use this when the host has already pre-loaded plugins from a custom
184
- * resolution path (e.g. a user workspace's node_modules) so the engine
185
- * doesn't re-resolve them via Node's default cwd-based import.
186
- */
187
- readonly skipPluginLoading?: boolean;
188
- }
189
-
190
- // Poll interval when no tasks are in-flight but non-terminal tasks remain
191
- // (e.g. tasks waiting on a file or manual trigger).
192
- const POLL_INTERVAL_MS = 50;
193
-
194
- // R15: cap on each normalized-output entry stored in normalizedMap so a
195
- // runaway parseResult can't accumulate hundreds of MB across tasks. 1 MB
196
- // is generous for any text-context handoff between AI tasks.
197
- const MAX_NORMALIZED_BYTES = 1_000_000;
198
-
199
- export async function runPipeline(
200
- config: PipelineConfig,
201
- workDir: string,
202
- options: RunPipelineOptions = {},
203
- ): Promise<EngineResult> {
204
- const approvalGateway = options.approvalGateway ?? new InMemoryApprovalGateway();
205
- const maxLogRuns = options.maxLogRuns ?? 20;
206
-
207
- // Load any plugins declared in the pipeline config before preflight so that
208
- // drivers, completions, and middlewares referenced in YAML are registered.
209
- // Hosts that pre-load plugins from a custom path (e.g. the editor loading
210
- // from the user's workspace node_modules) pass skipPluginLoading: true so
211
- // we don't re-resolve via Node's cwd-based default import.
212
- if (!options.skipPluginLoading && config.plugins?.length) {
213
- await loadPlugins(config.plugins);
214
- }
215
-
216
- const dag = buildDag(config);
217
- const runId = options.runId ?? generateRunId();
218
- preflight(config, dag);
219
-
220
- const startedAt = nowISO();
221
- const pipelineInfo: PipelineInfo = { name: config.name, run_id: runId, started_at: startedAt };
222
- // Forward every structured log line to subscribers as task_log events.
223
- // Reading options.onEvent inside the callback (vs. capturing it once) keeps
224
- // the SDK behavior correct if callers pass a fresh onEvent on each run.
225
- const log = new Logger(workDir, runId, (record) => {
226
- options.onEvent?.({
227
- type: 'task_log',
228
- runId,
229
- taskId: record.taskId,
230
- level: record.level,
231
- timestamp: record.timestamp,
232
- text: record.text,
233
- });
234
- });
235
-
236
- try {
237
-
238
- log.info('[pipeline]', `start "${config.name}" run_id=${runId}`);
239
-
240
- // File-only: dump the resolved pipeline shape + DAG topology for post-mortem.
241
- log.section('Pipeline configuration');
242
- log.quiet(`name: ${config.name}`);
243
- log.quiet(`driver: ${config.driver ?? '(default: claude-code)'}`);
244
- log.quiet(`timeout: ${config.timeout ?? '(none)'}`);
245
- log.quiet(`tracks: ${config.tracks.length}`);
246
- log.quiet(`tasks (total): ${dag.nodes.size}`);
247
- log.quiet(`plugins: ${(config.plugins ?? []).join(', ') || '(none)'}`);
248
- log.quiet(`hooks: ${config.hooks ? Object.keys(config.hooks).join(', ') || '(none)' : '(none)'}`);
249
-
250
- log.section('DAG topology');
251
- for (const [id, node] of dag.nodes) {
252
- const deps = node.dependsOn.length ? node.dependsOn.join(', ') : '(root)';
253
- const kind = node.task.prompt ? 'ai' : 'cmd';
254
- log.quiet(` • ${id} [${kind}] track=${node.track.id} deps=[${deps}]`);
255
- }
256
- log.quiet('');
257
-
258
- // Initialize states (before hook, so we can return them even if blocked)
259
- const states = new Map<string, TaskState>();
260
- for (const [id, node] of dag.nodes) {
261
- states.set(id, {
262
- config: node.task,
263
- trackConfig: node.track,
264
- status: 'idle',
265
- result: null,
266
- startedAt: null,
267
- finishedAt: null,
268
- });
269
- }
270
-
271
- // Pipeline start hook (gate)
272
- const startHook = await executeHook(
273
- config.hooks, 'pipeline_start', buildPipelineStartContext(pipelineInfo), workDir,
274
- );
275
- if (!startHook.allowed) {
276
- console.error(`Pipeline blocked by pipeline_start hook (exit code ${startHook.exitCode})`);
277
- await executeHook(config.hooks, 'pipeline_error',
278
- buildPipelineErrorContext(pipelineInfo, 'pipeline_blocked', 'pipeline_blocked'), workDir);
279
- // All tasks stay idle — pipeline never started
280
- return {
281
- success: false,
282
- runId,
283
- logPath: log.path,
284
- summary: { total: dag.nodes.size, success: 0, failed: 0, skipped: 0, timeout: 0, blocked: 0 },
285
- states: freezeStates(states),
286
- };
287
- }
288
-
289
- // Pipeline approved — transition all tasks to waiting
290
- for (const [, state] of states) {
291
- state.status = 'waiting';
292
- }
293
- // Include a full states snapshot so listeners can initialize their mirrors without missing events
294
- const statesSnapshot: ReadonlyMap<string, TaskState> = new Map(
295
- [...states.entries()].map(([id, s]) => [id, { ...s }])
296
- );
297
- options.onEvent?.({ type: 'pipeline_start', runId, states: statesSnapshot });
298
-
299
- const sessionMap = new Map<string, string>();
300
- const normalizedMap = new Map<string, string>();
301
-
302
- // Pipeline timeout
303
- const pipelineTimeoutMs = config.timeout ? parseDuration(config.timeout) : 0;
304
- let pipelineAborted = false;
305
- const abortController = new AbortController();
306
- let pipelineTimer: ReturnType<typeof setTimeout> | null = null;
307
-
308
- if (pipelineTimeoutMs > 0) {
309
- pipelineTimer = setTimeout(() => {
310
- pipelineAborted = true;
311
- abortController.abort();
312
- }, pipelineTimeoutMs);
313
- }
314
-
315
- // When the pipeline is aborted (timeout, external shutdown), drain all
316
- // pending approvals so waiting triggers unblock immediately.
317
- abortController.signal.addEventListener('abort', () => {
318
- approvalGateway.abortAll('pipeline aborted');
319
- });
320
-
321
- // Wire external cancel signal into the internal abort controller.
322
- const externalAbortHandler = () => {
323
- pipelineAborted = true;
324
- abortController.abort();
325
- };
326
- if (options.signal) {
327
- if (options.signal.aborted) {
328
- externalAbortHandler();
329
- } else {
330
- options.signal.addEventListener('abort', externalAbortHandler, { once: true });
331
- }
332
- }
333
-
334
- // ── Helpers ──
335
-
336
- function emit(event: PipelineEvent): void {
337
- options.onEvent?.(event);
338
- }
339
-
340
- function setTaskStatus(taskId: string, newStatus: TaskStatus): void {
341
- const state = states.get(taskId)!;
342
- // Terminal lock: once a task reaches a terminal state it must not be
343
- // re-transitioned. This prevents stop_all from marking running tasks as
344
- // skipped and then having their in-flight processTask promise overwrite
345
- // that with success/failed, producing an invalid double transition.
346
- if (isTerminal(state.status)) return;
347
- const prevStatus = state.status;
348
- state.status = newStatus;
349
- // Snapshot state at emit time — result and finishedAt must be set before calling this for terminal statuses
350
- const snapshot: TaskState = {
351
- config: state.config,
352
- trackConfig: state.trackConfig,
353
- status: state.status,
354
- result: state.result,
355
- startedAt: state.startedAt,
356
- finishedAt: state.finishedAt,
357
- };
358
- emit({ type: 'task_status_change', taskId, status: newStatus, prevStatus, runId, state: snapshot });
359
- }
360
-
361
- function getOnFailure(taskId: string): OnFailure {
362
- return dag.nodes.get(taskId)?.track.on_failure ?? 'skip_downstream';
363
- }
364
-
365
- function isDependencySatisfied(depId: string): 'satisfied' | 'unsatisfied' | 'skip' {
366
- const depState = states.get(depId);
367
- if (!depState) return 'skip';
368
- switch (depState.status) {
369
- case 'success': return 'satisfied';
370
- case 'skipped': return 'skip';
371
- case 'failed': case 'timeout': case 'blocked':
372
- return getOnFailure(depId) === 'ignore' ? 'satisfied' : 'skip';
373
- default: return 'unsatisfied';
374
- }
375
- }
376
-
377
- /**
378
- * H3: "stop_all" historically only stopped tasks within the same track,
379
- * which contradicted both its name and user expectations. It now stops
380
- * the **entire pipeline**:
381
- * - In-flight tasks are signalled via the shared abort controller so
382
- * drivers / runner.ts can cancel cooperatively (returning
383
- * `failureKind: 'timeout'`).
384
- * - Still-waiting tasks across every track are immediately marked
385
- * skipped so the run completes promptly.
386
- * The terminal lock in setTaskStatus prevents any later re-transition
387
- * should a completed running task try to overwrite the skipped state.
388
- */
389
- function applyStopAll(_failedTrackId: string): void {
390
- pipelineAborted = true;
391
- abortController.abort();
392
- for (const [id, state] of states) {
393
- if (state.status === 'waiting') {
394
- state.finishedAt = nowISO();
395
- setTaskStatus(id, 'skipped');
396
- }
397
- }
398
- }
399
-
400
- function buildTaskInfoObj(taskId: string): TaskInfo {
401
- const state = states.get(taskId)!;
402
- return {
403
- id: taskId,
404
- name: state.config.name,
405
- type: state.config.prompt ? 'ai' : 'command',
406
- status: state.status,
407
- exit_code: state.result?.exitCode ?? null,
408
- duration_ms: state.result?.durationMs ?? null,
409
- stderr_path: state.result?.stderrPath ?? null,
410
- session_id: state.result?.sessionId ?? null,
411
- started_at: state.startedAt,
412
- finished_at: state.finishedAt,
413
- };
414
- }
415
-
416
- function trackInfoOf(taskId: string): TrackInfo {
417
- const node = dag.nodes.get(taskId)!;
418
- return { id: node.track.id, name: node.track.name };
419
- }
420
-
421
- async function fireHook(taskId: string, event: 'task_success' | 'task_failure'): Promise<void> {
422
- await executeHook(config.hooks, event,
423
- buildTaskContext(event, pipelineInfo, trackInfoOf(taskId), buildTaskInfoObj(taskId)), workDir, abortController.signal);
424
- }
425
-
426
- // ── Process a single task ──
427
-
428
- async function processTask(taskId: string): Promise<void> {
429
- const state = states.get(taskId)!;
430
- const node = dag.nodes.get(taskId)!;
431
- const task = node.task;
432
- const track = node.track;
433
-
434
- log.section(`Task ${taskId}`, taskId);
435
- log.debug(`[task:${taskId}]`,
436
- `type=${task.prompt ? 'ai' : 'cmd'} track=${track.id} deps=[${node.dependsOn.join(', ') || '(root)'}]`);
437
-
438
- // 1. Check dependencies
439
- for (const depId of node.dependsOn) {
440
- const result = isDependencySatisfied(depId);
441
- if (result === 'skip') {
442
- const depStatus = states.get(depId)?.status ?? 'unknown';
443
- log.debug(`[task:${taskId}]`, `skipped (upstream "${depId}" status=${depStatus})`);
444
- state.finishedAt = nowISO();
445
- setTaskStatus(taskId, 'skipped');
446
- return;
447
- }
448
- if (result === 'unsatisfied') return; // still waiting
449
- }
450
-
451
- // 2. Check trigger
452
- if (task.trigger) {
453
- log.debug(`[task:${taskId}]`, `trigger wait: type=${task.trigger.type} ${JSON.stringify(task.trigger)}`);
454
- try {
455
- const triggerPlugin = getHandler<TriggerPlugin>('triggers', task.trigger.type);
456
- // R6: race the plugin's watch() against the pipeline's abort signal.
457
- // Third-party triggers may forget to wire up ctx.signal; without
458
- // this race, an aborted pipeline would hang forever waiting for the
459
- // plugin's watch promise to resolve. The race resolves on whichever
460
- // path settles first, and the cleanup paths in finally never run on
461
- // the orphaned plugin promise (it's allowed to leak a watcher; the
462
- // pipeline is being torn down anyway).
463
- await new Promise<unknown>((resolve, reject) => {
464
- let settled = false;
465
- const onAbort = () => {
466
- if (settled) return;
467
- settled = true;
468
- abortController.signal.removeEventListener('abort', onAbort);
469
- reject(new Error('Pipeline aborted'));
470
- };
471
- if (abortController.signal.aborted) { onAbort(); return; }
472
- abortController.signal.addEventListener('abort', onAbort, { once: true });
473
- triggerPlugin.watch(task.trigger as Record<string, unknown>, {
474
- taskId: node.taskId,
475
- trackId: track.id,
476
- workDir: task.cwd ?? workDir,
477
- signal: abortController.signal,
478
- approvalGateway,
479
- }).then(
480
- (v) => {
481
- if (settled) return;
482
- settled = true;
483
- abortController.signal.removeEventListener('abort', onAbort);
484
- resolve(v);
485
- },
486
- (e) => {
487
- if (settled) return;
488
- settled = true;
489
- abortController.signal.removeEventListener('abort', onAbort);
490
- reject(e);
491
- },
492
- );
493
- });
494
- log.debug(`[task:${taskId}]`, `trigger fired`);
495
- } catch (err: unknown) {
496
- // If pipeline was aborted while we were still waiting for the trigger,
497
- // this task never entered running state, so it is marked skipped, not timeout.
498
- state.finishedAt = nowISO();
499
- if (pipelineAborted) {
500
- setTaskStatus(taskId, 'skipped');
501
- } else if (err instanceof TriggerBlockedError) {
502
- setTaskStatus(taskId, 'blocked'); // user/policy rejection
503
- } else if (err instanceof TriggerTimeoutError) {
504
- setTaskStatus(taskId, 'timeout'); // genuine trigger wait timeout
505
- } else {
506
- // A7 fallback: also check message strings for backward-compat with
507
- // third-party trigger plugins that don't throw typed errors yet.
508
- const msg = err instanceof Error ? err.message : String(err);
509
- if (msg.includes('rejected') || msg.includes('denied')) {
510
- setTaskStatus(taskId, 'blocked');
511
- } else if (msg.includes('timeout')) {
512
- setTaskStatus(taskId, 'timeout');
513
- } else {
514
- setTaskStatus(taskId, 'failed'); // plugin error, watcher crash, etc.
515
- }
516
- }
517
- try {
518
- await fireHook(taskId, 'task_failure');
519
- } catch (hookErr) {
520
- log.error(`[task:${taskId}]`, `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`);
521
- }
522
- return;
523
- }
524
- }
525
-
526
- // 3. task_start hook (gate)
527
- const hookResult = await executeHook(config.hooks, 'task_start',
528
- buildTaskContext('task_start', pipelineInfo, trackInfoOf(taskId), buildTaskInfoObj(taskId)), workDir, abortController.signal);
529
- if (hookResult.exitCode !== 0 || config.hooks?.task_start) {
530
- log.debug(`[task:${taskId}]`,
531
- `task_start hook exit=${hookResult.exitCode} allowed=${hookResult.allowed}`);
532
- }
533
- if (!hookResult.allowed) {
534
- state.finishedAt = nowISO();
535
- setTaskStatus(taskId, 'blocked');
536
- try {
537
- await fireHook(taskId, 'task_failure');
538
- } catch (hookErr) {
539
- log.error(`[task:${taskId}]`, `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`);
540
- }
541
- return;
542
- }
543
-
544
- // 4. Mark running — set startedAt before emitting so subscribers see a
545
- // complete snapshot (startedAt non-null) in the task_status_change event.
546
- state.startedAt = nowISO();
547
- setTaskStatus(taskId, 'running');
548
- log.info(`[task:${taskId}]`, task.command ? `running: ${task.command}` : `running (driver task)`);
549
-
550
- // File-only: resolved config for this task
551
- const resolvedDriver = task.driver ?? track.driver ?? config.driver ?? 'claude-code';
552
- const resolvedModel = task.model ?? track.model ?? config.model ?? '(default)';
553
- const resolvedPerms = task.permissions ?? track.permissions ?? '(default)';
554
- const resolvedCwd = task.cwd ?? track.cwd ?? workDir;
555
- log.debug(`[task:${taskId}]`,
556
- `resolved: driver=${resolvedDriver} model=${resolvedModel} cwd=${resolvedCwd}`);
557
- log.debug(`[task:${taskId}]`, `permissions: ${JSON.stringify(resolvedPerms)}`);
558
- if (task.continue_from) {
559
- log.debug(`[task:${taskId}]`, `continue_from: "${task.continue_from}"`);
560
- }
561
- if (task.timeout) {
562
- log.debug(`[task:${taskId}]`, `timeout: ${task.timeout}`);
563
- }
564
-
565
- try {
566
- let result: TaskResult;
567
- const timeoutMs = task.timeout ? parseDuration(task.timeout) : undefined;
568
-
569
- const runOpts = { timeoutMs, signal: abortController.signal };
570
-
571
- if (task.command) {
572
- log.debug(`[task:${taskId}]`, `command: ${task.command}`);
573
- result = await runCommand(task.command, task.cwd ?? workDir, runOpts);
574
- } else {
575
- // AI task: apply middleware chain
576
- const driverName = task.driver ?? track.driver ?? config.driver ?? 'claude-code';
577
- const driver = getHandler<DriverPlugin>('drivers', driverName);
578
-
579
- let prompt = task.prompt!;
580
- const originalLen = prompt.length;
581
- const mws = task.middlewares !== undefined ? task.middlewares : track.middlewares;
582
- if (mws && mws.length > 0) {
583
- log.debug(`[task:${taskId}]`,
584
- `middleware chain: ${mws.map(m => m.type).join(' ')}`);
585
- const mwCtx: MiddlewareContext = {
586
- task, track, workDir: task.cwd ?? workDir,
587
- };
588
- for (const mwConfig of mws) {
589
- const before = prompt.length;
590
- const mwPlugin = getHandler<MiddlewarePlugin>('middlewares', mwConfig.type);
591
- const next = await mwPlugin.enhance(prompt, mwConfig as Record<string, unknown>, mwCtx);
592
- // R3: a middleware that returns undefined / null / a non-string
593
- // would silently corrupt the prompt sent to the driver. Fail loud
594
- // here so the user sees "middleware X.enhance returned ..." in the
595
- // task log instead of "[object Object]" arriving at the model.
596
- if (typeof next !== 'string') {
597
- throw new Error(
598
- `middleware "${mwConfig.type}".enhance() returned ${next === null ? 'null' : typeof next}, expected string`
599
- );
600
- }
601
- prompt = next;
602
- log.debug(`[task:${taskId}]`,
603
- ` ${mwConfig.type}: ${before} → ${prompt.length} chars`);
604
- }
605
- }
606
- log.debug(`[task:${taskId}]`,
607
- `prompt: ${originalLen} chars (final: ${prompt.length} chars)`);
608
- log.quiet(`--- prompt (final) ---\n${clip(prompt)}\n--- end prompt ---`, taskId);
609
-
610
- // H1: hand the driver a continue_from that has already been
611
- // qualified by dag.ts. Without this, drivers like codex/opencode/
612
- // claude-code look up maps directly with
613
- // the user's raw (possibly bare) string, which races whenever two
614
- // tracks share a task name. dag.ts has the only authoritative
615
- // resolver, so we use its precomputed answer here.
616
- const enrichedTask: TaskConfig = {
617
- ...task,
618
- prompt,
619
- continue_from: node.resolvedContinueFrom ?? task.continue_from,
620
- };
621
- const driverCtx: DriverContext = {
622
- sessionMap, normalizedMap, workDir: task.cwd ?? workDir,
623
- };
624
- const spec = await driver.buildCommand(enrichedTask, track, driverCtx);
625
- log.debug(`[task:${taskId}]`, `driver=${driverName}`);
626
- log.debug(`[task:${taskId}]`,
627
- `spawn args: ${JSON.stringify(spec.args)}`);
628
- if (spec.cwd) log.debug(`[task:${taskId}]`, `spawn cwd: ${spec.cwd}`);
629
- if (spec.env) log.debug(`[task:${taskId}]`,
630
- `spawn env overrides: ${Object.keys(spec.env).join(', ')}`);
631
- if (spec.stdin) log.debug(`[task:${taskId}]`,
632
- `spawn stdin: ${spec.stdin.length} chars`);
633
- result = await runSpawn(spec, driver, runOpts);
634
- }
635
-
636
- // 6. Determine terminal status (without emitting yet — result must be complete first)
637
- // H2: branch on failureKind so spawn errors no longer masquerade as
638
- // timeouts. Old runners that don't set failureKind still work — we
639
- // fall back to the historical `exitCode === -1 → timeout` heuristic so
640
- // pre-existing third-party drivers don't regress.
641
- let terminalStatus: TaskStatus;
642
- const kind = result.failureKind;
643
- if (kind === 'timeout') {
644
- terminalStatus = 'timeout';
645
- } else if (kind === 'spawn_error') {
646
- terminalStatus = 'failed';
647
- } else if (kind === undefined && result.exitCode === -1) {
648
- // Legacy path: pre-H2 driver returned -1 with no kind. Treat as
649
- // timeout for backward compatibility (the previous behaviour).
650
- terminalStatus = 'timeout';
651
- } else if (result.exitCode !== 0) {
652
- terminalStatus = 'failed';
653
- } else if (task.completion) {
654
- const plugin = getHandler<CompletionPlugin>('completions', task.completion.type);
655
- const completionCtx = { workDir: task.cwd ?? workDir, signal: abortController.signal };
656
- const passed = await plugin.check(task.completion as Record<string, unknown>, result, completionCtx);
657
- // R4: strict boolean check. Truthy strings/numbers used to be coerced
658
- // to success — a check returning "ok" would let a failing task pass.
659
- if (typeof passed !== 'boolean') {
660
- throw new Error(
661
- `completion "${task.completion.type}".check() returned ${passed === null ? 'null' : typeof passed}, expected boolean`
662
- );
663
- }
664
- terminalStatus = passed ? 'success' : 'failed';
665
- } else {
666
- terminalStatus = 'success';
667
- }
668
-
669
- // Store normalized text separately (in-memory) for continue_from handoff.
670
- // R15: clip oversized values so a runaway parseResult can't accumulate
671
- // hundreds of MB across tasks.
672
- if (result.normalizedOutput !== null) {
673
- const clipped = result.normalizedOutput.length > MAX_NORMALIZED_BYTES
674
- ? result.normalizedOutput.slice(0, MAX_NORMALIZED_BYTES) +
675
- `\n[…clipped at ${MAX_NORMALIZED_BYTES} bytes]`
676
- : result.normalizedOutput;
677
- normalizedMap.set(taskId, clipped);
678
- }
679
-
680
- if (result.stderr) {
681
- const stderrPath = resolve(log.dir, `${taskId.replace(/\./g, '_')}.stderr`);
682
- await Bun.write(stderrPath, result.stderr);
683
- result = { ...result, stderrPath };
684
- }
685
-
686
- if (result.sessionId) {
687
- // H1: qualified-only key.
688
- sessionMap.set(taskId, result.sessionId);
689
- }
690
-
691
- // Set result and finishedAt before emitting terminal status so listeners see complete state
692
- state.result = result;
693
- state.finishedAt = nowISO();
694
- setTaskStatus(taskId, terminalStatus);
695
-
696
- // Log task outcome with relevant details
697
- const durSec = (result.durationMs / 1000).toFixed(1);
698
- if (terminalStatus === 'success') {
699
- log.info(`[task:${taskId}]`, `success (${durSec}s)`);
700
- } else {
701
- log.error(`[task:${taskId}]`,
702
- `${terminalStatus} exit=${result.exitCode} duration=${durSec}s`);
703
- if (result.stderr) {
704
- const tail = tailLines(result.stderr, 10);
705
- log.error(`[task:${taskId}]`, `stderr tail:\n${tail}`);
706
- }
707
- }
708
-
709
- // File-only: full stdout/stderr dump (clipped) + extracted metadata
710
- log.debug(`[task:${taskId}]`,
711
- `stdout: ${result.stdout.length} chars, stderr: ${result.stderr.length} chars`);
712
- if (result.sessionId) {
713
- log.debug(`[task:${taskId}]`, `sessionId: ${result.sessionId}`);
714
- }
715
- if (result.stderrPath) {
716
- log.debug(`[task:${taskId}]`, `wrote stderr: ${result.stderrPath}`);
717
- }
718
- if (result.stdout) {
719
- log.quiet(`--- stdout (${taskId}) ---\n${clip(result.stdout)}\n--- end stdout ---`, taskId);
720
- }
721
- if (result.stderr) {
722
- log.quiet(`--- stderr (${taskId}) ---\n${clip(result.stderr)}\n--- end stderr ---`, taskId);
723
- }
724
- if (task.completion) {
725
- log.debug(`[task:${taskId}]`,
726
- `completion check: type=${task.completion.type} result=${terminalStatus}`);
727
- }
728
-
729
- } catch (err: unknown) {
730
- const errMsg = err instanceof Error ? (err.stack ?? err.message) : String(err);
731
- log.error(`[task:${taskId}]`, `failed before execution: ${errMsg}`);
732
- state.result = {
733
- exitCode: -1,
734
- stdout: '',
735
- stderr: errMsg,
736
- stderrPath: null, durationMs: 0,
737
- sessionId: null, normalizedOutput: null,
738
- // H2: Engine-level pre-execution errors (driver throw, middleware
739
- // throw, getHandler 404) classify as spawn_error — the process never
740
- // ran, so calling them "timeout" was actively misleading.
741
- failureKind: 'spawn_error',
742
- };
743
- state.finishedAt = nowISO();
744
- setTaskStatus(taskId, 'failed');
745
- }
746
-
747
- // 7. Fire hooks
748
- const finalStatus: TaskStatus = state.status;
749
- try {
750
- await fireHook(taskId, finalStatus === 'success' ? 'task_success' : 'task_failure');
751
- } catch (hookErr) {
752
- log.error(`[task:${taskId}]`, `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`);
753
- }
754
-
755
- // 8. Handle stop_all for failure states
756
- if (finalStatus !== 'success' && getOnFailure(taskId) === 'stop_all') {
757
- applyStopAll(node.track.id);
758
- }
759
- }
760
-
761
- // ── Event loop ──
762
- // Each task is launched as soon as ALL its deps reach a terminal state.
763
- // We track in-flight tasks in `running` so a task completing mid-batch
764
- // immediately unblocks its dependents without waiting for sibling tasks.
765
- const running = new Map<string, Promise<void>>();
766
-
767
- try {
768
- while (!pipelineAborted) {
769
- // Launch every task whose deps are all terminal and that isn't already in-flight
770
- for (const [id, state] of states) {
771
- if (state.status !== 'waiting' || running.has(id)) continue;
772
- const node = dag.nodes.get(id)!;
773
- const allDepsTerminal = node.dependsOn.length === 0 ||
774
- node.dependsOn.every(d => isTerminal(states.get(d)!.status));
775
- if (!allDepsTerminal) continue;
776
- const p = processTask(id).finally(() => running.delete(id));
777
- running.set(id, p);
778
- }
779
-
780
- // All tasks terminal — done
781
- if ([...states.values()].every(s => isTerminal(s.status))) break;
782
-
783
- if (running.size === 0) {
784
- // Nothing in-flight but non-terminal tasks exist (e.g. trigger-wait states
785
- // that processTask hasn't been called for yet). Poll briefly.
786
- await new Promise(r => setTimeout(r, POLL_INTERVAL_MS));
787
- } else {
788
- // Wait for any one task to finish, then re-scan for new launchables.
789
- await Promise.race(running.values());
790
- }
791
- }
792
-
793
- if (pipelineAborted) {
794
- // Wait for in-flight tasks to honour the abort signal before marking states.
795
- if (running.size > 0) await Promise.allSettled(running.values());
796
- for (const [id, state] of states) {
797
- if (!isTerminal(state.status)) {
798
- // Running tasks get timeout (they were killed); waiting tasks get skipped
799
- state.finishedAt = nowISO();
800
- setTaskStatus(id, state.status === 'running' ? 'timeout' : 'skipped');
801
- }
802
- }
803
- }
804
- } finally {
805
- if (pipelineTimer) clearTimeout(pipelineTimer);
806
- // Clean up the external abort signal listener to prevent dead references
807
- // accumulating on long-lived shared AbortControllers.
808
- if (options.signal) {
809
- options.signal.removeEventListener('abort', externalAbortHandler);
810
- }
811
- // Safety net: drain any approvals still pending at shutdown (e.g. crash path).
812
- if (approvalGateway.pending().length > 0) {
813
- approvalGateway.abortAll('pipeline finished');
814
- }
815
- }
816
-
817
- // ── Summary ──
818
- const summary = { total: 0, success: 0, failed: 0, skipped: 0, timeout: 0, blocked: 0 };
819
- for (const [, state] of states) {
820
- summary.total++;
821
- switch (state.status) {
822
- case 'success': summary.success++; break;
823
- case 'failed': summary.failed++; break;
824
- case 'skipped': summary.skipped++; break;
825
- case 'timeout': summary.timeout++; break;
826
- case 'blocked': summary.blocked++; break;
827
- }
828
- }
829
-
830
- const finishedAt = nowISO();
831
- const durationMs = new Date(finishedAt).getTime() - new Date(startedAt).getTime();
832
-
833
- if (pipelineAborted) {
834
- await executeHook(config.hooks, 'pipeline_error',
835
- buildPipelineErrorContext(pipelineInfo, 'Pipeline timeout exceeded'), workDir);
836
- } else {
837
- await executeHook(config.hooks, 'pipeline_complete',
838
- buildPipelineCompleteContext(
839
- { ...pipelineInfo, finished_at: finishedAt, duration_ms: durationMs }, summary), workDir);
840
- }
841
-
842
- const allSuccess = !pipelineAborted
843
- && summary.failed === 0 && summary.timeout === 0 && summary.blocked === 0;
844
-
845
- log.section('Pipeline summary');
846
- log.quiet(`status: ${pipelineAborted ? 'aborted (timeout)' : 'completed'}`);
847
- log.quiet(`duration: ${(durationMs / 1000).toFixed(1)}s`);
848
- log.quiet(
849
- `counts: total=${summary.total} success=${summary.success} ` +
850
- `failed=${summary.failed} skipped=${summary.skipped} ` +
851
- `timeout=${summary.timeout} blocked=${summary.blocked}`);
852
- log.quiet('');
853
- log.quiet('per-task:');
854
- for (const [id, state] of states) {
855
- const dur = state.result?.durationMs != null
856
- ? `${(state.result.durationMs / 1000).toFixed(1)}s` : '-';
857
- const exit = state.result?.exitCode ?? '-';
858
- log.quiet(` ${state.status.padEnd(8)} ${id} (exit=${exit}, ${dur})`);
859
- }
860
-
861
- log.info('[pipeline]', `completed "${config.name}"`);
862
- log.info('[pipeline]', `Total: ${summary.total} | Success: ${summary.success} | Failed: ${summary.failed} | Skipped: ${summary.skipped} | Timeout: ${summary.timeout} | Blocked: ${summary.blocked}`);
863
- log.info('[pipeline]', `Duration: ${(durationMs / 1000).toFixed(1)}s`);
864
- log.info('[pipeline]', `Log: ${log.path}`);
865
-
866
- emit({ type: 'pipeline_end', runId, success: allSuccess });
867
- return { success: allSuccess, runId, logPath: log.path, summary, states: freezeStates(states) };
868
-
869
- } finally {
870
- // Close the persistent log file handle before pruning.
871
- log.close();
872
- // Prune old per-run log directories on every exit path (normal, blocked, or thrown).
873
- // Exclude the current runId so a concurrent run cannot delete its own live directory.
874
- if (maxLogRuns > 0) {
875
- await pruneLogDirs(resolve(workDir, '.tagma', 'logs'), maxLogRuns, runId);
876
- }
877
- }
878
- }
879
-
880
- /**
881
- * Delete the oldest subdirectories under `logsDir`, keeping only the most recent `keep`
882
- * total runs (including the currently-live run identified by `excludeRunId`).
883
- * Directories are sorted lexicographically; because runIds are prefixed with a base-36
884
- * timestamp, lexicographic order equals chronological order.
885
- *
886
- * `excludeRunId` is always skipped from deletion even if it would otherwise be pruned —
887
- * this prevents a concurrent run from removing a live log directory that is still in use.
888
- *
889
- * D10: The live run occupies one slot out of `keep`, so the maximum number of
890
- * *historical* dirs to retain is `keep - 1`. Without this adjustment the function
891
- * kept `keep` historical dirs plus 1 live dir = `keep + 1` total on disk.
892
- */
893
- async function pruneLogDirs(logsDir: string, keep: number, excludeRunId: string): Promise<void> {
894
- let entries: string[];
895
- try {
896
- entries = await readdir(logsDir);
897
- } catch {
898
- return; // logsDir doesn't exist yet — nothing to prune
899
- }
900
-
901
- // Only consider directories that look like run IDs (run_<...>), excluding the live run.
902
- const runDirs = entries.filter(e => e.startsWith('run_') && e !== excludeRunId).sort();
903
- // keep - 1 historical slots (1 slot is reserved for the live excludeRunId).
904
- const historyKeep = Math.max(0, keep - 1);
905
- const toDelete = runDirs.slice(0, Math.max(0, runDirs.length - historyKeep));
906
-
907
- await Promise.all(
908
- toDelete.map(dir =>
909
- rm(resolve(logsDir, dir), { recursive: true, force: true }).catch(() => {
910
- // Ignore deletion errors — stale dirs are better than a crash
911
- })
912
- )
913
- );
914
- }
915
-
916
- function isTerminal(status: TaskStatus): boolean {
917
- return status === 'success' || status === 'failed' || status === 'timeout'
918
- || status === 'skipped' || status === 'blocked';
919
- }
920
-
921
- /** Return a caller-safe snapshot of the states map: each entry, plus its config, trackConfig and result, is shallow-copied. */
922
- function freezeStates(states: Map<string, TaskState>): ReadonlyMap<string, TaskState> {
923
- const copy = new Map<string, TaskState>();
924
- for (const [id, s] of states) {
925
- copy.set(id, {
926
- config: { ...s.config },
927
- trackConfig: { ...s.trackConfig },
928
- status: s.status,
929
- result: s.result ? { ...s.result } : null,
930
- startedAt: s.startedAt,
931
- finishedAt: s.finishedAt,
932
- });
933
- }
934
- return copy;
935
- }
1
+ import { resolve } from 'path';
2
+ import { readdir, rm } from 'fs/promises';
3
+ import type {
4
+ PipelineConfig,
5
+ TaskConfig,
6
+ TaskState,
7
+ TaskStatus,
8
+ TaskResult,
9
+ DriverPlugin,
10
+ TriggerPlugin,
11
+ CompletionPlugin,
12
+ MiddlewarePlugin,
13
+ MiddlewareContext,
14
+ DriverContext,
15
+ OnFailure,
16
+ } from './types';
17
+ import { buildDag, type Dag } from './dag';
18
+ import { getHandler, hasHandler, loadPlugins } from './registry';
19
+ import { runSpawn, runCommand } from './runner';
20
+ import { parseDuration, nowISO, generateRunId } from './utils';
21
+ import {
22
+ executeHook,
23
+ buildPipelineStartContext,
24
+ buildTaskContext,
25
+ buildPipelineCompleteContext,
26
+ buildPipelineErrorContext,
27
+ type PipelineInfo,
28
+ type TrackInfo,
29
+ type TaskInfo,
30
+ } from './hooks';
31
+ import { Logger, tailLines, clip, type LogLevel } from './logger';
32
+ import { InMemoryApprovalGateway, type ApprovalGateway } from './approval';
33
+
34
+ // ═══ A7: Typed trigger errors ═══
35
+ // Replace string-matching on error messages with structured error types so
36
+ // coincidental substrings don't cause misclassification.
37
+
38
+ export class TriggerBlockedError extends Error {
39
+ readonly code = 'TRIGGER_BLOCKED' as const;
40
+ constructor(message: string) {
41
+ super(message);
42
+ this.name = 'TriggerBlockedError';
43
+ }
44
+ }
45
+
46
+ export class TriggerTimeoutError extends Error {
47
+ readonly code = 'TRIGGER_TIMEOUT' as const;
48
+ constructor(message: string) {
49
+ super(message);
50
+ this.name = 'TriggerTimeoutError';
51
+ }
52
+ }
53
+
54
+ // ═══ Preflight Validation ═══
55
+
56
+ function preflight(config: PipelineConfig, dag: Dag): void {
57
+ const errors: string[] = [];
58
+
59
+ for (const [, node] of dag.nodes) {
60
+ const task = node.task;
61
+ const track = node.track;
62
+ const driverName = task.driver ?? track.driver ?? config.driver ?? 'claude-code';
63
+
64
+ // Pure command tasks don't use a driver — skip driver registration check.
65
+ const isCommandOnly = task.command && !task.prompt;
66
+
67
+ if (!isCommandOnly && !hasHandler('drivers', driverName)) {
68
+ errors.push(`Task "${node.taskId}": driver "${driverName}" not registered`);
69
+ }
70
+
71
+ if (task.trigger && !hasHandler('triggers', task.trigger.type)) {
72
+ errors.push(`Task "${node.taskId}": trigger type "${task.trigger.type}" not registered`);
73
+ }
74
+
75
+ if (task.completion && !hasHandler('completions', task.completion.type)) {
76
+ errors.push(
77
+ `Task "${node.taskId}": completion type "${task.completion.type}" not registered`,
78
+ );
79
+ }
80
+
81
+ const mws = task.middlewares ?? track.middlewares ?? [];
82
+ for (const mw of mws) {
83
+ if (!hasHandler('middlewares', mw.type)) {
84
+ errors.push(`Task "${node.taskId}": middleware type "${mw.type}" not registered`);
85
+ }
86
+ }
87
+
88
+ if (task.continue_from && hasHandler('drivers', driverName)) {
89
+ const driver = getHandler<DriverPlugin>('drivers', driverName);
90
+ if (!driver.capabilities.sessionResume) {
91
+ const upstreamId = resolveRefInDag(dag, task.continue_from, track.id);
92
+ if (upstreamId) {
93
+ const upstream = dag.nodes.get(upstreamId);
94
+ if (upstream) {
95
+ // A handoff is possible via session resume (already ruled out above),
96
+ // OR in-memory text injection through normalizedMap
97
+ // (when the upstream driver implements parseResult and returns normalizedOutput).
98
+ const upstreamDriverName =
99
+ upstream.task.driver ?? upstream.track.driver ?? config.driver ?? 'claude-code';
100
+ const upstreamDriver = hasHandler('drivers', upstreamDriverName)
101
+ ? getHandler<DriverPlugin>('drivers', upstreamDriverName)
102
+ : null;
103
+ const canNormalize = typeof upstreamDriver?.parseResult === 'function';
104
+
105
+ if (!canNormalize) {
106
+ errors.push(
107
+ `Task "${node.taskId}" uses continue_from: "${task.continue_from}", ` +
108
+ `but upstream task "${upstreamId}" its driver ` +
109
+ `does not implement parseResult for text-injection handoff. ` +
110
+ `Use a driver with parseResult, or remove continue_from.`,
111
+ );
112
+ }
113
+ }
114
+ }
115
+ }
116
+ }
117
+ }
118
+
119
+ if (errors.length > 0) {
120
+ throw new Error(`Preflight validation failed:\n - ${errors.join('\n - ')}`);
121
+ }
122
+ }
123
+
124
+ function resolveRefInDag(dag: Dag, ref: string, fromTrackId: string): string | null {
125
+ // Already fully qualified
126
+ if (dag.nodes.has(ref)) return ref;
127
+ // Same-track match (preferred)
128
+ const sameTrack = `${fromTrackId}.${ref}`;
129
+ if (dag.nodes.has(sameTrack)) return sameTrack;
130
+ // Cross-track bare name lookup — must be unambiguous (aligned with buildDag's resolveRef)
131
+ let match: string | null = null;
132
+ for (const [id] of dag.nodes) {
133
+ if (id.endsWith(`.${ref}`)) {
134
+ if (match !== null) {
135
+ // Ambiguous: multiple tasks share the bare name across tracks
136
+ return null;
137
+ }
138
+ match = id;
139
+ }
140
+ }
141
+ return match;
142
+ }
143
+
144
+ // ═══ Engine ═══
145
+
146
+ export interface EngineResult {
147
+ readonly success: boolean;
148
+ readonly runId: string;
149
+ readonly logPath: string;
150
+ readonly summary: {
151
+ total: number;
152
+ success: number;
153
+ failed: number;
154
+ skipped: number;
155
+ timeout: number;
156
+ blocked: number;
157
+ };
158
+ readonly states: ReadonlyMap<string, TaskState>;
159
+ }
160
+
161
+ // ═══ Pipeline Events ═══
162
+
163
+ export type PipelineEvent =
164
+ | {
165
+ readonly type: 'task_status_change';
166
+ readonly taskId: string;
167
+ readonly status: TaskStatus;
168
+ readonly prevStatus: TaskStatus;
169
+ readonly runId: string;
170
+ readonly state: TaskState;
171
+ }
172
+ | {
173
+ readonly type: 'pipeline_start';
174
+ readonly runId: string;
175
+ readonly states: ReadonlyMap<string, TaskState>;
176
+ }
177
+ | { readonly type: 'pipeline_end'; readonly runId: string; readonly success: boolean }
178
+ /**
179
+ * Fine-grained log line emitted alongside every write to pipeline.log.
180
+ * Consumers use this to stream the full run process into UIs without
181
+ * tailing the log file. `taskId` is non-null for task-scoped lines and
182
+ * null for pipeline-wide messages (e.g. configuration dumps, DAG
183
+ * topology, pipeline start/end).
184
+ */
185
+ | {
186
+ readonly type: 'task_log';
187
+ readonly runId: string;
188
+ readonly taskId: string | null;
189
+ readonly level: LogLevel;
190
+ readonly timestamp: string;
191
+ readonly text: string;
192
+ };
193
+
194
+ export interface RunPipelineOptions {
195
+ readonly approvalGateway?: ApprovalGateway;
196
+ /**
197
+ * Maximum number of per-run log directories to retain under `<workDir>/.tagma/logs/`.
198
+ * Oldest directories are deleted after each run. Defaults to 20. Set to 0 to disable cleanup.
199
+ */
200
+ readonly maxLogRuns?: number;
201
+ /**
202
+ * Caller-supplied run ID. When provided the engine uses this instead of
203
+ * generating its own via `generateRunId()`, keeping the editor and SDK
204
+ * log directories aligned on the same ID.
205
+ */
206
+ readonly runId?: string;
207
+ /**
208
+ * External AbortSignal — aborting it cancels the pipeline immediately.
209
+ * Equivalent to the pipeline timeout firing, but caller-controlled.
210
+ */
211
+ readonly signal?: AbortSignal;
212
+ /**
213
+ * Called on every pipeline/task status transition.
214
+ * Use for real-time UI updates (e.g. updating a visual workflow graph).
215
+ */
216
+ readonly onEvent?: (event: PipelineEvent) => void;
217
+ /**
218
+ * Skip the engine's built-in `loadPlugins(config.plugins)` call.
219
+ * Use this when the host has already pre-loaded plugins from a custom
220
+ * resolution path (e.g. a user workspace's node_modules) so the engine
221
+ * doesn't re-resolve them via Node's default cwd-based import.
222
+ */
223
+ readonly skipPluginLoading?: boolean;
224
+ }
225
+
226
+ // Poll interval when no tasks are in-flight but non-terminal tasks remain
227
+ // (e.g. tasks waiting on a file or manual trigger).
228
+ const POLL_INTERVAL_MS = 50;
229
+
230
+ // R15: cap on each normalized-output entry stored in normalizedMap so a
231
+ // runaway parseResult can't accumulate hundreds of MB across tasks. 1 MB
232
+ // is generous for any text-context handoff between AI tasks.
233
+ const MAX_NORMALIZED_BYTES = 1_000_000;
234
+
235
+ export async function runPipeline(
236
+ config: PipelineConfig,
237
+ workDir: string,
238
+ options: RunPipelineOptions = {},
239
+ ): Promise<EngineResult> {
240
+ const approvalGateway = options.approvalGateway ?? new InMemoryApprovalGateway();
241
+ const maxLogRuns = options.maxLogRuns ?? 20;
242
+
243
+ // Load any plugins declared in the pipeline config before preflight so that
244
+ // drivers, completions, and middlewares referenced in YAML are registered.
245
+ // Hosts that pre-load plugins from a custom path (e.g. the editor loading
246
+ // from the user's workspace node_modules) pass skipPluginLoading: true so
247
+ // we don't re-resolve via Node's cwd-based default import.
248
+ if (!options.skipPluginLoading && config.plugins?.length) {
249
+ await loadPlugins(config.plugins);
250
+ }
251
+
252
+ const dag = buildDag(config);
253
+ const runId = options.runId ?? generateRunId();
254
+ preflight(config, dag);
255
+
256
+ const startedAt = nowISO();
257
+ const pipelineInfo: PipelineInfo = { name: config.name, run_id: runId, started_at: startedAt };
258
+ // Forward every structured log line to subscribers as task_log events.
259
+ // Reading options.onEvent inside the callback (vs. capturing it once) keeps
260
+ // the SDK behavior correct if callers pass a fresh onEvent on each run.
261
+ const log = new Logger(workDir, runId, (record) => {
262
+ options.onEvent?.({
263
+ type: 'task_log',
264
+ runId,
265
+ taskId: record.taskId,
266
+ level: record.level,
267
+ timestamp: record.timestamp,
268
+ text: record.text,
269
+ });
270
+ });
271
+
272
+ try {
273
+ log.info('[pipeline]', `start "${config.name}" run_id=${runId}`);
274
+
275
+ // File-only: dump the resolved pipeline shape + DAG topology for post-mortem.
276
+ log.section('Pipeline configuration');
277
+ log.quiet(`name: ${config.name}`);
278
+ log.quiet(`driver: ${config.driver ?? '(default: claude-code)'}`);
279
+ log.quiet(`timeout: ${config.timeout ?? '(none)'}`);
280
+ log.quiet(`tracks: ${config.tracks.length}`);
281
+ log.quiet(`tasks (total): ${dag.nodes.size}`);
282
+ log.quiet(`plugins: ${(config.plugins ?? []).join(', ') || '(none)'}`);
283
+ log.quiet(
284
+ `hooks: ${config.hooks ? Object.keys(config.hooks).join(', ') || '(none)' : '(none)'}`,
285
+ );
286
+
287
+ log.section('DAG topology');
288
+ for (const [id, node] of dag.nodes) {
289
+ const deps = node.dependsOn.length ? node.dependsOn.join(', ') : '(root)';
290
+ const kind = node.task.prompt ? 'ai' : 'cmd';
291
+ log.quiet(` • ${id} [${kind}] track=${node.track.id} deps=[${deps}]`);
292
+ }
293
+ log.quiet('');
294
+
295
+ // Initialize states (before hook, so we can return them even if blocked)
296
+ const states = new Map<string, TaskState>();
297
+ for (const [id, node] of dag.nodes) {
298
+ states.set(id, {
299
+ config: node.task,
300
+ trackConfig: node.track,
301
+ status: 'idle',
302
+ result: null,
303
+ startedAt: null,
304
+ finishedAt: null,
305
+ });
306
+ }
307
+
308
+ // Pipeline start hook (gate)
309
+ const startHook = await executeHook(
310
+ config.hooks,
311
+ 'pipeline_start',
312
+ buildPipelineStartContext(pipelineInfo),
313
+ workDir,
314
+ );
315
+ if (!startHook.allowed) {
316
+ console.error(`Pipeline blocked by pipeline_start hook (exit code ${startHook.exitCode})`);
317
+ await executeHook(
318
+ config.hooks,
319
+ 'pipeline_error',
320
+ buildPipelineErrorContext(pipelineInfo, 'pipeline_blocked', 'pipeline_blocked'),
321
+ workDir,
322
+ );
323
+ // All tasks stay idle — pipeline never started
324
+ return {
325
+ success: false,
326
+ runId,
327
+ logPath: log.path,
328
+ summary: {
329
+ total: dag.nodes.size,
330
+ success: 0,
331
+ failed: 0,
332
+ skipped: 0,
333
+ timeout: 0,
334
+ blocked: 0,
335
+ },
336
+ states: freezeStates(states),
337
+ };
338
+ }
339
+
340
+ // Pipeline approved — transition all tasks to waiting
341
+ for (const [, state] of states) {
342
+ state.status = 'waiting';
343
+ }
344
+ // Include a full states snapshot so listeners can initialize their mirrors without missing events
345
+ const statesSnapshot: ReadonlyMap<string, TaskState> = new Map(
346
+ [...states.entries()].map(([id, s]) => [id, { ...s }]),
347
+ );
348
+ options.onEvent?.({ type: 'pipeline_start', runId, states: statesSnapshot });
349
+
350
+ const sessionMap = new Map<string, string>();
351
+ const normalizedMap = new Map<string, string>();
352
+
353
+ // Pipeline timeout
354
+ const pipelineTimeoutMs = config.timeout ? parseDuration(config.timeout) : 0;
355
+ let pipelineAborted = false;
356
+ const abortController = new AbortController();
357
+ let pipelineTimer: ReturnType<typeof setTimeout> | null = null;
358
+
359
+ if (pipelineTimeoutMs > 0) {
360
+ pipelineTimer = setTimeout(() => {
361
+ pipelineAborted = true;
362
+ abortController.abort();
363
+ }, pipelineTimeoutMs);
364
+ }
365
+
366
+ // When the pipeline is aborted (timeout, external shutdown), drain all
367
+ // pending approvals so waiting triggers unblock immediately.
368
+ abortController.signal.addEventListener('abort', () => {
369
+ approvalGateway.abortAll('pipeline aborted');
370
+ });
371
+
372
+ // Wire external cancel signal into the internal abort controller.
373
+ const externalAbortHandler = () => {
374
+ pipelineAborted = true;
375
+ abortController.abort();
376
+ };
377
+ if (options.signal) {
378
+ if (options.signal.aborted) {
379
+ externalAbortHandler();
380
+ } else {
381
+ options.signal.addEventListener('abort', externalAbortHandler, { once: true });
382
+ }
383
+ }
384
+
385
+ // ── Helpers ──
386
+
387
+ function emit(event: PipelineEvent): void {
388
+ options.onEvent?.(event);
389
+ }
390
+
391
+ function setTaskStatus(taskId: string, newStatus: TaskStatus): void {
392
+ const state = states.get(taskId)!;
393
+ // Terminal lock: once a task reaches a terminal state it must not be
394
+ // re-transitioned. This prevents stop_all from marking running tasks as
395
+ // skipped and then having their in-flight processTask promise overwrite
396
+ // that with success/failed, producing an invalid double transition.
397
+ if (isTerminal(state.status)) return;
398
+ const prevStatus = state.status;
399
+ state.status = newStatus;
400
+ // Snapshot state at emit time — result and finishedAt must be set before calling this for terminal statuses
401
+ const snapshot: TaskState = {
402
+ config: state.config,
403
+ trackConfig: state.trackConfig,
404
+ status: state.status,
405
+ result: state.result,
406
+ startedAt: state.startedAt,
407
+ finishedAt: state.finishedAt,
408
+ };
409
+ emit({
410
+ type: 'task_status_change',
411
+ taskId,
412
+ status: newStatus,
413
+ prevStatus,
414
+ runId,
415
+ state: snapshot,
416
+ });
417
+ }
418
+
419
+ function getOnFailure(taskId: string): OnFailure {
420
+ return dag.nodes.get(taskId)?.track.on_failure ?? 'skip_downstream';
421
+ }
422
+
423
+ function isDependencySatisfied(depId: string): 'satisfied' | 'unsatisfied' | 'skip' {
424
+ const depState = states.get(depId);
425
+ if (!depState) return 'skip';
426
+ switch (depState.status) {
427
+ case 'success':
428
+ return 'satisfied';
429
+ case 'skipped':
430
+ return 'skip';
431
+ case 'failed':
432
+ case 'timeout':
433
+ case 'blocked':
434
+ return getOnFailure(depId) === 'ignore' ? 'satisfied' : 'skip';
435
+ default:
436
+ return 'unsatisfied';
437
+ }
438
+ }
439
+
440
+ /**
441
+ * H3: "stop_all" historically only stopped tasks within the same track,
442
+ * which contradicted both its name and user expectations. It now stops
443
+ * the **entire pipeline**:
444
+ * - In-flight tasks are signalled via the shared abort controller so
445
+ * drivers / runner.ts can cancel cooperatively (returning
446
+ * `failureKind: 'timeout'`).
447
+ * - Still-waiting tasks across every track are immediately marked
448
+ * skipped so the run completes promptly.
449
+ * The terminal lock in setTaskStatus prevents any later re-transition
450
+ * should a completed running task try to overwrite the skipped state.
451
+ */
452
+ function applyStopAll(_failedTrackId: string): void {
453
+ pipelineAborted = true;
454
+ abortController.abort();
455
+ for (const [id, state] of states) {
456
+ if (state.status === 'waiting') {
457
+ state.finishedAt = nowISO();
458
+ setTaskStatus(id, 'skipped');
459
+ }
460
+ }
461
+ }
462
+
463
+ function buildTaskInfoObj(taskId: string): TaskInfo {
464
+ const state = states.get(taskId)!;
465
+ return {
466
+ id: taskId,
467
+ name: state.config.name,
468
+ type: state.config.prompt ? 'ai' : 'command',
469
+ status: state.status,
470
+ exit_code: state.result?.exitCode ?? null,
471
+ duration_ms: state.result?.durationMs ?? null,
472
+ stderr_path: state.result?.stderrPath ?? null,
473
+ session_id: state.result?.sessionId ?? null,
474
+ started_at: state.startedAt,
475
+ finished_at: state.finishedAt,
476
+ };
477
+ }
478
+
479
+ function trackInfoOf(taskId: string): TrackInfo {
480
+ const node = dag.nodes.get(taskId)!;
481
+ return { id: node.track.id, name: node.track.name };
482
+ }
483
+
484
+ async function fireHook(taskId: string, event: 'task_success' | 'task_failure'): Promise<void> {
485
+ await executeHook(
486
+ config.hooks,
487
+ event,
488
+ buildTaskContext(event, pipelineInfo, trackInfoOf(taskId), buildTaskInfoObj(taskId)),
489
+ workDir,
490
+ abortController.signal,
491
+ );
492
+ }
493
+
494
+ // ── Process a single task ──
495
+
496
+ async function processTask(taskId: string): Promise<void> {
497
+ const state = states.get(taskId)!;
498
+ const node = dag.nodes.get(taskId)!;
499
+ const task = node.task;
500
+ const track = node.track;
501
+
502
+ log.section(`Task ${taskId}`, taskId);
503
+ log.debug(
504
+ `[task:${taskId}]`,
505
+ `type=${task.prompt ? 'ai' : 'cmd'} track=${track.id} deps=[${node.dependsOn.join(', ') || '(root)'}]`,
506
+ );
507
+
508
+ // 1. Check dependencies
509
+ for (const depId of node.dependsOn) {
510
+ const result = isDependencySatisfied(depId);
511
+ if (result === 'skip') {
512
+ const depStatus = states.get(depId)?.status ?? 'unknown';
513
+ log.debug(`[task:${taskId}]`, `skipped (upstream "${depId}" status=${depStatus})`);
514
+ state.finishedAt = nowISO();
515
+ setTaskStatus(taskId, 'skipped');
516
+ return;
517
+ }
518
+ if (result === 'unsatisfied') return; // still waiting
519
+ }
520
+
521
+ // 2. Check trigger
522
+ if (task.trigger) {
523
+ log.debug(
524
+ `[task:${taskId}]`,
525
+ `trigger wait: type=${task.trigger.type} ${JSON.stringify(task.trigger)}`,
526
+ );
527
+ try {
528
+ const triggerPlugin = getHandler<TriggerPlugin>('triggers', task.trigger.type);
529
+ // R6: race the plugin's watch() against the pipeline's abort signal.
530
+ // Third-party triggers may forget to wire up ctx.signal — without
531
+ // this race, an aborted pipeline would hang forever waiting for the
532
+ // plugin's watch promise to resolve. The race resolves on whichever
533
+ // path settles first, and the cleanup paths in finally never run on
534
+ // the orphaned plugin promise (it's allowed to leak a watcher; the
535
+ // pipeline is being torn down anyway).
536
+ await new Promise<unknown>((resolve, reject) => {
537
+ let settled = false;
538
+ const onAbort = () => {
539
+ if (settled) return;
540
+ settled = true;
541
+ abortController.signal.removeEventListener('abort', onAbort);
542
+ reject(new Error('Pipeline aborted'));
543
+ };
544
+ if (abortController.signal.aborted) {
545
+ onAbort();
546
+ return;
547
+ }
548
+ abortController.signal.addEventListener('abort', onAbort, { once: true });
549
+ triggerPlugin
550
+ .watch(task.trigger as Record<string, unknown>, {
551
+ taskId: node.taskId,
552
+ trackId: track.id,
553
+ workDir: task.cwd ?? workDir,
554
+ signal: abortController.signal,
555
+ approvalGateway,
556
+ })
557
+ .then(
558
+ (v) => {
559
+ if (settled) return;
560
+ settled = true;
561
+ abortController.signal.removeEventListener('abort', onAbort);
562
+ resolve(v);
563
+ },
564
+ (e) => {
565
+ if (settled) return;
566
+ settled = true;
567
+ abortController.signal.removeEventListener('abort', onAbort);
568
+ reject(e);
569
+ },
570
+ );
571
+ });
572
+ log.debug(`[task:${taskId}]`, `trigger fired`);
573
+ } catch (err: unknown) {
574
+ // If pipeline was aborted while we were still waiting for the trigger,
575
+ // this task never entered running state → skipped, not timeout.
576
+ state.finishedAt = nowISO();
577
+ if (pipelineAborted) {
578
+ setTaskStatus(taskId, 'skipped');
579
+ } else if (err instanceof TriggerBlockedError) {
580
+ setTaskStatus(taskId, 'blocked'); // user/policy rejection
581
+ } else if (err instanceof TriggerTimeoutError) {
582
+ setTaskStatus(taskId, 'timeout'); // genuine trigger wait timeout
583
+ } else {
584
+ // A7 fallback: also check message strings for backward-compat with
585
+ // third-party trigger plugins that don't throw typed errors yet.
586
+ const msg = err instanceof Error ? err.message : String(err);
587
+ if (msg.includes('rejected') || msg.includes('denied')) {
588
+ setTaskStatus(taskId, 'blocked');
589
+ } else if (msg.includes('timeout')) {
590
+ setTaskStatus(taskId, 'timeout');
591
+ } else {
592
+ setTaskStatus(taskId, 'failed'); // plugin error, watcher crash, etc.
593
+ }
594
+ }
595
+ try {
596
+ await fireHook(taskId, 'task_failure');
597
+ } catch (hookErr) {
598
+ log.error(
599
+ `[task:${taskId}]`,
600
+ `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
601
+ );
602
+ }
603
+ return;
604
+ }
605
+ }
606
+
607
+ // 3. task_start hook (gate)
608
+ const hookResult = await executeHook(
609
+ config.hooks,
610
+ 'task_start',
611
+ buildTaskContext('task_start', pipelineInfo, trackInfoOf(taskId), buildTaskInfoObj(taskId)),
612
+ workDir,
613
+ abortController.signal,
614
+ );
615
+ if (hookResult.exitCode !== 0 || config.hooks?.task_start) {
616
+ log.debug(
617
+ `[task:${taskId}]`,
618
+ `task_start hook exit=${hookResult.exitCode} allowed=${hookResult.allowed}`,
619
+ );
620
+ }
621
+ if (!hookResult.allowed) {
622
+ state.finishedAt = nowISO();
623
+ setTaskStatus(taskId, 'blocked');
624
+ try {
625
+ await fireHook(taskId, 'task_failure');
626
+ } catch (hookErr) {
627
+ log.error(
628
+ `[task:${taskId}]`,
629
+ `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
630
+ );
631
+ }
632
+ return;
633
+ }
634
+
635
+ // 4. Mark running — set startedAt before emitting so subscribers see a
636
+ // complete snapshot (startedAt non-null) in the task_status_change event.
637
+ state.startedAt = nowISO();
638
+ setTaskStatus(taskId, 'running');
639
+ log.info(
640
+ `[task:${taskId}]`,
641
+ task.command ? `running: ${task.command}` : `running (driver task)`,
642
+ );
643
+
644
+ // File-only: resolved config for this task
645
+ const resolvedDriver = task.driver ?? track.driver ?? config.driver ?? 'claude-code';
646
+ const resolvedModel = task.model ?? track.model ?? config.model ?? '(default)';
647
+ const resolvedPerms = task.permissions ?? track.permissions ?? '(default)';
648
+ const resolvedCwd = task.cwd ?? track.cwd ?? workDir;
649
+ log.debug(
650
+ `[task:${taskId}]`,
651
+ `resolved: driver=${resolvedDriver} model=${resolvedModel} cwd=${resolvedCwd}`,
652
+ );
653
+ log.debug(`[task:${taskId}]`, `permissions: ${JSON.stringify(resolvedPerms)}`);
654
+ if (task.continue_from) {
655
+ log.debug(`[task:${taskId}]`, `continue_from: "${task.continue_from}"`);
656
+ }
657
+ if (task.timeout) {
658
+ log.debug(`[task:${taskId}]`, `timeout: ${task.timeout}`);
659
+ }
660
+
661
+ try {
662
+ let result: TaskResult;
663
+ const timeoutMs = task.timeout ? parseDuration(task.timeout) : undefined;
664
+
665
+ const runOpts = { timeoutMs, signal: abortController.signal };
666
+
667
+ if (task.command) {
668
+ log.debug(`[task:${taskId}]`, `command: ${task.command}`);
669
+ result = await runCommand(task.command, task.cwd ?? workDir, runOpts);
670
+ } else {
671
+ // AI task: apply middleware chain
672
+ const driverName = task.driver ?? track.driver ?? config.driver ?? 'claude-code';
673
+ const driver = getHandler<DriverPlugin>('drivers', driverName);
674
+
675
+ let prompt = task.prompt!;
676
+ const originalLen = prompt.length;
677
+ const mws = task.middlewares !== undefined ? task.middlewares : track.middlewares;
678
+ if (mws && mws.length > 0) {
679
+ log.debug(
680
+ `[task:${taskId}]`,
681
+ `middleware chain: ${mws.map((m) => m.type).join(' ')}`,
682
+ );
683
+ const mwCtx: MiddlewareContext = {
684
+ task,
685
+ track,
686
+ workDir: task.cwd ?? workDir,
687
+ };
688
+ for (const mwConfig of mws) {
689
+ const before = prompt.length;
690
+ const mwPlugin = getHandler<MiddlewarePlugin>('middlewares', mwConfig.type);
691
+ const next = await mwPlugin.enhance(
692
+ prompt,
693
+ mwConfig as Record<string, unknown>,
694
+ mwCtx,
695
+ );
696
+ // R3: a middleware that returns undefined / null / a non-string
697
+ // would silently corrupt the prompt sent to the driver. Fail loud
698
+ // here so the user sees "middleware X.enhance returned ..." in the
699
+ // task log instead of "[object Object]" arriving at the model.
700
+ if (typeof next !== 'string') {
701
+ throw new Error(
702
+ `middleware "${mwConfig.type}".enhance() returned ${next === null ? 'null' : typeof next}, expected string`,
703
+ );
704
+ }
705
+ prompt = next;
706
+ log.debug(
707
+ `[task:${taskId}]`,
708
+ ` ${mwConfig.type}: ${before} → ${prompt.length} chars`,
709
+ );
710
+ }
711
+ }
712
+ log.debug(
713
+ `[task:${taskId}]`,
714
+ `prompt: ${originalLen} chars (final: ${prompt.length} chars)`,
715
+ );
716
+ log.quiet(`--- prompt (final) ---\n${clip(prompt)}\n--- end prompt ---`, taskId);
717
+
718
+ // H1: hand the driver a continue_from that has already been
719
+ // qualified by dag.ts. Without this, drivers like codex/opencode/
720
+ // claude-code look up sessionMap / normalizedMap directly with
721
+ // the user's raw (possibly bare) string, which races whenever two
722
+ // tracks share a task name. dag.ts has the only authoritative
723
+ // resolver, so we use its precomputed answer here.
724
+ const enrichedTask: TaskConfig = {
725
+ ...task,
726
+ prompt,
727
+ continue_from: node.resolvedContinueFrom ?? task.continue_from,
728
+ };
729
+ const driverCtx: DriverContext = {
730
+ sessionMap,
731
+ normalizedMap,
732
+ workDir: task.cwd ?? workDir,
733
+ };
734
+ const spec = await driver.buildCommand(enrichedTask, track, driverCtx);
735
+ log.debug(`[task:${taskId}]`, `driver=${driverName}`);
736
+ log.debug(`[task:${taskId}]`, `spawn args: ${JSON.stringify(spec.args)}`);
737
+ if (spec.cwd) log.debug(`[task:${taskId}]`, `spawn cwd: ${spec.cwd}`);
738
+ if (spec.env)
739
+ log.debug(
740
+ `[task:${taskId}]`,
741
+ `spawn env overrides: ${Object.keys(spec.env).join(', ')}`,
742
+ );
743
+ if (spec.stdin) log.debug(`[task:${taskId}]`, `spawn stdin: ${spec.stdin.length} chars`);
744
+ result = await runSpawn(spec, driver, runOpts);
745
+ }
746
+
747
+ // 6. Determine terminal status (without emitting yet — result must be complete first)
748
+ // H2: branch on failureKind so spawn errors no longer masquerade as
749
+ // timeouts. Old runners that don't set failureKind still work — we
750
+ // fall back to the historical "`exitCode === -1` means timeout" heuristic so
751
+ // pre-existing third-party drivers don't regress.
752
+ let terminalStatus: TaskStatus;
753
+ const kind = result.failureKind;
754
+ if (kind === 'timeout') {
755
+ terminalStatus = 'timeout';
756
+ } else if (kind === 'spawn_error') {
757
+ terminalStatus = 'failed';
758
+ } else if (kind === undefined && result.exitCode === -1) {
759
+ // Legacy path: pre-H2 driver returned -1 with no kind. Treat as
760
+ // timeout for backward compatibility (the previous behaviour).
761
+ terminalStatus = 'timeout';
762
+ } else if (result.exitCode !== 0) {
763
+ terminalStatus = 'failed';
764
+ } else if (task.completion) {
765
+ const plugin = getHandler<CompletionPlugin>('completions', task.completion.type);
766
+ const completionCtx = { workDir: task.cwd ?? workDir, signal: abortController.signal };
767
+ const passed = await plugin.check(
768
+ task.completion as Record<string, unknown>,
769
+ result,
770
+ completionCtx,
771
+ );
772
+ // R4: strict boolean check. Truthy strings/numbers used to be coerced
773
+ // to success, so a check returning "ok" would let a failing task pass.
774
+ if (typeof passed !== 'boolean') {
775
+ throw new Error(
776
+ `completion "${task.completion.type}".check() returned ${passed === null ? 'null' : typeof passed}, expected boolean`,
777
+ );
778
+ }
779
+ terminalStatus = passed ? 'success' : 'failed';
780
+ } else {
781
+ terminalStatus = 'success';
782
+ }
783
+
784
+ // Store normalized text separately (in-memory) for continue_from handoff.
785
+ // R15: clip oversized values so a runaway parseResult can't accumulate
786
+ // hundreds of MB across tasks.
787
+ if (result.normalizedOutput !== null) {
788
+ const clipped =
789
+ result.normalizedOutput.length > MAX_NORMALIZED_BYTES
790
+ ? result.normalizedOutput.slice(0, MAX_NORMALIZED_BYTES) +
791
+ `\n[…clipped at ${MAX_NORMALIZED_BYTES} bytes]`
792
+ : result.normalizedOutput;
793
+ normalizedMap.set(taskId, clipped);
794
+ }
795
+
796
+ if (result.stderr) {
797
+ const stderrPath = resolve(log.dir, `${taskId.replace(/\./g, '_')}.stderr`);
798
+ await Bun.write(stderrPath, result.stderr);
799
+ result = { ...result, stderrPath };
800
+ }
801
+
802
+ if (result.sessionId) {
803
+ // H1: qualified-only key.
804
+ sessionMap.set(taskId, result.sessionId);
805
+ }
806
+
807
+ // Set result and finishedAt before emitting terminal status so listeners see complete state
808
+ state.result = result;
809
+ state.finishedAt = nowISO();
810
+ setTaskStatus(taskId, terminalStatus);
811
+
812
+ // Log task outcome with relevant details
813
+ const durSec = (result.durationMs / 1000).toFixed(1);
814
+ if (terminalStatus === 'success') {
815
+ log.info(`[task:${taskId}]`, `success (${durSec}s)`);
816
+ } else {
817
+ log.error(
818
+ `[task:${taskId}]`,
819
+ `${terminalStatus} exit=${result.exitCode} duration=${durSec}s`,
820
+ );
821
+ if (result.stderr) {
822
+ const tail = tailLines(result.stderr, 10);
823
+ log.error(`[task:${taskId}]`, `stderr tail:\n${tail}`);
824
+ }
825
+ }
826
+
827
+ // File-only: full stdout/stderr dump (clipped) + extracted metadata
828
+ log.debug(
829
+ `[task:${taskId}]`,
830
+ `stdout: ${result.stdout.length} chars, stderr: ${result.stderr.length} chars`,
831
+ );
832
+ if (result.sessionId) {
833
+ log.debug(`[task:${taskId}]`, `sessionId: ${result.sessionId}`);
834
+ }
835
+ if (result.stderrPath) {
836
+ log.debug(`[task:${taskId}]`, `wrote stderr: ${result.stderrPath}`);
837
+ }
838
+ if (result.stdout) {
839
+ log.quiet(
840
+ `--- stdout (${taskId}) ---\n${clip(result.stdout)}\n--- end stdout ---`,
841
+ taskId,
842
+ );
843
+ }
844
+ if (result.stderr) {
845
+ log.quiet(
846
+ `--- stderr (${taskId}) ---\n${clip(result.stderr)}\n--- end stderr ---`,
847
+ taskId,
848
+ );
849
+ }
850
+ if (task.completion) {
851
+ log.debug(
852
+ `[task:${taskId}]`,
853
+ `completion check: type=${task.completion.type} result=${terminalStatus}`,
854
+ );
855
+ }
856
+ } catch (err: unknown) {
857
+ const errMsg = err instanceof Error ? (err.stack ?? err.message) : String(err);
858
+ log.error(`[task:${taskId}]`, `failed before execution: ${errMsg}`);
859
+ state.result = {
860
+ exitCode: -1,
861
+ stdout: '',
862
+ stderr: errMsg,
863
+ stderrPath: null,
864
+ durationMs: 0,
865
+ sessionId: null,
866
+ normalizedOutput: null,
867
+ // H2: Engine-level pre-execution errors (driver throw, middleware
868
+ // throw, getHandler 404) classify as spawn_error — the process never
869
+ // ran, so calling them "timeout" was actively misleading.
870
+ failureKind: 'spawn_error',
871
+ };
872
+ state.finishedAt = nowISO();
873
+ setTaskStatus(taskId, 'failed');
874
+ }
875
+
876
+ // 7. Fire hooks
877
+ const finalStatus: TaskStatus = state.status;
878
+ try {
879
+ await fireHook(taskId, finalStatus === 'success' ? 'task_success' : 'task_failure');
880
+ } catch (hookErr) {
881
+ log.error(
882
+ `[task:${taskId}]`,
883
+ `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
884
+ );
885
+ }
886
+
887
+ // 8. Handle stop_all for failure states
888
+ if (finalStatus !== 'success' && getOnFailure(taskId) === 'stop_all') {
889
+ applyStopAll(node.track.id);
890
+ }
891
+ }
892
+
893
+ // ── Event loop ──
894
+ // Each task is launched as soon as ALL its deps reach a terminal state.
895
+ // We track in-flight tasks in `running` so a task completing mid-batch
896
+ // immediately unblocks its dependents without waiting for sibling tasks.
897
+ const running = new Map<string, Promise<void>>();
898
+
899
+ try {
900
+ while (!pipelineAborted) {
901
+ // Launch every task whose deps are all terminal and that isn't already in-flight
902
+ for (const [id, state] of states) {
903
+ if (state.status !== 'waiting' || running.has(id)) continue;
904
+ const node = dag.nodes.get(id)!;
905
+ const allDepsTerminal =
906
+ node.dependsOn.length === 0 ||
907
+ node.dependsOn.every((d) => isTerminal(states.get(d)!.status));
908
+ if (!allDepsTerminal) continue;
909
+ const p = processTask(id).finally(() => running.delete(id));
910
+ running.set(id, p);
911
+ }
912
+
913
+ // All tasks terminal — done
914
+ if ([...states.values()].every((s) => isTerminal(s.status))) break;
915
+
916
+ if (running.size === 0) {
917
+ // Nothing in-flight but non-terminal tasks exist (e.g. trigger-wait states
918
+ // that processTask hasn't been called for yet). Poll briefly.
919
+ await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
920
+ } else {
921
+ // Wait for any one task to finish, then re-scan for new launchables.
922
+ await Promise.race(running.values());
923
+ }
924
+ }
925
+
926
+ if (pipelineAborted) {
927
+ // Wait for in-flight tasks to honour the abort signal before marking states.
928
+ if (running.size > 0) await Promise.allSettled(running.values());
929
+ for (const [id, state] of states) {
930
+ if (!isTerminal(state.status)) {
931
+ // By the time allSettled resolves, processTask's try/finally has already
932
+ // set running tasks to success/failed/timeout. The only non-terminal
933
+ // statuses remaining here are waiting/idle tasks that were never started.
934
+ state.finishedAt = nowISO();
935
+ setTaskStatus(id, 'skipped');
936
+ }
937
+ }
938
+ }
939
+ } finally {
940
+ if (pipelineTimer) clearTimeout(pipelineTimer);
941
+ // Clean up the external abort signal listener to prevent dead references
942
+ // accumulating on long-lived shared AbortControllers.
943
+ if (options.signal) {
944
+ options.signal.removeEventListener('abort', externalAbortHandler);
945
+ }
946
+ // Safety net: drain any approvals still pending at shutdown (e.g. crash path).
947
+ if (approvalGateway.pending().length > 0) {
948
+ approvalGateway.abortAll('pipeline finished');
949
+ }
950
+ }
951
+
952
+ // ── Summary ──
953
+ const summary = { total: 0, success: 0, failed: 0, skipped: 0, timeout: 0, blocked: 0 };
954
+ for (const [, state] of states) {
955
+ summary.total++;
956
+ switch (state.status) {
957
+ case 'success':
958
+ summary.success++;
959
+ break;
960
+ case 'failed':
961
+ summary.failed++;
962
+ break;
963
+ case 'skipped':
964
+ summary.skipped++;
965
+ break;
966
+ case 'timeout':
967
+ summary.timeout++;
968
+ break;
969
+ case 'blocked':
970
+ summary.blocked++;
971
+ break;
972
+ }
973
+ }
974
+
975
+ const finishedAt = nowISO();
976
+ const durationMs = new Date(finishedAt).getTime() - new Date(startedAt).getTime();
977
+
978
+ if (pipelineAborted) {
979
+ await executeHook(
980
+ config.hooks,
981
+ 'pipeline_error',
982
+ buildPipelineErrorContext(pipelineInfo, 'Pipeline timeout exceeded'),
983
+ workDir,
984
+ );
985
+ } else {
986
+ await executeHook(
987
+ config.hooks,
988
+ 'pipeline_complete',
989
+ buildPipelineCompleteContext(
990
+ { ...pipelineInfo, finished_at: finishedAt, duration_ms: durationMs },
991
+ summary,
992
+ ),
993
+ workDir,
994
+ );
995
+ }
996
+
997
+ const allSuccess =
998
+ !pipelineAborted && summary.failed === 0 && summary.timeout === 0 && summary.blocked === 0;
999
+
1000
+ log.section('Pipeline summary');
1001
+ log.quiet(`status: ${pipelineAborted ? 'aborted (timeout)' : 'completed'}`);
1002
+ log.quiet(`duration: ${(durationMs / 1000).toFixed(1)}s`);
1003
+ log.quiet(
1004
+ `counts: total=${summary.total} success=${summary.success} ` +
1005
+ `failed=${summary.failed} skipped=${summary.skipped} ` +
1006
+ `timeout=${summary.timeout} blocked=${summary.blocked}`,
1007
+ );
1008
+ log.quiet('');
1009
+ log.quiet('per-task:');
1010
+ for (const [id, state] of states) {
1011
+ const dur =
1012
+ state.result?.durationMs != null ? `${(state.result.durationMs / 1000).toFixed(1)}s` : '-';
1013
+ const exit = state.result?.exitCode ?? '-';
1014
+ log.quiet(` ${state.status.padEnd(8)} ${id} (exit=${exit}, ${dur})`);
1015
+ }
1016
+
1017
+ log.info('[pipeline]', `completed "${config.name}"`);
1018
+ log.info(
1019
+ '[pipeline]',
1020
+ `Total: ${summary.total} | Success: ${summary.success} | Failed: ${summary.failed} | Skipped: ${summary.skipped} | Timeout: ${summary.timeout} | Blocked: ${summary.blocked}`,
1021
+ );
1022
+ log.info('[pipeline]', `Duration: ${(durationMs / 1000).toFixed(1)}s`);
1023
+ log.info('[pipeline]', `Log: ${log.path}`);
1024
+
1025
+ emit({ type: 'pipeline_end', runId, success: allSuccess });
1026
+ return { success: allSuccess, runId, logPath: log.path, summary, states: freezeStates(states) };
1027
+ } finally {
1028
+ // Close the persistent log file handle before pruning.
1029
+ log.close();
1030
+ // Prune old per-run log directories on every exit path (normal, blocked, or thrown).
1031
+ // Exclude the current runId so a concurrent run cannot delete its own live directory.
1032
+ if (maxLogRuns > 0) {
1033
+ await pruneLogDirs(resolve(workDir, '.tagma', 'logs'), maxLogRuns, runId);
1034
+ }
1035
+ }
1036
+ }
1037
+
/**
 * Delete the oldest run directories under `logsDir`, keeping only the most recent `keep`
 * total runs (including the currently-live run identified by `excludeRunId`).
 *
 * Only *directories* whose name starts with `run_` are considered. Regular files that
 * happen to match the prefix (for example `run_notes.txt`) are neither counted against
 * the retention budget nor deleted.
 *
 * Directories are sorted lexicographically; because runIds are prefixed with a base-36
 * timestamp, lexicographic order equals chronological order.
 *
 * `excludeRunId` is always skipped from deletion even if it would otherwise be pruned —
 * this prevents a concurrent run from removing a live log directory that is still in use.
 *
 * D10: The live run occupies one slot out of `keep`, so the maximum number of
 * *historical* dirs to retain is `keep - 1`. Without this adjustment the function
 * kept `keep` historical dirs plus 1 live dir = `keep + 1` total on disk.
 *
 * @param logsDir - Directory that contains one sub-directory per run.
 * @param keep - Total number of runs to retain, including the live one.
 * @param excludeRunId - Name of the live run directory; never deleted.
 * @returns Resolves once every deletion attempt has settled. Never rejects because of a
 *   missing `logsDir` or a failed deletion — pruning is best-effort.
 */
async function pruneLogDirs(logsDir: string, keep: number, excludeRunId: string): Promise<void> {
  // A missing (or unreadable) logsDir simply means there is nothing to prune.
  const entries = await readdir(logsDir, { withFileTypes: true }).catch(() => null);
  if (entries === null) return;

  // Historical run directories, oldest first. The directory check keeps stray files
  // named `run_*` from consuming a retention slot or being removed.
  const runDirs = entries
    .filter(
      (entry) =>
        entry.isDirectory() && entry.name.startsWith('run_') && entry.name !== excludeRunId,
    )
    .map((entry) => entry.name)
    .sort();

  // keep - 1 historical slots (1 slot is reserved for the live excludeRunId).
  const historyKeep = Math.max(0, keep - 1);
  const toDelete = runDirs.slice(0, Math.max(0, runDirs.length - historyKeep));

  await Promise.all(
    toDelete.map((dir) =>
      rm(resolve(logsDir, dir), { recursive: true, force: true }).catch(() => {
        // Ignore deletion errors — stale dirs are better than a crash
      }),
    ),
  );
}
1073
+
/**
 * Tell whether a task status is an end state.
 *
 * The end states are `success`, `failed`, `timeout`, `skipped` and `blocked`; every
 * other status yields `false`.
 *
 * @param status - Status to classify.
 * @returns `true` for the five end states listed above, `false` otherwise.
 */
function isTerminal(status: TaskStatus): boolean {
  switch (status) {
    case 'success':
    case 'failed':
    case 'timeout':
    case 'skipped':
    case 'blocked':
      return true;
    default:
      return false;
  }
}
1083
+
/**
 * Return a deep-copied, caller-safe snapshot of the states map.
 *
 * Each entry's `config`, `trackConfig` and `result` are copied recursively: plain objects
 * and arrays are duplicated at every depth, so a caller mutating nested values of the
 * snapshot (for example `config.trigger` or `trackConfig.tasks`) cannot affect the
 * objects held in `states`. Primitives are copied by value. Objects that are neither
 * arrays nor plain objects are kept by reference because they cannot be duplicated
 * generically.
 *
 * @param states - Live task states keyed by task id. Not modified.
 * @returns A new map with the same keys in the same order, holding independent copies.
 */
function freezeStates(states: Map<string, TaskState>): ReadonlyMap<string, TaskState> {
  // Source object → its copy. Lets cyclic structures terminate and keeps values that
  // were shared in the input shared (once) in the snapshot.
  const copies = new WeakMap<object, unknown>();

  const snapshot = (value: unknown): unknown => {
    if (typeof value !== 'object' || value === null) return value;
    if (copies.has(value)) return copies.get(value);

    if (Array.isArray(value)) {
      const items: unknown[] = [];
      copies.set(value, items); // register before recursing so cycles resolve
      for (const item of value) items.push(snapshot(item));
      return items;
    }

    // Anything that is not a plain object (Date, Map, class instance, …) stays as-is.
    const proto: unknown = Object.getPrototypeOf(value);
    if (proto !== Object.prototype && proto !== null) return value;

    const fields: Record<string, unknown> = {};
    copies.set(value, fields);
    for (const [key, field] of Object.entries(value)) {
      // defineProperty rather than assignment: a key literally named "__proto__" must
      // become an own data property, exactly as object spread would treat it.
      Object.defineProperty(fields, key, {
        value: snapshot(field),
        writable: true,
        enumerable: true,
        configurable: true,
      });
    }
    return fields;
  };

  const copy = new Map<string, TaskState>();
  for (const [id, s] of states) {
    copy.set(id, {
      // Spread first so the top level is always a fresh plain object, then deep-copy
      // whatever it contains.
      config: snapshot({ ...s.config }) as TaskState['config'],
      trackConfig: snapshot({ ...s.trackConfig }) as TaskState['trackConfig'],
      status: s.status,
      result: s.result ? (snapshot({ ...s.result }) as TaskState['result']) : null,
      startedAt: s.startedAt,
      finishedAt: s.finishedAt,
    });
  }
  return copy;
}