@tagma/sdk 0.4.14 → 0.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +569 -569
  3. package/dist/dag.d.ts.map +1 -1
  4. package/dist/dag.js +22 -56
  5. package/dist/dag.js.map +1 -1
  6. package/dist/engine.d.ts.map +1 -1
  7. package/dist/engine.js +63 -37
  8. package/dist/engine.js.map +1 -1
  9. package/dist/middlewares/static-context.d.ts.map +1 -1
  10. package/dist/middlewares/static-context.js +7 -3
  11. package/dist/middlewares/static-context.js.map +1 -1
  12. package/dist/prompt-doc.d.ts +36 -0
  13. package/dist/prompt-doc.d.ts.map +1 -0
  14. package/dist/prompt-doc.js +44 -0
  15. package/dist/prompt-doc.js.map +1 -0
  16. package/dist/sdk.d.ts +3 -0
  17. package/dist/sdk.d.ts.map +1 -1
  18. package/dist/sdk.js +4 -0
  19. package/dist/sdk.js.map +1 -1
  20. package/dist/task-ref.d.ts +55 -0
  21. package/dist/task-ref.d.ts.map +1 -0
  22. package/dist/task-ref.js +101 -0
  23. package/dist/task-ref.js.map +1 -0
  24. package/dist/templates.d.ts +20 -0
  25. package/dist/templates.d.ts.map +1 -0
  26. package/dist/templates.js +93 -0
  27. package/dist/templates.js.map +1 -0
  28. package/dist/validate-raw.d.ts.map +1 -1
  29. package/dist/validate-raw.js +27 -53
  30. package/dist/validate-raw.js.map +1 -1
  31. package/package.json +2 -2
  32. package/scripts/preinstall.js +31 -31
  33. package/src/adapters/stdin-approval.ts +106 -106
  34. package/src/adapters/websocket-approval.ts +224 -224
  35. package/src/approval.ts +131 -131
  36. package/src/bootstrap.ts +37 -37
  37. package/src/completions/exit-code.ts +34 -34
  38. package/src/completions/file-exists.ts +66 -66
  39. package/src/completions/output-check.ts +86 -86
  40. package/src/config-ops.ts +307 -307
  41. package/src/dag.ts +24 -54
  42. package/src/drivers/claude-code.ts +250 -250
  43. package/src/engine.ts +1137 -1098
  44. package/src/hooks.ts +187 -187
  45. package/src/logger.ts +182 -182
  46. package/src/middlewares/static-context.ts +49 -45
  47. package/src/pipeline-runner.ts +156 -156
  48. package/src/prompt-doc.ts +49 -0
  49. package/src/registry.ts +242 -242
  50. package/src/runner.ts +395 -395
  51. package/src/schema.test.ts +101 -101
  52. package/src/schema.ts +338 -338
  53. package/src/sdk.ts +111 -92
  54. package/src/task-ref.ts +120 -0
  55. package/src/triggers/file.ts +164 -164
  56. package/src/triggers/manual.ts +86 -86
  57. package/src/types.ts +18 -18
  58. package/src/utils.ts +203 -203
  59. package/src/validate-raw.ts +412 -442
package/src/engine.ts CHANGED
@@ -1,1098 +1,1137 @@
1
- import { resolve } from 'path';
2
- import { readdir, rm } from 'fs/promises';
3
- import type {
4
- PipelineConfig,
5
- TaskConfig,
6
- TaskState,
7
- TaskStatus,
8
- TaskResult,
9
- DriverPlugin,
10
- TriggerPlugin,
11
- CompletionPlugin,
12
- MiddlewarePlugin,
13
- MiddlewareContext,
14
- DriverContext,
15
- OnFailure,
16
- } from './types';
17
- import { buildDag, type Dag } from './dag';
18
- import { getHandler, hasHandler, loadPlugins } from './registry';
19
- import { runSpawn, runCommand } from './runner';
20
- import { parseDuration, nowISO, generateRunId } from './utils';
21
- import {
22
- executeHook,
23
- buildPipelineStartContext,
24
- buildTaskContext,
25
- buildPipelineCompleteContext,
26
- buildPipelineErrorContext,
27
- type PipelineInfo,
28
- type TrackInfo,
29
- type TaskInfo,
30
- } from './hooks';
31
- import { Logger, tailLines, clip, type LogLevel } from './logger';
32
- import { InMemoryApprovalGateway, type ApprovalGateway } from './approval';
33
-
34
- // ═══ A7: Typed trigger errors ═══
35
- // Replace string-matching on error messages with structured error types so
36
- // coincidental substrings don't cause misclassification.
37
-
38
/**
 * Error a trigger throws to signal that the task was rejected (by a user or
 * a policy) rather than having failed or timed out.
 *
 * The engine recognises it with `instanceof` and moves the task to the
 * `blocked` status.
 */
export class TriggerBlockedError extends Error {
  /** Stable, machine-readable discriminator for this error class. */
  readonly code = 'TRIGGER_BLOCKED' as const;

  /**
   * @param message Human-readable reason for the rejection.
   */
  constructor(message: string) {
    super(message);
    // Override the inherited "Error" so logs and stack traces name this class.
    this.name = 'TriggerBlockedError';
  }
}
45
-
46
/**
 * Error a trigger throws when its own wait expired before it fired.
 *
 * The engine recognises it with `instanceof` and moves the task to the
 * `timeout` status.
 */
export class TriggerTimeoutError extends Error {
  /** Stable, machine-readable discriminator for this error class. */
  readonly code = 'TRIGGER_TIMEOUT' as const;

  /**
   * @param message Human-readable description of the expired wait.
   */
  constructor(message: string) {
    super(message);
    // Override the inherited "Error" so logs and stack traces name this class.
    this.name = 'TriggerTimeoutError';
  }
}
53
-
54
- // ═══ Preflight Validation ═══
55
-
56
/**
 * Validates the pipeline against the plugin registry before anything runs.
 *
 * For every DAG node it checks that:
 *  - the resolved driver is registered (skipped for command-only tasks),
 *  - the trigger, completion and middleware types are registered,
 *  - a `continue_from` handoff is achievable: either the task's own driver
 *    supports session resume, or the upstream task's driver implements
 *    `parseResult` so its output can be injected as text.
 *
 * All problems are collected and reported together.
 *
 * @param config Pipeline configuration; supplies the pipeline-level default driver.
 * @param dag    DAG built from `config`; its nodes are the tasks to validate.
 * @throws Error whose message starts with "Preflight validation failed:" and
 *         lists every problem found, one per line.
 */
function preflight(config: PipelineConfig, dag: Dag): void {
  const errors: string[] = [];

  for (const [, node] of dag.nodes) {
    const { task, track } = node;
    const driverName = task.driver ?? track.driver ?? config.driver ?? 'claude-code';

    // Pure command tasks never go through a driver, so the driver
    // registration check does not apply to them.
    const isCommandOnly = task.command && !task.prompt;
    if (!isCommandOnly && !hasHandler('drivers', driverName)) {
      errors.push(`Task "${node.taskId}": driver "${driverName}" not registered`);
    }

    if (task.trigger && !hasHandler('triggers', task.trigger.type)) {
      errors.push(`Task "${node.taskId}": trigger type "${task.trigger.type}" not registered`);
    }

    if (task.completion && !hasHandler('completions', task.completion.type)) {
      errors.push(
        `Task "${node.taskId}": completion type "${task.completion.type}" not registered`,
      );
    }

    const mws = task.middlewares ?? track.middlewares ?? [];
    for (const mw of mws) {
      if (!hasHandler('middlewares', mw.type)) {
        errors.push(`Task "${node.taskId}": middleware type "${mw.type}" not registered`);
      }
    }

    // ── continue_from handoff feasibility ──
    // Only meaningful when the task's own driver is known; an unregistered
    // driver has already been reported above.
    if (!task.continue_from || !hasHandler('drivers', driverName)) continue;

    const driver = getHandler<DriverPlugin>('drivers', driverName);
    // Session resume makes the handoff possible regardless of the upstream driver.
    if (driver.capabilities.sessionResume) continue;

    const upstreamId = resolveRefInDag(dag, task.continue_from, track.id);
    const upstream = upstreamId ? dag.nodes.get(upstreamId) : undefined;
    // An unresolved or ambiguous reference is not reported here.
    if (!upstreamId || !upstream) continue;

    // Without session resume the only remaining path is in-memory text
    // injection, which needs the upstream driver to implement parseResult.
    const upstreamDriverName =
      upstream.task.driver ?? upstream.track.driver ?? config.driver ?? 'claude-code';
    const upstreamDriver = hasHandler('drivers', upstreamDriverName)
      ? getHandler<DriverPlugin>('drivers', upstreamDriverName)
      : null;
    const canNormalize = typeof upstreamDriver?.parseResult === 'function';
    if (canNormalize) continue;

    // Say precisely what is wrong with the upstream side: a missing driver
    // and a driver without parseResult call for different fixes.
    const upstreamProblem = upstreamDriver
      ? `driver "${upstreamDriverName}" does not implement parseResult`
      : `driver "${upstreamDriverName}" is not registered`;
    errors.push(
      `Task "${node.taskId}" uses continue_from: "${task.continue_from}", ` +
        `but its driver "${driverName}" does not support session resume and ` +
        `upstream task "${upstreamId}" cannot hand off text (${upstreamProblem}). ` +
        `Use a driver with parseResult, or remove continue_from.`,
    );
  }

  if (errors.length > 0) {
    throw new Error(`Preflight validation failed:\n  - ${errors.join('\n  - ')}`);
  }
}
123
-
124
/**
 * Resolves a task reference to the ID of a node in the DAG.
 *
 * Lookup order:
 *  1. `ref` is already a node ID,
 *  2. `<fromTrackId>.<ref>` exists (same-track match wins),
 *  3. exactly one node ID ends with `.<ref>` (cross-track bare name).
 *
 * @param dag         DAG whose `nodes` map is searched.
 * @param ref         Reference as written by the user (qualified or bare).
 * @param fromTrackId ID of the track the referencing task belongs to.
 * @returns The matching node ID, or `null` when nothing matches or the bare
 *          name is shared by several tasks across tracks.
 */
function resolveRefInDag(dag: Dag, ref: string, fromTrackId: string): string | null {
  if (dag.nodes.has(ref)) return ref;

  const qualified = `${fromTrackId}.${ref}`;
  if (dag.nodes.has(qualified)) return qualified;

  // Cross-track lookup by bare name: only an unambiguous hit counts.
  const suffix = `.${ref}`;
  const candidates: string[] = [];
  for (const [id] of dag.nodes) {
    if (!id.endsWith(suffix)) continue;
    candidates.push(id);
    // A second hit means the bare name is ambiguous.
    if (candidates.length > 1) return null;
  }

  const [only] = candidates;
  return only ?? null;
}
143
-
144
- // ═══ Engine ═══
145
-
146
/** Outcome of a single pipeline run, as returned by `runPipeline`. */
export interface EngineResult {
  /** Overall verdict for the run. */
  readonly success: boolean;
  /** Identifier of this run. */
  readonly runId: string;
  /** Path reported by the run's logger. */
  readonly logPath: string;
  /** Task counts: `total` plus one counter per named outcome. */
  readonly summary: {
    total: number;
    success: number;
    failed: number;
    skipped: number;
    timeout: number;
    blocked: number;
  };
  /** Task states keyed by task ID. */
  readonly states: ReadonlyMap<string, TaskState>;
}
160
-
161
- // ═══ Pipeline Events ═══
162
-
163
/**
 * Events delivered to `RunPipelineOptions.onEvent`, discriminated by `type`.
 */
export type PipelineEvent =
  /** A task moved from `prevStatus` to `status`; `state` is a snapshot taken at emit time. */
  | {
      readonly type: 'task_status_change';
      readonly taskId: string;
      readonly status: TaskStatus;
      readonly prevStatus: TaskStatus;
      readonly runId: string;
      readonly state: TaskState;
    }
  /** The pipeline started; `states` is a full snapshot so listeners can initialise their mirrors. */
  | {
      readonly type: 'pipeline_start';
      readonly runId: string;
      readonly states: ReadonlyMap<string, TaskState>;
    }
  /** The pipeline finished. */
  | { readonly type: 'pipeline_end'; readonly runId: string; readonly success: boolean }
  /**
   * One log line, emitted alongside every write to pipeline.log, so UIs can
   * stream the run without tailing the file. `taskId` is set for
   * task-scoped lines and `null` for pipeline-wide messages (configuration
   * dumps, DAG topology, pipeline start/end).
   */
  | {
      readonly type: 'task_log';
      readonly runId: string;
      readonly taskId: string | null;
      readonly level: LogLevel;
      readonly timestamp: string;
      readonly text: string;
    };
193
-
194
/** Optional knobs accepted by `runPipeline`. */
export interface RunPipelineOptions {
  /**
   * Gateway used for approvals during the run. When omitted, the engine
   * creates an `InMemoryApprovalGateway`.
   */
  readonly approvalGateway?: ApprovalGateway;
  /**
   * How many per-run log directories to keep under `<workDir>/.tagma/logs/`.
   * The oldest ones are removed after each run. Default: 20. Pass 0 to
   * turn the cleanup off.
   */
  readonly maxLogRuns?: number;
  /**
   * Run ID chosen by the caller. When present it replaces the ID the engine
   * would otherwise obtain from `generateRunId()`, so the editor and the SDK
   * log directories share the same ID.
   */
  readonly runId?: string;
  /**
   * External AbortSignal. Aborting it cancels the pipeline immediately,
   * like the pipeline timeout firing, but under the caller's control.
   */
  readonly signal?: AbortSignal;
  /**
   * Invoked on every pipeline/task status transition. Intended for
   * real-time UI updates such as refreshing a visual workflow graph.
   */
  readonly onEvent?: (event: PipelineEvent) => void;
  /**
   * Suppresses the engine's own `loadPlugins(config.plugins)` call. Set it
   * when the host has already loaded plugins from a custom resolution path
   * (e.g. a user workspace's node_modules), so the engine does not resolve
   * them again through Node's default cwd-based import.
   */
  readonly skipPluginLoading?: boolean;
}
225
-
226
// Delay, in milliseconds, between polls while nothing is in flight but some
// tasks are still non-terminal (e.g. waiting on a file or manual trigger).
const POLL_INTERVAL_MS = 50;

// R15: upper bound for each normalized-output entry kept in normalizedMap,
// so a runaway parseResult cannot pile up hundreds of MB across tasks.
// Roughly 1 MB, which is generous for a text-context handoff between AI tasks.
// NOTE: the engine compares this against the string's `.length`, so the
// unit is effectively UTF-16 code units rather than encoded bytes.
const MAX_NORMALIZED_BYTES = 1_000_000;
234
-
235
- export async function runPipeline(
236
- config: PipelineConfig,
237
- workDir: string,
238
- options: RunPipelineOptions = {},
239
- ): Promise<EngineResult> {
240
- const approvalGateway = options.approvalGateway ?? new InMemoryApprovalGateway();
241
- const maxLogRuns = options.maxLogRuns ?? 20;
242
-
243
- // Load any plugins declared in the pipeline config before preflight so that
244
- // drivers, completions, and middlewares referenced in YAML are registered.
245
- // Hosts that pre-load plugins from a custom path (e.g. the editor loading
246
- // from the user's workspace node_modules) pass skipPluginLoading: true so
247
- // we don't re-resolve via Node's cwd-based default import.
248
- if (!options.skipPluginLoading && config.plugins?.length) {
249
- await loadPlugins(config.plugins);
250
- }
251
-
252
- const dag = buildDag(config);
253
- const runId = options.runId ?? generateRunId();
254
- preflight(config, dag);
255
-
256
- const startedAt = nowISO();
257
- const pipelineInfo: PipelineInfo = { name: config.name, run_id: runId, started_at: startedAt };
258
- // Forward every structured log line to subscribers as task_log events.
259
- // Reading options.onEvent inside the callback (vs. capturing it once) keeps
260
- // the SDK behavior correct if callers pass a fresh onEvent on each run.
261
- const log = new Logger(workDir, runId, (record) => {
262
- options.onEvent?.({
263
- type: 'task_log',
264
- runId,
265
- taskId: record.taskId,
266
- level: record.level,
267
- timestamp: record.timestamp,
268
- text: record.text,
269
- });
270
- });
271
-
272
- try {
273
- log.info('[pipeline]', `start "${config.name}" run_id=${runId}`);
274
-
275
- // File-only: dump the resolved pipeline shape + DAG topology for post-mortem.
276
- log.section('Pipeline configuration');
277
- log.quiet(`name: ${config.name}`);
278
- log.quiet(`driver: ${config.driver ?? '(default: claude-code)'}`);
279
- log.quiet(`timeout: ${config.timeout ?? '(none)'}`);
280
- log.quiet(`tracks: ${config.tracks.length}`);
281
- log.quiet(`tasks (total): ${dag.nodes.size}`);
282
- log.quiet(`plugins: ${(config.plugins ?? []).join(', ') || '(none)'}`);
283
- log.quiet(
284
- `hooks: ${config.hooks ? Object.keys(config.hooks).join(', ') || '(none)' : '(none)'}`,
285
- );
286
-
287
- log.section('DAG topology');
288
- for (const [id, node] of dag.nodes) {
289
- const deps = node.dependsOn.length ? node.dependsOn.join(', ') : '(root)';
290
- const kind = node.task.prompt ? 'ai' : 'cmd';
291
- log.quiet(` • ${id} [${kind}] track=${node.track.id} deps=[${deps}]`);
292
- }
293
- log.quiet('');
294
-
295
- // Initialize states (before hook, so we can return them even if blocked)
296
- const states = new Map<string, TaskState>();
297
- for (const [id, node] of dag.nodes) {
298
- states.set(id, {
299
- config: node.task,
300
- trackConfig: node.track,
301
- status: 'idle',
302
- result: null,
303
- startedAt: null,
304
- finishedAt: null,
305
- });
306
- }
307
-
308
- // Pipeline start hook (gate)
309
- const startHook = await executeHook(
310
- config.hooks,
311
- 'pipeline_start',
312
- buildPipelineStartContext(pipelineInfo),
313
- workDir,
314
- );
315
- if (!startHook.allowed) {
316
- console.error(`Pipeline blocked by pipeline_start hook (exit code ${startHook.exitCode})`);
317
- await executeHook(
318
- config.hooks,
319
- 'pipeline_error',
320
- buildPipelineErrorContext(pipelineInfo, 'pipeline_blocked', 'pipeline_blocked'),
321
- workDir,
322
- );
323
- // All tasks stay idle — pipeline never started
324
- return {
325
- success: false,
326
- runId,
327
- logPath: log.path,
328
- summary: {
329
- total: dag.nodes.size,
330
- success: 0,
331
- failed: 0,
332
- skipped: 0,
333
- timeout: 0,
334
- blocked: 0,
335
- },
336
- states: freezeStates(states),
337
- };
338
- }
339
-
340
- // Pipeline approved — transition all tasks to waiting
341
- for (const [, state] of states) {
342
- state.status = 'waiting';
343
- }
344
- // Include a full states snapshot so listeners can initialize their mirrors without missing events
345
- const statesSnapshot: ReadonlyMap<string, TaskState> = new Map(
346
- [...states.entries()].map(([id, s]) => [id, { ...s }]),
347
- );
348
- options.onEvent?.({ type: 'pipeline_start', runId, states: statesSnapshot });
349
-
350
- const sessionMap = new Map<string, string>();
351
- const normalizedMap = new Map<string, string>();
352
-
353
- // Pipeline timeout
354
- const pipelineTimeoutMs = config.timeout ? parseDuration(config.timeout) : 0;
355
- let pipelineAborted = false;
356
- const abortController = new AbortController();
357
- let pipelineTimer: ReturnType<typeof setTimeout> | null = null;
358
-
359
- if (pipelineTimeoutMs > 0) {
360
- pipelineTimer = setTimeout(() => {
361
- pipelineAborted = true;
362
- abortController.abort();
363
- }, pipelineTimeoutMs);
364
- }
365
-
366
- // When the pipeline is aborted (timeout, external shutdown), drain all
367
- // pending approvals so waiting triggers unblock immediately.
368
- abortController.signal.addEventListener('abort', () => {
369
- approvalGateway.abortAll('pipeline aborted');
370
- });
371
-
372
- // Wire external cancel signal into the internal abort controller.
373
- const externalAbortHandler = () => {
374
- pipelineAborted = true;
375
- abortController.abort();
376
- };
377
- if (options.signal) {
378
- if (options.signal.aborted) {
379
- externalAbortHandler();
380
- } else {
381
- options.signal.addEventListener('abort', externalAbortHandler, { once: true });
382
- }
383
- }
384
-
385
- // ── Helpers ──
386
-
387
// Hands an event to the caller-supplied listener, when one was provided.
// `options.onEvent` is read on every call rather than cached.
function emit(event: PipelineEvent): void {
  if (options.onEvent != null) {
    options.onEvent(event);
  }
}
390
-
391
// Moves a task to `newStatus` and emits a `task_status_change` event that
// carries a snapshot of the task's state.
//
// For terminal statuses the caller must set `result` and `finishedAt` on
// the state BEFORE calling this, because the snapshot is taken here.
function setTaskStatus(taskId: string, newStatus: TaskStatus): void {
  const current = states.get(taskId)!;
  const prevStatus = current.status;

  // Terminal lock: a task that already reached a terminal status is never
  // re-transitioned. Otherwise a task marked skipped by stop_all could be
  // overwritten with success/failed by its still-running processTask
  // promise, producing an invalid double transition.
  if (isTerminal(prevStatus)) return;

  current.status = newStatus;

  // Copy the fields so later mutations of the live state do not leak into
  // the event payload.
  const { config: taskConfig, trackConfig, result, startedAt, finishedAt } = current;
  const snapshot: TaskState = {
    config: taskConfig,
    trackConfig,
    status: newStatus,
    result,
    startedAt,
    finishedAt,
  };

  emit({ type: 'task_status_change', taskId, status: newStatus, prevStatus, runId, state: snapshot });
}
418
-
419
// Failure policy of the track that owns `taskId`; falls back to
// 'skip_downstream' when the node is unknown or the track sets no policy.
function getOnFailure(taskId: string): OnFailure {
  const owningNode = dag.nodes.get(taskId);
  const policy = owningNode?.track.on_failure;
  return policy ?? 'skip_downstream';
}
422
-
423
// Classifies one upstream dependency for scheduling purposes:
//   'satisfied'   – the dependent task may proceed,
//   'skip'        – the dependent task must be skipped,
//   'unsatisfied' – the dependency has not reached a terminal status yet.
function isDependencySatisfied(depId: string): 'satisfied' | 'unsatisfied' | 'skip' {
  const dep = states.get(depId);
  // An unknown dependency is treated as one that will never be satisfied.
  if (!dep) return 'skip';

  const status = dep.status;
  if (status === 'success') return 'satisfied';
  if (status === 'skipped') return 'skip';

  if (status === 'failed' || status === 'timeout' || status === 'blocked') {
    // A failed upstream only lets dependents through when its track opted
    // into on_failure: 'ignore'.
    return getOnFailure(depId) === 'ignore' ? 'satisfied' : 'skip';
  }

  return 'unsatisfied';
}
439
-
440
/**
 * H3: implements the "stop_all" failure policy for the WHOLE pipeline.
 * Historically it only stopped tasks in the failing track, which matched
 * neither its name nor what users expected.
 *
 *  - Tasks already in flight are signalled through the shared abort
 *    controller, so drivers / runner.ts can cancel cooperatively
 *    (returning `failureKind: 'timeout'`).
 *  - Tasks still waiting, in every track, are marked skipped right away so
 *    the run finishes promptly.
 *
 * The terminal lock in setTaskStatus guarantees that a running task which
 * completes later cannot overwrite a skipped state.
 */
function applyStopAll(_failedTrackId: string): void {
  pipelineAborted = true;
  abortController.abort();

  states.forEach((state, id) => {
    if (state.status !== 'waiting') return;
    // finishedAt must be set before the status change so the emitted
    // snapshot is complete.
    state.finishedAt = nowISO();
    setTaskStatus(id, 'skipped');
  });
}
462
-
463
// Builds the snake_case task descriptor handed to hooks from the task's
// current state. Fields that come from the result are null until a result
// exists.
function buildTaskInfoObj(taskId: string): TaskInfo {
  const { config: taskConfig, status, result, startedAt, finishedAt } = states.get(taskId)!;
  return {
    id: taskId,
    name: taskConfig.name,
    // A task with a prompt is an AI task; anything else is a command.
    type: taskConfig.prompt ? 'ai' : 'command',
    status,
    exit_code: result?.exitCode ?? null,
    duration_ms: result?.durationMs ?? null,
    stderr_path: result?.stderrPath ?? null,
    session_id: result?.sessionId ?? null,
    started_at: startedAt,
    finished_at: finishedAt,
  };
}
478
-
479
// Returns the id and name of the track that owns `taskId`, in the shape
// hooks expect.
function trackInfoOf(taskId: string): TrackInfo {
  const { id, name } = dag.nodes.get(taskId)!.track;
  return { id, name };
}
483
-
484
// Runs the configured `task_success` / `task_failure` hook for a task,
// passing the pipeline's abort signal to the hook execution.
async function fireHook(taskId: string, event: 'task_success' | 'task_failure'): Promise<void> {
  const hookContext = buildTaskContext(
    event,
    pipelineInfo,
    trackInfoOf(taskId),
    buildTaskInfoObj(taskId),
  );
  await executeHook(config.hooks, event, hookContext, workDir, abortController.signal);
}
493
-
494
- // ── Process a single task ──
495
-
496
- async function processTask(taskId: string): Promise<void> {
497
- const state = states.get(taskId)!;
498
- const node = dag.nodes.get(taskId)!;
499
- const task = node.task;
500
- const track = node.track;
501
-
502
- log.section(`Task ${taskId}`, taskId);
503
- log.debug(
504
- `[task:${taskId}]`,
505
- `type=${task.prompt ? 'ai' : 'cmd'} track=${track.id} deps=[${node.dependsOn.join(', ') || '(root)'}]`,
506
- );
507
-
508
- // 1. Check dependencies
509
- for (const depId of node.dependsOn) {
510
- const result = isDependencySatisfied(depId);
511
- if (result === 'skip') {
512
- const depStatus = states.get(depId)?.status ?? 'unknown';
513
- log.debug(`[task:${taskId}]`, `skipped (upstream "${depId}" status=${depStatus})`);
514
- state.finishedAt = nowISO();
515
- setTaskStatus(taskId, 'skipped');
516
- return;
517
- }
518
- if (result === 'unsatisfied') return; // still waiting
519
- }
520
-
521
- // 2. Check trigger
522
- if (task.trigger) {
523
- log.debug(
524
- `[task:${taskId}]`,
525
- `trigger wait: type=${task.trigger.type} ${JSON.stringify(task.trigger)}`,
526
- );
527
- try {
528
- const triggerPlugin = getHandler<TriggerPlugin>('triggers', task.trigger.type);
529
- // R6: race the plugin's watch() against the pipeline's abort signal.
530
- // Third-party triggers may forget to wire up ctx.signal — without
531
- // this race, an aborted pipeline would hang forever waiting for the
532
- // plugin's watch promise to resolve. The race resolves on whichever
533
- // path settles first, and the cleanup paths in finally never run on
534
- // the orphaned plugin promise (it's allowed to leak a watcher; the
535
- // pipeline is being torn down anyway).
536
- await new Promise<unknown>((resolve, reject) => {
537
- let settled = false;
538
- const onAbort = () => {
539
- if (settled) return;
540
- settled = true;
541
- abortController.signal.removeEventListener('abort', onAbort);
542
- reject(new Error('Pipeline aborted'));
543
- };
544
- if (abortController.signal.aborted) {
545
- onAbort();
546
- return;
547
- }
548
- abortController.signal.addEventListener('abort', onAbort, { once: true });
549
- triggerPlugin
550
- .watch(task.trigger as Record<string, unknown>, {
551
- taskId: node.taskId,
552
- trackId: track.id,
553
- workDir: task.cwd ?? workDir,
554
- signal: abortController.signal,
555
- approvalGateway,
556
- })
557
- .then(
558
- (v) => {
559
- if (settled) return;
560
- settled = true;
561
- abortController.signal.removeEventListener('abort', onAbort);
562
- resolve(v);
563
- },
564
- (e) => {
565
- if (settled) return;
566
- settled = true;
567
- abortController.signal.removeEventListener('abort', onAbort);
568
- reject(e);
569
- },
570
- );
571
- });
572
- log.debug(`[task:${taskId}]`, `trigger fired`);
573
- } catch (err: unknown) {
574
- // If pipeline was aborted while we were still waiting for the trigger,
575
- // this task never entered running state → skipped, not timeout.
576
- state.finishedAt = nowISO();
577
- if (pipelineAborted) {
578
- setTaskStatus(taskId, 'skipped');
579
- } else if (err instanceof TriggerBlockedError) {
580
- setTaskStatus(taskId, 'blocked'); // user/policy rejection
581
- } else if (err instanceof TriggerTimeoutError) {
582
- setTaskStatus(taskId, 'timeout'); // genuine trigger wait timeout
583
- } else {
584
- // A7 fallback: also check message strings for backward-compat with
585
- // third-party trigger plugins that don't throw typed errors yet.
586
- const msg = err instanceof Error ? err.message : String(err);
587
- if (msg.includes('rejected') || msg.includes('denied')) {
588
- setTaskStatus(taskId, 'blocked');
589
- } else if (msg.includes('timeout')) {
590
- setTaskStatus(taskId, 'timeout');
591
- } else {
592
- setTaskStatus(taskId, 'failed'); // plugin error, watcher crash, etc.
593
- }
594
- }
595
- try {
596
- await fireHook(taskId, 'task_failure');
597
- } catch (hookErr) {
598
- log.error(
599
- `[task:${taskId}]`,
600
- `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
601
- );
602
- }
603
- return;
604
- }
605
- }
606
-
607
- // 3. task_start hook (gate)
608
- const hookResult = await executeHook(
609
- config.hooks,
610
- 'task_start',
611
- buildTaskContext('task_start', pipelineInfo, trackInfoOf(taskId), buildTaskInfoObj(taskId)),
612
- workDir,
613
- abortController.signal,
614
- );
615
- if (hookResult.exitCode !== 0 || config.hooks?.task_start) {
616
- log.debug(
617
- `[task:${taskId}]`,
618
- `task_start hook exit=${hookResult.exitCode} allowed=${hookResult.allowed}`,
619
- );
620
- }
621
- if (!hookResult.allowed) {
622
- state.finishedAt = nowISO();
623
- setTaskStatus(taskId, 'blocked');
624
- try {
625
- await fireHook(taskId, 'task_failure');
626
- } catch (hookErr) {
627
- log.error(
628
- `[task:${taskId}]`,
629
- `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
630
- );
631
- }
632
- return;
633
- }
634
-
635
- // 4. Mark running — set startedAt before emitting so subscribers see a
636
- // complete snapshot (startedAt non-null) in the task_status_change event.
637
- state.startedAt = nowISO();
638
- setTaskStatus(taskId, 'running');
639
- log.info(
640
- `[task:${taskId}]`,
641
- task.command ? `running: ${task.command}` : `running (driver task)`,
642
- );
643
-
644
- // File-only: resolved config for this task
645
- const resolvedDriver = task.driver ?? track.driver ?? config.driver ?? 'claude-code';
646
- const resolvedModel = task.model ?? track.model ?? config.model ?? '(default)';
647
- const resolvedPerms = task.permissions ?? track.permissions ?? '(default)';
648
- const resolvedCwd = task.cwd ?? track.cwd ?? workDir;
649
- log.debug(
650
- `[task:${taskId}]`,
651
- `resolved: driver=${resolvedDriver} model=${resolvedModel} cwd=${resolvedCwd}`,
652
- );
653
- log.debug(`[task:${taskId}]`, `permissions: ${JSON.stringify(resolvedPerms)}`);
654
- if (task.continue_from) {
655
- log.debug(`[task:${taskId}]`, `continue_from: "${task.continue_from}"`);
656
- }
657
- if (task.timeout) {
658
- log.debug(`[task:${taskId}]`, `timeout: ${task.timeout}`);
659
- }
660
-
661
- try {
662
- let result: TaskResult;
663
- const timeoutMs = task.timeout ? parseDuration(task.timeout) : undefined;
664
-
665
- const runOpts = { timeoutMs, signal: abortController.signal };
666
-
667
- if (task.command) {
668
- log.debug(`[task:${taskId}]`, `command: ${task.command}`);
669
- result = await runCommand(task.command, task.cwd ?? workDir, runOpts);
670
- } else {
671
- // AI task: apply middleware chain
672
- const driverName = task.driver ?? track.driver ?? config.driver ?? 'claude-code';
673
- const driver = getHandler<DriverPlugin>('drivers', driverName);
674
-
675
- let prompt = task.prompt!;
676
- const originalLen = prompt.length;
677
- const mws = task.middlewares !== undefined ? task.middlewares : track.middlewares;
678
- if (mws && mws.length > 0) {
679
- log.debug(
680
- `[task:${taskId}]`,
681
- `middleware chain: ${mws.map((m) => m.type).join(' ')}`,
682
- );
683
- const mwCtx: MiddlewareContext = {
684
- task,
685
- track,
686
- workDir: task.cwd ?? workDir,
687
- };
688
- for (const mwConfig of mws) {
689
- const before = prompt.length;
690
- const mwPlugin = getHandler<MiddlewarePlugin>('middlewares', mwConfig.type);
691
- const next = await mwPlugin.enhance(
692
- prompt,
693
- mwConfig as Record<string, unknown>,
694
- mwCtx,
695
- );
696
- // R3: a middleware that returns undefined / null / a non-string
697
- // would silently corrupt the prompt sent to the driver. Fail loud
698
- // here so the user sees "middleware X.enhance returned ..." in the
699
- // task log instead of "[object Object]" arriving at the model.
700
- if (typeof next !== 'string') {
701
- throw new Error(
702
- `middleware "${mwConfig.type}".enhance() returned ${next === null ? 'null' : typeof next}, expected string`,
703
- );
704
- }
705
- prompt = next;
706
- log.debug(
707
- `[task:${taskId}]`,
708
- ` ${mwConfig.type}: ${before} ${prompt.length} chars`,
709
- );
710
- }
711
- }
712
- log.debug(
713
- `[task:${taskId}]`,
714
- `prompt: ${originalLen} chars (final: ${prompt.length} chars)`,
715
- );
716
- log.quiet(`--- prompt (final) ---\n${clip(prompt)}\n--- end prompt ---`, taskId);
717
-
718
- // H1: hand the driver a continue_from that has already been
719
- // qualified by dag.ts. Without this, drivers like codex/opencode/
720
- // claude-code look up maps directly with
721
- // the user's raw (possibly bare) string, which races whenever two
722
- // tracks share a task name. dag.ts has the only authoritative
723
- // resolver, so we use its precomputed answer here.
724
- const enrichedTask: TaskConfig = {
725
- ...task,
726
- prompt,
727
- continue_from: node.resolvedContinueFrom ?? task.continue_from,
728
- };
729
- const driverCtx: DriverContext = {
730
- sessionMap,
731
- normalizedMap,
732
- workDir: task.cwd ?? workDir,
733
- };
734
- const spec = await driver.buildCommand(enrichedTask, track, driverCtx);
735
- log.debug(`[task:${taskId}]`, `driver=${driverName}`);
736
- log.debug(`[task:${taskId}]`, `spawn args: ${JSON.stringify(spec.args)}`);
737
- if (spec.cwd) log.debug(`[task:${taskId}]`, `spawn cwd: ${spec.cwd}`);
738
- if (spec.env)
739
- log.debug(
740
- `[task:${taskId}]`,
741
- `spawn env overrides: ${Object.keys(spec.env).join(', ')}`,
742
- );
743
- if (spec.stdin) log.debug(`[task:${taskId}]`, `spawn stdin: ${spec.stdin.length} chars`);
744
- result = await runSpawn(spec, driver, runOpts);
745
- }
746
-
747
- // 6. Determine terminal status (without emitting yet — result must be complete first)
748
- // H2: branch on failureKind so spawn errors no longer masquerade as
749
- // timeouts. Old runners that don't set failureKind still work — we
750
- // fall back to the historical "exitCode === -1 means timeout" heuristic so
751
- // pre-existing third-party drivers don't regress.
752
- let terminalStatus: TaskStatus;
753
- const kind = result.failureKind;
754
- if (kind === 'timeout') {
755
- terminalStatus = 'timeout';
756
- } else if (kind === 'spawn_error') {
757
- terminalStatus = 'failed';
758
- } else if (kind === undefined && result.exitCode === -1) {
759
- // Legacy path: pre-H2 driver returned -1 with no kind. Treat as
760
- // timeout for backward compatibility (the previous behaviour).
761
- terminalStatus = 'timeout';
762
- } else if (result.exitCode !== 0) {
763
- terminalStatus = 'failed';
764
- } else if (task.completion) {
765
- const plugin = getHandler<CompletionPlugin>('completions', task.completion.type);
766
- const completionCtx = { workDir: task.cwd ?? workDir, signal: abortController.signal };
767
- const passed = await plugin.check(
768
- task.completion as Record<string, unknown>,
769
- result,
770
- completionCtx,
771
- );
772
- // R4: strict boolean check. Truthy strings/numbers used to be coerced
773
- // to success, so a check returning "ok" would let a failing task pass.
774
- if (typeof passed !== 'boolean') {
775
- throw new Error(
776
- `completion "${task.completion.type}".check() returned ${passed === null ? 'null' : typeof passed}, expected boolean`,
777
- );
778
- }
779
- terminalStatus = passed ? 'success' : 'failed';
780
- } else {
781
- terminalStatus = 'success';
782
- }
783
-
784
- // Store normalized text separately (in-memory) for continue_from handoff.
785
- // R15: clip oversized values so a runaway parseResult can't accumulate
786
- // hundreds of MB across tasks.
787
- if (result.normalizedOutput !== null) {
788
- const clipped =
789
- result.normalizedOutput.length > MAX_NORMALIZED_BYTES
790
- ? result.normalizedOutput.slice(0, MAX_NORMALIZED_BYTES) +
791
- `\n[…clipped at ${MAX_NORMALIZED_BYTES} bytes]`
792
- : result.normalizedOutput;
793
- normalizedMap.set(taskId, clipped);
794
- }
795
-
796
- if (result.stderr) {
797
- const stderrPath = resolve(log.dir, `${taskId.replace(/\./g, '_')}.stderr`);
798
- await Bun.write(stderrPath, result.stderr);
799
- result = { ...result, stderrPath };
800
- }
801
-
802
- if (result.sessionId) {
803
- // H1: qualified-only key.
804
- sessionMap.set(taskId, result.sessionId);
805
- }
806
-
807
- // Set result and finishedAt before emitting terminal status so listeners see complete state
808
- state.result = result;
809
- state.finishedAt = nowISO();
810
- setTaskStatus(taskId, terminalStatus);
811
-
812
- // Log task outcome with relevant details
813
- const durSec = (result.durationMs / 1000).toFixed(1);
814
- if (terminalStatus === 'success') {
815
- log.info(`[task:${taskId}]`, `success (${durSec}s)`);
816
- } else {
817
- log.error(
818
- `[task:${taskId}]`,
819
- `${terminalStatus} exit=${result.exitCode} duration=${durSec}s`,
820
- );
821
- if (result.stderr) {
822
- const tail = tailLines(result.stderr, 10);
823
- log.error(`[task:${taskId}]`, `stderr tail:\n${tail}`);
824
- }
825
- }
826
-
827
- // File-only: full stdout/stderr dump (clipped) + extracted metadata
828
- log.debug(
829
- `[task:${taskId}]`,
830
- `stdout: ${result.stdout.length} chars, stderr: ${result.stderr.length} chars`,
831
- );
832
- if (result.sessionId) {
833
- log.debug(`[task:${taskId}]`, `sessionId: ${result.sessionId}`);
834
- }
835
- if (result.stderrPath) {
836
- log.debug(`[task:${taskId}]`, `wrote stderr: ${result.stderrPath}`);
837
- }
838
- if (result.stdout) {
839
- log.quiet(
840
- `--- stdout (${taskId}) ---\n${clip(result.stdout)}\n--- end stdout ---`,
841
- taskId,
842
- );
843
- }
844
- if (result.stderr) {
845
- log.quiet(
846
- `--- stderr (${taskId}) ---\n${clip(result.stderr)}\n--- end stderr ---`,
847
- taskId,
848
- );
849
- }
850
- if (task.completion) {
851
- log.debug(
852
- `[task:${taskId}]`,
853
- `completion check: type=${task.completion.type} result=${terminalStatus}`,
854
- );
855
- }
856
- } catch (err: unknown) {
857
- const errMsg = err instanceof Error ? (err.stack ?? err.message) : String(err);
858
- log.error(`[task:${taskId}]`, `failed before execution: ${errMsg}`);
859
- state.result = {
860
- exitCode: -1,
861
- stdout: '',
862
- stderr: errMsg,
863
- stderrPath: null,
864
- durationMs: 0,
865
- sessionId: null,
866
- normalizedOutput: null,
867
- // H2: Engine-level pre-execution errors (driver throw, middleware
868
- // throw, getHandler 404) classify as spawn_error — the process never
869
- // ran, so calling them "timeout" was actively misleading.
870
- failureKind: 'spawn_error',
871
- };
872
- state.finishedAt = nowISO();
873
- setTaskStatus(taskId, 'failed');
874
- }
875
-
876
- // 7. Fire hooks
877
- const finalStatus: TaskStatus = state.status;
878
- try {
879
- await fireHook(taskId, finalStatus === 'success' ? 'task_success' : 'task_failure');
880
- } catch (hookErr) {
881
- log.error(
882
- `[task:${taskId}]`,
883
- `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
884
- );
885
- }
886
-
887
- // 8. Handle stop_all for failure states
888
- if (finalStatus !== 'success' && getOnFailure(taskId) === 'stop_all') {
889
- applyStopAll(node.track.id);
890
- }
891
- }
892
-
893
- // ── Event loop ──
894
- // Each task is launched as soon as ALL its deps reach a terminal state.
895
- // We track in-flight tasks in `running` so a task completing mid-batch
896
- // immediately unblocks its dependents without waiting for sibling tasks.
897
- const running = new Map<string, Promise<void>>();
898
-
899
- try {
900
- while (!pipelineAborted) {
901
- // Launch every task whose deps are all terminal and that isn't already in-flight
902
- for (const [id, state] of states) {
903
- if (state.status !== 'waiting' || running.has(id)) continue;
904
- const node = dag.nodes.get(id)!;
905
- const allDepsTerminal =
906
- node.dependsOn.length === 0 ||
907
- node.dependsOn.every((d) => isTerminal(states.get(d)!.status));
908
- if (!allDepsTerminal) continue;
909
- const p = processTask(id).finally(() => running.delete(id));
910
- running.set(id, p);
911
- }
912
-
913
- // All tasks terminal — done
914
- if ([...states.values()].every((s) => isTerminal(s.status))) break;
915
-
916
- if (running.size === 0) {
917
- // Nothing in-flight but non-terminal tasks exist (e.g. trigger-wait states
918
- // that processTask hasn't been called for yet). Poll briefly.
919
- await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
920
- } else {
921
- // Wait for any one task to finish, then re-scan for new launchables.
922
- await Promise.race(running.values());
923
- }
924
- }
925
-
926
- if (pipelineAborted) {
927
- // Wait for in-flight tasks to honour the abort signal before marking states.
928
- if (running.size > 0) await Promise.allSettled(running.values());
929
- for (const [id, state] of states) {
930
- if (!isTerminal(state.status)) {
931
- // By the time allSettled resolves, processTask's try/finally has already
932
- // set running tasks to success/failed/timeout. The only non-terminal
933
- // statuses remaining here are waiting/idle tasks that were never started.
934
- state.finishedAt = nowISO();
935
- setTaskStatus(id, 'skipped');
936
- }
937
- }
938
- }
939
- } finally {
940
- if (pipelineTimer) clearTimeout(pipelineTimer);
941
- // Clean up the external abort signal listener to prevent dead references
942
- // accumulating on long-lived shared AbortControllers.
943
- if (options.signal) {
944
- options.signal.removeEventListener('abort', externalAbortHandler);
945
- }
946
- // Safety net: drain any approvals still pending at shutdown (e.g. crash path).
947
- if (approvalGateway.pending().length > 0) {
948
- approvalGateway.abortAll('pipeline finished');
949
- }
950
- }
951
-
952
- // ── Summary ──
953
- const summary = { total: 0, success: 0, failed: 0, skipped: 0, timeout: 0, blocked: 0 };
954
- for (const [, state] of states) {
955
- summary.total++;
956
- switch (state.status) {
957
- case 'success':
958
- summary.success++;
959
- break;
960
- case 'failed':
961
- summary.failed++;
962
- break;
963
- case 'skipped':
964
- summary.skipped++;
965
- break;
966
- case 'timeout':
967
- summary.timeout++;
968
- break;
969
- case 'blocked':
970
- summary.blocked++;
971
- break;
972
- }
973
- }
974
-
975
- const finishedAt = nowISO();
976
- const durationMs = new Date(finishedAt).getTime() - new Date(startedAt).getTime();
977
-
978
- if (pipelineAborted) {
979
- await executeHook(
980
- config.hooks,
981
- 'pipeline_error',
982
- buildPipelineErrorContext(pipelineInfo, 'Pipeline timeout exceeded'),
983
- workDir,
984
- );
985
- } else {
986
- await executeHook(
987
- config.hooks,
988
- 'pipeline_complete',
989
- buildPipelineCompleteContext(
990
- { ...pipelineInfo, finished_at: finishedAt, duration_ms: durationMs },
991
- summary,
992
- ),
993
- workDir,
994
- );
995
- }
996
-
997
- const allSuccess =
998
- !pipelineAborted && summary.failed === 0 && summary.timeout === 0 && summary.blocked === 0;
999
-
1000
- log.section('Pipeline summary');
1001
- log.quiet(`status: ${pipelineAborted ? 'aborted (timeout)' : 'completed'}`);
1002
- log.quiet(`duration: ${(durationMs / 1000).toFixed(1)}s`);
1003
- log.quiet(
1004
- `counts: total=${summary.total} success=${summary.success} ` +
1005
- `failed=${summary.failed} skipped=${summary.skipped} ` +
1006
- `timeout=${summary.timeout} blocked=${summary.blocked}`,
1007
- );
1008
- log.quiet('');
1009
- log.quiet('per-task:');
1010
- for (const [id, state] of states) {
1011
- const dur =
1012
- state.result?.durationMs != null ? `${(state.result.durationMs / 1000).toFixed(1)}s` : '-';
1013
- const exit = state.result?.exitCode ?? '-';
1014
- log.quiet(` ${state.status.padEnd(8)} ${id} (exit=${exit}, ${dur})`);
1015
- }
1016
-
1017
- log.info('[pipeline]', `completed "${config.name}"`);
1018
- log.info(
1019
- '[pipeline]',
1020
- `Total: ${summary.total} | Success: ${summary.success} | Failed: ${summary.failed} | Skipped: ${summary.skipped} | Timeout: ${summary.timeout} | Blocked: ${summary.blocked}`,
1021
- );
1022
- log.info('[pipeline]', `Duration: ${(durationMs / 1000).toFixed(1)}s`);
1023
- log.info('[pipeline]', `Log: ${log.path}`);
1024
-
1025
- emit({ type: 'pipeline_end', runId, success: allSuccess });
1026
- return { success: allSuccess, runId, logPath: log.path, summary, states: freezeStates(states) };
1027
- } finally {
1028
- // Close the persistent log file handle before pruning.
1029
- log.close();
1030
- // Prune old per-run log directories on every exit path (normal, blocked, or thrown).
1031
- // Exclude the current runId so a concurrent run cannot delete its own live directory.
1032
- if (maxLogRuns > 0) {
1033
- await pruneLogDirs(resolve(workDir, '.tagma', 'logs'), maxLogRuns, runId);
1034
- }
1035
- }
1036
- }
1037
-
1038
/**
 * Prune old per-run log directories under `logsDir`.
 *
 * Only real subdirectories whose name starts with `run_` are considered; plain
 * files that happen to share the prefix are neither counted against the
 * retention budget nor deleted. Candidates are sorted lexicographically and the
 * oldest ones (the front of the sorted list) are removed. The caller relies on
 * run IDs sorting chronologically.
 *
 * D10: the live run (`excludeRunId`) occupies one of the `keep` slots, so at
 * most `keep - 1` historical directories are retained. The live directory
 * itself is never deleted, even when it would otherwise fall outside the
 * retention window.
 *
 * @param logsDir      Directory that contains one subdirectory per run.
 * @param keep         Total number of run directories to keep, live run included.
 * @param excludeRunId Name of the live run's directory; always preserved.
 * @returns Resolves once every deletion attempt has settled. Never rejects:
 *          a missing `logsDir` and individual deletion failures are ignored.
 */
async function pruneLogDirs(logsDir: string, keep: number, excludeRunId: string): Promise<void> {
  let dirNames: string[];
  try {
    // withFileTypes lets us tell directories from stray files without a stat per entry.
    const entries = await readdir(logsDir, { withFileTypes: true });
    dirNames = entries.filter((entry) => entry.isDirectory()).map((entry) => entry.name);
  } catch {
    return; // logsDir doesn't exist yet — nothing to prune
  }

  // Historical run directories only: run_<...>, excluding the live run.
  const runDirs = dirNames
    .filter((name) => name.startsWith('run_') && name !== excludeRunId)
    .sort();

  // keep - 1 historical slots (1 slot is reserved for the live excludeRunId).
  const historyKeep = Math.max(0, keep - 1);
  const toDelete = runDirs.slice(0, Math.max(0, runDirs.length - historyKeep));

  await Promise.all(
    toDelete.map((dir) =>
      rm(resolve(logsDir, dir), { recursive: true, force: true }).catch(() => {
        // Ignore deletion errors — stale dirs are better than a crash
      }),
    ),
  );
}
1073
-
1074
/**
 * Report whether a task status is final.
 *
 * `success`, `failed`, `timeout`, `skipped` and `blocked` are terminal; every
 * other status yields `false`.
 */
function isTerminal(status: TaskStatus): boolean {
  switch (status) {
    case 'success':
    case 'failed':
    case 'timeout':
    case 'skipped':
    case 'blocked':
      return true;
    default:
      return false;
  }
}
1083
-
1084
/**
 * Build a caller-safe snapshot of the task-state map.
 *
 * Every entry becomes a fresh object, and `config`, `trackConfig` and a
 * non-null `result` are each spread into new objects. The copy is one level
 * deep: objects nested inside those three are still shared with the live map.
 * Entry order follows the insertion order of `states`.
 */
function freezeStates(states: Map<string, TaskState>): ReadonlyMap<string, TaskState> {
  const snapshot = new Map<string, TaskState>();
  states.forEach((state, taskId) => {
    const { config, trackConfig, status, result, startedAt, finishedAt } = state;
    snapshot.set(taskId, {
      config: { ...config },
      trackConfig: { ...trackConfig },
      status,
      result: result ? { ...result } : null,
      startedAt,
      finishedAt,
    });
  });
  return snapshot;
}
1
+ import { resolve } from 'path';
2
+ import { readdir, rm } from 'fs/promises';
3
+ import type {
4
+ PipelineConfig,
5
+ TaskConfig,
6
+ TaskState,
7
+ TaskStatus,
8
+ TaskResult,
9
+ DriverPlugin,
10
+ TriggerPlugin,
11
+ CompletionPlugin,
12
+ MiddlewarePlugin,
13
+ MiddlewareContext,
14
+ DriverContext,
15
+ OnFailure,
16
+ PromptDocument,
17
+ } from './types';
18
+ import { buildDag, type Dag } from './dag';
19
+ import { getHandler, hasHandler, loadPlugins } from './registry';
20
+ import { runSpawn, runCommand } from './runner';
21
+ import { parseDuration, nowISO, generateRunId } from './utils';
22
+ import { promptDocumentFromString, serializePromptDocument } from './prompt-doc';
23
+ import {
24
+ executeHook,
25
+ buildPipelineStartContext,
26
+ buildTaskContext,
27
+ buildPipelineCompleteContext,
28
+ buildPipelineErrorContext,
29
+ type PipelineInfo,
30
+ type TrackInfo,
31
+ type TaskInfo,
32
+ } from './hooks';
33
+ import { Logger, tailLines, clip, type LogLevel } from './logger';
34
+ import { InMemoryApprovalGateway, type ApprovalGateway } from './approval';
35
+
36
+ // ═══ A7: Typed trigger errors ═══
37
+ // Replace string-matching on error messages with structured error types so
38
+ // coincidental substrings don't cause misclassification.
39
+
40
/**
 * Typed rejection for a trigger whose wait was refused (user or policy
 * rejection). When a trigger wait rejects with this error and the pipeline has
 * not been aborted, the engine marks the task `blocked`.
 */
export class TriggerBlockedError extends Error {
  /** Stable, machine-readable discriminator for this error class. */
  readonly code: 'TRIGGER_BLOCKED' = 'TRIGGER_BLOCKED';

  constructor(message: string) {
    super(message);
    // Override the inherited "Error" so logs and stack traces show the subclass name.
    this.name = 'TriggerBlockedError';
  }
}
47
+
48
/**
 * Typed rejection for a trigger that gave up waiting. When a trigger wait
 * rejects with this error and the pipeline has not been aborted, the engine
 * marks the task `timeout`.
 */
export class TriggerTimeoutError extends Error {
  /** Stable, machine-readable discriminator for this error class. */
  readonly code: 'TRIGGER_TIMEOUT' = 'TRIGGER_TIMEOUT';

  constructor(message: string) {
    super(message);
    // Override the inherited "Error" so logs and stack traces show the subclass name.
    this.name = 'TriggerTimeoutError';
  }
}
55
+
56
+ // ═══ Preflight Validation ═══
57
+
58
/**
 * Check, before anything runs, that every plugin the pipeline refers to is
 * registered and that every `continue_from` handoff can actually be performed.
 *
 * For each DAG node this verifies:
 *  - the effective driver (task → track → pipeline → `'claude-code'`) is
 *    registered, unless the task is command-only (has `command`, no `prompt`);
 *  - the trigger type, completion type and every middleware type are registered
 *    (task-level middlewares take precedence over the track's);
 *  - when `continue_from` is set and the task's own driver cannot resume
 *    sessions, the upstream task's driver implements `parseResult`, so its
 *    output can be injected as text instead.
 *
 * All problems are collected, so a single failure reports every issue at once.
 *
 * @param config Pipeline configuration; supplies the pipeline-level default driver.
 * @param dag    DAG produced by `buildDag`; `resolvedContinueFrom` on a node is
 *               read as the already-qualified upstream task id.
 * @throws Error whose message lists every validation problem, one per line.
 */
function preflight(config: PipelineConfig, dag: Dag): void {
  const errors: string[] = [];

  for (const [, node] of dag.nodes) {
    const { task, track } = node;
    const driverName = task.driver ?? track.driver ?? config.driver ?? 'claude-code';

    // Pure command tasks don't use a driver — skip driver registration check.
    const isCommandOnly = task.command && !task.prompt;
    if (!isCommandOnly && !hasHandler('drivers', driverName)) {
      errors.push(`Task "${node.taskId}": driver "${driverName}" not registered`);
    }

    if (task.trigger && !hasHandler('triggers', task.trigger.type)) {
      errors.push(`Task "${node.taskId}": trigger type "${task.trigger.type}" not registered`);
    }

    if (task.completion && !hasHandler('completions', task.completion.type)) {
      errors.push(
        `Task "${node.taskId}": completion type "${task.completion.type}" not registered`,
      );
    }

    for (const mw of task.middlewares ?? track.middlewares ?? []) {
      if (!hasHandler('middlewares', mw.type)) {
        errors.push(`Task "${node.taskId}": middleware type "${mw.type}" not registered`);
      }
    }

    // continue_from handoff: nothing to verify when it is unset, or when the
    // driver is unregistered (that case was already reported above).
    if (!task.continue_from || !hasHandler('drivers', driverName)) continue;

    // A driver that can resume sessions needs nothing from the upstream task.
    const driver = getHandler<DriverPlugin>('drivers', driverName);
    if (driver.capabilities.sessionResume) continue;

    // buildDag has already qualified `continue_from` and stored the result on
    // the node, so the upstream id is available here without re-resolving.
    const upstreamId = node.resolvedContinueFrom;
    const upstream = upstreamId ? dag.nodes.get(upstreamId) : undefined;
    if (!upstream) continue;

    // Without session resume the only remaining handoff is in-memory text
    // injection, which requires the upstream driver to implement parseResult.
    const upstreamDriverName =
      upstream.task.driver ?? upstream.track.driver ?? config.driver ?? 'claude-code';
    const upstreamDriver = hasHandler('drivers', upstreamDriverName)
      ? getHandler<DriverPlugin>('drivers', upstreamDriverName)
      : null;
    const canNormalize = typeof upstreamDriver?.parseResult === 'function';

    if (!canNormalize) {
      errors.push(
        `Task "${node.taskId}" uses continue_from: "${task.continue_from}", ` +
          `but its driver "${driverName}" does not support session resume and ` +
          `upstream task "${upstreamId}" uses driver "${upstreamDriverName}", which ` +
          `does not implement parseResult for text-injection handoff. ` +
          `Use a driver with parseResult, or remove continue_from.`,
      );
    }
  }

  if (errors.length > 0) {
    throw new Error(`Preflight validation failed:\n - ${errors.join('\n - ')}`);
  }
}
128
+
129
+ // ═══ Engine ═══
130
+
131
/** Outcome of one pipeline run, as returned by `runPipeline`. */
export interface EngineResult {
  /** Overall verdict for the run. */
  readonly success: boolean;
  /** Identifier of this run (caller-supplied or generated by the engine). */
  readonly runId: string;
  /** Path of the run's log file, taken from the run's logger. */
  readonly logPath: string;
  /** Task counts: `total` plus one counter per terminal status. */
  readonly summary: {
    total: number;
    success: number;
    failed: number;
    skipped: number;
    timeout: number;
    blocked: number;
  };
  /** Snapshot of every task's state at the time the result was built, keyed by task id. */
  readonly states: ReadonlyMap<string, TaskState>;
}
145
+
146
+ // ═══ Pipeline Events ═══
147
+
148
/**
 * Events delivered to `RunPipelineOptions.onEvent` during a run.
 * The union is discriminated on `type`.
 */
export type PipelineEvent =
  /**
   * The pipeline has started. `states` is a full snapshot of every task's
   * state so listeners can initialise their mirrors without missing events.
   */
  | {
      readonly type: 'pipeline_start';
      readonly runId: string;
      readonly states: ReadonlyMap<string, TaskState>;
    }
  /**
   * A task moved from `prevStatus` to `status`. `state` is a snapshot of the
   * task's state taken when the event was emitted.
   */
  | {
      readonly type: 'task_status_change';
      readonly taskId: string;
      readonly status: TaskStatus;
      readonly prevStatus: TaskStatus;
      readonly runId: string;
      readonly state: TaskState;
    }
  /**
   * Fine-grained log line emitted alongside every write to pipeline.log, so
   * consumers can stream the run into a UI without tailing the log file.
   * `taskId` is non-null for task-scoped lines and null for pipeline-wide
   * messages (e.g. configuration dumps, DAG topology, pipeline start/end).
   */
  | {
      readonly type: 'task_log';
      readonly runId: string;
      readonly taskId: string | null;
      readonly level: LogLevel;
      readonly timestamp: string;
      readonly text: string;
    }
  /** The pipeline has finished; `success` carries the overall verdict. */
  | { readonly type: 'pipeline_end'; readonly runId: string; readonly success: boolean };
178
+
179
/** Optional knobs accepted by `runPipeline`. Every field may be omitted. */
export interface RunPipelineOptions {
  /**
   * Gateway used for approvals during the run. When omitted the engine
   * creates an `InMemoryApprovalGateway`.
   */
  readonly approvalGateway?: ApprovalGateway;
  /**
   * How many per-run log directories to retain under `<workDir>/.tagma/logs/`.
   * The oldest directories are deleted after each run. Defaults to 20; set to
   * 0 to disable cleanup.
   */
  readonly maxLogRuns?: number;
  /**
   * Run ID chosen by the caller. When present the engine uses it instead of
   * calling `generateRunId()`, which keeps the editor and SDK log directories
   * aligned on the same ID.
   */
  readonly runId?: string;
  /**
   * External cancellation signal. Aborting it cancels the pipeline
   * immediately — the same effect as the pipeline timeout firing, but under
   * the caller's control.
   */
  readonly signal?: AbortSignal;
  /**
   * Listener invoked with every `PipelineEvent`: pipeline start/end, task
   * status transitions, and `task_log` lines. Use it for real-time UI updates
   * (e.g. updating a visual workflow graph).
   */
  readonly onEvent?: (event: PipelineEvent) => void;
  /**
   * When true, the engine does not call `loadPlugins(config.plugins)` itself.
   * Intended for hosts that have already loaded plugins from a custom
   * resolution path (e.g. a user workspace's node_modules), so the engine
   * doesn't re-resolve them via Node's default cwd-based import.
   */
  readonly skipPluginLoading?: boolean;
}
210
+
211
/**
 * Delay, in milliseconds, between scheduler re-scans when nothing is in flight
 * but some tasks are still non-terminal (e.g. tasks waiting on a file or a
 * manual trigger).
 */
const POLL_INTERVAL_MS = 50;

/**
 * R15: upper bound on each normalized-output entry kept in `normalizedMap`, so
 * a runaway `parseResult` can't accumulate hundreds of MB across tasks. 1 MB is
 * generous for any text-context handoff between AI tasks.
 *
 * NOTE(review): if the cap is applied via a string's `.length`, the limit is
 * effectively in UTF-16 code units rather than bytes — confirm against the
 * call site.
 */
const MAX_NORMALIZED_BYTES = 1_000_000;
219
+
220
+ export async function runPipeline(
221
+ config: PipelineConfig,
222
+ workDir: string,
223
+ options: RunPipelineOptions = {},
224
+ ): Promise<EngineResult> {
225
+ const approvalGateway = options.approvalGateway ?? new InMemoryApprovalGateway();
226
+ const maxLogRuns = options.maxLogRuns ?? 20;
227
+
228
+ // Load any plugins declared in the pipeline config before preflight so that
229
+ // drivers, completions, and middlewares referenced in YAML are registered.
230
+ // Hosts that pre-load plugins from a custom path (e.g. the editor loading
231
+ // from the user's workspace node_modules) pass skipPluginLoading: true so
232
+ // we don't re-resolve via Node's cwd-based default import.
233
+ if (!options.skipPluginLoading && config.plugins?.length) {
234
+ await loadPlugins(config.plugins);
235
+ }
236
+
237
+ const dag = buildDag(config);
238
+ const runId = options.runId ?? generateRunId();
239
+ preflight(config, dag);
240
+
241
+ const startedAt = nowISO();
242
+ const pipelineInfo: PipelineInfo = { name: config.name, run_id: runId, started_at: startedAt };
243
+ // Forward every structured log line to subscribers as task_log events.
244
+ // Reading options.onEvent inside the callback (vs. capturing it once) keeps
245
+ // the SDK behavior correct if callers pass a fresh onEvent on each run.
246
+ const log = new Logger(workDir, runId, (record) => {
247
+ options.onEvent?.({
248
+ type: 'task_log',
249
+ runId,
250
+ taskId: record.taskId,
251
+ level: record.level,
252
+ timestamp: record.timestamp,
253
+ text: record.text,
254
+ });
255
+ });
256
+
257
+ try {
258
+ log.info('[pipeline]', `start "${config.name}" run_id=${runId}`);
259
+
260
+ // File-only: dump the resolved pipeline shape + DAG topology for post-mortem.
261
+ log.section('Pipeline configuration');
262
+ log.quiet(`name: ${config.name}`);
263
+ log.quiet(`driver: ${config.driver ?? '(default: claude-code)'}`);
264
+ log.quiet(`timeout: ${config.timeout ?? '(none)'}`);
265
+ log.quiet(`tracks: ${config.tracks.length}`);
266
+ log.quiet(`tasks (total): ${dag.nodes.size}`);
267
+ log.quiet(`plugins: ${(config.plugins ?? []).join(', ') || '(none)'}`);
268
+ log.quiet(
269
+ `hooks: ${config.hooks ? Object.keys(config.hooks).join(', ') || '(none)' : '(none)'}`,
270
+ );
271
+
272
+ log.section('DAG topology');
273
+ for (const [id, node] of dag.nodes) {
274
+ const deps = node.dependsOn.length ? node.dependsOn.join(', ') : '(root)';
275
+ const kind = node.task.prompt ? 'ai' : 'cmd';
276
+ log.quiet(` • ${id} [${kind}] track=${node.track.id} deps=[${deps}]`);
277
+ }
278
+ log.quiet('');
279
+
280
+ // Initialize states (before hook, so we can return them even if blocked)
281
+ const states = new Map<string, TaskState>();
282
+ for (const [id, node] of dag.nodes) {
283
+ states.set(id, {
284
+ config: node.task,
285
+ trackConfig: node.track,
286
+ status: 'idle',
287
+ result: null,
288
+ startedAt: null,
289
+ finishedAt: null,
290
+ });
291
+ }
292
+
293
+ // Pipeline start hook (gate)
294
+ const startHook = await executeHook(
295
+ config.hooks,
296
+ 'pipeline_start',
297
+ buildPipelineStartContext(pipelineInfo),
298
+ workDir,
299
+ );
300
+ if (!startHook.allowed) {
301
+ console.error(`Pipeline blocked by pipeline_start hook (exit code ${startHook.exitCode})`);
302
+ await executeHook(
303
+ config.hooks,
304
+ 'pipeline_error',
305
+ buildPipelineErrorContext(pipelineInfo, 'pipeline_blocked', 'pipeline_blocked'),
306
+ workDir,
307
+ );
308
+ // All tasks stay idle — pipeline never started
309
+ return {
310
+ success: false,
311
+ runId,
312
+ logPath: log.path,
313
+ summary: {
314
+ total: dag.nodes.size,
315
+ success: 0,
316
+ failed: 0,
317
+ skipped: 0,
318
+ timeout: 0,
319
+ blocked: 0,
320
+ },
321
+ states: freezeStates(states),
322
+ };
323
+ }
324
+
325
+ // Pipeline approved — transition all tasks to waiting
326
+ for (const [, state] of states) {
327
+ state.status = 'waiting';
328
+ }
329
+ // Include a full states snapshot so listeners can initialize their mirrors without missing events
330
+ const statesSnapshot: ReadonlyMap<string, TaskState> = new Map(
331
+ [...states.entries()].map(([id, s]) => [id, { ...s }]),
332
+ );
333
+ options.onEvent?.({ type: 'pipeline_start', runId, states: statesSnapshot });
334
+
335
+ const sessionMap = new Map<string, string>();
336
+ const normalizedMap = new Map<string, string>();
337
+
338
+ // Pipeline timeout
339
+ const pipelineTimeoutMs = config.timeout ? parseDuration(config.timeout) : 0;
340
+ let pipelineAborted = false;
341
+ const abortController = new AbortController();
342
+ let pipelineTimer: ReturnType<typeof setTimeout> | null = null;
343
+
344
+ if (pipelineTimeoutMs > 0) {
345
+ pipelineTimer = setTimeout(() => {
346
+ pipelineAborted = true;
347
+ abortController.abort();
348
+ }, pipelineTimeoutMs);
349
+ }
350
+
351
+ // When the pipeline is aborted (timeout, external shutdown), drain all
352
+ // pending approvals so waiting triggers unblock immediately.
353
+ abortController.signal.addEventListener('abort', () => {
354
+ approvalGateway.abortAll('pipeline aborted');
355
+ });
356
+
357
+ // Wire external cancel signal into the internal abort controller.
358
+ const externalAbortHandler = () => {
359
+ pipelineAborted = true;
360
+ abortController.abort();
361
+ };
362
+ if (options.signal) {
363
+ if (options.signal.aborted) {
364
+ externalAbortHandler();
365
+ } else {
366
+ options.signal.addEventListener('abort', externalAbortHandler, { once: true });
367
+ }
368
+ }
369
+
370
+ // ── Helpers ──
371
+
372
+ function emit(event: PipelineEvent): void {
373
+ options.onEvent?.(event);
374
+ }
375
+
376
+ function setTaskStatus(taskId: string, newStatus: TaskStatus): void {
377
+ const state = states.get(taskId)!;
378
+ // Terminal lock: once a task reaches a terminal state it must not be
379
+ // re-transitioned. This prevents stop_all from marking running tasks as
380
+ // skipped and then having their in-flight processTask promise overwrite
381
+ // that with success/failed, producing an invalid double transition.
382
+ if (isTerminal(state.status)) return;
383
+ const prevStatus = state.status;
384
+ state.status = newStatus;
385
+ // Snapshot state at emit time — result and finishedAt must be set before calling this for terminal statuses
386
+ const snapshot: TaskState = {
387
+ config: state.config,
388
+ trackConfig: state.trackConfig,
389
+ status: state.status,
390
+ result: state.result,
391
+ startedAt: state.startedAt,
392
+ finishedAt: state.finishedAt,
393
+ };
394
+ emit({
395
+ type: 'task_status_change',
396
+ taskId,
397
+ status: newStatus,
398
+ prevStatus,
399
+ runId,
400
+ state: snapshot,
401
+ });
402
+ }
403
+
404
+ function getOnFailure(taskId: string): OnFailure {
405
+ return dag.nodes.get(taskId)?.track.on_failure ?? 'skip_downstream';
406
+ }
407
+
408
+ function isDependencySatisfied(depId: string): 'satisfied' | 'unsatisfied' | 'skip' {
409
+ const depState = states.get(depId);
410
+ if (!depState) return 'skip';
411
+ switch (depState.status) {
412
+ case 'success':
413
+ return 'satisfied';
414
+ case 'skipped':
415
+ return 'skip';
416
+ case 'failed':
417
+ case 'timeout':
418
+ case 'blocked':
419
+ return getOnFailure(depId) === 'ignore' ? 'satisfied' : 'skip';
420
+ default:
421
+ return 'unsatisfied';
422
+ }
423
+ }
424
+
425
+ /**
426
+ * H3: "stop_all" historically only stopped tasks within the same track,
427
+ * which contradicted both its name and user expectations. It now stops
428
+ * the **entire pipeline**:
429
+ * - In-flight tasks are signalled via the shared abort controller so
430
+ * drivers / runner.ts can cancel cooperatively (returning
431
+ * `failureKind: 'timeout'`).
432
+ * - Still-waiting tasks across every track are immediately marked
433
+ * skipped so the run completes promptly.
434
+ * The terminal lock in setTaskStatus prevents any later re-transition
435
+ * should a completed running task try to overwrite the skipped state.
436
+ */
437
+ function applyStopAll(_failedTrackId: string): void {
438
+ pipelineAborted = true;
439
+ abortController.abort();
440
+ for (const [id, state] of states) {
441
+ if (state.status === 'waiting') {
442
+ state.finishedAt = nowISO();
443
+ setTaskStatus(id, 'skipped');
444
+ }
445
+ }
446
+ }
447
+
448
+ function buildTaskInfoObj(taskId: string): TaskInfo {
449
+ const state = states.get(taskId)!;
450
+ return {
451
+ id: taskId,
452
+ name: state.config.name,
453
+ type: state.config.prompt ? 'ai' : 'command',
454
+ status: state.status,
455
+ exit_code: state.result?.exitCode ?? null,
456
+ duration_ms: state.result?.durationMs ?? null,
457
+ stderr_path: state.result?.stderrPath ?? null,
458
+ session_id: state.result?.sessionId ?? null,
459
+ started_at: state.startedAt,
460
+ finished_at: state.finishedAt,
461
+ };
462
+ }
463
+
464
+ function trackInfoOf(taskId: string): TrackInfo {
465
+ const node = dag.nodes.get(taskId)!;
466
+ return { id: node.track.id, name: node.track.name };
467
+ }
468
+
469
+ async function fireHook(taskId: string, event: 'task_success' | 'task_failure'): Promise<void> {
470
+ await executeHook(
471
+ config.hooks,
472
+ event,
473
+ buildTaskContext(event, pipelineInfo, trackInfoOf(taskId), buildTaskInfoObj(taskId)),
474
+ workDir,
475
+ abortController.signal,
476
+ );
477
+ }
478
+
479
+ // ── Process a single task ──
480
+
481
+ async function processTask(taskId: string): Promise<void> {
482
+ const state = states.get(taskId)!;
483
+ const node = dag.nodes.get(taskId)!;
484
+ const task = node.task;
485
+ const track = node.track;
486
+
487
+ log.section(`Task ${taskId}`, taskId);
488
+ log.debug(
489
+ `[task:${taskId}]`,
490
+ `type=${task.prompt ? 'ai' : 'cmd'} track=${track.id} deps=[${node.dependsOn.join(', ') || '(root)'}]`,
491
+ );
492
+
493
+ // 1. Check dependencies
494
+ for (const depId of node.dependsOn) {
495
+ const result = isDependencySatisfied(depId);
496
+ if (result === 'skip') {
497
+ const depStatus = states.get(depId)?.status ?? 'unknown';
498
+ log.debug(`[task:${taskId}]`, `skipped (upstream "${depId}" status=${depStatus})`);
499
+ state.finishedAt = nowISO();
500
+ setTaskStatus(taskId, 'skipped');
501
+ return;
502
+ }
503
+ if (result === 'unsatisfied') return; // still waiting
504
+ }
505
+
506
+ // 2. Check trigger
507
+ if (task.trigger) {
508
+ log.debug(
509
+ `[task:${taskId}]`,
510
+ `trigger wait: type=${task.trigger.type} ${JSON.stringify(task.trigger)}`,
511
+ );
512
+ try {
513
+ const triggerPlugin = getHandler<TriggerPlugin>('triggers', task.trigger.type);
514
+ // R6: race the plugin's watch() against the pipeline's abort signal.
515
+ // Third-party triggers may forget to wire up ctx.signal — without
516
+ // this race, an aborted pipeline would hang forever waiting for the
517
+ // plugin's watch promise to resolve. The race resolves on whichever
518
+ // path settles first, and the cleanup paths in finally never run on
519
+ // the orphaned plugin promise (it's allowed to leak a watcher; the
520
+ // pipeline is being torn down anyway).
521
+ await new Promise<unknown>((resolve, reject) => {
522
+ let settled = false;
523
+ const onAbort = () => {
524
+ if (settled) return;
525
+ settled = true;
526
+ abortController.signal.removeEventListener('abort', onAbort);
527
+ reject(new Error('Pipeline aborted'));
528
+ };
529
+ if (abortController.signal.aborted) {
530
+ onAbort();
531
+ return;
532
+ }
533
+ abortController.signal.addEventListener('abort', onAbort, { once: true });
534
+ triggerPlugin
535
+ .watch(task.trigger as Record<string, unknown>, {
536
+ taskId: node.taskId,
537
+ trackId: track.id,
538
+ workDir: task.cwd ?? workDir,
539
+ signal: abortController.signal,
540
+ approvalGateway,
541
+ })
542
+ .then(
543
+ (v) => {
544
+ if (settled) return;
545
+ settled = true;
546
+ abortController.signal.removeEventListener('abort', onAbort);
547
+ resolve(v);
548
+ },
549
+ (e) => {
550
+ if (settled) return;
551
+ settled = true;
552
+ abortController.signal.removeEventListener('abort', onAbort);
553
+ reject(e);
554
+ },
555
+ );
556
+ });
557
+ log.debug(`[task:${taskId}]`, `trigger fired`);
558
+ } catch (err: unknown) {
559
+ // If pipeline was aborted while we were still waiting for the trigger,
560
+ // this task never entered running state → skipped, not timeout.
561
+ state.finishedAt = nowISO();
562
+ if (pipelineAborted) {
563
+ setTaskStatus(taskId, 'skipped');
564
+ } else if (err instanceof TriggerBlockedError) {
565
+ setTaskStatus(taskId, 'blocked'); // user/policy rejection
566
+ } else if (err instanceof TriggerTimeoutError) {
567
+ setTaskStatus(taskId, 'timeout'); // genuine trigger wait timeout
568
+ } else {
569
+ // A7 fallback: also check message strings for backward-compat with
570
+ // third-party trigger plugins that don't throw typed errors yet.
571
+ const msg = err instanceof Error ? err.message : String(err);
572
+ if (msg.includes('rejected') || msg.includes('denied')) {
573
+ setTaskStatus(taskId, 'blocked');
574
+ } else if (msg.includes('timeout')) {
575
+ setTaskStatus(taskId, 'timeout');
576
+ } else {
577
+ setTaskStatus(taskId, 'failed'); // plugin error, watcher crash, etc.
578
+ }
579
+ }
580
+ try {
581
+ await fireHook(taskId, 'task_failure');
582
+ } catch (hookErr) {
583
+ log.error(
584
+ `[task:${taskId}]`,
585
+ `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
586
+ );
587
+ }
588
+ return;
589
+ }
590
+ }
591
+
592
+ // 3. task_start hook (gate)
593
+ const hookResult = await executeHook(
594
+ config.hooks,
595
+ 'task_start',
596
+ buildTaskContext('task_start', pipelineInfo, trackInfoOf(taskId), buildTaskInfoObj(taskId)),
597
+ workDir,
598
+ abortController.signal,
599
+ );
600
+ if (hookResult.exitCode !== 0 || config.hooks?.task_start) {
601
+ log.debug(
602
+ `[task:${taskId}]`,
603
+ `task_start hook exit=${hookResult.exitCode} allowed=${hookResult.allowed}`,
604
+ );
605
+ }
606
+ if (!hookResult.allowed) {
607
+ state.finishedAt = nowISO();
608
+ setTaskStatus(taskId, 'blocked');
609
+ try {
610
+ await fireHook(taskId, 'task_failure');
611
+ } catch (hookErr) {
612
+ log.error(
613
+ `[task:${taskId}]`,
614
+ `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
615
+ );
616
+ }
617
+ return;
618
+ }
619
+
620
+ // 4. Mark running — set startedAt before emitting so subscribers see a
621
+ // complete snapshot (startedAt non-null) in the task_status_change event.
622
+ state.startedAt = nowISO();
623
+ setTaskStatus(taskId, 'running');
624
+ log.info(
625
+ `[task:${taskId}]`,
626
+ task.command ? `running: ${task.command}` : `running (driver task)`,
627
+ );
628
+
629
+ // File-only: resolved config for this task
630
+ const resolvedDriver = task.driver ?? track.driver ?? config.driver ?? 'claude-code';
631
+ const resolvedModel = task.model ?? track.model ?? config.model ?? '(default)';
632
+ const resolvedPerms = task.permissions ?? track.permissions ?? '(default)';
633
+ const resolvedCwd = task.cwd ?? track.cwd ?? workDir;
634
+ log.debug(
635
+ `[task:${taskId}]`,
636
+ `resolved: driver=${resolvedDriver} model=${resolvedModel} cwd=${resolvedCwd}`,
637
+ );
638
+ log.debug(`[task:${taskId}]`, `permissions: ${JSON.stringify(resolvedPerms)}`);
639
+ if (task.continue_from) {
640
+ log.debug(`[task:${taskId}]`, `continue_from: "${task.continue_from}"`);
641
+ }
642
+ if (task.timeout) {
643
+ log.debug(`[task:${taskId}]`, `timeout: ${task.timeout}`);
644
+ }
645
+
646
+ try {
647
+ let result: TaskResult;
648
+ const timeoutMs = task.timeout ? parseDuration(task.timeout) : undefined;
649
+
650
+ const runOpts = { timeoutMs, signal: abortController.signal };
651
+
652
+ if (task.command) {
653
+ log.debug(`[task:${taskId}]`, `command: ${task.command}`);
654
+ result = await runCommand(task.command, task.cwd ?? workDir, runOpts);
655
+ } else {
656
+ // AI task: apply middleware chain against a structured PromptDocument.
657
+ const driverName = task.driver ?? track.driver ?? config.driver ?? 'claude-code';
658
+ const driver = getHandler<DriverPlugin>('drivers', driverName);
659
+
660
+ const originalLen = task.prompt!.length;
661
+ let doc: PromptDocument = promptDocumentFromString(task.prompt!);
662
+ const mws = task.middlewares !== undefined ? task.middlewares : track.middlewares;
663
+ if (mws && mws.length > 0) {
664
+ log.debug(
665
+ `[task:${taskId}]`,
666
+ `middleware chain: ${mws.map((m) => m.type).join(' → ')}`,
667
+ );
668
+ const mwCtx: MiddlewareContext = {
669
+ task,
670
+ track,
671
+ workDir: task.cwd ?? workDir,
672
+ };
673
+ for (const mwConfig of mws) {
674
+ const mwPlugin = getHandler<MiddlewarePlugin>('middlewares', mwConfig.type);
675
+ const beforeBlocks = doc.contexts.length;
676
+ const beforeLen = serializePromptDocument(doc).length;
677
+
678
+ // Prefer the structured API. Fall back to the legacy
679
+ // `enhance(string) → string` path so v0.x plugins keep
680
+ // working — that fallback loses context structure (the
681
+ // middleware's output becomes the new task body) but never
682
+ // silently drops content.
683
+ if (typeof mwPlugin.enhanceDoc === 'function') {
684
+ const next = await mwPlugin.enhanceDoc(
685
+ doc,
686
+ mwConfig as Record<string, unknown>,
687
+ mwCtx,
688
+ );
689
+ if (
690
+ !next ||
691
+ typeof next !== 'object' ||
692
+ !Array.isArray((next as PromptDocument).contexts) ||
693
+ typeof (next as PromptDocument).task !== 'string'
694
+ ) {
695
+ throw new Error(
696
+ `middleware "${mwConfig.type}".enhanceDoc() returned a malformed PromptDocument`,
697
+ );
698
+ }
699
+ doc = next as PromptDocument;
700
+ } else if (typeof mwPlugin.enhance === 'function') {
701
+ const asString = serializePromptDocument(doc);
702
+ const next = await mwPlugin.enhance(
703
+ asString,
704
+ mwConfig as Record<string, unknown>,
705
+ mwCtx,
706
+ );
707
+ // R3: a middleware that returns undefined / null / a non-string
708
+ // would silently corrupt the prompt. Fail loud.
709
+ if (typeof next !== 'string') {
710
+ throw new Error(
711
+ `middleware "${mwConfig.type}".enhance() returned ${next === null ? 'null' : typeof next}, expected string`,
712
+ );
713
+ }
714
+ // Legacy fallback: collapse the returned string into a
715
+ // fresh doc. Earlier structure is folded into the string
716
+ // (serializePromptDocument just ran), so bytes the driver
717
+ // sees match the old string pipeline.
718
+ doc = { contexts: [], task: next };
719
+ } else {
720
+ throw new Error(
721
+ `middleware "${mwConfig.type}" provides neither enhanceDoc nor enhance`,
722
+ );
723
+ }
724
+ const afterLen = serializePromptDocument(doc).length;
725
+ const addedBlocks = doc.contexts.length - beforeBlocks;
726
+ log.debug(
727
+ `[task:${taskId}]`,
728
+ ` ${mwConfig.type}: ${beforeLen} → ${afterLen} chars` +
729
+ (addedBlocks > 0
730
+ ? ` (+${addedBlocks} context block${addedBlocks > 1 ? 's' : ''})`
731
+ : ''),
732
+ );
733
+ }
734
+ }
735
+ const prompt = serializePromptDocument(doc);
736
+ log.debug(
737
+ `[task:${taskId}]`,
738
+ `prompt: ${originalLen} chars (final: ${prompt.length} chars, ${doc.contexts.length} block${doc.contexts.length === 1 ? '' : 's'})`,
739
+ );
740
+ log.quiet(`--- prompt (final) ---\n${clip(prompt)}\n--- end prompt ---`, taskId);
741
+
742
+ // H1: hand the driver a continue_from that has already been
743
+ // qualified by dag.ts. Without this, drivers like codex/opencode/
744
+ // claude-code look up maps directly with
745
+ // the user's raw (possibly bare) string, which races whenever two
746
+ // tracks share a task name. dag.ts has the only authoritative
747
+ // resolver, so we use its precomputed answer here.
748
+ // Drivers key sessionMap/normalizedMap by fully-qualified id. buildDag
749
+ // guarantees `resolvedContinueFrom` is set for every task that has a
750
+ // `continue_from`, so if we see the bare form here something upstream
751
+ // is broken — fail loud instead of silently miskeying the lookup.
752
+ if (task.continue_from && !node.resolvedContinueFrom) {
753
+ throw new Error(
754
+ `Internal: task "${taskId}" has continue_from "${task.continue_from}" ` +
755
+ `but no resolvedContinueFrom. buildDag should have qualified it.`,
756
+ );
757
+ }
758
+ const enrichedTask: TaskConfig = {
759
+ ...task,
760
+ prompt,
761
+ continue_from: node.resolvedContinueFrom,
762
+ };
763
+ const driverCtx: DriverContext = {
764
+ sessionMap,
765
+ normalizedMap,
766
+ workDir: task.cwd ?? workDir,
767
+ // Structured view for drivers that want fine-grained control
768
+ // over serialization (e.g. inserting [Previous Output] between
769
+ // contexts and task). Drivers that read task.prompt see the
770
+ // default serialization and need no changes.
771
+ promptDoc: doc,
772
+ };
773
+ const spec = await driver.buildCommand(enrichedTask, track, driverCtx);
774
+ log.debug(`[task:${taskId}]`, `driver=${driverName}`);
775
+ log.debug(`[task:${taskId}]`, `spawn args: ${JSON.stringify(spec.args)}`);
776
+ if (spec.cwd) log.debug(`[task:${taskId}]`, `spawn cwd: ${spec.cwd}`);
777
+ if (spec.env)
778
+ log.debug(
779
+ `[task:${taskId}]`,
780
+ `spawn env overrides: ${Object.keys(spec.env).join(', ')}`,
781
+ );
782
+ if (spec.stdin) log.debug(`[task:${taskId}]`, `spawn stdin: ${spec.stdin.length} chars`);
783
+ result = await runSpawn(spec, driver, runOpts);
784
+ }
785
+
786
+ // 6. Determine terminal status (without emitting yet — result must be complete first)
787
+ // H2: branch on failureKind so spawn errors no longer masquerade as
788
+ // timeouts. Old runners that don't set failureKind still work — we
789
+ // fall back to the historical `exitCode === -1 → timeout` heuristic so
790
+ // pre-existing third-party drivers don't regress.
791
+ let terminalStatus: TaskStatus;
792
+ const kind = result.failureKind;
793
+ if (kind === 'timeout') {
794
+ terminalStatus = 'timeout';
795
+ } else if (kind === 'spawn_error') {
796
+ terminalStatus = 'failed';
797
+ } else if (kind === undefined && result.exitCode === -1) {
798
+ // Legacy path: pre-H2 driver returned -1 with no kind. Treat as
799
+ // timeout for backward compatibility (the previous behaviour).
800
+ terminalStatus = 'timeout';
801
+ } else if (result.exitCode !== 0) {
802
+ terminalStatus = 'failed';
803
+ } else if (task.completion) {
804
+ const plugin = getHandler<CompletionPlugin>('completions', task.completion.type);
805
+ const completionCtx = { workDir: task.cwd ?? workDir, signal: abortController.signal };
806
+ const passed = await plugin.check(
807
+ task.completion as Record<string, unknown>,
808
+ result,
809
+ completionCtx,
810
+ );
811
+ // R4: strict boolean check. Truthy strings/numbers used to be coerced
812
+ // to success — a check returning "ok" would let a failing task pass.
813
+ if (typeof passed !== 'boolean') {
814
+ throw new Error(
815
+ `completion "${task.completion.type}".check() returned ${passed === null ? 'null' : typeof passed}, expected boolean`,
816
+ );
817
+ }
818
+ terminalStatus = passed ? 'success' : 'failed';
819
+ } else {
820
+ terminalStatus = 'success';
821
+ }
822
+
823
+ // Store normalized text separately (in-memory) for continue_from handoff.
824
+ // R15: clip oversized values so a runaway parseResult can't accumulate
825
+ // hundreds of MB across tasks.
826
+ if (result.normalizedOutput !== null) {
827
+ const clipped =
828
+ result.normalizedOutput.length > MAX_NORMALIZED_BYTES
829
+ ? result.normalizedOutput.slice(0, MAX_NORMALIZED_BYTES) +
830
+ `\n[…clipped at ${MAX_NORMALIZED_BYTES} bytes]`
831
+ : result.normalizedOutput;
832
+ normalizedMap.set(taskId, clipped);
833
+ }
834
+
835
+ if (result.stderr) {
836
+ const stderrPath = resolve(log.dir, `${taskId.replace(/\./g, '_')}.stderr`);
837
+ await Bun.write(stderrPath, result.stderr);
838
+ result = { ...result, stderrPath };
839
+ }
840
+
841
+ if (result.sessionId) {
842
+ // H1: qualified-only key.
843
+ sessionMap.set(taskId, result.sessionId);
844
+ }
845
+
846
+ // Set result and finishedAt before emitting terminal status so listeners see complete state
847
+ state.result = result;
848
+ state.finishedAt = nowISO();
849
+ setTaskStatus(taskId, terminalStatus);
850
+
851
+ // Log task outcome with relevant details
852
+ const durSec = (result.durationMs / 1000).toFixed(1);
853
+ if (terminalStatus === 'success') {
854
+ log.info(`[task:${taskId}]`, `success (${durSec}s)`);
855
+ } else {
856
+ log.error(
857
+ `[task:${taskId}]`,
858
+ `${terminalStatus} exit=${result.exitCode} duration=${durSec}s`,
859
+ );
860
+ if (result.stderr) {
861
+ const tail = tailLines(result.stderr, 10);
862
+ log.error(`[task:${taskId}]`, `stderr tail:\n${tail}`);
863
+ }
864
+ }
865
+
866
+ // File-only: full stdout/stderr dump (clipped) + extracted metadata
867
+ log.debug(
868
+ `[task:${taskId}]`,
869
+ `stdout: ${result.stdout.length} chars, stderr: ${result.stderr.length} chars`,
870
+ );
871
+ if (result.sessionId) {
872
+ log.debug(`[task:${taskId}]`, `sessionId: ${result.sessionId}`);
873
+ }
874
+ if (result.stderrPath) {
875
+ log.debug(`[task:${taskId}]`, `wrote stderr: ${result.stderrPath}`);
876
+ }
877
+ if (result.stdout) {
878
+ log.quiet(
879
+ `--- stdout (${taskId}) ---\n${clip(result.stdout)}\n--- end stdout ---`,
880
+ taskId,
881
+ );
882
+ }
883
+ if (result.stderr) {
884
+ log.quiet(
885
+ `--- stderr (${taskId}) ---\n${clip(result.stderr)}\n--- end stderr ---`,
886
+ taskId,
887
+ );
888
+ }
889
+ if (task.completion) {
890
+ log.debug(
891
+ `[task:${taskId}]`,
892
+ `completion check: type=${task.completion.type} result=${terminalStatus}`,
893
+ );
894
+ }
895
+ } catch (err: unknown) {
896
+ const errMsg = err instanceof Error ? (err.stack ?? err.message) : String(err);
897
+ log.error(`[task:${taskId}]`, `failed before execution: ${errMsg}`);
898
+ state.result = {
899
+ exitCode: -1,
900
+ stdout: '',
901
+ stderr: errMsg,
902
+ stderrPath: null,
903
+ durationMs: 0,
904
+ sessionId: null,
905
+ normalizedOutput: null,
906
+ // H2: Engine-level pre-execution errors (driver throw, middleware
907
+ // throw, getHandler 404) classify as spawn_error — the process never
908
+ // ran, so calling them "timeout" was actively misleading.
909
+ failureKind: 'spawn_error',
910
+ };
911
+ state.finishedAt = nowISO();
912
+ setTaskStatus(taskId, 'failed');
913
+ }
914
+
915
+ // 7. Fire hooks
916
+ const finalStatus: TaskStatus = state.status;
917
+ try {
918
+ await fireHook(taskId, finalStatus === 'success' ? 'task_success' : 'task_failure');
919
+ } catch (hookErr) {
920
+ log.error(
921
+ `[task:${taskId}]`,
922
+ `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
923
+ );
924
+ }
925
+
926
+ // 8. Handle stop_all for failure states
927
+ if (finalStatus !== 'success' && getOnFailure(taskId) === 'stop_all') {
928
+ applyStopAll(node.track.id);
929
+ }
930
+ }
931
+
932
+ // ── Event loop ──
933
+ // Each task is launched as soon as ALL its deps reach a terminal state.
934
+ // We track in-flight tasks in `running` so a task completing mid-batch
935
+ // immediately unblocks its dependents without waiting for sibling tasks.
936
+ const running = new Map<string, Promise<void>>();
937
+
938
+ try {
939
+ while (!pipelineAborted) {
940
+ // Launch every task whose deps are all terminal and that isn't already in-flight
941
+ for (const [id, state] of states) {
942
+ if (state.status !== 'waiting' || running.has(id)) continue;
943
+ const node = dag.nodes.get(id)!;
944
+ const allDepsTerminal =
945
+ node.dependsOn.length === 0 ||
946
+ node.dependsOn.every((d) => isTerminal(states.get(d)!.status));
947
+ if (!allDepsTerminal) continue;
948
+ const p = processTask(id).finally(() => running.delete(id));
949
+ running.set(id, p);
950
+ }
951
+
952
+ // All tasks terminal — done
953
+ if ([...states.values()].every((s) => isTerminal(s.status))) break;
954
+
955
+ if (running.size === 0) {
956
+ // Nothing in-flight but non-terminal tasks exist (e.g. trigger-wait states
957
+ // that processTask hasn't been called for yet). Poll briefly.
958
+ await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
959
+ } else {
960
+ // Wait for any one task to finish, then re-scan for new launchables.
961
+ await Promise.race(running.values());
962
+ }
963
+ }
964
+
965
+ if (pipelineAborted) {
966
+ // Wait for in-flight tasks to honour the abort signal before marking states.
967
+ if (running.size > 0) await Promise.allSettled(running.values());
968
+ for (const [id, state] of states) {
969
+ if (!isTerminal(state.status)) {
970
+ // By the time allSettled resolves, processTask's try/finally has already
971
+ // set running tasks to success/failed/timeout. The only non-terminal
972
+ // statuses remaining here are waiting/idle tasks that were never started.
973
+ state.finishedAt = nowISO();
974
+ setTaskStatus(id, 'skipped');
975
+ }
976
+ }
977
+ }
978
+ } finally {
979
+ if (pipelineTimer) clearTimeout(pipelineTimer);
980
+ // Clean up the external abort signal listener to prevent dead references
981
+ // accumulating on long-lived shared AbortControllers.
982
+ if (options.signal) {
983
+ options.signal.removeEventListener('abort', externalAbortHandler);
984
+ }
985
+ // Safety net: drain any approvals still pending at shutdown (e.g. crash path).
986
+ if (approvalGateway.pending().length > 0) {
987
+ approvalGateway.abortAll('pipeline finished');
988
+ }
989
+ }
990
+
991
+ // ── Summary ──
992
+ const summary = { total: 0, success: 0, failed: 0, skipped: 0, timeout: 0, blocked: 0 };
993
+ for (const [, state] of states) {
994
+ summary.total++;
995
+ switch (state.status) {
996
+ case 'success':
997
+ summary.success++;
998
+ break;
999
+ case 'failed':
1000
+ summary.failed++;
1001
+ break;
1002
+ case 'skipped':
1003
+ summary.skipped++;
1004
+ break;
1005
+ case 'timeout':
1006
+ summary.timeout++;
1007
+ break;
1008
+ case 'blocked':
1009
+ summary.blocked++;
1010
+ break;
1011
+ }
1012
+ }
1013
+
1014
+ const finishedAt = nowISO();
1015
+ const durationMs = new Date(finishedAt).getTime() - new Date(startedAt).getTime();
1016
+
1017
+ if (pipelineAborted) {
1018
+ await executeHook(
1019
+ config.hooks,
1020
+ 'pipeline_error',
1021
+ buildPipelineErrorContext(pipelineInfo, 'Pipeline timeout exceeded'),
1022
+ workDir,
1023
+ );
1024
+ } else {
1025
+ await executeHook(
1026
+ config.hooks,
1027
+ 'pipeline_complete',
1028
+ buildPipelineCompleteContext(
1029
+ { ...pipelineInfo, finished_at: finishedAt, duration_ms: durationMs },
1030
+ summary,
1031
+ ),
1032
+ workDir,
1033
+ );
1034
+ }
1035
+
1036
+ const allSuccess =
1037
+ !pipelineAborted && summary.failed === 0 && summary.timeout === 0 && summary.blocked === 0;
1038
+
1039
+ log.section('Pipeline summary');
1040
+ log.quiet(`status: ${pipelineAborted ? 'aborted (timeout)' : 'completed'}`);
1041
+ log.quiet(`duration: ${(durationMs / 1000).toFixed(1)}s`);
1042
+ log.quiet(
1043
+ `counts: total=${summary.total} success=${summary.success} ` +
1044
+ `failed=${summary.failed} skipped=${summary.skipped} ` +
1045
+ `timeout=${summary.timeout} blocked=${summary.blocked}`,
1046
+ );
1047
+ log.quiet('');
1048
+ log.quiet('per-task:');
1049
+ for (const [id, state] of states) {
1050
+ const dur =
1051
+ state.result?.durationMs != null ? `${(state.result.durationMs / 1000).toFixed(1)}s` : '-';
1052
+ const exit = state.result?.exitCode ?? '-';
1053
+ log.quiet(` ${state.status.padEnd(8)} ${id} (exit=${exit}, ${dur})`);
1054
+ }
1055
+
1056
+ log.info('[pipeline]', `completed "${config.name}"`);
1057
+ log.info(
1058
+ '[pipeline]',
1059
+ `Total: ${summary.total} | Success: ${summary.success} | Failed: ${summary.failed} | Skipped: ${summary.skipped} | Timeout: ${summary.timeout} | Blocked: ${summary.blocked}`,
1060
+ );
1061
+ log.info('[pipeline]', `Duration: ${(durationMs / 1000).toFixed(1)}s`);
1062
+ log.info('[pipeline]', `Log: ${log.path}`);
1063
+
1064
+ emit({ type: 'pipeline_end', runId, success: allSuccess });
1065
+ return { success: allSuccess, runId, logPath: log.path, summary, states: freezeStates(states) };
1066
+ } finally {
1067
+ // Close the persistent log file handle before pruning.
1068
+ log.close();
1069
+ // Prune old per-run log directories on every exit path (normal, blocked, or thrown).
1070
+ // Exclude the current runId so a concurrent run cannot delete its own live directory.
1071
+ if (maxLogRuns > 0) {
1072
+ await pruneLogDirs(resolve(workDir, '.tagma', 'logs'), maxLogRuns, runId);
1073
+ }
1074
+ }
1075
+ }
1076
+
1077
/**
 * Delete the oldest run directories under `logsDir`, keeping at most `keep`
 * runs in total (the live run identified by `excludeRunId` counts as one).
 *
 * Candidates are the entries of `logsDir` that are directories AND whose name
 * starts with `run_`. Plain files (or anything else that merely shares the
 * prefix) are ignored: they neither occupy a retention slot nor get removed.
 * Candidates are sorted lexicographically; because runIds are prefixed with a
 * base-36 timestamp, lexicographic order equals chronological order.
 *
 * `excludeRunId` is never deleted, even if it would otherwise be pruned — this
 * prevents a concurrent run from removing a live log directory still in use.
 *
 * D10: the live run occupies one slot out of `keep`, so at most `keep - 1`
 * *historical* directories are retained. Without this adjustment the function
 * kept `keep` historical dirs plus 1 live dir = `keep + 1` total on disk.
 *
 * @param logsDir      Directory that holds one sub-directory per run.
 * @param keep         Total number of runs to retain, including the live one.
 * @param excludeRunId Name of the live run directory; always preserved.
 * @returns Resolves once every deletion attempt has settled. Never rejects:
 *          a missing/unreadable `logsDir` and individual deletion failures
 *          are both swallowed on purpose (best-effort cleanup).
 */
async function pruneLogDirs(logsDir: string, keep: number, excludeRunId: string): Promise<void> {
  let runDirs: string[];
  try {
    // withFileTypes lets us tell directories from stray files without an
    // extra stat() per entry.
    const entries = await readdir(logsDir, { withFileTypes: true });
    runDirs = entries
      .filter(
        (entry) =>
          entry.isDirectory() && entry.name.startsWith('run_') && entry.name !== excludeRunId,
      )
      .map((entry) => entry.name)
      .sort();
  } catch {
    return; // logsDir doesn't exist yet — nothing to prune
  }

  // keep - 1 historical slots (1 slot is reserved for the live excludeRunId).
  const historyKeep = Math.max(0, keep - 1);
  // Oldest first, so everything before the last `historyKeep` entries goes.
  const toDelete = runDirs.slice(0, Math.max(0, runDirs.length - historyKeep));

  await Promise.all(
    toDelete.map((dir) =>
      rm(resolve(logsDir, dir), { recursive: true, force: true }).catch(() => {
        // Ignore deletion errors — stale dirs are better than a crash
      }),
    ),
  );
}
1112
+
1113
/**
 * Report whether `status` is one of the end states a task cannot leave:
 * `success`, `failed`, `timeout`, `skipped` or `blocked`.
 *
 * @param status Task status to classify.
 * @returns `true` for the five end states listed above, `false` for any other value.
 */
function isTerminal(status: TaskStatus): boolean {
  switch (status) {
    case 'success':
    case 'failed':
    case 'timeout':
    case 'skipped':
    case 'blocked':
      return true;
    default:
      return false;
  }
}
1122
+
1123
/**
 * Build a caller-safe snapshot of the task-state map.
 *
 * A new `Map` is returned with one fresh state object per task. Within each
 * state, `config`, `trackConfig` and `result` are re-created with an object
 * spread, so the copy is one level deep: replacing a top-level field on the
 * snapshot does not touch the engine's state, but nested values are still
 * shared with the original. A falsy `result` is normalised to `null`.
 * Nothing is frozen at runtime — read-only access is expressed only through
 * the `ReadonlyMap` return type.
 *
 * @param states Live task states keyed by task id.
 * @returns A new map holding the copied states, in the same iteration order.
 */
function freezeStates(states: Map<string, TaskState>): ReadonlyMap<string, TaskState> {
  return new Map<string, TaskState>(
    Array.from(states, ([taskId, current]): [string, TaskState] => {
      const resultCopy = current.result ? { ...current.result } : null;
      return [
        taskId,
        {
          config: { ...current.config },
          trackConfig: { ...current.trackConfig },
          status: current.status,
          result: resultCopy,
          startedAt: current.startedAt,
          finishedAt: current.finishedAt,
        },
      ];
    }),
  );
}