@tagma/sdk 0.3.9 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tagma/sdk",
3
- "version": "0.3.9",
3
+ "version": "0.4.1",
4
4
  "license": "MIT",
5
5
  "repository": {
6
6
  "type": "git",
@@ -31,13 +31,13 @@
31
31
  "dependencies": {
32
32
  "js-yaml": "^4.1.0",
33
33
  "chokidar": "^4.0.0",
34
- "@tagma/types": "0.1.7"
34
+ "@tagma/types": "0.2.0"
35
35
  },
36
36
  "devDependencies": {
37
37
  "@types/js-yaml": "^4.0.9",
38
38
  "bun-types": "latest",
39
39
  "typescript": "^6.0.2",
40
- "@tagma/driver-codex": "0.1.5",
41
- "@tagma/driver-opencode": "0.1.4"
40
+ "@tagma/driver-codex": "0.1.9",
41
+ "@tagma/driver-opencode": "0.1.8"
42
42
  }
43
43
  }
@@ -21,6 +21,23 @@ import type { ApprovalGateway, ApprovalEvent } from '../approval';
21
21
  export interface WebSocketApprovalAdapterOptions {
22
22
  port?: number; // default: 3000
23
23
  hostname?: string; // default: 'localhost'
24
+ /**
25
+ * M11: shared secret required from the client during the WebSocket
26
+ * upgrade. The token can be supplied either as the `?token=` query
27
+ * parameter or in the `x-tagma-token` request header. When set, any
28
+ * upgrade request that fails the check is rejected with HTTP 401 and
29
+ * never reaches the WebSocket layer (so a misconfigured client cannot
30
+ * exhaust rate-limit slots either). Leave undefined for backward
31
+ * compatibility with localhost-only deployments. Note: '' is not "unset" — it is compared literally and matches a client that sends no token.
32
+ */
33
+ token?: string;
34
+ /**
35
+ * M11: origin-check opt-out flag. The upgrade handler performs no Origin
36
+ * check, so any origin (including no Origin header) is accepted whatever this is set to. The recommended setup is to
37
+ * combine `token` with hostname='localhost' / loopback bind so the
38
+ * adapter is reachable only by trusted local processes.
39
+ */
40
+ allowAnyOrigin?: boolean;
24
41
  }
25
42
 
26
43
  export interface WebSocketApprovalAdapter {
@@ -40,6 +57,7 @@ export function attachWebSocketApprovalAdapter(
40
57
  ): WebSocketApprovalAdapter {
41
58
  const port = options.port ?? 3000;
42
59
  const hostname = options.hostname ?? 'localhost';
60
+ const requiredToken = options.token ?? null;
43
61
 
44
62
  type WS = import('bun').ServerWebSocket<unknown>;
45
63
  const clients = new Set<WS>();
@@ -74,6 +92,19 @@ export function attachWebSocketApprovalAdapter(
74
92
  hostname,
75
93
 
76
94
  fetch(req, server) {
95
+ // M11: enforce token before any upgrade so an unauthenticated client
96
+ // can't even open a socket. Tokens may arrive via header or query; a non-empty header takes precedence.
97
+ if (requiredToken !== null) {
98
+ const headerToken = req.headers.get('x-tagma-token') ?? '';
99
+ let queryToken = '';
100
+ try {
101
+ queryToken = new URL(req.url).searchParams.get('token') ?? '';
102
+ } catch { /* malformed URL — leave queryToken empty */ }
103
+ const presented = headerToken || queryToken;
104
+ if (presented !== requiredToken) {
105
+ return new Response('unauthorized', { status: 401 });
106
+ }
107
+ }
77
108
  if (server.upgrade(req)) return undefined;
78
109
  return new Response('tagma-sdk WebSocket approval endpoint', { status: 426 });
79
110
  },
package/src/dag.ts CHANGED
@@ -5,6 +5,15 @@ export interface DagNode {
5
5
  readonly task: TaskConfig;
6
6
  readonly track: TrackConfig;
7
7
  readonly dependsOn: readonly string[];
8
+ /**
9
+ * H1: `task.continue_from` may be written by users as a bare task id
10
+ * (e.g. `review`) or a same-track shorthand. The driver needs the
11
+ * fully-qualified upstream id to look up output/session/normalized maps
12
+ * deterministically — bare lookups race when two tracks happen to share
13
+ * a task name. dag.ts performs the qualification once, here, so the
14
+ * engine never has to.
15
+ */
16
+ readonly resolvedContinueFrom?: string;
8
17
  }
9
18
 
10
19
  export interface Dag {
@@ -77,6 +86,7 @@ export function buildDag(config: PipelineConfig): Dag {
77
86
  for (const task of track.tasks) {
78
87
  const qid = qualifyId(track.id, task.id);
79
88
  const deps: string[] = [];
89
+ let resolvedContinueFrom: string | undefined;
80
90
 
81
91
  if (task.depends_on) {
82
92
  for (const dep of task.depends_on) {
@@ -93,14 +103,15 @@ export function buildDag(config: PipelineConfig): Dag {
93
103
  `Use a fully-qualified reference (trackId.taskId) or ensure the target task exists.`
94
104
  );
95
105
  }
106
+ resolvedContinueFrom = resolved;
96
107
  if (!deps.includes(resolved)) {
97
108
  deps.push(resolved); // continue_from implies dependency
98
109
  }
99
110
  }
100
111
 
101
- // Replace node with resolved deps
112
+ // Replace node with resolved deps + qualified continue_from.
102
113
  const node = nodes.get(qid)!;
103
- nodes.set(qid, { ...node, dependsOn: deps });
114
+ nodes.set(qid, { ...node, dependsOn: deps, resolvedContinueFrom });
104
115
  }
105
116
  }
106
117
 
package/src/engine.ts CHANGED
@@ -375,14 +375,23 @@ export async function runPipeline(
375
375
  }
376
376
  }
377
377
 
378
- function applyStopAll(trackId: string): void {
378
+ /**
379
+ * H3: "stop_all" historically only stopped tasks within the same track,
380
+ * which contradicted both its name and user expectations. It now stops
381
+ * the **entire pipeline**:
382
+ * - In-flight tasks are signalled via the shared abort controller so
383
+ * drivers / runner.ts can cancel cooperatively (returning
384
+ * `failureKind: 'timeout'`).
385
+ * - Still-waiting tasks across every track are immediately marked
386
+ * skipped so the run completes promptly.
387
+ * The terminal lock in setTaskStatus prevents any later re-transition
388
+ * should a completed running task try to overwrite the skipped state.
389
+ */
390
+ function applyStopAll(_failedTrackId: string): void {
391
+ pipelineAborted = true;
392
+ abortController.abort();
379
393
  for (const [id, state] of states) {
380
- // Only skip tasks that are still waiting — tasks already running must be
381
- // allowed to complete naturally so their process is not orphaned and their
382
- // final status (success/failed/timeout) is recorded correctly.
383
- // The terminal lock in setTaskStatus prevents any later re-transition
384
- // should a completed running task try to overwrite the skipped state.
385
- if (state.trackConfig.id === trackId && state.status === 'waiting') {
394
+ if (state.status === 'waiting') {
386
395
  state.finishedAt = nowISO();
387
396
  setTaskStatus(id, 'skipped');
388
397
  }
@@ -600,7 +609,17 @@ export async function runPipeline(
600
609
  `prompt: ${originalLen} chars (final: ${prompt.length} chars)`);
601
610
  log.quiet(`--- prompt (final) ---\n${clip(prompt)}\n--- end prompt ---`, taskId);
602
611
 
603
- const enrichedTask: TaskConfig = { ...task, prompt };
612
+ // H1: hand the driver a continue_from that has already been
613
+ // qualified by dag.ts. Without this, drivers like codex/opencode/
614
+ // claude-code do `outputMap.get(task.continue_from)` directly with
615
+ // the user's raw (possibly bare) string, which races whenever two
616
+ // tracks share a task name. dag.ts has the only authoritative
617
+ // resolver, so we use its precomputed answer here.
618
+ const enrichedTask: TaskConfig = {
619
+ ...task,
620
+ prompt,
621
+ continue_from: node.resolvedContinueFrom ?? task.continue_from,
622
+ };
604
623
  const driverCtx: DriverContext = {
605
624
  sessionMap, outputMap, normalizedMap, workDir: task.cwd ?? workDir,
606
625
  };
@@ -627,14 +646,30 @@ export async function runPipeline(
627
646
  await mkdir(dirname(outPath), { recursive: true });
628
647
  await Bun.write(outPath, result.stdout);
629
648
  result = { ...result, outputPath: outPath };
649
+ // H1: only write the fully-qualified taskId. The previous "also store
650
+ // bare id when not yet present" trick produced non-deterministic
651
+ // continue_from lookups when two tracks shared a task name —
652
+ // whichever finished first won the bare key. dag.ts now resolves
653
+ // continue_from to a qualified id (DagNode.resolvedContinueFrom),
654
+ // and the enrichedTask handed to drivers carries that qualified
655
+ // version, so bare keys are no longer needed.
630
656
  outputMap.set(taskId, outPath);
631
- const bareId = taskId.includes('.') ? taskId.split('.').pop()! : taskId;
632
- if (!outputMap.has(bareId)) outputMap.set(bareId, outPath);
633
657
  }
634
658
 
635
659
  // 6. Determine terminal status (without emitting yet — result must be complete first)
660
+ // H2: branch on failureKind so spawn errors no longer masquerade as
661
+ // timeouts. Old runners that don't set failureKind still work — we
662
+ // fall back to the historical `exitCode === -1 → timeout` heuristic so
663
+ // pre-existing third-party drivers don't regress.
636
664
  let terminalStatus: TaskStatus;
637
- if (result.exitCode === -1) {
665
+ const kind = result.failureKind;
666
+ if (kind === 'timeout') {
667
+ terminalStatus = 'timeout';
668
+ } else if (kind === 'spawn_error') {
669
+ terminalStatus = 'failed';
670
+ } else if (kind === undefined && result.exitCode === -1) {
671
+ // Legacy path: pre-H2 driver returned -1 with no kind. Treat as
672
+ // timeout for backward compatibility (the previous behaviour).
638
673
  terminalStatus = 'timeout';
639
674
  } else if (result.exitCode !== 0) {
640
675
  terminalStatus = 'failed';
@@ -662,9 +697,8 @@ export async function runPipeline(
662
697
  ? result.normalizedOutput.slice(0, MAX_NORMALIZED_BYTES) +
663
698
  `\n[…clipped at ${MAX_NORMALIZED_BYTES} bytes]`
664
699
  : result.normalizedOutput;
700
+ // H1: qualified-only key (see comment near outputMap above).
665
701
  normalizedMap.set(taskId, clipped);
666
- const bareId = taskId.includes('.') ? taskId.split('.').pop()! : taskId;
667
- if (!normalizedMap.has(bareId)) normalizedMap.set(bareId, clipped);
668
702
  }
669
703
 
670
704
  if (result.stderr) {
@@ -674,9 +708,8 @@ export async function runPipeline(
674
708
  }
675
709
 
676
710
  if (result.sessionId) {
711
+ // H1: qualified-only key (see comment near outputMap above).
677
712
  sessionMap.set(taskId, result.sessionId);
678
- const bareId = taskId.includes('.') ? taskId.split('.').pop()! : taskId;
679
- if (!sessionMap.has(bareId)) sessionMap.set(bareId, result.sessionId);
680
713
  }
681
714
 
682
715
  // Set result and finishedAt before emitting terminal status so listeners see complete state
@@ -729,6 +762,10 @@ export async function runPipeline(
729
762
  stderr: errMsg,
730
763
  outputPath: null, stderrPath: null, durationMs: 0,
731
764
  sessionId: null, normalizedOutput: null,
765
+ // H2: Engine-level pre-execution errors (driver throw, middleware
766
+ // throw, getHandler 404) classify as spawn_error — the process never
767
+ // ran, so calling them "timeout" was actively misleading.
768
+ failureKind: 'spawn_error',
732
769
  };
733
770
  state.finishedAt = nowISO();
734
771
  setTaskStatus(taskId, 'failed');
package/src/runner.ts CHANGED
@@ -101,7 +101,11 @@ function resolveWindowsExe(
101
101
  return args;
102
102
  }
103
103
 
104
- /** Build a "failed before spawn" result. */
104
+ /**
105
+ * H2: Build a "failed before spawn" result. Tagged as 'spawn_error' so the
106
+ * engine can show a useful classification ("driver tried to launch X but
107
+ * the binary wasn't found") rather than the misleading "timeout".
108
+ */
105
109
  function failResult(stderr: string, durationMs: number): TaskResult {
106
110
  return {
107
111
  exitCode: -1,
@@ -112,6 +116,7 @@ function failResult(stderr: string, durationMs: number): TaskResult {
112
116
  durationMs,
113
117
  sessionId: null,
114
118
  normalizedOutput: null,
119
+ failureKind: 'spawn_error',
115
120
  };
116
121
  }
117
122
 
@@ -289,6 +294,9 @@ export async function runSpawn(
289
294
  durationMs,
290
295
  sessionId: null,
291
296
  normalizedOutput: null,
297
+ // H2: explicit kind so engine.ts no longer has to guess "is exitCode -1
298
+ // a timeout or a spawn-failure?" Both used to share the same code.
299
+ failureKind: 'timeout',
292
300
  };
293
301
  }
294
302
 
@@ -299,6 +307,12 @@ export async function runSpawn(
299
307
  // value doesn't poison sessionMap/normalizedMap downstream.
300
308
  let sessionId: string | null = null;
301
309
  let normalizedOutput: string | null = null;
310
+ // M12: drivers can flip a task's terminal status to failed even when the
311
+ // process exited 0 (e.g. opencode returning `{type:"error"}` JSON). When
312
+ // the flag is set, we synthesize a non-zero exit code and append a reason
313
+ // line to stderr so engine.ts marks the task as failed with a useful
314
+ // explanation instead of letting the error JSON pass through as success.
315
+ let forcedFailureMessage: string | null = null;
302
316
  if (driver?.parseResult) {
303
317
  try {
304
318
  const meta = driver.parseResult(stdout, stderr);
@@ -309,6 +323,11 @@ export async function runSpawn(
309
323
  if (typeof meta.normalizedOutput === 'string') {
310
324
  normalizedOutput = meta.normalizedOutput;
311
325
  }
326
+ if (meta.forceFailure === true) {
327
+ forcedFailureMessage = typeof meta.forceFailureReason === 'string'
328
+ ? meta.forceFailureReason
329
+ : 'Driver flagged task as failed (forceFailure)';
330
+ }
312
331
  }
313
332
  } catch (err) {
314
333
  // The spawn itself succeeded; only metadata extraction failed.
@@ -325,10 +344,32 @@ export async function runSpawn(
325
344
  durationMs,
326
345
  sessionId: null,
327
346
  normalizedOutput: null,
347
+ // H2: parseResult threw — the spawn itself succeeded, so the failure
348
+ // is "the process exited but the driver couldn't parse it". Surface
349
+ // that as exit_nonzero (when the actual exit was non-zero) or null
350
+ // (when the underlying exit was 0 — UI will still mark it failed via
351
+ // engine.ts because the result is incomplete).
352
+ failureKind: exitCode === 0 ? null : 'exit_nonzero',
328
353
  };
329
354
  }
330
355
  }
331
356
 
357
+ // M12: when the driver forced a failure, treat as exit_nonzero with the
358
+ // reason appended to stderr so users see WHY the task failed without
359
+ // having to dig through driver-specific JSON.
360
+ if (forcedFailureMessage !== null) {
361
+ return {
362
+ exitCode: exitCode === 0 ? 1 : exitCode,
363
+ stdout,
364
+ stderr: stderr + (stderr.endsWith('\n') ? '' : '\n') + `[driver] ${forcedFailureMessage}`,
365
+ outputPath: null,
366
+ stderrPath: null,
367
+ durationMs,
368
+ sessionId,
369
+ normalizedOutput,
370
+ failureKind: 'exit_nonzero',
371
+ };
372
+ }
332
373
  return {
333
374
  exitCode,
334
375
  stdout,
@@ -338,6 +379,9 @@ export async function runSpawn(
338
379
  durationMs,
339
380
  sessionId,
340
381
  normalizedOutput,
382
+ // H2: success vs nonzero exit. Engine uses this to short-circuit the
383
+ // timeout branch even if a third-party driver returns -1 by mistake.
384
+ failureKind: exitCode === 0 ? null : 'exit_nonzero',
341
385
  };
342
386
  }
343
387
 
package/src/schema.ts CHANGED
@@ -41,14 +41,16 @@ function validateRawTask(task: RawTaskConfig, trackId: string): void {
41
41
  if (!task.id) throw new Error(`track "${trackId}": task.id is required`);
42
42
  if (task.use) return; // template usage, validated later
43
43
 
44
- const hasPrompt = typeof task.prompt === 'string' && task.prompt.length > 0;
45
- const hasCommand = typeof task.command === 'string' && task.command.length > 0;
46
- if (!hasPrompt && !hasCommand) {
44
+ const hasPromptKey = typeof task.prompt === 'string';
45
+ const hasCommandKey = typeof task.command === 'string';
46
+ if (!hasPromptKey && !hasCommandKey) {
47
47
  throw new Error(`task "${task.id}": must have either "prompt" or "command"`);
48
48
  }
49
- if (hasPrompt && hasCommand) {
49
+ if (hasPromptKey && hasCommandKey) {
50
50
  throw new Error(`task "${task.id}": cannot have both "prompt" and "command"`);
51
51
  }
52
+ // Empty-content tasks (e.g. `prompt: ''`) are allowed at parse time and
53
+ // reported (returned, not thrown) as validation errors by validate-raw.ts.
52
54
  }
53
55
 
54
56
  // ═══ Template Expansion ═══
@@ -16,10 +16,21 @@ function isValidDuration(input: string): boolean {
16
16
  const VALID_ON_FAILURE = new Set(['skip_downstream', 'stop_all', 'ignore']);
17
17
  const VALID_MODEL_TIERS = new Set(['low', 'medium', 'high']);
18
18
 
19
+ export type ValidationSeverity = 'error' | 'warning';
20
+
19
21
  export interface ValidationError {
20
22
  /** JSONPath-style location, e.g. "tracks[0].tasks[1].prompt" */
21
23
  path: string;
22
24
  message: string;
25
+ /**
26
+ * H8: not all "errors" are equally fatal. The DAG runtime is happy to
27
+ * insert implicit `continue_from → depends_on` ordering, so the matching
28
+ * validate-raw check is a *style* nit, not a hard failure. Severity lets
29
+ * the editor render it as a soft warning instead of blocking save / run.
30
+ * Existing call sites that don't read this field still treat every entry
31
+ * as fatal — defaulting `severity` to undefined preserves that behaviour.
32
+ */
33
+ severity?: ValidationSeverity;
23
34
  }
24
35
 
25
36
  /**
@@ -111,19 +122,30 @@ export function validateRaw(config: RawPipelineConfig): ValidationError[] {
111
122
  // Template-based tasks: skip prompt/command checks (params validated at runtime)
112
123
  if (task.use) continue;
113
124
 
114
- const hasPrompt = typeof task.prompt === 'string' && task.prompt.trim().length > 0;
115
- const hasCommand = typeof task.command === 'string' && task.command.trim().length > 0;
125
+ const hasPromptKey = typeof task.prompt === 'string';
126
+ const hasCommandKey = typeof task.command === 'string';
127
+ const promptEmpty = hasPromptKey && task.prompt!.trim().length === 0;
128
+ const commandEmpty = hasCommandKey && task.command!.trim().length === 0;
116
129
 
117
- if (!hasPrompt && !hasCommand) {
130
+ if (hasPromptKey && hasCommandKey) {
131
+ errors.push({
132
+ path: taskPath,
133
+ message: `Task "${task.id}": cannot have both "prompt" and "command"`,
134
+ });
135
+ } else if (!hasPromptKey && !hasCommandKey) {
118
136
  errors.push({
119
137
  path: taskPath,
120
138
  message: `Task "${task.id}": must have "prompt" or "command"`,
121
139
  });
122
- }
123
- if (hasPrompt && hasCommand) {
140
+ } else if (promptEmpty) {
124
141
  errors.push({
125
142
  path: taskPath,
126
- message: `Task "${task.id}": cannot have both "prompt" and "command"`,
143
+ message: `Task "${task.id}": prompt content cannot be empty`,
144
+ });
145
+ } else if (commandEmpty) {
146
+ errors.push({
147
+ path: taskPath,
148
+ message: `Task "${task.id}": command content cannot be empty`,
127
149
  });
128
150
  }
129
151
 
@@ -169,9 +191,15 @@ export function validateRaw(config: RawPipelineConfig): ValidationError[] {
169
191
  } else if (!task.depends_on || !task.depends_on.some(dep =>
170
192
  resolveDepRef(dep, track.id, allQualified, bareToQualified) === resolved
171
193
  )) {
194
+ // H8: demote to a warning. dag.ts/buildDag inserts continue_from
195
+ // as an implicit dependency at runtime, so the pipeline runs fine
196
+ // without the explicit listing. Treat as a style hint rather than
197
+ // blocking save / run, otherwise we frighten users with a red
198
+ // "Configuration error" for code that would have run successfully.
172
199
  errors.push({
173
200
  path: `${taskPath}.continue_from`,
174
- message: `Task "${task.id}": continue_from "${task.continue_from}" should also be listed in depends_on to ensure ordering`,
201
+ message: `Task "${task.id}": continue_from "${task.continue_from}" should also be listed in depends_on for clarity (the runtime will add it implicitly).`,
202
+ severity: 'warning',
175
203
  });
176
204
  }
177
205
  }