npm - @tagma/sdk - Versions diffs - 0.3.9 → 0.4.1 - Mend

@tagma/sdk 0.3.9 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/package.json +4 -4
package/src/adapters/websocket-approval.ts +31 -0
package/src/dag.ts +13 -2
package/src/engine.ts +52 -15
package/src/runner.ts +45 -1
package/src/schema.ts +6 -4
package/src/validate-raw.ts +35 -7

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tagma/sdk",
-  "version": "0.3.9",
+  "version": "0.4.1",
   "license": "MIT",
   "repository": {
     "type": "git",
@@ -31,13 +31,13 @@
   "dependencies": {
     "js-yaml": "^4.1.0",
     "chokidar": "^4.0.0",
-    "@tagma/types": "0.1.7"
+    "@tagma/types": "0.2.0"
   },
   "devDependencies": {
     "@types/js-yaml": "^4.0.9",
     "bun-types": "latest",
     "typescript": "^6.0.2",
-    "@tagma/driver-codex": "0.1.5",
-    "@tagma/driver-opencode": "0.1.4"
+    "@tagma/driver-codex": "0.1.9",
+    "@tagma/driver-opencode": "0.1.8"
   }
 }

package/src/adapters/websocket-approval.ts CHANGED Viewed

@@ -21,6 +21,23 @@ import type { ApprovalGateway, ApprovalEvent } from '../approval';
 export interface WebSocketApprovalAdapterOptions {
   port?: number;      // default: 3000
   hostname?: string;  // default: 'localhost'
+  /**
+   * M11: shared secret required from the client during the WebSocket
+   * upgrade. The token can be supplied either as the `?token=` query
+   * parameter or in the `x-tagma-token` request header. When set, any
+   * upgrade request that fails the check is rejected with HTTP 401 and
+   * never reaches the WebSocket layer (so a misconfigured client cannot
+   * exhaust rate-limit slots either). Leave undefined for backward
+   * compatibility with localhost-only deployments.
+   */
+  token?: string;
+  /**
+   * M11: opt-out of origin checking. Defaults to false. NOTE(review): as
+   * shown in this diff nothing reads the flag, so any origin (including no
+   * Origin header) is accepted either way; confirm. Recommended setup:
+   * combine `token` with hostname='localhost' / loopback bind so the
+   * adapter is reachable only by trusted local processes.
+   */
+  allowAnyOrigin?: boolean;
 }
 export interface WebSocketApprovalAdapter {
@@ -40,6 +57,7 @@ export function attachWebSocketApprovalAdapter(
 ): WebSocketApprovalAdapter {
   const port = options.port ?? 3000;
   const hostname = options.hostname ?? 'localhost';
+  const requiredToken = options.token ?? null;
   type WS = import('bun').ServerWebSocket<unknown>;
   const clients = new Set<WS>();
@@ -74,6 +92,19 @@ export function attachWebSocketApprovalAdapter(
     hostname,
     fetch(req, server) {
+      // M11: enforce token before any upgrade so an unauthenticated client
+      // can't even open a socket. Tokens may arrive via header or query.
+      if (requiredToken !== null) {
+        const headerToken = req.headers.get('x-tagma-token') ?? '';
+        let queryToken = '';
+        try {
+          queryToken = new URL(req.url).searchParams.get('token') ?? '';
+        } catch { /* malformed URL — leave queryToken empty */ }
+        const presented = headerToken || queryToken;
+        if (presented !== requiredToken) {
+          return new Response('unauthorized', { status: 401 });
+        }
+      }
       if (server.upgrade(req)) return undefined;
       return new Response('tagma-sdk WebSocket approval endpoint', { status: 426 });
     },

package/src/dag.ts CHANGED Viewed

@@ -5,6 +5,15 @@ export interface DagNode {
   readonly task: TaskConfig;
   readonly track: TrackConfig;
   readonly dependsOn: readonly string[];
+  /**
+   * H1: `task.continue_from` may be written by users as a bare task id
+   * (e.g. `review`) or a same-track shorthand. The driver needs the
+   * fully-qualified upstream id to look up output/session/normalized maps
+   * deterministically — bare lookups race when two tracks happen to share
+   * a task name. dag.ts performs the qualification once, here, so the
+   * engine never has to.
+   */
+  readonly resolvedContinueFrom?: string;
 }
 export interface Dag {
@@ -77,6 +86,7 @@ export function buildDag(config: PipelineConfig): Dag {
     for (const task of track.tasks) {
       const qid = qualifyId(track.id, task.id);
       const deps: string[] = [];
+      let resolvedContinueFrom: string | undefined;
       if (task.depends_on) {
         for (const dep of task.depends_on) {
@@ -93,14 +103,15 @@ export function buildDag(config: PipelineConfig): Dag {
             `Use a fully-qualified reference (trackId.taskId) or ensure the target task exists.`
           );
         }
+        resolvedContinueFrom = resolved;
         if (!deps.includes(resolved)) {
           deps.push(resolved); // continue_from implies dependency
         }
       }
-      // Replace node with resolved deps
+      // Replace node with resolved deps + qualified continue_from.
       const node = nodes.get(qid)!;
-      nodes.set(qid, { ...node, dependsOn: deps });
+      nodes.set(qid, { ...node, dependsOn: deps, resolvedContinueFrom });
     }
   }

package/src/engine.ts CHANGED Viewed

@@ -375,14 +375,23 @@ export async function runPipeline(
     }
   }
-  function applyStopAll(trackId: string): void {
+  /**
+   * H3: "stop_all" historically only stopped tasks within the same track,
+   * which contradicted both its name and user expectations. It now stops
+   * the **entire pipeline**:
+   *   - In-flight tasks are signalled via the shared abort controller so
+   *     drivers / runner.ts can cancel cooperatively (returning
+   *     `failureKind: 'timeout'`).
+   *   - Still-waiting tasks across every track are immediately marked
+   *     skipped so the run completes promptly.
+   * The terminal lock in setTaskStatus prevents any later re-transition
+   * should a completed running task try to overwrite the skipped state.
+   */
+  function applyStopAll(_failedTrackId: string): void {
+    pipelineAborted = true;
+    abortController.abort();
     for (const [id, state] of states) {
-      // Only skip tasks that are still waiting — tasks already running must be
-      // allowed to complete naturally so their process is not orphaned and their
-      // final status (success/failed/timeout) is recorded correctly.
-      // The terminal lock in setTaskStatus prevents any later re-transition
-      // should a completed running task try to overwrite the skipped state.
-      if (state.trackConfig.id === trackId && state.status === 'waiting') {
+      if (state.status === 'waiting') {
         state.finishedAt = nowISO();
         setTaskStatus(id, 'skipped');
       }
@@ -600,7 +609,17 @@ export async function runPipeline(
           `prompt: ${originalLen} chars (final: ${prompt.length} chars)`);
         log.quiet(`--- prompt (final) ---\n${clip(prompt)}\n--- end prompt ---`, taskId);
-        const enrichedTask: TaskConfig = { ...task, prompt };
+        // H1: hand the driver a continue_from that has already been
+        // qualified by dag.ts. Without this, drivers like codex/opencode/
+        // claude-code do `outputMap.get(task.continue_from)` directly with
+        // the user's raw (possibly bare) string, which races whenever two
+        // tracks share a task name. dag.ts has the only authoritative
+        // resolver, so we use its precomputed answer here.
+        const enrichedTask: TaskConfig = {
+          ...task,
+          prompt,
+          continue_from: node.resolvedContinueFrom ?? task.continue_from,
+        };
         const driverCtx: DriverContext = {
           sessionMap, outputMap, normalizedMap, workDir: task.cwd ?? workDir,
         };
@@ -627,14 +646,30 @@ export async function runPipeline(
         await mkdir(dirname(outPath), { recursive: true });
         await Bun.write(outPath, result.stdout);
         result = { ...result, outputPath: outPath };
+        // H1: only write the fully-qualified taskId. The previous "also store
+        // bare id when not yet present" trick produced non-deterministic
+        // continue_from lookups when two tracks shared a task name —
+        // whichever finished first won the bare key. dag.ts now resolves
+        // continue_from to a qualified id (DagNode.resolvedContinueFrom),
+        // and the enrichedTask handed to drivers carries that qualified
+        // version, so bare keys are no longer needed.
         outputMap.set(taskId, outPath);
-        const bareId = taskId.includes('.') ? taskId.split('.').pop()! : taskId;
-        if (!outputMap.has(bareId)) outputMap.set(bareId, outPath);
       }
       // 6. Determine terminal status (without emitting yet — result must be complete first)
+      // H2: branch on failureKind so spawn errors no longer masquerade as
+      // timeouts. Old runners that don't set failureKind still work — we
+      // fall back to the historical `exitCode === -1 → timeout` heuristic so
+      // pre-existing third-party drivers don't regress.
       let terminalStatus: TaskStatus;
-      if (result.exitCode === -1) {
+      const kind = result.failureKind;
+      if (kind === 'timeout') {
+        terminalStatus = 'timeout';
+      } else if (kind === 'spawn_error') {
+        terminalStatus = 'failed';
+      } else if (kind === undefined && result.exitCode === -1) {
+        // Legacy path: pre-H2 driver returned -1 with no kind. Treat as
+        // timeout for backward compatibility (the previous behaviour).
         terminalStatus = 'timeout';
       } else if (result.exitCode !== 0) {
         terminalStatus = 'failed';
@@ -662,9 +697,8 @@ export async function runPipeline(
           ? result.normalizedOutput.slice(0, MAX_NORMALIZED_BYTES) +
             `\n[…clipped at ${MAX_NORMALIZED_BYTES} bytes]`
           : result.normalizedOutput;
+        // H1: qualified-only key (see comment near outputMap above).
         normalizedMap.set(taskId, clipped);
-        const bareId = taskId.includes('.') ? taskId.split('.').pop()! : taskId;
-        if (!normalizedMap.has(bareId)) normalizedMap.set(bareId, clipped);
       }
       if (result.stderr) {
@@ -674,9 +708,8 @@ export async function runPipeline(
       }
       if (result.sessionId) {
+        // H1: qualified-only key (see comment near outputMap above).
         sessionMap.set(taskId, result.sessionId);
-        const bareId = taskId.includes('.') ? taskId.split('.').pop()! : taskId;
-        if (!sessionMap.has(bareId)) sessionMap.set(bareId, result.sessionId);
       }
       // Set result and finishedAt before emitting terminal status so listeners see complete state
@@ -729,6 +762,10 @@ export async function runPipeline(
         stderr: errMsg,
         outputPath: null, stderrPath: null, durationMs: 0,
         sessionId: null, normalizedOutput: null,
+        // H2: Engine-level pre-execution errors (driver throw, middleware
+        // throw, getHandler 404) classify as spawn_error — the process never
+        // ran, so calling them "timeout" was actively misleading.
+        failureKind: 'spawn_error',
       };
       state.finishedAt = nowISO();
       setTaskStatus(taskId, 'failed');

package/src/runner.ts CHANGED Viewed

@@ -101,7 +101,11 @@ function resolveWindowsExe(
   return args;
 }
-/** Build a "failed before spawn" result. */
+/**
+ * H2: Build a "failed before spawn" result. Tagged as 'spawn_error' so the
+ * engine can show a useful classification ("driver tried to launch X but
+ * the binary wasn't found") rather than the misleading "timeout".
+ */
 function failResult(stderr: string, durationMs: number): TaskResult {
   return {
     exitCode: -1,
@@ -112,6 +116,7 @@ function failResult(stderr: string, durationMs: number): TaskResult {
     durationMs,
     sessionId: null,
     normalizedOutput: null,
+    failureKind: 'spawn_error',
   };
 }
@@ -289,6 +294,9 @@ export async function runSpawn(
       durationMs,
       sessionId: null,
       normalizedOutput: null,
+      // H2: explicit kind so engine.ts no longer has to guess "is exitCode -1
+      // a timeout or a spawn-failure?" Both used to share the same code.
+      failureKind: 'timeout',
     };
   }
@@ -299,6 +307,12 @@ export async function runSpawn(
   // value doesn't poison sessionMap/normalizedMap downstream.
   let sessionId: string | null = null;
   let normalizedOutput: string | null = null;
+  // M12: drivers can flip a task's terminal status to failed even when the
+  // process exited 0 (e.g. opencode returning `{type:"error"}` JSON). When
+  // the flag is set, we synthesize a non-zero exit code and append a reason
+  // line to stderr so engine.ts marks the task as failed with a useful
+  // explanation instead of letting the error JSON pass through as success.
+  let forcedFailureMessage: string | null = null;
   if (driver?.parseResult) {
     try {
       const meta = driver.parseResult(stdout, stderr);
@@ -309,6 +323,11 @@ export async function runSpawn(
         if (typeof meta.normalizedOutput === 'string') {
           normalizedOutput = meta.normalizedOutput;
         }
+        if (meta.forceFailure === true) {
+          forcedFailureMessage = typeof meta.forceFailureReason === 'string'
+            ? meta.forceFailureReason
+            : 'Driver flagged task as failed (forceFailure)';
+        }
       }
     } catch (err) {
       // The spawn itself succeeded; only metadata extraction failed.
@@ -325,10 +344,32 @@ export async function runSpawn(
         durationMs,
         sessionId: null,
         normalizedOutput: null,
+        // H2: parseResult threw — the spawn itself succeeded, so the failure
+        // is "the process exited but the driver couldn't parse it". Surface
+        // that as exit_nonzero (when the actual exit was non-zero) or null
+        // (when the underlying exit was 0 — UI will still mark it failed via
+        // engine.ts because the result is incomplete).
+        failureKind: exitCode === 0 ? null : 'exit_nonzero',
       };
     }
   }
+  // M12: when the driver forced a failure, treat as exit_nonzero with the
+  // reason appended to stderr so users see WHY the task failed without
+  // having to dig through driver-specific JSON.
+  if (forcedFailureMessage !== null) {
+    return {
+      exitCode: exitCode === 0 ? 1 : exitCode,
+      stdout,
+      stderr: stderr + (stderr.endsWith('\n') ? '' : '\n') + `[driver] ${forcedFailureMessage}`,
+      outputPath: null,
+      stderrPath: null,
+      durationMs,
+      sessionId,
+      normalizedOutput,
+      failureKind: 'exit_nonzero',
+    };
+  }
   return {
     exitCode,
     stdout,
@@ -338,6 +379,9 @@ export async function runSpawn(
     durationMs,
     sessionId,
     normalizedOutput,
+    // H2: success vs nonzero exit. Engine uses this to short-circuit the
+    // timeout branch even if a third-party driver returns -1 by mistake.
+    failureKind: exitCode === 0 ? null : 'exit_nonzero',
   };
 }

package/src/schema.ts CHANGED Viewed

@@ -41,14 +41,16 @@ function validateRawTask(task: RawTaskConfig, trackId: string): void {
   if (!task.id) throw new Error(`track "${trackId}": task.id is required`);
   if (task.use) return; // template usage, validated later
-  const hasPrompt = typeof task.prompt === 'string' && task.prompt.length > 0;
-  const hasCommand = typeof task.command === 'string' && task.command.length > 0;
-  if (!hasPrompt && !hasCommand) {
+  const hasPromptKey = typeof task.prompt === 'string';
+  const hasCommandKey = typeof task.command === 'string';
+  if (!hasPromptKey && !hasCommandKey) {
     throw new Error(`task "${task.id}": must have either "prompt" or "command"`);
   }
-  if (hasPrompt && hasCommand) {
+  if (hasPromptKey && hasCommandKey) {
     throw new Error(`task "${task.id}": cannot have both "prompt" and "command"`);
   }
+  // Empty-content tasks (e.g. `prompt: ''`) are allowed at parse time;
+  // validate-raw.ts reports them as validation errors instead of throwing.
 }
 // ═══ Template Expansion ═══

package/src/validate-raw.ts CHANGED Viewed

@@ -16,10 +16,21 @@ function isValidDuration(input: string): boolean {
 const VALID_ON_FAILURE = new Set(['skip_downstream', 'stop_all', 'ignore']);
 const VALID_MODEL_TIERS = new Set(['low', 'medium', 'high']);
+export type ValidationSeverity = 'error' | 'warning';
 export interface ValidationError {
   /** JSONPath-style location, e.g. "tracks[0].tasks[1].prompt" */
   path: string;
   message: string;
+  /**
+   * H8: not all "errors" are equally fatal. The DAG runtime is happy to
+   * insert implicit `continue_from → depends_on` ordering, so the matching
+   * validate-raw check is a *style* nit, not a hard failure. Severity lets
+   * the editor render it as a soft warning instead of blocking save / run.
+   * Existing call sites that don't read this field still treat every entry
+   * as fatal — defaulting `severity` to undefined preserves that behaviour.
+   */
+  severity?: ValidationSeverity;
 }
 /**
@@ -111,19 +122,30 @@ export function validateRaw(config: RawPipelineConfig): ValidationError[] {
       // Template-based tasks: skip prompt/command checks (params validated at runtime)
       if (task.use) continue;
-      const hasPrompt = typeof task.prompt === 'string' && task.prompt.trim().length > 0;
-      const hasCommand = typeof task.command === 'string' && task.command.trim().length > 0;
+      const hasPromptKey = typeof task.prompt === 'string';
+      const hasCommandKey = typeof task.command === 'string';
+      const promptEmpty = hasPromptKey && task.prompt!.trim().length === 0;
+      const commandEmpty = hasCommandKey && task.command!.trim().length === 0;
-      if (!hasPrompt && !hasCommand) {
+      if (hasPromptKey && hasCommandKey) {
+        errors.push({
+          path: taskPath,
+          message: `Task "${task.id}": cannot have both "prompt" and "command"`,
+        });
+      } else if (!hasPromptKey && !hasCommandKey) {
         errors.push({
           path: taskPath,
           message: `Task "${task.id}": must have "prompt" or "command"`,
         });
-      }
-      if (hasPrompt && hasCommand) {
+      } else if (promptEmpty) {
         errors.push({
           path: taskPath,
-          message: `Task "${task.id}": cannot have both "prompt" and "command"`,
+          message: `Task "${task.id}": prompt content cannot be empty`,
+        });
+      } else if (commandEmpty) {
+        errors.push({
+          path: taskPath,
+          message: `Task "${task.id}": command content cannot be empty`,
         });
       }
@@ -169,9 +191,15 @@ export function validateRaw(config: RawPipelineConfig): ValidationError[] {
         } else if (!task.depends_on || !task.depends_on.some(dep =>
           resolveDepRef(dep, track.id, allQualified, bareToQualified) === resolved
         )) {
+          // H8: demote to a warning. dag.ts/buildDag inserts continue_from
+          // as an implicit dependency at runtime, so the pipeline runs fine
+          // without the explicit listing. Treat as a style hint rather than
+          // blocking save / run, otherwise we frighten users with a red
+          // "Configuration error" for code that would have run successfully.
           errors.push({
             path: `${taskPath}.continue_from`,
-            message: `Task "${task.id}": continue_from "${task.continue_from}" should also be listed in depends_on to ensure ordering`,
+            message: `Task "${task.id}": continue_from "${task.continue_from}" should also be listed in depends_on for clarity (the runtime will add it implicitly).`,
+            severity: 'warning',
           });
         }
       }