@tagma/sdk 0.6.4 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/README.md +74 -6
  2. package/dist/engine.d.ts.map +1 -1
  3. package/dist/engine.js +194 -21
  4. package/dist/engine.js.map +1 -1
  5. package/dist/pipeline-runner.d.ts.map +1 -1
  6. package/dist/pipeline-runner.js +3 -0
  7. package/dist/pipeline-runner.js.map +1 -1
  8. package/dist/ports.d.ts +118 -0
  9. package/dist/ports.d.ts.map +1 -0
  10. package/dist/ports.js +365 -0
  11. package/dist/ports.js.map +1 -0
  12. package/dist/prompt-doc.d.ts +35 -1
  13. package/dist/prompt-doc.d.ts.map +1 -1
  14. package/dist/prompt-doc.js +110 -0
  15. package/dist/prompt-doc.js.map +1 -1
  16. package/dist/runner.d.ts +17 -0
  17. package/dist/runner.d.ts.map +1 -1
  18. package/dist/runner.js +171 -8
  19. package/dist/runner.js.map +1 -1
  20. package/dist/schema.d.ts.map +1 -1
  21. package/dist/schema.js +8 -0
  22. package/dist/schema.js.map +1 -1
  23. package/dist/sdk.d.ts +3 -1
  24. package/dist/sdk.d.ts.map +1 -1
  25. package/dist/sdk.js +5 -1
  26. package/dist/sdk.js.map +1 -1
  27. package/dist/validate-raw.d.ts.map +1 -1
  28. package/dist/validate-raw.js +141 -0
  29. package/dist/validate-raw.js.map +1 -1
  30. package/package.json +2 -7
  31. package/src/dag.test.ts +56 -0
  32. package/src/engine-ports.test.ts +404 -0
  33. package/src/engine.ts +231 -24
  34. package/src/pipeline-runner.ts +3 -0
  35. package/src/ports.test.ts +301 -0
  36. package/src/ports.ts +442 -0
  37. package/src/prompt-doc.test.ts +174 -0
  38. package/src/prompt-doc.ts +121 -1
  39. package/src/runner.test.ts +142 -0
  40. package/src/runner.ts +198 -8
  41. package/src/schema-ports.test.ts +236 -0
  42. package/src/schema.ts +8 -0
  43. package/src/sdk.ts +14 -0
  44. package/src/validate-raw-ports.test.ts +198 -0
  45. package/src/validate-raw.ts +155 -1
  46. package/dist/plugin-registry.test.d.ts +0 -2
  47. package/dist/plugin-registry.test.d.ts.map +0 -1
  48. package/dist/plugin-registry.test.js +0 -188
  49. package/dist/plugin-registry.test.js.map +0 -1
  50. package/dist/schema.test.d.ts +0 -2
  51. package/dist/schema.test.d.ts.map +0 -1
  52. package/dist/schema.test.js +0 -94
  53. package/dist/schema.test.js.map +0 -1
  54. package/dist/task-ref.test.d.ts +0 -2
  55. package/dist/task-ref.test.d.ts.map +0 -1
  56. package/dist/task-ref.test.js +0 -364
  57. package/dist/task-ref.test.js.map +0 -1
package/src/engine.ts CHANGED
@@ -23,7 +23,14 @@ import { buildDag, type Dag } from './dag';
23
23
  import { defaultRegistry, type PluginRegistry } from './registry';
24
24
  import { runSpawn, runCommand } from './runner';
25
25
  import { parseDuration, nowISO, generateRunId } from './utils';
26
- import { promptDocumentFromString, serializePromptDocument } from './prompt-doc';
26
+ import {
27
+ promptDocumentFromString,
28
+ serializePromptDocument,
29
+ prependContext,
30
+ renderInputsBlock,
31
+ renderOutputSchemaBlock,
32
+ } from './prompt-doc';
33
+ import { extractTaskOutputs, resolveTaskInputs, substituteInputs } from './ports';
27
34
  import {
28
35
  executeHook,
29
36
  buildPipelineStartContext,
@@ -184,12 +191,19 @@ function toRunTaskState(
184
191
  exitCode: result?.exitCode ?? null,
185
192
  stdout: result?.stdout ?? '',
186
193
  stderr: result?.stderr ?? '',
194
+ stdoutPath: result?.stdoutPath ?? null,
187
195
  stderrPath: result?.stderrPath ?? null,
196
+ stdoutBytes: result?.stdoutBytes ?? null,
197
+ stderrBytes: result?.stderrBytes ?? null,
188
198
  sessionId: result?.sessionId ?? null,
189
199
  normalizedOutput: result?.normalizedOutput ?? null,
190
200
  resolvedDriver: cfg.driver ?? null,
191
201
  resolvedModel: cfg.model ?? null,
192
202
  resolvedPermissions: (cfg.permissions as Permissions | undefined) ?? null,
203
+ // Ports not yet wired through the engine's event surface. Null placeholder
204
+ // keeps the wire type honest until the ports extraction pass lands.
205
+ outputs: result?.outputs ?? null,
206
+ inputs: null,
193
207
  logs: [],
194
208
  totalLogCount: 0,
195
209
  };
@@ -368,6 +382,17 @@ export async function runPipeline(
368
382
 
369
383
  const sessionMap = new Map<string, string>();
370
384
  const normalizedMap = new Map<string, string>();
385
+ // Extracted port outputs keyed by fully-qualified task id. Populated
386
+ // after a task succeeds when its `ports.outputs` is declared; read by
387
+ // downstream tasks via `resolveTaskInputs` to assemble their inputs.
388
+ // Kept separate from normalizedMap so the continue_from text handoff
389
+ // and the typed-port data handoff don't pollute each other — they
390
+ // solve different problems and have different lifetimes.
391
+ const outputValuesMap = new Map<string, Readonly<Record<string, unknown>>>();
392
+ // Resolved port inputs keyed by fully-qualified task id. Written once,
393
+ // just before a task runs, so every subsequent task_update event can
394
+ // echo them to the UI without re-resolving.
395
+ const resolvedInputsMap = new Map<string, Readonly<Record<string, unknown>>>();
371
396
 
372
397
  // Pipeline timeout + abort reason tracking.
373
398
  //
@@ -469,9 +494,14 @@ export async function runPipeline(
469
494
  exitCode: result?.exitCode,
470
495
  stdout: result?.stdout,
471
496
  stderr: result?.stderr,
497
+ stdoutPath: result?.stdoutPath ?? null,
472
498
  stderrPath: result?.stderrPath ?? null,
499
+ stdoutBytes: result?.stdoutBytes ?? null,
500
+ stderrBytes: result?.stderrBytes ?? null,
473
501
  sessionId: result?.sessionId ?? null,
474
502
  normalizedOutput: result?.normalizedOutput ?? null,
503
+ inputs: resolvedInputsMap.get(taskId) ?? null,
504
+ outputs: outputValuesMap.get(taskId) ?? null,
475
505
  resolvedDriver: cfg.driver ?? null,
476
506
  resolvedModel: cfg.model ?? null,
477
507
  resolvedPermissions: (cfg.permissions as Permissions | undefined) ?? null,
@@ -588,19 +618,26 @@ export async function runPipeline(
588
618
  );
589
619
  try {
590
620
  const triggerPlugin = registry.getHandler<TriggerPlugin>('triggers', task.trigger.type);
591
- // R6: race the plugin's watch() against the pipeline's abort signal.
592
- // Third-party triggers may forget to wire up ctx.signal — without
593
- // this race, an aborted pipeline would hang forever waiting for the
594
- // plugin's watch promise to resolve. The race resolves on whichever
595
- // path settles first, and the cleanup paths in finally never run on
596
- // the orphaned plugin promise (it's allowed to leak a watcher; the
597
- // pipeline is being torn down anyway).
621
+ // R6: race the plugin's watch() against the pipeline's abort signal
622
+ // AND the task-level timeout. Third-party triggers may forget to
623
+ // wire up ctx.signal — without the abort race, an aborted pipeline
624
+ // would hang forever waiting for the plugin's watch promise to
625
+ // resolve. And without the timeout race, a buggy watch() that never
626
+ // settles would ignore the user's `task.timeout` (which the spawn
627
+ // path at step 4 already honours) — a task could wedge the whole
628
+ // pipeline until pipeline-level timeout fires (or forever, if none
629
+ // is set). Honouring task.timeout here makes the two stages
630
+ // symmetric. The cleanup paths in finally never run on the orphaned
631
+ // plugin promise (it's allowed to leak a watcher; the pipeline is
632
+ // being torn down anyway).
633
+ const triggerTimeoutMs = task.timeout ? parseDuration(task.timeout) : 0;
598
634
  await new Promise<unknown>((resolve, reject) => {
599
635
  let settled = false;
636
+ let timer: ReturnType<typeof setTimeout> | null = null;
600
637
  const onAbort = () => {
601
638
  if (settled) return;
602
639
  settled = true;
603
- abortController.signal.removeEventListener('abort', onAbort);
640
+ if (timer !== null) clearTimeout(timer);
604
641
  reject(new Error('Pipeline aborted'));
605
642
  };
606
643
  if (abortController.signal.aborted) {
@@ -608,6 +645,18 @@ export async function runPipeline(
608
645
  return;
609
646
  }
610
647
  abortController.signal.addEventListener('abort', onAbort, { once: true });
648
+ if (triggerTimeoutMs > 0) {
649
+ timer = setTimeout(() => {
650
+ if (settled) return;
651
+ settled = true;
652
+ abortController.signal.removeEventListener('abort', onAbort);
653
+ reject(
654
+ new TriggerTimeoutError(
655
+ `Trigger "${task.trigger!.type}" did not settle within ${task.timeout} (task-level timeout)`,
656
+ ),
657
+ );
658
+ }, triggerTimeoutMs);
659
+ }
611
660
  triggerPlugin
612
661
  .watch(task.trigger as Record<string, unknown>, {
613
662
  taskId: node.taskId,
@@ -620,12 +669,14 @@ export async function runPipeline(
620
669
  (v) => {
621
670
  if (settled) return;
622
671
  settled = true;
672
+ if (timer !== null) clearTimeout(timer);
623
673
  abortController.signal.removeEventListener('abort', onAbort);
624
674
  resolve(v);
625
675
  },
626
676
  (e) => {
627
677
  if (settled) return;
628
678
  settled = true;
679
+ if (timer !== null) clearTimeout(timer);
629
680
  abortController.signal.removeEventListener('abort', onAbort);
630
681
  reject(e);
631
682
  },
@@ -694,6 +745,60 @@ export async function runPipeline(
694
745
  return;
695
746
  }
696
747
 
748
+ // 3.5. Resolve port inputs from upstream outputs. This is the last
749
+ // gate before execution: missing-required inputs block the task
750
+ // without ever spawning a process, so the caller sees a clear
751
+ // "blocked: missing input X" rather than a cryptic runtime error
752
+ // from a command that expanded a placeholder to the empty string.
753
+ // Resolution runs even for tasks that declare no ports — the call
754
+ // is cheap and returns `{kind: 'ready', inputs: {}}` in that case,
755
+ // which downstream code handles uniformly.
756
+ const inputResolution = resolveTaskInputs(task, outputValuesMap, node.dependsOn);
757
+ if (inputResolution.kind === 'blocked') {
758
+ log.error(
759
+ `[task:${taskId}]`,
760
+ `blocked — cannot resolve port inputs:\n${inputResolution.reason}`,
761
+ );
762
+ state.result = {
763
+ exitCode: -1,
764
+ stdout: '',
765
+ stderr: `[engine] port input resolution failed:\n${inputResolution.reason}`,
766
+ stdoutPath: null,
767
+ stderrPath: null,
768
+ durationMs: 0,
769
+ sessionId: null,
770
+ normalizedOutput: null,
771
+ failureKind: 'spawn_error',
772
+ outputs: null,
773
+ };
774
+ state.finishedAt = nowISO();
775
+ setTaskStatus(taskId, 'blocked');
776
+ try {
777
+ await fireHook(taskId, 'task_failure');
778
+ } catch (hookErr) {
779
+ log.error(
780
+ `[task:${taskId}]`,
781
+ `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
782
+ );
783
+ }
784
+ if (getOnFailure(taskId) === 'stop_all') applyStopAll(node.track.id);
785
+ return;
786
+ }
787
+ const resolvedInputs = inputResolution.inputs;
788
+ resolvedInputsMap.set(taskId, resolvedInputs);
789
+ if (inputResolution.missingOptional.length > 0) {
790
+ log.debug(
791
+ `[task:${taskId}]`,
792
+ `optional inputs unresolved (empty in placeholders): ${inputResolution.missingOptional.join(', ')}`,
793
+ );
794
+ }
795
+ if (task.ports?.inputs && task.ports.inputs.length > 0) {
796
+ log.debug(
797
+ `[task:${taskId}]`,
798
+ `resolved inputs: ${JSON.stringify(resolvedInputs)}`,
799
+ );
800
+ }
801
+
697
802
  // 4. Mark running — set startedAt before emitting so subscribers see a
698
803
  // complete task_update (startedAt non-null) on the status transition.
699
804
  state.startedAt = nowISO();
@@ -724,18 +829,73 @@ export async function runPipeline(
724
829
  let result: TaskResult;
725
830
  const timeoutMs = task.timeout ? parseDuration(task.timeout) : undefined;
726
831
 
727
- const runOpts = { timeoutMs, signal: abortController.signal };
832
+ // Stream child stdout/stderr directly to disk in the logger's run dir
833
+ // and keep only a bounded tail in the returned TaskResult. Filenames
834
+ // mirror the existing `.stderr` naming — dots in task ids are replaced
835
+ // so hierarchical ids (e.g. `track1.task2`) map cleanly to a flat dir.
836
+ const fsSafeTaskId = taskId.replace(/\./g, '_');
837
+ const stdoutPath = resolve(log.dir, `${fsSafeTaskId}.stdout`);
838
+ const stderrPath = resolve(log.dir, `${fsSafeTaskId}.stderr`);
839
+ const runOpts = {
840
+ timeoutMs,
841
+ signal: abortController.signal,
842
+ stdoutPath,
843
+ stderrPath,
844
+ };
728
845
 
729
846
  if (task.command) {
730
- log.debug(`[task:${taskId}]`, `command: ${task.command}`);
731
- result = await runCommand(task.command, task.cwd ?? workDir, runOpts);
847
+ // Substitute `{{inputs.X}}` placeholders into the command
848
+ // string. Tasks with no declared inputs always produce the same
849
+ // string back (no placeholders to match). Unresolved references
850
+ // render empty — validate-raw flags undeclared references as
851
+ // errors, so the only way to land here with an unresolved reference is an
852
+ // optional input that had no upstream producer and no default,
853
+ // which we surface in the log.
854
+ const { text: expandedCommand, unresolved } = substituteInputs(
855
+ task.command,
856
+ resolvedInputs,
857
+ );
858
+ if (unresolved.length > 0) {
859
+ log.debug(
860
+ `[task:${taskId}]`,
861
+ `command placeholders rendered empty: ${unresolved.join(', ')}`,
862
+ );
863
+ }
864
+ log.debug(`[task:${taskId}]`, `command: ${expandedCommand}`);
865
+ result = await runCommand(expandedCommand, task.cwd ?? workDir, runOpts);
732
866
  } else {
733
867
  // AI task: apply middleware chain against a structured PromptDocument.
734
868
  const driverName = task.driver ?? track.driver ?? config.driver ?? 'opencode';
735
869
  const driver = registry.getHandler<DriverPlugin>('drivers', driverName);
736
870
 
737
- const originalLen = task.prompt!.length;
738
- let doc: PromptDocument = promptDocumentFromString(task.prompt!);
871
+ // Substitute placeholders in the user-authored prompt before
872
+ // wrapping into a PromptDocument so middlewares see the
873
+ // already-resolved task text.
874
+ const { text: expandedPrompt, unresolved } = substituteInputs(
875
+ task.prompt!,
876
+ resolvedInputs,
877
+ );
878
+ if (unresolved.length > 0) {
879
+ log.debug(
880
+ `[task:${taskId}]`,
881
+ `prompt placeholders rendered empty: ${unresolved.join(', ')}`,
882
+ );
883
+ }
884
+ const originalLen = expandedPrompt.length;
885
+ let doc: PromptDocument = promptDocumentFromString(expandedPrompt);
886
+ // Prepend port-related context blocks so the model sees them
887
+ // before any middleware-added retrieval / memory blocks. Order
888
+ // matters: [Output Format] first (sets the deliverable), then
889
+ // [Inputs] (the concrete data to operate on). Empty blocks are
890
+ // filtered out — tasks without ports get no extra blocks at all.
891
+ const outputFormatBlock = renderOutputSchemaBlock(task.ports?.outputs);
892
+ if (outputFormatBlock) {
893
+ doc = prependContext(doc, outputFormatBlock);
894
+ }
895
+ const inputsBlock = renderInputsBlock(task.ports?.inputs, resolvedInputs);
896
+ if (inputsBlock) {
897
+ doc = prependContext(doc, inputsBlock);
898
+ }
739
899
  const mws = task.middlewares !== undefined ? task.middlewares : track.middlewares;
740
900
  if (mws && mws.length > 0) {
741
901
  log.debug(
@@ -846,6 +1006,13 @@ export async function runPipeline(
846
1006
  // contexts and task). Drivers that read task.prompt see the
847
1007
  // default serialization and need no changes.
848
1008
  promptDoc: doc,
1009
+ // Ports feature: resolved input values keyed by port name,
1010
+ // already coerced to the declared port type. Drivers that
1011
+ // need to re-substitute placeholders inside a custom envelope
1012
+ // can read this and call `substituteInputs`; most drivers can
1013
+ // ignore it because the engine has already expanded
1014
+ // `{{inputs.X}}` into `task.prompt` upstream.
1015
+ inputs: resolvedInputs,
849
1016
  };
850
1017
  const spec = await driver.buildCommand(enrichedTask, track, driverCtx);
851
1018
  log.debug(`[task:${taskId}]`, `driver=${driverName}`);
@@ -897,6 +1064,41 @@ export async function runPipeline(
897
1064
  terminalStatus = 'success';
898
1065
  }
899
1066
 
1067
+ // Extract declared port outputs from the task's output stream.
1068
+ // Only meaningful on success — a failed task's output is whatever
1069
+ // the child happened to emit before exiting, and downstream tasks
1070
+ // shouldn't receive partial data. `extractTaskOutputs` is a no-op
1071
+ // when the task has no declared outputs, so this is free for
1072
+ // pre-ports tasks. Diagnostics are appended to stderr so users
1073
+ // see *why* a downstream input is missing without having to dig
1074
+ // through driver-specific logs.
1075
+ let extractedOutputs: Readonly<Record<string, unknown>> | null = null;
1076
+ if (terminalStatus === 'success') {
1077
+ const extraction = extractTaskOutputs(
1078
+ task.ports,
1079
+ result.stdout,
1080
+ result.normalizedOutput,
1081
+ );
1082
+ if (task.ports?.outputs && task.ports.outputs.length > 0) {
1083
+ extractedOutputs = extraction.outputs;
1084
+ outputValuesMap.set(taskId, extraction.outputs);
1085
+ log.debug(
1086
+ `[task:${taskId}]`,
1087
+ `extracted outputs: ${JSON.stringify(extraction.outputs)}`,
1088
+ );
1089
+ if (extraction.diagnostic) {
1090
+ log.error(`[task:${taskId}]`, extraction.diagnostic);
1091
+ const note = `\n[engine] ${extraction.diagnostic}`;
1092
+ result = { ...result, stderr: result.stderr + note };
1093
+ }
1094
+ }
1095
+ }
1096
+ // Attach outputs to the result (null when task has no declared
1097
+ // outputs or extraction failed entirely). Consumers of TaskResult
1098
+ // — hooks, wire events, test assertions — all go through this
1099
+ // one field rather than re-running extraction.
1100
+ result = { ...result, outputs: extractedOutputs };
1101
+
900
1102
  // Store normalized text separately (in-memory) for continue_from handoff.
901
1103
  // R15: clip oversized values so a runaway parseResult can't accumulate
902
1104
  // hundreds of MB across tasks.
@@ -909,11 +1111,9 @@ export async function runPipeline(
909
1111
  normalizedMap.set(taskId, clipped);
910
1112
  }
911
1113
 
912
- if (result.stderr) {
913
- const stderrPath = resolve(log.dir, `${taskId.replace(/\./g, '_')}.stderr`);
914
- await Bun.write(stderrPath, result.stderr);
915
- result = { ...result, stderrPath };
916
- }
1114
+ // Note: stderr is already persisted by runner.ts as it streams; the
1115
+ // old "write full string after the fact" block is gone — that's what
1116
+ // the streaming rewrite fixed (unbounded in-memory buffering).
917
1117
 
918
1118
  if (result.sessionId) {
919
1119
  // H1: qualified-only key.
@@ -940,14 +1140,18 @@ export async function runPipeline(
940
1140
  }
941
1141
  }
942
1142
 
943
- // File-only: full stdout/stderr dump (clipped) + extracted metadata
944
- log.debug(
945
- `[task:${taskId}]`,
946
- `stdout: ${result.stdout.length} chars, stderr: ${result.stderr.length} chars`,
947
- );
1143
+ // File-only: byte counts (prefer full totals from the runner over the
1144
+ // bounded tail length so oversized outputs show their real size) +
1145
+ // paths to the on-disk full copies.
1146
+ const stdoutSize = result.stdoutBytes ?? result.stdout.length;
1147
+ const stderrSize = result.stderrBytes ?? result.stderr.length;
1148
+ log.debug(`[task:${taskId}]`, `stdout: ${stdoutSize} bytes, stderr: ${stderrSize} bytes`);
948
1149
  if (result.sessionId) {
949
1150
  log.debug(`[task:${taskId}]`, `sessionId: ${result.sessionId}`);
950
1151
  }
1152
+ if (result.stdoutPath) {
1153
+ log.debug(`[task:${taskId}]`, `wrote stdout: ${result.stdoutPath}`);
1154
+ }
951
1155
  if (result.stderrPath) {
952
1156
  log.debug(`[task:${taskId}]`, `wrote stderr: ${result.stderrPath}`);
953
1157
  }
@@ -976,7 +1180,10 @@ export async function runPipeline(
976
1180
  exitCode: -1,
977
1181
  stdout: '',
978
1182
  stderr: errMsg,
1183
+ stdoutPath: null,
979
1184
  stderrPath: null,
1185
+ stdoutBytes: 0,
1186
+ stderrBytes: errMsg.length,
980
1187
  durationMs: 0,
981
1188
  sessionId: null,
982
1189
  normalizedOutput: null,
@@ -126,7 +126,10 @@ export class PipelineRunner {
126
126
  exitCode: pick(event.exitCode, prev.exitCode),
127
127
  stdout: pick(event.stdout, prev.stdout),
128
128
  stderr: pick(event.stderr, prev.stderr),
129
+ stdoutPath: pick(event.stdoutPath, prev.stdoutPath),
129
130
  stderrPath: pick(event.stderrPath, prev.stderrPath),
131
+ stdoutBytes: pick(event.stdoutBytes, prev.stdoutBytes),
132
+ stderrBytes: pick(event.stderrBytes, prev.stderrBytes),
130
133
  sessionId: pick(event.sessionId, prev.sessionId),
131
134
  normalizedOutput: pick(event.normalizedOutput, prev.normalizedOutput),
132
135
  resolvedDriver: pick(event.resolvedDriver, prev.resolvedDriver),