@tagma/sdk 0.6.7 → 0.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,499 @@
1
+ import { describe, expect, test } from 'bun:test';
2
+ import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
3
+ import { tmpdir } from 'node:os';
4
+ import { join } from 'node:path';
5
+ import { PluginRegistry } from './registry';
6
+ import { bootstrapBuiltins } from './bootstrap';
7
+ import { runPipeline, type RunEventPayload } from './engine';
8
+ import type { DriverPlugin, PipelineConfig, TaskConfig, TaskPorts, TaskStatus } from './types';
9
+
10
+ // Mixed-mode port tests. Prompt Tasks do NOT declare ports — their I/O
11
+ // contract is inferred from direct-neighbor Command Tasks. The three
12
+ // cross-type boundaries the design has to cover:
13
+ //
14
+ // prompt → command (AI task produces outputs inferred from the
15
+ // downstream Command's declared inputs)
16
+ // command → prompt (AI task consumes the upstream Command's
17
+ // declared outputs via substitution + [Inputs])
18
+ // prompt → prompt (no structured port flow — free text only,
19
+ // carried by continue_from / normalizedOutput)
20
+ //
21
+ // A mock AI driver stands in for a real LLM. It records the engine's
22
+ // serialized prompt to a sidecar file and emits a per-task JSON
23
+ // response on the final stdout line, simulating the `[Output Format]`
24
+ // contract. Asserting on the sidecar record lets each test verify the
25
+ // engine prepended the right `[Inputs]` / `[Output Format]` blocks
26
+ // and expanded `{{inputs.X}}` placeholders inside the prompt.
27
+
28
// Read-only permission set (read allowed, write and execute denied) used as the default for the tasks and the track built in this file.
+ const PERMS = { read: true, write: false, execute: false };
29
+
30
/**
 * Creates a fresh, uniquely named scratch directory under the OS temp
 * directory and returns its path. Callers are responsible for removing it.
 */
function makeDir(): string {
  const prefix = join(tmpdir(), 'tagma-ports-mixed-');
  return mkdtempSync(prefix);
}
33
+
34
/**
 * Writes `<dir>/<name>.js`, a tiny Node script that prints `payload` as a
 * single JSON line on stdout, and returns the script's path.
 *
 * The payload is stringified twice: the first pass yields the JSON text,
 * the second turns that text into a quoted JS string literal that can be
 * embedded verbatim in the generated source.
 */
function writeEmitScript(dir: string, name: string, payload: Record<string, unknown>): string {
  const scriptFile = join(dir, name + '.js');
  const jsonLiteral = JSON.stringify(JSON.stringify(payload));
  const statements = [
    'process.stdout.write(' + jsonLiteral + ');',
    "process.stdout.write('\\n');",
  ];
  writeFileSync(scriptFile, statements.join('\n') + '\n');
  return scriptFile;
}
40
+
41
/**
 * Writes `<dir>/<name>.js`, a Node script that prints its command-line
 * arguments joined by `|` followed by a newline, and returns the script's
 * path.
 */
function writeEchoArgsScript(dir: string, name: string): string {
  const scriptFile = join(dir, name + '.js');
  const statements = [
    "process.stdout.write(process.argv.slice(2).join('|'));",
    "process.stdout.write('\\n');",
  ];
  writeFileSync(scriptFile, statements.map((line) => line + '\n').join(''));
  return scriptFile;
}
47
+
48
/**
 * Writes the mock driver's spawn script to `<dir>/mock-driver.js` and
 * returns its path.
 *
 * The generated script reads all of stdin (the serialized prompt), saves it
 * to the file named by `MOCK_RECORD_PATH` when that variable is set, echoes
 * it to stdout (adding a newline if the input did not end with one), and
 * finally prints the `MOCK_RESPONSE` env value on its own line when it is
 * non-empty. Per the surrounding test design, that final line is what
 * `extractTaskOutputs` picks up as the model's JSON output.
 */
function writeMockDriverScript(dir: string): string {
  const scriptLines: string[] = [];
  scriptLines.push("const fs = require('fs');");
  scriptLines.push('const recordPath = process.env.MOCK_RECORD_PATH;');
  scriptLines.push("let buf = '';");
  scriptLines.push("process.stdin.setEncoding('utf8');");
  scriptLines.push("process.stdin.on('data', (c) => { buf += c; });");
  scriptLines.push("process.stdin.on('end', () => {");
  scriptLines.push(' if (recordPath) fs.writeFileSync(recordPath, buf);');
  scriptLines.push(' process.stdout.write(buf);');
  scriptLines.push(" if (!buf.endsWith('\\n')) process.stdout.write('\\n');");
  scriptLines.push(" const resp = process.env.MOCK_RESPONSE || '';");
  scriptLines.push(" if (resp) process.stdout.write(resp + '\\n');");
  scriptLines.push('});');

  const scriptFile = join(dir, 'mock-driver.js');
  writeFileSync(scriptFile, scriptLines.join('\n'));
  return scriptFile;
}
73
+
74
/** Canned behaviour for the mock driver; both maps are keyed by task id. */
interface MockConfig {
  /** JSON object the mock "model" emits as its final stdout line for a task. */
  readonly responses: { readonly [taskId: string]: Record<string, unknown> };
  /** File path where the prompt echoed by the mock is recorded for a task. */
  readonly records: { readonly [taskId: string]: string };
}
80
+
81
/**
 * Builds the `mock-echo` driver plugin that stands in for a real AI driver.
 *
 * @param scriptPath Path of the Node script the driver asks the engine to run.
 * @param cfg Per-task canned responses and prompt-record paths.
 * @returns A driver whose command is `node <scriptPath>`, fed the task prompt
 *   on stdin, with `MOCK_RESPONSE` / `MOCK_RECORD_PATH` set only for tasks
 *   that have a matching entry in `cfg`.
 */
function makeMockDriver(scriptPath: string, cfg: MockConfig): DriverPlugin {
  const driver: DriverPlugin = {
    name: 'mock-echo',
    capabilities: { sessionResume: false, systemPrompt: true, outputFormat: true },
    async buildCommand(task) {
      const cannedResponse = cfg.responses[task.id];
      const sidecarPath = cfg.records[task.id];

      // Only populate the variables that apply to this task; the spawned
      // script treats a missing variable as "nothing to do".
      const env: Record<string, string> = {};
      if (cannedResponse) {
        env.MOCK_RESPONSE = JSON.stringify(cannedResponse);
      }
      if (sidecarPath) {
        env.MOCK_RECORD_PATH = sidecarPath;
      }

      return { args: ['node', scriptPath], stdin: task.prompt ?? '', env };
    },
    parseResult(stdout) {
      // A real AI driver would strip transport chrome here and return only
      // the model's message. The mock's stdout is already exactly the echo
      // plus the final JSON line, so it is passed through untouched.
      return { normalizedOutput: stdout };
    },
  };
  return driver;
}
106
+
107
/**
 * Creates a plugin registry with the built-ins bootstrapped and the
 * `mock-echo` driver registered under the `drivers` category.
 */
function registryWithMock(scriptPath: string, cfg: MockConfig): PluginRegistry {
  const registry = new PluginRegistry();
  bootstrapBuiltins(registry);
  const mockDriver = makeMockDriver(scriptPath, cfg);
  registry.registerPlugin('drivers', 'mock-echo', mockDriver);
  return registry;
}
113
+
114
/**
 * Builds a task config from `overrides`, filling in defaults for `name`
 * (the task id), `permissions` (`PERMS`) and `driver` (`'opencode'`).
 * Any of those defaults is replaced when `overrides` supplies the same key.
 */
function task(overrides: Partial<TaskConfig> & { id: string }): TaskConfig {
  const defaults = {
    name: overrides.id,
    permissions: PERMS,
    driver: 'opencode',
  };
  return { ...defaults, ...overrides };
}
122
+
123
/**
 * Wraps `tasks` in a pipeline named `ports-mixed-test` with a single track
 * whose id is `t`. The track uses the `opencode` driver, the shared `PERMS`
 * permissions, and the `skip_downstream` failure policy.
 */
function pipeline(tasks: TaskConfig[]): PipelineConfig {
  const onlyTrack = {
    id: 't',
    name: 'T',
    driver: 'opencode',
    permissions: PERMS,
    on_failure: 'skip_downstream' as const,
    tasks,
  };
  return { name: 'ports-mixed-test', tracks: [onlyTrack] };
}
138
+
139
/** What `run` hands back to a test after the pipeline finishes. */
interface RunResult {
  /** The `success` flag taken from the pipeline result. */
  success: boolean;
  /** Every event delivered to `onEvent`, in arrival order. */
  events: RunEventPayload[];
}
143
+
144
/**
 * Runs `config` through `runPipeline` with the given registry, plugin
 * loading skipped, and every emitted event captured.
 *
 * @returns The captured events together with the pipeline's `success` flag.
 */
async function run(
  config: PipelineConfig,
  workDir: string,
  registry: PluginRegistry,
): Promise<RunResult> {
  const captured: RunEventPayload[] = [];
  const { success } = await runPipeline(config, workDir, {
    registry,
    skipPluginLoading: true,
    onEvent: (event) => captured.push(event),
  });
  return { events: captured, success };
}
157
+
158
/**
 * Returns the last `task_update` event whose `taskId` equals `qid`, or
 * `undefined` when no such event was recorded.
 */
function finalUpdateFor(events: RunEventPayload[], qid: string): RunEventPayload | undefined {
  // Scan from the end so the first match is the most recent update.
  for (let i = events.length - 1; i >= 0; i--) {
    const candidate = events[i];
    if (candidate !== undefined && candidate.type === 'task_update' && candidate.taskId === qid) {
      return candidate;
    }
  }
  return undefined;
}
165
+
166
/**
 * Returns the status carried by the last `task_update` event for `qid`, or
 * `undefined` when the task never produced one.
 */
function finalStatusFrom(events: RunEventPayload[], qid: string): TaskStatus | undefined {
  const latest = finalUpdateFor(events, qid);
  if (!latest || latest.type !== 'task_update') {
    return undefined;
  }
  return latest.status;
}
170
+
171
+ describe('engine — ports: mixed prompt/command combinations', () => {
172
// Prompt upstream of a Command: the Prompt's output schema must be inferred
// from the Command's declared inputs, rendered as an `[Output Format]` block,
// and the model's final-line JSON must flow into the Command's placeholders.
test('prompt → command: prompt outputs are inferred from downstream Command inputs', async () => {
  const dir = makeDir();
  try {
    const mockScript = writeMockDriverScript(dir);
    const echo = writeEchoArgsScript(dir, 'echo');
    const upRecord = join(dir, 'up.prompt');
    const responses: Record<string, Record<string, unknown>> = {
      up: { city: 'Shanghai', id: 7 },
    };
    const records: Record<string, string> = { up: upRecord };

    // `up` is a Prompt — it declares NO ports. Its output schema is
    // inferred at runtime from `down`'s declared inputs, which drives
    // the `[Output Format]` block the mock "model" sees.
    //
    // The authored prompt text deliberately avoids the words `city` and
    // `id`, so the key-name assertions below can only be satisfied by
    // text the engine added, not by the prompt itself.
    const config = pipeline([
      task({
        id: 'up',
        prompt: 'Pick a random place.',
        driver: 'mock-echo',
      }),
      task({
        id: 'down',
        depends_on: ['up'],
        command: `node "${echo}" "{{inputs.city}}" "{{inputs.id}}"`,
        ports: {
          inputs: [
            { name: 'city', type: 'string', required: true },
            { name: 'id', type: 'number', required: true },
          ],
        } as TaskPorts,
      }),
    ]);

    const registry = registryWithMock(mockScript, { responses, records });
    const { events, success } = await run(config, dir, registry);
    expect(success).toBe(true);

    // Upstream prompt was enriched with an [Output Format] block that
    // names the keys `down` wants (city, id) — inferred, not declared.
    // Word-boundary matches keep `id` from matching inside longer words.
    expect(existsSync(upRecord)).toBe(true);
    const upPrompt = readFileSync(upRecord, 'utf8');
    expect(upPrompt).toContain('[Output Format]');
    expect(upPrompt).toMatch(/\bcity\b/);
    expect(upPrompt).toMatch(/\bid\b/);

    // Engine extracted the mock's final-line JSON from normalizedOutput
    // using the inferred output schema.
    const upFinal = finalUpdateFor(events, 't.up');
    if (upFinal?.type !== 'task_update') throw new Error('expected a task_update event for t.up');
    expect(upFinal.status).toBe('success');
    expect(upFinal.outputs).toEqual({ city: 'Shanghai', id: 7 });

    // Downstream command saw the values post-substitution.
    const downFinal = finalUpdateFor(events, 't.down');
    if (downFinal?.type !== 'task_update') {
      throw new Error('expected a task_update event for t.down');
    }
    expect(downFinal.status).toBe('success');
    expect((downFinal.stdout ?? '').trim()).toBe('Shanghai|7');
    expect(downFinal.inputs).toEqual({ city: 'Shanghai', id: 7 });
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
234
+
235
// Command upstream of a terminal Prompt: the Prompt's inputs must be inferred
// from the Command's declared outputs, substituted into the prompt text and
// listed in an `[Inputs]` block, with no `[Output Format]` block rendered.
test('command → prompt: prompt inputs are inferred from upstream Command outputs', async () => {
  const dir = makeDir();
  try {
    const mockScript = writeMockDriverScript(dir);
    const emit = writeEmitScript(dir, 'emit', { city: 'Berlin', id: 3 });
    const downRecord = join(dir, 'down.prompt');
    const responses: Record<string, Record<string, unknown>> = {
      down: { summary: 'ok' },
    };
    const records: Record<string, string> = { down: downRecord };

    // `down` is a Prompt — it declares NO ports. Its input schema is
    // inferred from `up`'s declared outputs; its output schema is
    // empty (no downstream Command to infer from), so `down` is a
    // terminal free-text Prompt with structured inputs only.
    const config = pipeline([
      task({
        id: 'up',
        command: `node "${emit}"`,
        ports: {
          outputs: [
            { name: 'city', type: 'string' },
            { name: 'id', type: 'number' },
          ],
        } as TaskPorts,
      }),
      task({
        id: 'down',
        depends_on: ['up'],
        prompt: 'City is {{inputs.city}}, id={{inputs.id}}.',
        driver: 'mock-echo',
      }),
    ]);

    const registry = registryWithMock(mockScript, { responses, records });
    const { events, success } = await run(config, dir, registry);
    expect(success).toBe(true);

    // Downstream prompt saw:
    //   1. Placeholders substituted with concrete values
    //   2. An [Inputs] context block listing the inferred values
    //   3. NO [Output Format] block (no downstream Command to infer
    //      an output contract from — the Prompt is terminal)
    const downPrompt = readFileSync(downRecord, 'utf8');
    expect(downPrompt).toContain('City is Berlin, id=3.');
    expect(downPrompt).toContain('[Inputs]');
    expect(downPrompt).toMatch(/city:\s*"Berlin"/);
    expect(downPrompt).toMatch(/id:\s*3\b/);
    expect(downPrompt).not.toContain('[Output Format]');

    // Guarded narrowing: a missing event fails with a readable message
    // instead of a TypeError on `undefined.type`.
    const downFinal = finalUpdateFor(events, 't.down');
    if (downFinal?.type !== 'task_update') {
      throw new Error('expected a task_update event for t.down');
    }
    expect(downFinal.inputs).toEqual({ city: 'Berlin', id: 3 });
    // No downstream Command → no inferred outputs → outputs stay null.
    expect(downFinal.outputs).toBeFalsy();
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
294
+
295
// Prompt sandwiched between two Commands: inputs are inferred from the
// upstream Command's outputs and outputs from the downstream Command's
// inputs, so the Prompt relays structured data in both directions.
test('command → prompt → command: prompt relays structured data both directions', async () => {
  const dir = makeDir();
  try {
    const mockScript = writeMockDriverScript(dir);
    const emit = writeEmitScript(dir, 'emit', { city: 'Paris' });
    const echo = writeEchoArgsScript(dir, 'echo');
    const midRecord = join(dir, 'mid.prompt');
    const responses: Record<string, Record<string, unknown>> = {
      mid: { greeting: 'Bonjour Paris' },
    };
    const records: Record<string, string> = { mid: midRecord };

    // `mid` is a Prompt between two Commands. Its inferred inputs
    // come from `up` (city), its inferred outputs come from `down`
    // (greeting). No ports declared on `mid`.
    //
    // The authored prompt text deliberately avoids the word `greeting`,
    // so the key-name assertion below can only be satisfied by text the
    // engine added, not by the prompt itself.
    const config = pipeline([
      task({
        id: 'up',
        command: `node "${emit}"`,
        ports: { outputs: [{ name: 'city', type: 'string' }] } as TaskPorts,
      }),
      task({
        id: 'mid',
        depends_on: ['up'],
        prompt: 'Write a welcome line for {{inputs.city}}.',
        driver: 'mock-echo',
      }),
      task({
        id: 'down',
        depends_on: ['mid'],
        command: `node "${echo}" "{{inputs.greeting}}"`,
        ports: {
          inputs: [{ name: 'greeting', type: 'string', required: true }],
        } as TaskPorts,
      }),
    ]);

    const registry = registryWithMock(mockScript, { responses, records });
    const { events, success } = await run(config, dir, registry);
    expect(success).toBe(true);

    // Middle prompt has both [Inputs] (from upstream) and
    // [Output Format] (from downstream) — inferred in both directions.
    const midPrompt = readFileSync(midRecord, 'utf8');
    expect(midPrompt).toContain('[Inputs]');
    expect(midPrompt).toMatch(/city:\s*"Paris"/);
    expect(midPrompt).toContain('[Output Format]');
    expect(midPrompt).toMatch(/\bgreeting\b/);
    expect(midPrompt).toContain('Write a welcome line for Paris.');

    const midFinal = finalUpdateFor(events, 't.mid');
    if (midFinal?.type !== 'task_update') {
      throw new Error('expected a task_update event for t.mid');
    }
    expect(midFinal.inputs).toEqual({ city: 'Paris' });
    expect(midFinal.outputs).toEqual({ greeting: 'Bonjour Paris' });

    const downFinal = finalUpdateFor(events, 't.down');
    if (downFinal?.type !== 'task_update') {
      throw new Error('expected a task_update event for t.down');
    }
    expect((downFinal.stdout ?? '').trim()).toBe('Bonjour Paris');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
357
+
358
// Two Prompts in a row with no Command neighbor: nothing can be inferred,
// so no structured ports flow between them and no context blocks are added.
test('prompt → prompt: no structured port flow, free text only', async () => {
  const dir = makeDir();
  try {
    const mockScript = writeMockDriverScript(dir);
    const downRecord = join(dir, 'down.prompt');
    const responses: Record<string, Record<string, unknown>> = {
      up: { city: 'Tokyo' },
      down: { greeting: 'hello Tokyo' },
    };
    const records: Record<string, string> = { down: downRecord };

    // Neither Prompt has a Command neighbor in either direction, so
    // both have empty inferred ports. `up`'s JSON final line is NOT
    // extracted (no inferred outputs); `down` does NOT see `[Inputs]`
    // or `[Output Format]`. Information between them flows only
    // through continue_from / free text. `down` deliberately uses no
    // `{{inputs.*}}` placeholder: with no inferred inputs there would
    // be nothing for one to bind to.
    const config = pipeline([
      task({
        id: 'up',
        prompt: 'Pick a city.',
        driver: 'mock-echo',
      }),
      task({
        id: 'down',
        depends_on: ['up'],
        prompt: 'Greet the city.',
        driver: 'mock-echo',
      }),
    ]);

    const registry = registryWithMock(mockScript, { responses, records });
    const { events, success } = await run(config, dir, registry);
    expect(success).toBe(true);
    expect(finalStatusFrom(events, 't.up')).toBe('success');
    expect(finalStatusFrom(events, 't.down')).toBe('success');

    // No inferred outputs on either side.
    const upFinal = finalUpdateFor(events, 't.up');
    if (upFinal?.type !== 'task_update') throw new Error('expected a task_update event for t.up');
    expect(upFinal.outputs).toBeFalsy();

    // Down's prompt has no [Inputs] / [Output Format] blocks.
    const downPrompt = readFileSync(downRecord, 'utf8');
    expect(downPrompt).not.toContain('[Inputs]');
    expect(downPrompt).not.toContain('[Output Format]');

    const downFinal = finalUpdateFor(events, 't.down');
    if (downFinal?.type !== 'task_update') {
      throw new Error('expected a task_update event for t.down');
    }
    expect(downFinal.inputs).toEqual({});
    expect(downFinal.outputs).toBeFalsy();
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
414
+
415
// Input-side collision: `a` and `b` both declare an output named `val`, and
// the Prompt `down` depends on both, so it cannot tell which `val` to use.
test('prompt with two upstream Commands exporting the same name → blocked', async () => {
  const dir = makeDir();
  try {
    const mockScript = writeMockDriverScript(dir);
    const emitA = writeEmitScript(dir, 'emitA', { val: 'from-a' });
    const emitB = writeEmitScript(dir, 'emitB', { val: 'from-b' });
    const responses: Record<string, Record<string, unknown>> = {};
    const records: Record<string, string> = {};

    const config = pipeline([
      task({
        id: 'a',
        command: `node "${emitA}"`,
        ports: { outputs: [{ name: 'val', type: 'string' }] } as TaskPorts,
      }),
      task({
        id: 'b',
        command: `node "${emitB}"`,
        ports: { outputs: [{ name: 'val', type: 'string' }] } as TaskPorts,
      }),
      task({
        id: 'down',
        depends_on: ['a', 'b'],
        prompt: 'Use {{inputs.val}}',
        driver: 'mock-echo',
      }),
    ]);

    const registry = registryWithMock(mockScript, { responses, records });
    const { events } = await run(config, dir, registry);
    expect(finalStatusFrom(events, 't.down')).toBe('blocked');

    // Throw rather than branch, so the stderr assertion can never be
    // skipped silently (same guard style as the other tests here).
    const downFinal = finalUpdateFor(events, 't.down');
    if (downFinal?.type !== 'task_update') {
      throw new Error('expected a task_update event for t.down');
    }
    expect(downFinal.stderr ?? '').toMatch(/cannot disambiguate|produced by multiple upstream/i);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
454
+
455
// Output-side collision: `d1` wants `date` as a string while `d2` wants it as
// a number, so no single output schema can be inferred for the Prompt `mid`.
test('prompt with two downstream Commands disagreeing on input type → blocked', async () => {
  const dir = makeDir();
  try {
    const mockScript = writeMockDriverScript(dir);
    const echo1 = writeEchoArgsScript(dir, 'echo1');
    const echo2 = writeEchoArgsScript(dir, 'echo2');
    const responses: Record<string, Record<string, unknown>> = {};
    const records: Record<string, string> = {};

    const config = pipeline([
      task({
        id: 'mid',
        prompt: 'produce a date',
        driver: 'mock-echo',
      }),
      task({
        id: 'd1',
        depends_on: ['mid'],
        command: `node "${echo1}" "{{inputs.date}}"`,
        ports: {
          inputs: [{ name: 'date', type: 'string', required: true }],
        } as TaskPorts,
      }),
      task({
        id: 'd2',
        depends_on: ['mid'],
        command: `node "${echo2}" "{{inputs.date}}"`,
        ports: {
          inputs: [{ name: 'date', type: 'number', required: true }],
        } as TaskPorts,
      }),
    ]);

    const registry = registryWithMock(mockScript, { responses, records });
    const { events } = await run(config, dir, registry);
    expect(finalStatusFrom(events, 't.mid')).toBe('blocked');

    // Throw rather than branch, so the stderr assertion can never be
    // skipped silently (same guard style as the other tests here).
    const midFinal = finalUpdateFor(events, 't.mid');
    if (midFinal?.type !== 'task_update') {
      throw new Error('expected a task_update event for t.mid');
    }
    expect(midFinal.stderr ?? '').toMatch(/conflicting type requirements|conflicting output/i);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
499
+ });
package/src/engine.ts CHANGED
@@ -30,7 +30,13 @@ import {
30
30
  renderInputsBlock,
31
31
  renderOutputSchemaBlock,
32
32
  } from './prompt-doc';
33
- import { extractTaskOutputs, resolveTaskInputs, substituteInputs } from './ports';
33
+ import {
34
+ extractTaskOutputs,
35
+ inferPromptPorts,
36
+ resolveTaskInputs,
37
+ substituteInputs,
38
+ } from './ports';
39
+ import type { TaskPorts } from './types';
34
40
  import {
35
41
  executeHook,
36
42
  buildPipelineStartContext,
@@ -393,6 +399,20 @@ export async function runPipeline(
393
399
  // just before a task runs, so every subsequent task_update event can
394
400
  // echo them to the UI without re-resolving.
395
401
  const resolvedInputsMap = new Map<string, Readonly<Record<string, unknown>>>();
402
+ // Reverse adjacency: for each task, list the direct-downstream task ids
403
+ // (tasks whose `depends_on` includes this one after DAG qualification).
404
+ // Computed once up front so Prompt-task port inference — which needs
405
+ // "what Commands directly consume me?" — is O(1) instead of O(tasks)
406
+ // per Prompt start. `dag.nodes` only exposes forward edges via
407
+ // `dependsOn`, so we build this locally.
408
+ const directDownstreams = new Map<string, string[]>();
409
+ for (const [id] of dag.nodes) directDownstreams.set(id, []);
410
+ for (const [id, node] of dag.nodes) {
411
+ for (const upstream of node.dependsOn) {
412
+ const list = directDownstreams.get(upstream);
413
+ if (list) list.push(id);
414
+ }
415
+ }
396
416
 
397
417
  // Pipeline timeout + abort reason tracking.
398
418
  //
@@ -753,7 +773,83 @@ export async function runPipeline(
753
773
  // Resolution runs even for tasks that declare no ports — the call
754
774
  // is cheap and returns `{kind: 'ready', inputs: {}}` in that case,
755
775
  // which downstream code handles uniformly.
756
- const inputResolution = resolveTaskInputs(task, outputValuesMap, node.dependsOn);
776
+ //
777
+ // Prompt Tasks have no declared ports — their I/O contract is
778
+ // inferred from direct-neighbor Command Tasks (see ports.ts:
779
+ // `inferPromptPorts`). We synthesize a `TaskPorts` object and
780
+ // feed it into the same resolve/substitute/render/extract
781
+ // pipeline the Command path uses. Collisions that a Prompt can't
782
+ // disambiguate (same input name on two upstreams, incompatible
783
+ // downstream output types) block the task with a clear message.
784
+ const isPromptTask = task.prompt !== undefined && task.command === undefined;
785
+ let effectivePorts: TaskPorts | undefined = task.ports;
786
+ let promptInferenceBlockReason: string | null = null;
787
+
788
+ if (isPromptTask) {
789
+ const inference = inferPromptPorts({
790
+ upstreams: node.dependsOn.map((upstreamId) => {
791
+ const upstream = dag.nodes.get(upstreamId);
792
+ const isUpstreamCommand = !!upstream?.task.command;
793
+ return {
794
+ taskId: upstreamId,
795
+ outputs: isUpstreamCommand ? upstream?.task.ports?.outputs : undefined,
796
+ };
797
+ }),
798
+ downstreams: (directDownstreams.get(taskId) ?? []).map((downstreamId) => {
799
+ const downstream = dag.nodes.get(downstreamId);
800
+ const isDownstreamCommand = !!downstream?.task.command;
801
+ return {
802
+ taskId: downstreamId,
803
+ inputs: isDownstreamCommand ? downstream?.task.ports?.inputs : undefined,
804
+ };
805
+ }),
806
+ });
807
+ effectivePorts = inference.ports;
808
+ if (inference.inputConflicts.length > 0 || inference.outputConflicts.length > 0) {
809
+ const lines: string[] = [];
810
+ for (const c of inference.inputConflicts) lines.push(c.reason);
811
+ for (const c of inference.outputConflicts) lines.push(c.reason);
812
+ promptInferenceBlockReason = lines.join('\n');
813
+ }
814
+ }
815
+
816
+ if (promptInferenceBlockReason !== null) {
817
+ log.error(
818
+ `[task:${taskId}]`,
819
+ `blocked — prompt port inference failed:\n${promptInferenceBlockReason}`,
820
+ );
821
+ state.result = {
822
+ exitCode: -1,
823
+ stdout: '',
824
+ stderr: `[engine] prompt port inference failed:\n${promptInferenceBlockReason}`,
825
+ stdoutPath: null,
826
+ stderrPath: null,
827
+ durationMs: 0,
828
+ sessionId: null,
829
+ normalizedOutput: null,
830
+ failureKind: 'spawn_error',
831
+ outputs: null,
832
+ };
833
+ state.finishedAt = nowISO();
834
+ setTaskStatus(taskId, 'blocked');
835
+ try {
836
+ await fireHook(taskId, 'task_failure');
837
+ } catch (hookErr) {
838
+ log.error(
839
+ `[task:${taskId}]`,
840
+ `hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
841
+ );
842
+ }
843
+ if (getOnFailure(taskId) === 'stop_all') applyStopAll(node.track.id);
844
+ return;
845
+ }
846
+
847
+ // Feed effective ports into `resolveTaskInputs` by shallow-cloning
848
+ // the task. Prompt tasks get the inferred ports; Command tasks are
849
+ // unchanged (effectivePorts === task.ports).
850
+ const taskForResolve: TaskConfig =
851
+ effectivePorts === task.ports ? task : { ...task, ports: effectivePorts };
852
+ const inputResolution = resolveTaskInputs(taskForResolve, outputValuesMap, node.dependsOn);
757
853
  if (inputResolution.kind === 'blocked') {
758
854
  log.error(
759
855
  `[task:${taskId}]`,
@@ -792,10 +888,11 @@ export async function runPipeline(
792
888
  `optional inputs unresolved (empty in placeholders): ${inputResolution.missingOptional.join(', ')}`,
793
889
  );
794
890
  }
795
- if (task.ports?.inputs && task.ports.inputs.length > 0) {
891
+ if (effectivePorts?.inputs && effectivePorts.inputs.length > 0) {
796
892
  log.debug(
797
893
  `[task:${taskId}]`,
798
- `resolved inputs: ${JSON.stringify(resolvedInputs)}`,
894
+ `resolved inputs: ${JSON.stringify(resolvedInputs)}` +
895
+ (isPromptTask ? ' (inferred from upstream Commands)' : ''),
799
896
  );
800
897
  }
801
898
 
@@ -888,11 +985,11 @@ export async function runPipeline(
888
985
  // matters: [Output Format] first (sets the deliverable), then
889
986
  // [Inputs] (the concrete data to operate on). Empty blocks are
890
987
  // filtered out — tasks without ports get no extra blocks at all.
891
- const outputFormatBlock = renderOutputSchemaBlock(task.ports?.outputs);
988
+ const outputFormatBlock = renderOutputSchemaBlock(effectivePorts?.outputs);
892
989
  if (outputFormatBlock) {
893
990
  doc = prependContext(doc, outputFormatBlock);
894
991
  }
895
- const inputsBlock = renderInputsBlock(task.ports?.inputs, resolvedInputs);
992
+ const inputsBlock = renderInputsBlock(effectivePorts?.inputs, resolvedInputs);
896
993
  if (inputsBlock) {
897
994
  doc = prependContext(doc, inputsBlock);
898
995
  }
@@ -996,6 +1093,13 @@ export async function runPipeline(
996
1093
  ...task,
997
1094
  prompt,
998
1095
  continue_from: node.resolvedContinueFrom,
1096
+ // Hand the driver the EFFECTIVE port schema rather than the
1097
+ // raw task.ports. For Prompt tasks this is the one inferred
1098
+ // from neighbor Commands; Command tasks are unchanged.
1099
+ // Drivers that introspect ports (e.g. to annotate a system
1100
+ // prompt with the I/O contract) otherwise saw `undefined`
1101
+ // for every prompt and had no way to know the contract.
1102
+ ports: effectivePorts,
999
1103
  };
1000
1104
  const driverCtx: DriverContext = {
1001
1105
  sessionMap,
@@ -1074,17 +1178,22 @@ export async function runPipeline(
1074
1178
  // through driver-specific logs.
1075
1179
  let extractedOutputs: Readonly<Record<string, unknown>> | null = null;
1076
1180
  if (terminalStatus === 'success') {
1181
+ // Prompt tasks use inferred ports (from direct-downstream Command
1182
+ // inputs); Command tasks use their declared ports. Either way,
1183
+ // `extractTaskOutputs` is a no-op when there are no declared
1184
+ // outputs to pull, so pre-ports tasks pay nothing for this call.
1077
1185
  const extraction = extractTaskOutputs(
1078
- task.ports,
1186
+ effectivePorts,
1079
1187
  result.stdout,
1080
1188
  result.normalizedOutput,
1081
1189
  );
1082
- if (task.ports?.outputs && task.ports.outputs.length > 0) {
1190
+ if (effectivePorts?.outputs && effectivePorts.outputs.length > 0) {
1083
1191
  extractedOutputs = extraction.outputs;
1084
1192
  outputValuesMap.set(taskId, extraction.outputs);
1085
1193
  log.debug(
1086
1194
  `[task:${taskId}]`,
1087
- `extracted outputs: ${JSON.stringify(extraction.outputs)}`,
1195
+ `extracted outputs: ${JSON.stringify(extraction.outputs)}` +
1196
+ (isPromptTask ? ' (inferred from downstream Commands)' : ''),
1088
1197
  );
1089
1198
  if (extraction.diagnostic) {
1090
1199
  log.error(`[task:${taskId}]`, extraction.diagnostic);