@tagma/sdk 0.6.7 → 0.6.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/engine.d.ts.map +1 -1
- package/dist/engine.js +108 -9
- package/dist/engine.js.map +1 -1
- package/dist/ports.d.ts +53 -1
- package/dist/ports.d.ts.map +1 -1
- package/dist/ports.js +142 -2
- package/dist/ports.js.map +1 -1
- package/dist/runner.d.ts.map +1 -1
- package/dist/runner.js +19 -6
- package/dist/runner.js.map +1 -1
- package/dist/sdk.d.ts +5 -3
- package/dist/sdk.d.ts.map +1 -1
- package/dist/sdk.js +3 -1
- package/dist/sdk.js.map +1 -1
- package/dist/validate-raw.d.ts.map +1 -1
- package/dist/validate-raw.js +240 -31
- package/dist/validate-raw.js.map +1 -1
- package/dist/yaml-compiler.d.ts +18 -0
- package/dist/yaml-compiler.d.ts.map +1 -0
- package/dist/yaml-compiler.js +59 -0
- package/dist/yaml-compiler.js.map +1 -0
- package/package.json +6 -1
- package/src/engine-ports-mixed.test.ts +499 -0
- package/src/engine.ts +118 -9
- package/src/ports.test.ts +170 -0
- package/src/ports.ts +231 -3
- package/src/runner.test.ts +3 -3
- package/src/runner.ts +21 -5
- package/src/sdk.ts +15 -2
- package/src/validate-raw-ports.test.ts +234 -49
- package/src/validate-raw.ts +269 -34
- package/src/yaml-compiler.ts +83 -0
|
@@ -0,0 +1,499 @@
|
|
|
1
|
+
import { describe, expect, test } from 'bun:test';
|
|
2
|
+
import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
|
|
3
|
+
import { tmpdir } from 'node:os';
|
|
4
|
+
import { join } from 'node:path';
|
|
5
|
+
import { PluginRegistry } from './registry';
|
|
6
|
+
import { bootstrapBuiltins } from './bootstrap';
|
|
7
|
+
import { runPipeline, type RunEventPayload } from './engine';
|
|
8
|
+
import type { DriverPlugin, PipelineConfig, TaskConfig, TaskPorts, TaskStatus } from './types';
|
|
9
|
+
|
|
10
|
+
// Mixed-mode port tests. Prompt Tasks do NOT declare ports — their I/O
|
|
11
|
+
// contract is inferred from direct-neighbor Command Tasks. The three
|
|
12
|
+
// cross-type boundaries the design has to cover:
|
|
13
|
+
//
|
|
14
|
+
// prompt → command (AI task produces outputs inferred from the
|
|
15
|
+
// downstream Command's declared inputs)
|
|
16
|
+
// command → prompt (AI task consumes the upstream Command's
|
|
17
|
+
// declared outputs via substitution + [Inputs])
|
|
18
|
+
// prompt → prompt (no structured port flow — free text only,
|
|
19
|
+
// carried by continue_from / normalizedOutput)
|
|
20
|
+
//
|
|
21
|
+
// A mock AI driver stands in for a real LLM. It records the engine's
|
|
22
|
+
// serialized prompt to a sidecar file and emits a per-task JSON
|
|
23
|
+
// response on the final stdout line, simulating the `[Output Format]`
|
|
24
|
+
// contract. Asserting on the sidecar record lets each test verify the
|
|
25
|
+
// engine prepended the right `[Inputs]` / `[Output Format]` blocks
|
|
26
|
+
// and expanded `{{inputs.X}}` placeholders inside the prompt.
|
|
27
|
+
|
|
28
|
+
// Read-only permission set (read allowed; write and execute denied) shared by
// the `task()` and `pipeline()` fixture builders in this file.
const PERMS = { read: true, write: false, execute: false };
|
|
29
|
+
|
|
30
|
+
/**
 * Creates a fresh, uniquely named scratch directory under the OS temp
 * directory and returns its path. Callers remove it themselves once the
 * test is done.
 */
function makeDir(): string {
  const prefix = join(tmpdir(), 'tagma-ports-mixed-');
  return mkdtempSync(prefix);
}
|
|
33
|
+
|
|
34
|
+
/**
 * Writes `<dir>/<name>.js`, a tiny Node script that prints `payload` as a
 * single JSON line on stdout, and returns the script's path.
 */
function writeEmitScript(dir: string, name: string, payload: Record<string, unknown>): string {
  const scriptPath = join(dir, `${name}.js`);
  // Double-encode: the inner stringify yields the JSON text, the outer one
  // turns that text into a valid JS string literal inside the script.
  const jsonLiteral = JSON.stringify(JSON.stringify(payload));
  const lines = [
    `process.stdout.write(${jsonLiteral});`,
    `process.stdout.write('\\n');`,
    '', // trailing newline after the last statement
  ];
  writeFileSync(scriptPath, lines.join('\n'));
  return scriptPath;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Writes `<dir>/<name>.js`, a Node script that prints its command-line
 * arguments joined by `|` followed by a newline, and returns the script's
 * path.
 */
function writeEchoArgsScript(dir: string, name: string): string {
  const scriptPath = join(dir, `${name}.js`);
  const body =
    `process.stdout.write(process.argv.slice(2).join('|'));\n` +
    `process.stdout.write('\\n');\n`;
  writeFileSync(scriptPath, body);
  return scriptPath;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Writes the mock driver's spawn script to `<dir>/mock-driver.js` and
 * returns its path.
 *
 * The generated script reads all of stdin (the serialized prompt), saves it
 * to the file named by `MOCK_RECORD_PATH` when that variable is set, echoes
 * it to stdout (newline-terminated), and finally prints `MOCK_RESPONSE` as
 * the last stdout line when that variable is non-empty.
 */
function writeMockDriverScript(dir: string): string {
  const scriptPath = join(dir, 'mock-driver.js');
  const scriptLines: readonly string[] = [
    "const fs = require('fs');",
    'const recordPath = process.env.MOCK_RECORD_PATH;',
    "let buf = '';",
    "process.stdin.setEncoding('utf8');",
    "process.stdin.on('data', (c) => { buf += c; });",
    "process.stdin.on('end', () => {",
    ' if (recordPath) fs.writeFileSync(recordPath, buf);',
    ' process.stdout.write(buf);',
    " if (!buf.endsWith('\\n')) process.stdout.write('\\n');",
    " const resp = process.env.MOCK_RESPONSE || '';",
    " if (resp) process.stdout.write(resp + '\\n');",
    '});',
  ];
  writeFileSync(scriptPath, scriptLines.join('\n'));
  return scriptPath;
}
|
|
73
|
+
|
|
74
|
+
/** Scripted behavior for the mock driver, keyed by task id. */
interface MockConfig {
  /** JSON object the mock "model" prints as its final stdout line, per task id. */
  readonly responses: { readonly [taskId: string]: Record<string, unknown> };
  /** File path where the echoed prompt is recorded, per task id. */
  readonly records: { readonly [taskId: string]: string };
}
|
|
80
|
+
|
|
81
|
+
/**
 * Builds the `mock-echo` driver plugin that stands in for a real AI driver.
 *
 * `buildCommand` runs `node <scriptPath>` with the task's prompt on stdin and
 * passes the per-task response / record path from `cfg` through the
 * `MOCK_RESPONSE` and `MOCK_RECORD_PATH` environment variables (each only
 * when configured for that task id).
 */
function makeMockDriver(scriptPath: string, cfg: MockConfig): DriverPlugin {
  // Collects the env vars for one task; keys are omitted when unconfigured.
  const envFor = (taskId: string): Record<string, string> => {
    const env: Record<string, string> = {};
    const response = cfg.responses[taskId];
    if (response) env.MOCK_RESPONSE = JSON.stringify(response);
    const recordFile = cfg.records[taskId];
    if (recordFile) env.MOCK_RECORD_PATH = recordFile;
    return env;
  };

  const driver: DriverPlugin = {
    name: 'mock-echo',
    capabilities: { sessionResume: false, systemPrompt: true, outputFormat: true },
    async buildCommand(task) {
      const env = envFor(task.id);
      return { args: ['node', scriptPath], stdin: task.prompt ?? '', env };
    },
    parseResult(stdout) {
      // A real AI driver would strip transport chrome and return only the
      // model's message. The mock's whole stdout already is the echoed
      // prompt plus the final JSON line, so it is passed through untouched.
      return { normalizedOutput: stdout };
    },
  };
  return driver;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Returns a new plugin registry with the built-in plugins bootstrapped and
 * the mock driver registered under the `drivers` category as `mock-echo`.
 */
function registryWithMock(scriptPath: string, cfg: MockConfig): PluginRegistry {
  const registry = new PluginRegistry();
  bootstrapBuiltins(registry);
  const mockDriver = makeMockDriver(scriptPath, cfg);
  registry.registerPlugin('drivers', 'mock-echo', mockDriver);
  return registry;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Builds a task config from `overrides`, filling in defaults for anything
 * not supplied: `name` falls back to the task id, `permissions` to `PERMS`,
 * and `driver` to `'opencode'`. Values in `overrides` always win.
 */
function task(overrides: Partial<TaskConfig> & { id: string }): TaskConfig {
  const defaults = {
    name: overrides.id,
    permissions: PERMS,
    driver: 'opencode',
  };
  return { ...defaults, ...overrides };
}
|
|
122
|
+
|
|
123
|
+
/**
 * Wraps `tasks` in a pipeline config named `ports-mixed-test` containing a
 * single track with id `t`, so tasks are addressed as `t.<taskId>` by the
 * tests in this file. The track uses the `skip_downstream` failure policy.
 */
function pipeline(tasks: TaskConfig[]): PipelineConfig {
  const config: PipelineConfig = {
    name: 'ports-mixed-test',
    tracks: [
      {
        id: 't',
        name: 'T',
        driver: 'opencode',
        permissions: PERMS,
        on_failure: 'skip_downstream',
        tasks,
      },
    ],
  };
  return config;
}
|
|
138
|
+
|
|
139
|
+
/** What `run()` hands back to a test after the pipeline finishes. */
interface RunResult {
  /** Events collected from the `onEvent` callback, in arrival order. */
  events: Array<RunEventPayload>;
  /** The `success` flag of the pipeline run. */
  success: boolean;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Runs `config` through `runPipeline` in `workDir` using the supplied
 * registry (plugin loading is skipped), recording every emitted event.
 *
 * @returns the recorded events together with the pipeline's `success` flag.
 */
async function run(
  config: PipelineConfig,
  workDir: string,
  registry: PluginRegistry,
): Promise<RunResult> {
  const collected: RunEventPayload[] = [];
  const { success } = await runPipeline(config, workDir, {
    registry,
    skipPluginLoading: true,
    onEvent: (event) => collected.push(event),
  });
  return { events: collected, success };
}
|
|
157
|
+
|
|
158
|
+
/**
 * Returns the last `task_update` event whose `taskId` equals `qid`, or
 * `undefined` when no such event was recorded.
 */
function finalUpdateFor(events: RunEventPayload[], qid: string): RunEventPayload | undefined {
  const updates = events.filter((ev) => ev.type === 'task_update' && ev.taskId === qid);
  return updates[updates.length - 1];
}
|
|
165
|
+
|
|
166
|
+
/**
 * Returns the status carried by the last `task_update` event for `qid`, or
 * `undefined` when the task never produced one.
 */
function finalStatusFrom(events: RunEventPayload[], qid: string): TaskStatus | undefined {
  const update = finalUpdateFor(events, qid);
  if (!update || update.type !== 'task_update') return undefined;
  return update.status;
}
|
|
170
|
+
|
|
171
|
+
describe('engine — ports: mixed prompt/command combinations', () => {
|
|
172
|
+
test('prompt → command: prompt outputs are inferred from downstream Command inputs', async () => {
  const dir = makeDir();
  try {
    const mockScript = writeMockDriverScript(dir);
    const echo = writeEchoArgsScript(dir, 'echo');
    const upRecord = join(dir, 'up.prompt');
    const responses: Record<string, Record<string, unknown>> = {
      up: { city: 'Shanghai', id: 7 },
    };
    const records: Record<string, string> = { up: upRecord };

    // `up` is a Prompt — it declares NO ports. Its output schema is
    // inferred at runtime from `down`'s declared inputs, which drives
    // the `[Output Format]` block the mock "model" sees.
    const config = pipeline([
      task({
        id: 'up',
        prompt: 'Pick a random city.',
        driver: 'mock-echo',
      }),
      task({
        id: 'down',
        depends_on: ['up'],
        command: `node "${echo}" "{{inputs.city}}" "{{inputs.id}}"`,
        ports: {
          inputs: [
            { name: 'city', type: 'string', required: true },
            { name: 'id', type: 'number', required: true },
          ],
        } as TaskPorts,
      }),
    ]);

    const registry = registryWithMock(mockScript, { responses, records });
    const { events, success } = await run(config, dir, registry);
    expect(success).toBe(true);

    // Upstream prompt was enriched with an [Output Format] block that
    // names the keys `down` wants (city, id) — inferred, not declared.
    expect(existsSync(upRecord)).toBe(true);
    const upPrompt = readFileSync(upRecord, 'utf8');
    expect(upPrompt).toContain('[Output Format]');
    expect(upPrompt).toContain('city');
    expect(upPrompt).toContain('id');

    // Engine extracted the mock's final-line JSON from normalizedOutput
    // using the inferred output schema. The optional-chained guard covers
    // both "no event at all" and "wrong event type" with one clear error
    // instead of dereferencing a possibly-undefined value.
    const upFinal = finalUpdateFor(events, 't.up');
    if (upFinal?.type !== 'task_update') throw new Error('expected update');
    expect(upFinal.status).toBe('success');
    expect(upFinal.outputs).toEqual({ city: 'Shanghai', id: 7 });

    // Downstream command saw the values post-substitution.
    const downFinal = finalUpdateFor(events, 't.down');
    if (downFinal?.type !== 'task_update') throw new Error('expected update');
    expect(downFinal.status).toBe('success');
    expect((downFinal.stdout ?? '').trim()).toBe('Shanghai|7');
    expect(downFinal.inputs).toEqual({ city: 'Shanghai', id: 7 });
  } finally {
    // Always remove the scratch directory, even when an expectation fails.
    rmSync(dir, { recursive: true, force: true });
  }
});
|
|
234
|
+
|
|
235
|
+
test('command → prompt: prompt inputs are inferred from upstream Command outputs', async () => {
  const dir = makeDir();
  try {
    const mockScript = writeMockDriverScript(dir);
    const emit = writeEmitScript(dir, 'emit', { city: 'Berlin', id: 3 });
    const downRecord = join(dir, 'down.prompt');
    const responses: Record<string, Record<string, unknown>> = {
      down: { summary: 'ok' },
    };
    const records: Record<string, string> = { down: downRecord };

    // `down` is a Prompt — it declares NO ports. Its input schema is
    // inferred from `up`'s declared outputs; its output schema is
    // empty (no downstream Command to infer from), so `down` is a
    // terminal free-text Prompt with structured inputs only.
    const config = pipeline([
      task({
        id: 'up',
        command: `node "${emit}"`,
        ports: {
          outputs: [
            { name: 'city', type: 'string' },
            { name: 'id', type: 'number' },
          ],
        } as TaskPorts,
      }),
      task({
        id: 'down',
        depends_on: ['up'],
        prompt: 'City is {{inputs.city}}, id={{inputs.id}}.',
        driver: 'mock-echo',
      }),
    ]);

    const registry = registryWithMock(mockScript, { responses, records });
    const { events, success } = await run(config, dir, registry);
    expect(success).toBe(true);

    // Downstream prompt saw:
    //   1. Placeholders substituted with concrete values
    //   2. An [Inputs] context block listing the inferred values
    //   3. NO [Output Format] block (no downstream Command to infer
    //      an output contract from — the Prompt is terminal)
    const downPrompt = readFileSync(downRecord, 'utf8');
    expect(downPrompt).toContain('City is Berlin, id=3.');
    expect(downPrompt).toContain('[Inputs]');
    expect(downPrompt).toMatch(/city:\s*"Berlin"/);
    expect(downPrompt).toMatch(/id:\s*3\b/);
    expect(downPrompt).not.toContain('[Output Format]');

    // One guard handles both a missing event and a wrong event type, so a
    // missing update fails with 'expected update' rather than a TypeError.
    const downFinal = finalUpdateFor(events, 't.down');
    if (downFinal?.type !== 'task_update') throw new Error('expected update');
    expect(downFinal.inputs).toEqual({ city: 'Berlin', id: 3 });
    // No downstream Command → no inferred outputs → outputs stay null.
    expect(downFinal.outputs).toBeFalsy();
  } finally {
    // Always remove the scratch directory, even when an expectation fails.
    rmSync(dir, { recursive: true, force: true });
  }
});
|
|
294
|
+
|
|
295
|
+
test('command → prompt → command: prompt relays structured data both directions', async () => {
  const dir = makeDir();
  try {
    const mockScript = writeMockDriverScript(dir);
    const emit = writeEmitScript(dir, 'emit', { city: 'Paris' });
    const echo = writeEchoArgsScript(dir, 'echo');
    const midRecord = join(dir, 'mid.prompt');
    const responses: Record<string, Record<string, unknown>> = {
      mid: { greeting: 'Bonjour Paris' },
    };
    const records: Record<string, string> = { mid: midRecord };

    // `mid` is a Prompt between two Commands. Its inferred inputs
    // come from `up` (city), its inferred outputs come from `down`
    // (greeting). No ports declared on `mid`.
    const config = pipeline([
      task({
        id: 'up',
        command: `node "${emit}"`,
        ports: { outputs: [{ name: 'city', type: 'string' }] } as TaskPorts,
      }),
      task({
        id: 'mid',
        depends_on: ['up'],
        prompt: 'Generate a greeting for {{inputs.city}}.',
        driver: 'mock-echo',
      }),
      task({
        id: 'down',
        depends_on: ['mid'],
        command: `node "${echo}" "{{inputs.greeting}}"`,
        ports: {
          inputs: [{ name: 'greeting', type: 'string', required: true }],
        } as TaskPorts,
      }),
    ]);

    const registry = registryWithMock(mockScript, { responses, records });
    const { events, success } = await run(config, dir, registry);
    expect(success).toBe(true);

    // Middle prompt has both [Inputs] (from upstream) and
    // [Output Format] (from downstream) — inferred in both directions.
    const midPrompt = readFileSync(midRecord, 'utf8');
    expect(midPrompt).toContain('[Inputs]');
    expect(midPrompt).toMatch(/city:\s*"Paris"/);
    expect(midPrompt).toContain('[Output Format]');
    expect(midPrompt).toContain('greeting');
    expect(midPrompt).toContain('Generate a greeting for Paris.');

    // One guard handles both a missing event and a wrong event type, so a
    // missing update fails with 'expected update' rather than a TypeError.
    const midFinal = finalUpdateFor(events, 't.mid');
    if (midFinal?.type !== 'task_update') throw new Error('expected update');
    expect(midFinal.inputs).toEqual({ city: 'Paris' });
    expect(midFinal.outputs).toEqual({ greeting: 'Bonjour Paris' });

    // The downstream Command received the Prompt's extracted output as its
    // argument and echoed it back.
    const downFinal = finalUpdateFor(events, 't.down');
    if (downFinal?.type !== 'task_update') throw new Error('expected update');
    expect((downFinal.stdout ?? '').trim()).toBe('Bonjour Paris');
  } finally {
    // Always remove the scratch directory, even when an expectation fails.
    rmSync(dir, { recursive: true, force: true });
  }
});
|
|
357
|
+
|
|
358
|
+
test('prompt → prompt: no structured port flow, free text only', async () => {
  const dir = makeDir();
  try {
    const mockScript = writeMockDriverScript(dir);
    const downRecord = join(dir, 'down.prompt');
    const responses: Record<string, Record<string, unknown>> = {
      up: { city: 'Tokyo' },
      down: { greeting: 'hello Tokyo' },
    };
    const records: Record<string, string> = { down: downRecord };

    // Neither Prompt has a Command neighbor in either direction, so
    // both have empty inferred ports. `up`'s JSON final line is NOT
    // extracted (no inferred outputs); `down` does NOT see `[Inputs]`
    // or `[Output Format]`. Information between them flows only
    // through continue_from / free text. Neither prompt here uses an
    // `{{inputs.X}}` placeholder, so there is nothing to substitute.
    const config = pipeline([
      task({
        id: 'up',
        prompt: 'Pick a city.',
        driver: 'mock-echo',
      }),
      task({
        id: 'down',
        depends_on: ['up'],
        prompt: 'Greet the city.',
        driver: 'mock-echo',
      }),
    ]);

    const registry = registryWithMock(mockScript, { responses, records });
    const { events, success } = await run(config, dir, registry);
    expect(success).toBe(true);
    expect(finalStatusFrom(events, 't.up')).toBe('success');
    expect(finalStatusFrom(events, 't.down')).toBe('success');

    // No inferred outputs on either side. The guard covers both a missing
    // event and a wrong event type with one clear error.
    const upFinal = finalUpdateFor(events, 't.up');
    if (upFinal?.type !== 'task_update') throw new Error('expected update');
    expect(upFinal.outputs).toBeFalsy();

    // Down's prompt has no [Inputs] / [Output Format] blocks.
    const downPrompt = readFileSync(downRecord, 'utf8');
    expect(downPrompt).not.toContain('[Inputs]');
    expect(downPrompt).not.toContain('[Output Format]');

    const downFinal = finalUpdateFor(events, 't.down');
    if (downFinal?.type !== 'task_update') throw new Error('expected update');
    expect(downFinal.inputs).toEqual({});
    expect(downFinal.outputs).toBeFalsy();
  } finally {
    // Always remove the scratch directory, even when an expectation fails.
    rmSync(dir, { recursive: true, force: true });
  }
});
|
|
414
|
+
|
|
415
|
+
test('prompt with two upstream Commands exporting the same name → blocked', async () => {
  const dir = makeDir();
  try {
    const mockScript = writeMockDriverScript(dir);
    const emitA = writeEmitScript(dir, 'emitA', { val: 'from-a' });
    const emitB = writeEmitScript(dir, 'emitB', { val: 'from-b' });
    // The mock driver needs no scripted responses or prompt records here.
    const responses: Record<string, Record<string, unknown>> = {};
    const records: Record<string, string> = {};

    // Both upstream Commands declare an output called `val`, so the
    // Prompt's `{{inputs.val}}` cannot be attributed to a single source.
    const config = pipeline([
      task({
        id: 'a',
        command: `node "${emitA}"`,
        ports: { outputs: [{ name: 'val', type: 'string' }] } as TaskPorts,
      }),
      task({
        id: 'b',
        command: `node "${emitB}"`,
        ports: { outputs: [{ name: 'val', type: 'string' }] } as TaskPorts,
      }),
      task({
        id: 'down',
        depends_on: ['a', 'b'],
        prompt: 'Use {{inputs.val}}',
        driver: 'mock-echo',
      }),
    ]);

    const registry = registryWithMock(mockScript, { responses, records });
    const { events } = await run(config, dir, registry);
    expect(finalStatusFrom(events, 't.down')).toBe('blocked');

    // Fail loudly when the update is missing so the stderr expectation
    // below can never be skipped silently.
    const downFinal = finalUpdateFor(events, 't.down');
    if (downFinal?.type !== 'task_update') throw new Error('expected update');
    expect(downFinal.stderr ?? '').toMatch(/cannot disambiguate|produced by multiple upstream/i);
  } finally {
    // Always remove the scratch directory, even when an expectation fails.
    rmSync(dir, { recursive: true, force: true });
  }
});
|
|
454
|
+
|
|
455
|
+
test('prompt with two downstream Commands disagreeing on input type → blocked', async () => {
  const dir = makeDir();
  try {
    const mockScript = writeMockDriverScript(dir);
    const echo1 = writeEchoArgsScript(dir, 'echo1');
    const echo2 = writeEchoArgsScript(dir, 'echo2');
    // The mock driver needs no scripted responses or prompt records here.
    const responses: Record<string, Record<string, unknown>> = {};
    const records: Record<string, string> = {};

    // `d1` wants `date` as a string while `d2` wants it as a number, so
    // no single output type can be inferred for the `mid` Prompt.
    const config = pipeline([
      task({
        id: 'mid',
        prompt: 'produce a date',
        driver: 'mock-echo',
      }),
      task({
        id: 'd1',
        depends_on: ['mid'],
        command: `node "${echo1}" "{{inputs.date}}"`,
        ports: {
          inputs: [{ name: 'date', type: 'string', required: true }],
        } as TaskPorts,
      }),
      task({
        id: 'd2',
        depends_on: ['mid'],
        command: `node "${echo2}" "{{inputs.date}}"`,
        ports: {
          inputs: [{ name: 'date', type: 'number', required: true }],
        } as TaskPorts,
      }),
    ]);

    const registry = registryWithMock(mockScript, { responses, records });
    const { events } = await run(config, dir, registry);
    expect(finalStatusFrom(events, 't.mid')).toBe('blocked');

    // Fail loudly when the update is missing so the stderr expectation
    // below can never be skipped silently.
    const midFinal = finalUpdateFor(events, 't.mid');
    if (midFinal?.type !== 'task_update') throw new Error('expected update');
    expect(midFinal.stderr ?? '').toMatch(/conflicting type requirements|conflicting output/i);
  } finally {
    // Always remove the scratch directory, even when an expectation fails.
    rmSync(dir, { recursive: true, force: true });
  }
});
|
|
499
|
+
});
|
package/src/engine.ts
CHANGED
|
@@ -30,7 +30,13 @@ import {
|
|
|
30
30
|
renderInputsBlock,
|
|
31
31
|
renderOutputSchemaBlock,
|
|
32
32
|
} from './prompt-doc';
|
|
33
|
-
import {
|
|
33
|
+
import {
|
|
34
|
+
extractTaskOutputs,
|
|
35
|
+
inferPromptPorts,
|
|
36
|
+
resolveTaskInputs,
|
|
37
|
+
substituteInputs,
|
|
38
|
+
} from './ports';
|
|
39
|
+
import type { TaskPorts } from './types';
|
|
34
40
|
import {
|
|
35
41
|
executeHook,
|
|
36
42
|
buildPipelineStartContext,
|
|
@@ -393,6 +399,20 @@ export async function runPipeline(
|
|
|
393
399
|
// just before a task runs, so every subsequent task_update event can
|
|
394
400
|
// echo them to the UI without re-resolving.
|
|
395
401
|
const resolvedInputsMap = new Map<string, Readonly<Record<string, unknown>>>();
|
|
402
|
+
// Reverse adjacency: for each task, list the direct-downstream task ids
|
|
403
|
+
// (tasks whose `depends_on` includes this one after DAG qualification).
|
|
404
|
+
// Computed once up front so Prompt-task port inference — which needs
|
|
405
|
+
// "what Commands directly consume me?" — is O(1) instead of O(tasks)
|
|
406
|
+
// per Prompt start. `dag.nodes` only exposes forward edges via
|
|
407
|
+
// `dependsOn`, so we build this locally.
|
|
408
|
+
const directDownstreams = new Map<string, string[]>();
|
|
409
|
+
for (const [id] of dag.nodes) directDownstreams.set(id, []);
|
|
410
|
+
for (const [id, node] of dag.nodes) {
|
|
411
|
+
for (const upstream of node.dependsOn) {
|
|
412
|
+
const list = directDownstreams.get(upstream);
|
|
413
|
+
if (list) list.push(id);
|
|
414
|
+
}
|
|
415
|
+
}
|
|
396
416
|
|
|
397
417
|
// Pipeline timeout + abort reason tracking.
|
|
398
418
|
//
|
|
@@ -753,7 +773,83 @@ export async function runPipeline(
|
|
|
753
773
|
// Resolution runs even for tasks that declare no ports — the call
|
|
754
774
|
// is cheap and returns `{kind: 'ready', inputs: {}}` in that case,
|
|
755
775
|
// which downstream code handles uniformly.
|
|
756
|
-
|
|
776
|
+
//
|
|
777
|
+
// Prompt Tasks have no declared ports — their I/O contract is
|
|
778
|
+
// inferred from direct-neighbor Command Tasks (see ports.ts:
|
|
779
|
+
// `inferPromptPorts`). We synthesize a `TaskPorts` object and
|
|
780
|
+
// feed it into the same resolve/substitute/render/extract
|
|
781
|
+
// pipeline the Command path uses. Collisions that a Prompt can't
|
|
782
|
+
// disambiguate (same input name on two upstreams, incompatible
|
|
783
|
+
// downstream output types) block the task with a clear message.
|
|
784
|
+
const isPromptTask = task.prompt !== undefined && task.command === undefined;
|
|
785
|
+
let effectivePorts: TaskPorts | undefined = task.ports;
|
|
786
|
+
let promptInferenceBlockReason: string | null = null;
|
|
787
|
+
|
|
788
|
+
if (isPromptTask) {
|
|
789
|
+
const inference = inferPromptPorts({
|
|
790
|
+
upstreams: node.dependsOn.map((upstreamId) => {
|
|
791
|
+
const upstream = dag.nodes.get(upstreamId);
|
|
792
|
+
const isUpstreamCommand = !!upstream?.task.command;
|
|
793
|
+
return {
|
|
794
|
+
taskId: upstreamId,
|
|
795
|
+
outputs: isUpstreamCommand ? upstream?.task.ports?.outputs : undefined,
|
|
796
|
+
};
|
|
797
|
+
}),
|
|
798
|
+
downstreams: (directDownstreams.get(taskId) ?? []).map((downstreamId) => {
|
|
799
|
+
const downstream = dag.nodes.get(downstreamId);
|
|
800
|
+
const isDownstreamCommand = !!downstream?.task.command;
|
|
801
|
+
return {
|
|
802
|
+
taskId: downstreamId,
|
|
803
|
+
inputs: isDownstreamCommand ? downstream?.task.ports?.inputs : undefined,
|
|
804
|
+
};
|
|
805
|
+
}),
|
|
806
|
+
});
|
|
807
|
+
effectivePorts = inference.ports;
|
|
808
|
+
if (inference.inputConflicts.length > 0 || inference.outputConflicts.length > 0) {
|
|
809
|
+
const lines: string[] = [];
|
|
810
|
+
for (const c of inference.inputConflicts) lines.push(c.reason);
|
|
811
|
+
for (const c of inference.outputConflicts) lines.push(c.reason);
|
|
812
|
+
promptInferenceBlockReason = lines.join('\n');
|
|
813
|
+
}
|
|
814
|
+
}
|
|
815
|
+
|
|
816
|
+
if (promptInferenceBlockReason !== null) {
|
|
817
|
+
log.error(
|
|
818
|
+
`[task:${taskId}]`,
|
|
819
|
+
`blocked — prompt port inference failed:\n${promptInferenceBlockReason}`,
|
|
820
|
+
);
|
|
821
|
+
state.result = {
|
|
822
|
+
exitCode: -1,
|
|
823
|
+
stdout: '',
|
|
824
|
+
stderr: `[engine] prompt port inference failed:\n${promptInferenceBlockReason}`,
|
|
825
|
+
stdoutPath: null,
|
|
826
|
+
stderrPath: null,
|
|
827
|
+
durationMs: 0,
|
|
828
|
+
sessionId: null,
|
|
829
|
+
normalizedOutput: null,
|
|
830
|
+
failureKind: 'spawn_error',
|
|
831
|
+
outputs: null,
|
|
832
|
+
};
|
|
833
|
+
state.finishedAt = nowISO();
|
|
834
|
+
setTaskStatus(taskId, 'blocked');
|
|
835
|
+
try {
|
|
836
|
+
await fireHook(taskId, 'task_failure');
|
|
837
|
+
} catch (hookErr) {
|
|
838
|
+
log.error(
|
|
839
|
+
`[task:${taskId}]`,
|
|
840
|
+
`hook execution failed: ${hookErr instanceof Error ? hookErr.message : String(hookErr)}`,
|
|
841
|
+
);
|
|
842
|
+
}
|
|
843
|
+
if (getOnFailure(taskId) === 'stop_all') applyStopAll(node.track.id);
|
|
844
|
+
return;
|
|
845
|
+
}
|
|
846
|
+
|
|
847
|
+
// Feed effective ports into `resolveTaskInputs` by shallow-cloning
|
|
848
|
+
// the task. Prompt tasks get the inferred ports; Command tasks are
|
|
849
|
+
// unchanged (effectivePorts === task.ports).
|
|
850
|
+
const taskForResolve: TaskConfig =
|
|
851
|
+
effectivePorts === task.ports ? task : { ...task, ports: effectivePorts };
|
|
852
|
+
const inputResolution = resolveTaskInputs(taskForResolve, outputValuesMap, node.dependsOn);
|
|
757
853
|
if (inputResolution.kind === 'blocked') {
|
|
758
854
|
log.error(
|
|
759
855
|
`[task:${taskId}]`,
|
|
@@ -792,10 +888,11 @@ export async function runPipeline(
|
|
|
792
888
|
`optional inputs unresolved (empty in placeholders): ${inputResolution.missingOptional.join(', ')}`,
|
|
793
889
|
);
|
|
794
890
|
}
|
|
795
|
-
if (
|
|
891
|
+
if (effectivePorts?.inputs && effectivePorts.inputs.length > 0) {
|
|
796
892
|
log.debug(
|
|
797
893
|
`[task:${taskId}]`,
|
|
798
|
-
`resolved inputs: ${JSON.stringify(resolvedInputs)}
|
|
894
|
+
`resolved inputs: ${JSON.stringify(resolvedInputs)}` +
|
|
895
|
+
(isPromptTask ? ' (inferred from upstream Commands)' : ''),
|
|
799
896
|
);
|
|
800
897
|
}
|
|
801
898
|
|
|
@@ -888,11 +985,11 @@ export async function runPipeline(
|
|
|
888
985
|
// matters: [Output Format] first (sets the deliverable), then
|
|
889
986
|
// [Inputs] (the concrete data to operate on). Empty blocks are
|
|
890
987
|
// filtered out — tasks without ports get no extra blocks at all.
|
|
891
|
-
const outputFormatBlock = renderOutputSchemaBlock(
|
|
988
|
+
const outputFormatBlock = renderOutputSchemaBlock(effectivePorts?.outputs);
|
|
892
989
|
if (outputFormatBlock) {
|
|
893
990
|
doc = prependContext(doc, outputFormatBlock);
|
|
894
991
|
}
|
|
895
|
-
const inputsBlock = renderInputsBlock(
|
|
992
|
+
const inputsBlock = renderInputsBlock(effectivePorts?.inputs, resolvedInputs);
|
|
896
993
|
if (inputsBlock) {
|
|
897
994
|
doc = prependContext(doc, inputsBlock);
|
|
898
995
|
}
|
|
@@ -996,6 +1093,13 @@ export async function runPipeline(
|
|
|
996
1093
|
...task,
|
|
997
1094
|
prompt,
|
|
998
1095
|
continue_from: node.resolvedContinueFrom,
|
|
1096
|
+
// Hand the driver the EFFECTIVE port schema rather than the
|
|
1097
|
+
// raw task.ports. For Prompt tasks this is the one inferred
|
|
1098
|
+
// from neighbor Commands; Command tasks are unchanged.
|
|
1099
|
+
// Drivers that introspect ports (e.g. to annotate a system
|
|
1100
|
+
// prompt with the I/O contract) otherwise saw `undefined`
|
|
1101
|
+
// for every prompt and had no way to know the contract.
|
|
1102
|
+
ports: effectivePorts,
|
|
999
1103
|
};
|
|
1000
1104
|
const driverCtx: DriverContext = {
|
|
1001
1105
|
sessionMap,
|
|
@@ -1074,17 +1178,22 @@ export async function runPipeline(
|
|
|
1074
1178
|
// through driver-specific logs.
|
|
1075
1179
|
let extractedOutputs: Readonly<Record<string, unknown>> | null = null;
|
|
1076
1180
|
if (terminalStatus === 'success') {
|
|
1181
|
+
// Prompt tasks use inferred ports (from direct-downstream Command
|
|
1182
|
+
// inputs); Command tasks use their declared ports. Either way,
|
|
1183
|
+
// `extractTaskOutputs` is a no-op when there are no declared
|
|
1184
|
+
// outputs to pull, so pre-ports tasks pay nothing for this call.
|
|
1077
1185
|
const extraction = extractTaskOutputs(
|
|
1078
|
-
|
|
1186
|
+
effectivePorts,
|
|
1079
1187
|
result.stdout,
|
|
1080
1188
|
result.normalizedOutput,
|
|
1081
1189
|
);
|
|
1082
|
-
if (
|
|
1190
|
+
if (effectivePorts?.outputs && effectivePorts.outputs.length > 0) {
|
|
1083
1191
|
extractedOutputs = extraction.outputs;
|
|
1084
1192
|
outputValuesMap.set(taskId, extraction.outputs);
|
|
1085
1193
|
log.debug(
|
|
1086
1194
|
`[task:${taskId}]`,
|
|
1087
|
-
`extracted outputs: ${JSON.stringify(extraction.outputs)}
|
|
1195
|
+
`extracted outputs: ${JSON.stringify(extraction.outputs)}` +
|
|
1196
|
+
(isPromptTask ? ' (inferred from downstream Commands)' : ''),
|
|
1088
1197
|
);
|
|
1089
1198
|
if (extraction.diagnostic) {
|
|
1090
1199
|
log.error(`[task:${taskId}]`, extraction.diagnostic);
|