@ryanfw/prompt-orchestration-pipeline 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/docs/pop-task-guide.md +44 -0
- package/package.json +3 -2
- package/src/core/__tests__/agent-step.test.ts +83 -35
- package/src/core/__tests__/task-runner.test.ts +48 -0
- package/src/core/agent-step.ts +77 -40
- package/src/core/agent-types.ts +58 -0
- package/src/core/orchestrator.ts +2 -1
- package/src/core/pipeline-definition.ts +1 -1
- package/src/core/task-runner.ts +19 -0
- package/src/core/validation.ts +1 -1
- package/src/harness/__tests__/discovery.test.ts +183 -0
- package/src/harness/discovery.ts +99 -0
- package/src/harness/index.ts +22 -0
- package/src/harness/mcp-io-server.ts +1 -1
- package/src/ui/dist/assets/{index-D7hzshSS.js → index-CbS3OsW7.js} +115 -0
- package/src/ui/dist/assets/index-CbS3OsW7.js.map +1 -0
- package/src/ui/dist/index.html +1 -1
- package/src/ui/embedded-assets.js +6 -6
- package/src/ui/pages/Code.tsx +135 -0
- package/src/harness/__tests__/descriptors.test.ts +0 -378
- package/src/harness/__tests__/executor.test.ts +0 -193
- package/src/harness/__tests__/resolve.test.ts +0 -200
- package/src/harness/__tests__/types.test.ts +0 -297
- package/src/harness/descriptors/claude.ts +0 -132
- package/src/harness/descriptors/codex.ts +0 -126
- package/src/harness/descriptors/index.ts +0 -10
- package/src/harness/descriptors/opencode.ts +0 -147
- package/src/harness/executor.ts +0 -128
- package/src/harness/resolve.ts +0 -176
- package/src/harness/types.ts +0 -100
- package/src/ui/dist/assets/index-D7hzshSS.js.map +0 -1
package/README.md
CHANGED
|
@@ -50,6 +50,7 @@ Switch models globally or per-task without rewriting your logic.
|
|
|
50
50
|
* **Moonshot** (Kimi)
|
|
51
51
|
* **Zhipu** (GLM-4)
|
|
52
52
|
* **Claude Code** (CLI integration)
|
|
53
|
+
* **CLI Agents**: Tasks can also drive tool-using CLI coding agents (Claude, Codex, OpenCode) via the injected `runAgent()` helper — for file-aware, multi-turn work alongside single LLM calls. See the [Task Development Guide](docs/pop-task-guide.md#agent-api).
|
|
53
54
|
|
|
54
55
|
---
|
|
55
56
|
|
package/docs/pop-task-guide.md
CHANGED
|
@@ -192,6 +192,49 @@ const response = await llm.deepseek.chat({
|
|
|
192
192
|
|
|
193
193
|
---
|
|
194
194
|
|
|
195
|
+
## Agent API
|
|
196
|
+
|
|
197
|
+
Available via the `runAgent` function passed to stages. It runs a CLI coding
|
|
198
|
+
agent (the harness adapter) from inside a standard JavaScript task — the same
|
|
199
|
+
machinery behind pipeline `agent:` entries, but callable mid-task with a prompt
|
|
200
|
+
you build programmatically from upstream data.
|
|
201
|
+
|
|
202
|
+
```js
|
|
203
|
+
export const inference = async ({ runAgent, io, data, flags }) => {
|
|
204
|
+
const result = await runAgent({
|
|
205
|
+
harness: "claude", // "claude" | "codex" | "opencode"
|
|
206
|
+
prompt: "Read 'context.md', then write a summary to 'summary.md'.",
|
|
207
|
+
// model?: string // optional, passed through to the CLI
|
|
208
|
+
// io?: boolean // default true: bridge POP read/write artifacts
|
|
209
|
+
// timeoutMs?: number // optional wall-clock cap
|
|
210
|
+
// captureDiff?: boolean // capture a git diff as 'agent.patch'
|
|
211
|
+
});
|
|
212
|
+
|
|
213
|
+
if (!result.ok) {
|
|
214
|
+
throw new Error(`Agent failed: ${result.error}`);
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
// result: { ok, finalMessage, artifactsWritten, usage?, costUsd?, sessionId? }
|
|
218
|
+
const summary = await io.readArtifact("summary.md");
|
|
219
|
+
return { output: { summary }, flags };
|
|
220
|
+
};
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
By default (`io` is `true`) the agent shares the task's file I/O: it can call the
|
|
224
|
+
`read_artifact` / `write_artifact` tools to read and write the same artifacts the
|
|
225
|
+
task sees, and its `agent-result.md` is written automatically. Token usage and
|
|
226
|
+
cost flow into the job status like any other LLM call.
|
|
227
|
+
|
|
228
|
+
**`runAgent` vs `llm`**: use `llm.<provider>.chat()` for a single request/response
|
|
229
|
+
LLM call; use `runAgent()` when you need a tool-using CLI agent that reads and
|
|
230
|
+
writes files over multiple turns.
|
|
231
|
+
|
|
232
|
+
**`runAgent` vs an `agent:` pipeline entry**: an `agent:` entry takes a static
|
|
233
|
+
prompt from `pipeline.json`. `runAgent()` lets a JavaScript task compose the
|
|
234
|
+
prompt from seed/stage data and post-process the result in later stages.
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
195
238
|
## Validation API
|
|
196
239
|
|
|
197
240
|
Available via `validators` object in stages that need schema validation.
|
|
@@ -320,6 +363,7 @@ Each stage receives:
|
|
|
320
363
|
{
|
|
321
364
|
io, // File I/O (may be null)
|
|
322
365
|
llm, // LLM client
|
|
366
|
+
runAgent, // Run a CLI agent harness (see Agent API)
|
|
323
367
|
validators, // { validateWithSchema }
|
|
324
368
|
flags, // Control flags
|
|
325
369
|
meta: { taskName, workDir, jobId },
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ryanfw/prompt-orchestration-pipeline",
|
|
3
|
-
"version": "1.3.0",
|
|
3
|
+
"version": "1.3.1",
|
|
4
4
|
"description": "A Prompt-orchestration pipeline (POP) is a framework for building, running, and experimenting with complex chains of LLM tasks.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/ui/server/index.ts",
|
|
@@ -72,7 +72,8 @@
|
|
|
72
72
|
"tslib": "^2.8.1",
|
|
73
73
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
74
74
|
"@opencode-ai/sdk": "^1.17.4",
|
|
75
|
-
"zod": "^3.25.0"
|
|
75
|
+
"zod": "^3.25.0",
|
|
76
|
+
"local-llm-cli-adapter": "github:ryan-mahoney/local-llm-cli-adapter#2ea1aa2d8e8dbe43eb845eb4730b08a02618f476"
|
|
76
77
|
},
|
|
77
78
|
"devDependencies": {
|
|
78
79
|
"@eslint/js": "^9.37.0",
|
|
package/src/core/__tests__/agent-step.test.ts
CHANGED
|
@@ -5,11 +5,11 @@ import { tmpdir } from "node:os";
|
|
|
5
5
|
import type { WriteOptions, TaskFileIO } from "../file-io.ts";
|
|
6
6
|
import type { McpIoServerHandle } from "../../harness/mcp-io-server.ts";
|
|
7
7
|
import type {
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
RunResult,
|
|
9
|
+
HarnessRun,
|
|
10
10
|
HarnessEvent,
|
|
11
|
-
} from "../../harness/types.ts";
|
|
12
|
-
import { runAgentStep } from "../agent-step.ts";
|
|
11
|
+
} from "../../harness/index.ts";
|
|
12
|
+
import { runAgentStep, executeAgent } from "../agent-step.ts";
|
|
13
13
|
import { createTaskFileIO } from "../file-io.ts";
|
|
14
14
|
|
|
15
15
|
function createFakeIO(): TaskFileIO & { calls: string[] } {
|
|
@@ -84,26 +84,33 @@ function makeArgs(overrides?: {
|
|
|
84
84
|
};
|
|
85
85
|
}
|
|
86
86
|
|
|
87
|
-
function
|
|
87
|
+
function makeFakeHarnessRun(result: RunResult | Error): HarnessRun {
|
|
88
|
+
const resolved =
|
|
89
|
+
result instanceof Error
|
|
90
|
+
? Promise.reject(result)
|
|
91
|
+
: Promise.resolve(result);
|
|
92
|
+
return {
|
|
93
|
+
result: resolved,
|
|
94
|
+
sessionId: Promise.resolve("sess-1"),
|
|
95
|
+
abort() {},
|
|
96
|
+
};
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
function makeSuccessResult(overrides?: Partial<RunResult>): RunResult {
|
|
88
100
|
return {
|
|
89
101
|
finalMessage: "task complete",
|
|
90
|
-
events: [],
|
|
91
102
|
exitCode: 0,
|
|
92
103
|
usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
|
|
93
|
-
costUsd: 0.12,
|
|
94
104
|
sessionId: "sess-1",
|
|
95
105
|
...overrides,
|
|
96
106
|
};
|
|
97
107
|
}
|
|
98
108
|
|
|
99
|
-
function makeDeps(result:
|
|
100
|
-
const
|
|
101
|
-
if (result instanceof Error) throw result;
|
|
102
|
-
return result;
|
|
103
|
-
});
|
|
109
|
+
function makeDeps(result: RunResult | Error) {
|
|
110
|
+
const run = mock(() => makeFakeHarnessRun(result));
|
|
104
111
|
const startMcpIoServer = mock(async () => createFakeMcpHandle());
|
|
105
112
|
const createTaskFileIO = mock(() => createFakeIO());
|
|
106
|
-
return {
|
|
113
|
+
return { run, startMcpIoServer, createTaskFileIO };
|
|
107
114
|
}
|
|
108
115
|
|
|
109
116
|
function gitSync(args: string[], cwd: string): string {
|
|
@@ -119,13 +126,60 @@ function makeCaptureDeps(workDir: string) {
|
|
|
119
126
|
statusPath: join(workDir, "tasks-status.json"),
|
|
120
127
|
});
|
|
121
128
|
return {
|
|
122
|
-
|
|
129
|
+
run: mock(() => makeFakeHarnessRun(makeSuccessResult())),
|
|
123
130
|
startMcpIoServer: mock(async () => createFakeMcpHandle()),
|
|
124
131
|
createTaskFileIO: mock(() => io),
|
|
125
132
|
io,
|
|
126
133
|
};
|
|
127
134
|
}
|
|
128
135
|
|
|
136
|
+
describe("executeAgent", () => {
|
|
137
|
+
it("runs against a provided io and returns ok:true with merged artifacts", async () => {
|
|
138
|
+
const io = createFakeIO();
|
|
139
|
+
const run = mock(() => makeFakeHarnessRun(makeSuccessResult()));
|
|
140
|
+
const startMcpIoServer = mock(async () => createFakeMcpHandle(["explainer.md"]));
|
|
141
|
+
|
|
142
|
+
const result = await executeAgent(
|
|
143
|
+
{ io, entry: { name: "agent-explainer", harness: "claude", prompt: "do it" } },
|
|
144
|
+
{ run, startMcpIoServer },
|
|
145
|
+
);
|
|
146
|
+
|
|
147
|
+
expect(result.ok).toBe(true);
|
|
148
|
+
expect(result.finalMessage).toBe("task complete");
|
|
149
|
+
expect(result.artifactsWritten).toContain("explainer.md");
|
|
150
|
+
expect(result.artifactsWritten).toContain("agent-result.md");
|
|
151
|
+
expect(io.calls).toContain("writeArtifact:agent-result.md");
|
|
152
|
+
expect(startMcpIoServer).toHaveBeenCalled();
|
|
153
|
+
});
|
|
154
|
+
|
|
155
|
+
it("returns ok:false when the harness run rejects", async () => {
|
|
156
|
+
const io = createFakeIO();
|
|
157
|
+
const run = mock(() => makeFakeHarnessRun(new Error("spawn failed")));
|
|
158
|
+
const startMcpIoServer = mock(async () => createFakeMcpHandle());
|
|
159
|
+
|
|
160
|
+
const result = await executeAgent(
|
|
161
|
+
{ io, entry: { name: "agent-explainer", harness: "codex", prompt: "do it" } },
|
|
162
|
+
{ run, startMcpIoServer },
|
|
163
|
+
);
|
|
164
|
+
|
|
165
|
+
expect(result.ok).toBe(false);
|
|
166
|
+
expect(result.error).toBe("spawn failed");
|
|
167
|
+
});
|
|
168
|
+
|
|
169
|
+
it("skips the MCP server when io is false", async () => {
|
|
170
|
+
const io = createFakeIO();
|
|
171
|
+
const run = mock(() => makeFakeHarnessRun(makeSuccessResult()));
|
|
172
|
+
const startMcpIoServer = mock(async () => createFakeMcpHandle());
|
|
173
|
+
|
|
174
|
+
await executeAgent(
|
|
175
|
+
{ io, entry: { name: "a", harness: "opencode", prompt: "p", io: false } },
|
|
176
|
+
{ run, startMcpIoServer },
|
|
177
|
+
);
|
|
178
|
+
|
|
179
|
+
expect(startMcpIoServer).not.toHaveBeenCalled();
|
|
180
|
+
});
|
|
181
|
+
});
|
|
182
|
+
|
|
129
183
|
describe("runAgentStep", () => {
|
|
130
184
|
it("success writes event log and agent-result.md and returns ok:true with usage/cost", async () => {
|
|
131
185
|
const deps = makeDeps(makeSuccessResult());
|
|
@@ -135,7 +189,7 @@ describe("runAgentStep", () => {
|
|
|
135
189
|
expect(result.ok).toBe(true);
|
|
136
190
|
expect(result.finalMessage).toBe("task complete");
|
|
137
191
|
expect(result.usage).toEqual({ inputTokens: 100, outputTokens: 50, totalTokens: 150 });
|
|
138
|
-
expect(result.costUsd).
|
|
192
|
+
expect(result.costUsd).toBeUndefined();
|
|
139
193
|
expect(result.sessionId).toBe("sess-1");
|
|
140
194
|
expect(result.artifactsWritten).toContain("agent-result.md");
|
|
141
195
|
expect(deps.startMcpIoServer).toHaveBeenCalled();
|
|
@@ -150,18 +204,18 @@ describe("runAgentStep", () => {
|
|
|
150
204
|
});
|
|
151
205
|
|
|
152
206
|
const events: HarnessEvent[] = [
|
|
153
|
-
{
|
|
154
|
-
{
|
|
207
|
+
{ harness: "claude", seq: 0, at: Date.now(), raw: { text: "hello" }, type: "assistant_message", text: "hello" },
|
|
208
|
+
{ harness: "claude", seq: 1, at: Date.now(), raw: { message: "done" }, type: "run_completed", result: makeSuccessResult() },
|
|
155
209
|
];
|
|
156
|
-
const
|
|
210
|
+
const run = mock((opts: { onEvent?: (event: HarnessEvent) => void }) => {
|
|
157
211
|
for (const event of events) {
|
|
158
212
|
opts.onEvent?.(event);
|
|
159
213
|
}
|
|
160
|
-
return makeSuccessResult();
|
|
214
|
+
return makeFakeHarnessRun(makeSuccessResult());
|
|
161
215
|
});
|
|
162
216
|
const startMcpIoServer = mock(async () => createFakeMcpHandle());
|
|
163
217
|
|
|
164
|
-
await runAgentStep(makeArgs(), {
|
|
218
|
+
await runAgentStep(makeArgs(), { run, startMcpIoServer, createTaskFileIO });
|
|
165
219
|
|
|
166
220
|
const logCalls = capturedIO!.calls.filter((c) => c.startsWith("writeLog:"));
|
|
167
221
|
expect(logCalls).toHaveLength(2);
|
|
@@ -177,15 +231,15 @@ describe("runAgentStep", () => {
|
|
|
177
231
|
});
|
|
178
232
|
|
|
179
233
|
let capturedPrompt: string | undefined;
|
|
180
|
-
const
|
|
234
|
+
const run = mock((opts: { prompt: string }) => {
|
|
181
235
|
capturedPrompt = opts.prompt;
|
|
182
|
-
return makeSuccessResult();
|
|
236
|
+
return makeFakeHarnessRun(makeSuccessResult());
|
|
183
237
|
});
|
|
184
238
|
const startMcpIoServer = mock(async () => createFakeMcpHandle());
|
|
185
239
|
|
|
186
240
|
const result = await runAgentStep(
|
|
187
241
|
makeArgs({ entry: { prompt: undefined, promptFrom: "my-prompt.md" } }),
|
|
188
|
-
{
|
|
242
|
+
{ run, startMcpIoServer, createTaskFileIO },
|
|
189
243
|
);
|
|
190
244
|
|
|
191
245
|
expect(result.ok).toBe(true);
|
|
@@ -197,9 +251,7 @@ describe("runAgentStep", () => {
|
|
|
197
251
|
const mcpHandle = createFakeMcpHandle();
|
|
198
252
|
const createTaskFileIO = mock(() => createFakeIO());
|
|
199
253
|
const deps = {
|
|
200
|
-
|
|
201
|
-
throw new Error("boom");
|
|
202
|
-
}),
|
|
254
|
+
run: mock(() => makeFakeHarnessRun(new Error("boom"))),
|
|
203
255
|
startMcpIoServer: mock(async () => mcpHandle),
|
|
204
256
|
createTaskFileIO,
|
|
205
257
|
};
|
|
@@ -216,7 +268,7 @@ describe("runAgentStep", () => {
|
|
|
216
268
|
const mcpHandle = createFakeMcpHandle();
|
|
217
269
|
const createTaskFileIO = mock(() => createFakeIO());
|
|
218
270
|
const deps = {
|
|
219
|
-
|
|
271
|
+
run: mock(() => makeFakeHarnessRun(makeSuccessResult())),
|
|
220
272
|
startMcpIoServer: mock(async () => mcpHandle),
|
|
221
273
|
createTaskFileIO,
|
|
222
274
|
};
|
|
@@ -229,9 +281,7 @@ describe("runAgentStep", () => {
|
|
|
229
281
|
const mcpHandle = createFakeMcpHandle();
|
|
230
282
|
const createTaskFileIO = mock(() => createFakeIO());
|
|
231
283
|
const deps = {
|
|
232
|
-
|
|
233
|
-
throw new Error("fail");
|
|
234
|
-
}),
|
|
284
|
+
run: mock(() => makeFakeHarnessRun(new Error("fail"))),
|
|
235
285
|
startMcpIoServer: mock(async () => mcpHandle),
|
|
236
286
|
createTaskFileIO,
|
|
237
287
|
};
|
|
@@ -244,9 +294,7 @@ describe("runAgentStep", () => {
|
|
|
244
294
|
const mcpHandle = createFakeMcpHandle();
|
|
245
295
|
const createTaskFileIO = mock(() => createFakeIO());
|
|
246
296
|
const deps = {
|
|
247
|
-
|
|
248
|
-
throw new Error('Harness "claude" timed out after 100ms');
|
|
249
|
-
}),
|
|
297
|
+
run: mock(() => makeFakeHarnessRun(new Error('Harness "claude" timed out after 100ms'))),
|
|
250
298
|
startMcpIoServer: mock(async () => mcpHandle),
|
|
251
299
|
createTaskFileIO,
|
|
252
300
|
};
|
|
@@ -258,7 +306,7 @@ describe("runAgentStep", () => {
|
|
|
258
306
|
it("does not start MCP server when io is false", async () => {
|
|
259
307
|
const createTaskFileIO = mock(() => createFakeIO());
|
|
260
308
|
const deps = {
|
|
261
|
-
|
|
309
|
+
run: mock(() => makeFakeHarnessRun(makeSuccessResult())),
|
|
262
310
|
startMcpIoServer: mock(async () => createFakeMcpHandle()),
|
|
263
311
|
createTaskFileIO,
|
|
264
312
|
};
|
|
@@ -284,7 +332,7 @@ describe("runAgentStep", () => {
|
|
|
284
332
|
const mcpHandle = createFakeMcpHandle(["custom-artifact.md", "agent-result.md"]);
|
|
285
333
|
const createTaskFileIO = mock(() => createFakeIO());
|
|
286
334
|
const deps = {
|
|
287
|
-
|
|
335
|
+
run: mock(() => makeFakeHarnessRun(makeSuccessResult())),
|
|
288
336
|
startMcpIoServer: mock(async () => mcpHandle),
|
|
289
337
|
createTaskFileIO,
|
|
290
338
|
};
|
|
package/src/core/__tests__/task-runner.test.ts
CHANGED
|
@@ -211,3 +211,51 @@ describe("task-runner writes correct task-level state transitions", () => {
|
|
|
211
211
|
expect(task!.error).toBe("stage exploded");
|
|
212
212
|
});
|
|
213
213
|
});
|
|
214
|
+
|
|
215
|
+
describe("runPipeline runAgent injection", () => {
|
|
216
|
+
it("passes runAgent to stages and delegates to the supplied override", async () => {
|
|
217
|
+
const root = await makeTempRoot();
|
|
218
|
+
const workDir = path.join(root, "job-agent");
|
|
219
|
+
await mkdir(workDir, { recursive: true });
|
|
220
|
+
await writeFile(
|
|
221
|
+
path.join(workDir, "tasks-status.json"),
|
|
222
|
+
JSON.stringify({ id: "job-agent", tasks: {} }),
|
|
223
|
+
);
|
|
224
|
+
|
|
225
|
+
const modulePath = path.join(root, "agent-task.mjs");
|
|
226
|
+
await writeFile(
|
|
227
|
+
modulePath,
|
|
228
|
+
[
|
|
229
|
+
"export const ingestion = async ({ runAgent, flags }) => {",
|
|
230
|
+
" const r = await runAgent({ harness: 'claude', prompt: 'hello agent' });",
|
|
231
|
+
" return { output: r, flags };",
|
|
232
|
+
"};",
|
|
233
|
+
].join("\n"),
|
|
234
|
+
);
|
|
235
|
+
|
|
236
|
+
const calls: unknown[] = [];
|
|
237
|
+
const result = await runPipeline(modulePath, {
|
|
238
|
+
workDir,
|
|
239
|
+
taskName: "agent-task",
|
|
240
|
+
statusPath: path.join(workDir, "tasks-status.json"),
|
|
241
|
+
jobId: "job-agent",
|
|
242
|
+
envLoaded: true,
|
|
243
|
+
seed: { data: {} },
|
|
244
|
+
pipelineTasks: ["agent-task"],
|
|
245
|
+
llm: {} as never,
|
|
246
|
+
runAgent: async (options) => {
|
|
247
|
+
calls.push(options);
|
|
248
|
+
return { ok: true, finalMessage: "did it", artifactsWritten: ["x.md"] };
|
|
249
|
+
},
|
|
250
|
+
});
|
|
251
|
+
|
|
252
|
+
expect(result.ok).toBe(true);
|
|
253
|
+
expect(calls).toEqual([{ harness: "claude", prompt: "hello agent" }]);
|
|
254
|
+
if (result.ok) {
|
|
255
|
+
expect(result.context.data["ingestion"]).toMatchObject({
|
|
256
|
+
ok: true,
|
|
257
|
+
finalMessage: "did it",
|
|
258
|
+
});
|
|
259
|
+
}
|
|
260
|
+
});
|
|
261
|
+
});
|
package/src/core/agent-step.ts
CHANGED
|
@@ -3,14 +3,11 @@ import { existsSync } from "node:fs";
|
|
|
3
3
|
import { mkdir } from "node:fs/promises";
|
|
4
4
|
import { createTaskFileIO, generateLogName } from "./file-io.ts";
|
|
5
5
|
import { LogEvent, LogFileExtension } from "../config/log-events.ts";
|
|
6
|
-
import {
|
|
6
|
+
import { run } from "../harness/index.ts";
|
|
7
7
|
import { startMcpIoServer } from "../harness/mcp-io-server.ts";
|
|
8
8
|
import type { McpIoServerHandle } from "../harness/mcp-io-server.ts";
|
|
9
|
-
import type {
|
|
10
|
-
|
|
11
|
-
AgentStepResult,
|
|
12
|
-
HarnessEvent,
|
|
13
|
-
} from "../harness/types.ts";
|
|
9
|
+
import type { AgentEntryConfig, AgentStepResult } from "./agent-types.ts";
|
|
10
|
+
import type { RunOptions, HarnessRun, HarnessEvent } from "../harness/index.ts";
|
|
14
11
|
import type { TaskFileIO } from "./file-io.ts";
|
|
15
12
|
|
|
16
13
|
function gitSync(args: string[], cwd: string): { exitCode: number; stdout: string; stderr: string } {
|
|
@@ -65,78 +62,88 @@ async function captureDiff(io: TaskFileIO, cwd: string): Promise<boolean> {
|
|
|
65
62
|
}
|
|
66
63
|
}
|
|
67
64
|
|
|
68
|
-
|
|
65
|
+
/** Dependency seam shared by the agent runners (injected in tests). */
|
|
66
|
+
interface AgentRunDeps {
|
|
67
|
+
run?: typeof run;
|
|
68
|
+
startMcpIoServer?: typeof startMcpIoServer;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
/**
|
|
72
|
+
* Runs a single CLI-agent harness step against an existing task `io`.
|
|
73
|
+
*
|
|
74
|
+
* This is the shared core behind both pipeline `agent:` entries (via
|
|
75
|
+
* {@link runAgentStep}) and the `runAgent()` helper injected into standard
|
|
76
|
+
* JavaScript task stages (via the task runner). The caller owns the `io`, so
|
|
77
|
+
* the agent reads and writes the same task artifacts the surrounding task sees.
|
|
78
|
+
*/
|
|
79
|
+
export async function executeAgent(
|
|
69
80
|
args: {
|
|
81
|
+
io: TaskFileIO;
|
|
70
82
|
entry: AgentEntryConfig & { name: string };
|
|
71
|
-
workDir: string;
|
|
72
|
-
statusPath: string;
|
|
73
|
-
jobId: string | undefined;
|
|
74
|
-
getStage: () => string;
|
|
75
|
-
},
|
|
76
|
-
deps?: {
|
|
77
|
-
runHarnessTask?: typeof runHarnessTask;
|
|
78
|
-
startMcpIoServer?: typeof startMcpIoServer;
|
|
79
|
-
createTaskFileIO?: typeof createTaskFileIO;
|
|
80
83
|
},
|
|
84
|
+
deps?: AgentRunDeps,
|
|
81
85
|
): Promise<AgentStepResult> {
|
|
82
|
-
const
|
|
86
|
+
const _run = deps?.run ?? run;
|
|
83
87
|
const _startMcpIoServer = deps?.startMcpIoServer ?? startMcpIoServer;
|
|
84
|
-
const
|
|
88
|
+
const { io, entry } = args;
|
|
85
89
|
|
|
86
|
-
const
|
|
87
|
-
workDir: args.workDir,
|
|
88
|
-
taskName: args.entry.name,
|
|
89
|
-
getStage: args.getStage,
|
|
90
|
-
statusPath: args.statusPath,
|
|
91
|
-
});
|
|
92
|
-
|
|
93
|
-
const cwd = args.entry.cwd ?? io.getTaskDir();
|
|
90
|
+
const cwd = entry.cwd ?? io.getTaskDir();
|
|
94
91
|
// The harness spawns with this cwd before any artifact is written, so the task
|
|
95
92
|
// dir may not exist yet — posix_spawn ENOENTs on a missing working directory.
|
|
96
93
|
await mkdir(cwd, { recursive: true });
|
|
97
94
|
|
|
98
95
|
let prompt: string;
|
|
99
|
-
if (
|
|
100
|
-
prompt =
|
|
101
|
-
} else if (
|
|
102
|
-
prompt = await io.readArtifact(
|
|
96
|
+
if (entry.prompt !== undefined) {
|
|
97
|
+
prompt = entry.prompt;
|
|
98
|
+
} else if (entry.promptFrom !== undefined) {
|
|
99
|
+
prompt = await io.readArtifact(entry.promptFrom);
|
|
103
100
|
} else {
|
|
104
101
|
throw new Error(
|
|
105
|
-
`Agent entry "${
|
|
102
|
+
`Agent entry "${entry.name}" must specify either "prompt" or "promptFrom"`,
|
|
106
103
|
);
|
|
107
104
|
}
|
|
108
105
|
|
|
109
106
|
let mcpHandle: McpIoServerHandle | undefined;
|
|
110
|
-
if (
|
|
107
|
+
if (entry.io !== false) {
|
|
111
108
|
mcpHandle = await _startMcpIoServer(io);
|
|
112
109
|
}
|
|
113
110
|
|
|
114
111
|
try {
|
|
115
112
|
const logName = generateLogName(
|
|
116
|
-
|
|
113
|
+
entry.name,
|
|
117
114
|
"agent",
|
|
118
115
|
LogEvent.DEBUG,
|
|
119
116
|
LogFileExtension.TEXT,
|
|
120
117
|
);
|
|
121
118
|
|
|
122
|
-
const
|
|
123
|
-
harness:
|
|
119
|
+
const options: RunOptions = {
|
|
120
|
+
harness: entry.harness,
|
|
124
121
|
prompt,
|
|
125
122
|
cwd,
|
|
126
|
-
model:
|
|
127
|
-
|
|
128
|
-
|
|
123
|
+
model: entry.model,
|
|
124
|
+
mcpServers: mcpHandle
|
|
125
|
+
? [{
|
|
126
|
+
name: "popio",
|
|
127
|
+
url: mcpHandle.connection.url,
|
|
128
|
+
headers: { Authorization: `Bearer ${mcpHandle.connection.token}` },
|
|
129
|
+
}]
|
|
130
|
+
: undefined,
|
|
131
|
+
timeoutMs: entry.timeoutMs,
|
|
132
|
+
permissionMode: "bypass",
|
|
129
133
|
onEvent: (event: HarnessEvent) => {
|
|
130
134
|
void io.writeLog(logName, JSON.stringify(event.raw) + "\n", {
|
|
131
135
|
mode: "append",
|
|
132
136
|
});
|
|
133
137
|
},
|
|
134
|
-
}
|
|
138
|
+
};
|
|
139
|
+
|
|
140
|
+
const harnessRun: HarnessRun = _run(options);
|
|
141
|
+
const result = await harnessRun.result;
|
|
135
142
|
|
|
136
143
|
await io.writeArtifact("agent-result.md", result.finalMessage);
|
|
137
144
|
|
|
138
145
|
let patchWritten = false;
|
|
139
|
-
if (
|
|
146
|
+
if (entry.captureDiff) {
|
|
140
147
|
patchWritten = await captureDiff(io, cwd);
|
|
141
148
|
}
|
|
142
149
|
|
|
@@ -171,3 +178,33 @@ export async function runAgentStep(
|
|
|
171
178
|
}
|
|
172
179
|
}
|
|
173
180
|
}
|
|
181
|
+
|
|
182
|
+
/**
|
|
183
|
+
* Runs a CLI-agent harness step for a pipeline `agent:` entry.
|
|
184
|
+
*
|
|
185
|
+
* Creates a task-scoped `io` from the run paths, then delegates to
|
|
186
|
+
* {@link executeAgent}. Used by the pipeline runner for `agent` entries.
|
|
187
|
+
*/
|
|
188
|
+
export async function runAgentStep(
|
|
189
|
+
args: {
|
|
190
|
+
entry: AgentEntryConfig & { name: string };
|
|
191
|
+
workDir: string;
|
|
192
|
+
statusPath: string;
|
|
193
|
+
jobId: string | undefined;
|
|
194
|
+
getStage: () => string;
|
|
195
|
+
},
|
|
196
|
+
deps?: AgentRunDeps & {
|
|
197
|
+
createTaskFileIO?: typeof createTaskFileIO;
|
|
198
|
+
},
|
|
199
|
+
): Promise<AgentStepResult> {
|
|
200
|
+
const _createTaskFileIO = deps?.createTaskFileIO ?? createTaskFileIO;
|
|
201
|
+
|
|
202
|
+
const io = _createTaskFileIO({
|
|
203
|
+
workDir: args.workDir,
|
|
204
|
+
taskName: args.entry.name,
|
|
205
|
+
getStage: args.getStage,
|
|
206
|
+
statusPath: args.statusPath,
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
return executeAgent({ io, entry: args.entry }, deps);
|
|
210
|
+
}
|
|
package/src/core/agent-types.ts
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import type { HarnessName, Usage } from "../harness/index.ts";
|
|
2
|
+
|
|
3
|
+
export interface McpServerConnection {
|
|
4
|
+
url: string;
|
|
5
|
+
token: string;
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
export interface AgentEntryConfig {
|
|
9
|
+
harness: HarnessName;
|
|
10
|
+
model?: string;
|
|
11
|
+
prompt?: string;
|
|
12
|
+
promptFrom?: string;
|
|
13
|
+
cwd?: string;
|
|
14
|
+
io?: boolean;
|
|
15
|
+
timeoutMs?: number;
|
|
16
|
+
captureDiff?: boolean;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
export interface AgentStepResult {
|
|
20
|
+
ok: boolean;
|
|
21
|
+
finalMessage: string;
|
|
22
|
+
artifactsWritten: string[];
|
|
23
|
+
usage?: Usage;
|
|
24
|
+
costUsd?: number;
|
|
25
|
+
sessionId?: string;
|
|
26
|
+
error?: string;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Options for the `runAgent()` helper injected into standard JavaScript task
|
|
31
|
+
* stages. Mirrors {@link AgentEntryConfig} but requires an inline `prompt`
|
|
32
|
+
* (a task builds the prompt programmatically rather than reading it from an
|
|
33
|
+
* artifact via `promptFrom`).
|
|
34
|
+
*/
|
|
35
|
+
export interface TaskAgentOptions {
|
|
36
|
+
/** Which CLI agent to run: `"claude" | "codex" | "opencode"`. */
|
|
37
|
+
harness: HarnessName;
|
|
38
|
+
/** The instruction handed to the agent. */
|
|
39
|
+
prompt: string;
|
|
40
|
+
/** Optional model id passed through to the harness verbatim. */
|
|
41
|
+
model?: string;
|
|
42
|
+
/** Working directory for the agent. Defaults to the task directory. */
|
|
43
|
+
cwd?: string;
|
|
44
|
+
/**
|
|
45
|
+
* Bridge POP file I/O into the agent (read_artifact/write_artifact tools).
|
|
46
|
+
* Defaults to `true` so the agent shares the task's artifacts.
|
|
47
|
+
*/
|
|
48
|
+
io?: boolean;
|
|
49
|
+
/** Overall wall-clock cap in milliseconds. */
|
|
50
|
+
timeoutMs?: number;
|
|
51
|
+
/** Capture a git diff of the working tree as an `agent.patch` artifact. */
|
|
52
|
+
captureDiff?: boolean;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
/** The `runAgent()` function injected into JavaScript task stages. */
|
|
56
|
+
export type TaskAgentRunner = (
|
|
57
|
+
options: TaskAgentOptions,
|
|
58
|
+
) => Promise<AgentStepResult>;
|
package/src/core/orchestrator.ts
CHANGED
|
@@ -112,7 +112,8 @@ import { buildReexecArgs } from "../cli/self-reexec";
|
|
|
112
112
|
import { writeJobStatus } from "./status-writer";
|
|
113
113
|
import { initializeStatusFromArtifacts } from "./status-initializer";
|
|
114
114
|
import { materializeNormalizedPipelineDefinition } from "./pipeline-definition";
|
|
115
|
-
import {
|
|
115
|
+
import { discoverHarnesses } from "../harness/index.ts";
|
|
116
|
+
import { applyHarnessDiscovery } from "../harness/discovery.ts";
|
|
116
117
|
import {
|
|
117
118
|
listQueuedSeeds,
|
|
118
119
|
releaseJobSlot,
|