@themoltnet/pi-extension 0.13.5 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +228 -1
- package/dist/index.js +913 -188
- package/package.json +3 -3
package/dist/index.js
CHANGED
|
@@ -2,17 +2,17 @@ import { createRequire } from "node:module";
|
|
|
2
2
|
import { execFileSync } from "node:child_process";
|
|
3
3
|
import { existsSync, mkdirSync, readFileSync, readdirSync, rmSync, statSync } from "node:fs";
|
|
4
4
|
import path, { join } from "node:path";
|
|
5
|
-
import { DefaultResourceLoader, SessionManager, createAgentSession, createBashTool, createBashToolDefinition, createEditTool, createEditToolDefinition, createReadTool, createReadToolDefinition, createWriteTool, createWriteToolDefinition, defineTool } from "@earendil-works/pi-coding-agent";
|
|
5
|
+
import { DefaultResourceLoader, SessionManager, createAgentSession, createBashTool, createBashToolDefinition, createEditTool, createEditToolDefinition, createReadTool, createReadToolDefinition, createSyntheticSourceInfo, createWriteTool, createWriteToolDefinition, defineTool, parseFrontmatter } from "@earendil-works/pi-coding-agent";
|
|
6
6
|
import { createHash } from "node:crypto";
|
|
7
7
|
import crypto, { createHash as createHash$1 } from "crypto";
|
|
8
8
|
import { readFile } from "node:fs/promises";
|
|
9
9
|
import { homedir } from "node:os";
|
|
10
10
|
import { Type, getModel } from "@earendil-works/pi-ai";
|
|
11
|
-
import { RealFSProvider, ShadowProvider, VM, VmCheckpoint, createHttpHooks, createShadowPathPredicate, ensureImageSelector, loadGuestAssets } from "@earendil-works/gondolin";
|
|
11
|
+
import { MemoryProvider, RealFSProvider, ShadowProvider, VM, VmCheckpoint, createHttpHooks, createShadowPathPredicate, ensureImageSelector, loadGuestAssets } from "@earendil-works/gondolin";
|
|
12
12
|
import { parseEnv } from "node:util";
|
|
13
13
|
import { SpanStatusCode, context, metrics, trace } from "@opentelemetry/api";
|
|
14
|
-
import { FormatRegistry, Type as Type$1 } from "@sinclair/typebox";
|
|
15
14
|
import { Value } from "@sinclair/typebox/value";
|
|
15
|
+
import { FormatRegistry, Type as Type$1 } from "@sinclair/typebox";
|
|
16
16
|
//#region \0rolldown/runtime.js
|
|
17
17
|
var __defProp = Object.defineProperty;
|
|
18
18
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
@@ -2424,13 +2424,31 @@ function problemToError(problem, statusCode) {
|
|
|
2424
2424
|
//#endregion
|
|
2425
2425
|
//#region ../sdk/src/agent-context.ts
|
|
2426
2426
|
function unwrapResult(result) {
|
|
2427
|
-
if (result.error) {
|
|
2427
|
+
if (result.error !== void 0 && result.error !== null) {
|
|
2428
2428
|
const error = result.error;
|
|
2429
|
-
throw problemToError(error, error.status
|
|
2429
|
+
if (isProblemDetails(error)) throw problemToError(error, error.status);
|
|
2430
|
+
if (error instanceof Error && result.response === void 0) {
|
|
2431
|
+
const networkError = new NetworkError(error.message, { detail: error.cause ? stringifyUnknown(error.cause) : void 0 });
|
|
2432
|
+
networkError.stack = error.stack;
|
|
2433
|
+
throw networkError;
|
|
2434
|
+
}
|
|
2435
|
+
throw new MoltNetError(`Unexpected error from MoltNet API: ${stringifyUnknown(error)}`, { code: "UNKNOWN" });
|
|
2430
2436
|
}
|
|
2431
2437
|
if (result.data === void 0) throw new MoltNetError("Unexpected empty response from MoltNet API", { code: "EMPTY_RESPONSE" });
|
|
2432
2438
|
return result.data;
|
|
2433
2439
|
}
|
|
2440
|
+
function isProblemDetails(error) {
|
|
2441
|
+
if (!error || typeof error !== "object") return false;
|
|
2442
|
+
return typeof error.status === "number" && ("title" in error || "detail" in error);
|
|
2443
|
+
}
|
|
2444
|
+
function stringifyUnknown(value) {
|
|
2445
|
+
if (value instanceof Error) return `${value.name}: ${value.message}`;
|
|
2446
|
+
try {
|
|
2447
|
+
return JSON.stringify(value) ?? String(value);
|
|
2448
|
+
} catch {
|
|
2449
|
+
return String(value);
|
|
2450
|
+
}
|
|
2451
|
+
}
|
|
2434
2452
|
function unwrapRequired(result, message, code) {
|
|
2435
2453
|
if (result.error || !result.data) throw new MoltNetError(message, { code });
|
|
2436
2454
|
return result.data;
|
|
@@ -8057,138 +8075,29 @@ function pruneOldSnapshots(maxCached, currentDir) {
|
|
|
8057
8075
|
});
|
|
8058
8076
|
}
|
|
8059
8077
|
//#endregion
|
|
8060
|
-
//#region src/tool-operations.ts
|
|
8061
|
-
/**
|
|
8062
|
-
* Gondolin tool operations: redirect pi's built-in tool operations
|
|
8063
|
-
* (read, write, edit, bash) to execute inside the VM.
|
|
8064
|
-
*
|
|
8065
|
-
* Follows the same pattern as upstream pi-gondolin.ts — pi's tool factories
|
|
8066
|
-
* accept an `operations` object that provides the underlying I/O.
|
|
8067
|
-
*/
|
|
8078
|
+
//#region src/vm-manager.ts
|
|
8068
8079
|
var GUEST_WORKSPACE$2 = "/workspace";
|
|
8069
|
-
function shQuote(s) {
|
|
8070
|
-
return "'" + s.replace(/'/g, "'\\''") + "'";
|
|
8071
|
-
}
|
|
8072
8080
|
/**
|
|
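8073
|
-
* Map a host-side absolute path to a guest-side /workspace path.
|
|
8074
|
-
* Throws if the path escapes the workspace.
|
|
8075
|
-
*/
|
|
8076
|
-
function toGuestPath(localCwd, localPath) {
|
|
8077
|
-
if (localPath === GUEST_WORKSPACE$2 || localPath.startsWith(`${GUEST_WORKSPACE$2}/`)) return localPath;
|
|
8078
|
-
const rel = path.relative(localCwd, localPath);
|
|
8079
|
-
if (rel === "") return GUEST_WORKSPACE$2;
|
|
8080
|
-
if (rel.startsWith("..") || path.isAbsolute(rel)) throw new Error(`path escapes workspace: ${localPath}`);
|
|
8081
|
-
const posixRel = rel.split(path.sep).join(path.posix.sep);
|
|
8082
|
-
return path.posix.join(GUEST_WORKSPACE$2, posixRel);
|
|
8083
|
-
}
|
|
8084
|
-
function createGondolinReadOps(vm, localCwd) {
|
|
8085
|
-
return {
|
|
8086
|
-
readFile: async (p) => {
|
|
8087
|
-
const r = await vm.exec(["/bin/cat", toGuestPath(localCwd, p)]);
|
|
8088
|
-
if (!r.ok) throw new Error(`cat failed (${r.exitCode}): ${r.stderr}`);
|
|
8089
|
-
return r.stdoutBuffer;
|
|
8090
|
-
},
|
|
8091
|
-
access: async (p) => {
|
|
8092
|
-
if (!(await vm.exec([
|
|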
8093
|
-
"/bin/sh",
|
|
8094
|
-
"-lc",
|
|
8095
|
-
`test -r ${shQuote(toGuestPath(localCwd, p))}`
|
|
8096
|
-
])).ok) throw new Error(`not readable: ${p}`);
|
|
8097
|
-
},
|
|
8098
|
-
detectImageMimeType: async (p) => {
|
|
8099
|
-
try {
|
|
8100
|
-
const r = await vm.exec([
|
|
8101
|
-
"/bin/sh",
|
|
8102
|
-
"-lc",
|
|
8103
|
-
`file --mime-type -b ${shQuote(toGuestPath(localCwd, p))}`
|
|
8104
|
-
]);
|
|
8105
|
-
if (!r.ok) return null;
|
|
8106
|
-
const m = r.stdout.trim();
|
|
8107
|
-
return [
|
|
8108
|
-
"image/jpeg",
|
|
8109
|
-
"image/png",
|
|
8110
|
-
"image/gif",
|
|
8111
|
-
"image/webp"
|
|
8112
|
-
].includes(m) ? m : null;
|
|
8113
|
-
} catch {
|
|
8114
|
-
return null;
|
|
8115
|
-
}
|
|
8116
|
-
}
|
|
8117
|
-
};
|
|
8118
|
-
}
|
|
8119
|
-
function createGondolinWriteOps(vm, localCwd) {
|
|
8120
|
-
return {
|
|
8121
|
-
writeFile: async (p, content) => {
|
|
8122
|
-
const guestPath = toGuestPath(localCwd, p);
|
|
8123
|
-
const dir = path.posix.dirname(guestPath);
|
|
8124
|
-
const b64 = Buffer.from(content, "utf8").toString("base64");
|
|
8125
|
-
const r = await vm.exec([
|
|
8126
|
-
"/bin/sh",
|
|
8127
|
-
"-lc",
|
|
8128
|
-
[
|
|
8129
|
-
"set -eu",
|
|
8130
|
-
`mkdir -p ${shQuote(dir)}`,
|
|
8131
|
-
`echo ${shQuote(b64)} | base64 -d > ${shQuote(guestPath)}`
|
|
8132
|
-
].join("\n")
|
|
8133
|
-
]);
|
|
8134
|
-
if (!r.ok) throw new Error(`write failed (${r.exitCode}): ${r.stderr}`);
|
|
8135
|
-
},
|
|
8136
|
-
mkdir: async (dir) => {
|
|
8137
|
-
const r = await vm.exec([
|
|
8138
|
-
"/bin/mkdir",
|
|
8139
|
-
"-p",
|
|
8140
|
-
toGuestPath(localCwd, dir)
|
|
8141
|
-
]);
|
|
8142
|
-
if (!r.ok) throw new Error(`mkdir failed (${r.exitCode}): ${r.stderr}`);
|
|
8143
|
-
}
|
|
8144
|
-
};
|
|
8145
|
-
}
|
|
8146
|
-
function createGondolinEditOps(vm, localCwd) {
|
|
8147
|
-
const r = createGondolinReadOps(vm, localCwd);
|
|
8148
|
-
const w = createGondolinWriteOps(vm, localCwd);
|
|
8149
|
-
return {
|
|
8150
|
-
readFile: r.readFile,
|
|
8151
|
-
access: r.access,
|
|
8152
|
-
writeFile: w.writeFile
|
|
8153
|
-
};
|
|
8154
|
-
}
|
|
8155
|
-
function createGondolinBashOps(vm, localCwd) {
|
|
8156
|
-
return { exec: async (command, cwd, { onData, signal, timeout, env }) => {
|
|
8157
|
-
const guestCwd = toGuestPath(localCwd, cwd);
|
|
8158
|
-
const ac = new AbortController();
|
|
8159
|
-
const onAbort = () => ac.abort();
|
|
8160
|
-
signal?.addEventListener("abort", onAbort, { once: true });
|
|
8161
|
-
let timedOut = false;
|
|
8162
|
-
const timer = timeout && timeout > 0 ? setTimeout(() => {
|
|
8163
|
-
timedOut = true;
|
|
8164
|
-
ac.abort();
|
|
8165
|
-
}, timeout * 1e3) : void 0;
|
|
8166
|
-
try {
|
|
8167
|
-
const proc = vm.exec([
|
|
8168
|
-
"/bin/sh",
|
|
8169
|
-
"-lc",
|
|
8170
|
-
command
|
|
8171
|
-
], {
|
|
8172
|
-
cwd: guestCwd,
|
|
8173
|
-
signal: ac.signal,
|
|
8174
|
-
stdout: "pipe",
|
|
8175
|
-
stderr: "pipe"
|
|
8176
|
-
});
|
|
8177
|
-
for await (const chunk of proc.output()) onData(typeof chunk.data === "string" ? Buffer.from(chunk.data, "utf8") : chunk.data);
|
|
8178
|
-
return { exitCode: (await proc).exitCode };
|
|
8179
|
-
} catch (err) {
|
|
8180
|
-
if (signal?.aborted) throw new Error("aborted");
|
|
8181
|
-
if (timedOut) throw new Error(`timeout:${timeout}`);
|
|
8182
|
-
throw err;
|
|
8183
|
-
} finally {
|
|
8184
|
-
if (timer) clearTimeout(timer);
|
|
8185
|
-
signal?.removeEventListener("abort", onAbort);
|
|
8186
|
-
}
|
|
8187
|
-
} };
|
|
8188
|
-
}
|
|
8189
|
-
//#endregion
|
|
8190
|
-
//#region src/vm-manager.ts
|
|
8191
|
-
var GUEST_WORKSPACE$1 = "/workspace";
|
|
8081
|
+
* Memory-backed VFS mount used by the daemon to inject task-context
|
|
8082
|
+
* skills (#943 slice 1.5). Sibling of /workspace, NOT a sub-path —
|
|
8083
|
+
* Gondolin mounts can't nest. The agent's Gondolin-bound Read tool
|
|
8084
|
+
* accepts paths under this prefix (see toGuestPath in tool-operations.ts).
|
|
8085
|
+
*
|
|
8086
|
+
* Why MemoryProvider rather than a path under /workspace:
|
|
8087
|
+
* - Injected skills are ephemeral by intent: per-task-attempt input
|
|
8088
|
+
* scoped to the VM lifetime. MemoryProvider models that exactly —
|
|
8089
|
+
* in-memory, per-VM-instance, zero host artefacts, automatic
|
|
8090
|
+
* cleanup on VM close.
|
|
8091
|
+
* - Writing under /workspace fails in worktrees because we symlink
|
|
8092
|
+
* `.moltnet/` to the main repo (so credentials are reachable from
|
|
8093
|
+
* worktrees), and Gondolin's RealFSProvider correctly refuses to
|
|
8094
|
+
* create paths whose ancestors' realpath escapes the mount root.
|
|
8095
|
+
* That refusal is a deliberate sandbox-escape protection, not a
|
|
8096
|
+
* bug. See diary semantic entry cd27d9d3-efdc-4aec-ac0d-5fd8ce258d1f
|
|
8097
|
+
* and episodic 7affbfeb-18a2-4963-aeac-c177eb2afa2d for the full
|
|
8098
|
+
* investigation and the alternatives we rejected.
|
|
8099
|
+
*/
|
|
8100
|
+
var GUEST_TASK_SKILLS_MOUNT = "/moltnet-task-skills";
|
|
8192
8101
|
/**
|
|
8193
8102
|
* Resolve the main worktree root (where .moltnet/ lives — it's untracked,
|
|
8194
8103
|
* only exists in the main worktree, not in git worktrees).
|
|
@@ -8317,7 +8226,10 @@ async function resumeVm(config) {
|
|
|
8317
8226
|
env: vmEnv,
|
|
8318
8227
|
...resources?.memory && { memory: resources.memory },
|
|
8319
8228
|
...resources?.cpus && { cpus: resources.cpus },
|
|
8320
|
-
vfs: { mounts: { [GUEST_WORKSPACE$1]: workspaceProvider } }
|
|
8229
|
+
vfs: { mounts: {
|
|
8230
|
+
[GUEST_WORKSPACE$2]: workspaceProvider,
|
|
8231
|
+
[GUEST_TASK_SKILLS_MOUNT]: new MemoryProvider()
|
|
8232
|
+
} }
|
|
8321
8233
|
});
|
|
8322
8234
|
await vm.exec(`sh -c '
|
|
8323
8235
|
cp /etc/gondolin/mitm/ca.crt /usr/local/share/ca-certificates/gondolin-mitm.crt
|
|
@@ -8347,7 +8259,7 @@ nameserver 1.1.1.1" > /etc/resolv.conf'`);
|
|
|
8347
8259
|
vm,
|
|
8348
8260
|
credentials: creds,
|
|
8349
8261
|
mountPath: config.mountPath,
|
|
8350
|
-
guestWorkspace: GUEST_WORKSPACE$1,
|
|
8262
|
+
guestWorkspace: GUEST_WORKSPACE$2,
|
|
8351
8263
|
agentDir
|
|
8352
8264
|
};
|
|
8353
8265
|
}
|
|
@@ -8400,6 +8312,137 @@ function ensureRelativeWorktreePaths(gitconfig) {
|
|
|
8400
8312
|
return `${gitconfig}${gitconfig.endsWith("\n") ? "" : "\n"}[worktree]\n\tuseRelativePaths = true\n`;
|
|
8401
8313
|
}
|
|
8402
8314
|
//#endregion
|
|
8315
|
+
//#region src/tool-operations.ts
|
|
8316
|
+
/**
|
|
8317
|
+
* Gondolin tool operations: redirect pi's built-in tool operations
|
|
8318
|
+
* (read, write, edit, bash) to execute inside the VM.
|
|
8319
|
+
*
|
|
8320
|
+
* Follows the same pattern as upstream pi-gondolin.ts — pi's tool factories
|
|
8321
|
+
* accept an `operations` object that provides the underlying I/O.
|
|
8322
|
+
*/
|
|
8323
|
+
var GUEST_WORKSPACE$1 = "/workspace";
|
|
8324
|
+
function shQuote(s) {
|
|
8325
|
+
return "'" + s.replace(/'/g, "'\\''") + "'";
|
|
8326
|
+
}
|
|
8327
|
+
/**
|
|
8328
|
+
* Map a host-side absolute path to a guest-side /workspace path.
|
|
8329
|
+
* Throws if the path escapes the workspace.
|
|
8330
|
+
*/
|
|
8331
|
+
function toGuestPath(localCwd, localPath) {
|
|
8332
|
+
if (localPath === GUEST_WORKSPACE$1 || localPath.startsWith(`${GUEST_WORKSPACE$1}/`)) return localPath;
|
|
8333
|
+
if (localPath === "/moltnet-task-skills" || localPath.startsWith(`/moltnet-task-skills/`)) return localPath;
|
|
8334
|
+
const rel = path.relative(localCwd, localPath);
|
|
8335
|
+
if (rel === "") return GUEST_WORKSPACE$1;
|
|
8336
|
+
if (rel.startsWith("..") || path.isAbsolute(rel)) throw new Error(`path escapes workspace: ${localPath}`);
|
|
8337
|
+
const posixRel = rel.split(path.sep).join(path.posix.sep);
|
|
8338
|
+
return path.posix.join(GUEST_WORKSPACE$1, posixRel);
|
|
8339
|
+
}
|
|
8340
|
+
function createGondolinReadOps(vm, localCwd) {
|
|
8341
|
+
return {
|
|
8342
|
+
readFile: async (p) => {
|
|
8343
|
+
const r = await vm.exec(["/bin/cat", toGuestPath(localCwd, p)]);
|
|
8344
|
+
if (!r.ok) throw new Error(`cat failed (${r.exitCode}): ${r.stderr}`);
|
|
8345
|
+
return r.stdoutBuffer;
|
|
8346
|
+
},
|
|
8347
|
+
access: async (p) => {
|
|
8348
|
+
if (!(await vm.exec([
|
|
8349
|
+
"/bin/sh",
|
|
8350
|
+
"-lc",
|
|
8351
|
+
`test -r ${shQuote(toGuestPath(localCwd, p))}`
|
|
8352
|
+
])).ok) throw new Error(`not readable: ${p}`);
|
|
8353
|
+
},
|
|
8354
|
+
detectImageMimeType: async (p) => {
|
|
8355
|
+
try {
|
|
8356
|
+
const r = await vm.exec([
|
|
8357
|
+
"/bin/sh",
|
|
8358
|
+
"-lc",
|
|
8359
|
+
`file --mime-type -b ${shQuote(toGuestPath(localCwd, p))}`
|
|
8360
|
+
]);
|
|
8361
|
+
if (!r.ok) return null;
|
|
8362
|
+
const m = r.stdout.trim();
|
|
8363
|
+
return [
|
|
8364
|
+
"image/jpeg",
|
|
8365
|
+
"image/png",
|
|
8366
|
+
"image/gif",
|
|
8367
|
+
"image/webp"
|
|
8368
|
+
].includes(m) ? m : null;
|
|
8369
|
+
} catch {
|
|
8370
|
+
return null;
|
|
8371
|
+
}
|
|
8372
|
+
}
|
|
8373
|
+
};
|
|
8374
|
+
}
|
|
8375
|
+
function createGondolinWriteOps(vm, localCwd) {
|
|
8376
|
+
return {
|
|
8377
|
+
writeFile: async (p, content) => {
|
|
8378
|
+
const guestPath = toGuestPath(localCwd, p);
|
|
8379
|
+
const dir = path.posix.dirname(guestPath);
|
|
8380
|
+
const b64 = Buffer.from(content, "utf8").toString("base64");
|
|
8381
|
+
const r = await vm.exec([
|
|
8382
|
+
"/bin/sh",
|
|
8383
|
+
"-lc",
|
|
8384
|
+
[
|
|
8385
|
+
"set -eu",
|
|
8386
|
+
`mkdir -p ${shQuote(dir)}`,
|
|
8387
|
+
`echo ${shQuote(b64)} | base64 -d > ${shQuote(guestPath)}`
|
|
8388
|
+
].join("\n")
|
|
8389
|
+
]);
|
|
8390
|
+
if (!r.ok) throw new Error(`write failed (${r.exitCode}): ${r.stderr}`);
|
|
8391
|
+
},
|
|
8392
|
+
mkdir: async (dir) => {
|
|
8393
|
+
const r = await vm.exec([
|
|
8394
|
+
"/bin/mkdir",
|
|
8395
|
+
"-p",
|
|
8396
|
+
toGuestPath(localCwd, dir)
|
|
8397
|
+
]);
|
|
8398
|
+
if (!r.ok) throw new Error(`mkdir failed (${r.exitCode}): ${r.stderr}`);
|
|
8399
|
+
}
|
|
8400
|
+
};
|
|
8401
|
+
}
|
|
8402
|
+
function createGondolinEditOps(vm, localCwd) {
|
|
8403
|
+
const r = createGondolinReadOps(vm, localCwd);
|
|
8404
|
+
const w = createGondolinWriteOps(vm, localCwd);
|
|
8405
|
+
return {
|
|
8406
|
+
readFile: r.readFile,
|
|
8407
|
+
access: r.access,
|
|
8408
|
+
writeFile: w.writeFile
|
|
8409
|
+
};
|
|
8410
|
+
}
|
|
8411
|
+
function createGondolinBashOps(vm, localCwd) {
|
|
8412
|
+
return { exec: async (command, cwd, { onData, signal, timeout, env }) => {
|
|
8413
|
+
const guestCwd = toGuestPath(localCwd, cwd);
|
|
8414
|
+
const ac = new AbortController();
|
|
8415
|
+
const onAbort = () => ac.abort();
|
|
8416
|
+
signal?.addEventListener("abort", onAbort, { once: true });
|
|
8417
|
+
let timedOut = false;
|
|
8418
|
+
const timer = timeout && timeout > 0 ? setTimeout(() => {
|
|
8419
|
+
timedOut = true;
|
|
8420
|
+
ac.abort();
|
|
8421
|
+
}, timeout * 1e3) : void 0;
|
|
8422
|
+
try {
|
|
8423
|
+
const proc = vm.exec([
|
|
8424
|
+
"/bin/sh",
|
|
8425
|
+
"-lc",
|
|
8426
|
+
command
|
|
8427
|
+
], {
|
|
8428
|
+
cwd: guestCwd,
|
|
8429
|
+
signal: ac.signal,
|
|
8430
|
+
stdout: "pipe",
|
|
8431
|
+
stderr: "pipe"
|
|
8432
|
+
});
|
|
8433
|
+
for await (const chunk of proc.output()) onData(typeof chunk.data === "string" ? Buffer.from(chunk.data, "utf8") : chunk.data);
|
|
8434
|
+
return { exitCode: (await proc).exitCode };
|
|
8435
|
+
} catch (err) {
|
|
8436
|
+
if (signal?.aborted) throw new Error("aborted");
|
|
8437
|
+
if (timedOut) throw new Error(`timeout:${timeout}`);
|
|
8438
|
+
throw err;
|
|
8439
|
+
} finally {
|
|
8440
|
+
if (timer) clearTimeout(timer);
|
|
8441
|
+
signal?.removeEventListener("abort", onAbort);
|
|
8442
|
+
}
|
|
8443
|
+
} };
|
|
8444
|
+
}
|
|
8445
|
+
//#endregion
|
|
8403
8446
|
//#region src/otel/index.ts
|
|
8404
8447
|
var TRACER_NAME = "@themoltnet/pi-extension/otel";
|
|
8405
8448
|
function stripReservedAttrs(attrs) {
|
|
@@ -8537,6 +8580,94 @@ function extractUsage(message) {
|
|
|
8537
8580
|
};
|
|
8538
8581
|
}
|
|
8539
8582
|
//#endregion
|
|
8583
|
+
//#region src/runtime/agent-session-factory.ts
|
|
8584
|
+
var NO_SKILLS = () => ({
|
|
8585
|
+
skills: [],
|
|
8586
|
+
diagnostics: []
|
|
8587
|
+
});
|
|
8588
|
+
/**
|
|
8589
|
+
* Construct an in-memory `AgentSession`. The caller is responsible for
|
|
8590
|
+
* eventually invoking `session.prompt(...)` and for tearing down — the
|
|
8591
|
+
* helper does no lifecycle management beyond construction.
|
|
8592
|
+
*/
|
|
8593
|
+
async function buildAgentSession(args) {
|
|
8594
|
+
const piOtelExtension = createPiOtelExtension({
|
|
8595
|
+
agentName: args.agentName,
|
|
8596
|
+
spanAttributes: args.otelSpanAttrs
|
|
8597
|
+
});
|
|
8598
|
+
const resourceLoader = new DefaultResourceLoader({
|
|
8599
|
+
cwd: args.mountPath,
|
|
8600
|
+
agentDir: args.piAuthDir,
|
|
8601
|
+
extensionFactories: [piOtelExtension],
|
|
8602
|
+
appendSystemPrompt: args.appendSystemPrompt,
|
|
8603
|
+
skillsOverride: args.skillsOverride ?? NO_SKILLS
|
|
8604
|
+
});
|
|
8605
|
+
await resourceLoader.reload();
|
|
8606
|
+
return (await createAgentSession({
|
|
8607
|
+
agentDir: args.piAuthDir,
|
|
8608
|
+
cwd: args.mountPath,
|
|
8609
|
+
model: args.modelHandle,
|
|
8610
|
+
customTools: args.customTools,
|
|
8611
|
+
sessionManager: SessionManager.inMemory(),
|
|
8612
|
+
resourceLoader
|
|
8613
|
+
})).session;
|
|
8614
|
+
}
|
|
8615
|
+
//#endregion
|
|
8616
|
+
//#region ../agent-runtime/src/context-bindings.ts
|
|
8617
|
+
var PROMPT_SEPARATOR = "\n\n---\n\n";
|
|
8618
|
+
/**
|
|
8619
|
+
* Resolve `task.input.context[]` into delivered side-effects (skills
|
|
8620
|
+
* persisted via `deliver.skill`) and prompt fragments
|
|
8621
|
+
* (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
|
|
8622
|
+
* built prompt.
|
|
8623
|
+
*
|
|
8624
|
+
* Per-binding semantics (V1):
|
|
8625
|
+
* - `skill` → `deliver.skill({ slug, content })` once per ref.
|
|
8626
|
+
* Slug collisions on distinct contents are
|
|
8627
|
+
* refused loudly.
|
|
8628
|
+
* - `prompt_prefix` → content appended to `systemPromptPrefix` with
|
|
8629
|
+
* the canonical `\n\n---\n\n` separator (in
|
|
8630
|
+
* declared order).
|
|
8631
|
+
* - `user_inline` → content appended to `userInlineSuffix` in
|
|
8632
|
+
* declared order, same separator.
|
|
8633
|
+
*
|
|
8634
|
+
* No fetching, no hashing — bytes are inlined in `ContextRef.content`,
|
|
8635
|
+
* and the task's `inputCid` already pins the entire input. The imposer
|
|
8636
|
+
* chose these bytes; the resolver just dispatches them.
|
|
8637
|
+
*
|
|
8638
|
+
* The function is pure with respect to its arguments: file writes are
|
|
8639
|
+
* confined to the injected `deliver` callback, which makes the
|
|
8640
|
+
* resolver trivial to test.
|
|
8641
|
+
*/
|
|
8642
|
+
async function resolveTaskContext(args) {
|
|
8643
|
+
const promptParts = [];
|
|
8644
|
+
const userParts = [];
|
|
8645
|
+
const injected = [];
|
|
8646
|
+
const usedSlugs = /* @__PURE__ */ new Map();
|
|
8647
|
+
for (const ref of args.context) {
|
|
8648
|
+
if (ref.binding === "skill") {
|
|
8649
|
+
const prior = usedSlugs.get(ref.slug);
|
|
8650
|
+
if (prior !== void 0) {
|
|
8651
|
+
if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
|
|
8652
|
+
injected.push(ref);
|
|
8653
|
+
continue;
|
|
8654
|
+
}
|
|
8655
|
+
usedSlugs.set(ref.slug, ref.content);
|
|
8656
|
+
await args.deliver.skill({
|
|
8657
|
+
slug: ref.slug,
|
|
8658
|
+
content: ref.content
|
|
8659
|
+
});
|
|
8660
|
+
} else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
|
|
8661
|
+
else userParts.push(ref.content);
|
|
8662
|
+
injected.push(ref);
|
|
8663
|
+
}
|
|
8664
|
+
return {
|
|
8665
|
+
injected,
|
|
8666
|
+
systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
|
|
8667
|
+
userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
|
|
8668
|
+
};
|
|
8669
|
+
}
|
|
8670
|
+
//#endregion
|
|
8540
8671
|
//#region ../tasks/src/formats.ts
|
|
8541
8672
|
/**
|
|
8542
8673
|
* Register TypeBox string formats used across Task / TaskOutput / task-type
|
|
@@ -8551,6 +8682,55 @@ var UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a
|
|
|
8551
8682
|
if (!FormatRegistry.Has("uuid")) FormatRegistry.Set("uuid", (v) => UUID_RE.test(v));
|
|
8552
8683
|
if (!FormatRegistry.Has("date-time")) FormatRegistry.Set("date-time", (v) => !Number.isNaN(Date.parse(v)));
|
|
8553
8684
|
//#endregion
|
|
8685
|
+
//#region ../tasks/src/context.ts
|
|
8686
|
+
/**
|
|
8687
|
+
* How an executor delivers a context entry to its underlying LLM.
|
|
8688
|
+
* V1 bindings only; Tier-2 (reference_file, mcp_resource, imported_file,
|
|
8689
|
+
* tool_response_seed, additional_context_hook) ship in a later slice.
|
|
8690
|
+
*/
|
|
8691
|
+
var ContextBinding = Type$1.Union([
|
|
8692
|
+
Type$1.Literal("skill"),
|
|
8693
|
+
Type$1.Literal("prompt_prefix"),
|
|
8694
|
+
Type$1.Literal("user_inline")
|
|
8695
|
+
], { $id: "ContextBinding" });
|
|
8696
|
+
/**
|
|
8697
|
+
* One context entry. Bytes are inlined: the imposer chose them, and the
|
|
8698
|
+
* task's `inputCid` already pins the entire input — including
|
|
8699
|
+
* `context[]` — so we don't need a separate per-entry hash, fetcher, or
|
|
8700
|
+
* flagged-content gate. Tasks reference rendered packs (or any other
|
|
8701
|
+
* external content) by copying their bytes into `content` at task
|
|
8702
|
+
* creation time.
|
|
8703
|
+
*
|
|
8704
|
+
* - `slug` — short identifier the daemon uses to disambiguate
|
|
8705
|
+
* entries. For `skill` binding it becomes the directory
|
|
8706
|
+
* name under the runtime's skill discovery path. Must be
|
|
8707
|
+
* kebab-case-safe (alphanumeric + dashes/underscores).
|
|
8708
|
+
* - `binding` — how the bytes are delivered to the LLM (see above).
|
|
8709
|
+
* - `content` — the actual bytes (UTF-8 text). Capped at 32 KiB per
|
|
8710
|
+
* entry; total per-task context bytes are bounded by the
|
|
8711
|
+
* soft `maxItems` cap and per-binding daemon limits.
|
|
8712
|
+
*/
|
|
8713
|
+
var ContextRef = Type$1.Object({
|
|
8714
|
+
slug: Type$1.String({
|
|
8715
|
+
minLength: 1,
|
|
8716
|
+
maxLength: 64,
|
|
8717
|
+
pattern: "^[a-zA-Z0-9_-]+$"
|
|
8718
|
+
}),
|
|
8719
|
+
binding: ContextBinding,
|
|
8720
|
+
content: Type$1.String({
|
|
8721
|
+
minLength: 1,
|
|
8722
|
+
maxLength: 32768
|
|
8723
|
+
})
|
|
8724
|
+
}, {
|
|
8725
|
+
$id: "ContextRef",
|
|
8726
|
+
additionalProperties: false
|
|
8727
|
+
});
|
|
8728
|
+
/** Reusable input fragment for any task type. Soft cap at 5 items. */
|
|
8729
|
+
var TaskContext = Type$1.Array(ContextRef, {
|
|
8730
|
+
$id: "TaskContext",
|
|
8731
|
+
maxItems: 5
|
|
8732
|
+
});
|
|
8733
|
+
//#endregion
|
|
8554
8734
|
//#region ../tasks/src/rubric.ts
|
|
8555
8735
|
/**
|
|
8556
8736
|
* Rubric — structured acceptance criteria used by judgment tasks.
|
|
@@ -9099,6 +9279,60 @@ var RenderPackOutput = Type$1.Object({
|
|
|
9099
9279
|
additionalProperties: false
|
|
9100
9280
|
});
|
|
9101
9281
|
//#endregion
|
|
9282
|
+
//#region ../tasks/src/task-types/run-eval.ts
|
|
9283
|
+
/**
|
|
9284
|
+
* `run_eval` — execute a scenario prompt under a named variant for
|
|
9285
|
+
* later cross-variant grading by `judge_eval_variant` (Slice 2).
|
|
9286
|
+
*
|
|
9287
|
+
* output_kind: artifact
|
|
9288
|
+
* criteria: optional (when set, output.verification is required —
|
|
9289
|
+
* producer self-assessment; the judge is the binding evaluator)
|
|
9290
|
+
* references: not required (scenario lives entirely in input)
|
|
9291
|
+
*/
|
|
9292
|
+
var RUN_EVAL_TYPE = "run_eval";
|
|
9293
|
+
var RunEvalInput = Type$1.Object({
|
|
9294
|
+
scenario: Type$1.Object({
|
|
9295
|
+
prompt: Type$1.String({ minLength: 1 }),
|
|
9296
|
+
inputFiles: Type$1.Optional(Type$1.Array(Type$1.String({ minLength: 1 })))
|
|
9297
|
+
}, { additionalProperties: false }),
|
|
9298
|
+
variantLabel: Type$1.String({
|
|
9299
|
+
minLength: 1,
|
|
9300
|
+
maxLength: 64
|
|
9301
|
+
}),
|
|
9302
|
+
context: TaskContext,
|
|
9303
|
+
successCriteria: Type$1.Optional(SuccessCriteria)
|
|
9304
|
+
}, {
|
|
9305
|
+
$id: "RunEvalInput",
|
|
9306
|
+
additionalProperties: false
|
|
9307
|
+
});
|
|
9308
|
+
var RunEvalOutput = Type$1.Object({
|
|
9309
|
+
response: Type$1.String({ minLength: 1 }),
|
|
9310
|
+
artifacts: Type$1.Optional(Type$1.Array(Type$1.Object({
|
|
9311
|
+
path: Type$1.String({ minLength: 1 }),
|
|
9312
|
+
cid: Type$1.String({ minLength: 1 })
|
|
9313
|
+
}, { additionalProperties: false }))),
|
|
9314
|
+
totalTokens: Type$1.Integer({ minimum: 0 }),
|
|
9315
|
+
durationMs: Type$1.Integer({ minimum: 0 }),
|
|
9316
|
+
traceparent: Type$1.String({ minLength: 1 }),
|
|
9317
|
+
verification: Type$1.Optional(VerificationRecord)
|
|
9318
|
+
}, {
|
|
9319
|
+
$id: "RunEvalOutput",
|
|
9320
|
+
additionalProperties: false
|
|
9321
|
+
});
|
|
9322
|
+
/**
|
|
9323
|
+
* Cross-field rule mirroring the `requireVerificationWhenCriteriaPresent`
|
|
9324
|
+
* rule used by the brief task types: when input declares
|
|
9325
|
+
* `successCriteria`, output MUST carry `verification`; when it doesn't,
|
|
9326
|
+
* output MUST NOT carry one.
|
|
9327
|
+
*/
|
|
9328
|
+
function validateRunEvalOutput(output, input) {
|
|
9329
|
+
const hasCriteria = input !== null && input !== void 0 && input.successCriteria !== void 0;
|
|
9330
|
+
const hasVerification = output !== null && output !== void 0 && output.verification !== void 0;
|
|
9331
|
+
if (hasCriteria && !hasVerification) return "output.verification is required because input.successCriteria is set; the producer LLM must self-assess against the criteria";
|
|
9332
|
+
if (!hasCriteria && hasVerification) return "output.verification was supplied but input.successCriteria is unset; omit verification when there are no criteria to assess against";
|
|
9333
|
+
return null;
|
|
9334
|
+
}
|
|
9335
|
+
//#endregion
|
|
9102
9336
|
//#region ../tasks/src/task-types/index.ts
|
|
9103
9337
|
/**
|
|
9104
9338
|
* Validate that a judgment-task input carries a rubric inside its
|
|
@@ -9177,6 +9411,14 @@ var BUILT_IN_TASK_TYPES = {
|
|
|
9177
9411
|
requiresReferences: true,
|
|
9178
9412
|
validateInput: validateJudgmentInput,
|
|
9179
9413
|
validateOutput: validateJudgePackOutput
|
|
9414
|
+
},
|
|
9415
|
+
[RUN_EVAL_TYPE]: {
|
|
9416
|
+
name: RUN_EVAL_TYPE,
|
|
9417
|
+
inputSchema: RunEvalInput,
|
|
9418
|
+
outputSchema: RunEvalOutput,
|
|
9419
|
+
outputKind: "artifact",
|
|
9420
|
+
requiresReferences: false,
|
|
9421
|
+
validateOutput: validateRunEvalOutput
|
|
9180
9422
|
}
|
|
9181
9423
|
};
|
|
9182
9424
|
//#endregion
|
|
@@ -9231,6 +9473,15 @@ function validateTaskOutput(taskType, output, input) {
|
|
|
9231
9473
|
function getTaskOutputSchema(taskType) {
|
|
9232
9474
|
return getTaskTypeEntry(taskType)?.outputSchema ?? null;
|
|
9233
9475
|
}
|
|
9476
|
+
/**
|
|
9477
|
+
* Whether sessions running this task type should have the generic
|
|
9478
|
+
* `subagent` custom tool registered. Returns `false` for unknown task
|
|
9479
|
+
* types and for task types that didn't opt in. See `TaskTypeEntry`
|
|
9480
|
+
* for the design rationale.
|
|
9481
|
+
*/
|
|
9482
|
+
function taskTypeUsesSubagents(taskType) {
|
|
9483
|
+
return getTaskTypeEntry(taskType)?.usesSubagents === true;
|
|
9484
|
+
}
|
|
9234
9485
|
//#endregion
|
|
9235
9486
|
//#region ../tasks/src/wire.ts
|
|
9236
9487
|
/**
|
|
@@ -9275,6 +9526,14 @@ var ExecutorTrustLevel = Type$1.Union([
|
|
|
9275
9526
|
Type$1.Literal("releaseVerifiedTool"),
|
|
9276
9527
|
Type$1.Literal("sandboxAttested")
|
|
9277
9528
|
], { $id: "ExecutorTrustLevel" });
|
|
9529
|
+
/** Identifies a (provider, model) daemon pair allowed to claim a task. */
|
|
9530
|
+
var ExecutorRef = Type$1.Object({
|
|
9531
|
+
provider: Type$1.String({ minLength: 1 }),
|
|
9532
|
+
model: Type$1.String({ minLength: 1 })
|
|
9533
|
+
}, {
|
|
9534
|
+
$id: "ExecutorRef",
|
|
9535
|
+
additionalProperties: false
|
|
9536
|
+
});
|
|
9278
9537
|
var OutputKind = Type$1.Union([Type$1.Literal("artifact"), Type$1.Literal("judgment")], { $id: "OutputKind" });
|
|
9279
9538
|
var TaskMessageKind = Type$1.Union([
|
|
9280
9539
|
Type$1.Literal("text_delta"),
|
|
@@ -9367,6 +9626,7 @@ Type$1.Object({
|
|
|
9367
9626
|
imposedByHumanId: Type$1.Union([Uuid, Type$1.Null()]),
|
|
9368
9627
|
acceptedAttemptN: Type$1.Union([Type$1.Number(), Type$1.Null()]),
|
|
9369
9628
|
requiredExecutorTrustLevel: ExecutorTrustLevel,
|
|
9629
|
+
allowedExecutors: Type$1.Array(ExecutorRef, { maxItems: 16 }),
|
|
9370
9630
|
status: TaskStatus,
|
|
9371
9631
|
queuedAt: IsoTimestamp,
|
|
9372
9632
|
completedAt: Type$1.Union([IsoTimestamp, Type$1.Null()]),
|
|
@@ -9552,7 +9812,7 @@ function buildFinalOutputBlock(opts) {
|
|
|
9552
9812
|
//#endregion
|
|
9553
9813
|
//#region ../agent-runtime/src/prompts/assess-brief.ts
|
|
9554
9814
|
/**
|
|
9555
|
-
* Build the
|
|
9815
|
+
* Build the first user-message prompt for an `assess_brief` judge attempt.
|
|
9556
9816
|
*
|
|
9557
9817
|
* Design note — no pre-resolved `target` projection
|
|
9558
9818
|
* --------------------------------------------------
|
|
@@ -9573,7 +9833,7 @@ function buildFinalOutputBlock(opts) {
|
|
|
9573
9833
|
* future task types whose products are docs / configs / changes /
|
|
9574
9834
|
* anything) work without any code path here.
|
|
9575
9835
|
*/
|
|
9576
|
-
function buildAssessBriefPrompt(input, ctx) {
|
|
9836
|
+
function buildAssessBriefUserPrompt(input, ctx) {
|
|
9577
9837
|
const rubric = input.successCriteria.rubric;
|
|
9578
9838
|
const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
|
|
9579
9839
|
const preambleSection = rubric.preamble ? [
|
|
@@ -9688,7 +9948,7 @@ function buildSelfVerificationBlock(taskId) {
|
|
|
9688
9948
|
//#endregion
|
|
9689
9949
|
//#region ../agent-runtime/src/prompts/curate-pack.ts
|
|
9690
9950
|
/**
|
|
9691
|
-
* Build the
|
|
9951
|
+
* Build the first user-message prompt for a `curate_pack` task.
|
|
9692
9952
|
*
|
|
9693
9953
|
* Design note: this prompt is deliberately NOT a numbered command
|
|
9694
9954
|
* sequence. The curator's value comes from judgment — inferring scope
|
|
@@ -9709,7 +9969,7 @@ function buildSelfVerificationBlock(taskId) {
|
|
|
9709
9969
|
* emits pruned state at phase boundaries so a follow-up session can
|
|
9710
9970
|
* resume without replaying the tool history.
|
|
9711
9971
|
*/
|
|
9712
|
-
function
|
|
9972
|
+
function buildCuratePackUserPrompt(input, ctx) {
|
|
9713
9973
|
const { diaryId, taskPrompt, entryTypes, tagFilters, tokenBudget, recipe } = input;
|
|
9714
9974
|
const entryTypesPinned = Boolean(entryTypes);
|
|
9715
9975
|
const resolvedRecipe = recipe ?? "topic-focused-v1";
|
|
@@ -9845,13 +10105,13 @@ function buildCuratePackPrompt(input, ctx) {
|
|
|
9845
10105
|
//#endregion
|
|
9846
10106
|
//#region ../agent-runtime/src/prompts/fulfill-brief.ts
|
|
9847
10107
|
/**
|
|
9848
|
-
* Build the
|
|
10108
|
+
* Build the first user-message prompt for a `fulfill_brief` task.
|
|
9849
10109
|
*
|
|
9850
10110
|
* Generalized from the original `resolve-issue` prompt. No longer
|
|
9851
10111
|
* GitHub-specific; references live on `Task.references[]` and the agent
|
|
9852
10112
|
* is told to inspect them itself.
|
|
9853
10113
|
*/
|
|
9854
|
-
function
|
|
10114
|
+
function buildFulfillBriefUserPrompt(input, ctx) {
|
|
9855
10115
|
const { brief, title, acceptanceCriteria, seedFiles, scopeHint } = input;
|
|
9856
10116
|
const criteriaSection = acceptanceCriteria?.length ? [
|
|
9857
10117
|
"### Acceptance criteria",
|
|
@@ -9931,7 +10191,7 @@ function buildFulfillBriefPrompt(input, ctx) {
|
|
|
9931
10191
|
}
|
|
9932
10192
|
//#endregion
|
|
9933
10193
|
//#region ../agent-runtime/src/prompts/judge-pack.ts
|
|
9934
|
-
function
|
|
10194
|
+
function buildJudgePackUserPrompt(input, ctx) {
|
|
9935
10195
|
const { renderedPackId, sourcePackId, successCriteria } = input;
|
|
9936
10196
|
const rubric = successCriteria.rubric;
|
|
9937
10197
|
const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
|
|
@@ -10058,10 +10318,10 @@ function buildJudgePackPrompt(input, ctx) {
|
|
|
10058
10318
|
//#endregion
|
|
10059
10319
|
//#region ../agent-runtime/src/prompts/render-pack.ts
|
|
10060
10320
|
/**
|
|
10061
|
-
* Build the
|
|
10321
|
+
* Build the first user-message prompt for a `render_pack` task. Almost mechanical:
|
|
10062
10322
|
* wraps `moltnet_pack_render` and emits the receipt.
|
|
10063
10323
|
*/
|
|
10064
|
-
function
|
|
10324
|
+
function buildRenderPackUserPrompt(input, ctx) {
|
|
10065
10325
|
const { packId, persist = true, pinned = false } = input;
|
|
10066
10326
|
return [
|
|
10067
10327
|
"# Render Pack Agent",
|
|
@@ -10115,19 +10375,87 @@ function buildRenderPackPrompt(input, ctx) {
|
|
|
10115
10375
|
].join("\n");
|
|
10116
10376
|
}
|
|
10117
10377
|
//#endregion
|
|
10378
|
+
//#region ../agent-runtime/src/prompts/run-eval.ts
|
|
10379
|
+
/**
 * Render the first user-turn message for a `run_eval` task.
 *
 * The message is assembled from independent Markdown sections; any
 * section that resolves to the empty string is dropped before the
 * remaining ones are joined with newlines.
 *
 * This builder does not read `input.context[]`; only `scenario`,
 * `variantLabel` and `successCriteria` are consumed from `input`.
 *
 * @param input - validated run_eval input (`scenario.prompt`, optional
 *   `scenario.inputFiles`, `variantLabel`, optional `successCriteria`).
 * @param ctx - `{ taskId, correlationId? }`; `correlationId` toggles the
 *   "Correlation" section.
 * @returns the Markdown prompt string.
 */
function buildRunEvalUserPrompt(input, ctx) {
	const { scenario, variantLabel, successCriteria } = input;

	// Optional bullet list of the scenario's input files.
	let inputFilesSection = "";
	if (scenario.inputFiles?.length) {
		const bullets = scenario.inputFiles.map((file) => `- \`${file}\``);
		inputFilesSection = [
			"### Input files",
			"",
			...bullets,
			""
		].join("\n");
	}

	// Self-verification instructions are only included when the task
	// carries success criteria.
	let verificationSection = "";
	if (successCriteria) verificationSection = buildSelfVerificationBlock(ctx.taskId);

	// Informational only: tells the agent what the correlation id is for.
	let correlationSection = "";
	if (ctx.correlationId) {
		correlationSection = [
			"### Correlation",
			"",
			`This task carries correlationId \`${ctx.correlationId}\`. It joins`,
			"this variant to its sibling `run_eval` tasks (other variants of the",
			"same scenario) and to the eventual `judge_eval_variant` task that",
			"will grade them together. You do not need to act on it directly —",
			"it is recorded for cross-variant aggregation at query time.",
			""
		].join("\n");
	}

	const shapeSketch = [
		"{",
		" \"response\": \"<your free-form answer>\",",
		" \"artifacts\": [{ \"path\": \"...\", \"cid\": \"...\" }], // optional",
		" \"totalTokens\": <int>,",
		" \"durationMs\": <int>,",
		" \"traceparent\": \"<from claim>\",",
		" \"verification\": <required iff input.successCriteria; see Self-verification>",
		"}"
	].join("\n");
	const finalOutputBlock = buildFinalOutputBlock({
		taskType: "run_eval",
		outputSchemaName: "RunEvalOutput",
		shapeSketch
	});

	const sections = [
		"# Run Eval Agent\n",
		`You are running an evaluation scenario as variant \`${variantLabel}\`.\nTask id: \`${ctx.taskId}\`\n`,
		correlationSection,
		`### Scenario\n\n${scenario.prompt}\n`,
		inputFilesSection,
		verificationSection,
		finalOutputBlock
	];
	return sections.filter((section) => section !== "").join("\n");
}
|
|
10436
|
+
//#endregion
|
|
10118
10437
|
//#region ../agent-runtime/src/prompts/index.ts
|
|
10119
10438
|
/**
|
|
10120
|
-
* Resolve the correct prompt builder for `task.taskType` and
|
|
10121
|
-
* Throws if the type is unknown or the input fails TypeBox
|
|
10122
|
-
|
|
10123
|
-
|
|
10439
|
+
* Resolve the correct user-prompt builder for `task.taskType` and
|
|
10440
|
+
* invoke it. Throws if the type is unknown or the input fails TypeBox
|
|
10441
|
+
* validation.
|
|
10442
|
+
*
|
|
10443
|
+
* Role note: the returned string is delivered as the **first user
|
|
10444
|
+
* message** of the agent's session (pi-coding-agent's
|
|
10445
|
+
* `session.prompt(text)` puts text in the user role). The system
|
|
10446
|
+
* prompt is built separately by pi from `appendSystemPrompt` (the
|
|
10447
|
+
* runtime instructor lives there). Builders here are free-form Markdown
|
|
10448
|
+
* for the user turn; they don't replace or prepend to the system
|
|
10449
|
+
* prompt.
|
|
10450
|
+
*/
|
|
10451
|
+
function buildTaskUserPrompt(task, ctx) {
|
|
10124
10452
|
switch (task.taskType) {
|
|
10125
10453
|
case FULFILL_BRIEF_TYPE:
|
|
10126
10454
|
if (!Value.Check(FulfillBriefInput, task.input)) {
|
|
10127
10455
|
const errors = [...Value.Errors(FulfillBriefInput, task.input)];
|
|
10128
10456
|
throw new Error(`fulfill_brief input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
|
|
10129
10457
|
}
|
|
10130
|
-
return
|
|
10458
|
+
return buildFulfillBriefUserPrompt(task.input, {
|
|
10131
10459
|
diaryId: ctx.diaryId,
|
|
10132
10460
|
taskId: ctx.taskId,
|
|
10133
10461
|
correlationId: task.correlationId
|
|
@@ -10137,7 +10465,7 @@ function buildPromptForTask(task, ctx) {
|
|
|
10137
10465
|
const errors = [...Value.Errors(AssessBriefInput, task.input)];
|
|
10138
10466
|
throw new Error(`assess_brief input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
|
|
10139
10467
|
}
|
|
10140
|
-
return
|
|
10468
|
+
return buildAssessBriefUserPrompt(task.input, {
|
|
10141
10469
|
diaryId: ctx.diaryId,
|
|
10142
10470
|
taskId: ctx.taskId
|
|
10143
10471
|
});
|
|
@@ -10146,7 +10474,7 @@ function buildPromptForTask(task, ctx) {
|
|
|
10146
10474
|
const errors = [...Value.Errors(CuratePackInput, task.input)];
|
|
10147
10475
|
throw new Error(`curate_pack input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
|
|
10148
10476
|
}
|
|
10149
|
-
return
|
|
10477
|
+
return buildCuratePackUserPrompt(task.input, {
|
|
10150
10478
|
diaryId: ctx.diaryId,
|
|
10151
10479
|
taskId: ctx.taskId
|
|
10152
10480
|
});
|
|
@@ -10155,7 +10483,7 @@ function buildPromptForTask(task, ctx) {
|
|
|
10155
10483
|
const errors = [...Value.Errors(RenderPackInput, task.input)];
|
|
10156
10484
|
throw new Error(`render_pack input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
|
|
10157
10485
|
}
|
|
10158
|
-
return
|
|
10486
|
+
return buildRenderPackUserPrompt(task.input, {
|
|
10159
10487
|
diaryId: ctx.diaryId,
|
|
10160
10488
|
taskId: ctx.taskId
|
|
10161
10489
|
});
|
|
@@ -10164,10 +10492,20 @@ function buildPromptForTask(task, ctx) {
|
|
|
10164
10492
|
const errors = [...Value.Errors(JudgePackInput, task.input)];
|
|
10165
10493
|
throw new Error(`judge_pack input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
|
|
10166
10494
|
}
|
|
10167
|
-
return
|
|
10495
|
+
return buildJudgePackUserPrompt(task.input, {
|
|
10168
10496
|
diaryId: ctx.diaryId,
|
|
10169
10497
|
taskId: ctx.taskId
|
|
10170
10498
|
});
|
|
10499
|
+
case RUN_EVAL_TYPE:
|
|
10500
|
+
if (!Value.Check(RunEvalInput, task.input)) {
|
|
10501
|
+
const errors = [...Value.Errors(RunEvalInput, task.input)];
|
|
10502
|
+
throw new Error(`run_eval input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
|
|
10503
|
+
}
|
|
10504
|
+
return buildRunEvalUserPrompt(task.input, {
|
|
10505
|
+
diaryId: ctx.diaryId,
|
|
10506
|
+
taskId: ctx.taskId,
|
|
10507
|
+
correlationId: task.correlationId
|
|
10508
|
+
});
|
|
10171
10509
|
default: throw new Error(`No prompt builder registered for taskType="${task.taskType}"`);
|
|
10172
10510
|
}
|
|
10173
10511
|
}
|
|
@@ -13639,6 +13977,133 @@ var require_multistream = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
|
13639
13977
|
module.exports.pino = pino;
|
|
13640
13978
|
})))();
|
|
13641
13979
|
//#endregion
|
|
13980
|
+
//#region ../agent-runtime/src/subagent-output-contracts.ts
|
|
13981
|
+
// Name -> contract lookup table for subagent output contracts.
var REGISTRY = /* @__PURE__ */ new Map();
/**
 * Look up a subagent output contract by its registered name.
 *
 * @param name - contract name to resolve.
 * @returns the registered contract, or `null` when nothing usable is
 *   stored under `name`. The caller decides how to treat a miss.
 */
function getSubagentOutputContract(name) {
	const contract = REGISTRY.get(name);
	if (contract === undefined || contract === null) return null;
	return contract;
}
|
|
13990
|
+
/**
 * Snapshot every registered subagent output contract.
 *
 * @returns a fresh array of the registry's values, so callers may
 *   mutate the result without touching the registry itself.
 */
function listSubagentOutputContracts() {
	return Array.from(REGISTRY.values());
}
|
|
13998
|
+
//#endregion
|
|
13999
|
+
//#region src/runtime/inject-task-context.ts
|
|
14000
|
+
/**
|
|
14001
|
+
* Slice 1.5 of #943 — wire the agent-runtime resolver into the
|
|
14002
|
+
* pi-extension execution path.
|
|
14003
|
+
*
|
|
14004
|
+
* `resolveTaskContext` is a pure dispatcher; this module provides the
|
|
14005
|
+
* Gondolin-aware deliverer and the post-resolution shape the
|
|
14006
|
+
* `execute-pi-task` caller needs to splice into pi's setup:
|
|
14007
|
+
*
|
|
14008
|
+
* - `systemPromptPrefix` → fed into `appendSystemPrompt` alongside
|
|
14009
|
+
* the runtime instructor (it IS a system-prompt fragment).
|
|
14010
|
+
* - `userInlineSuffix` → appended to the `buildTaskUserPrompt`
|
|
14011
|
+
* output BEFORE `session.prompt(text)`.
|
|
14012
|
+
* - `skills` → spliced into the `skillsOverride` callback's
|
|
14013
|
+
* return value. pi includes them in `<available_skills>` in the
|
|
14014
|
+
* system prompt; the agent fetches the body on demand via the
|
|
14015
|
+
* Read tool.
|
|
14016
|
+
*
|
|
14017
|
+
* Skill files are written into the VM at
|
|
14018
|
+
* `/workspace/.moltnet/skills/<slug>/SKILL.md`. The agent's
|
|
14019
|
+
* Gondolin-bound Read tool is scoped to `/workspace`, so that path is
|
|
14020
|
+
* the only location the agent can actually read at runtime. pi only
|
|
14021
|
+
* reads `<available_skills>` metadata (name, description, location),
|
|
14022
|
+
* never the file body, so we construct synthetic `Skill` objects
|
|
14023
|
+
* pointing at the in-VM path without ever materialising the file on
|
|
14024
|
+
* the host.
|
|
14025
|
+
*/
|
|
14026
|
+
/**
|
|
14027
|
+
* Where in the VM we write skill bodies — the memory-backed mount
|
|
14028
|
+
* declared in `vm-manager.ts`. See the comment on
|
|
14029
|
+
* `GUEST_TASK_SKILLS_MOUNT` there for the full rationale (ephemeral
|
|
14030
|
+
* by intent + the worktree symlink interaction with Gondolin's
|
|
14031
|
+
* sandbox-escape protection). The agent's Gondolin Read tool accepts
|
|
14032
|
+
* paths under this mount via `toGuestPath` in `tool-operations.ts`.
|
|
14033
|
+
*/
|
|
14034
|
+
var SKILL_ROOT_IN_VM = GUEST_TASK_SKILLS_MOUNT;
|
|
14035
|
+
/** Bounds borrowed from pi's skill validation; conservative caps so a
|
|
14036
|
+
* malformed SKILL.md doesn't bloat the system prompt. */
|
|
14037
|
+
var MAX_SKILL_NAME = 64;
|
|
14038
|
+
var MAX_SKILL_DESCRIPTION = 1024;
|
|
14039
|
+
/**
 * Run `resolveTaskContext` over `args.context` with a skill deliverer
 * that writes each skill body through `args.fs` and records a synthetic
 * skill entry for it.
 *
 * @param args - `{ context, fs }`; `fs` must expose async `mkdir` and
 *   `writeFile`.
 * @returns `{ injected, skills, systemPromptPrefix, userInlineSuffix }`,
 *   where `skills` holds one synthetic skill per delivered skill item and
 *   the other fields are passed through from the resolver result.
 */
async function injectTaskContext(args) {
	const skills = [];

	// Invoked by the resolver once per skill item.
	const deliverSkill = async ({ slug, content }) => {
		const dir = `${SKILL_ROOT_IN_VM}/${slug}`;
		const filePath = `${dir}/SKILL.md`;
		await args.fs.mkdir(dir, { recursive: true });
		// 0o644: owner read/write, everyone else read-only.
		await args.fs.writeFile(filePath, content, { mode: 0o644 });
		skills.push(buildSyntheticSkill({
			slug,
			content,
			filePath,
			dir
		}));
	};

	const { injected, systemPromptPrefix, userInlineSuffix } = await resolveTaskContext({
		context: args.context,
		deliver: { skill: deliverSkill }
	});
	return {
		injected,
		skills,
		systemPromptPrefix,
		userInlineSuffix
	};
}
|
|
14067
|
+
/**
 * Assemble the skill descriptor for a task-injected SKILL.md.
 *
 * `name` and `description` come from the content's frontmatter when they
 * are non-blank strings; otherwise the slug and a generic description
 * are used. Both are clipped to `MAX_SKILL_NAME` /
 * `MAX_SKILL_DESCRIPTION`.
 *
 * Frontmatter parsing is best-effort: a parser exception is swallowed
 * on purpose and the slug-derived fallbacks are used instead.
 *
 * @param args - `{ slug, content, filePath, dir }`.
 * @returns `{ name, description, filePath, baseDir, sourceInfo,
 *   disableModelInvocation }`.
 */
function buildSyntheticSkill(args) {
	const { slug, content, filePath, dir } = args;

	let frontmatter = {};
	try {
		frontmatter = parseFrontmatter(content).frontmatter;
	} catch {
		// Deliberately ignored: malformed frontmatter only costs us metadata.
	}

	const isNonBlankString = (value) => typeof value === "string" && value.trim().length > 0;
	const name = isNonBlankString(frontmatter.name) ? frontmatter.name.trim() : slug;
	const description = isNonBlankString(frontmatter.description)
		? frontmatter.description.trim()
		: `Task-injected context skill (${slug})`;

	const sourceInfo = createSyntheticSourceInfo(filePath, {
		source: "moltnet:task-context",
		scope: "temporary",
		origin: "top-level",
		baseDir: dir
	});

	return {
		name: clip(name, MAX_SKILL_NAME),
		description: clip(description, MAX_SKILL_DESCRIPTION),
		filePath,
		baseDir: dir,
		sourceInfo,
		// Only the literal boolean `true` disables model invocation.
		disableModelInvocation: frontmatter["disable-model-invocation"] === true
	};
}
|
|
14103
|
+
/**
 * Truncate `s` to at most `max` UTF-16 code units.
 *
 * If the cut would land in the middle of a surrogate pair, the dangling
 * high surrogate is dropped as well, so the result never ends in half of
 * an astral character (e.g. a split emoji). The result may therefore be
 * one unit shorter than `max`.
 *
 * @param s - string to truncate.
 * @param max - maximum length in UTF-16 code units.
 * @returns `s` unchanged when it already fits, otherwise the truncated
 *   prefix.
 */
function clip(s, max) {
	if (!(s.length > max)) return s;
	const clipped = s.slice(0, max);
	// charCodeAt(-1) on an empty prefix yields NaN, which fails both comparisons.
	const lastUnit = clipped.charCodeAt(clipped.length - 1);
	const endsWithHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
	return endsWithHighSurrogate ? clipped.slice(0, -1) : clipped;
}
|
|
14106
|
+
//#endregion
|
|
13642
14107
|
//#region src/runtime/runtime-instructor.ts
|
|
13643
14108
|
/**
|
|
13644
14109
|
* Build the daemon-controlled invariant prose injected into the system prompt
|
|
@@ -13724,6 +14189,190 @@ function buildRuntimeInstructor(ctx) {
|
|
|
13724
14189
|
].join("\n");
|
|
13725
14190
|
}
|
|
13726
14191
|
//#endregion
|
|
14192
|
+
//#region src/runtime/subagent-tool.ts
|
|
14193
|
+
var SUBAGENT_SUBMIT_TOOL_NAME = "submit_subagent_output";
|
|
14194
|
+
/**
|
|
14195
|
+
* Parameters shape the parent LLM sees when calling the subagent tool.
|
|
14196
|
+
*
|
|
14197
|
+
* - `task` — natural-language instructions for the subagent.
|
|
14198
|
+
* The parent authors this per call. Must be
|
|
14199
|
+
* non-empty.
|
|
14200
|
+
* - `output_schema` — name of a registered SubagentOutputContract.
|
|
14201
|
+
* Resolved at call time; unknown names error.
|
|
14202
|
+
*/
|
|
14203
|
+
var SubagentToolParameters = Type$1.Object({
|
|
14204
|
+
task: Type$1.String({
|
|
14205
|
+
minLength: 1,
|
|
14206
|
+
description: "Natural-language instructions for the subagent. The subagent starts with a fresh conversation and a narrowed system prompt; this is the only context it has from you."
|
|
14207
|
+
}),
|
|
14208
|
+
output_schema: Type$1.String({
|
|
14209
|
+
minLength: 1,
|
|
14210
|
+
description: "Name of a registered subagent output contract. The subagent must submit a structured payload via `submit_subagent_output` matching this contract."
|
|
14211
|
+
})
|
|
14212
|
+
}, { additionalProperties: false });
|
|
14213
|
+
var DEFAULT_SUBAGENT_TIMEOUT_MS = 300 * 1e3;
|
|
14214
|
+
/**
 * Build the `subagent` custom tool for a parent session.
 *
 * Each invocation validates its parameters, resolves the named output
 * contract, builds a fresh inner session (with a per-call
 * `submit_subagent_output` tool bound to that contract) and prompts it
 * with the parent-authored `task`. The payload captured by the submit
 * tool is returned JSON-stringified; every failure path returns a
 * `toolError` result instead of throwing (an exception from the session
 * builder itself is not caught here).
 *
 * @param args - configuration read by this factory:
 *   - `buildAgentSession` (optional) — session builder override; defaults
 *     to the module's `buildAgentSession`.
 *   - `mountPath`, `piAuthDir`, `modelHandle`, `agentName` — forwarded to
 *     the session builder.
 *   - `inheritedCustomTools` — tools handed to the inner session ahead of
 *     the submit tool.
 *   - `parentRuntimeInstructor` — system-prompt fragment placed before the
 *     subagent instructor.
 *   - `parentTaskId`, `parentTaskType`, `parentAttemptN` — used for the
 *     instructor text and span attributes.
 *   - `parentCancelSignal` (optional) — AbortSignal; aborting it aborts
 *     the inner session.
 *   - `timeoutMs` (optional) — undefined or negative selects
 *     `DEFAULT_SUBAGENT_TIMEOUT_MS`; `0` disables the timer.
 * @returns `{ tool, getCallCount }`, where `getCallCount()` reports how
 *   many calls got past parameter and contract validation.
 */
function createSubagentTool(args) {
	const buildSession = args.buildAgentSession ?? buildAgentSession;
	let callCount = 0;
	return {
		tool: defineTool({
			name: "subagent",
			label: "Delegate to subagent",
			description: subagentToolDescription(),
			parameters: SubagentToolParameters,
			async execute(_id, params) {
				if (!Value.Check(SubagentToolParameters, params)) return toolError(`subagent: invalid parameters: ${JSON.stringify([...Value.Errors(SubagentToolParameters, params)].slice(0, 3))}`);
				const { task, output_schema } = params;
				const contract = getSubagentOutputContract(output_schema);
				if (!contract) return toolError(`subagent: unknown output_schema "${output_schema}". Registered contracts: [${listSubagentOutputContracts().map((c) => c.name).join(", ")}]`);
				callCount += 1;
				const callIndex = callCount;
				// Set by the submit tool once the subagent hands in a valid payload.
				let captured = null;
				const submitTool = defineTool({
					name: SUBAGENT_SUBMIT_TOOL_NAME,
					label: `Submit ${output_schema}`,
					description: `Submit your structured output for this subagent task. Call exactly once when done. Args MUST match the ${output_schema} contract; mismatches return a tool error you can recover from in the same session.`,
					parameters: contract.parametersSchema,
					async execute(_innerId, innerParams) {
						if (!Value.Check(contract.parametersSchema, innerParams)) return toolError(`submit_subagent_output: schema validation failed: ${[...Value.Errors(contract.parametersSchema, innerParams)].slice(0, 3).map((e) => `${e.path}: ${e.message}`).join("; ")}. Re-call with a corrected payload.`);
						captured = innerParams;
						return {
							content: [{
								type: "text",
								text: "Output captured. Subagent session will terminate; no further action needed."
							}],
							details: { captured: true },
							terminate: true
						};
					}
				});
				const subagentInstructor = buildSubagentInstructor({
					contractName: output_schema,
					contractDescription: contract.description,
					parentTaskId: args.parentTaskId,
					callIndex
				});
				const session = await buildSession({
					mountPath: args.mountPath,
					piAuthDir: args.piAuthDir,
					modelHandle: args.modelHandle,
					agentName: args.agentName,
					customTools: [...args.inheritedCustomTools, submitTool],
					appendSystemPrompt: [args.parentRuntimeInstructor, subagentInstructor],
					// The inner session gets no skills of its own.
					skillsOverride: () => ({
						skills: [],
						diagnostics: []
					}),
					otelSpanAttrs: {
						"moltnet.task.id": args.parentTaskId,
						"moltnet.task.type": args.parentTaskType,
						"moltnet.task.attempt": args.parentAttemptN,
						"moltnet.subagent.contract": output_schema,
						"moltnet.subagent.index": callIndex
					}
				});
				let abortReason = null;
				let abortInvoked = false;
				// Idempotent: only the first trigger (cancel or timeout) wins.
				const fireAbort = (reason) => {
					if (abortInvoked) return;
					abortInvoked = true;
					abortReason = reason;
					session.abort().catch((err) => {
						const message = err instanceof Error ? err.message : String(err);
						process.stderr.write(`[subagent] inner session.abort() failed: ${message}\n`);
					});
				};
				const cancelListener = args.parentCancelSignal ? (() => {
					const signal = args.parentCancelSignal;
					const listener = () => fireAbort("parent_cancelled");
					if (signal.aborted) listener();
					else signal.addEventListener("abort", listener, { once: true });
					return () => signal.removeEventListener("abort", listener);
				})() : null;
				const timeoutMs = args.timeoutMs === void 0 || args.timeoutMs < 0 ? DEFAULT_SUBAGENT_TIMEOUT_MS : args.timeoutMs;
				const timeoutHandle = timeoutMs > 0 ? setTimeout(() => fireAbort("subagent_timed_out"), timeoutMs) : null;
				let promptFailed = false;
				let promptFailureMessage = "";
				try {
					// If the parent was already cancelled before we got here, do not
					// start a run whose result would be thrown away anyway.
					if (abortReason === null) await session.prompt(task);
				} catch (err) {
					promptFailed = true;
					promptFailureMessage = err instanceof Error ? err.message : String(err);
				} finally {
					if (timeoutHandle) clearTimeout(timeoutHandle);
					if (cancelListener) cancelListener();
				}
				// Report the abort cause first: a rejection triggered by our own
				// abort is a timeout/cancellation, not an unexpected prompt failure.
				if (abortReason !== null) return toolError(`subagent: ${abortReason === "subagent_timed_out" ? `subagent timed out after ${timeoutMs}ms` : "parent task was cancelled"}. The parent should fail this task or retry with a clearer scope.`);
				if (promptFailed) return toolError(`subagent: inner session.prompt() threw: ${promptFailureMessage}`);
				if (captured === null) return toolError(`subagent: inner session ended without calling ${SUBAGENT_SUBMIT_TOOL_NAME}. The parent should retry with clearer instructions or fail the task.`);
				return {
					content: [{
						type: "text",
						text: JSON.stringify(captured)
					}],
					details: {
						captured: true,
						contract: output_schema,
						callIndex
					}
				};
			}
		}),
		getCallCount: () => callCount
	};
}
|
|
14325
|
+
/**
 * Compose the description the parent LLM sees for the `subagent` tool.
 *
 * The text is organised as three paragraphs separated by blank lines.
 *
 * @returns the multi-line description string.
 */
function subagentToolDescription() {
	const summary = ["Delegate a sub-task to a fresh subagent session with isolated context."];
	const howItRuns = [
		"The subagent starts with no conversation history and only the `task` ",
		"string you provide as its instructions. It runs in the same VM with ",
		"the same tools you have (Gondolin-routed Read/Write/Edit/Bash, ",
		"moltnet_* tools), and is expected to call ",
		`\`${SUBAGENT_SUBMIT_TOOL_NAME}\` with a payload matching the named `,
		"contract before its session ends."
	];
	const outcomes = [
		"On success, the tool result is the JSON-stringified subagent payload.",
		"On failure (unknown contract, validation error, subagent did not ",
		"submit) the tool returns isError:true with a recoverable message."
	];
	return [summary, howItRuns, outcomes]
		.map((paragraph) => paragraph.join("\n"))
		.join("\n\n");
}
|
|
14341
|
+
/**
 * Compose the system-prompt fragment that tells an inner session it is
 * a subagent, which output contract it must satisfy, and the rules it
 * runs under.
 *
 * @param args - `{ parentTaskId, callIndex, contractName,
 *   contractDescription }`.
 * @returns the Markdown instructor text.
 */
function buildSubagentInstructor(args) {
	const heading = ["# You are a subagent"];
	const provenance = [`Parent task: \`${args.parentTaskId}\` (subagent call #${args.callIndex}).`];
	const contract = [
		`Your assigned output contract is \`${args.contractName}\`:`,
		`${args.contractDescription}`
	];
	const rulesHeading = ["Rules for this session:"];
	const rules = [
		`- You MUST call \`${SUBAGENT_SUBMIT_TOOL_NAME}\` exactly once with a `,
		" payload matching the contract above. Your session terminates on ",
		" the valid call.",
		"- The parent's message above is your task. Do not invent additional ",
		" steps the parent did not request.",
		"- All MoltNet runtime invariants from the parent runtime instructor ",
		" apply (diary discipline, gh-auth pattern, etc.) IF you take any ",
		" action that would trigger them. Most subagents do not commit code ",
		" or open PRs — only do so if your task message explicitly requires it.",
		"- You do NOT have access to the `subagent` tool. Do not attempt nested ",
		" delegation; do the work yourself."
	];
	// Paragraphs are separated by exactly one blank line.
	return [heading, provenance, contract, rulesHeading, rules]
		.map((paragraph) => paragraph.join("\n"))
		.join("\n\n");
}
|
|
14365
|
+
/**
 * Wrap a message in the error-result shape returned by the subagent
 * tools.
 *
 * @param text - message shown to the calling LLM.
 * @returns a result with a single text content item, `details.captured`
 *   set to `false`, and `isError: true`.
 */
function toolError(text) {
	const textItem = { type: "text", text };
	return {
		content: [textItem],
		details: { captured: false },
		isError: true
	};
}
|
|
14375
|
+
//#endregion
|
|
13727
14376
|
//#region src/runtime/task-output.ts
|
|
13728
14377
|
var METER_NAME = "@themoltnet/pi-extension/task-output";
|
|
13729
14378
|
var parseResultCounter = null;
|
|
@@ -13962,6 +14611,7 @@ function resolveSubmitTools(taskType, opts = {}) {
|
|
|
13962
14611
|
* Anthropic-SDK one) plug in via the `executeTask` function injected into
|
|
13963
14612
|
* `AgentRuntime`.
|
|
13964
14613
|
*/
|
|
14614
|
+
// Fallback turn-event handler: accepts any arguments and does nothing.
var noopTurnEventHandler = () => void 0;
|
|
13965
14615
|
/**
|
|
13966
14616
|
* Factory that builds a pi-specific `executeTask` function suitable for
|
|
13967
14617
|
* injection into `AgentRuntime`. The returned function caches the resolved
|
|
@@ -14034,6 +14684,7 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
14034
14684
|
const taskTeamId = task.teamId ?? "";
|
|
14035
14685
|
let reporterOpen = false;
|
|
14036
14686
|
let session = null;
|
|
14687
|
+
let subagentHandle = null;
|
|
14037
14688
|
const finalUsage = emptyUsage(opts.provider, opts.model);
|
|
14038
14689
|
let cancelListener = null;
|
|
14039
14690
|
const makeFailedOutput = (code, message, usage = finalUsage) => ({
|
|
@@ -14058,10 +14709,25 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
14058
14709
|
attemptN
|
|
14059
14710
|
});
|
|
14060
14711
|
reporterOpen = true;
|
|
14061
|
-
|
|
14062
|
-
|
|
14063
|
-
|
|
14064
|
-
})
|
|
14712
|
+
let onTurnEvent;
|
|
14713
|
+
if (opts.makeOnTurnEvent) try {
|
|
14714
|
+
onTurnEvent = opts.makeOnTurnEvent(claimedTask);
|
|
14715
|
+
} catch (err) {
|
|
14716
|
+
process.stderr.write(`[emit] makeOnTurnEvent threw: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
14717
|
+
onTurnEvent = noopTurnEventHandler;
|
|
14718
|
+
}
|
|
14719
|
+
else onTurnEvent = opts.onTurnEvent ?? noopTurnEventHandler;
|
|
14720
|
+
const emit = (kind, payload) => {
|
|
14721
|
+
try {
|
|
14722
|
+
onTurnEvent(kind, summarizePayloadForLog(kind, payload));
|
|
14723
|
+
} catch (err) {
|
|
14724
|
+
process.stderr.write(`[emit] onTurnEvent threw for kind="${kind}": ${err instanceof Error ? err.message : String(err)}\n`);
|
|
14725
|
+
}
|
|
14726
|
+
return reporter.record({
|
|
14727
|
+
kind,
|
|
14728
|
+
payload
|
|
14729
|
+
});
|
|
14730
|
+
};
|
|
14065
14731
|
await emit("info", {
|
|
14066
14732
|
event: "execute_start",
|
|
14067
14733
|
taskType: task.taskType,
|
|
@@ -14071,7 +14737,7 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
14071
14737
|
});
|
|
14072
14738
|
let taskPrompt;
|
|
14073
14739
|
try {
|
|
14074
|
-
taskPrompt =
|
|
14740
|
+
taskPrompt = buildTaskUserPrompt(task, {
|
|
14075
14741
|
diaryId,
|
|
14076
14742
|
taskId: task.id,
|
|
14077
14743
|
extras: opts.promptExtras
|
|
@@ -14084,6 +14750,30 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
14084
14750
|
});
|
|
14085
14751
|
return makeFailedOutput("prompt_build_failed", message);
|
|
14086
14752
|
}
|
|
14753
|
+
const rawContext = task.input.context;
|
|
14754
|
+
let injectedContext;
|
|
14755
|
+
try {
|
|
14756
|
+
const contextArray = rawContext === void 0 ? [] : rawContext;
|
|
14757
|
+
if (!Value.Check(TaskContext, contextArray)) throw new Error(`task.input.context failed TaskContext validation: ${JSON.stringify([...Value.Errors(TaskContext, contextArray)].slice(0, 3))}`);
|
|
14758
|
+
injectedContext = await injectTaskContext({
|
|
14759
|
+
context: contextArray,
|
|
14760
|
+
fs: managed.vm.fs
|
|
14761
|
+
});
|
|
14762
|
+
} catch (err) {
|
|
14763
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
14764
|
+
await emit("error", {
|
|
14765
|
+
message,
|
|
14766
|
+
phase: "context_resolution"
|
|
14767
|
+
});
|
|
14768
|
+
return makeFailedOutput("context_resolution_failed", message);
|
|
14769
|
+
}
|
|
14770
|
+
if (injectedContext.injected.length > 0) await emit("info", {
|
|
14771
|
+
event: "context_injected",
|
|
14772
|
+
count: injectedContext.injected.length,
|
|
14773
|
+
bindings: injectedContext.injected.map((r) => r.binding),
|
|
14774
|
+
slugs: injectedContext.injected.map((r) => r.slug)
|
|
14775
|
+
});
|
|
14776
|
+
if (injectedContext.userInlineSuffix) taskPrompt = `${taskPrompt}\n\n---\n\n${injectedContext.userInlineSuffix}`;
|
|
14087
14777
|
const gondolinCustomTools = [
|
|
14088
14778
|
createReadToolDefinition(mountPath, { operations: createGondolinReadOps(managed.vm, mountPath) }),
|
|
14089
14779
|
createWriteToolDefinition(mountPath, { operations: createGondolinWriteOps(managed.vm, mountPath) }),
|
|
@@ -14112,14 +14802,6 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
14112
14802
|
});
|
|
14113
14803
|
const piAuthDir = process.env.PI_CODING_AGENT_DIR ?? join(homedir(), ".pi", "agent");
|
|
14114
14804
|
const modelHandle = getModel(opts.provider, opts.model);
|
|
14115
|
-
const piOtelExtension = createPiOtelExtension({
|
|
14116
|
-
agentName: opts.agentName,
|
|
14117
|
-
spanAttributes: {
|
|
14118
|
-
"moltnet.task.id": task.id,
|
|
14119
|
-
"moltnet.task.attempt": attemptN,
|
|
14120
|
-
"moltnet.task.type": task.taskType
|
|
14121
|
-
}
|
|
14122
|
-
});
|
|
14123
14805
|
const runtimeInstructor = buildRuntimeInstructor({
|
|
14124
14806
|
taskId: task.id,
|
|
14125
14807
|
taskType: task.taskType,
|
|
@@ -14128,29 +14810,47 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
14128
14810
|
agentName: opts.agentName,
|
|
14129
14811
|
correlationId: task.correlationId ?? null
|
|
14130
14812
|
});
|
|
14131
|
-
const
|
|
14132
|
-
|
|
14133
|
-
|
|
14134
|
-
|
|
14135
|
-
|
|
14136
|
-
|
|
14137
|
-
|
|
14138
|
-
|
|
14139
|
-
|
|
14140
|
-
|
|
14141
|
-
|
|
14142
|
-
|
|
14143
|
-
|
|
14144
|
-
|
|
14145
|
-
|
|
14813
|
+
const appendSystemPrompt = [runtimeInstructor];
|
|
14814
|
+
if (injectedContext.systemPromptPrefix) appendSystemPrompt.push(injectedContext.systemPromptPrefix);
|
|
14815
|
+
const injectedSkills = injectedContext.skills;
|
|
14816
|
+
const parentSubagentTools = [];
|
|
14817
|
+
if (taskTypeUsesSubagents(task.taskType)) {
|
|
14818
|
+
subagentHandle = createSubagentTool({
|
|
14819
|
+
mountPath,
|
|
14820
|
+
piAuthDir,
|
|
14821
|
+
modelHandle,
|
|
14822
|
+
agentName: opts.agentName,
|
|
14823
|
+
inheritedCustomTools: [...gondolinCustomTools, ...moltnetTools],
|
|
14824
|
+
parentRuntimeInstructor: runtimeInstructor,
|
|
14825
|
+
parentTaskId: task.id,
|
|
14826
|
+
parentTaskType: task.taskType,
|
|
14827
|
+
parentAttemptN: attemptN,
|
|
14828
|
+
parentCancelSignal: reporter.cancelSignal
|
|
14829
|
+
});
|
|
14830
|
+
parentSubagentTools.push(subagentHandle.tool);
|
|
14831
|
+
}
|
|
14832
|
+
session = await buildAgentSession({
|
|
14833
|
+
mountPath,
|
|
14834
|
+
piAuthDir,
|
|
14835
|
+
modelHandle,
|
|
14836
|
+
agentName: opts.agentName,
|
|
14146
14837
|
customTools: [
|
|
14147
14838
|
...gondolinCustomTools,
|
|
14148
14839
|
...moltnetTools,
|
|
14149
|
-
...submitTools
|
|
14840
|
+
...submitTools,
|
|
14841
|
+
...parentSubagentTools
|
|
14150
14842
|
],
|
|
14151
|
-
|
|
14152
|
-
|
|
14153
|
-
|
|
14843
|
+
appendSystemPrompt,
|
|
14844
|
+
skillsOverride: () => ({
|
|
14845
|
+
skills: injectedSkills,
|
|
14846
|
+
diagnostics: []
|
|
14847
|
+
}),
|
|
14848
|
+
otelSpanAttrs: {
|
|
14849
|
+
"moltnet.task.id": task.id,
|
|
14850
|
+
"moltnet.task.attempt": attemptN,
|
|
14851
|
+
"moltnet.task.type": task.taskType
|
|
14852
|
+
}
|
|
14853
|
+
});
|
|
14154
14854
|
} catch (err) {
|
|
14155
14855
|
const message = err instanceof Error ? err.message : String(err);
|
|
14156
14856
|
await emit("error", {
|
|
@@ -14221,6 +14921,10 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
14221
14921
|
phase: "session_prompt"
|
|
14222
14922
|
});
|
|
14223
14923
|
}
|
|
14924
|
+
if (subagentHandle && subagentHandle.getCallCount() > 0) await emit("info", {
|
|
14925
|
+
event: "subagent_summary",
|
|
14926
|
+
callCount: subagentHandle.getCallCount()
|
|
14927
|
+
});
|
|
14224
14928
|
await Promise.all(recordingPromise);
|
|
14225
14929
|
const cancelled = reporter.cancelSignal.aborted;
|
|
14226
14930
|
let parsedOutput = null;
|
|
@@ -14359,6 +15063,27 @@ function wireSessionAbort(cancelSignal, session) {
|
|
|
14359
15063
|
* `task_messages.payload` row. Bodies above 4 KiB are replaced with a
|
|
14360
15064
|
* `{ truncated, original_size }` marker so the JSONL/DB size stays bounded.
|
|
14361
15065
|
*/
|
|
15066
|
+
/**
 * Build a compact summary of an event payload for log output.
 *
 * What is kept depends on `kind`:
 * - `"text_delta"`: only the character count of `payload.delta` (0 when it is not a string).
 * - `"tool_call_start"`: the tool name.
 * - `"tool_call_end"`: the tool name, a strict boolean `is_error`, and the
 *   `result` only when the call failed and a result is present.
 * - `"turn_end"`: the stop reason.
 * - `"error"`: the phase and the message (string messages are capped).
 * - `"info"`: a shallow copy with every string value capped.
 * - anything else: the payload itself, returned unchanged (same reference).
 *
 * String capping uses the module-level `TRUNCATE_LIMIT` (UTF-16 code units).
 *
 * @param {string} kind - Event kind selecting the summary shape.
 * @param {object} payload - Event payload; must be non-null, properties are read directly.
 * @returns {object} The summarized payload.
 */
function summarizePayloadForLog(kind, payload) {
	// Cap a string at TRUNCATE_LIMIT code units. Non-strings pass through.
	// If the cut would land between the two halves of a surrogate pair, drop
	// the orphaned high surrogate so the result stays a well-formed string.
	const capString = (value) => {
		if (typeof value !== "string" || value.length <= TRUNCATE_LIMIT) return value;
		let end = TRUNCATE_LIMIT;
		const lastKept = value.charCodeAt(end - 1);
		const firstDropped = value.charCodeAt(end);
		const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
		if (splitsPair) end -= 1;
		return value.slice(0, end);
	};
	switch (kind) {
		case "text_delta": {
			const delta = payload.delta;
			return { chars: typeof delta === "string" ? delta.length : 0 };
		}
		case "tool_call_start": return { tool: payload.tool_name };
		case "tool_call_end": return {
			tool: payload.tool_name,
			is_error: payload.is_error === true,
			// The result is only kept for failed calls that actually carry one.
			...payload.is_error === true && payload.result !== void 0 ? { result: payload.result } : {}
		};
		case "turn_end": return { stop_reason: payload.stop_reason };
		case "error": return {
			phase: payload.phase,
			message: capString(payload.message)
		};
		case "info": return Object.fromEntries(Object.entries(payload).map(([k, v]) => [k, capString(v)]));
		default: return payload;
	}
}
|
|
14362
15087
|
var TRUNCATE_LIMIT = 4 * 1024;
|
|
14363
15088
|
function truncateForWire(value) {
|
|
14364
15089
|
if (value === null || value === void 0) return value;
|
|
@@ -14659,4 +15384,4 @@ function moltnetExtension(pi) {
|
|
|
14659
15384
|
registerMoltnetReflectCommand(pi, state);
|
|
14660
15385
|
}
|
|
14661
15386
|
//#endregion
|
|
14662
|
-
export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, loadCredentials, resumeVm, toGuestPath };
|
|
15387
|
+
export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, buildAgentSession, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, createSubagentTool, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, injectTaskContext, loadCredentials, resumeVm, toGuestPath };
|