@themoltnet/pi-extension 0.13.5 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/dist/index.d.ts +228 -1
  2. package/dist/index.js +913 -188
  3. package/package.json +3 -3
package/dist/index.js CHANGED
@@ -2,17 +2,17 @@ import { createRequire } from "node:module";
2
2
  import { execFileSync } from "node:child_process";
3
3
  import { existsSync, mkdirSync, readFileSync, readdirSync, rmSync, statSync } from "node:fs";
4
4
  import path, { join } from "node:path";
5
- import { DefaultResourceLoader, SessionManager, createAgentSession, createBashTool, createBashToolDefinition, createEditTool, createEditToolDefinition, createReadTool, createReadToolDefinition, createWriteTool, createWriteToolDefinition, defineTool } from "@earendil-works/pi-coding-agent";
5
+ import { DefaultResourceLoader, SessionManager, createAgentSession, createBashTool, createBashToolDefinition, createEditTool, createEditToolDefinition, createReadTool, createReadToolDefinition, createSyntheticSourceInfo, createWriteTool, createWriteToolDefinition, defineTool, parseFrontmatter } from "@earendil-works/pi-coding-agent";
6
6
  import { createHash } from "node:crypto";
7
7
  import crypto, { createHash as createHash$1 } from "crypto";
8
8
  import { readFile } from "node:fs/promises";
9
9
  import { homedir } from "node:os";
10
10
  import { Type, getModel } from "@earendil-works/pi-ai";
11
- import { RealFSProvider, ShadowProvider, VM, VmCheckpoint, createHttpHooks, createShadowPathPredicate, ensureImageSelector, loadGuestAssets } from "@earendil-works/gondolin";
11
+ import { MemoryProvider, RealFSProvider, ShadowProvider, VM, VmCheckpoint, createHttpHooks, createShadowPathPredicate, ensureImageSelector, loadGuestAssets } from "@earendil-works/gondolin";
12
12
  import { parseEnv } from "node:util";
13
13
  import { SpanStatusCode, context, metrics, trace } from "@opentelemetry/api";
14
- import { FormatRegistry, Type as Type$1 } from "@sinclair/typebox";
15
14
  import { Value } from "@sinclair/typebox/value";
15
+ import { FormatRegistry, Type as Type$1 } from "@sinclair/typebox";
16
16
  //#region \0rolldown/runtime.js
17
17
  var __defProp = Object.defineProperty;
18
18
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
@@ -2424,13 +2424,31 @@ function problemToError(problem, statusCode) {
2424
2424
  //#endregion
2425
2425
  //#region ../sdk/src/agent-context.ts
2426
2426
  function unwrapResult(result) {
2427
- if (result.error) {
2427
+ if (result.error !== void 0 && result.error !== null) {
2428
2428
  const error = result.error;
2429
- throw problemToError(error, error.status ?? 500);
2429
+ if (isProblemDetails(error)) throw problemToError(error, error.status);
2430
+ if (error instanceof Error && result.response === void 0) {
2431
+ const networkError = new NetworkError(error.message, { detail: error.cause ? stringifyUnknown(error.cause) : void 0 });
2432
+ networkError.stack = error.stack;
2433
+ throw networkError;
2434
+ }
2435
+ throw new MoltNetError(`Unexpected error from MoltNet API: ${stringifyUnknown(error)}`, { code: "UNKNOWN" });
2430
2436
  }
2431
2437
  if (result.data === void 0) throw new MoltNetError("Unexpected empty response from MoltNet API", { code: "EMPTY_RESPONSE" });
2432
2438
  return result.data;
2433
2439
  }
2440
+ function isProblemDetails(error) {
2441
+ if (!error || typeof error !== "object") return false;
2442
+ return typeof error.status === "number" && ("title" in error || "detail" in error);
2443
+ }
2444
+ function stringifyUnknown(value) {
2445
+ if (value instanceof Error) return `${value.name}: ${value.message}`;
2446
+ try {
2447
+ return JSON.stringify(value) ?? String(value);
2448
+ } catch {
2449
+ return String(value);
2450
+ }
2451
+ }
2434
2452
  function unwrapRequired(result, message, code) {
2435
2453
  if (result.error || !result.data) throw new MoltNetError(message, { code });
2436
2454
  return result.data;
@@ -8057,138 +8075,29 @@ function pruneOldSnapshots(maxCached, currentDir) {
8057
8075
  });
8058
8076
  }
8059
8077
  //#endregion
8060
- //#region src/tool-operations.ts
8061
- /**
8062
- * Gondolin tool operations: redirect pi's built-in tool operations
8063
- * (read, write, edit, bash) to execute inside the VM.
8064
- *
8065
- * Follows the same pattern as upstream pi-gondolin.ts — pi's tool factories
8066
- * accept an `operations` object that provides the underlying I/O.
8067
- */
8078
+ //#region src/vm-manager.ts
8068
8079
  var GUEST_WORKSPACE$2 = "/workspace";
8069
- function shQuote(s) {
8070
- return "'" + s.replace(/'/g, "'\\''") + "'";
8071
- }
8072
8080
  /**
8073
- * Map a host-side absolute path to a guest-side /workspace path.
8074
- * Throws if the path escapes the workspace.
8075
- */
8076
- function toGuestPath(localCwd, localPath) {
8077
- if (localPath === GUEST_WORKSPACE$2 || localPath.startsWith(`${GUEST_WORKSPACE$2}/`)) return localPath;
8078
- const rel = path.relative(localCwd, localPath);
8079
- if (rel === "") return GUEST_WORKSPACE$2;
8080
- if (rel.startsWith("..") || path.isAbsolute(rel)) throw new Error(`path escapes workspace: ${localPath}`);
8081
- const posixRel = rel.split(path.sep).join(path.posix.sep);
8082
- return path.posix.join(GUEST_WORKSPACE$2, posixRel);
8083
- }
8084
- function createGondolinReadOps(vm, localCwd) {
8085
- return {
8086
- readFile: async (p) => {
8087
- const r = await vm.exec(["/bin/cat", toGuestPath(localCwd, p)]);
8088
- if (!r.ok) throw new Error(`cat failed (${r.exitCode}): ${r.stderr}`);
8089
- return r.stdoutBuffer;
8090
- },
8091
- access: async (p) => {
8092
- if (!(await vm.exec([
8093
- "/bin/sh",
8094
- "-lc",
8095
- `test -r ${shQuote(toGuestPath(localCwd, p))}`
8096
- ])).ok) throw new Error(`not readable: ${p}`);
8097
- },
8098
- detectImageMimeType: async (p) => {
8099
- try {
8100
- const r = await vm.exec([
8101
- "/bin/sh",
8102
- "-lc",
8103
- `file --mime-type -b ${shQuote(toGuestPath(localCwd, p))}`
8104
- ]);
8105
- if (!r.ok) return null;
8106
- const m = r.stdout.trim();
8107
- return [
8108
- "image/jpeg",
8109
- "image/png",
8110
- "image/gif",
8111
- "image/webp"
8112
- ].includes(m) ? m : null;
8113
- } catch {
8114
- return null;
8115
- }
8116
- }
8117
- };
8118
- }
8119
- function createGondolinWriteOps(vm, localCwd) {
8120
- return {
8121
- writeFile: async (p, content) => {
8122
- const guestPath = toGuestPath(localCwd, p);
8123
- const dir = path.posix.dirname(guestPath);
8124
- const b64 = Buffer.from(content, "utf8").toString("base64");
8125
- const r = await vm.exec([
8126
- "/bin/sh",
8127
- "-lc",
8128
- [
8129
- "set -eu",
8130
- `mkdir -p ${shQuote(dir)}`,
8131
- `echo ${shQuote(b64)} | base64 -d > ${shQuote(guestPath)}`
8132
- ].join("\n")
8133
- ]);
8134
- if (!r.ok) throw new Error(`write failed (${r.exitCode}): ${r.stderr}`);
8135
- },
8136
- mkdir: async (dir) => {
8137
- const r = await vm.exec([
8138
- "/bin/mkdir",
8139
- "-p",
8140
- toGuestPath(localCwd, dir)
8141
- ]);
8142
- if (!r.ok) throw new Error(`mkdir failed (${r.exitCode}): ${r.stderr}`);
8143
- }
8144
- };
8145
- }
8146
- function createGondolinEditOps(vm, localCwd) {
8147
- const r = createGondolinReadOps(vm, localCwd);
8148
- const w = createGondolinWriteOps(vm, localCwd);
8149
- return {
8150
- readFile: r.readFile,
8151
- access: r.access,
8152
- writeFile: w.writeFile
8153
- };
8154
- }
8155
- function createGondolinBashOps(vm, localCwd) {
8156
- return { exec: async (command, cwd, { onData, signal, timeout, env }) => {
8157
- const guestCwd = toGuestPath(localCwd, cwd);
8158
- const ac = new AbortController();
8159
- const onAbort = () => ac.abort();
8160
- signal?.addEventListener("abort", onAbort, { once: true });
8161
- let timedOut = false;
8162
- const timer = timeout && timeout > 0 ? setTimeout(() => {
8163
- timedOut = true;
8164
- ac.abort();
8165
- }, timeout * 1e3) : void 0;
8166
- try {
8167
- const proc = vm.exec([
8168
- "/bin/sh",
8169
- "-lc",
8170
- command
8171
- ], {
8172
- cwd: guestCwd,
8173
- signal: ac.signal,
8174
- stdout: "pipe",
8175
- stderr: "pipe"
8176
- });
8177
- for await (const chunk of proc.output()) onData(typeof chunk.data === "string" ? Buffer.from(chunk.data, "utf8") : chunk.data);
8178
- return { exitCode: (await proc).exitCode };
8179
- } catch (err) {
8180
- if (signal?.aborted) throw new Error("aborted");
8181
- if (timedOut) throw new Error(`timeout:${timeout}`);
8182
- throw err;
8183
- } finally {
8184
- if (timer) clearTimeout(timer);
8185
- signal?.removeEventListener("abort", onAbort);
8186
- }
8187
- } };
8188
- }
8189
- //#endregion
8190
- //#region src/vm-manager.ts
8191
- var GUEST_WORKSPACE$1 = "/workspace";
8081
+ * Memory-backed VFS mount used by the daemon to inject task-context
8082
+ * skills (#943 slice 1.5). Sibling of /workspace, NOT a sub-path —
8083
+ * Gondolin mounts can't nest. The agent's Gondolin-bound Read tool
8084
+ * accepts paths under this prefix (see toGuestPath in tool-operations.ts).
8085
+ *
8086
+ * Why MemoryProvider rather than a path under /workspace:
8087
+ * - Injected skills are ephemeral by intent: per-task-attempt input
8088
+ * scoped to the VM lifetime. MemoryProvider models that exactly
8089
+ * in-memory, per-VM-instance, zero host artefacts, automatic
8090
+ * cleanup on VM close.
8091
+ * - Writing under /workspace fails in worktrees because we symlink
8092
+ * `.moltnet/` to the main repo (so credentials are reachable from
8093
+ * worktrees), and Gondolin's RealFSProvider correctly refuses to
8094
+ * create paths whose ancestors' realpath escapes the mount root.
8095
+ * That refusal is a deliberate sandbox-escape protection, not a
8096
+ * bug. See diary semantic entry cd27d9d3-efdc-4aec-ac0d-5fd8ce258d1f
8097
+ * and episodic 7affbfeb-18a2-4963-aeac-c177eb2afa2d for the full
8098
+ * investigation and the alternatives we rejected.
8099
+ */
8100
+ var GUEST_TASK_SKILLS_MOUNT = "/moltnet-task-skills";
8192
8101
  /**
8193
8102
  * Resolve the main worktree root (where .moltnet/ lives — it's untracked,
8194
8103
  * only exists in the main worktree, not in git worktrees).
@@ -8317,7 +8226,10 @@ async function resumeVm(config) {
8317
8226
  env: vmEnv,
8318
8227
  ...resources?.memory && { memory: resources.memory },
8319
8228
  ...resources?.cpus && { cpus: resources.cpus },
8320
- vfs: { mounts: { [GUEST_WORKSPACE$1]: workspaceProvider } }
8229
+ vfs: { mounts: {
8230
+ [GUEST_WORKSPACE$2]: workspaceProvider,
8231
+ [GUEST_TASK_SKILLS_MOUNT]: new MemoryProvider()
8232
+ } }
8321
8233
  });
8322
8234
  await vm.exec(`sh -c '
8323
8235
  cp /etc/gondolin/mitm/ca.crt /usr/local/share/ca-certificates/gondolin-mitm.crt
@@ -8347,7 +8259,7 @@ nameserver 1.1.1.1" > /etc/resolv.conf'`);
8347
8259
  vm,
8348
8260
  credentials: creds,
8349
8261
  mountPath: config.mountPath,
8350
- guestWorkspace: GUEST_WORKSPACE$1,
8262
+ guestWorkspace: GUEST_WORKSPACE$2,
8351
8263
  agentDir
8352
8264
  };
8353
8265
  }
@@ -8400,6 +8312,137 @@ function ensureRelativeWorktreePaths(gitconfig) {
8400
8312
  return `${gitconfig}${gitconfig.endsWith("\n") ? "" : "\n"}[worktree]\n\tuseRelativePaths = true\n`;
8401
8313
  }
8402
8314
  //#endregion
8315
+ //#region src/tool-operations.ts
8316
+ /**
8317
+ * Gondolin tool operations: redirect pi's built-in tool operations
8318
+ * (read, write, edit, bash) to execute inside the VM.
8319
+ *
8320
+ * Follows the same pattern as upstream pi-gondolin.ts — pi's tool factories
8321
+ * accept an `operations` object that provides the underlying I/O.
8322
+ */
8323
+ var GUEST_WORKSPACE$1 = "/workspace";
8324
+ function shQuote(s) {
8325
+ return "'" + s.replace(/'/g, "'\\''") + "'";
8326
+ }
8327
+ /**
8328
+ * Map a host-side absolute path to a guest-side /workspace path.
8329
+ * Throws if the path escapes the workspace.
8330
+ */
8331
+ function toGuestPath(localCwd, localPath) {
8332
+ if (localPath === GUEST_WORKSPACE$1 || localPath.startsWith(`${GUEST_WORKSPACE$1}/`)) return localPath;
8333
+ if (localPath === "/moltnet-task-skills" || localPath.startsWith(`/moltnet-task-skills/`)) return localPath;
8334
+ const rel = path.relative(localCwd, localPath);
8335
+ if (rel === "") return GUEST_WORKSPACE$1;
8336
+ if (rel.startsWith("..") || path.isAbsolute(rel)) throw new Error(`path escapes workspace: ${localPath}`);
8337
+ const posixRel = rel.split(path.sep).join(path.posix.sep);
8338
+ return path.posix.join(GUEST_WORKSPACE$1, posixRel);
8339
+ }
8340
+ function createGondolinReadOps(vm, localCwd) {
8341
+ return {
8342
+ readFile: async (p) => {
8343
+ const r = await vm.exec(["/bin/cat", toGuestPath(localCwd, p)]);
8344
+ if (!r.ok) throw new Error(`cat failed (${r.exitCode}): ${r.stderr}`);
8345
+ return r.stdoutBuffer;
8346
+ },
8347
+ access: async (p) => {
8348
+ if (!(await vm.exec([
8349
+ "/bin/sh",
8350
+ "-lc",
8351
+ `test -r ${shQuote(toGuestPath(localCwd, p))}`
8352
+ ])).ok) throw new Error(`not readable: ${p}`);
8353
+ },
8354
+ detectImageMimeType: async (p) => {
8355
+ try {
8356
+ const r = await vm.exec([
8357
+ "/bin/sh",
8358
+ "-lc",
8359
+ `file --mime-type -b ${shQuote(toGuestPath(localCwd, p))}`
8360
+ ]);
8361
+ if (!r.ok) return null;
8362
+ const m = r.stdout.trim();
8363
+ return [
8364
+ "image/jpeg",
8365
+ "image/png",
8366
+ "image/gif",
8367
+ "image/webp"
8368
+ ].includes(m) ? m : null;
8369
+ } catch {
8370
+ return null;
8371
+ }
8372
+ }
8373
+ };
8374
+ }
8375
+ function createGondolinWriteOps(vm, localCwd) {
8376
+ return {
8377
+ writeFile: async (p, content) => {
8378
+ const guestPath = toGuestPath(localCwd, p);
8379
+ const dir = path.posix.dirname(guestPath);
8380
+ const b64 = Buffer.from(content, "utf8").toString("base64");
8381
+ const r = await vm.exec([
8382
+ "/bin/sh",
8383
+ "-lc",
8384
+ [
8385
+ "set -eu",
8386
+ `mkdir -p ${shQuote(dir)}`,
8387
+ `echo ${shQuote(b64)} | base64 -d > ${shQuote(guestPath)}`
8388
+ ].join("\n")
8389
+ ]);
8390
+ if (!r.ok) throw new Error(`write failed (${r.exitCode}): ${r.stderr}`);
8391
+ },
8392
+ mkdir: async (dir) => {
8393
+ const r = await vm.exec([
8394
+ "/bin/mkdir",
8395
+ "-p",
8396
+ toGuestPath(localCwd, dir)
8397
+ ]);
8398
+ if (!r.ok) throw new Error(`mkdir failed (${r.exitCode}): ${r.stderr}`);
8399
+ }
8400
+ };
8401
+ }
8402
+ function createGondolinEditOps(vm, localCwd) {
8403
+ const r = createGondolinReadOps(vm, localCwd);
8404
+ const w = createGondolinWriteOps(vm, localCwd);
8405
+ return {
8406
+ readFile: r.readFile,
8407
+ access: r.access,
8408
+ writeFile: w.writeFile
8409
+ };
8410
+ }
8411
+ function createGondolinBashOps(vm, localCwd) {
8412
+ return { exec: async (command, cwd, { onData, signal, timeout, env }) => {
8413
+ const guestCwd = toGuestPath(localCwd, cwd);
8414
+ const ac = new AbortController();
8415
+ const onAbort = () => ac.abort();
8416
+ signal?.addEventListener("abort", onAbort, { once: true });
8417
+ let timedOut = false;
8418
+ const timer = timeout && timeout > 0 ? setTimeout(() => {
8419
+ timedOut = true;
8420
+ ac.abort();
8421
+ }, timeout * 1e3) : void 0;
8422
+ try {
8423
+ const proc = vm.exec([
8424
+ "/bin/sh",
8425
+ "-lc",
8426
+ command
8427
+ ], {
8428
+ cwd: guestCwd,
8429
+ signal: ac.signal,
8430
+ stdout: "pipe",
8431
+ stderr: "pipe"
8432
+ });
8433
+ for await (const chunk of proc.output()) onData(typeof chunk.data === "string" ? Buffer.from(chunk.data, "utf8") : chunk.data);
8434
+ return { exitCode: (await proc).exitCode };
8435
+ } catch (err) {
8436
+ if (signal?.aborted) throw new Error("aborted");
8437
+ if (timedOut) throw new Error(`timeout:${timeout}`);
8438
+ throw err;
8439
+ } finally {
8440
+ if (timer) clearTimeout(timer);
8441
+ signal?.removeEventListener("abort", onAbort);
8442
+ }
8443
+ } };
8444
+ }
8445
+ //#endregion
8403
8446
  //#region src/otel/index.ts
8404
8447
  var TRACER_NAME = "@themoltnet/pi-extension/otel";
8405
8448
  function stripReservedAttrs(attrs) {
@@ -8537,6 +8580,94 @@ function extractUsage(message) {
8537
8580
  };
8538
8581
  }
8539
8582
  //#endregion
8583
+ //#region src/runtime/agent-session-factory.ts
8584
+ var NO_SKILLS = () => ({
8585
+ skills: [],
8586
+ diagnostics: []
8587
+ });
8588
+ /**
8589
+ * Construct an in-memory `AgentSession`. The caller is responsible for
8590
+ * eventually invoking `session.prompt(...)` and for tearing down — the
8591
+ * helper does no lifecycle management beyond construction.
8592
+ */
8593
+ async function buildAgentSession(args) {
8594
+ const piOtelExtension = createPiOtelExtension({
8595
+ agentName: args.agentName,
8596
+ spanAttributes: args.otelSpanAttrs
8597
+ });
8598
+ const resourceLoader = new DefaultResourceLoader({
8599
+ cwd: args.mountPath,
8600
+ agentDir: args.piAuthDir,
8601
+ extensionFactories: [piOtelExtension],
8602
+ appendSystemPrompt: args.appendSystemPrompt,
8603
+ skillsOverride: args.skillsOverride ?? NO_SKILLS
8604
+ });
8605
+ await resourceLoader.reload();
8606
+ return (await createAgentSession({
8607
+ agentDir: args.piAuthDir,
8608
+ cwd: args.mountPath,
8609
+ model: args.modelHandle,
8610
+ customTools: args.customTools,
8611
+ sessionManager: SessionManager.inMemory(),
8612
+ resourceLoader
8613
+ })).session;
8614
+ }
8615
+ //#endregion
8616
+ //#region ../agent-runtime/src/context-bindings.ts
8617
+ var PROMPT_SEPARATOR = "\n\n---\n\n";
8618
+ /**
8619
+ * Resolve `task.input.context[]` into delivered side-effects (skills
8620
+ * persisted via `deliver.skill`) and prompt fragments
8621
+ * (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
8622
+ * built prompt.
8623
+ *
8624
+ * Per-binding semantics (V1):
8625
+ * - `skill` → `deliver.skill({ slug, content })` once per ref.
8626
+ * Slug collisions on distinct contents are
8627
+ * refused loudly.
8628
+ * - `prompt_prefix` → content appended to `systemPromptPrefix` with
8629
+ * the canonical `\n\n---\n\n` separator (in
8630
+ * declared order).
8631
+ * - `user_inline` → content appended to `userInlineSuffix` in
8632
+ * declared order, same separator.
8633
+ *
8634
+ * No fetching, no hashing — bytes are inlined in `ContextRef.content`,
8635
+ * and the task's `inputCid` already pins the entire input. The imposer
8636
+ * chose these bytes; the resolver just dispatches them.
8637
+ *
8638
+ * The function is pure with respect to its arguments: file writes are
8639
+ * confined to the injected `deliver` callback, which makes the
8640
+ * resolver trivial to test.
8641
+ */
8642
+ async function resolveTaskContext(args) {
8643
+ const promptParts = [];
8644
+ const userParts = [];
8645
+ const injected = [];
8646
+ const usedSlugs = /* @__PURE__ */ new Map();
8647
+ for (const ref of args.context) {
8648
+ if (ref.binding === "skill") {
8649
+ const prior = usedSlugs.get(ref.slug);
8650
+ if (prior !== void 0) {
8651
+ if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
8652
+ injected.push(ref);
8653
+ continue;
8654
+ }
8655
+ usedSlugs.set(ref.slug, ref.content);
8656
+ await args.deliver.skill({
8657
+ slug: ref.slug,
8658
+ content: ref.content
8659
+ });
8660
+ } else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
8661
+ else userParts.push(ref.content);
8662
+ injected.push(ref);
8663
+ }
8664
+ return {
8665
+ injected,
8666
+ systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
8667
+ userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
8668
+ };
8669
+ }
8670
+ //#endregion
8540
8671
  //#region ../tasks/src/formats.ts
8541
8672
  /**
8542
8673
  * Register TypeBox string formats used across Task / TaskOutput / task-type
@@ -8551,6 +8682,55 @@ var UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a
8551
8682
  if (!FormatRegistry.Has("uuid")) FormatRegistry.Set("uuid", (v) => UUID_RE.test(v));
8552
8683
  if (!FormatRegistry.Has("date-time")) FormatRegistry.Set("date-time", (v) => !Number.isNaN(Date.parse(v)));
8553
8684
  //#endregion
8685
+ //#region ../tasks/src/context.ts
8686
+ /**
8687
+ * How an executor delivers a context entry to its underlying LLM.
8688
+ * V1 bindings only; Tier-2 (reference_file, mcp_resource, imported_file,
8689
+ * tool_response_seed, additional_context_hook) ship in a later slice.
8690
+ */
8691
+ var ContextBinding = Type$1.Union([
8692
+ Type$1.Literal("skill"),
8693
+ Type$1.Literal("prompt_prefix"),
8694
+ Type$1.Literal("user_inline")
8695
+ ], { $id: "ContextBinding" });
8696
+ /**
8697
+ * One context entry. Bytes are inlined: the imposer chose them, and the
8698
+ * task's `inputCid` already pins the entire input — including
8699
+ * `context[]` — so we don't need a separate per-entry hash, fetcher, or
8700
+ * flagged-content gate. Tasks reference rendered packs (or any other
8701
+ * external content) by copying their bytes into `content` at task
8702
+ * creation time.
8703
+ *
8704
+ * - `slug` — short identifier the daemon uses to disambiguate
8705
+ * entries. For `skill` binding it becomes the directory
8706
+ * name under the runtime's skill discovery path. Must be
8707
+ * kebab-case-safe (alphanumeric + dashes/underscores).
8708
+ * - `binding` — how the bytes are delivered to the LLM (see above).
8709
+ * - `content` — the actual bytes (UTF-8 text). Capped at 32 KiB per
8710
+ * entry; total per-task context bytes are bounded by the
8711
+ * soft `maxItems` cap and per-binding daemon limits.
8712
+ */
8713
+ var ContextRef = Type$1.Object({
8714
+ slug: Type$1.String({
8715
+ minLength: 1,
8716
+ maxLength: 64,
8717
+ pattern: "^[a-zA-Z0-9_-]+$"
8718
+ }),
8719
+ binding: ContextBinding,
8720
+ content: Type$1.String({
8721
+ minLength: 1,
8722
+ maxLength: 32768
8723
+ })
8724
+ }, {
8725
+ $id: "ContextRef",
8726
+ additionalProperties: false
8727
+ });
8728
+ /** Reusable input fragment for any task type. Soft cap at 5 items. */
8729
+ var TaskContext = Type$1.Array(ContextRef, {
8730
+ $id: "TaskContext",
8731
+ maxItems: 5
8732
+ });
8733
+ //#endregion
8554
8734
  //#region ../tasks/src/rubric.ts
8555
8735
  /**
8556
8736
  * Rubric — structured acceptance criteria used by judgment tasks.
@@ -9099,6 +9279,60 @@ var RenderPackOutput = Type$1.Object({
9099
9279
  additionalProperties: false
9100
9280
  });
9101
9281
  //#endregion
9282
+ //#region ../tasks/src/task-types/run-eval.ts
9283
+ /**
9284
+ * `run_eval` — execute a scenario prompt under a named variant for
9285
+ * later cross-variant grading by `judge_eval_variant` (Slice 2).
9286
+ *
9287
+ * output_kind: artifact
9288
+ * criteria: optional (when set, output.verification is required —
9289
+ * producer self-assessment; the judge is the binding evaluator)
9290
+ * references: not required (scenario lives entirely in input)
9291
+ */
9292
+ var RUN_EVAL_TYPE = "run_eval";
9293
+ var RunEvalInput = Type$1.Object({
9294
+ scenario: Type$1.Object({
9295
+ prompt: Type$1.String({ minLength: 1 }),
9296
+ inputFiles: Type$1.Optional(Type$1.Array(Type$1.String({ minLength: 1 })))
9297
+ }, { additionalProperties: false }),
9298
+ variantLabel: Type$1.String({
9299
+ minLength: 1,
9300
+ maxLength: 64
9301
+ }),
9302
+ context: TaskContext,
9303
+ successCriteria: Type$1.Optional(SuccessCriteria)
9304
+ }, {
9305
+ $id: "RunEvalInput",
9306
+ additionalProperties: false
9307
+ });
9308
+ var RunEvalOutput = Type$1.Object({
9309
+ response: Type$1.String({ minLength: 1 }),
9310
+ artifacts: Type$1.Optional(Type$1.Array(Type$1.Object({
9311
+ path: Type$1.String({ minLength: 1 }),
9312
+ cid: Type$1.String({ minLength: 1 })
9313
+ }, { additionalProperties: false }))),
9314
+ totalTokens: Type$1.Integer({ minimum: 0 }),
9315
+ durationMs: Type$1.Integer({ minimum: 0 }),
9316
+ traceparent: Type$1.String({ minLength: 1 }),
9317
+ verification: Type$1.Optional(VerificationRecord)
9318
+ }, {
9319
+ $id: "RunEvalOutput",
9320
+ additionalProperties: false
9321
+ });
9322
+ /**
9323
+ * Cross-field rule mirroring the `requireVerificationWhenCriteriaPresent`
9324
+ * rule used by the brief task types: when input declares
9325
+ * `successCriteria`, output MUST carry `verification`; when it doesn't,
9326
+ * output MUST NOT carry one.
9327
+ */
9328
+ function validateRunEvalOutput(output, input) {
9329
+ const hasCriteria = input !== null && input !== void 0 && input.successCriteria !== void 0;
9330
+ const hasVerification = output !== null && output !== void 0 && output.verification !== void 0;
9331
+ if (hasCriteria && !hasVerification) return "output.verification is required because input.successCriteria is set; the producer LLM must self-assess against the criteria";
9332
+ if (!hasCriteria && hasVerification) return "output.verification was supplied but input.successCriteria is unset; omit verification when there are no criteria to assess against";
9333
+ return null;
9334
+ }
9335
+ //#endregion
9102
9336
  //#region ../tasks/src/task-types/index.ts
9103
9337
  /**
9104
9338
  * Validate that a judgment-task input carries a rubric inside its
@@ -9177,6 +9411,14 @@ var BUILT_IN_TASK_TYPES = {
9177
9411
  requiresReferences: true,
9178
9412
  validateInput: validateJudgmentInput,
9179
9413
  validateOutput: validateJudgePackOutput
9414
+ },
9415
+ [RUN_EVAL_TYPE]: {
9416
+ name: RUN_EVAL_TYPE,
9417
+ inputSchema: RunEvalInput,
9418
+ outputSchema: RunEvalOutput,
9419
+ outputKind: "artifact",
9420
+ requiresReferences: false,
9421
+ validateOutput: validateRunEvalOutput
9180
9422
  }
9181
9423
  };
9182
9424
  //#endregion
@@ -9231,6 +9473,15 @@ function validateTaskOutput(taskType, output, input) {
9231
9473
  function getTaskOutputSchema(taskType) {
9232
9474
  return getTaskTypeEntry(taskType)?.outputSchema ?? null;
9233
9475
  }
9476
+ /**
9477
+ * Whether sessions running this task type should have the generic
9478
+ * `subagent` custom tool registered. Returns `false` for unknown task
9479
+ * types and for task types that didn't opt in. See `TaskTypeEntry`
9480
+ * for the design rationale.
9481
+ */
9482
+ function taskTypeUsesSubagents(taskType) {
9483
+ return getTaskTypeEntry(taskType)?.usesSubagents === true;
9484
+ }
9234
9485
  //#endregion
9235
9486
  //#region ../tasks/src/wire.ts
9236
9487
  /**
@@ -9275,6 +9526,14 @@ var ExecutorTrustLevel = Type$1.Union([
9275
9526
  Type$1.Literal("releaseVerifiedTool"),
9276
9527
  Type$1.Literal("sandboxAttested")
9277
9528
  ], { $id: "ExecutorTrustLevel" });
9529
+ /** Identifies a (provider, model) daemon pair allowed to claim a task. */
9530
+ var ExecutorRef = Type$1.Object({
9531
+ provider: Type$1.String({ minLength: 1 }),
9532
+ model: Type$1.String({ minLength: 1 })
9533
+ }, {
9534
+ $id: "ExecutorRef",
9535
+ additionalProperties: false
9536
+ });
9278
9537
  var OutputKind = Type$1.Union([Type$1.Literal("artifact"), Type$1.Literal("judgment")], { $id: "OutputKind" });
9279
9538
  var TaskMessageKind = Type$1.Union([
9280
9539
  Type$1.Literal("text_delta"),
@@ -9367,6 +9626,7 @@ Type$1.Object({
9367
9626
  imposedByHumanId: Type$1.Union([Uuid, Type$1.Null()]),
9368
9627
  acceptedAttemptN: Type$1.Union([Type$1.Number(), Type$1.Null()]),
9369
9628
  requiredExecutorTrustLevel: ExecutorTrustLevel,
9629
+ allowedExecutors: Type$1.Array(ExecutorRef, { maxItems: 16 }),
9370
9630
  status: TaskStatus,
9371
9631
  queuedAt: IsoTimestamp,
9372
9632
  completedAt: Type$1.Union([IsoTimestamp, Type$1.Null()]),
@@ -9552,7 +9812,7 @@ function buildFinalOutputBlock(opts) {
9552
9812
  //#endregion
9553
9813
  //#region ../agent-runtime/src/prompts/assess-brief.ts
9554
9814
  /**
9555
- * Build the system prompt for an `assess_brief` judge attempt.
9815
+ * Build the first user-message prompt for an `assess_brief` judge attempt.
9556
9816
  *
9557
9817
  * Design note — no pre-resolved `target` projection
9558
9818
  * --------------------------------------------------
@@ -9573,7 +9833,7 @@ function buildFinalOutputBlock(opts) {
9573
9833
  * future task types whose products are docs / configs / changes /
9574
9834
  * anything) work without any code path here.
9575
9835
  */
9576
- function buildAssessBriefPrompt(input, ctx) {
9836
+ function buildAssessBriefUserPrompt(input, ctx) {
9577
9837
  const rubric = input.successCriteria.rubric;
9578
9838
  const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
9579
9839
  const preambleSection = rubric.preamble ? [
@@ -9688,7 +9948,7 @@ function buildSelfVerificationBlock(taskId) {
9688
9948
  //#endregion
9689
9949
  //#region ../agent-runtime/src/prompts/curate-pack.ts
9690
9950
  /**
9691
- * Build the system prompt for a `curate_pack` task.
9951
+ * Build the first user-message prompt for a `curate_pack` task.
9692
9952
  *
9693
9953
  * Design note: this prompt is deliberately NOT a numbered command
9694
9954
  * sequence. The curator's value comes from judgment — inferring scope
@@ -9709,7 +9969,7 @@ function buildSelfVerificationBlock(taskId) {
9709
9969
  * emits pruned state at phase boundaries so a follow-up session can
9710
9970
  * resume without replaying the tool history.
9711
9971
  */
9712
- function buildCuratePackPrompt(input, ctx) {
9972
+ function buildCuratePackUserPrompt(input, ctx) {
9713
9973
  const { diaryId, taskPrompt, entryTypes, tagFilters, tokenBudget, recipe } = input;
9714
9974
  const entryTypesPinned = Boolean(entryTypes);
9715
9975
  const resolvedRecipe = recipe ?? "topic-focused-v1";
@@ -9845,13 +10105,13 @@ function buildCuratePackPrompt(input, ctx) {
9845
10105
  //#endregion
9846
10106
  //#region ../agent-runtime/src/prompts/fulfill-brief.ts
9847
10107
  /**
9848
- * Build the system prompt for a `fulfill_brief` task.
10108
+ * Build the first user-message prompt for a `fulfill_brief` task.
9849
10109
  *
9850
10110
  * Generalized from the original `resolve-issue` prompt. No longer
9851
10111
  * GitHub-specific; references live on `Task.references[]` and the agent
9852
10112
  * is told to inspect them itself.
9853
10113
  */
9854
- function buildFulfillBriefPrompt(input, ctx) {
10114
+ function buildFulfillBriefUserPrompt(input, ctx) {
9855
10115
  const { brief, title, acceptanceCriteria, seedFiles, scopeHint } = input;
9856
10116
  const criteriaSection = acceptanceCriteria?.length ? [
9857
10117
  "### Acceptance criteria",
@@ -9931,7 +10191,7 @@ function buildFulfillBriefPrompt(input, ctx) {
9931
10191
  }
9932
10192
  //#endregion
9933
10193
  //#region ../agent-runtime/src/prompts/judge-pack.ts
9934
- function buildJudgePackPrompt(input, ctx) {
10194
+ function buildJudgePackUserPrompt(input, ctx) {
9935
10195
  const { renderedPackId, sourcePackId, successCriteria } = input;
9936
10196
  const rubric = successCriteria.rubric;
9937
10197
  const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
@@ -10058,10 +10318,10 @@ function buildJudgePackPrompt(input, ctx) {
10058
10318
  //#endregion
10059
10319
  //#region ../agent-runtime/src/prompts/render-pack.ts
10060
10320
  /**
10061
- * Build the system prompt for a `render_pack` task. Almost mechanical:
10321
+ * Build the first user-message prompt for a `render_pack` task. Almost mechanical:
10062
10322
  * wraps `moltnet_pack_render` and emits the receipt.
10063
10323
  */
10064
- function buildRenderPackPrompt(input, ctx) {
10324
+ function buildRenderPackUserPrompt(input, ctx) {
10065
10325
  const { packId, persist = true, pinned = false } = input;
10066
10326
  return [
10067
10327
  "# Render Pack Agent",
@@ -10115,19 +10375,87 @@ function buildRenderPackPrompt(input, ctx) {
10115
10375
  ].join("\n");
10116
10376
  }
10117
10377
  //#endregion
10378
+ //#region ../agent-runtime/src/prompts/run-eval.ts
10379
/**
 * Render the first user-message prompt for a `run_eval` task.
 *
 * The prompt is free-form: there is no git workflow and no commit ceremony.
 * The executor answers the scenario (optionally producing file artifacts) and
 * a later `judge_eval_variant` task grades that answer against the rubric.
 *
 * `input.context[]` is deliberately NOT inlined here; context delivery is the
 * job of `resolveTaskContext` and its caller.
 *
 * @param input - `run_eval` task input. Reads `scenario.prompt`,
 *   `scenario.inputFiles`, `variantLabel` and `successCriteria`.
 * @param ctx - Prompt context. Reads `taskId` and the optional `correlationId`.
 * @returns Markdown sections joined by newlines; sections that render to the
 *   empty string are dropped.
 */
function buildRunEvalUserPrompt(input, ctx) {
  const { scenario, variantLabel, successCriteria } = input;

  // Optional bullet list of the scenario's input files.
  let inputFilesSection = "";
  if (scenario.inputFiles?.length) {
    const bullets = scenario.inputFiles.map((file) => `- \`${file}\``);
    inputFilesSection = ["### Input files", "", ...bullets, ""].join("\n");
  }

  // The self-verification block is only rendered when success criteria exist.
  let verificationSection = "";
  if (successCriteria) {
    verificationSection = buildSelfVerificationBlock(ctx.taskId);
  }

  // Informational only: tells the agent how sibling variants are joined.
  let correlationSection = "";
  if (ctx.correlationId) {
    correlationSection = [
      "### Correlation",
      "",
      `This task carries correlationId \`${ctx.correlationId}\`. It joins`,
      "this variant to its sibling `run_eval` tasks (other variants of the",
      "same scenario) and to the eventual `judge_eval_variant` task that",
      "will grade them together. You do not need to act on it directly —",
      "it is recorded for cross-variant aggregation at query time.",
      ""
    ].join("\n");
  }

  const shapeSketch = [
    "{",
    " \"response\": \"<your free-form answer>\",",
    " \"artifacts\": [{ \"path\": \"...\", \"cid\": \"...\" }], // optional",
    " \"totalTokens\": <int>,",
    " \"durationMs\": <int>,",
    " \"traceparent\": \"<from claim>\",",
    " \"verification\": <required iff input.successCriteria; see Self-verification>",
    "}"
  ].join("\n");
  const finalOutputBlock = buildFinalOutputBlock({
    taskType: "run_eval",
    outputSchemaName: "RunEvalOutput",
    shapeSketch
  });

  const sections = [
    "# Run Eval Agent\n",
    `You are running an evaluation scenario as variant \`${variantLabel}\`.\nTask id: \`${ctx.taskId}\`\n`,
    correlationSection,
    `### Scenario\n\n${scenario.prompt}\n`,
    inputFilesSection,
    verificationSection,
    finalOutputBlock
  ];
  return sections.filter((section) => section !== "").join("\n");
}
10436
+ //#endregion
10118
10437
  //#region ../agent-runtime/src/prompts/index.ts
10119
10438
  /**
10120
- * Resolve the correct prompt builder for `task.taskType` and invoke it.
10121
- * Throws if the type is unknown or the input fails TypeBox validation.
10122
- */
10123
- function buildPromptForTask(task, ctx) {
10439
+ * Resolve the correct user-prompt builder for `task.taskType` and
10440
+ * invoke it. Throws if the type is unknown or the input fails TypeBox
10441
+ * validation.
10442
+ *
10443
+ * Role note: the returned string is delivered as the **first user
10444
+ * message** of the agent's session (pi-coding-agent's
10445
+ * `session.prompt(text)` puts text in the user role). The system
10446
+ * prompt is built separately by pi from `appendSystemPrompt` (the
10447
+ * runtime instructor lives there). Builders here are free-form Markdown
10448
+ * for the user turn; they don't replace or prepend to the system
10449
+ * prompt.
10450
+ */
10451
+ function buildTaskUserPrompt(task, ctx) {
10124
10452
  switch (task.taskType) {
10125
10453
  case FULFILL_BRIEF_TYPE:
10126
10454
  if (!Value.Check(FulfillBriefInput, task.input)) {
10127
10455
  const errors = [...Value.Errors(FulfillBriefInput, task.input)];
10128
10456
  throw new Error(`fulfill_brief input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
10129
10457
  }
10130
- return buildFulfillBriefPrompt(task.input, {
10458
+ return buildFulfillBriefUserPrompt(task.input, {
10131
10459
  diaryId: ctx.diaryId,
10132
10460
  taskId: ctx.taskId,
10133
10461
  correlationId: task.correlationId
@@ -10137,7 +10465,7 @@ function buildPromptForTask(task, ctx) {
10137
10465
  const errors = [...Value.Errors(AssessBriefInput, task.input)];
10138
10466
  throw new Error(`assess_brief input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
10139
10467
  }
10140
- return buildAssessBriefPrompt(task.input, {
10468
+ return buildAssessBriefUserPrompt(task.input, {
10141
10469
  diaryId: ctx.diaryId,
10142
10470
  taskId: ctx.taskId
10143
10471
  });
@@ -10146,7 +10474,7 @@ function buildPromptForTask(task, ctx) {
10146
10474
  const errors = [...Value.Errors(CuratePackInput, task.input)];
10147
10475
  throw new Error(`curate_pack input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
10148
10476
  }
10149
- return buildCuratePackPrompt(task.input, {
10477
+ return buildCuratePackUserPrompt(task.input, {
10150
10478
  diaryId: ctx.diaryId,
10151
10479
  taskId: ctx.taskId
10152
10480
  });
@@ -10155,7 +10483,7 @@ function buildPromptForTask(task, ctx) {
10155
10483
  const errors = [...Value.Errors(RenderPackInput, task.input)];
10156
10484
  throw new Error(`render_pack input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
10157
10485
  }
10158
- return buildRenderPackPrompt(task.input, {
10486
+ return buildRenderPackUserPrompt(task.input, {
10159
10487
  diaryId: ctx.diaryId,
10160
10488
  taskId: ctx.taskId
10161
10489
  });
@@ -10164,10 +10492,20 @@ function buildPromptForTask(task, ctx) {
10164
10492
  const errors = [...Value.Errors(JudgePackInput, task.input)];
10165
10493
  throw new Error(`judge_pack input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
10166
10494
  }
10167
- return buildJudgePackPrompt(task.input, {
10495
+ return buildJudgePackUserPrompt(task.input, {
10168
10496
  diaryId: ctx.diaryId,
10169
10497
  taskId: ctx.taskId
10170
10498
  });
10499
+ case RUN_EVAL_TYPE:
10500
+ if (!Value.Check(RunEvalInput, task.input)) {
10501
+ const errors = [...Value.Errors(RunEvalInput, task.input)];
10502
+ throw new Error(`run_eval input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
10503
+ }
10504
+ return buildRunEvalUserPrompt(task.input, {
10505
+ diaryId: ctx.diaryId,
10506
+ taskId: ctx.taskId,
10507
+ correlationId: task.correlationId
10508
+ });
10171
10509
  default: throw new Error(`No prompt builder registered for taskType="${task.taskType}"`);
10172
10510
  }
10173
10511
  }
@@ -13639,6 +13977,133 @@ var require_multistream = /* @__PURE__ */ __commonJSMin(((exports, module) => {
13639
13977
  module.exports.pino = pino;
13640
13978
  })))();
13641
13979
  //#endregion
13980
+ //#region ../agent-runtime/src/subagent-output-contracts.ts
13981
/** Name-keyed table of subagent output contracts used by the lookup helpers. */
var REGISTRY = /* @__PURE__ */ new Map();
/**
 * Look up a subagent output contract by its registered name.
 *
 * Unknown names are reported as `null` rather than thrown, so the caller can
 * decide whether that is a recoverable tool error or a hard failure.
 *
 * @param name - Contract name to resolve.
 * @returns The registered contract, or `null` when nothing usable is stored
 *   under `name`.
 */
function getSubagentOutputContract(name) {
  const contract = REGISTRY.get(name);
  if (contract === undefined || contract === null) {
    return null;
  }
  return contract;
}
13990
/**
 * Snapshot every registered subagent output contract.
 *
 * Handy for diagnostics and for error messages that tell a parent LLM which
 * contract names it may use.
 *
 * @returns A new array holding the registry's values in insertion order;
 *   mutating the array does not touch the registry.
 */
function listSubagentOutputContracts() {
  return Array.from(REGISTRY.values());
}
13998
+ //#endregion
13999
+ //#region src/runtime/inject-task-context.ts
14000
+ /**
14001
+ * Slice 1.5 of #943 — wire the agent-runtime resolver into the
14002
+ * pi-extension execution path.
14003
+ *
14004
+ * `resolveTaskContext` is a pure dispatcher; this module provides the
14005
+ * Gondolin-aware deliverer and the post-resolution shape the
14006
+ * `execute-pi-task` caller needs to splice into pi's setup:
14007
+ *
14008
+ * - `systemPromptPrefix` → fed into `appendSystemPrompt` alongside
14009
+ * the runtime instructor (it IS a system-prompt fragment).
14010
+ * - `userInlineSuffix` → appended to the `buildTaskUserPrompt`
14011
+ * output BEFORE `session.prompt(text)`.
14012
+ * - `skills` → spliced into the `skillsOverride` callback's
14013
+ * return value. pi includes them in `<available_skills>` in the
14014
+ * system prompt; the agent fetches the body on demand via the
14015
+ * Read tool.
14016
+ *
14017
+ * Skill files are written into the VM at
14018
+ * `/workspace/.moltnet/skills/<slug>/SKILL.md`. The agent's
14019
+ * Gondolin-bound Read tool is scoped to `/workspace`, so that path is
14020
+ * the only location the agent can actually read at runtime. pi only
14021
+ * reads `<available_skills>` metadata (name, description, location),
14022
+ * never the file body, so we construct synthetic `Skill` objects
14023
+ * pointing at the in-VM path without ever materialising the file on
14024
+ * the host.
14025
+ */
14026
+ /**
14027
+ * Where in the VM we write skill bodies — the memory-backed mount
14028
+ * declared in `vm-manager.ts`. See the comment on
14029
+ * `GUEST_TASK_SKILLS_MOUNT` there for the full rationale (ephemeral
14030
+ * by intent + the worktree symlink interaction with Gondolin's
14031
+ * sandbox-escape protection). The agent's Gondolin Read tool accepts
14032
+ * paths under this mount via `toGuestPath` in `tool-operations.ts`.
14033
+ */
14034
+ var SKILL_ROOT_IN_VM = GUEST_TASK_SKILLS_MOUNT;
14035
+ /** Bounds borrowed from pi's skill validation; conservative caps so a
14036
+ * malformed SKILL.md doesn't bloat the system prompt. */
14037
+ var MAX_SKILL_NAME = 64;
14038
+ var MAX_SKILL_DESCRIPTION = 1024;
14039
/**
 * Resolve a task's `input.context[]` and stage the side effects pi needs.
 *
 * Delegates dispatch to `resolveTaskContext`, supplying a `skill` deliverer
 * that writes each skill body to `<SKILL_ROOT_IN_VM>/<slug>/SKILL.md` through
 * `args.fs` and records a synthetic skill entry for it.
 *
 * @param args.context - Context items forwarded verbatim to `resolveTaskContext`.
 * @param args.fs - Filesystem handle exposing async `mkdir(dir, opts)` and
 *   `writeFile(path, content, opts)`.
 * @returns `{ injected, skills, systemPromptPrefix, userInlineSuffix }`:
 *   `skills` is collected here, the other three come from the resolver.
 * @throws {Error} When a skill slug is not a plain relative path. The rejection
 *   presumably propagates through `resolveTaskContext` (confirm against the
 *   resolver). Failures from `args.fs` are not caught here either.
 */
async function injectTaskContext(args) {
  const skills = [];
  const resolved = await resolveTaskContext({
    context: args.context,
    deliver: {
      skill: async ({ slug, content }) => {
        // The slug becomes part of a filesystem path: refuse anything that
        // could climb out of the skills mount before touching the VM.
        assertSafeSkillSlug(slug);
        const dir = `${SKILL_ROOT_IN_VM}/${slug}`;
        const filePath = `${dir}/SKILL.md`;
        await args.fs.mkdir(dir, { recursive: true });
        // 0o644: owner read/write, everyone else read-only.
        await args.fs.writeFile(filePath, content, { mode: 0o644 });
        skills.push(buildSyntheticSkill({ slug, content, filePath, dir }));
      }
    }
  });
  return {
    injected: resolved.injected,
    skills,
    systemPromptPrefix: resolved.systemPromptPrefix,
    userInlineSuffix: resolved.userInlineSuffix
  };
}

/** Throw unless `slug` is a non-empty relative path with no empty, `.` or `..` segments. */
function assertSafeSkillSlug(slug) {
  const segments = typeof slug === "string" ? slug.split("/") : [""];
  const unsafe = segments.some((segment) => segment === "" || segment === "." || segment === "..");
  if (unsafe) {
    throw new Error(`invalid skill slug ${JSON.stringify(slug)}: expected a relative path without empty, "." or ".." segments`);
  }
}
14067
/**
 * Build the synthetic skill record for a task-injected SKILL.md.
 *
 * `name` and `description` come from the content's frontmatter (parsed with
 * `parseFrontmatter`) when they are non-blank strings; otherwise the slug and
 * a generic description are used. Both are clipped to `MAX_SKILL_NAME` /
 * `MAX_SKILL_DESCRIPTION`.
 *
 * Frontmatter is optional metadata, so parsing is best-effort: a parser
 * exception or a missing/non-object frontmatter value falls back to the
 * slug-derived metadata instead of failing the task.
 *
 * @param args.slug - Skill slug, used as the fallback name.
 * @param args.content - Full SKILL.md text handed to `parseFrontmatter`.
 * @param args.filePath - Path stored on the record and in its source info.
 * @param args.dir - Directory stored as `baseDir`.
 * @returns `{ name, description, filePath, baseDir, sourceInfo, disableModelInvocation }`.
 */
function buildSyntheticSkill(args) {
  let fm = {};
  try {
    const parsed = parseFrontmatter(args.content).frontmatter;
    // Guard the property reads below: a null/undefined frontmatter would
    // otherwise throw outside this best-effort try block.
    if (parsed !== null && typeof parsed === "object") {
      fm = parsed;
    }
  } catch {
    // Deliberately ignored: malformed frontmatter must not fail the task.
  }
  const textOr = (value, fallback) => typeof value === "string" && value.trim().length > 0 ? value.trim() : fallback;
  return {
    name: clip(textOr(fm.name, args.slug), MAX_SKILL_NAME),
    description: clip(textOr(fm.description, `Task-injected context skill (${args.slug})`), MAX_SKILL_DESCRIPTION),
    filePath: args.filePath,
    baseDir: args.dir,
    sourceInfo: createSyntheticSourceInfo(args.filePath, {
      source: "moltnet:task-context",
      scope: "temporary",
      origin: "top-level",
      baseDir: args.dir
    }),
    // Only an explicit boolean `true` disables model invocation.
    disableModelInvocation: fm["disable-model-invocation"] === true
  };
}
14103
/**
 * Truncate `s` to at most `max` UTF-16 code units.
 *
 * If the cut would land between the two halves of a surrogate pair, the
 * dangling high surrogate is dropped as well, so the result never ends in a
 * broken character (it may then be one unit shorter than `max`).
 *
 * @param s - String to shorten.
 * @param max - Maximum length in UTF-16 code units.
 * @returns `s` unchanged when it already fits, otherwise the shortened prefix.
 */
function clip(s, max) {
  if (s.length <= max) return s;
  let end = max;
  if (end > 0) {
    const last = s.charCodeAt(end - 1);
    const next = s.charCodeAt(end);
    const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    if (splitsPair) end -= 1;
  }
  return s.slice(0, end);
}
14106
+ //#endregion
13642
14107
  //#region src/runtime/runtime-instructor.ts
13643
14108
  /**
13644
14109
  * Build the daemon-controlled invariant prose injected into the system prompt
@@ -13724,6 +14189,190 @@ function buildRuntimeInstructor(ctx) {
13724
14189
  ].join("\n");
13725
14190
  }
13726
14191
  //#endregion
14192
+ //#region src/runtime/subagent-tool.ts
14193
+ var SUBAGENT_SUBMIT_TOOL_NAME = "submit_subagent_output";
14194
+ /**
14195
+ * Parameters shape the parent LLM sees when calling the subagent tool.
14196
+ *
14197
+ * - `task` — natural-language instructions for the subagent.
14198
+ * The parent authors this per call. Must be
14199
+ * non-empty.
14200
+ * - `output_schema` — name of a registered SubagentOutputContract.
14201
+ * Resolved at call time; unknown names error.
14202
+ */
14203
+ var SubagentToolParameters = Type$1.Object({
14204
+ task: Type$1.String({
14205
+ minLength: 1,
14206
+ description: "Natural-language instructions for the subagent. The subagent starts with a fresh conversation and a narrowed system prompt; this is the only context it has from you."
14207
+ }),
14208
+ output_schema: Type$1.String({
14209
+ minLength: 1,
14210
+ description: "Name of a registered subagent output contract. The subagent must submit a structured payload via `submit_subagent_output` matching this contract."
14211
+ })
14212
+ }, { additionalProperties: false });
14213
+ var DEFAULT_SUBAGENT_TIMEOUT_MS = 300 * 1e3;
14214
/**
 * Build the `subagent` custom tool for a parent session.
 *
 * Each call validates its parameters, resolves the named output contract,
 * starts a fresh inner session (inherited tools plus a one-shot
 * `submit_subagent_output` tool), prompts it with `task`, and returns the
 * captured payload as JSON text. Every failure mode is reported as an
 * `isError` tool result rather than a rejected promise:
 * invalid parameters, unknown contract, inner session failing to start,
 * `session.prompt()` throwing, timeout, parent cancellation, or the inner
 * session ending without a submission.
 *
 * @param args.buildAgentSession - Optional session factory; defaults to `buildAgentSession`.
 * @param args.mountPath, args.piAuthDir, args.modelHandle, args.agentName -
 *   Forwarded unchanged to the session factory.
 * @param args.inheritedCustomTools - Tools the inner session receives in
 *   addition to the submit tool.
 * @param args.parentRuntimeInstructor - Placed ahead of the subagent
 *   instructor in the inner session's `appendSystemPrompt`.
 * @param args.parentTaskId, args.parentTaskType, args.parentAttemptN - Used
 *   for the instructor text and span attributes.
 * @param args.parentCancelSignal - Optional AbortSignal; aborting it aborts
 *   the inner session.
 * @param args.timeoutMs - Inner-session timeout. `undefined`, `NaN` and
 *   negative values select `DEFAULT_SUBAGENT_TIMEOUT_MS`; `0` disables the
 *   timeout; larger values are capped at the maximum delay Node timers accept.
 * @returns `{ tool, getCallCount }`. The counter includes every call that got
 *   past parameter and contract validation, so executors can emit summary
 *   telemetry when the parent terminates.
 */
function createSubagentTool(args) {
  const buildSession = args.buildAgentSession ?? buildAgentSession;
  // Node coerces delays above this to 1ms, which would abort immediately.
  const MAX_TIMER_DELAY_MS = 2 ** 31 - 1;
  let callCount = 0;

  const errorText = (err) => err instanceof Error ? err.message : String(err);
  const resolveTimeoutMs = () => {
    const requested = args.timeoutMs;
    if (requested === undefined || Number.isNaN(requested) || requested < 0) return DEFAULT_SUBAGENT_TIMEOUT_MS;
    return Math.min(requested, MAX_TIMER_DELAY_MS);
  };
  const abortedResult = (reason, timeoutMs) => toolError(`subagent: ${reason === "subagent_timed_out" ? `subagent timed out after ${timeoutMs}ms` : "parent task was cancelled"}. The parent should fail this task or retry with a clearer scope.`);

  return {
    tool: defineTool({
      name: "subagent",
      label: "Delegate to subagent",
      description: subagentToolDescription(),
      parameters: SubagentToolParameters,
      async execute(_id, params) {
        if (!Value.Check(SubagentToolParameters, params)) return toolError(`subagent: invalid parameters: ${JSON.stringify([...Value.Errors(SubagentToolParameters, params)].slice(0, 3))}`);
        const { task, output_schema } = params;
        const contract = getSubagentOutputContract(output_schema);
        if (!contract) return toolError(`subagent: unknown output_schema "${output_schema}". Registered contracts: [${listSubagentOutputContracts().map((c) => c.name).join(", ")}]`);
        callCount += 1;
        const callIndex = callCount;
        const timeoutMs = resolveTimeoutMs();
        const parentSignal = args.parentCancelSignal || null;
        // Nothing to delegate if the parent is already cancelled: do not
        // build or prompt an inner session at all.
        if (parentSignal?.aborted) return abortedResult("parent_cancelled", timeoutMs);

        let captured = null;
        const submitTool = defineTool({
          name: SUBAGENT_SUBMIT_TOOL_NAME,
          label: `Submit ${output_schema}`,
          description: `Submit your structured output for this subagent task. Call exactly once when done. Args MUST match the ${output_schema} contract; mismatches return a tool error you can recover from in the same session.`,
          parameters: contract.parametersSchema,
          async execute(_innerId, innerParams) {
            if (!Value.Check(contract.parametersSchema, innerParams)) return toolError(`submit_subagent_output: schema validation failed: ${[...Value.Errors(contract.parametersSchema, innerParams)].slice(0, 3).map((e) => `${e.path}: ${e.message}`).join("; ")}. Re-call with a corrected payload.`);
            captured = innerParams;
            return {
              content: [{
                type: "text",
                text: "Output captured. Subagent session will terminate; no further action needed."
              }],
              details: { captured: true },
              terminate: true
            };
          }
        });
        const subagentInstructor = buildSubagentInstructor({
          contractName: output_schema,
          contractDescription: contract.description,
          parentTaskId: args.parentTaskId,
          callIndex
        });

        let session;
        try {
          session = await buildSession({
            mountPath: args.mountPath,
            piAuthDir: args.piAuthDir,
            modelHandle: args.modelHandle,
            agentName: args.agentName,
            customTools: [...args.inheritedCustomTools, submitTool],
            appendSystemPrompt: [args.parentRuntimeInstructor, subagentInstructor],
            // Subagents start without any skills.
            skillsOverride: () => ({
              skills: [],
              diagnostics: []
            }),
            otelSpanAttrs: {
              "moltnet.task.id": args.parentTaskId,
              "moltnet.task.type": args.parentTaskType,
              "moltnet.task.attempt": args.parentAttemptN,
              "moltnet.subagent.contract": output_schema,
              "moltnet.subagent.index": callIndex
            }
          });
        } catch (err) {
          // Keep the "failures are recoverable tool errors" contract even
          // when the inner session cannot be created.
          return toolError(`subagent: failed to start inner session: ${errorText(err)}`);
        }

        let abortReason = null;
        // First abort wins; later triggers (e.g. timeout after cancel) are ignored.
        const fireAbort = (reason) => {
          if (abortReason !== null) return;
          abortReason = reason;
          const reportAbortFailure = (err) => {
            process.stderr.write(`[subagent] inner session.abort() failed: ${errorText(err)}\n`);
          };
          try {
            Promise.resolve(session.abort()).catch(reportAbortFailure);
          } catch (err) {
            reportAbortFailure(err);
          }
        };

        // The parent may have been cancelled while the session was being built.
        if (parentSignal?.aborted) {
          fireAbort("parent_cancelled");
          return abortedResult("parent_cancelled", timeoutMs);
        }

        const onParentAbort = () => fireAbort("parent_cancelled");
        if (parentSignal) parentSignal.addEventListener("abort", onParentAbort, { once: true });
        const timeoutHandle = timeoutMs > 0 ? setTimeout(() => fireAbort("subagent_timed_out"), timeoutMs) : null;

        let promptFailed = false;
        let promptError;
        try {
          await session.prompt(task);
        } catch (err) {
          promptFailed = true;
          promptError = err;
        } finally {
          if (timeoutHandle) clearTimeout(timeoutHandle);
          if (parentSignal) parentSignal.removeEventListener("abort", onParentAbort);
        }

        // An abort explains the outcome better than whatever prompt() threw
        // as a consequence of it, so report the abort reason first.
        if (abortReason !== null) return abortedResult(abortReason, timeoutMs);
        if (promptFailed) return toolError(`subagent: inner session.prompt() threw: ${errorText(promptError)}`);
        if (captured === null) return toolError(`subagent: inner session ended without calling ${SUBAGENT_SUBMIT_TOOL_NAME}. The parent should retry with clearer instructions or fail the task.`);
        return {
          content: [{
            type: "text",
            text: JSON.stringify(captured)
          }],
          details: {
            captured: true,
            contract: output_schema,
            callIndex
          }
        };
      }
    }),
    getCallCount: () => callCount
  };
}
14325
/**
 * Compose the description text shown to the parent LLM for the `subagent` tool.
 *
 * @returns Three paragraphs separated by blank lines: a summary, how the
 *   subagent runs (naming the submit tool), and the success/failure shape of
 *   the tool result.
 */
function subagentToolDescription() {
  const summary = "Delegate a sub-task to a fresh subagent session with isolated context.";
  const mechanics = [
    "The subagent starts with no conversation history and only the `task` ",
    "string you provide as its instructions. It runs in the same VM with ",
    "the same tools you have (Gondolin-routed Read/Write/Edit/Bash, ",
    "moltnet_* tools), and is expected to call ",
    `\`${SUBAGENT_SUBMIT_TOOL_NAME}\` with a payload matching the named `,
    "contract before its session ends."
  ].join("\n");
  const outcomes = [
    "On success, the tool result is the JSON-stringified subagent payload.",
    "On failure (unknown contract, validation error, subagent did not ",
    "submit) the tool returns isError:true with a recoverable message."
  ].join("\n");
  return [summary, mechanics, outcomes].join("\n\n");
}
14341
/**
 * Compose the system-prompt fragment that tells an inner session it is a
 * subagent and what it must deliver.
 *
 * @param args.parentTaskId - Id of the delegating task, quoted in the header.
 * @param args.callIndex - Ordinal of this subagent call within the parent.
 * @param args.contractName - Name of the assigned output contract.
 * @param args.contractDescription - Contract description, rendered verbatim on
 *   its own line.
 * @returns Markdown text: a header followed by the session rules.
 */
function buildSubagentInstructor(args) {
  const header = [
    "# You are a subagent",
    "",
    `Parent task: \`${args.parentTaskId}\` (subagent call #${args.callIndex}).`,
    "",
    `Your assigned output contract is \`${args.contractName}\`:`,
    `${args.contractDescription}`,
    ""
  ];
  const rules = [
    "Rules for this session:",
    "",
    `- You MUST call \`${SUBAGENT_SUBMIT_TOOL_NAME}\` exactly once with a `,
    " payload matching the contract above. Your session terminates on ",
    " the valid call.",
    "- The parent's message above is your task. Do not invent additional ",
    " steps the parent did not request.",
    "- All MoltNet runtime invariants from the parent runtime instructor ",
    " apply (diary discipline, gh-auth pattern, etc.) IF you take any ",
    " action that would trigger them. Most subagents do not commit code ",
    " or open PRs — only do so if your task message explicitly requires it.",
    "- You do NOT have access to the `subagent` tool. Do not attempt nested ",
    " delegation; do the work yourself."
  ];
  return header.concat(rules).join("\n");
}
14365
/**
 * Wrap a message in the error-shaped tool result used by the subagent tools.
 *
 * @param text - Message for the calling LLM.
 * @returns A result with one text content item, `details.captured === false`
 *   and `isError: true`.
 */
function toolError(text) {
  const message = { type: "text", text };
  return {
    content: [message],
    details: { captured: false },
    isError: true
  };
}
14375
+ //#endregion
13727
14376
  //#region src/runtime/task-output.ts
13728
14377
  var METER_NAME = "@themoltnet/pi-extension/task-output";
13729
14378
  var parseResultCounter = null;
@@ -13962,6 +14611,7 @@ function resolveSubmitTools(taskType, opts = {}) {
13962
14611
  * Anthropic-SDK one) plug in via the `executeTask` function injected into
13963
14612
  * `AgentRuntime`.
13964
14613
  */
14614
+ var noopTurnEventHandler = () => {};
13965
14615
  /**
13966
14616
  * Factory that builds a pi-specific `executeTask` function suitable for
13967
14617
  * injection into `AgentRuntime`. The returned function caches the resolved
@@ -14034,6 +14684,7 @@ async function executePiTask(claimedTask, reporter, opts) {
14034
14684
  const taskTeamId = task.teamId ?? "";
14035
14685
  let reporterOpen = false;
14036
14686
  let session = null;
14687
+ let subagentHandle = null;
14037
14688
  const finalUsage = emptyUsage(opts.provider, opts.model);
14038
14689
  let cancelListener = null;
14039
14690
  const makeFailedOutput = (code, message, usage = finalUsage) => ({
@@ -14058,10 +14709,25 @@ async function executePiTask(claimedTask, reporter, opts) {
14058
14709
  attemptN
14059
14710
  });
14060
14711
  reporterOpen = true;
14061
- const emit = (kind, payload) => reporter.record({
14062
- kind,
14063
- payload
14064
- });
14712
+ let onTurnEvent;
14713
+ if (opts.makeOnTurnEvent) try {
14714
+ onTurnEvent = opts.makeOnTurnEvent(claimedTask);
14715
+ } catch (err) {
14716
+ process.stderr.write(`[emit] makeOnTurnEvent threw: ${err instanceof Error ? err.message : String(err)}\n`);
14717
+ onTurnEvent = noopTurnEventHandler;
14718
+ }
14719
+ else onTurnEvent = opts.onTurnEvent ?? noopTurnEventHandler;
14720
+ const emit = (kind, payload) => {
14721
+ try {
14722
+ onTurnEvent(kind, summarizePayloadForLog(kind, payload));
14723
+ } catch (err) {
14724
+ process.stderr.write(`[emit] onTurnEvent threw for kind="${kind}": ${err instanceof Error ? err.message : String(err)}\n`);
14725
+ }
14726
+ return reporter.record({
14727
+ kind,
14728
+ payload
14729
+ });
14730
+ };
14065
14731
  await emit("info", {
14066
14732
  event: "execute_start",
14067
14733
  taskType: task.taskType,
@@ -14071,7 +14737,7 @@ async function executePiTask(claimedTask, reporter, opts) {
14071
14737
  });
14072
14738
  let taskPrompt;
14073
14739
  try {
14074
- taskPrompt = buildPromptForTask(task, {
14740
+ taskPrompt = buildTaskUserPrompt(task, {
14075
14741
  diaryId,
14076
14742
  taskId: task.id,
14077
14743
  extras: opts.promptExtras
@@ -14084,6 +14750,30 @@ async function executePiTask(claimedTask, reporter, opts) {
14084
14750
  });
14085
14751
  return makeFailedOutput("prompt_build_failed", message);
14086
14752
  }
14753
+ const rawContext = task.input.context;
14754
+ let injectedContext;
14755
+ try {
14756
+ const contextArray = rawContext === void 0 ? [] : rawContext;
14757
+ if (!Value.Check(TaskContext, contextArray)) throw new Error(`task.input.context failed TaskContext validation: ${JSON.stringify([...Value.Errors(TaskContext, contextArray)].slice(0, 3))}`);
14758
+ injectedContext = await injectTaskContext({
14759
+ context: contextArray,
14760
+ fs: managed.vm.fs
14761
+ });
14762
+ } catch (err) {
14763
+ const message = err instanceof Error ? err.message : String(err);
14764
+ await emit("error", {
14765
+ message,
14766
+ phase: "context_resolution"
14767
+ });
14768
+ return makeFailedOutput("context_resolution_failed", message);
14769
+ }
14770
+ if (injectedContext.injected.length > 0) await emit("info", {
14771
+ event: "context_injected",
14772
+ count: injectedContext.injected.length,
14773
+ bindings: injectedContext.injected.map((r) => r.binding),
14774
+ slugs: injectedContext.injected.map((r) => r.slug)
14775
+ });
14776
+ if (injectedContext.userInlineSuffix) taskPrompt = `${taskPrompt}\n\n---\n\n${injectedContext.userInlineSuffix}`;
14087
14777
  const gondolinCustomTools = [
14088
14778
  createReadToolDefinition(mountPath, { operations: createGondolinReadOps(managed.vm, mountPath) }),
14089
14779
  createWriteToolDefinition(mountPath, { operations: createGondolinWriteOps(managed.vm, mountPath) }),
@@ -14112,14 +14802,6 @@ async function executePiTask(claimedTask, reporter, opts) {
14112
14802
  });
14113
14803
  const piAuthDir = process.env.PI_CODING_AGENT_DIR ?? join(homedir(), ".pi", "agent");
14114
14804
  const modelHandle = getModel(opts.provider, opts.model);
14115
- const piOtelExtension = createPiOtelExtension({
14116
- agentName: opts.agentName,
14117
- spanAttributes: {
14118
- "moltnet.task.id": task.id,
14119
- "moltnet.task.attempt": attemptN,
14120
- "moltnet.task.type": task.taskType
14121
- }
14122
- });
14123
14805
  const runtimeInstructor = buildRuntimeInstructor({
14124
14806
  taskId: task.id,
14125
14807
  taskType: task.taskType,
@@ -14128,29 +14810,47 @@ async function executePiTask(claimedTask, reporter, opts) {
14128
14810
  agentName: opts.agentName,
14129
14811
  correlationId: task.correlationId ?? null
14130
14812
  });
14131
- const resourceLoader = new DefaultResourceLoader({
14132
- cwd: mountPath,
14133
- agentDir: piAuthDir,
14134
- extensionFactories: [piOtelExtension],
14135
- appendSystemPrompt: [runtimeInstructor],
14136
- skillsOverride: () => ({
14137
- skills: [],
14138
- diagnostics: []
14139
- })
14140
- });
14141
- await resourceLoader.reload();
14142
- session = (await createAgentSession({
14143
- agentDir: piAuthDir,
14144
- cwd: mountPath,
14145
- model: modelHandle,
14813
+ const appendSystemPrompt = [runtimeInstructor];
14814
+ if (injectedContext.systemPromptPrefix) appendSystemPrompt.push(injectedContext.systemPromptPrefix);
14815
+ const injectedSkills = injectedContext.skills;
14816
+ const parentSubagentTools = [];
14817
+ if (taskTypeUsesSubagents(task.taskType)) {
14818
+ subagentHandle = createSubagentTool({
14819
+ mountPath,
14820
+ piAuthDir,
14821
+ modelHandle,
14822
+ agentName: opts.agentName,
14823
+ inheritedCustomTools: [...gondolinCustomTools, ...moltnetTools],
14824
+ parentRuntimeInstructor: runtimeInstructor,
14825
+ parentTaskId: task.id,
14826
+ parentTaskType: task.taskType,
14827
+ parentAttemptN: attemptN,
14828
+ parentCancelSignal: reporter.cancelSignal
14829
+ });
14830
+ parentSubagentTools.push(subagentHandle.tool);
14831
+ }
14832
+ session = await buildAgentSession({
14833
+ mountPath,
14834
+ piAuthDir,
14835
+ modelHandle,
14836
+ agentName: opts.agentName,
14146
14837
  customTools: [
14147
14838
  ...gondolinCustomTools,
14148
14839
  ...moltnetTools,
14149
- ...submitTools
14840
+ ...submitTools,
14841
+ ...parentSubagentTools
14150
14842
  ],
14151
- sessionManager: SessionManager.inMemory(),
14152
- resourceLoader
14153
- })).session;
14843
+ appendSystemPrompt,
14844
+ skillsOverride: () => ({
14845
+ skills: injectedSkills,
14846
+ diagnostics: []
14847
+ }),
14848
+ otelSpanAttrs: {
14849
+ "moltnet.task.id": task.id,
14850
+ "moltnet.task.attempt": attemptN,
14851
+ "moltnet.task.type": task.taskType
14852
+ }
14853
+ });
14154
14854
  } catch (err) {
14155
14855
  const message = err instanceof Error ? err.message : String(err);
14156
14856
  await emit("error", {
@@ -14221,6 +14921,10 @@ async function executePiTask(claimedTask, reporter, opts) {
14221
14921
  phase: "session_prompt"
14222
14922
  });
14223
14923
  }
14924
+ if (subagentHandle && subagentHandle.getCallCount() > 0) await emit("info", {
14925
+ event: "subagent_summary",
14926
+ callCount: subagentHandle.getCallCount()
14927
+ });
14224
14928
  await Promise.all(recordingPromise);
14225
14929
  const cancelled = reporter.cancelSignal.aborted;
14226
14930
  let parsedOutput = null;
@@ -14359,6 +15063,27 @@ function wireSessionAbort(cancelSignal, session) {
14359
15063
  * `task_messages.payload` row. Bodies above 4 KiB are replaced with a
14360
15064
  * `{ truncated, original_size }` marker so the JSONL/DB size stays bounded.
14361
15065
  */
15066
/**
 * Reduce a turn-event payload to a compact summary for the turn-event handler.
 *
 * - `text_delta`      → `{ chars }`: length of the delta (0 if not a string)
 * - `tool_call_start` → `{ tool }`
 * - `tool_call_end`   → `{ tool, is_error }`, plus `result` only for errors
 *                       that carry one
 * - `turn_end`        → `{ stop_reason }`
 * - `error`           → `{ phase, message }`, string messages cut to
 *                       `TRUNCATE_LIMIT` characters
 * - `info`            → shallow copy, string values cut to `TRUNCATE_LIMIT`
 * - anything else     → the payload itself, unchanged
 *
 * @param kind - Event kind selecting the summary shape.
 * @param payload - Event payload for that kind.
 * @returns The summary object (or the original payload for unknown kinds).
 */
function summarizePayloadForLog(kind, payload) {
  const capText = (value) => typeof value === "string" ? value.slice(0, TRUNCATE_LIMIT) : value;

  if (kind === "text_delta") {
    const delta = payload.delta;
    return { chars: typeof delta === "string" ? delta.length : 0 };
  }
  if (kind === "tool_call_start") {
    return { tool: payload.tool_name };
  }
  if (kind === "tool_call_end") {
    const failed = payload.is_error === true;
    const summary = { tool: payload.tool_name, is_error: failed };
    // Results are only worth logging when the call failed.
    if (failed && payload.result !== undefined) {
      summary.result = payload.result;
    }
    return summary;
  }
  if (kind === "turn_end") {
    return { stop_reason: payload.stop_reason };
  }
  if (kind === "error") {
    return { phase: payload.phase, message: capText(payload.message) };
  }
  if (kind === "info") {
    const capped = Object.entries(payload).map(([key, value]) => [key, capText(value)]);
    return Object.fromEntries(capped);
  }
  return payload;
}
14362
15087
  var TRUNCATE_LIMIT = 4 * 1024;
14363
15088
  function truncateForWire(value) {
14364
15089
  if (value === null || value === void 0) return value;
@@ -14659,4 +15384,4 @@ function moltnetExtension(pi) {
14659
15384
  registerMoltnetReflectCommand(pi, state);
14660
15385
  }
14661
15386
  //#endregion
14662
- export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, loadCredentials, resumeVm, toGuestPath };
15387
+ export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, buildAgentSession, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, createSubagentTool, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, injectTaskContext, loadCredentials, resumeVm, toGuestPath };