@themoltnet/pi-extension 0.13.5 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/dist/index.d.ts +107 -1
  2. package/dist/index.js +632 -165
  3. package/package.json +3 -3
package/dist/index.d.ts CHANGED
@@ -3,6 +3,7 @@ import { connect } from '@themoltnet/sdk';
3
3
  import { EditOperations } from '@earendil-works/pi-coding-agent';
4
4
  import { ExtensionAPI } from '@earendil-works/pi-coding-agent';
5
5
  import { ReadOperations } from '@earendil-works/pi-coding-agent';
6
+ import { Skill } from '@earendil-works/pi-coding-agent';
6
7
  import { Static } from '@sinclair/typebox';
7
8
  import { TArray } from '@sinclair/typebox';
8
9
  import { TBoolean } from '@sinclair/typebox';
@@ -35,6 +36,31 @@ declare interface ClaimedTask {
35
36
  traceHeaders: Record<string, string>;
36
37
  }
37
38
 
39
+ /**
40
+ * One context entry. Bytes are inlined: the imposer chose them, and the
41
+ * task's `inputCid` already pins the entire input — including
42
+ * `context[]` — so we don't need a separate per-entry hash, fetcher, or
43
+ * flagged-content gate. Tasks reference rendered packs (or any other
44
+ * external content) by copying their bytes into `content` at task
45
+ * creation time.
46
+ *
47
+ * - `slug` — short identifier the daemon uses to disambiguate
48
+ * entries. For `skill` binding it becomes the directory
49
+ * name under the runtime's skill discovery path. Must be
50
+ * kebab-case-safe (alphanumeric + dashes/underscores).
51
+ * - `binding` — how the bytes are delivered to the LLM (`skill`, `prompt_prefix`, or `user_inline`).
52
+ * - `content` — the actual bytes (UTF-8 text). Capped at 32 KiB per
53
+ * entry; total per-task context bytes are bounded by the
54
+ * soft `maxItems` cap and per-binding daemon limits.
55
+ */
56
+ declare const ContextRef: TObject< {
57
+ slug: TString;
58
+ binding: TUnion<[TLiteral<"skill">, TLiteral<"prompt_prefix">, TLiteral<"user_inline">]>;
59
+ content: TString;
60
+ }>;
61
+
62
+ declare type ContextRef = Static<typeof ContextRef>;
63
+
38
64
  export declare function createGondolinBashOps(vm: VM, localCwd: string): BashOperations;
39
65
 
40
66
  export declare function createGondolinEditOps(vm: VM, localCwd: string): EditOperations;
@@ -91,7 +117,7 @@ export declare interface ExecutePiTaskOptions {
91
117
  /** Sandbox overrides (env, VFS shadows, resources). */
92
118
  sandboxConfig?: SandboxConfig;
93
119
  /**
94
- * Forwarded to `buildPromptForTask` for per-type builders. Static
120
+ * Forwarded to `buildTaskUserPrompt` for per-type builders. Static
95
121
  * across tasks. Today no built-in builder needs per-task `extras` —
96
122
  * judges fetch their own dependent data via MoltNet tools
97
123
  * (`moltnet_get_task`, `moltnet_list_task_attempts`, etc.) at run
@@ -107,6 +133,24 @@ export declare interface ExecutePiTaskOptions {
107
133
  * across tasks.
108
134
  */
109
135
  checkpointPath?: string;
136
+ /**
137
+ * Optional callback invoked alongside every `reporter.record()` so
138
+ * the daemon can mirror task messages into its local logger.
139
+ * Bound at executor-construction time — use when one task runs per
140
+ * process (e.g. `once.ts`) and per-task context is known before
141
+ * the executor is built. For poll mode, prefer `makeOnTurnEvent`
142
+ * below. If both are set, `makeOnTurnEvent` wins.
143
+ * See `TurnEventHandler` for payload shape. Defaults to a no-op.
144
+ */
145
+ onTurnEvent?: TurnEventHandler;
146
+ /**
147
+ * Per-task factory variant for `onTurnEvent`. Invoked once per
148
+ * task with the claimed task before any emit, so the returned
149
+ * handler can bind taskId / attemptN into a pino child.
150
+ * Use in poll mode where N tasks run sequentially in the same
151
+ * process. See #1078.
152
+ */
153
+ makeOnTurnEvent?: TurnEventHandlerFactory;
110
154
  }
111
155
 
112
156
  /**
@@ -121,6 +165,32 @@ export declare function findMainWorktree(): string;
121
165
  */
122
166
  export declare const HOST_EXEC_DEFAULT_BASE_ENV: ReadonlySet<string>;
123
167
 
168
+ export declare interface InjectedTaskContext {
169
+ /** Refs that were delivered, in declared order, for audit. */
170
+ injected: ContextRef[];
171
+ /** Synthetic Skill objects to splice into pi's skillsOverride. */
172
+ skills: Skill[];
173
+ /** Prepend this to `appendSystemPrompt`. Empty when nothing
174
+ * contributed (omit the array entry rather than pass an empty
175
+ * string to keep pi's prompt assembly tidy). */
176
+ systemPromptPrefix: string;
177
+ /** Append this to the task user prompt BEFORE `session.prompt()`. */
178
+ userInlineSuffix: string;
179
+ }
180
+
181
+ /**
182
+ * Resolve a task's `input.context[]` and inject the side effects pi
183
+ * needs. Safe to call with an empty array — returns an inert result.
184
+ */
185
+ export declare function injectTaskContext(args: InjectTaskContextArgs): Promise<InjectedTaskContext>;
186
+
187
+ export declare interface InjectTaskContextArgs {
188
+ /** Empty array (the default for any non-eval task) is a no-op. */
189
+ context: TaskContext;
190
+ /** Guest filesystem handle. In production this is `managed.vm.fs`. */
191
+ fs: VmFsForContext;
192
+ }
193
+
124
194
  export declare function loadCredentials(agentDir: string): VmCredentials;
125
195
 
126
196
  export declare interface ManagedVm {
@@ -264,6 +334,10 @@ declare const Task: TObject< {
264
334
  imposedByHumanId: TUnion<[TString, TNull]>;
265
335
  acceptedAttemptN: TUnion<[TNumber, TNull]>;
266
336
  requiredExecutorTrustLevel: TUnion<[TLiteral<"selfDeclared">, TLiteral<"agentSigned">, TLiteral<"releaseVerifiedTool">, TLiteral<"sandboxAttested">]>;
337
+ allowedExecutors: TArray<TObject< {
338
+ provider: TString;
339
+ model: TString;
340
+ }>>;
267
341
  status: TUnion<[TLiteral<"queued">, TLiteral<"dispatched">, TLiteral<"running">, TLiteral<"completed">, TLiteral<"failed">, TLiteral<"cancelled">, TLiteral<"expired">]>;
268
342
  queuedAt: TString;
269
343
  completedAt: TUnion<[TString, TNull]>;
@@ -278,6 +352,15 @@ declare const Task: TObject< {
278
352
 
279
353
  declare type Task = Static<typeof Task>;
280
354
 
355
+ /** Reusable input fragment for any task type. Soft cap at 5 items. */
356
+ declare const TaskContext: TArray<TObject< {
357
+ slug: TString;
358
+ binding: TUnion<[TLiteral<"skill">, TLiteral<"prompt_prefix">, TLiteral<"user_inline">]>;
359
+ content: TString;
360
+ }>>;
361
+
362
+ declare type TaskContext = Static<typeof TaskContext>;
363
+
281
364
  declare const TaskMessage: TObject< {
282
365
  taskId: TString;
283
366
  attemptN: TNumber;
@@ -410,6 +493,14 @@ declare interface TrackedError {
410
493
  timestamp: number;
411
494
  }
412
495
 
496
+ export declare interface TurnEventHandler {
497
+ (event: TurnEventKind, summary: Record<string, unknown>): void;
498
+ }
499
+
500
+ export declare type TurnEventHandlerFactory = (claimedTask: ClaimedTask) => TurnEventHandler;
501
+
502
+ export declare type TurnEventKind = Parameters<TaskReporter['record']>[0]['kind'];
503
+
413
504
  export declare interface VmConfig {
414
505
  /** Absolute path to the qcow2 checkpoint. */
415
506
  checkpointPath: string;
@@ -444,4 +535,19 @@ export declare interface VmCredentials {
444
535
  githubAppPemFilename: string | null;
445
536
  }
446
537
 
538
+ /**
539
+ * Subset of `@earendil-works/gondolin`'s `VmFs` we actually use. We
540
+ * narrow the dependency surface so unit tests can hand in a
541
+ * vitest-mocked object without instantiating a real VM. We use `any`
542
+ * for the options parameter to make this interface bivariantly
543
+ * compatible with `VmFs` (whose options types differ between
544
+ * `mkdir` and `writeFile`); the orchestrator only ever calls these
545
+ * methods with the documented option shape, so the looseness is
546
+ * confined to this seam.
547
+ */
548
+ export declare interface VmFsForContext {
549
+ mkdir: (dirPath: string, options?: any) => Promise<void>;
550
+ writeFile: (filePath: string, data: string | Uint8Array, options?: any) => Promise<void>;
551
+ }
552
+
447
553
  export { }
package/dist/index.js CHANGED
@@ -2,17 +2,17 @@ import { createRequire } from "node:module";
2
2
  import { execFileSync } from "node:child_process";
3
3
  import { existsSync, mkdirSync, readFileSync, readdirSync, rmSync, statSync } from "node:fs";
4
4
  import path, { join } from "node:path";
5
- import { DefaultResourceLoader, SessionManager, createAgentSession, createBashTool, createBashToolDefinition, createEditTool, createEditToolDefinition, createReadTool, createReadToolDefinition, createWriteTool, createWriteToolDefinition, defineTool } from "@earendil-works/pi-coding-agent";
5
+ import { DefaultResourceLoader, SessionManager, createAgentSession, createBashTool, createBashToolDefinition, createEditTool, createEditToolDefinition, createReadTool, createReadToolDefinition, createSyntheticSourceInfo, createWriteTool, createWriteToolDefinition, defineTool, parseFrontmatter } from "@earendil-works/pi-coding-agent";
6
6
  import { createHash } from "node:crypto";
7
7
  import crypto, { createHash as createHash$1 } from "crypto";
8
8
  import { readFile } from "node:fs/promises";
9
9
  import { homedir } from "node:os";
10
10
  import { Type, getModel } from "@earendil-works/pi-ai";
11
- import { RealFSProvider, ShadowProvider, VM, VmCheckpoint, createHttpHooks, createShadowPathPredicate, ensureImageSelector, loadGuestAssets } from "@earendil-works/gondolin";
11
+ import { MemoryProvider, RealFSProvider, ShadowProvider, VM, VmCheckpoint, createHttpHooks, createShadowPathPredicate, ensureImageSelector, loadGuestAssets } from "@earendil-works/gondolin";
12
12
  import { parseEnv } from "node:util";
13
13
  import { SpanStatusCode, context, metrics, trace } from "@opentelemetry/api";
14
- import { FormatRegistry, Type as Type$1 } from "@sinclair/typebox";
15
14
  import { Value } from "@sinclair/typebox/value";
15
+ import { FormatRegistry, Type as Type$1 } from "@sinclair/typebox";
16
16
  //#region \0rolldown/runtime.js
17
17
  var __defProp = Object.defineProperty;
18
18
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
@@ -2424,13 +2424,31 @@ function problemToError(problem, statusCode) {
2424
2424
  //#endregion
2425
2425
  //#region ../sdk/src/agent-context.ts
2426
2426
  function unwrapResult(result) {
2427
- if (result.error) {
2427
+ if (result.error !== void 0 && result.error !== null) {
2428
2428
  const error = result.error;
2429
- throw problemToError(error, error.status ?? 500);
2429
+ if (isProblemDetails(error)) throw problemToError(error, error.status);
2430
+ if (error instanceof Error && result.response === void 0) {
2431
+ const networkError = new NetworkError(error.message, { detail: error.cause ? stringifyUnknown(error.cause) : void 0 });
2432
+ networkError.stack = error.stack;
2433
+ throw networkError;
2434
+ }
2435
+ throw new MoltNetError(`Unexpected error from MoltNet API: ${stringifyUnknown(error)}`, { code: "UNKNOWN" });
2430
2436
  }
2431
2437
  if (result.data === void 0) throw new MoltNetError("Unexpected empty response from MoltNet API", { code: "EMPTY_RESPONSE" });
2432
2438
  return result.data;
2433
2439
  }
2440
+ function isProblemDetails(error) {
2441
+ if (!error || typeof error !== "object") return false;
2442
+ return typeof error.status === "number" && ("title" in error || "detail" in error);
2443
+ }
2444
+ function stringifyUnknown(value) {
2445
+ if (value instanceof Error) return `${value.name}: ${value.message}`;
2446
+ try {
2447
+ return JSON.stringify(value) ?? String(value);
2448
+ } catch {
2449
+ return String(value);
2450
+ }
2451
+ }
2434
2452
  function unwrapRequired(result, message, code) {
2435
2453
  if (result.error || !result.data) throw new MoltNetError(message, { code });
2436
2454
  return result.data;
@@ -8057,138 +8075,29 @@ function pruneOldSnapshots(maxCached, currentDir) {
8057
8075
  });
8058
8076
  }
8059
8077
  //#endregion
8060
- //#region src/tool-operations.ts
8061
- /**
8062
- * Gondolin tool operations: redirect pi's built-in tool operations
8063
- * (read, write, edit, bash) to execute inside the VM.
8064
- *
8065
- * Follows the same pattern as upstream pi-gondolin.ts — pi's tool factories
8066
- * accept an `operations` object that provides the underlying I/O.
8067
- */
8078
+ //#region src/vm-manager.ts
8068
8079
  var GUEST_WORKSPACE$2 = "/workspace";
8069
- function shQuote(s) {
8070
- return "'" + s.replace(/'/g, "'\\''") + "'";
8071
- }
8072
8080
  /**
8073
- * Map a host-side absolute path to a guest-side /workspace path.
8074
- * Throws if the path escapes the workspace.
8075
- */
8076
- function toGuestPath(localCwd, localPath) {
8077
- if (localPath === GUEST_WORKSPACE$2 || localPath.startsWith(`${GUEST_WORKSPACE$2}/`)) return localPath;
8078
- const rel = path.relative(localCwd, localPath);
8079
- if (rel === "") return GUEST_WORKSPACE$2;
8080
- if (rel.startsWith("..") || path.isAbsolute(rel)) throw new Error(`path escapes workspace: ${localPath}`);
8081
- const posixRel = rel.split(path.sep).join(path.posix.sep);
8082
- return path.posix.join(GUEST_WORKSPACE$2, posixRel);
8083
- }
8084
- function createGondolinReadOps(vm, localCwd) {
8085
- return {
8086
- readFile: async (p) => {
8087
- const r = await vm.exec(["/bin/cat", toGuestPath(localCwd, p)]);
8088
- if (!r.ok) throw new Error(`cat failed (${r.exitCode}): ${r.stderr}`);
8089
- return r.stdoutBuffer;
8090
- },
8091
- access: async (p) => {
8092
- if (!(await vm.exec([
8093
- "/bin/sh",
8094
- "-lc",
8095
- `test -r ${shQuote(toGuestPath(localCwd, p))}`
8096
- ])).ok) throw new Error(`not readable: ${p}`);
8097
- },
8098
- detectImageMimeType: async (p) => {
8099
- try {
8100
- const r = await vm.exec([
8101
- "/bin/sh",
8102
- "-lc",
8103
- `file --mime-type -b ${shQuote(toGuestPath(localCwd, p))}`
8104
- ]);
8105
- if (!r.ok) return null;
8106
- const m = r.stdout.trim();
8107
- return [
8108
- "image/jpeg",
8109
- "image/png",
8110
- "image/gif",
8111
- "image/webp"
8112
- ].includes(m) ? m : null;
8113
- } catch {
8114
- return null;
8115
- }
8116
- }
8117
- };
8118
- }
8119
- function createGondolinWriteOps(vm, localCwd) {
8120
- return {
8121
- writeFile: async (p, content) => {
8122
- const guestPath = toGuestPath(localCwd, p);
8123
- const dir = path.posix.dirname(guestPath);
8124
- const b64 = Buffer.from(content, "utf8").toString("base64");
8125
- const r = await vm.exec([
8126
- "/bin/sh",
8127
- "-lc",
8128
- [
8129
- "set -eu",
8130
- `mkdir -p ${shQuote(dir)}`,
8131
- `echo ${shQuote(b64)} | base64 -d > ${shQuote(guestPath)}`
8132
- ].join("\n")
8133
- ]);
8134
- if (!r.ok) throw new Error(`write failed (${r.exitCode}): ${r.stderr}`);
8135
- },
8136
- mkdir: async (dir) => {
8137
- const r = await vm.exec([
8138
- "/bin/mkdir",
8139
- "-p",
8140
- toGuestPath(localCwd, dir)
8141
- ]);
8142
- if (!r.ok) throw new Error(`mkdir failed (${r.exitCode}): ${r.stderr}`);
8143
- }
8144
- };
8145
- }
8146
- function createGondolinEditOps(vm, localCwd) {
8147
- const r = createGondolinReadOps(vm, localCwd);
8148
- const w = createGondolinWriteOps(vm, localCwd);
8149
- return {
8150
- readFile: r.readFile,
8151
- access: r.access,
8152
- writeFile: w.writeFile
8153
- };
8154
- }
8155
- function createGondolinBashOps(vm, localCwd) {
8156
- return { exec: async (command, cwd, { onData, signal, timeout, env }) => {
8157
- const guestCwd = toGuestPath(localCwd, cwd);
8158
- const ac = new AbortController();
8159
- const onAbort = () => ac.abort();
8160
- signal?.addEventListener("abort", onAbort, { once: true });
8161
- let timedOut = false;
8162
- const timer = timeout && timeout > 0 ? setTimeout(() => {
8163
- timedOut = true;
8164
- ac.abort();
8165
- }, timeout * 1e3) : void 0;
8166
- try {
8167
- const proc = vm.exec([
8168
- "/bin/sh",
8169
- "-lc",
8170
- command
8171
- ], {
8172
- cwd: guestCwd,
8173
- signal: ac.signal,
8174
- stdout: "pipe",
8175
- stderr: "pipe"
8176
- });
8177
- for await (const chunk of proc.output()) onData(typeof chunk.data === "string" ? Buffer.from(chunk.data, "utf8") : chunk.data);
8178
- return { exitCode: (await proc).exitCode };
8179
- } catch (err) {
8180
- if (signal?.aborted) throw new Error("aborted");
8181
- if (timedOut) throw new Error(`timeout:${timeout}`);
8182
- throw err;
8183
- } finally {
8184
- if (timer) clearTimeout(timer);
8185
- signal?.removeEventListener("abort", onAbort);
8186
- }
8187
- } };
8188
- }
8189
- //#endregion
8190
- //#region src/vm-manager.ts
8191
- var GUEST_WORKSPACE$1 = "/workspace";
8081
+ * Memory-backed VFS mount used by the daemon to inject task-context
8082
+ * skills (#943 slice 1.5). Sibling of /workspace, NOT a sub-path —
8083
+ * Gondolin mounts can't nest. The agent's Gondolin-bound Read tool
8084
+ * accepts paths under this prefix (see toGuestPath in tool-operations.ts).
8085
+ *
8086
+ * Why MemoryProvider rather than a path under /workspace:
8087
+ * - Injected skills are ephemeral by intent: per-task-attempt input
8088
+ * scoped to the VM lifetime. MemoryProvider models that exactly:
8089
+ * in-memory, per-VM-instance, zero host artefacts, automatic
8090
+ * cleanup on VM close.
8091
+ * - Writing under /workspace fails in worktrees because we symlink
8092
+ * `.moltnet/` to the main repo (so credentials are reachable from
8093
+ * worktrees), and Gondolin's RealFSProvider correctly refuses to
8094
+ * create paths whose ancestors' realpath escapes the mount root.
8095
+ * That refusal is a deliberate sandbox-escape protection, not a
8096
+ * bug. See diary semantic entry cd27d9d3-efdc-4aec-ac0d-5fd8ce258d1f
8097
+ * and episodic 7affbfeb-18a2-4963-aeac-c177eb2afa2d for the full
8098
+ * investigation and the alternatives we rejected.
8099
+ */
8100
+ var GUEST_TASK_SKILLS_MOUNT = "/moltnet-task-skills";
8192
8101
  /**
8193
8102
  * Resolve the main worktree root (where .moltnet/ lives — it's untracked,
8194
8103
  * only exists in the main worktree, not in git worktrees).
@@ -8317,7 +8226,10 @@ async function resumeVm(config) {
8317
8226
  env: vmEnv,
8318
8227
  ...resources?.memory && { memory: resources.memory },
8319
8228
  ...resources?.cpus && { cpus: resources.cpus },
8320
- vfs: { mounts: { [GUEST_WORKSPACE$1]: workspaceProvider } }
8229
+ vfs: { mounts: {
8230
+ [GUEST_WORKSPACE$2]: workspaceProvider,
8231
+ [GUEST_TASK_SKILLS_MOUNT]: new MemoryProvider()
8232
+ } }
8321
8233
  });
8322
8234
  await vm.exec(`sh -c '
8323
8235
  cp /etc/gondolin/mitm/ca.crt /usr/local/share/ca-certificates/gondolin-mitm.crt
@@ -8347,7 +8259,7 @@ nameserver 1.1.1.1" > /etc/resolv.conf'`);
8347
8259
  vm,
8348
8260
  credentials: creds,
8349
8261
  mountPath: config.mountPath,
8350
- guestWorkspace: GUEST_WORKSPACE$1,
8262
+ guestWorkspace: GUEST_WORKSPACE$2,
8351
8263
  agentDir
8352
8264
  };
8353
8265
  }
@@ -8400,6 +8312,137 @@ function ensureRelativeWorktreePaths(gitconfig) {
8400
8312
  return `${gitconfig}${gitconfig.endsWith("\n") ? "" : "\n"}[worktree]\n\tuseRelativePaths = true\n`;
8401
8313
  }
8402
8314
  //#endregion
8315
+ //#region src/tool-operations.ts
8316
+ /**
8317
+ * Gondolin tool operations: redirect pi's built-in tool operations
8318
+ * (read, write, edit, bash) to execute inside the VM.
8319
+ *
8320
+ * Follows the same pattern as upstream pi-gondolin.ts — pi's tool factories
8321
+ * accept an `operations` object that provides the underlying I/O.
8322
+ */
8323
+ var GUEST_WORKSPACE$1 = "/workspace";
8324
+ function shQuote(s) {
8325
+ return "'" + s.replace(/'/g, "'\\''") + "'";
8326
+ }
8327
+ /**
8328
+ * Map a host-side absolute path to a guest-side /workspace path; paths already under /workspace or /moltnet-task-skills are returned unchanged.
8329
+ * Throws if the path escapes the workspace.
8330
+ */
8331
+ function toGuestPath(localCwd, localPath) {
8332
+ if (localPath === GUEST_WORKSPACE$1 || localPath.startsWith(`${GUEST_WORKSPACE$1}/`)) return localPath;
8333
+ if (localPath === "/moltnet-task-skills" || localPath.startsWith(`/moltnet-task-skills/`)) return localPath;
8334
+ const rel = path.relative(localCwd, localPath);
8335
+ if (rel === "") return GUEST_WORKSPACE$1;
8336
+ if (rel.startsWith("..") || path.isAbsolute(rel)) throw new Error(`path escapes workspace: ${localPath}`);
8337
+ const posixRel = rel.split(path.sep).join(path.posix.sep);
8338
+ return path.posix.join(GUEST_WORKSPACE$1, posixRel);
8339
+ }
8340
+ function createGondolinReadOps(vm, localCwd) {
8341
+ return {
8342
+ readFile: async (p) => {
8343
+ const r = await vm.exec(["/bin/cat", toGuestPath(localCwd, p)]);
8344
+ if (!r.ok) throw new Error(`cat failed (${r.exitCode}): ${r.stderr}`);
8345
+ return r.stdoutBuffer;
8346
+ },
8347
+ access: async (p) => {
8348
+ if (!(await vm.exec([
8349
+ "/bin/sh",
8350
+ "-lc",
8351
+ `test -r ${shQuote(toGuestPath(localCwd, p))}`
8352
+ ])).ok) throw new Error(`not readable: ${p}`);
8353
+ },
8354
+ detectImageMimeType: async (p) => {
8355
+ try {
8356
+ const r = await vm.exec([
8357
+ "/bin/sh",
8358
+ "-lc",
8359
+ `file --mime-type -b ${shQuote(toGuestPath(localCwd, p))}`
8360
+ ]);
8361
+ if (!r.ok) return null;
8362
+ const m = r.stdout.trim();
8363
+ return [
8364
+ "image/jpeg",
8365
+ "image/png",
8366
+ "image/gif",
8367
+ "image/webp"
8368
+ ].includes(m) ? m : null;
8369
+ } catch {
8370
+ return null;
8371
+ }
8372
+ }
8373
+ };
8374
+ }
8375
+ function createGondolinWriteOps(vm, localCwd) {
8376
+ return {
8377
+ writeFile: async (p, content) => {
8378
+ const guestPath = toGuestPath(localCwd, p);
8379
+ const dir = path.posix.dirname(guestPath);
8380
+ const b64 = Buffer.from(content, "utf8").toString("base64");
8381
+ const r = await vm.exec([
8382
+ "/bin/sh",
8383
+ "-lc",
8384
+ [
8385
+ "set -eu",
8386
+ `mkdir -p ${shQuote(dir)}`,
8387
+ `echo ${shQuote(b64)} | base64 -d > ${shQuote(guestPath)}`
8388
+ ].join("\n")
8389
+ ]);
8390
+ if (!r.ok) throw new Error(`write failed (${r.exitCode}): ${r.stderr}`);
8391
+ },
8392
+ mkdir: async (dir) => {
8393
+ const r = await vm.exec([
8394
+ "/bin/mkdir",
8395
+ "-p",
8396
+ toGuestPath(localCwd, dir)
8397
+ ]);
8398
+ if (!r.ok) throw new Error(`mkdir failed (${r.exitCode}): ${r.stderr}`);
8399
+ }
8400
+ };
8401
+ }
8402
+ function createGondolinEditOps(vm, localCwd) {
8403
+ const r = createGondolinReadOps(vm, localCwd);
8404
+ const w = createGondolinWriteOps(vm, localCwd);
8405
+ return {
8406
+ readFile: r.readFile,
8407
+ access: r.access,
8408
+ writeFile: w.writeFile
8409
+ };
8410
+ }
8411
+ function createGondolinBashOps(vm, localCwd) {
8412
+ return { exec: async (command, cwd, { onData, signal, timeout, env }) => {
8413
+ const guestCwd = toGuestPath(localCwd, cwd);
8414
+ const ac = new AbortController();
8415
+ const onAbort = () => ac.abort();
8416
+ signal?.addEventListener("abort", onAbort, { once: true });
8417
+ let timedOut = false;
8418
+ const timer = timeout && timeout > 0 ? setTimeout(() => {
8419
+ timedOut = true;
8420
+ ac.abort();
8421
+ }, timeout * 1e3) : void 0;
8422
+ try {
8423
+ const proc = vm.exec([
8424
+ "/bin/sh",
8425
+ "-lc",
8426
+ command
8427
+ ], {
8428
+ cwd: guestCwd,
8429
+ signal: ac.signal,
8430
+ stdout: "pipe",
8431
+ stderr: "pipe"
8432
+ });
8433
+ for await (const chunk of proc.output()) onData(typeof chunk.data === "string" ? Buffer.from(chunk.data, "utf8") : chunk.data);
8434
+ return { exitCode: (await proc).exitCode };
8435
+ } catch (err) {
8436
+ if (signal?.aborted) throw new Error("aborted");
8437
+ if (timedOut) throw new Error(`timeout:${timeout}`);
8438
+ throw err;
8439
+ } finally {
8440
+ if (timer) clearTimeout(timer);
8441
+ signal?.removeEventListener("abort", onAbort);
8442
+ }
8443
+ } };
8444
+ }
8445
+ //#endregion
8403
8446
  //#region src/otel/index.ts
8404
8447
  var TRACER_NAME = "@themoltnet/pi-extension/otel";
8405
8448
  function stripReservedAttrs(attrs) {
@@ -8537,6 +8580,61 @@ function extractUsage(message) {
8537
8580
  };
8538
8581
  }
8539
8582
  //#endregion
8583
+ //#region ../agent-runtime/src/context-bindings.ts
8584
+ var PROMPT_SEPARATOR = "\n\n---\n\n";
8585
+ /**
8586
+ * Resolve `task.input.context[]` into delivered side-effects (skills
8587
+ * persisted via `deliver.skill`) and prompt fragments
8588
+ * (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
8589
+ * built prompt.
8590
+ *
8591
+ * Per-binding semantics (V1):
8592
+ * - `skill` → `deliver.skill({ slug, content })` once per ref.
8593
+ * Slug collisions on distinct contents are
8594
+ * refused loudly.
8595
+ * - `prompt_prefix` → content appended to `systemPromptPrefix` with
8596
+ * the canonical `\n\n---\n\n` separator (in
8597
+ * declared order).
8598
+ * - `user_inline` → content appended to `userInlineSuffix` in
8599
+ * declared order, same separator.
8600
+ *
8601
+ * No fetching, no hashing — bytes are inlined in `ContextRef.content`,
8602
+ * and the task's `inputCid` already pins the entire input. The imposer
8603
+ * chose these bytes; the resolver just dispatches them.
8604
+ *
8605
+ * The function is pure with respect to its arguments: file writes are
8606
+ * confined to the injected `deliver` callback, which makes the
8607
+ * resolver trivial to test.
8608
+ */
8609
+ async function resolveTaskContext(args) {
8610
+ const promptParts = [];
8611
+ const userParts = [];
8612
+ const injected = [];
8613
+ const usedSlugs = /* @__PURE__ */ new Map();
8614
+ for (const ref of args.context) {
8615
+ if (ref.binding === "skill") {
8616
+ const prior = usedSlugs.get(ref.slug);
8617
+ if (prior !== void 0) {
8618
+ if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
8619
+ injected.push(ref);
8620
+ continue;
8621
+ }
8622
+ usedSlugs.set(ref.slug, ref.content);
8623
+ await args.deliver.skill({
8624
+ slug: ref.slug,
8625
+ content: ref.content
8626
+ });
8627
+ } else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
8628
+ else userParts.push(ref.content);
8629
+ injected.push(ref);
8630
+ }
8631
+ return {
8632
+ injected,
8633
+ systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
8634
+ userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
8635
+ };
8636
+ }
8637
+ //#endregion
8540
8638
  //#region ../tasks/src/formats.ts
8541
8639
  /**
8542
8640
  * Register TypeBox string formats used across Task / TaskOutput / task-type
@@ -8551,6 +8649,55 @@ var UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a
8551
8649
  if (!FormatRegistry.Has("uuid")) FormatRegistry.Set("uuid", (v) => UUID_RE.test(v));
8552
8650
  if (!FormatRegistry.Has("date-time")) FormatRegistry.Set("date-time", (v) => !Number.isNaN(Date.parse(v)));
8553
8651
  //#endregion
8652
+ //#region ../tasks/src/context.ts
8653
+ /**
8654
+ * How an executor delivers a context entry to its underlying LLM.
8655
+ * V1 bindings only; Tier-2 (reference_file, mcp_resource, imported_file,
8656
+ * tool_response_seed, additional_context_hook) ship in a later slice.
8657
+ */
8658
+ var ContextBinding = Type$1.Union([
8659
+ Type$1.Literal("skill"),
8660
+ Type$1.Literal("prompt_prefix"),
8661
+ Type$1.Literal("user_inline")
8662
+ ], { $id: "ContextBinding" });
8663
+ /**
8664
+ * One context entry. Bytes are inlined: the imposer chose them, and the
8665
+ * task's `inputCid` already pins the entire input — including
8666
+ * `context[]` — so we don't need a separate per-entry hash, fetcher, or
8667
+ * flagged-content gate. Tasks reference rendered packs (or any other
8668
+ * external content) by copying their bytes into `content` at task
8669
+ * creation time.
8670
+ *
8671
+ * - `slug` — short identifier the daemon uses to disambiguate
8672
+ * entries. For `skill` binding it becomes the directory
8673
+ * name under the runtime's skill discovery path. Must be
8674
+ * kebab-case-safe (alphanumeric + dashes/underscores).
8675
+ * - `binding` — how the bytes are delivered to the LLM (see above).
8676
+ * - `content` — the actual bytes (UTF-8 text). Capped at 32 KiB per
8677
+ * entry; total per-task context bytes are bounded by the
8678
+ * soft `maxItems` cap and per-binding daemon limits.
8679
+ */
8680
+ var ContextRef = Type$1.Object({
8681
+ slug: Type$1.String({
8682
+ minLength: 1,
8683
+ maxLength: 64,
8684
+ pattern: "^[a-zA-Z0-9_-]+$"
8685
+ }),
8686
+ binding: ContextBinding,
8687
+ content: Type$1.String({
8688
+ minLength: 1,
8689
+ maxLength: 32768
8690
+ })
8691
+ }, {
8692
+ $id: "ContextRef",
8693
+ additionalProperties: false
8694
+ });
8695
+ /** Reusable input fragment for any task type. Soft cap at 5 items. */
8696
+ var TaskContext = Type$1.Array(ContextRef, {
8697
+ $id: "TaskContext",
8698
+ maxItems: 5
8699
+ });
8700
+ //#endregion
8554
8701
  //#region ../tasks/src/rubric.ts
8555
8702
  /**
8556
8703
  * Rubric — structured acceptance criteria used by judgment tasks.
@@ -9099,6 +9246,60 @@ var RenderPackOutput = Type$1.Object({
9099
9246
  additionalProperties: false
9100
9247
  });
9101
9248
  //#endregion
9249
+ //#region ../tasks/src/task-types/run-eval.ts
9250
+ /**
9251
+ * `run_eval` — execute a scenario prompt under a named variant for
9252
+ * later cross-variant grading by `judge_eval_variant` (Slice 2).
9253
+ *
9254
+ * output_kind: artifact
9255
+ * criteria: optional (when set, output.verification is required —
9256
+ * producer self-assessment; the judge is the binding evaluator)
9257
+ * references: not required (scenario lives entirely in input)
9258
+ */
9259
+ var RUN_EVAL_TYPE = "run_eval";
9260
+ var RunEvalInput = Type$1.Object({
9261
+ scenario: Type$1.Object({
9262
+ prompt: Type$1.String({ minLength: 1 }),
9263
+ inputFiles: Type$1.Optional(Type$1.Array(Type$1.String({ minLength: 1 })))
9264
+ }, { additionalProperties: false }),
9265
+ variantLabel: Type$1.String({
9266
+ minLength: 1,
9267
+ maxLength: 64
9268
+ }),
9269
+ context: TaskContext,
9270
+ successCriteria: Type$1.Optional(SuccessCriteria)
9271
+ }, {
9272
+ $id: "RunEvalInput",
9273
+ additionalProperties: false
9274
+ });
9275
+ var RunEvalOutput = Type$1.Object({
9276
+ response: Type$1.String({ minLength: 1 }),
9277
+ artifacts: Type$1.Optional(Type$1.Array(Type$1.Object({
9278
+ path: Type$1.String({ minLength: 1 }),
9279
+ cid: Type$1.String({ minLength: 1 })
9280
+ }, { additionalProperties: false }))),
9281
+ totalTokens: Type$1.Integer({ minimum: 0 }),
9282
+ durationMs: Type$1.Integer({ minimum: 0 }),
9283
+ traceparent: Type$1.String({ minLength: 1 }),
9284
+ verification: Type$1.Optional(VerificationRecord)
9285
+ }, {
9286
+ $id: "RunEvalOutput",
9287
+ additionalProperties: false
9288
+ });
9289
+ /**
9290
+ * Cross-field rule mirroring the `requireVerificationWhenCriteriaPresent`
9291
+ * rule used by the brief task types: when input declares
9292
+ * `successCriteria`, output MUST carry `verification`; when it doesn't,
9293
+ * output MUST NOT carry one.
9294
+ */
9295
+ function validateRunEvalOutput(output, input) {
9296
+ const hasCriteria = input !== null && input !== void 0 && input.successCriteria !== void 0;
9297
+ const hasVerification = output !== null && output !== void 0 && output.verification !== void 0;
9298
+ if (hasCriteria && !hasVerification) return "output.verification is required because input.successCriteria is set; the producer LLM must self-assess against the criteria";
9299
+ if (!hasCriteria && hasVerification) return "output.verification was supplied but input.successCriteria is unset; omit verification when there are no criteria to assess against";
9300
+ return null;
9301
+ }
9302
+ //#endregion
9102
9303
  //#region ../tasks/src/task-types/index.ts
9103
9304
  /**
9104
9305
  * Validate that a judgment-task input carries a rubric inside its
@@ -9177,6 +9378,14 @@ var BUILT_IN_TASK_TYPES = {
9177
9378
  requiresReferences: true,
9178
9379
  validateInput: validateJudgmentInput,
9179
9380
  validateOutput: validateJudgePackOutput
9381
+ },
9382
+ [RUN_EVAL_TYPE]: {
9383
+ name: RUN_EVAL_TYPE,
9384
+ inputSchema: RunEvalInput,
9385
+ outputSchema: RunEvalOutput,
9386
+ outputKind: "artifact",
9387
+ requiresReferences: false,
9388
+ validateOutput: validateRunEvalOutput
9180
9389
  }
9181
9390
  };
9182
9391
  //#endregion
@@ -9275,6 +9484,14 @@ var ExecutorTrustLevel = Type$1.Union([
9275
9484
  Type$1.Literal("releaseVerifiedTool"),
9276
9485
  Type$1.Literal("sandboxAttested")
9277
9486
  ], { $id: "ExecutorTrustLevel" });
9487
/**
 * Identifies a (provider, model) daemon pair allowed to claim a task.
 * Both fields are non-empty strings; no other properties are accepted.
 */
var ExecutorRef = (() => {
	const nonEmptyString = () => Type$1.String({ minLength: 1 });
	return Type$1.Object({
		provider: nonEmptyString(),
		model: nonEmptyString()
	}, {
		$id: "ExecutorRef",
		additionalProperties: false
	});
})();
9278
9495
  var OutputKind = Type$1.Union([Type$1.Literal("artifact"), Type$1.Literal("judgment")], { $id: "OutputKind" });
9279
9496
  var TaskMessageKind = Type$1.Union([
9280
9497
  Type$1.Literal("text_delta"),
@@ -9367,6 +9584,7 @@ Type$1.Object({
9367
9584
  imposedByHumanId: Type$1.Union([Uuid, Type$1.Null()]),
9368
9585
  acceptedAttemptN: Type$1.Union([Type$1.Number(), Type$1.Null()]),
9369
9586
  requiredExecutorTrustLevel: ExecutorTrustLevel,
9587
+ allowedExecutors: Type$1.Array(ExecutorRef, { maxItems: 16 }),
9370
9588
  status: TaskStatus,
9371
9589
  queuedAt: IsoTimestamp,
9372
9590
  completedAt: Type$1.Union([IsoTimestamp, Type$1.Null()]),
@@ -9552,7 +9770,7 @@ function buildFinalOutputBlock(opts) {
9552
9770
  //#endregion
9553
9771
  //#region ../agent-runtime/src/prompts/assess-brief.ts
9554
9772
  /**
9555
- * Build the system prompt for an `assess_brief` judge attempt.
9773
+ * Build the first user-message prompt for an `assess_brief` judge attempt.
9556
9774
  *
9557
9775
  * Design note — no pre-resolved `target` projection
9558
9776
  * --------------------------------------------------
@@ -9573,7 +9791,7 @@ function buildFinalOutputBlock(opts) {
9573
9791
  * future task types whose products are docs / configs / changes /
9574
9792
  * anything) work without any code path here.
9575
9793
  */
9576
- function buildAssessBriefPrompt(input, ctx) {
9794
+ function buildAssessBriefUserPrompt(input, ctx) {
9577
9795
  const rubric = input.successCriteria.rubric;
9578
9796
  const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
9579
9797
  const preambleSection = rubric.preamble ? [
@@ -9688,7 +9906,7 @@ function buildSelfVerificationBlock(taskId) {
9688
9906
  //#endregion
9689
9907
  //#region ../agent-runtime/src/prompts/curate-pack.ts
9690
9908
  /**
9691
- * Build the system prompt for a `curate_pack` task.
9909
+ * Build the first user-message prompt for a `curate_pack` task.
9692
9910
  *
9693
9911
  * Design note: this prompt is deliberately NOT a numbered command
9694
9912
  * sequence. The curator's value comes from judgment — inferring scope
@@ -9709,7 +9927,7 @@ function buildSelfVerificationBlock(taskId) {
9709
9927
  * emits pruned state at phase boundaries so a follow-up session can
9710
9928
  * resume without replaying the tool history.
9711
9929
  */
9712
- function buildCuratePackPrompt(input, ctx) {
9930
+ function buildCuratePackUserPrompt(input, ctx) {
9713
9931
  const { diaryId, taskPrompt, entryTypes, tagFilters, tokenBudget, recipe } = input;
9714
9932
  const entryTypesPinned = Boolean(entryTypes);
9715
9933
  const resolvedRecipe = recipe ?? "topic-focused-v1";
@@ -9845,13 +10063,13 @@ function buildCuratePackPrompt(input, ctx) {
9845
10063
  //#endregion
9846
10064
  //#region ../agent-runtime/src/prompts/fulfill-brief.ts
9847
10065
  /**
9848
- * Build the system prompt for a `fulfill_brief` task.
10066
+ * Build the first user-message prompt for a `fulfill_brief` task.
9849
10067
  *
9850
10068
  * Generalized from the original `resolve-issue` prompt. No longer
9851
10069
  * GitHub-specific; references live on `Task.references[]` and the agent
9852
10070
  * is told to inspect them itself.
9853
10071
  */
9854
- function buildFulfillBriefPrompt(input, ctx) {
10072
+ function buildFulfillBriefUserPrompt(input, ctx) {
9855
10073
  const { brief, title, acceptanceCriteria, seedFiles, scopeHint } = input;
9856
10074
  const criteriaSection = acceptanceCriteria?.length ? [
9857
10075
  "### Acceptance criteria",
@@ -9931,7 +10149,7 @@ function buildFulfillBriefPrompt(input, ctx) {
9931
10149
  }
9932
10150
  //#endregion
9933
10151
  //#region ../agent-runtime/src/prompts/judge-pack.ts
9934
- function buildJudgePackPrompt(input, ctx) {
10152
+ function buildJudgePackUserPrompt(input, ctx) {
9935
10153
  const { renderedPackId, sourcePackId, successCriteria } = input;
9936
10154
  const rubric = successCriteria.rubric;
9937
10155
  const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
@@ -10058,10 +10276,10 @@ function buildJudgePackPrompt(input, ctx) {
10058
10276
  //#endregion
10059
10277
  //#region ../agent-runtime/src/prompts/render-pack.ts
10060
10278
  /**
10061
- * Build the system prompt for a `render_pack` task. Almost mechanical:
10279
+ * Build the first user-message prompt for a `render_pack` task. Almost mechanical:
10062
10280
  * wraps `moltnet_pack_render` and emits the receipt.
10063
10281
  */
10064
- function buildRenderPackPrompt(input, ctx) {
10282
+ function buildRenderPackUserPrompt(input, ctx) {
10065
10283
  const { packId, persist = true, pinned = false } = input;
10066
10284
  return [
10067
10285
  "# Render Pack Agent",
@@ -10115,19 +10333,87 @@ function buildRenderPackPrompt(input, ctx) {
10115
10333
  ].join("\n");
10116
10334
  }
10117
10335
  //#endregion
10336
+ //#region ../agent-runtime/src/prompts/run-eval.ts
10337
/**
 * Build the first user-message prompt for a `run_eval` task.
 *
 * Free-form: no git workflow, no commit ceremony. The executor produces
 * a textual response (and optional file artifacts) that a later
 * `judge_eval_variant` task (Slice 2) grades against the rubric.
 *
 * Context delivery is handled by `resolveTaskContext` (see
 * libs/agent-runtime/src/context-bindings.ts) and runs BEFORE this
 * prompt is rendered. This builder does NOT inline `input.context[]`
 * itself.
 *
 * Sections, in order: heading, variant/task intro, optional correlation
 * note, scenario prompt, optional input-file list, optional
 * self-verification block (only when `successCriteria` is set), and the
 * final-output block. Empty sections are dropped before joining.
 *
 * @param input - Validated `RunEvalInput` (`scenario`, `variantLabel`,
 *   optional `successCriteria`).
 * @param ctx - Prompt context; `taskId` is always used, `correlationId`
 *   adds the correlation section when truthy.
 * @returns The Markdown prompt text.
 */
function buildRunEvalUserPrompt(input, ctx) {
	const { scenario, variantLabel, successCriteria } = input;

	let inputFilesSection = "";
	if (scenario.inputFiles?.length) {
		const bullets = scenario.inputFiles.map((file) => `- \`${file}\``);
		inputFilesSection = [
			"### Input files",
			"",
			...bullets,
			""
		].join("\n");
	}

	let verificationSection = "";
	if (successCriteria) verificationSection = buildSelfVerificationBlock(ctx.taskId);

	let correlationSection = "";
	if (ctx.correlationId) {
		correlationSection = [
			"### Correlation",
			"",
			`This task carries correlationId \`${ctx.correlationId}\`. It joins`,
			"this variant to its sibling `run_eval` tasks (other variants of the",
			"same scenario) and to the eventual `judge_eval_variant` task that",
			"will grade them together. You do not need to act on it directly —",
			"it is recorded for cross-variant aggregation at query time.",
			""
		].join("\n");
	}

	// JSON-ish sketch of the expected output, shown to the model verbatim.
	const shapeSketch = [
		"{",
		" \"response\": \"<your free-form answer>\",",
		" \"artifacts\": [{ \"path\": \"...\", \"cid\": \"...\" }], // optional",
		" \"totalTokens\": <int>,",
		" \"durationMs\": <int>,",
		" \"traceparent\": \"<from claim>\",",
		" \"verification\": <required iff input.successCriteria; see Self-verification>",
		"}"
	].join("\n");
	const finalOutputBlock = buildFinalOutputBlock({
		taskType: "run_eval",
		outputSchemaName: "RunEvalOutput",
		shapeSketch
	});

	const sections = [
		"# Run Eval Agent\n",
		`You are running an evaluation scenario as variant \`${variantLabel}\`.\nTask id: \`${ctx.taskId}\`\n`,
		correlationSection,
		`### Scenario\n\n${scenario.prompt}\n`,
		inputFilesSection,
		verificationSection,
		finalOutputBlock
	];
	return sections.filter((section) => section !== "").join("\n");
}
10394
+ //#endregion
10118
10395
  //#region ../agent-runtime/src/prompts/index.ts
10119
10396
  /**
10120
- * Resolve the correct prompt builder for `task.taskType` and invoke it.
10121
- * Throws if the type is unknown or the input fails TypeBox validation.
10122
- */
10123
- function buildPromptForTask(task, ctx) {
10397
+ * Resolve the correct user-prompt builder for `task.taskType` and
10398
+ * invoke it. Throws if the type is unknown or the input fails TypeBox
10399
+ * validation.
10400
+ *
10401
+ * Role note: the returned string is delivered as the **first user
10402
+ * message** of the agent's session (pi-coding-agent's
10403
+ * `session.prompt(text)` puts text in the user role). The system
10404
+ * prompt is built separately by pi from `appendSystemPrompt` (the
10405
+ * runtime instructor lives there). Builders here are free-form Markdown
10406
+ * for the user turn; they don't replace or prepend to the system
10407
+ * prompt.
10408
+ */
10409
+ function buildTaskUserPrompt(task, ctx) {
10124
10410
  switch (task.taskType) {
10125
10411
  case FULFILL_BRIEF_TYPE:
10126
10412
  if (!Value.Check(FulfillBriefInput, task.input)) {
10127
10413
  const errors = [...Value.Errors(FulfillBriefInput, task.input)];
10128
10414
  throw new Error(`fulfill_brief input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
10129
10415
  }
10130
- return buildFulfillBriefPrompt(task.input, {
10416
+ return buildFulfillBriefUserPrompt(task.input, {
10131
10417
  diaryId: ctx.diaryId,
10132
10418
  taskId: ctx.taskId,
10133
10419
  correlationId: task.correlationId
@@ -10137,7 +10423,7 @@ function buildPromptForTask(task, ctx) {
10137
10423
  const errors = [...Value.Errors(AssessBriefInput, task.input)];
10138
10424
  throw new Error(`assess_brief input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
10139
10425
  }
10140
- return buildAssessBriefPrompt(task.input, {
10426
+ return buildAssessBriefUserPrompt(task.input, {
10141
10427
  diaryId: ctx.diaryId,
10142
10428
  taskId: ctx.taskId
10143
10429
  });
@@ -10146,7 +10432,7 @@ function buildPromptForTask(task, ctx) {
10146
10432
  const errors = [...Value.Errors(CuratePackInput, task.input)];
10147
10433
  throw new Error(`curate_pack input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
10148
10434
  }
10149
- return buildCuratePackPrompt(task.input, {
10435
+ return buildCuratePackUserPrompt(task.input, {
10150
10436
  diaryId: ctx.diaryId,
10151
10437
  taskId: ctx.taskId
10152
10438
  });
@@ -10155,7 +10441,7 @@ function buildPromptForTask(task, ctx) {
10155
10441
  const errors = [...Value.Errors(RenderPackInput, task.input)];
10156
10442
  throw new Error(`render_pack input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
10157
10443
  }
10158
- return buildRenderPackPrompt(task.input, {
10444
+ return buildRenderPackUserPrompt(task.input, {
10159
10445
  diaryId: ctx.diaryId,
10160
10446
  taskId: ctx.taskId
10161
10447
  });
@@ -10164,10 +10450,20 @@ function buildPromptForTask(task, ctx) {
10164
10450
  const errors = [...Value.Errors(JudgePackInput, task.input)];
10165
10451
  throw new Error(`judge_pack input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
10166
10452
  }
10167
- return buildJudgePackPrompt(task.input, {
10453
+ return buildJudgePackUserPrompt(task.input, {
10168
10454
  diaryId: ctx.diaryId,
10169
10455
  taskId: ctx.taskId
10170
10456
  });
10457
+ case RUN_EVAL_TYPE:
10458
+ if (!Value.Check(RunEvalInput, task.input)) {
10459
+ const errors = [...Value.Errors(RunEvalInput, task.input)];
10460
+ throw new Error(`run_eval input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
10461
+ }
10462
+ return buildRunEvalUserPrompt(task.input, {
10463
+ diaryId: ctx.diaryId,
10464
+ taskId: ctx.taskId,
10465
+ correlationId: task.correlationId
10466
+ });
10171
10467
  default: throw new Error(`No prompt builder registered for taskType="${task.taskType}"`);
10172
10468
  }
10173
10469
  }
@@ -13639,6 +13935,114 @@ var require_multistream = /* @__PURE__ */ __commonJSMin(((exports, module) => {
13639
13935
  module.exports.pino = pino;
13640
13936
  })))();
13641
13937
  //#endregion
13938
+ //#region src/runtime/inject-task-context.ts
13939
+ /**
13940
+ * Slice 1.5 of #943 — wire the agent-runtime resolver into the
13941
+ * pi-extension execution path.
13942
+ *
13943
+ * `resolveTaskContext` is a pure dispatcher; this module provides the
13944
+ * Gondolin-aware deliverer and the post-resolution shape the
13945
+ * `execute-pi-task` caller needs to splice into pi's setup:
13946
+ *
13947
+ * - `systemPromptPrefix` → fed into `appendSystemPrompt` alongside
13948
+ * the runtime instructor (it IS a system-prompt fragment).
13949
+ * - `userInlineSuffix` → appended to the `buildTaskUserPrompt`
13950
+ * output BEFORE `session.prompt(text)`.
13951
+ * - `skills` → spliced into the `skillsOverride` callback's
13952
+ * return value. pi includes them in `<available_skills>` in the
13953
+ * system prompt; the agent fetches the body on demand via the
13954
+ * Read tool.
13955
+ *
13956
+ * Skill files are written into the VM at
13957
+ * `/workspace/.moltnet/skills/<slug>/SKILL.md`. The agent's
13958
+ * Gondolin-bound Read tool is scoped to `/workspace`, so that path is
13959
+ * the only location the agent can actually read at runtime. pi only
13960
+ * reads `<available_skills>` metadata (name, description, location),
13961
+ * never the file body, so we construct synthetic `Skill` objects
13962
+ * pointing at the in-VM path without ever materialising the file on
13963
+ * the host.
13964
+ */
13965
+ /**
13966
+ * Where in the VM we write skill bodies — the memory-backed mount
13967
+ * declared in `vm-manager.ts`. See the comment on
13968
+ * `GUEST_TASK_SKILLS_MOUNT` there for the full rationale (ephemeral
13969
+ * by intent + the worktree symlink interaction with Gondolin's
13970
+ * sandbox-escape protection). The agent's Gondolin Read tool accepts
13971
+ * paths under this mount via `toGuestPath` in `tool-operations.ts`.
13972
+ */
13973
+ var SKILL_ROOT_IN_VM = GUEST_TASK_SKILLS_MOUNT;
13974
+ /** Bounds borrowed from pi's skill validation; conservative caps so a
13975
+ * malformed SKILL.md doesn't bloat the system prompt. */
13976
+ var MAX_SKILL_NAME = 64;
13977
+ var MAX_SKILL_DESCRIPTION = 1024;
13978
/**
 * Resolve a task's `input.context[]` and inject the side effects pi
 * needs. Safe to call with an empty array — returns an inert result.
 *
 * For every `skill`-bound entry the body is written to
 * `<SKILL_ROOT_IN_VM>/<slug>/SKILL.md` through `args.fs`, and a synthetic
 * `Skill` descriptor pointing at that path is collected.
 *
 * Because `slug` becomes a directory name, it is checked here as well:
 * empty slugs, `.`/`..`, and anything containing a path separator are
 * rejected so a caller that skipped schema validation cannot write
 * outside the skill root.
 *
 * @param args.context - The task's context entries, passed to `resolveTaskContext`.
 * @param args.fs - Filesystem handle exposing `mkdir(dir, opts)` and
 *   `writeFile(path, content, opts)`.
 * @returns `{ injected, skills, systemPromptPrefix, userInlineSuffix }`;
 *   all but `skills` are forwarded from `resolveTaskContext`.
 * @throws Error from the skill deliverer when a slug is not a safe
 *   directory name (presumably surfaced by `resolveTaskContext` — confirm).
 */
async function injectTaskContext(args) {
	const skills = [];
	const deliverSkill = async ({ slug, content }) => {
		const unsafeSlug = typeof slug !== "string" || slug === "" || slug === "." || slug === ".." || /[\\/]/.test(slug);
		if (unsafeSlug) throw new Error(`task context skill slug is not a safe directory name: ${JSON.stringify(slug)}`);
		const dir = `${SKILL_ROOT_IN_VM}/${slug}`;
		const filePath = `${dir}/SKILL.md`;
		await args.fs.mkdir(dir, { recursive: true });
		// 420 === 0o644: owner read/write, everyone else read-only.
		await args.fs.writeFile(filePath, content, { mode: 420 });
		skills.push(buildSyntheticSkill({
			slug,
			content,
			filePath,
			dir
		}));
	};
	const resolved = await resolveTaskContext({
		context: args.context,
		deliver: { skill: deliverSkill }
	});
	return {
		injected: resolved.injected,
		skills,
		systemPromptPrefix: resolved.systemPromptPrefix,
		userInlineSuffix: resolved.userInlineSuffix
	};
}
14006
/**
 * Build a `Skill` object pi will render in `<available_skills>`.
 *
 * `name` and `description` come from the skill content's YAML frontmatter
 * (via pi's `parseFrontmatter`), trimmed and clipped to `MAX_SKILL_NAME` /
 * `MAX_SKILL_DESCRIPTION`. When a field is missing, blank, or not a
 * string, the slug (for `name`) or a generic slug-based sentence (for
 * `description`) is used instead.
 *
 * Frontmatter parsing is best-effort: it is optional metadata, not a
 * reason to fail the task. Parser errors are swallowed, and a parsed
 * value that is not an object (e.g. `null` or a bare scalar) is ignored,
 * so the slug-derived fallbacks apply in both cases.
 *
 * @param args.slug - Context entry slug; fallback name.
 * @param args.content - Full SKILL.md text.
 * @param args.filePath - In-VM path of the written SKILL.md.
 * @param args.dir - In-VM directory holding the skill.
 * @returns The synthetic skill descriptor.
 */
function buildSyntheticSkill(args) {
	let fm = {};
	try {
		const parsed = parseFrontmatter(args.content).frontmatter;
		// Only adopt the parse result when property reads on it are safe.
		if (parsed !== null && typeof parsed === "object") fm = parsed;
	} catch {}
	const declaredName = typeof fm.name === "string" ? fm.name.trim() : "";
	const declaredDescription = typeof fm.description === "string" ? fm.description.trim() : "";
	return {
		name: clip(declaredName.length > 0 ? declaredName : args.slug, MAX_SKILL_NAME),
		description: clip(declaredDescription.length > 0 ? declaredDescription : `Task-injected context skill (${args.slug})`, MAX_SKILL_DESCRIPTION),
		filePath: args.filePath,
		baseDir: args.dir,
		sourceInfo: createSyntheticSourceInfo(args.filePath, {
			source: "moltnet:task-context",
			scope: "temporary",
			origin: "top-level",
			baseDir: args.dir
		}),
		disableModelInvocation: fm["disable-model-invocation"] === true
	};
}
14042
/**
 * Truncate `s` to at most `max` UTF-16 code units.
 *
 * If the cut would fall between the two halves of a surrogate pair, the
 * dangling high surrogate is dropped too, so the result never ends in a
 * lone surrogate (which would render as a replacement character).
 *
 * @param s - String to truncate.
 * @param max - Maximum length in UTF-16 code units.
 * @returns `s` unchanged when it already fits, otherwise a prefix of
 *   length `max` (or `max - 1` when a surrogate pair straddles the cut).
 */
function clip(s, max) {
	if (s.length <= max) return s;
	let end = max;
	if (end > 0) {
		const lastKept = s.charCodeAt(end - 1);
		const firstDropped = s.charCodeAt(end);
		const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
		if (splitsPair) end -= 1;
	}
	return s.slice(0, end);
}
14045
+ //#endregion
13642
14046
  //#region src/runtime/runtime-instructor.ts
13643
14047
  /**
13644
14048
  * Build the daemon-controlled invariant prose injected into the system prompt
@@ -13962,6 +14366,7 @@ function resolveSubmitTools(taskType, opts = {}) {
13962
14366
  * Anthropic-SDK one) plug in via the `executeTask` function injected into
13963
14367
  * `AgentRuntime`.
13964
14368
  */
14369
+ var noopTurnEventHandler = () => {};
13965
14370
  /**
13966
14371
  * Factory that builds a pi-specific `executeTask` function suitable for
13967
14372
  * injection into `AgentRuntime`. The returned function caches the resolved
@@ -14058,10 +14463,25 @@ async function executePiTask(claimedTask, reporter, opts) {
14058
14463
  attemptN
14059
14464
  });
14060
14465
  reporterOpen = true;
14061
- const emit = (kind, payload) => reporter.record({
14062
- kind,
14063
- payload
14064
- });
14466
+ let onTurnEvent;
14467
+ if (opts.makeOnTurnEvent) try {
14468
+ onTurnEvent = opts.makeOnTurnEvent(claimedTask);
14469
+ } catch (err) {
14470
+ process.stderr.write(`[emit] makeOnTurnEvent threw: ${err instanceof Error ? err.message : String(err)}\n`);
14471
+ onTurnEvent = noopTurnEventHandler;
14472
+ }
14473
+ else onTurnEvent = opts.onTurnEvent ?? noopTurnEventHandler;
14474
+ const emit = (kind, payload) => {
14475
+ try {
14476
+ onTurnEvent(kind, summarizePayloadForLog(kind, payload));
14477
+ } catch (err) {
14478
+ process.stderr.write(`[emit] onTurnEvent threw for kind="${kind}": ${err instanceof Error ? err.message : String(err)}\n`);
14479
+ }
14480
+ return reporter.record({
14481
+ kind,
14482
+ payload
14483
+ });
14484
+ };
14065
14485
  await emit("info", {
14066
14486
  event: "execute_start",
14067
14487
  taskType: task.taskType,
@@ -14071,7 +14491,7 @@ async function executePiTask(claimedTask, reporter, opts) {
14071
14491
  });
14072
14492
  let taskPrompt;
14073
14493
  try {
14074
- taskPrompt = buildPromptForTask(task, {
14494
+ taskPrompt = buildTaskUserPrompt(task, {
14075
14495
  diaryId,
14076
14496
  taskId: task.id,
14077
14497
  extras: opts.promptExtras
@@ -14084,6 +14504,30 @@ async function executePiTask(claimedTask, reporter, opts) {
14084
14504
  });
14085
14505
  return makeFailedOutput("prompt_build_failed", message);
14086
14506
  }
14507
+ const rawContext = task.input.context;
14508
+ let injectedContext;
14509
+ try {
14510
+ const contextArray = rawContext === void 0 ? [] : rawContext;
14511
+ if (!Value.Check(TaskContext, contextArray)) throw new Error(`task.input.context failed TaskContext validation: ${JSON.stringify([...Value.Errors(TaskContext, contextArray)].slice(0, 3))}`);
14512
+ injectedContext = await injectTaskContext({
14513
+ context: contextArray,
14514
+ fs: managed.vm.fs
14515
+ });
14516
+ } catch (err) {
14517
+ const message = err instanceof Error ? err.message : String(err);
14518
+ await emit("error", {
14519
+ message,
14520
+ phase: "context_resolution"
14521
+ });
14522
+ return makeFailedOutput("context_resolution_failed", message);
14523
+ }
14524
+ if (injectedContext.injected.length > 0) await emit("info", {
14525
+ event: "context_injected",
14526
+ count: injectedContext.injected.length,
14527
+ bindings: injectedContext.injected.map((r) => r.binding),
14528
+ slugs: injectedContext.injected.map((r) => r.slug)
14529
+ });
14530
+ if (injectedContext.userInlineSuffix) taskPrompt = `${taskPrompt}\n\n---\n\n${injectedContext.userInlineSuffix}`;
14087
14531
  const gondolinCustomTools = [
14088
14532
  createReadToolDefinition(mountPath, { operations: createGondolinReadOps(managed.vm, mountPath) }),
14089
14533
  createWriteToolDefinition(mountPath, { operations: createGondolinWriteOps(managed.vm, mountPath) }),
@@ -14120,21 +14564,23 @@ async function executePiTask(claimedTask, reporter, opts) {
14120
14564
  "moltnet.task.type": task.taskType
14121
14565
  }
14122
14566
  });
14123
- const runtimeInstructor = buildRuntimeInstructor({
14567
+ const appendSystemPrompt = [buildRuntimeInstructor({
14124
14568
  taskId: task.id,
14125
14569
  taskType: task.taskType,
14126
14570
  attemptN,
14127
14571
  diaryId,
14128
14572
  agentName: opts.agentName,
14129
14573
  correlationId: task.correlationId ?? null
14130
- });
14574
+ })];
14575
+ if (injectedContext.systemPromptPrefix) appendSystemPrompt.push(injectedContext.systemPromptPrefix);
14576
+ const injectedSkills = injectedContext.skills;
14131
14577
  const resourceLoader = new DefaultResourceLoader({
14132
14578
  cwd: mountPath,
14133
14579
  agentDir: piAuthDir,
14134
14580
  extensionFactories: [piOtelExtension],
14135
- appendSystemPrompt: [runtimeInstructor],
14581
+ appendSystemPrompt,
14136
14582
  skillsOverride: () => ({
14137
- skills: [],
14583
+ skills: injectedSkills,
14138
14584
  diagnostics: []
14139
14585
  })
14140
14586
  });
@@ -14359,6 +14805,27 @@ function wireSessionAbort(cancelSignal, session) {
14359
14805
  * `task_messages.payload` row. Bodies above 4 KiB are replaced with a
14360
14806
  * `{ truncated, original_size }` marker so the JSONL/DB size stays bounded.
14361
14807
  */
14808
+ function summarizePayloadForLog(kind, payload) {
14809
+ switch (kind) {
14810
+ case "text_delta": {
14811
+ const delta = payload.delta;
14812
+ return { chars: typeof delta === "string" ? delta.length : 0 };
14813
+ }
14814
+ case "tool_call_start": return { tool: payload.tool_name };
14815
+ case "tool_call_end": return {
14816
+ tool: payload.tool_name,
14817
+ is_error: payload.is_error === true,
14818
+ ...payload.is_error === true && payload.result !== void 0 ? { result: payload.result } : {}
14819
+ };
14820
+ case "turn_end": return { stop_reason: payload.stop_reason };
14821
+ case "error": return {
14822
+ phase: payload.phase,
14823
+ message: typeof payload.message === "string" ? payload.message.slice(0, TRUNCATE_LIMIT) : payload.message
14824
+ };
14825
+ case "info": return Object.fromEntries(Object.entries(payload).map(([k, v]) => [k, typeof v === "string" ? v.slice(0, TRUNCATE_LIMIT) : v]));
14826
+ default: return payload;
14827
+ }
14828
+ }
14362
14829
  var TRUNCATE_LIMIT = 4 * 1024;
14363
14830
  function truncateForWire(value) {
14364
14831
  if (value === null || value === void 0) return value;
@@ -14659,4 +15126,4 @@ function moltnetExtension(pi) {
14659
15126
  registerMoltnetReflectCommand(pi, state);
14660
15127
  }
14661
15128
  //#endregion
14662
- export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, loadCredentials, resumeVm, toGuestPath };
15129
+ export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, injectTaskContext, loadCredentials, resumeVm, toGuestPath };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@themoltnet/pi-extension",
3
- "version": "0.13.5",
3
+ "version": "0.14.0",
4
4
  "type": "module",
5
5
  "description": "MoltNet pi extension — sandboxed tool execution in Gondolin VMs with MoltNet identity and persistent memory",
6
6
  "license": "MIT",
@@ -31,8 +31,8 @@
31
31
  "@earendil-works/gondolin": "^0.9.1",
32
32
  "@opentelemetry/api": "^1.9.0",
33
33
  "@sinclair/typebox": "^0.34.0",
34
- "@themoltnet/agent-runtime": "0.11.0",
35
- "@themoltnet/sdk": "0.99.0"
34
+ "@themoltnet/agent-runtime": "0.12.0",
35
+ "@themoltnet/sdk": "0.100.0"
36
36
  },
37
37
  "peerDependencies": {
38
38
  "@earendil-works/pi-coding-agent": ">=0.74.0",