@themoltnet/pi-extension 0.13.4 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +107 -1
- package/dist/index.js +632 -165
- package/package.json +3 -3
package/dist/index.d.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { connect } from '@themoltnet/sdk';
|
|
|
3
3
|
import { EditOperations } from '@earendil-works/pi-coding-agent';
|
|
4
4
|
import { ExtensionAPI } from '@earendil-works/pi-coding-agent';
|
|
5
5
|
import { ReadOperations } from '@earendil-works/pi-coding-agent';
|
|
6
|
+
import { Skill } from '@earendil-works/pi-coding-agent';
|
|
6
7
|
import { Static } from '@sinclair/typebox';
|
|
7
8
|
import { TArray } from '@sinclair/typebox';
|
|
8
9
|
import { TBoolean } from '@sinclair/typebox';
|
|
@@ -35,6 +36,31 @@ declare interface ClaimedTask {
|
|
|
35
36
|
traceHeaders: Record<string, string>;
|
|
36
37
|
}
|
|
37
38
|
|
|
39
|
+
/**
|
|
40
|
+
* One context entry. Bytes are inlined: the imposer chose them, and the
|
|
41
|
+
* task's `inputCid` already pins the entire input — including
|
|
42
|
+
* `context[]` — so we don't need a separate per-entry hash, fetcher, or
|
|
43
|
+
* flagged-content gate. Tasks reference rendered packs (or any other
|
|
44
|
+
* external content) by copying their bytes into `content` at task
|
|
45
|
+
* creation time.
|
|
46
|
+
*
|
|
47
|
+
* - `slug` — short identifier the daemon uses to disambiguate
|
|
48
|
+
* entries. For `skill` binding it becomes the directory
|
|
49
|
+
* name under the runtime's skill discovery path. Must be
|
|
50
|
+
* kebab-case-safe (alphanumeric + dashes/underscores).
|
|
51
|
+
* - `binding` — how the bytes are delivered to the LLM (see above).
|
|
52
|
+
* - `content` — the actual bytes (UTF-8 text). Capped at 32 KiB per
|
|
53
|
+
* entry; total per-task context bytes are bounded by the
|
|
54
|
+
* soft `maxItems` cap and per-binding daemon limits.
|
|
55
|
+
*/
|
|
56
|
+
declare const ContextRef: TObject< {
|
|
57
|
+
slug: TString;
|
|
58
|
+
binding: TUnion<[TLiteral<"skill">, TLiteral<"prompt_prefix">, TLiteral<"user_inline">]>;
|
|
59
|
+
content: TString;
|
|
60
|
+
}>;
|
|
61
|
+
|
|
62
|
+
declare type ContextRef = Static<typeof ContextRef>;
|
|
63
|
+
|
|
38
64
|
export declare function createGondolinBashOps(vm: VM, localCwd: string): BashOperations;
|
|
39
65
|
|
|
40
66
|
export declare function createGondolinEditOps(vm: VM, localCwd: string): EditOperations;
|
|
@@ -91,7 +117,7 @@ export declare interface ExecutePiTaskOptions {
|
|
|
91
117
|
/** Sandbox overrides (env, VFS shadows, resources). */
|
|
92
118
|
sandboxConfig?: SandboxConfig;
|
|
93
119
|
/**
|
|
94
|
-
* Forwarded to `
|
|
120
|
+
* Forwarded to `buildTaskUserPrompt` for per-type builders. Static
|
|
95
121
|
* across tasks. Today no built-in builder needs per-task `extras` —
|
|
96
122
|
* judges fetch their own dependent data via MoltNet tools
|
|
97
123
|
* (`moltnet_get_task`, `moltnet_list_task_attempts`, etc.) at run
|
|
@@ -107,6 +133,24 @@ export declare interface ExecutePiTaskOptions {
|
|
|
107
133
|
* across tasks.
|
|
108
134
|
*/
|
|
109
135
|
checkpointPath?: string;
|
|
136
|
+
/**
|
|
137
|
+
* Optional callback invoked alongside every `reporter.record()` so
|
|
138
|
+
* the daemon can mirror task messages into its local logger.
|
|
139
|
+
* Bound at executor-construction time — use when one task runs per
|
|
140
|
+
* process (e.g. `once.ts`) and per-task context is known before
|
|
141
|
+
* the executor is built. For poll mode, prefer `makeOnTurnEvent`
|
|
142
|
+
* below. If both are set, `makeOnTurnEvent` wins.
|
|
143
|
+
* See `TurnEventHandler` for payload shape. Defaults to a no-op.
|
|
144
|
+
*/
|
|
145
|
+
onTurnEvent?: TurnEventHandler;
|
|
146
|
+
/**
|
|
147
|
+
* Per-task factory variant for `onTurnEvent`. Invoked once per
|
|
148
|
+
* task with the claimed task before any emit, so the returned
|
|
149
|
+
* handler can bind taskId / attemptN into a pino child.
|
|
150
|
+
* Use in poll mode where N tasks run sequentially in the same
|
|
151
|
+
* process. See #1078.
|
|
152
|
+
*/
|
|
153
|
+
makeOnTurnEvent?: TurnEventHandlerFactory;
|
|
110
154
|
}
|
|
111
155
|
|
|
112
156
|
/**
|
|
@@ -121,6 +165,32 @@ export declare function findMainWorktree(): string;
|
|
|
121
165
|
*/
|
|
122
166
|
export declare const HOST_EXEC_DEFAULT_BASE_ENV: ReadonlySet<string>;
|
|
123
167
|
|
|
168
|
+
export declare interface InjectedTaskContext {
|
|
169
|
+
/** Refs that were delivered, in declared order, for audit. */
|
|
170
|
+
injected: ContextRef[];
|
|
171
|
+
/** Synthetic Skill objects to splice into pi's skillsOverride. */
|
|
172
|
+
skills: Skill[];
|
|
173
|
+
/** Prepend this to `appendSystemPrompt`. Empty when nothing
|
|
174
|
+
* contributed (omit the array entry rather than pass an empty
|
|
175
|
+
* string to keep pi's prompt assembly tidy). */
|
|
176
|
+
systemPromptPrefix: string;
|
|
177
|
+
/** Append this to the task user prompt BEFORE `session.prompt()`. */
|
|
178
|
+
userInlineSuffix: string;
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
/**
|
|
182
|
+
* Resolve a task's `input.context[]` and inject the side effects pi
|
|
183
|
+
* needs. Safe to call with an empty array — returns an inert result.
|
|
184
|
+
*/
|
|
185
|
+
export declare function injectTaskContext(args: InjectTaskContextArgs): Promise<InjectedTaskContext>;
|
|
186
|
+
|
|
187
|
+
export declare interface InjectTaskContextArgs {
|
|
188
|
+
/** Empty array (the default for any non-eval task) is a no-op. */
|
|
189
|
+
context: TaskContext;
|
|
190
|
+
/** Guest filesystem handle. In production this is `managed.vm.fs`. */
|
|
191
|
+
fs: VmFsForContext;
|
|
192
|
+
}
|
|
193
|
+
|
|
124
194
|
export declare function loadCredentials(agentDir: string): VmCredentials;
|
|
125
195
|
|
|
126
196
|
export declare interface ManagedVm {
|
|
@@ -264,6 +334,10 @@ declare const Task: TObject< {
|
|
|
264
334
|
imposedByHumanId: TUnion<[TString, TNull]>;
|
|
265
335
|
acceptedAttemptN: TUnion<[TNumber, TNull]>;
|
|
266
336
|
requiredExecutorTrustLevel: TUnion<[TLiteral<"selfDeclared">, TLiteral<"agentSigned">, TLiteral<"releaseVerifiedTool">, TLiteral<"sandboxAttested">]>;
|
|
337
|
+
allowedExecutors: TArray<TObject< {
|
|
338
|
+
provider: TString;
|
|
339
|
+
model: TString;
|
|
340
|
+
}>>;
|
|
267
341
|
status: TUnion<[TLiteral<"queued">, TLiteral<"dispatched">, TLiteral<"running">, TLiteral<"completed">, TLiteral<"failed">, TLiteral<"cancelled">, TLiteral<"expired">]>;
|
|
268
342
|
queuedAt: TString;
|
|
269
343
|
completedAt: TUnion<[TString, TNull]>;
|
|
@@ -278,6 +352,15 @@ declare const Task: TObject< {
|
|
|
278
352
|
|
|
279
353
|
declare type Task = Static<typeof Task>;
|
|
280
354
|
|
|
355
|
+
/** Reusable input fragment for any task type. Soft cap at 5 items. */
|
|
356
|
+
declare const TaskContext: TArray<TObject< {
|
|
357
|
+
slug: TString;
|
|
358
|
+
binding: TUnion<[TLiteral<"skill">, TLiteral<"prompt_prefix">, TLiteral<"user_inline">]>;
|
|
359
|
+
content: TString;
|
|
360
|
+
}>>;
|
|
361
|
+
|
|
362
|
+
declare type TaskContext = Static<typeof TaskContext>;
|
|
363
|
+
|
|
281
364
|
declare const TaskMessage: TObject< {
|
|
282
365
|
taskId: TString;
|
|
283
366
|
attemptN: TNumber;
|
|
@@ -410,6 +493,14 @@ declare interface TrackedError {
|
|
|
410
493
|
timestamp: number;
|
|
411
494
|
}
|
|
412
495
|
|
|
496
|
+
export declare interface TurnEventHandler {
|
|
497
|
+
(event: TurnEventKind, summary: Record<string, unknown>): void;
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
export declare type TurnEventHandlerFactory = (claimedTask: ClaimedTask) => TurnEventHandler;
|
|
501
|
+
|
|
502
|
+
export declare type TurnEventKind = Parameters<TaskReporter['record']>[0]['kind'];
|
|
503
|
+
|
|
413
504
|
export declare interface VmConfig {
|
|
414
505
|
/** Absolute path to the qcow2 checkpoint. */
|
|
415
506
|
checkpointPath: string;
|
|
@@ -444,4 +535,19 @@ export declare interface VmCredentials {
|
|
|
444
535
|
githubAppPemFilename: string | null;
|
|
445
536
|
}
|
|
446
537
|
|
|
538
|
+
/**
|
|
539
|
+
* Subset of `@earendil-works/gondolin`'s `VmFs` we actually use. We
|
|
540
|
+
* narrow the dependency surface so unit tests can hand in a
|
|
541
|
+
* vitest-mocked object without instantiating a real VM. We use `any`
|
|
542
|
+
* for the options parameter to make this interface bivariantly
|
|
543
|
+
* compatible with `VmFs` (whose options types differ between
|
|
544
|
+
* `mkdir` and `writeFile`); the orchestrator only ever calls these
|
|
545
|
+
* methods with the documented option shape, so the looseness is
|
|
546
|
+
* confined to this seam.
|
|
547
|
+
*/
|
|
548
|
+
export declare interface VmFsForContext {
|
|
549
|
+
mkdir: (dirPath: string, options?: any) => Promise<void>;
|
|
550
|
+
writeFile: (filePath: string, data: string | Uint8Array, options?: any) => Promise<void>;
|
|
551
|
+
}
|
|
552
|
+
|
|
447
553
|
export { }
|
package/dist/index.js
CHANGED
|
@@ -2,17 +2,17 @@ import { createRequire } from "node:module";
|
|
|
2
2
|
import { execFileSync } from "node:child_process";
|
|
3
3
|
import { existsSync, mkdirSync, readFileSync, readdirSync, rmSync, statSync } from "node:fs";
|
|
4
4
|
import path, { join } from "node:path";
|
|
5
|
-
import { DefaultResourceLoader, SessionManager, createAgentSession, createBashTool, createBashToolDefinition, createEditTool, createEditToolDefinition, createReadTool, createReadToolDefinition, createWriteTool, createWriteToolDefinition, defineTool } from "@earendil-works/pi-coding-agent";
|
|
5
|
+
import { DefaultResourceLoader, SessionManager, createAgentSession, createBashTool, createBashToolDefinition, createEditTool, createEditToolDefinition, createReadTool, createReadToolDefinition, createSyntheticSourceInfo, createWriteTool, createWriteToolDefinition, defineTool, parseFrontmatter } from "@earendil-works/pi-coding-agent";
|
|
6
6
|
import { createHash } from "node:crypto";
|
|
7
7
|
import crypto, { createHash as createHash$1 } from "crypto";
|
|
8
8
|
import { readFile } from "node:fs/promises";
|
|
9
9
|
import { homedir } from "node:os";
|
|
10
10
|
import { Type, getModel } from "@earendil-works/pi-ai";
|
|
11
|
-
import { RealFSProvider, ShadowProvider, VM, VmCheckpoint, createHttpHooks, createShadowPathPredicate, ensureImageSelector, loadGuestAssets } from "@earendil-works/gondolin";
|
|
11
|
+
import { MemoryProvider, RealFSProvider, ShadowProvider, VM, VmCheckpoint, createHttpHooks, createShadowPathPredicate, ensureImageSelector, loadGuestAssets } from "@earendil-works/gondolin";
|
|
12
12
|
import { parseEnv } from "node:util";
|
|
13
13
|
import { SpanStatusCode, context, metrics, trace } from "@opentelemetry/api";
|
|
14
|
-
import { FormatRegistry, Type as Type$1 } from "@sinclair/typebox";
|
|
15
14
|
import { Value } from "@sinclair/typebox/value";
|
|
15
|
+
import { FormatRegistry, Type as Type$1 } from "@sinclair/typebox";
|
|
16
16
|
//#region \0rolldown/runtime.js
|
|
17
17
|
var __defProp = Object.defineProperty;
|
|
18
18
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
@@ -2424,13 +2424,31 @@ function problemToError(problem, statusCode) {
|
|
|
2424
2424
|
//#endregion
|
|
2425
2425
|
//#region ../sdk/src/agent-context.ts
|
|
2426
2426
|
function unwrapResult(result) {
|
|
2427
|
-
if (result.error) {
|
|
2427
|
+
if (result.error !== void 0 && result.error !== null) {
|
|
2428
2428
|
const error = result.error;
|
|
2429
|
-
throw problemToError(error, error.status
|
|
2429
|
+
if (isProblemDetails(error)) throw problemToError(error, error.status);
|
|
2430
|
+
if (error instanceof Error && result.response === void 0) {
|
|
2431
|
+
const networkError = new NetworkError(error.message, { detail: error.cause ? stringifyUnknown(error.cause) : void 0 });
|
|
2432
|
+
networkError.stack = error.stack;
|
|
2433
|
+
throw networkError;
|
|
2434
|
+
}
|
|
2435
|
+
throw new MoltNetError(`Unexpected error from MoltNet API: ${stringifyUnknown(error)}`, { code: "UNKNOWN" });
|
|
2430
2436
|
}
|
|
2431
2437
|
if (result.data === void 0) throw new MoltNetError("Unexpected empty response from MoltNet API", { code: "EMPTY_RESPONSE" });
|
|
2432
2438
|
return result.data;
|
|
2433
2439
|
}
|
|
2440
|
+
function isProblemDetails(error) {
|
|
2441
|
+
if (!error || typeof error !== "object") return false;
|
|
2442
|
+
return typeof error.status === "number" && ("title" in error || "detail" in error);
|
|
2443
|
+
}
|
|
2444
|
+
function stringifyUnknown(value) {
|
|
2445
|
+
if (value instanceof Error) return `${value.name}: ${value.message}`;
|
|
2446
|
+
try {
|
|
2447
|
+
return JSON.stringify(value) ?? String(value);
|
|
2448
|
+
} catch {
|
|
2449
|
+
return String(value);
|
|
2450
|
+
}
|
|
2451
|
+
}
|
|
2434
2452
|
function unwrapRequired(result, message, code) {
|
|
2435
2453
|
if (result.error || !result.data) throw new MoltNetError(message, { code });
|
|
2436
2454
|
return result.data;
|
|
@@ -8057,138 +8075,29 @@ function pruneOldSnapshots(maxCached, currentDir) {
|
|
|
8057
8075
|
});
|
|
8058
8076
|
}
|
|
8059
8077
|
//#endregion
|
|
8060
|
-
//#region src/tool-operations.ts
|
|
8061
|
-
/**
|
|
8062
|
-
* Gondolin tool operations: redirect pi's built-in tool operations
|
|
8063
|
-
* (read, write, edit, bash) to execute inside the VM.
|
|
8064
|
-
*
|
|
8065
|
-
* Follows the same pattern as upstream pi-gondolin.ts — pi's tool factories
|
|
8066
|
-
* accept an `operations` object that provides the underlying I/O.
|
|
8067
|
-
*/
|
|
8078
|
+
//#region src/vm-manager.ts
|
|
8068
8079
|
var GUEST_WORKSPACE$2 = "/workspace";
|
|
8069
|
-
function shQuote(s) {
|
|
8070
|
-
return "'" + s.replace(/'/g, "'\\''") + "'";
|
|
8071
|
-
}
|
|
8072
8080
|
/**
|
|
8073
|
-
*
|
|
8074
|
-
*
|
|
8075
|
-
|
|
8076
|
-
|
|
8077
|
-
|
|
8078
|
-
|
|
8079
|
-
|
|
8080
|
-
|
|
8081
|
-
|
|
8082
|
-
|
|
8083
|
-
|
|
8084
|
-
|
|
8085
|
-
|
|
8086
|
-
|
|
8087
|
-
|
|
8088
|
-
|
|
8089
|
-
|
|
8090
|
-
|
|
8091
|
-
|
|
8092
|
-
|
|
8093
|
-
"/bin/sh",
|
|
8094
|
-
"-lc",
|
|
8095
|
-
`test -r ${shQuote(toGuestPath(localCwd, p))}`
|
|
8096
|
-
])).ok) throw new Error(`not readable: ${p}`);
|
|
8097
|
-
},
|
|
8098
|
-
detectImageMimeType: async (p) => {
|
|
8099
|
-
try {
|
|
8100
|
-
const r = await vm.exec([
|
|
8101
|
-
"/bin/sh",
|
|
8102
|
-
"-lc",
|
|
8103
|
-
`file --mime-type -b ${shQuote(toGuestPath(localCwd, p))}`
|
|
8104
|
-
]);
|
|
8105
|
-
if (!r.ok) return null;
|
|
8106
|
-
const m = r.stdout.trim();
|
|
8107
|
-
return [
|
|
8108
|
-
"image/jpeg",
|
|
8109
|
-
"image/png",
|
|
8110
|
-
"image/gif",
|
|
8111
|
-
"image/webp"
|
|
8112
|
-
].includes(m) ? m : null;
|
|
8113
|
-
} catch {
|
|
8114
|
-
return null;
|
|
8115
|
-
}
|
|
8116
|
-
}
|
|
8117
|
-
};
|
|
8118
|
-
}
|
|
8119
|
-
function createGondolinWriteOps(vm, localCwd) {
|
|
8120
|
-
return {
|
|
8121
|
-
writeFile: async (p, content) => {
|
|
8122
|
-
const guestPath = toGuestPath(localCwd, p);
|
|
8123
|
-
const dir = path.posix.dirname(guestPath);
|
|
8124
|
-
const b64 = Buffer.from(content, "utf8").toString("base64");
|
|
8125
|
-
const r = await vm.exec([
|
|
8126
|
-
"/bin/sh",
|
|
8127
|
-
"-lc",
|
|
8128
|
-
[
|
|
8129
|
-
"set -eu",
|
|
8130
|
-
`mkdir -p ${shQuote(dir)}`,
|
|
8131
|
-
`echo ${shQuote(b64)} | base64 -d > ${shQuote(guestPath)}`
|
|
8132
|
-
].join("\n")
|
|
8133
|
-
]);
|
|
8134
|
-
if (!r.ok) throw new Error(`write failed (${r.exitCode}): ${r.stderr}`);
|
|
8135
|
-
},
|
|
8136
|
-
mkdir: async (dir) => {
|
|
8137
|
-
const r = await vm.exec([
|
|
8138
|
-
"/bin/mkdir",
|
|
8139
|
-
"-p",
|
|
8140
|
-
toGuestPath(localCwd, dir)
|
|
8141
|
-
]);
|
|
8142
|
-
if (!r.ok) throw new Error(`mkdir failed (${r.exitCode}): ${r.stderr}`);
|
|
8143
|
-
}
|
|
8144
|
-
};
|
|
8145
|
-
}
|
|
8146
|
-
function createGondolinEditOps(vm, localCwd) {
|
|
8147
|
-
const r = createGondolinReadOps(vm, localCwd);
|
|
8148
|
-
const w = createGondolinWriteOps(vm, localCwd);
|
|
8149
|
-
return {
|
|
8150
|
-
readFile: r.readFile,
|
|
8151
|
-
access: r.access,
|
|
8152
|
-
writeFile: w.writeFile
|
|
8153
|
-
};
|
|
8154
|
-
}
|
|
8155
|
-
function createGondolinBashOps(vm, localCwd) {
|
|
8156
|
-
return { exec: async (command, cwd, { onData, signal, timeout, env }) => {
|
|
8157
|
-
const guestCwd = toGuestPath(localCwd, cwd);
|
|
8158
|
-
const ac = new AbortController();
|
|
8159
|
-
const onAbort = () => ac.abort();
|
|
8160
|
-
signal?.addEventListener("abort", onAbort, { once: true });
|
|
8161
|
-
let timedOut = false;
|
|
8162
|
-
const timer = timeout && timeout > 0 ? setTimeout(() => {
|
|
8163
|
-
timedOut = true;
|
|
8164
|
-
ac.abort();
|
|
8165
|
-
}, timeout * 1e3) : void 0;
|
|
8166
|
-
try {
|
|
8167
|
-
const proc = vm.exec([
|
|
8168
|
-
"/bin/sh",
|
|
8169
|
-
"-lc",
|
|
8170
|
-
command
|
|
8171
|
-
], {
|
|
8172
|
-
cwd: guestCwd,
|
|
8173
|
-
signal: ac.signal,
|
|
8174
|
-
stdout: "pipe",
|
|
8175
|
-
stderr: "pipe"
|
|
8176
|
-
});
|
|
8177
|
-
for await (const chunk of proc.output()) onData(typeof chunk.data === "string" ? Buffer.from(chunk.data, "utf8") : chunk.data);
|
|
8178
|
-
return { exitCode: (await proc).exitCode };
|
|
8179
|
-
} catch (err) {
|
|
8180
|
-
if (signal?.aborted) throw new Error("aborted");
|
|
8181
|
-
if (timedOut) throw new Error(`timeout:${timeout}`);
|
|
8182
|
-
throw err;
|
|
8183
|
-
} finally {
|
|
8184
|
-
if (timer) clearTimeout(timer);
|
|
8185
|
-
signal?.removeEventListener("abort", onAbort);
|
|
8186
|
-
}
|
|
8187
|
-
} };
|
|
8188
|
-
}
|
|
8189
|
-
//#endregion
|
|
8190
|
-
//#region src/vm-manager.ts
|
|
8191
|
-
var GUEST_WORKSPACE$1 = "/workspace";
|
|
8081
|
+
* Memory-backed VFS mount used by the daemon to inject task-context
|
|
8082
|
+
* skills (#943 slice 1.5). Sibling of /workspace, NOT a sub-path —
|
|
8083
|
+
* Gondolin mounts can't nest. The agent's Gondolin-bound Read tool
|
|
8084
|
+
* accepts paths under this prefix (see toGuestPath in tool-operations.ts).
|
|
8085
|
+
*
|
|
8086
|
+
* Why MemoryProvider rather than a path under /workspace:
|
|
8087
|
+
* - Injected skills are ephemeral by intent: per-task-attempt input
|
|
8088
|
+
* scoped to the VM lifetime. MemoryProvider models that exactly —
|
|
8089
|
+
* in-memory, per-VM-instance, zero host artefacts, automatic
|
|
8090
|
+
* cleanup on VM close.
|
|
8091
|
+
* - Writing under /workspace fails in worktrees because we symlink
|
|
8092
|
+
* `.moltnet/` to the main repo (so credentials are reachable from
|
|
8093
|
+
* worktrees), and Gondolin's RealFSProvider correctly refuses to
|
|
8094
|
+
* create paths whose ancestors' realpath escapes the mount root.
|
|
8095
|
+
* That refusal is a deliberate sandbox-escape protection, not a
|
|
8096
|
+
* bug. See diary semantic entry cd27d9d3-efdc-4aec-ac0d-5fd8ce258d1f
|
|
8097
|
+
* and episodic 7affbfeb-18a2-4963-aeac-c177eb2afa2d for the full
|
|
8098
|
+
* investigation and the alternatives we rejected.
|
|
8099
|
+
*/
|
|
8100
|
+
var GUEST_TASK_SKILLS_MOUNT = "/moltnet-task-skills";
|
|
8192
8101
|
/**
|
|
8193
8102
|
* Resolve the main worktree root (where .moltnet/ lives — it's untracked,
|
|
8194
8103
|
* only exists in the main worktree, not in git worktrees).
|
|
@@ -8317,7 +8226,10 @@ async function resumeVm(config) {
|
|
|
8317
8226
|
env: vmEnv,
|
|
8318
8227
|
...resources?.memory && { memory: resources.memory },
|
|
8319
8228
|
...resources?.cpus && { cpus: resources.cpus },
|
|
8320
|
-
vfs: { mounts: {
|
|
8229
|
+
vfs: { mounts: {
|
|
8230
|
+
[GUEST_WORKSPACE$2]: workspaceProvider,
|
|
8231
|
+
[GUEST_TASK_SKILLS_MOUNT]: new MemoryProvider()
|
|
8232
|
+
} }
|
|
8321
8233
|
});
|
|
8322
8234
|
await vm.exec(`sh -c '
|
|
8323
8235
|
cp /etc/gondolin/mitm/ca.crt /usr/local/share/ca-certificates/gondolin-mitm.crt
|
|
@@ -8347,7 +8259,7 @@ nameserver 1.1.1.1" > /etc/resolv.conf'`);
|
|
|
8347
8259
|
vm,
|
|
8348
8260
|
credentials: creds,
|
|
8349
8261
|
mountPath: config.mountPath,
|
|
8350
|
-
guestWorkspace: GUEST_WORKSPACE$1,
|
|
8262
|
+
guestWorkspace: GUEST_WORKSPACE$2,
|
|
8351
8263
|
agentDir
|
|
8352
8264
|
};
|
|
8353
8265
|
}
|
|
@@ -8400,6 +8312,137 @@ function ensureRelativeWorktreePaths(gitconfig) {
|
|
|
8400
8312
|
return `${gitconfig}${gitconfig.endsWith("\n") ? "" : "\n"}[worktree]\n\tuseRelativePaths = true\n`;
|
|
8401
8313
|
}
|
|
8402
8314
|
//#endregion
|
|
8315
|
+
//#region src/tool-operations.ts
|
|
8316
|
+
/**
|
|
8317
|
+
* Gondolin tool operations: redirect pi's built-in tool operations
|
|
8318
|
+
* (read, write, edit, bash) to execute inside the VM.
|
|
8319
|
+
*
|
|
8320
|
+
* Follows the same pattern as upstream pi-gondolin.ts — pi's tool factories
|
|
8321
|
+
* accept an `operations` object that provides the underlying I/O.
|
|
8322
|
+
*/
|
|
8323
|
+
var GUEST_WORKSPACE$1 = "/workspace";
|
|
8324
|
+
function shQuote(s) {
|
|
8325
|
+
return "'" + s.replace(/'/g, "'\\''") + "'";
|
|
8326
|
+
}
|
|
8327
|
+
/**
|
|
8328
|
+
* Map a host-side absolute path to a guest-side /workspace path.
|
|
8329
|
+
* Throws if the path escapes the workspace.
|
|
8330
|
+
*/
|
|
8331
|
+
function toGuestPath(localCwd, localPath) {
|
|
8332
|
+
if (localPath === GUEST_WORKSPACE$1 || localPath.startsWith(`${GUEST_WORKSPACE$1}/`)) return localPath;
|
|
8333
|
+
if (localPath === "/moltnet-task-skills" || localPath.startsWith(`/moltnet-task-skills/`)) return localPath;
|
|
8334
|
+
const rel = path.relative(localCwd, localPath);
|
|
8335
|
+
if (rel === "") return GUEST_WORKSPACE$1;
|
|
8336
|
+
if (rel.startsWith("..") || path.isAbsolute(rel)) throw new Error(`path escapes workspace: ${localPath}`);
|
|
8337
|
+
const posixRel = rel.split(path.sep).join(path.posix.sep);
|
|
8338
|
+
return path.posix.join(GUEST_WORKSPACE$1, posixRel);
|
|
8339
|
+
}
|
|
8340
|
+
function createGondolinReadOps(vm, localCwd) {
|
|
8341
|
+
return {
|
|
8342
|
+
readFile: async (p) => {
|
|
8343
|
+
const r = await vm.exec(["/bin/cat", toGuestPath(localCwd, p)]);
|
|
8344
|
+
if (!r.ok) throw new Error(`cat failed (${r.exitCode}): ${r.stderr}`);
|
|
8345
|
+
return r.stdoutBuffer;
|
|
8346
|
+
},
|
|
8347
|
+
access: async (p) => {
|
|
8348
|
+
if (!(await vm.exec([
|
|
8349
|
+
"/bin/sh",
|
|
8350
|
+
"-lc",
|
|
8351
|
+
`test -r ${shQuote(toGuestPath(localCwd, p))}`
|
|
8352
|
+
])).ok) throw new Error(`not readable: ${p}`);
|
|
8353
|
+
},
|
|
8354
|
+
detectImageMimeType: async (p) => {
|
|
8355
|
+
try {
|
|
8356
|
+
const r = await vm.exec([
|
|
8357
|
+
"/bin/sh",
|
|
8358
|
+
"-lc",
|
|
8359
|
+
`file --mime-type -b ${shQuote(toGuestPath(localCwd, p))}`
|
|
8360
|
+
]);
|
|
8361
|
+
if (!r.ok) return null;
|
|
8362
|
+
const m = r.stdout.trim();
|
|
8363
|
+
return [
|
|
8364
|
+
"image/jpeg",
|
|
8365
|
+
"image/png",
|
|
8366
|
+
"image/gif",
|
|
8367
|
+
"image/webp"
|
|
8368
|
+
].includes(m) ? m : null;
|
|
8369
|
+
} catch {
|
|
8370
|
+
return null;
|
|
8371
|
+
}
|
|
8372
|
+
}
|
|
8373
|
+
};
|
|
8374
|
+
}
|
|
8375
|
+
function createGondolinWriteOps(vm, localCwd) {
|
|
8376
|
+
return {
|
|
8377
|
+
writeFile: async (p, content) => {
|
|
8378
|
+
const guestPath = toGuestPath(localCwd, p);
|
|
8379
|
+
const dir = path.posix.dirname(guestPath);
|
|
8380
|
+
const b64 = Buffer.from(content, "utf8").toString("base64");
|
|
8381
|
+
const r = await vm.exec([
|
|
8382
|
+
"/bin/sh",
|
|
8383
|
+
"-lc",
|
|
8384
|
+
[
|
|
8385
|
+
"set -eu",
|
|
8386
|
+
`mkdir -p ${shQuote(dir)}`,
|
|
8387
|
+
`echo ${shQuote(b64)} | base64 -d > ${shQuote(guestPath)}`
|
|
8388
|
+
].join("\n")
|
|
8389
|
+
]);
|
|
8390
|
+
if (!r.ok) throw new Error(`write failed (${r.exitCode}): ${r.stderr}`);
|
|
8391
|
+
},
|
|
8392
|
+
mkdir: async (dir) => {
|
|
8393
|
+
const r = await vm.exec([
|
|
8394
|
+
"/bin/mkdir",
|
|
8395
|
+
"-p",
|
|
8396
|
+
toGuestPath(localCwd, dir)
|
|
8397
|
+
]);
|
|
8398
|
+
if (!r.ok) throw new Error(`mkdir failed (${r.exitCode}): ${r.stderr}`);
|
|
8399
|
+
}
|
|
8400
|
+
};
|
|
8401
|
+
}
|
|
8402
|
+
function createGondolinEditOps(vm, localCwd) {
|
|
8403
|
+
const r = createGondolinReadOps(vm, localCwd);
|
|
8404
|
+
const w = createGondolinWriteOps(vm, localCwd);
|
|
8405
|
+
return {
|
|
8406
|
+
readFile: r.readFile,
|
|
8407
|
+
access: r.access,
|
|
8408
|
+
writeFile: w.writeFile
|
|
8409
|
+
};
|
|
8410
|
+
}
|
|
8411
|
+
function createGondolinBashOps(vm, localCwd) {
|
|
8412
|
+
return { exec: async (command, cwd, { onData, signal, timeout, env }) => {
|
|
8413
|
+
const guestCwd = toGuestPath(localCwd, cwd);
|
|
8414
|
+
const ac = new AbortController();
|
|
8415
|
+
const onAbort = () => ac.abort();
|
|
8416
|
+
signal?.addEventListener("abort", onAbort, { once: true });
|
|
8417
|
+
let timedOut = false;
|
|
8418
|
+
const timer = timeout && timeout > 0 ? setTimeout(() => {
|
|
8419
|
+
timedOut = true;
|
|
8420
|
+
ac.abort();
|
|
8421
|
+
}, timeout * 1e3) : void 0;
|
|
8422
|
+
try {
|
|
8423
|
+
const proc = vm.exec([
|
|
8424
|
+
"/bin/sh",
|
|
8425
|
+
"-lc",
|
|
8426
|
+
command
|
|
8427
|
+
], {
|
|
8428
|
+
cwd: guestCwd,
|
|
8429
|
+
signal: ac.signal,
|
|
8430
|
+
stdout: "pipe",
|
|
8431
|
+
stderr: "pipe"
|
|
8432
|
+
});
|
|
8433
|
+
for await (const chunk of proc.output()) onData(typeof chunk.data === "string" ? Buffer.from(chunk.data, "utf8") : chunk.data);
|
|
8434
|
+
return { exitCode: (await proc).exitCode };
|
|
8435
|
+
} catch (err) {
|
|
8436
|
+
if (signal?.aborted) throw new Error("aborted");
|
|
8437
|
+
if (timedOut) throw new Error(`timeout:${timeout}`);
|
|
8438
|
+
throw err;
|
|
8439
|
+
} finally {
|
|
8440
|
+
if (timer) clearTimeout(timer);
|
|
8441
|
+
signal?.removeEventListener("abort", onAbort);
|
|
8442
|
+
}
|
|
8443
|
+
} };
|
|
8444
|
+
}
|
|
8445
|
+
//#endregion
|
|
8403
8446
|
//#region src/otel/index.ts
|
|
8404
8447
|
var TRACER_NAME = "@themoltnet/pi-extension/otel";
|
|
8405
8448
|
function stripReservedAttrs(attrs) {
|
|
@@ -8537,6 +8580,61 @@ function extractUsage(message) {
|
|
|
8537
8580
|
};
|
|
8538
8581
|
}
|
|
8539
8582
|
//#endregion
|
|
8583
|
+
//#region ../agent-runtime/src/context-bindings.ts
|
|
8584
|
+
var PROMPT_SEPARATOR = "\n\n---\n\n";
|
|
8585
|
+
/**
|
|
8586
|
+
* Resolve `task.input.context[]` into delivered side-effects (skills
|
|
8587
|
+
* persisted via `deliver.skill`) and prompt fragments
|
|
8588
|
+
* (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
|
|
8589
|
+
* built prompt.
|
|
8590
|
+
*
|
|
8591
|
+
* Per-binding semantics (V1):
|
|
8592
|
+
* - `skill` → `deliver.skill({ slug, content })` once per ref.
|
|
8593
|
+
* Slug collisions on distinct contents are
|
|
8594
|
+
* refused loudly.
|
|
8595
|
+
* - `prompt_prefix` → content appended to `systemPromptPrefix` with
|
|
8596
|
+
* the canonical `\n\n---\n\n` separator (in
|
|
8597
|
+
* declared order).
|
|
8598
|
+
* - `user_inline` → content appended to `userInlineSuffix` in
|
|
8599
|
+
* declared order, same separator.
|
|
8600
|
+
*
|
|
8601
|
+
* No fetching, no hashing — bytes are inlined in `ContextRef.content`,
|
|
8602
|
+
* and the task's `inputCid` already pins the entire input. The imposer
|
|
8603
|
+
* chose these bytes; the resolver just dispatches them.
|
|
8604
|
+
*
|
|
8605
|
+
* The function is pure with respect to its arguments: file writes are
|
|
8606
|
+
* confined to the injected `deliver` callback, which makes the
|
|
8607
|
+
* resolver trivial to test.
|
|
8608
|
+
*/
|
|
8609
|
+
async function resolveTaskContext(args) {
|
|
8610
|
+
const promptParts = [];
|
|
8611
|
+
const userParts = [];
|
|
8612
|
+
const injected = [];
|
|
8613
|
+
const usedSlugs = /* @__PURE__ */ new Map();
|
|
8614
|
+
for (const ref of args.context) {
|
|
8615
|
+
if (ref.binding === "skill") {
|
|
8616
|
+
const prior = usedSlugs.get(ref.slug);
|
|
8617
|
+
if (prior !== void 0) {
|
|
8618
|
+
if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
|
|
8619
|
+
injected.push(ref);
|
|
8620
|
+
continue;
|
|
8621
|
+
}
|
|
8622
|
+
usedSlugs.set(ref.slug, ref.content);
|
|
8623
|
+
await args.deliver.skill({
|
|
8624
|
+
slug: ref.slug,
|
|
8625
|
+
content: ref.content
|
|
8626
|
+
});
|
|
8627
|
+
} else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
|
|
8628
|
+
else userParts.push(ref.content);
|
|
8629
|
+
injected.push(ref);
|
|
8630
|
+
}
|
|
8631
|
+
return {
|
|
8632
|
+
injected,
|
|
8633
|
+
systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
|
|
8634
|
+
userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
|
|
8635
|
+
};
|
|
8636
|
+
}
|
|
8637
|
+
//#endregion
|
|
8540
8638
|
//#region ../tasks/src/formats.ts
|
|
8541
8639
|
/**
|
|
8542
8640
|
* Register TypeBox string formats used across Task / TaskOutput / task-type
|
|
@@ -8551,6 +8649,55 @@ var UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a
|
|
|
8551
8649
|
if (!FormatRegistry.Has("uuid")) FormatRegistry.Set("uuid", (v) => UUID_RE.test(v));
|
|
8552
8650
|
if (!FormatRegistry.Has("date-time")) FormatRegistry.Set("date-time", (v) => !Number.isNaN(Date.parse(v)));
|
|
8553
8651
|
//#endregion
|
|
8652
|
+
//#region ../tasks/src/context.ts
|
|
8653
|
+
/**
|
|
8654
|
+
* How an executor delivers a context entry to its underlying LLM.
|
|
8655
|
+
* V1 bindings only; Tier-2 (reference_file, mcp_resource, imported_file,
|
|
8656
|
+
* tool_response_seed, additional_context_hook) ship in a later slice.
|
|
8657
|
+
*/
|
|
8658
|
+
var ContextBinding = Type$1.Union([
|
|
8659
|
+
Type$1.Literal("skill"),
|
|
8660
|
+
Type$1.Literal("prompt_prefix"),
|
|
8661
|
+
Type$1.Literal("user_inline")
|
|
8662
|
+
], { $id: "ContextBinding" });
|
|
8663
|
+
/**
|
|
8664
|
+
* One context entry. Bytes are inlined: the imposer chose them, and the
|
|
8665
|
+
* task's `inputCid` already pins the entire input — including
|
|
8666
|
+
* `context[]` — so we don't need a separate per-entry hash, fetcher, or
|
|
8667
|
+
* flagged-content gate. Tasks reference rendered packs (or any other
|
|
8668
|
+
* external content) by copying their bytes into `content` at task
|
|
8669
|
+
* creation time.
|
|
8670
|
+
*
|
|
8671
|
+
* - `slug` — short identifier the daemon uses to disambiguate
|
|
8672
|
+
* entries. For `skill` binding it becomes the directory
|
|
8673
|
+
* name under the runtime's skill discovery path. Must be
|
|
8674
|
+
* kebab-case-safe (alphanumeric + dashes/underscores).
|
|
8675
|
+
* - `binding` — how the bytes are delivered to the LLM (see above).
|
|
8676
|
+
* - `content` — the actual bytes (UTF-8 text). Capped at 32 KiB per
|
|
8677
|
+
* entry; total per-task context bytes are bounded by the
|
|
8678
|
+
* soft `maxItems` cap and per-binding daemon limits.
|
|
8679
|
+
*/
|
|
8680
|
+
var ContextRef = Type$1.Object({
|
|
8681
|
+
slug: Type$1.String({
|
|
8682
|
+
minLength: 1,
|
|
8683
|
+
maxLength: 64,
|
|
8684
|
+
pattern: "^[a-zA-Z0-9_-]+$"
|
|
8685
|
+
}),
|
|
8686
|
+
binding: ContextBinding,
|
|
8687
|
+
content: Type$1.String({
|
|
8688
|
+
minLength: 1,
|
|
8689
|
+
maxLength: 32768
|
|
8690
|
+
})
|
|
8691
|
+
}, {
|
|
8692
|
+
$id: "ContextRef",
|
|
8693
|
+
additionalProperties: false
|
|
8694
|
+
});
|
|
8695
|
+
/** Reusable input fragment for any task type. Soft cap at 5 items. */
|
|
8696
|
+
var TaskContext = Type$1.Array(ContextRef, {
|
|
8697
|
+
$id: "TaskContext",
|
|
8698
|
+
maxItems: 5
|
|
8699
|
+
});
|
|
8700
|
+
//#endregion
|
|
8554
8701
|
//#region ../tasks/src/rubric.ts
|
|
8555
8702
|
/**
|
|
8556
8703
|
* Rubric — structured acceptance criteria used by judgment tasks.
|
|
@@ -9099,6 +9246,60 @@ var RenderPackOutput = Type$1.Object({
|
|
|
9099
9246
|
additionalProperties: false
|
|
9100
9247
|
});
|
|
9101
9248
|
//#endregion
|
|
9249
|
+
//#region ../tasks/src/task-types/run-eval.ts
|
|
9250
|
+
/**
|
|
9251
|
+
* `run_eval` — execute a scenario prompt under a named variant for
|
|
9252
|
+
* later cross-variant grading by `judge_eval_variant` (Slice 2).
|
|
9253
|
+
*
|
|
9254
|
+
* output_kind: artifact
|
|
9255
|
+
* criteria: optional (when set, output.verification is required —
|
|
9256
|
+
* producer self-assessment; the judge is the binding evaluator)
|
|
9257
|
+
* references: not required (scenario lives entirely in input)
|
|
9258
|
+
*/
|
|
9259
|
+
var RUN_EVAL_TYPE = "run_eval";
|
|
9260
|
+
var RunEvalInput = Type$1.Object({
|
|
9261
|
+
scenario: Type$1.Object({
|
|
9262
|
+
prompt: Type$1.String({ minLength: 1 }),
|
|
9263
|
+
inputFiles: Type$1.Optional(Type$1.Array(Type$1.String({ minLength: 1 })))
|
|
9264
|
+
}, { additionalProperties: false }),
|
|
9265
|
+
variantLabel: Type$1.String({
|
|
9266
|
+
minLength: 1,
|
|
9267
|
+
maxLength: 64
|
|
9268
|
+
}),
|
|
9269
|
+
context: TaskContext,
|
|
9270
|
+
successCriteria: Type$1.Optional(SuccessCriteria)
|
|
9271
|
+
}, {
|
|
9272
|
+
$id: "RunEvalInput",
|
|
9273
|
+
additionalProperties: false
|
|
9274
|
+
});
|
|
9275
|
+
var RunEvalOutput = Type$1.Object({
|
|
9276
|
+
response: Type$1.String({ minLength: 1 }),
|
|
9277
|
+
artifacts: Type$1.Optional(Type$1.Array(Type$1.Object({
|
|
9278
|
+
path: Type$1.String({ minLength: 1 }),
|
|
9279
|
+
cid: Type$1.String({ minLength: 1 })
|
|
9280
|
+
}, { additionalProperties: false }))),
|
|
9281
|
+
totalTokens: Type$1.Integer({ minimum: 0 }),
|
|
9282
|
+
durationMs: Type$1.Integer({ minimum: 0 }),
|
|
9283
|
+
traceparent: Type$1.String({ minLength: 1 }),
|
|
9284
|
+
verification: Type$1.Optional(VerificationRecord)
|
|
9285
|
+
}, {
|
|
9286
|
+
$id: "RunEvalOutput",
|
|
9287
|
+
additionalProperties: false
|
|
9288
|
+
});
|
|
9289
|
+
/**
|
|
9290
|
+
* Cross-field rule mirroring the `requireVerificationWhenCriteriaPresent`
|
|
9291
|
+
* rule used by the brief task types: when input declares
|
|
9292
|
+
* `successCriteria`, output MUST carry `verification`; when it doesn't,
|
|
9293
|
+
* output MUST NOT carry one.
|
|
9294
|
+
*/
|
|
9295
|
+
function validateRunEvalOutput(output, input) {
|
|
9296
|
+
const hasCriteria = input !== null && input !== void 0 && input.successCriteria !== void 0;
|
|
9297
|
+
const hasVerification = output !== null && output !== void 0 && output.verification !== void 0;
|
|
9298
|
+
if (hasCriteria && !hasVerification) return "output.verification is required because input.successCriteria is set; the producer LLM must self-assess against the criteria";
|
|
9299
|
+
if (!hasCriteria && hasVerification) return "output.verification was supplied but input.successCriteria is unset; omit verification when there are no criteria to assess against";
|
|
9300
|
+
return null;
|
|
9301
|
+
}
|
|
9302
|
+
//#endregion
|
|
9102
9303
|
//#region ../tasks/src/task-types/index.ts
|
|
9103
9304
|
/**
|
|
9104
9305
|
* Validate that a judgment-task input carries a rubric inside its
|
|
@@ -9177,6 +9378,14 @@ var BUILT_IN_TASK_TYPES = {
|
|
|
9177
9378
|
requiresReferences: true,
|
|
9178
9379
|
validateInput: validateJudgmentInput,
|
|
9179
9380
|
validateOutput: validateJudgePackOutput
|
|
9381
|
+
},
|
|
9382
|
+
[RUN_EVAL_TYPE]: {
|
|
9383
|
+
name: RUN_EVAL_TYPE,
|
|
9384
|
+
inputSchema: RunEvalInput,
|
|
9385
|
+
outputSchema: RunEvalOutput,
|
|
9386
|
+
outputKind: "artifact",
|
|
9387
|
+
requiresReferences: false,
|
|
9388
|
+
validateOutput: validateRunEvalOutput
|
|
9180
9389
|
}
|
|
9181
9390
|
};
|
|
9182
9391
|
//#endregion
|
|
@@ -9275,6 +9484,14 @@ var ExecutorTrustLevel = Type$1.Union([
|
|
|
9275
9484
|
Type$1.Literal("releaseVerifiedTool"),
|
|
9276
9485
|
Type$1.Literal("sandboxAttested")
|
|
9277
9486
|
], { $id: "ExecutorTrustLevel" });
|
|
9487
|
+
/** Identifies a (provider, model) daemon pair allowed to claim a task. */
|
|
9488
|
+
var ExecutorRef = Type$1.Object({
|
|
9489
|
+
provider: Type$1.String({ minLength: 1 }),
|
|
9490
|
+
model: Type$1.String({ minLength: 1 })
|
|
9491
|
+
}, {
|
|
9492
|
+
$id: "ExecutorRef",
|
|
9493
|
+
additionalProperties: false
|
|
9494
|
+
});
|
|
9278
9495
|
var OutputKind = Type$1.Union([Type$1.Literal("artifact"), Type$1.Literal("judgment")], { $id: "OutputKind" });
|
|
9279
9496
|
var TaskMessageKind = Type$1.Union([
|
|
9280
9497
|
Type$1.Literal("text_delta"),
|
|
@@ -9367,6 +9584,7 @@ Type$1.Object({
|
|
|
9367
9584
|
imposedByHumanId: Type$1.Union([Uuid, Type$1.Null()]),
|
|
9368
9585
|
acceptedAttemptN: Type$1.Union([Type$1.Number(), Type$1.Null()]),
|
|
9369
9586
|
requiredExecutorTrustLevel: ExecutorTrustLevel,
|
|
9587
|
+
allowedExecutors: Type$1.Array(ExecutorRef, { maxItems: 16 }),
|
|
9370
9588
|
status: TaskStatus,
|
|
9371
9589
|
queuedAt: IsoTimestamp,
|
|
9372
9590
|
completedAt: Type$1.Union([IsoTimestamp, Type$1.Null()]),
|
|
@@ -9552,7 +9770,7 @@ function buildFinalOutputBlock(opts) {
|
|
|
9552
9770
|
//#endregion
|
|
9553
9771
|
//#region ../agent-runtime/src/prompts/assess-brief.ts
|
|
9554
9772
|
/**
|
|
9555
|
-
* Build the
|
|
9773
|
+
* Build the first user-message prompt for an `assess_brief` judge attempt.
|
|
9556
9774
|
*
|
|
9557
9775
|
* Design note — no pre-resolved `target` projection
|
|
9558
9776
|
* --------------------------------------------------
|
|
@@ -9573,7 +9791,7 @@ function buildFinalOutputBlock(opts) {
|
|
|
9573
9791
|
* future task types whose products are docs / configs / changes /
|
|
9574
9792
|
* anything) work without any code path here.
|
|
9575
9793
|
*/
|
|
9576
|
-
function
|
|
9794
|
+
function buildAssessBriefUserPrompt(input, ctx) {
|
|
9577
9795
|
const rubric = input.successCriteria.rubric;
|
|
9578
9796
|
const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
|
|
9579
9797
|
const preambleSection = rubric.preamble ? [
|
|
@@ -9688,7 +9906,7 @@ function buildSelfVerificationBlock(taskId) {
|
|
|
9688
9906
|
//#endregion
|
|
9689
9907
|
//#region ../agent-runtime/src/prompts/curate-pack.ts
|
|
9690
9908
|
/**
|
|
9691
|
-
* Build the
|
|
9909
|
+
* Build the first user-message prompt for a `curate_pack` task.
|
|
9692
9910
|
*
|
|
9693
9911
|
* Design note: this prompt is deliberately NOT a numbered command
|
|
9694
9912
|
* sequence. The curator's value comes from judgment — inferring scope
|
|
@@ -9709,7 +9927,7 @@ function buildSelfVerificationBlock(taskId) {
|
|
|
9709
9927
|
* emits pruned state at phase boundaries so a follow-up session can
|
|
9710
9928
|
* resume without replaying the tool history.
|
|
9711
9929
|
*/
|
|
9712
|
-
function buildCuratePackPrompt(input, ctx) {
|
|
9930
|
+
function buildCuratePackUserPrompt(input, ctx) {
|
|
9713
9931
|
const { diaryId, taskPrompt, entryTypes, tagFilters, tokenBudget, recipe } = input;
|
|
9714
9932
|
const entryTypesPinned = Boolean(entryTypes);
|
|
9715
9933
|
const resolvedRecipe = recipe ?? "topic-focused-v1";
|
|
@@ -9845,13 +10063,13 @@ function buildCuratePackPrompt(input, ctx) {
|
|
|
9845
10063
|
//#endregion
|
|
9846
10064
|
//#region ../agent-runtime/src/prompts/fulfill-brief.ts
|
|
9847
10065
|
/**
|
|
9848
|
-
* Build the
|
|
10066
|
+
* Build the first user-message prompt for a `fulfill_brief` task.
|
|
9849
10067
|
*
|
|
9850
10068
|
* Generalized from the original `resolve-issue` prompt. No longer
|
|
9851
10069
|
* GitHub-specific; references live on `Task.references[]` and the agent
|
|
9852
10070
|
* is told to inspect them itself.
|
|
9853
10071
|
*/
|
|
9854
|
-
function buildFulfillBriefPrompt(input, ctx) {
|
|
10072
|
+
function buildFulfillBriefUserPrompt(input, ctx) {
|
|
9855
10073
|
const { brief, title, acceptanceCriteria, seedFiles, scopeHint } = input;
|
|
9856
10074
|
const criteriaSection = acceptanceCriteria?.length ? [
|
|
9857
10075
|
"### Acceptance criteria",
|
|
@@ -9931,7 +10149,7 @@ function buildFulfillBriefPrompt(input, ctx) {
|
|
|
9931
10149
|
}
|
|
9932
10150
|
//#endregion
|
|
9933
10151
|
//#region ../agent-runtime/src/prompts/judge-pack.ts
|
|
9934
|
-
function buildJudgePackPrompt(input, ctx) {
|
|
10152
|
+
function buildJudgePackUserPrompt(input, ctx) {
|
|
9935
10153
|
const { renderedPackId, sourcePackId, successCriteria } = input;
|
|
9936
10154
|
const rubric = successCriteria.rubric;
|
|
9937
10155
|
const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
|
|
@@ -10058,10 +10276,10 @@ function buildJudgePackPrompt(input, ctx) {
|
|
|
10058
10276
|
//#endregion
|
|
10059
10277
|
//#region ../agent-runtime/src/prompts/render-pack.ts
|
|
10060
10278
|
/**
|
|
10061
|
-
* Build the
|
|
10279
|
+
* Build the first user-message prompt for a `render_pack` task. Almost mechanical:
|
|
10062
10280
|
* wraps `moltnet_pack_render` and emits the receipt.
|
|
10063
10281
|
*/
|
|
10064
|
-
function buildRenderPackPrompt(input, ctx) {
|
|
10282
|
+
function buildRenderPackUserPrompt(input, ctx) {
|
|
10065
10283
|
const { packId, persist = true, pinned = false } = input;
|
|
10066
10284
|
return [
|
|
10067
10285
|
"# Render Pack Agent",
|
|
@@ -10115,19 +10333,87 @@ function buildRenderPackPrompt(input, ctx) {
|
|
|
10115
10333
|
].join("\n");
|
|
10116
10334
|
}
|
|
10117
10335
|
//#endregion
|
|
10336
|
+
//#region ../agent-runtime/src/prompts/run-eval.ts
|
|
10337
|
+
/**
|
|
10338
|
+
* Build the first user-message prompt for a `run_eval` task.
|
|
10339
|
+
*
|
|
10340
|
+
* Free-form: no git workflow, no commit ceremony. The executor produces
|
|
10341
|
+
* a textual response (and optional file artifacts) that a later
|
|
10342
|
+
* `judge_eval_variant` task (Slice 2) grades against the rubric.
|
|
10343
|
+
*
|
|
10344
|
+
* Context delivery is handled by `resolveTaskContext` (see
|
|
10345
|
+
* libs/agent-runtime/src/context-bindings.ts) and runs BEFORE this
|
|
10346
|
+
* prompt is rendered: `prompt_prefix` items are concatenated ahead of
|
|
10347
|
+
* the body, `skill` items are persisted at the runtime's skill path,
|
|
10348
|
+
* and `user_inline` items are appended to the first user message. This
|
|
10349
|
+
* builder does NOT inline `input.context[]` itself.
|
|
10350
|
+
*/
|
|
10351
|
+
function buildRunEvalUserPrompt(input, ctx) {
|
|
10352
|
+
const { scenario, variantLabel, successCriteria } = input;
|
|
10353
|
+
const inputFilesSection = scenario.inputFiles?.length ? [
|
|
10354
|
+
"### Input files",
|
|
10355
|
+
"",
|
|
10356
|
+
...scenario.inputFiles.map((f) => `- \`${f}\``),
|
|
10357
|
+
""
|
|
10358
|
+
].join("\n") : "";
|
|
10359
|
+
const verificationSection = successCriteria ? buildSelfVerificationBlock(ctx.taskId) : "";
|
|
10360
|
+
const correlationSection = ctx.correlationId ? [
|
|
10361
|
+
"### Correlation",
|
|
10362
|
+
"",
|
|
10363
|
+
`This task carries correlationId \`${ctx.correlationId}\`. It joins`,
|
|
10364
|
+
"this variant to its sibling `run_eval` tasks (other variants of the",
|
|
10365
|
+
"same scenario) and to the eventual `judge_eval_variant` task that",
|
|
10366
|
+
"will grade them together. You do not need to act on it directly —",
|
|
10367
|
+
"it is recorded for cross-variant aggregation at query time.",
|
|
10368
|
+
""
|
|
10369
|
+
].join("\n") : "";
|
|
10370
|
+
const finalOutputBlock = buildFinalOutputBlock({
|
|
10371
|
+
taskType: "run_eval",
|
|
10372
|
+
outputSchemaName: "RunEvalOutput",
|
|
10373
|
+
shapeSketch: [
|
|
10374
|
+
"{",
|
|
10375
|
+
" \"response\": \"<your free-form answer>\",",
|
|
10376
|
+
" \"artifacts\": [{ \"path\": \"...\", \"cid\": \"...\" }], // optional",
|
|
10377
|
+
" \"totalTokens\": <int>,",
|
|
10378
|
+
" \"durationMs\": <int>,",
|
|
10379
|
+
" \"traceparent\": \"<from claim>\",",
|
|
10380
|
+
" \"verification\": <required iff input.successCriteria; see Self-verification>",
|
|
10381
|
+
"}"
|
|
10382
|
+
].join("\n")
|
|
10383
|
+
});
|
|
10384
|
+
return [
|
|
10385
|
+
"# Run Eval Agent\n",
|
|
10386
|
+
`You are running an evaluation scenario as variant \`${variantLabel}\`.\nTask id: \`${ctx.taskId}\`\n`,
|
|
10387
|
+
correlationSection,
|
|
10388
|
+
`### Scenario\n\n${scenario.prompt}\n`,
|
|
10389
|
+
inputFilesSection,
|
|
10390
|
+
verificationSection,
|
|
10391
|
+
finalOutputBlock
|
|
10392
|
+
].filter((s) => s !== "").join("\n");
|
|
10393
|
+
}
|
|
10394
|
+
//#endregion
|
|
10118
10395
|
//#region ../agent-runtime/src/prompts/index.ts
|
|
10119
10396
|
/**
|
|
10120
|
-
* Resolve the correct prompt builder for `task.taskType` and
|
|
10121
|
-
* Throws if the type is unknown or the input fails TypeBox
|
|
10122
|
-
|
|
10123
|
-
|
|
10397
|
+
* Resolve the correct user-prompt builder for `task.taskType` and
|
|
10398
|
+
* invoke it. Throws if the type is unknown or the input fails TypeBox
|
|
10399
|
+
* validation.
|
|
10400
|
+
*
|
|
10401
|
+
* Role note: the returned string is delivered as the **first user
|
|
10402
|
+
* message** of the agent's session (pi-coding-agent's
|
|
10403
|
+
* `session.prompt(text)` puts text in the user role). The system
|
|
10404
|
+
* prompt is built separately by pi from `appendSystemPrompt` (the
|
|
10405
|
+
* runtime instructor lives there). Builders here are free-form Markdown
|
|
10406
|
+
* for the user turn; they don't replace or prepend to the system
|
|
10407
|
+
* prompt.
|
|
10408
|
+
*/
|
|
10409
|
+
function buildTaskUserPrompt(task, ctx) {
|
|
10124
10410
|
switch (task.taskType) {
|
|
10125
10411
|
case FULFILL_BRIEF_TYPE:
|
|
10126
10412
|
if (!Value.Check(FulfillBriefInput, task.input)) {
|
|
10127
10413
|
const errors = [...Value.Errors(FulfillBriefInput, task.input)];
|
|
10128
10414
|
throw new Error(`fulfill_brief input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
|
|
10129
10415
|
}
|
|
10130
|
-
return
|
|
10416
|
+
return buildFulfillBriefUserPrompt(task.input, {
|
|
10131
10417
|
diaryId: ctx.diaryId,
|
|
10132
10418
|
taskId: ctx.taskId,
|
|
10133
10419
|
correlationId: task.correlationId
|
|
@@ -10137,7 +10423,7 @@ function buildPromptForTask(task, ctx) {
|
|
|
10137
10423
|
const errors = [...Value.Errors(AssessBriefInput, task.input)];
|
|
10138
10424
|
throw new Error(`assess_brief input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
|
|
10139
10425
|
}
|
|
10140
|
-
return
|
|
10426
|
+
return buildAssessBriefUserPrompt(task.input, {
|
|
10141
10427
|
diaryId: ctx.diaryId,
|
|
10142
10428
|
taskId: ctx.taskId
|
|
10143
10429
|
});
|
|
@@ -10146,7 +10432,7 @@ function buildPromptForTask(task, ctx) {
|
|
|
10146
10432
|
const errors = [...Value.Errors(CuratePackInput, task.input)];
|
|
10147
10433
|
throw new Error(`curate_pack input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
|
|
10148
10434
|
}
|
|
10149
|
-
return
|
|
10435
|
+
return buildCuratePackUserPrompt(task.input, {
|
|
10150
10436
|
diaryId: ctx.diaryId,
|
|
10151
10437
|
taskId: ctx.taskId
|
|
10152
10438
|
});
|
|
@@ -10155,7 +10441,7 @@ function buildPromptForTask(task, ctx) {
|
|
|
10155
10441
|
const errors = [...Value.Errors(RenderPackInput, task.input)];
|
|
10156
10442
|
throw new Error(`render_pack input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
|
|
10157
10443
|
}
|
|
10158
|
-
return
|
|
10444
|
+
return buildRenderPackUserPrompt(task.input, {
|
|
10159
10445
|
diaryId: ctx.diaryId,
|
|
10160
10446
|
taskId: ctx.taskId
|
|
10161
10447
|
});
|
|
@@ -10164,10 +10450,20 @@ function buildPromptForTask(task, ctx) {
|
|
|
10164
10450
|
const errors = [...Value.Errors(JudgePackInput, task.input)];
|
|
10165
10451
|
throw new Error(`judge_pack input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
|
|
10166
10452
|
}
|
|
10167
|
-
return
|
|
10453
|
+
return buildJudgePackUserPrompt(task.input, {
|
|
10168
10454
|
diaryId: ctx.diaryId,
|
|
10169
10455
|
taskId: ctx.taskId
|
|
10170
10456
|
});
|
|
10457
|
+
case RUN_EVAL_TYPE:
|
|
10458
|
+
if (!Value.Check(RunEvalInput, task.input)) {
|
|
10459
|
+
const errors = [...Value.Errors(RunEvalInput, task.input)];
|
|
10460
|
+
throw new Error(`run_eval input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
|
|
10461
|
+
}
|
|
10462
|
+
return buildRunEvalUserPrompt(task.input, {
|
|
10463
|
+
diaryId: ctx.diaryId,
|
|
10464
|
+
taskId: ctx.taskId,
|
|
10465
|
+
correlationId: task.correlationId
|
|
10466
|
+
});
|
|
10171
10467
|
default: throw new Error(`No prompt builder registered for taskType="${task.taskType}"`);
|
|
10172
10468
|
}
|
|
10173
10469
|
}
|
|
@@ -13639,6 +13935,114 @@ var require_multistream = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
|
13639
13935
|
module.exports.pino = pino;
|
|
13640
13936
|
})))();
|
|
13641
13937
|
//#endregion
|
|
13938
|
+
//#region src/runtime/inject-task-context.ts
|
|
13939
|
+
/**
|
|
13940
|
+
* Slice 1.5 of #943 — wire the agent-runtime resolver into the
|
|
13941
|
+
* pi-extension execution path.
|
|
13942
|
+
*
|
|
13943
|
+
* `resolveTaskContext` is a pure dispatcher; this module provides the
|
|
13944
|
+
* Gondolin-aware deliverer and the post-resolution shape the
|
|
13945
|
+
* `execute-pi-task` caller needs to splice into pi's setup:
|
|
13946
|
+
*
|
|
13947
|
+
* - `systemPromptPrefix` → fed into `appendSystemPrompt` alongside
|
|
13948
|
+
* the runtime instructor (it IS a system-prompt fragment).
|
|
13949
|
+
* - `userInlineSuffix` → appended to the `buildTaskUserPrompt`
|
|
13950
|
+
* output BEFORE `session.prompt(text)`.
|
|
13951
|
+
* - `skills` → spliced into the `skillsOverride` callback's
|
|
13952
|
+
* return value. pi includes them in `<available_skills>` in the
|
|
13953
|
+
* system prompt; the agent fetches the body on demand via the
|
|
13954
|
+
* Read tool.
|
|
13955
|
+
*
|
|
13956
|
+
* Skill files are written into the VM at
|
|
13957
|
+
* `/workspace/.moltnet/skills/<slug>/SKILL.md`. The agent's
|
|
13958
|
+
* Gondolin-bound Read tool is scoped to `/workspace`, so that path is
|
|
13959
|
+
* the only location the agent can actually read at runtime. pi only
|
|
13960
|
+
* reads `<available_skills>` metadata (name, description, location),
|
|
13961
|
+
* never the file body, so we construct synthetic `Skill` objects
|
|
13962
|
+
* pointing at the in-VM path without ever materialising the file on
|
|
13963
|
+
* the host.
|
|
13964
|
+
*/
|
|
13965
|
+
/**
|
|
13966
|
+
* Where in the VM we write skill bodies — the memory-backed mount
|
|
13967
|
+
* declared in `vm-manager.ts`. See the comment on
|
|
13968
|
+
* `GUEST_TASK_SKILLS_MOUNT` there for the full rationale (ephemeral
|
|
13969
|
+
* by intent + the worktree symlink interaction with Gondolin's
|
|
13970
|
+
* sandbox-escape protection). The agent's Gondolin Read tool accepts
|
|
13971
|
+
* paths under this mount via `toGuestPath` in `tool-operations.ts`.
|
|
13972
|
+
*/
|
|
13973
|
+
var SKILL_ROOT_IN_VM = GUEST_TASK_SKILLS_MOUNT;
|
|
13974
|
+
/** Bounds borrowed from pi's skill validation; conservative caps so a
|
|
13975
|
+
* malformed SKILL.md doesn't bloat the system prompt. */
|
|
13976
|
+
var MAX_SKILL_NAME = 64;
|
|
13977
|
+
var MAX_SKILL_DESCRIPTION = 1024;
|
|
13978
|
+
/**
|
|
13979
|
+
* Resolve a task's `input.context[]` and inject the side effects pi
|
|
13980
|
+
* needs. Safe to call with an empty array — returns an inert result.
|
|
13981
|
+
*/
|
|
13982
|
+
async function injectTaskContext(args) {
|
|
13983
|
+
const skills = [];
|
|
13984
|
+
const resolved = await resolveTaskContext({
|
|
13985
|
+
context: args.context,
|
|
13986
|
+
deliver: { skill: async ({ slug, content }) => {
|
|
13987
|
+
const dir = `${SKILL_ROOT_IN_VM}/${slug}`;
|
|
13988
|
+
const filePath = `${dir}/SKILL.md`;
|
|
13989
|
+
await args.fs.mkdir(dir, { recursive: true });
|
|
13990
|
+
await args.fs.writeFile(filePath, content, { mode: 420 });
|
|
13991
|
+
skills.push(buildSyntheticSkill({
|
|
13992
|
+
slug,
|
|
13993
|
+
content,
|
|
13994
|
+
filePath,
|
|
13995
|
+
dir
|
|
13996
|
+
}));
|
|
13997
|
+
} }
|
|
13998
|
+
});
|
|
13999
|
+
return {
|
|
14000
|
+
injected: resolved.injected,
|
|
14001
|
+
skills,
|
|
14002
|
+
systemPromptPrefix: resolved.systemPromptPrefix,
|
|
14003
|
+
userInlineSuffix: resolved.userInlineSuffix
|
|
14004
|
+
};
|
|
14005
|
+
}
|
|
14006
|
+
/**
|
|
14007
|
+
* Build a `Skill` object pi will faithfully render in
|
|
14008
|
+
* `<available_skills>`. We extract `name` and `description` from the
|
|
14009
|
+
* skill content's YAML frontmatter using pi's own `parseFrontmatter`
|
|
14010
|
+
* helper (proper YAML, not a regex hack) and fall back to the slug +
|
|
14011
|
+
* a generic description so a SKILL.md without frontmatter still
|
|
14012
|
+
* renders something meaningful.
|
|
14013
|
+
*
|
|
14014
|
+
* Frontmatter parsing is best-effort: a malformed YAML block is
|
|
14015
|
+
* optional metadata, not a reason to fail the task. We swallow parser
|
|
14016
|
+
* errors and fall back to the slug-derived metadata; the skill body
|
|
14017
|
+
* is unaffected.
|
|
14018
|
+
*
|
|
14019
|
+
* pi's `formatSkillsForPrompt` only reads `name`, `description`, and
|
|
14020
|
+
* `filePath` — `sourceInfo`/`baseDir` exist on the type but never
|
|
14021
|
+
* surface in the prompt, so a synthetic `SourceInfo` is enough.
|
|
14022
|
+
*/
|
|
14023
|
+
function buildSyntheticSkill(args) {
|
|
14024
|
+
let fm = {};
|
|
14025
|
+
try {
|
|
14026
|
+
fm = parseFrontmatter(args.content).frontmatter;
|
|
14027
|
+
} catch {}
|
|
14028
|
+
return {
|
|
14029
|
+
name: clip(typeof fm.name === "string" && fm.name.trim().length > 0 ? fm.name.trim() : args.slug, MAX_SKILL_NAME),
|
|
14030
|
+
description: clip(typeof fm.description === "string" && fm.description.trim().length > 0 ? fm.description.trim() : `Task-injected context skill (${args.slug})`, MAX_SKILL_DESCRIPTION),
|
|
14031
|
+
filePath: args.filePath,
|
|
14032
|
+
baseDir: args.dir,
|
|
14033
|
+
sourceInfo: createSyntheticSourceInfo(args.filePath, {
|
|
14034
|
+
source: "moltnet:task-context",
|
|
14035
|
+
scope: "temporary",
|
|
14036
|
+
origin: "top-level",
|
|
14037
|
+
baseDir: args.dir
|
|
14038
|
+
}),
|
|
14039
|
+
disableModelInvocation: fm["disable-model-invocation"] === true
|
|
14040
|
+
};
|
|
14041
|
+
}
|
|
14042
|
+
function clip(s, max) {
|
|
14043
|
+
return s.length > max ? s.slice(0, max) : s;
|
|
14044
|
+
}
|
|
14045
|
+
//#endregion
|
|
13642
14046
|
//#region src/runtime/runtime-instructor.ts
|
|
13643
14047
|
/**
|
|
13644
14048
|
* Build the daemon-controlled invariant prose injected into the system prompt
|
|
@@ -13962,6 +14366,7 @@ function resolveSubmitTools(taskType, opts = {}) {
|
|
|
13962
14366
|
* Anthropic-SDK one) plug in via the `executeTask` function injected into
|
|
13963
14367
|
* `AgentRuntime`.
|
|
13964
14368
|
*/
|
|
14369
|
+
var noopTurnEventHandler = () => {};
|
|
13965
14370
|
/**
|
|
13966
14371
|
* Factory that builds a pi-specific `executeTask` function suitable for
|
|
13967
14372
|
* injection into `AgentRuntime`. The returned function caches the resolved
|
|
@@ -14058,10 +14463,25 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
14058
14463
|
attemptN
|
|
14059
14464
|
});
|
|
14060
14465
|
reporterOpen = true;
|
|
14061
|
-
|
|
14062
|
-
|
|
14063
|
-
|
|
14064
|
-
})
|
|
14466
|
+
let onTurnEvent;
|
|
14467
|
+
if (opts.makeOnTurnEvent) try {
|
|
14468
|
+
onTurnEvent = opts.makeOnTurnEvent(claimedTask);
|
|
14469
|
+
} catch (err) {
|
|
14470
|
+
process.stderr.write(`[emit] makeOnTurnEvent threw: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
14471
|
+
onTurnEvent = noopTurnEventHandler;
|
|
14472
|
+
}
|
|
14473
|
+
else onTurnEvent = opts.onTurnEvent ?? noopTurnEventHandler;
|
|
14474
|
+
const emit = (kind, payload) => {
|
|
14475
|
+
try {
|
|
14476
|
+
onTurnEvent(kind, summarizePayloadForLog(kind, payload));
|
|
14477
|
+
} catch (err) {
|
|
14478
|
+
process.stderr.write(`[emit] onTurnEvent threw for kind="${kind}": ${err instanceof Error ? err.message : String(err)}\n`);
|
|
14479
|
+
}
|
|
14480
|
+
return reporter.record({
|
|
14481
|
+
kind,
|
|
14482
|
+
payload
|
|
14483
|
+
});
|
|
14484
|
+
};
|
|
14065
14485
|
await emit("info", {
|
|
14066
14486
|
event: "execute_start",
|
|
14067
14487
|
taskType: task.taskType,
|
|
@@ -14071,7 +14491,7 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
14071
14491
|
});
|
|
14072
14492
|
let taskPrompt;
|
|
14073
14493
|
try {
|
|
14074
|
-
taskPrompt = buildPromptForTask(task, {
|
|
14494
|
+
taskPrompt = buildTaskUserPrompt(task, {
|
|
14075
14495
|
diaryId,
|
|
14076
14496
|
taskId: task.id,
|
|
14077
14497
|
extras: opts.promptExtras
|
|
@@ -14084,6 +14504,30 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
14084
14504
|
});
|
|
14085
14505
|
return makeFailedOutput("prompt_build_failed", message);
|
|
14086
14506
|
}
|
|
14507
|
+
const rawContext = task.input.context;
|
|
14508
|
+
let injectedContext;
|
|
14509
|
+
try {
|
|
14510
|
+
const contextArray = rawContext === void 0 ? [] : rawContext;
|
|
14511
|
+
if (!Value.Check(TaskContext, contextArray)) throw new Error(`task.input.context failed TaskContext validation: ${JSON.stringify([...Value.Errors(TaskContext, contextArray)].slice(0, 3))}`);
|
|
14512
|
+
injectedContext = await injectTaskContext({
|
|
14513
|
+
context: contextArray,
|
|
14514
|
+
fs: managed.vm.fs
|
|
14515
|
+
});
|
|
14516
|
+
} catch (err) {
|
|
14517
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
14518
|
+
await emit("error", {
|
|
14519
|
+
message,
|
|
14520
|
+
phase: "context_resolution"
|
|
14521
|
+
});
|
|
14522
|
+
return makeFailedOutput("context_resolution_failed", message);
|
|
14523
|
+
}
|
|
14524
|
+
if (injectedContext.injected.length > 0) await emit("info", {
|
|
14525
|
+
event: "context_injected",
|
|
14526
|
+
count: injectedContext.injected.length,
|
|
14527
|
+
bindings: injectedContext.injected.map((r) => r.binding),
|
|
14528
|
+
slugs: injectedContext.injected.map((r) => r.slug)
|
|
14529
|
+
});
|
|
14530
|
+
if (injectedContext.userInlineSuffix) taskPrompt = `${taskPrompt}\n\n---\n\n${injectedContext.userInlineSuffix}`;
|
|
14087
14531
|
const gondolinCustomTools = [
|
|
14088
14532
|
createReadToolDefinition(mountPath, { operations: createGondolinReadOps(managed.vm, mountPath) }),
|
|
14089
14533
|
createWriteToolDefinition(mountPath, { operations: createGondolinWriteOps(managed.vm, mountPath) }),
|
|
@@ -14120,21 +14564,23 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
14120
14564
|
"moltnet.task.type": task.taskType
|
|
14121
14565
|
}
|
|
14122
14566
|
});
|
|
14123
|
-
const
|
|
14567
|
+
const appendSystemPrompt = [buildRuntimeInstructor({
|
|
14124
14568
|
taskId: task.id,
|
|
14125
14569
|
taskType: task.taskType,
|
|
14126
14570
|
attemptN,
|
|
14127
14571
|
diaryId,
|
|
14128
14572
|
agentName: opts.agentName,
|
|
14129
14573
|
correlationId: task.correlationId ?? null
|
|
14130
|
-
});
|
|
14574
|
+
})];
|
|
14575
|
+
if (injectedContext.systemPromptPrefix) appendSystemPrompt.push(injectedContext.systemPromptPrefix);
|
|
14576
|
+
const injectedSkills = injectedContext.skills;
|
|
14131
14577
|
const resourceLoader = new DefaultResourceLoader({
|
|
14132
14578
|
cwd: mountPath,
|
|
14133
14579
|
agentDir: piAuthDir,
|
|
14134
14580
|
extensionFactories: [piOtelExtension],
|
|
14135
|
-
appendSystemPrompt
|
|
14581
|
+
appendSystemPrompt,
|
|
14136
14582
|
skillsOverride: () => ({
|
|
14137
|
-
skills:
|
|
14583
|
+
skills: injectedSkills,
|
|
14138
14584
|
diagnostics: []
|
|
14139
14585
|
})
|
|
14140
14586
|
});
|
|
@@ -14359,6 +14805,27 @@ function wireSessionAbort(cancelSignal, session) {
|
|
|
14359
14805
|
* `task_messages.payload` row. Bodies above 4 KiB are replaced with a
|
|
14360
14806
|
* `{ truncated, original_size }` marker so the JSONL/DB size stays bounded.
|
|
14361
14807
|
*/
|
|
14808
|
+
function summarizePayloadForLog(kind, payload) {
|
|
14809
|
+
switch (kind) {
|
|
14810
|
+
case "text_delta": {
|
|
14811
|
+
const delta = payload.delta;
|
|
14812
|
+
return { chars: typeof delta === "string" ? delta.length : 0 };
|
|
14813
|
+
}
|
|
14814
|
+
case "tool_call_start": return { tool: payload.tool_name };
|
|
14815
|
+
case "tool_call_end": return {
|
|
14816
|
+
tool: payload.tool_name,
|
|
14817
|
+
is_error: payload.is_error === true,
|
|
14818
|
+
...payload.is_error === true && payload.result !== void 0 ? { result: payload.result } : {}
|
|
14819
|
+
};
|
|
14820
|
+
case "turn_end": return { stop_reason: payload.stop_reason };
|
|
14821
|
+
case "error": return {
|
|
14822
|
+
phase: payload.phase,
|
|
14823
|
+
message: typeof payload.message === "string" ? payload.message.slice(0, TRUNCATE_LIMIT) : payload.message
|
|
14824
|
+
};
|
|
14825
|
+
case "info": return Object.fromEntries(Object.entries(payload).map(([k, v]) => [k, typeof v === "string" ? v.slice(0, TRUNCATE_LIMIT) : v]));
|
|
14826
|
+
default: return payload;
|
|
14827
|
+
}
|
|
14828
|
+
}
|
|
14362
14829
|
var TRUNCATE_LIMIT = 4 * 1024;
|
|
14363
14830
|
function truncateForWire(value) {
|
|
14364
14831
|
if (value === null || value === void 0) return value;
|
|
@@ -14659,4 +15126,4 @@ function moltnetExtension(pi) {
|
|
|
14659
15126
|
registerMoltnetReflectCommand(pi, state);
|
|
14660
15127
|
}
|
|
14661
15128
|
//#endregion
|
|
14662
|
-
export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, loadCredentials, resumeVm, toGuestPath };
|
|
15129
|
+
export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, injectTaskContext, loadCredentials, resumeVm, toGuestPath };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@themoltnet/pi-extension",
|
|
3
|
-
"version": "0.13.4",
|
|
3
|
+
"version": "0.14.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "MoltNet pi extension — sandboxed tool execution in Gondolin VMs with MoltNet identity and persistent memory",
|
|
6
6
|
"license": "MIT",
|
|
@@ -31,8 +31,8 @@
|
|
|
31
31
|
"@earendil-works/gondolin": "^0.9.1",
|
|
32
32
|
"@opentelemetry/api": "^1.9.0",
|
|
33
33
|
"@sinclair/typebox": "^0.34.0",
|
|
34
|
-
"@themoltnet/agent-runtime": "0.
|
|
35
|
-
"@themoltnet/sdk": "0.
|
|
34
|
+
"@themoltnet/agent-runtime": "0.12.0",
|
|
35
|
+
"@themoltnet/sdk": "0.100.0"
|
|
36
36
|
},
|
|
37
37
|
"peerDependencies": {
|
|
38
38
|
"@earendil-works/pi-coding-agent": ">=0.74.0",
|