@themoltnet/pi-extension 0.17.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -19,6 +19,7 @@ import { TObject } from '@sinclair/typebox';
19
19
  import { ToolDefinition } from '@earendil-works/pi-coding-agent';
20
20
  import { TOptional } from '@sinclair/typebox';
21
21
  import { TRecord } from '@sinclair/typebox';
22
+ import { TSchema } from '@sinclair/typebox';
22
23
  import { TString } from '@sinclair/typebox';
23
24
  import { TUnion } from '@sinclair/typebox';
24
25
  import { TUnknown } from '@sinclair/typebox';
@@ -43,6 +44,8 @@ export declare function buildAgentSession(args: BuildAgentSessionArgs): Promise<
43
44
  declare interface BuildAgentSessionArgs {
44
45
  /** Host directory mounted at /workspace inside the VM. */
45
46
  mountPath: string;
47
+ /** Host working directory where the agent session should start. */
48
+ cwdPath: string;
46
49
  /** pi auth directory (resolved from `PI_CODING_AGENT_DIR` or `~/.pi/agent`). */
47
50
  piAuthDir: string;
48
51
  /** Resolved pi model handle (provider + model id). */
@@ -133,6 +136,8 @@ export declare function createSubagentTool(args: CreateSubagentToolArgs): Subage
133
136
  export declare interface CreateSubagentToolArgs {
134
137
  /** Host directory mounted at /workspace inside the VM. */
135
138
  mountPath: string;
139
+ /** Host working directory the subagent should start in. Defaults to mountPath. */
140
+ cwdPath?: string;
136
141
  /** pi auth directory the parent resolved. */
137
142
  piAuthDir: string;
138
143
  /** Resolved pi model handle — subagents share it. */
@@ -188,6 +193,16 @@ export declare interface CreateSubagentToolArgs {
188
193
  * exercise the tool's logic without booting a VM.
189
194
  */
190
195
  buildAgentSession?: (args: BuildAgentSessionArgs) => Promise<AgentSession>;
196
+ /**
197
+ * Contract registry for resolving output_schema names to TypeBox
198
+ * schemas at call time. The subagent tool reads ONLY via `.get()`
199
+ * and `.list()` — the registry is immutable after construction.
200
+ *
201
+ * Production callers (executePiTask) create the registry with
202
+ * built-in contracts at session-setup; tests inject a registry
203
+ * with whatever stubs they need.
204
+ */
205
+ contractRegistry: SubagentContractRegistry;
191
206
  }
192
207
 
193
208
  /**
@@ -240,6 +255,17 @@ export declare interface ExecutePiTaskOptions {
240
255
  * across tasks.
241
256
  */
242
257
  checkpointPath?: string;
258
+ /**
259
+ * Lazy checkpoint resolver used by `createPiTaskExecutor` so snapshot
260
+ * creation can happen after the reporter has been opened and can surface
261
+ * setup failures as task messages.
262
+ */
263
+ resolveCheckpointPath?: () => Promise<string>;
264
+ /**
265
+ * Set when the caller already opened the reporter before handing control
266
+ * to `executePiTask`.
267
+ */
268
+ reporterAlreadyOpened?: boolean;
243
269
  /**
244
270
  * Optional callback invoked alongside every `reporter.record()` so
245
271
  * the daemon can mirror task messages into its local logger.
@@ -292,6 +318,13 @@ export declare interface ExecutePiTaskOptions {
292
318
  * file-backed Pi sessions for selected task classes.
293
319
  */
294
320
  makeExecutionPlan?: PiTaskExecutionPlanFactory;
321
+ /**
322
+ * Immutable subagent contract registry used to resolve `output_schema`
323
+ * names at subagent tool call time. Constructed by the daemon (or
324
+ * tests) from static built-in schemas — `execute-pi-task` never hardcodes
325
+ * contracts. See #1106.
326
+ */
327
+ subagentContractRegistry?: SubagentContractRegistry;
295
328
  }
296
329
 
297
330
  /**
@@ -527,6 +560,29 @@ export declare interface SandboxConfig {
527
560
  /** Extract snapshot-specific config for backwards compat with ensureSnapshot. */
528
561
  export declare type SnapshotConfig = NonNullable<SandboxConfig['snapshot']>;
529
562
 
563
+ declare interface SubagentContractRegistry {
564
+ /** Resolve a contract by name. Returns `null` for unknown names. */
565
+ get(name: string): SubagentOutputContract | null;
566
+ /** List all registered contracts. */
567
+ list(): SubagentOutputContract[];
568
+ }
569
+
570
+ declare interface SubagentOutputContract {
571
+ /** Stable identifier the parent uses to reference this contract.
572
+ * Lower-snake-case by convention (e.g. `judge_eval_variant_result`). */
573
+ readonly name: string;
574
+ /** Human-readable description shown in the subagent tool's help text
575
+ * and in the inner session's submit-tool description. Useful when a
576
+ * parent LLM has multiple contracts to choose from. */
577
+ readonly description: string;
578
+ /**
579
+ * TypeBox schema the subagent's submit-tool args MUST validate
580
+ * against. The args ARE the output payload (no `{ output: ... }`
581
+ * wrapping), so the LLM gets field-level guidance directly.
582
+ */
583
+ readonly parametersSchema: TSchema;
584
+ }
585
+
530
586
  export declare interface SubagentToolHandle {
531
587
  /** ToolDefinition to register via `customTools` on the parent session. */
532
588
  readonly tool: ToolDefinition;
@@ -769,9 +825,10 @@ export declare interface VmCredentials {
769
825
  agentEnvRaw: string;
770
826
  /**
771
827
  * Pi OAuth/API-key auth blob. Null when neither `~/.pi/agent/auth.json`
772
- * (or its `PI_AUTH_PATH` override) is present — in that case the daemon
773
- * relies on Pi's env-var providers (`ANTHROPIC_API_KEY`, etc.) carried
774
- * via `agentEnv` and the host environment instead. CI uses this path.
828
+ * (resolved via `PI_CODING_AGENT_DIR` when set) is present — in that
829
+ * case the daemon relies on Pi's env-var providers (`ANTHROPIC_API_KEY`,
830
+ * etc.) carried via `agentEnv` and the host environment instead. CI uses
831
+ * this path.
775
832
  */
776
833
  piAuthJson: string | null;
777
834
  agentEnv: Record<string, string | undefined>;
package/dist/index.js CHANGED
@@ -8133,7 +8133,8 @@ function findMainWorktree() {
8133
8133
  function loadCredentials(agentDir) {
8134
8134
  const moltnetJson = readFileSync(path.join(agentDir, "moltnet.json"), "utf8");
8135
8135
  const agentEnvRaw = readFileSync(path.join(agentDir, "env"), "utf8");
8136
- const piAuthPath = process.env.PI_AUTH_PATH ?? path.join(process.env.HOME ?? "", ".pi", "agent", "auth.json");
8136
+ const piAgentDir = process.env.PI_CODING_AGENT_DIR ?? path.join(process.env.HOME ?? "", ".pi", "agent");
8137
+ const piAuthPath = path.join(piAgentDir, "auth.json");
8137
8138
  const piAuthJson = existsSync(piAuthPath) ? readFileSync(piAuthPath, "utf8") : null;
8138
8139
  const gitconfigPath = path.join(agentDir, "gitconfig");
8139
8140
  const gitconfig = existsSync(gitconfigPath) ? readFileSync(gitconfigPath, "utf8") : null;
@@ -8645,7 +8646,7 @@ async function buildAgentSession(args) {
8645
8646
  spanAttributes: args.otelSpanAttrs
8646
8647
  });
8647
8648
  const resourceLoader = new DefaultResourceLoader({
8648
- cwd: args.mountPath,
8649
+ cwd: args.cwdPath,
8649
8650
  agentDir: args.piAuthDir,
8650
8651
  extensionFactories: [piOtelExtension],
8651
8652
  appendSystemPrompt: args.appendSystemPrompt,
@@ -8653,12 +8654,12 @@ async function buildAgentSession(args) {
8653
8654
  });
8654
8655
  await resourceLoader.reload();
8655
8656
  const sessionManager = args.sessionPersistence ? await resolvePersistentSessionManager({
8656
- cwd: args.mountPath,
8657
+ cwd: args.cwdPath,
8657
8658
  sessionDir: args.sessionPersistence.sessionDir
8658
- }) : SessionManager.inMemory(args.mountPath);
8659
+ }) : SessionManager.inMemory(args.cwdPath);
8659
8660
  return (await createAgentSession({
8660
8661
  agentDir: args.piAuthDir,
8661
- cwd: args.mountPath,
8662
+ cwd: args.cwdPath,
8662
8663
  model: args.modelHandle,
8663
8664
  customTools: args.customTools,
8664
8665
  sessionManager,
@@ -8670,6 +8671,61 @@ async function resolvePersistentSessionManager(args) {
8670
8671
  return SessionManager.continueRecent(args.cwd, args.sessionDir);
8671
8672
  }
8672
8673
  //#endregion
8674
+ //#region ../agent-runtime/src/context-bindings.ts
8675
+ var PROMPT_SEPARATOR = "\n\n---\n\n";
8676
+ /**
8677
+ * Resolve `task.input.context[]` into delivered side-effects (skills
8678
+ * persisted via `deliver.skill`) and prompt fragments
8679
+ * (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
8680
+ * built prompt.
8681
+ *
8682
+ * Per-binding semantics (V1):
8683
+ * - `skill` → `deliver.skill({ slug, content })` once per ref.
8684
+ * Slug collisions on distinct contents are
8685
+ * refused loudly.
8686
+ * - `prompt_prefix` → content appended to `systemPromptPrefix` with
8687
+ * the canonical `\n\n---\n\n` separator (in
8688
+ * declared order).
8689
+ * - `user_inline` → content appended to `userInlineSuffix` in
8690
+ * declared order, same separator.
8691
+ *
8692
+ * No fetching, no hashing — bytes are inlined in `ContextRef.content`,
8693
+ * and the task's `inputCid` already pins the entire input. The imposer
8694
+ * chose these bytes; the resolver just dispatches them.
8695
+ *
8696
+ * The function is pure with respect to its arguments: file writes are
8697
+ * confined to the injected `deliver` callback, which makes the
8698
+ * resolver trivial to test.
8699
+ */
8700
+ async function resolveTaskContext(args) {
8701
+ const promptParts = [];
8702
+ const userParts = [];
8703
+ const injected = [];
8704
+ const usedSlugs = /* @__PURE__ */ new Map();
8705
+ for (const ref of args.context) {
8706
+ if (ref.binding === "skill") {
8707
+ const prior = usedSlugs.get(ref.slug);
8708
+ if (prior !== void 0) {
8709
+ if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
8710
+ injected.push(ref);
8711
+ continue;
8712
+ }
8713
+ usedSlugs.set(ref.slug, ref.content);
8714
+ await args.deliver.skill({
8715
+ slug: ref.slug,
8716
+ content: ref.content
8717
+ });
8718
+ } else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
8719
+ else userParts.push(ref.content);
8720
+ injected.push(ref);
8721
+ }
8722
+ return {
8723
+ injected,
8724
+ systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
8725
+ userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
8726
+ };
8727
+ }
8728
+ //#endregion
8673
8729
  //#region ../tasks/src/formats.ts
8674
8730
  /**
8675
8731
  * Register TypeBox string formats used across Task / TaskOutput / task-type
@@ -8841,10 +8897,10 @@ function validateRubricWeights(rubric) {
8841
8897
  * complementary places.
8842
8898
  *
8843
8899
  * Before this envelope existed, criteria were scattered: a vestigial
8844
- * `criteriaCid` column nobody resolved, an `acceptanceCriteria: string[]`
8845
- * field on `fulfill_brief.input` that was "interpreted by the claiming
8846
- * agent," and inline `rubric` / `criteria[]` fields on judgment-task
8847
- * inputs. None of those were machine-verifiable end-to-end.
8900
+ * `criteriaCid` column nobody resolved, free-form prose on
8901
+ * `fulfill_brief.input`, and inline `rubric` / `criteria[]` fields on
8902
+ * judgment-task inputs. None of those were machine-verifiable
8903
+ * end-to-end.
8848
8904
  *
8849
8905
  * This module defines a single, content-addressable envelope an imposer
8850
8906
  * attaches to any task type. It has four orthogonal sections — pick
@@ -9140,7 +9196,6 @@ var FULFILL_BRIEF_TYPE = "fulfill_brief";
9140
9196
  var FulfillBriefInput = Type$1.Object({
9141
9197
  brief: Type$1.String({ minLength: 1 }),
9142
9198
  title: Type$1.Optional(Type$1.String()),
9143
- acceptanceCriteria: Type$1.Optional(Type$1.Array(Type$1.String())),
9144
9199
  successCriteria: Type$1.Optional(SuccessCriteria),
9145
9200
  seedFiles: Type$1.Optional(Type$1.Array(Type$1.String())),
9146
9201
  scopeHint: Type$1.Optional(Type$1.String())
@@ -10086,133 +10141,6 @@ Type$1.Object({
10086
10141
  additionalProperties: false
10087
10142
  });
10088
10143
  //#endregion
10089
- //#region ../agent-runtime/src/subagent-output-contracts.ts
10090
- var REGISTRY = /* @__PURE__ */ new Map();
10091
- /**
10092
- * Register a subagent output contract. Idempotent: re-registering the
10093
- * same name with a different schema throws — contracts are meant to
10094
- * be stable. Re-registering with the identical contract object (same
10095
- * reference) is a no-op for HMR and test convenience.
10096
- *
10097
- * Typically called at module-init time alongside task-type
10098
- * registration. See task-types/index.ts in @moltnet/tasks for the
10099
- * conventional pattern.
10100
- */
10101
- function registerSubagentOutputContract(contract) {
10102
- if (!contract.name || contract.name.trim().length === 0) throw new Error("subagent output contract name is required");
10103
- if (!/^[a-z][a-z0-9_]*$/.test(contract.name)) throw new Error(`subagent output contract name '${contract.name}' must be lower_snake_case (starts with a letter, then [a-z0-9_]+)`);
10104
- const existing = REGISTRY.get(contract.name);
10105
- if (existing && existing !== contract) {
10106
- if (existing.parametersSchema !== contract.parametersSchema) throw new Error(`subagent output contract '${contract.name}' is already registered with a different schema; refusing to override`);
10107
- }
10108
- REGISTRY.set(contract.name, contract);
10109
- }
10110
- /**
10111
- * Resolve a subagent output contract by name. Returns `null` for
10112
- * unknown names — callers (the subagent custom tool) decide whether
10113
- * that's a tool error the parent LLM can recover from or a hard fail.
10114
- */
10115
- function getSubagentOutputContract(name) {
10116
- return REGISTRY.get(name) ?? null;
10117
- }
10118
- /**
10119
- * List all registered contracts. Useful for diagnostics and for the
10120
- * subagent tool's parameter description so a parent LLM can see what
10121
- * contracts are available without enumerating them in its prompt.
10122
- */
10123
- function listSubagentOutputContracts() {
10124
- return [...REGISTRY.values()];
10125
- }
10126
- //#endregion
10127
- //#region ../agent-runtime/src/built-in-contract-registrations.ts
10128
- /**
10129
- * Built-in subagent output contracts (#1087, #943).
10130
- *
10131
- * Why this is an exported function and not a module-init side
10132
- * effect:
10133
- *
10134
- * - The registry is process-global. Module-init registration
10135
- * fires exactly once per Node process (ESM modules are cached
10136
- * by URL). Tests that call `__resetSubagentOutputContractsForTests()`
10137
- * to start from an empty registry have no way to repopulate
10138
- * the built-ins without re-evaluating the module — which the
10139
- * cache prevents. PR #1101 review M4.
10140
- * - An explicit `registerBuiltInSubagentContracts()` lets the
10141
- * package index call it once at module load AND lets test
10142
- * setup hooks call it again after `__reset...`.
10143
- * - `registerSubagentOutputContract` is itself idempotent for
10144
- * identical re-registrations, so calling this function twice
10145
- * in the same process is safe.
10146
- *
10147
- * Adding a new built-in: extend the body of this function. Do not
10148
- * call `registerSubagentOutputContract` from anywhere else in the
10149
- * package — keeping all built-ins in one function makes the set
10150
- * auditable.
10151
- */
10152
- function registerBuiltInSubagentContracts() {
10153
- registerSubagentOutputContract({
10154
- name: "judge_eval_variant_result",
10155
- description: "Per-variant grading result produced by a subagent of judge_eval_variant: scores against the shared rubric, composite, and a 1-3 sentence verdict for a single variant.",
10156
- parametersSchema: JudgeEvalVariantResult
10157
- });
10158
- }
10159
- registerBuiltInSubagentContracts();
10160
- //#endregion
10161
- //#region ../agent-runtime/src/context-bindings.ts
10162
- var PROMPT_SEPARATOR = "\n\n---\n\n";
10163
- /**
10164
- * Resolve `task.input.context[]` into delivered side-effects (skills
10165
- * persisted via `deliver.skill`) and prompt fragments
10166
- * (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
10167
- * built prompt.
10168
- *
10169
- * Per-binding semantics (V1):
10170
- * - `skill` → `deliver.skill({ slug, content })` once per ref.
10171
- * Slug collisions on distinct contents are
10172
- * refused loudly.
10173
- * - `prompt_prefix` → content appended to `systemPromptPrefix` with
10174
- * the canonical `\n\n---\n\n` separator (in
10175
- * declared order).
10176
- * - `user_inline` → content appended to `userInlineSuffix` in
10177
- * declared order, same separator.
10178
- *
10179
- * No fetching, no hashing — bytes are inlined in `ContextRef.content`,
10180
- * and the task's `inputCid` already pins the entire input. The imposer
10181
- * chose these bytes; the resolver just dispatches them.
10182
- *
10183
- * The function is pure with respect to its arguments: file writes are
10184
- * confined to the injected `deliver` callback, which makes the
10185
- * resolver trivial to test.
10186
- */
10187
- async function resolveTaskContext(args) {
10188
- const promptParts = [];
10189
- const userParts = [];
10190
- const injected = [];
10191
- const usedSlugs = /* @__PURE__ */ new Map();
10192
- for (const ref of args.context) {
10193
- if (ref.binding === "skill") {
10194
- const prior = usedSlugs.get(ref.slug);
10195
- if (prior !== void 0) {
10196
- if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
10197
- injected.push(ref);
10198
- continue;
10199
- }
10200
- usedSlugs.set(ref.slug, ref.content);
10201
- await args.deliver.skill({
10202
- slug: ref.slug,
10203
- content: ref.content
10204
- });
10205
- } else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
10206
- else userParts.push(ref.content);
10207
- injected.push(ref);
10208
- }
10209
- return {
10210
- injected,
10211
- systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
10212
- userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
10213
- };
10214
- }
10215
- //#endregion
10216
10144
  //#region ../agent-runtime/src/output-tools.ts
10217
10145
  /**
10218
10146
  * Submit-output tool contract.
@@ -10617,13 +10545,7 @@ function buildCuratePackUserPrompt(input, ctx) {
10617
10545
  * is told to inspect them itself.
10618
10546
  */
10619
10547
  function buildFulfillBriefUserPrompt(input, ctx) {
10620
- const { brief, title, acceptanceCriteria, seedFiles, scopeHint } = input;
10621
- const criteriaSection = acceptanceCriteria?.length ? [
10622
- "### Acceptance criteria",
10623
- "",
10624
- ...acceptanceCriteria.map((c) => `- ${c}`),
10625
- ""
10626
- ].join("\n") : "";
10548
+ const { brief, title, seedFiles, scopeHint } = input;
10627
10549
  const seedSection = seedFiles?.length ? [
10628
10550
  "### Seed files",
10629
10551
  "",
@@ -10671,7 +10593,6 @@ function buildFulfillBriefUserPrompt(input, ctx) {
10671
10593
  "",
10672
10594
  brief,
10673
10595
  "",
10674
- criteriaSection,
10675
10596
  seedSection,
10676
10597
  correlationSection,
10677
10598
  workspaceSection,
@@ -14838,6 +14759,7 @@ var DEFAULT_SUBAGENT_TIMEOUT_MS = 300 * 1e3;
14838
14759
  */
14839
14760
  function createSubagentTool(args) {
14840
14761
  const buildSession = args.buildAgentSession ?? buildAgentSession;
14762
+ const { contractRegistry } = args;
14841
14763
  let callCount = 0;
14842
14764
  return {
14843
14765
  tool: defineTool({
@@ -14848,8 +14770,8 @@ function createSubagentTool(args) {
14848
14770
  async execute(_id, params) {
14849
14771
  if (!Value.Check(SubagentToolParameters, params)) return toolError(`subagent: invalid parameters: ${JSON.stringify([...Value.Errors(SubagentToolParameters, params)].slice(0, 3))}`);
14850
14772
  const { task, output_schema } = params;
14851
- const contract = getSubagentOutputContract(output_schema);
14852
- if (!contract) return toolError(`subagent: unknown output_schema "${output_schema}". Registered contracts: [${listSubagentOutputContracts().map((c) => c.name).join(", ")}]`);
14773
+ const contract = contractRegistry.get(output_schema);
14774
+ if (!contract) return toolError(`subagent: unknown output_schema "${output_schema}". Registered contracts: [${contractRegistry.list().map((c) => c.name).join(", ")}]`);
14853
14775
  callCount += 1;
14854
14776
  const callIndex = callCount;
14855
14777
  let captured = null;
@@ -14879,6 +14801,7 @@ function createSubagentTool(args) {
14879
14801
  });
14880
14802
  const session = await buildSession({
14881
14803
  mountPath: args.mountPath,
14804
+ cwdPath: args.cwdPath ?? args.mountPath,
14882
14805
  piAuthDir: args.piAuthDir,
14883
14806
  modelHandle: args.modelHandle,
14884
14807
  agentName: args.agentName,
@@ -15219,6 +15142,7 @@ function prepareTaskWorkspace(task, requestedMountPath, executionPlan) {
15219
15142
  const branch = executionPlan?.worktreeBranch ?? null;
15220
15143
  if (!branch) return {
15221
15144
  mountPath: requestedMountPath,
15145
+ cwdPath: requestedMountPath,
15222
15146
  mode: "shared_mount",
15223
15147
  branch: null,
15224
15148
  cleanup: () => {}
@@ -15226,7 +15150,7 @@ function prepareTaskWorkspace(task, requestedMountPath, executionPlan) {
15226
15150
  const mainRepo = findMainWorktree();
15227
15151
  const worktreeDir = resolveTaskWorktreePath(mainRepo, executionPlan?.workspaceId ?? `task-${task.id}`);
15228
15152
  const relMount = relative(mainRepo, requestedMountPath);
15229
- const mountPath = relMount === "" || relMount.startsWith("..") ? worktreeDir : join(worktreeDir, relMount);
15153
+ const cwdPath = relMount === "" || relMount.startsWith("..") ? worktreeDir : join(worktreeDir, relMount);
15230
15154
  const keepWorkspace = executionPlan?.workspaceScope === "session" && executionPlan.sessionKey !== null;
15231
15155
  if (keepWorkspace) ensureReusableTaskWorktree(mainRepo, worktreeDir, branch);
15232
15156
  else {
@@ -15234,7 +15158,8 @@ function prepareTaskWorkspace(task, requestedMountPath, executionPlan) {
15234
15158
  addTaskWorktree(mainRepo, worktreeDir, branch);
15235
15159
  }
15236
15160
  return {
15237
- mountPath,
15161
+ mountPath: mainRepo,
15162
+ cwdPath,
15238
15163
  mode: "dedicated_worktree",
15239
15164
  branch,
15240
15165
  cleanup: keepWorkspace ? () => {} : () => {
@@ -15347,15 +15272,24 @@ var noopTurnEventHandler = () => {};
15347
15272
  function createPiTaskExecutor(opts) {
15348
15273
  let cachedCheckpoint = opts.checkpointPath ?? null;
15349
15274
  return async (claimedTask, reporter) => {
15350
- if (!cachedCheckpoint) cachedCheckpoint = await ensureSnapshot({
15351
- config: opts.sandboxConfig?.snapshot,
15352
- onProgress: opts.onSnapshotProgress ?? ((m) => {
15353
- process.stderr.write(`[snapshot] ${m}\n`);
15354
- })
15275
+ const reporterWasOpened = !reporter.cancelSignal.aborted;
15276
+ if (reporterWasOpened) await reporter.open({
15277
+ taskId: claimedTask.task.id,
15278
+ attemptN: claimedTask.attemptN
15355
15279
  });
15356
15280
  return executePiTask(claimedTask, reporter, {
15357
15281
  ...opts,
15358
- checkpointPath: cachedCheckpoint
15282
+ checkpointPath: cachedCheckpoint ?? void 0,
15283
+ resolveCheckpointPath: async () => {
15284
+ if (!cachedCheckpoint) cachedCheckpoint = await ensureSnapshot({
15285
+ config: opts.sandboxConfig?.snapshot,
15286
+ onProgress: opts.onSnapshotProgress ?? ((m) => {
15287
+ process.stderr.write(`[snapshot] ${m}\n`);
15288
+ })
15289
+ });
15290
+ return cachedCheckpoint;
15291
+ },
15292
+ reporterAlreadyOpened: reporterWasOpened
15359
15293
  });
15360
15294
  };
15361
15295
  }
@@ -15371,8 +15305,9 @@ async function executePiTask(claimedTask, reporter, opts) {
15371
15305
  const startTime = Date.now();
15372
15306
  const requestedMountPath = opts.mountPath ?? process.cwd();
15373
15307
  const executionPlan = opts.makeExecutionPlan?.(claimedTask) ?? null;
15374
- const workspace = prepareTaskWorkspace(task, requestedMountPath, executionPlan);
15375
- const mountPath = workspace.mountPath;
15308
+ let workspace = null;
15309
+ let mountPath = requestedMountPath;
15310
+ let cwdPath = requestedMountPath;
15376
15311
  if (reporter.cancelSignal.aborted) return {
15377
15312
  taskId: task.id,
15378
15313
  attemptN,
@@ -15387,33 +15322,8 @@ async function executePiTask(claimedTask, reporter, opts) {
15387
15322
  retryable: false
15388
15323
  }
15389
15324
  };
15390
- const checkpointPath = opts.checkpointPath ?? await ensureSnapshot({
15391
- config: opts.sandboxConfig?.snapshot,
15392
- onProgress: opts.onSnapshotProgress ?? ((m) => {
15393
- process.stderr.write(`[snapshot] ${m}\n`);
15394
- })
15395
- });
15396
- const mainRepoForRepair = findMainWorktree();
15397
- try {
15398
- execFileSync("git", [
15399
- "-C",
15400
- mainRepoForRepair,
15401
- "worktree",
15402
- "repair",
15403
- "--relative-paths"
15404
- ], { stdio: "pipe" });
15405
- } catch {}
15325
+ let reporterOpen = opts.reporterAlreadyOpened ?? false;
15406
15326
  let managed = null;
15407
- managed = await resumeVm({
15408
- checkpointPath,
15409
- agentName: opts.agentName,
15410
- mountPath,
15411
- extraAllowedHosts: opts.extraAllowedHosts,
15412
- sandboxConfig: opts.sandboxConfig
15413
- });
15414
- const diaryId = task.diaryId ?? "";
15415
- const taskTeamId = task.teamId ?? "";
15416
- let reporterOpen = false;
15417
15327
  let session = null;
15418
15328
  let subagentHandle = null;
15419
15329
  const finalUsage = emptyUsage(opts.provider, opts.model);
@@ -15432,41 +15342,103 @@ async function executePiTask(claimedTask, reporter, opts) {
15432
15342
  retryable: false
15433
15343
  }
15434
15344
  });
15345
+ let onTurnEvent;
15346
+ if (opts.makeOnTurnEvent) try {
15347
+ onTurnEvent = opts.makeOnTurnEvent(claimedTask);
15348
+ } catch (err) {
15349
+ process.stderr.write(`[emit] makeOnTurnEvent threw: ${err instanceof Error ? err.message : String(err)}\n`);
15350
+ onTurnEvent = noopTurnEventHandler;
15351
+ }
15352
+ else onTurnEvent = opts.onTurnEvent ?? noopTurnEventHandler;
15353
+ const emit = (kind, payload) => {
15354
+ try {
15355
+ onTurnEvent(kind, summarizePayloadForLog(kind, payload));
15356
+ } catch (err) {
15357
+ process.stderr.write(`[emit] onTurnEvent threw for kind="${kind}": ${err instanceof Error ? err.message : String(err)}\n`);
15358
+ }
15359
+ return reporter.record({
15360
+ kind,
15361
+ payload
15362
+ });
15363
+ };
15364
+ const emitError = async (phase, message, extra = {}) => {
15365
+ await emit("error", {
15366
+ phase,
15367
+ message,
15368
+ ...extra
15369
+ });
15370
+ };
15435
15371
  try {
15436
- const mainRepo = findMainWorktree();
15437
- activateAgentEnv(managed.credentials.agentEnv, mainRepo);
15438
- await reporter.open({
15372
+ if (!opts.reporterAlreadyOpened) await reporter.open({
15439
15373
  taskId: task.id,
15440
15374
  attemptN
15441
15375
  });
15442
15376
  reporterOpen = true;
15443
- let onTurnEvent;
15444
- if (opts.makeOnTurnEvent) try {
15445
- onTurnEvent = opts.makeOnTurnEvent(claimedTask);
15377
+ let checkpointPath;
15378
+ try {
15379
+ checkpointPath = opts.checkpointPath ?? (opts.resolveCheckpointPath ? await opts.resolveCheckpointPath() : await ensureSnapshot({
15380
+ config: opts.sandboxConfig?.snapshot,
15381
+ onProgress: opts.onSnapshotProgress ?? ((m) => {
15382
+ process.stderr.write(`[snapshot] ${m}\n`);
15383
+ })
15384
+ }));
15385
+ } catch (err) {
15386
+ const message = err instanceof Error ? err.message : String(err);
15387
+ await emitError("snapshot", message);
15388
+ return makeFailedOutput("snapshot_failed", message);
15389
+ }
15390
+ try {
15391
+ workspace = prepareTaskWorkspace(task, requestedMountPath, executionPlan);
15392
+ mountPath = workspace.mountPath;
15393
+ cwdPath = workspace.cwdPath;
15446
15394
  } catch (err) {
15447
- process.stderr.write(`[emit] makeOnTurnEvent threw: ${err instanceof Error ? err.message : String(err)}\n`);
15448
- onTurnEvent = noopTurnEventHandler;
15395
+ const message = err instanceof Error ? err.message : String(err);
15396
+ await emitError("worktree_setup", message);
15397
+ return makeFailedOutput("worktree_setup_failed", message);
15449
15398
  }
15450
- else onTurnEvent = opts.onTurnEvent ?? noopTurnEventHandler;
15451
- const emit = (kind, payload) => {
15399
+ try {
15400
+ const mainRepoForRepair = findMainWorktree();
15452
15401
  try {
15453
- onTurnEvent(kind, summarizePayloadForLog(kind, payload));
15454
- } catch (err) {
15455
- process.stderr.write(`[emit] onTurnEvent threw for kind="${kind}": ${err instanceof Error ? err.message : String(err)}\n`);
15456
- }
15457
- return reporter.record({
15458
- kind,
15459
- payload
15402
+ execFileSync("git", [
15403
+ "-C",
15404
+ mainRepoForRepair,
15405
+ "worktree",
15406
+ "repair",
15407
+ "--relative-paths"
15408
+ ], { stdio: "pipe" });
15409
+ } catch {}
15410
+ } catch (err) {
15411
+ const message = err instanceof Error ? err.message : String(err);
15412
+ await emitError("worktree_setup", message);
15413
+ return makeFailedOutput("worktree_setup_failed", message);
15414
+ }
15415
+ try {
15416
+ managed = await resumeVm({
15417
+ checkpointPath,
15418
+ agentName: opts.agentName,
15419
+ mountPath,
15420
+ extraAllowedHosts: opts.extraAllowedHosts,
15421
+ sandboxConfig: opts.sandboxConfig
15460
15422
  });
15461
- };
15423
+ } catch (err) {
15424
+ const message = err instanceof Error ? err.message : String(err);
15425
+ await emitError("vm_resume", message);
15426
+ return makeFailedOutput("vm_resume_failed", message);
15427
+ }
15428
+ const diaryId = task.diaryId ?? "";
15429
+ const taskTeamId = task.teamId ?? "";
15430
+ const mainRepo = findMainWorktree();
15431
+ activateAgentEnv(managed.credentials.agentEnv, mainRepo);
15432
+ const activeWorkspace = workspace;
15433
+ if (!activeWorkspace) throw new Error("task workspace not prepared");
15462
15434
  await emit("info", {
15463
15435
  event: "execute_start",
15464
15436
  taskType: task.taskType,
15465
15437
  teamId: task.teamId,
15466
15438
  provider: opts.provider,
15467
15439
  model: opts.model,
15468
- workspaceMode: workspace.mode,
15469
- workspaceBranch: workspace.branch
15440
+ workspaceMode: activeWorkspace.mode,
15441
+ workspaceBranch: activeWorkspace.branch
15470
15442
  });
15471
15443
  let taskPrompt;
15472
15444
  try {
@@ -15474,8 +15446,8 @@ async function executePiTask(claimedTask, reporter, opts) {
15474
15446
  diaryId,
15475
15447
  taskId: task.id,
15476
15448
  workspace: {
15477
- mode: workspace.mode,
15478
- branch: workspace.branch
15449
+ mode: activeWorkspace.mode,
15450
+ branch: activeWorkspace.branch
15479
15451
  },
15480
15452
  extras: opts.promptExtras
15481
15453
  });
@@ -15527,7 +15499,7 @@ async function executePiTask(claimedTask, reporter, opts) {
15527
15499
  getTeamId: () => taskTeamId,
15528
15500
  getSessionErrors: () => [],
15529
15501
  clearSessionErrors: () => {},
15530
- getHostCwd: () => mountPath,
15502
+ getHostCwd: () => cwdPath,
15531
15503
  hostExecBaseEnv: new Set([...HOST_EXEC_DEFAULT_BASE_ENV, ...Object.keys(managed.credentials.agentEnv)]),
15532
15504
  hostExecAutoApprove: opts.hostExecAutoApprove ?? opts.sandboxConfig?.hostExec?.autoApprove ?? false,
15533
15505
  getTaskContext: () => ({
@@ -15555,6 +15527,7 @@ async function executePiTask(claimedTask, reporter, opts) {
15555
15527
  if (taskTypeUsesSubagents(task.taskType)) {
15556
15528
  subagentHandle = createSubagentTool({
15557
15529
  mountPath,
15530
+ cwdPath,
15558
15531
  piAuthDir,
15559
15532
  modelHandle,
15560
15533
  agentName: opts.agentName,
@@ -15563,12 +15536,14 @@ async function executePiTask(claimedTask, reporter, opts) {
15563
15536
  parentTaskId: task.id,
15564
15537
  parentTaskType: task.taskType,
15565
15538
  parentAttemptN: attemptN,
15539
+ contractRegistry: opts.subagentContractRegistry,
15566
15540
  parentCancelSignal: reporter.cancelSignal
15567
15541
  });
15568
15542
  parentSubagentTools.push(subagentHandle.tool);
15569
15543
  }
15570
15544
  session = await buildAgentSession({
15571
15545
  mountPath,
15546
+ cwdPath,
15572
15547
  piAuthDir,
15573
15548
  modelHandle,
15574
15549
  agentName: opts.agentName,
@@ -15653,6 +15628,10 @@ async function executePiTask(claimedTask, reporter, opts) {
15653
15628
  is_error: event.isError,
15654
15629
  result: event.isError ? truncateForWire(event.result) : void 0
15655
15630
  }));
15631
+ if (event.isError) track(emitError("tool_call_error", describeToolErrorMessage(event.result), {
15632
+ tool: event.toolName,
15633
+ result: truncateForWire(event.result)
15634
+ }));
15656
15635
  if (maxBashTimeouts > 0 && event.toolName === "bash" && event.isError && isBashTimeoutResult(event.result)) {
15657
15636
  bashTimeoutCount += 1;
15658
15637
  if (bashTimeoutCount >= maxBashTimeouts) triggerCapAbort("max_bash_timeouts_exceeded", `Aborted after ${bashTimeoutCount} bash timeouts in this attempt (cap ${maxBashTimeouts}).`);
@@ -15808,7 +15787,7 @@ async function executePiTask(claimedTask, reporter, opts) {
15808
15787
  }
15809
15788
  }
15810
15789
  if (managed) await managed.vm.close();
15811
- try {
15790
+ if (workspace) try {
15812
15791
  workspace.cleanup();
15813
15792
  } catch (err) {
15814
15793
  const detail = err instanceof Error ? err.message : String(err);
@@ -15919,6 +15898,23 @@ function truncateForWire(value) {
15919
15898
  };
15920
15899
  }
15921
15900
  }
15901
+ function describeToolErrorMessage(result) {
15902
+ if (typeof result === "string" && result.trim().length > 0) return result.trim();
15903
+ if (result && typeof result === "object") {
15904
+ const content = result.content;
15905
+ if (Array.isArray(content)) {
15906
+ for (const item of content) if (item && typeof item === "object" && typeof item.text === "string") {
15907
+ const text = item.text.trim();
15908
+ if (text.length > 0) return text;
15909
+ }
15910
+ }
15911
+ }
15912
+ try {
15913
+ return JSON.stringify(truncateForWire(result));
15914
+ } catch {
15915
+ return "Tool call failed";
15916
+ }
15917
+ }
15922
15918
  //#endregion
15923
15919
  //#region src/index.ts
15924
15920
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@themoltnet/pi-extension",
3
- "version": "0.17.0",
3
+ "version": "0.18.0",
4
4
  "type": "module",
5
5
  "description": "MoltNet pi extension — sandboxed tool execution in Gondolin VMs with MoltNet identity and persistent memory",
6
6
  "license": "MIT",
@@ -32,7 +32,7 @@
32
32
  "@opentelemetry/api": "^1.9.0",
33
33
  "@sinclair/typebox": "^0.34.0",
34
34
  "@themoltnet/sdk": "0.102.0",
35
- "@themoltnet/agent-runtime": "0.15.1"
35
+ "@themoltnet/agent-runtime": "0.15.2"
36
36
  },
37
37
  "peerDependencies": {
38
38
  "@earendil-works/pi-coding-agent": ">=0.74.0",