@themoltnet/pi-extension 0.17.0 → 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -19,6 +19,7 @@ import { TObject } from '@sinclair/typebox';
19
19
  import { ToolDefinition } from '@earendil-works/pi-coding-agent';
20
20
  import { TOptional } from '@sinclair/typebox';
21
21
  import { TRecord } from '@sinclair/typebox';
22
+ import { TSchema } from '@sinclair/typebox';
22
23
  import { TString } from '@sinclair/typebox';
23
24
  import { TUnion } from '@sinclair/typebox';
24
25
  import { TUnknown } from '@sinclair/typebox';
@@ -43,6 +44,8 @@ export declare function buildAgentSession(args: BuildAgentSessionArgs): Promise<
43
44
  declare interface BuildAgentSessionArgs {
44
45
  /** Host directory mounted at /workspace inside the VM. */
45
46
  mountPath: string;
47
+ /** Host working directory where the agent session should start. */
48
+ cwdPath: string;
46
49
  /** pi auth directory (resolved from `PI_CODING_AGENT_DIR` or `~/.pi/agent`). */
47
50
  piAuthDir: string;
48
51
  /** Resolved pi model handle (provider + model id). */
@@ -133,6 +136,8 @@ export declare function createSubagentTool(args: CreateSubagentToolArgs): Subage
133
136
  export declare interface CreateSubagentToolArgs {
134
137
  /** Host directory mounted at /workspace inside the VM. */
135
138
  mountPath: string;
139
+ /** Host working directory the subagent should start in. Defaults to mountPath. */
140
+ cwdPath?: string;
136
141
  /** pi auth directory the parent resolved. */
137
142
  piAuthDir: string;
138
143
  /** Resolved pi model handle — subagents share it. */
@@ -188,6 +193,16 @@ export declare interface CreateSubagentToolArgs {
188
193
  * exercise the tool's logic without booting a VM.
189
194
  */
190
195
  buildAgentSession?: (args: BuildAgentSessionArgs) => Promise<AgentSession>;
196
+ /**
197
+ * Contract registry for resolving output_schema names to TypeBox
198
+ * schemas at call time. The subagent tool reads ONLY via `.get()`
199
+ * and `.list()` — the registry is immutable after construction.
200
+ *
201
+ * Production callers (executePiTask) create the registry with
202
+ * built-in contracts at session-setup; tests inject a registry
203
+ * with whatever stubs they need.
204
+ */
205
+ contractRegistry: SubagentContractRegistry;
191
206
  }
192
207
 
193
208
  /**
@@ -240,6 +255,17 @@ export declare interface ExecutePiTaskOptions {
240
255
  * across tasks.
241
256
  */
242
257
  checkpointPath?: string;
258
+ /**
259
+ * Lazy checkpoint resolver used by `createPiTaskExecutor` so snapshot
260
+ * creation can happen after the reporter has been opened and can surface
261
+ * setup failures as task messages.
262
+ */
263
+ resolveCheckpointPath?: () => Promise<string>;
264
+ /**
265
+ * Set when the caller already opened the reporter before handing control
266
+ * to `executePiTask`.
267
+ */
268
+ reporterAlreadyOpened?: boolean;
243
269
  /**
244
270
  * Optional callback invoked alongside every `reporter.record()` so
245
271
  * the daemon can mirror task messages into its local logger.
@@ -292,6 +318,13 @@ export declare interface ExecutePiTaskOptions {
292
318
  * file-backed Pi sessions for selected task classes.
293
319
  */
294
320
  makeExecutionPlan?: PiTaskExecutionPlanFactory;
321
+ /**
322
+ * Immutable subagent contract registry used to resolve `output_schema`
323
+ * names at subagent tool call time. Constructed by the daemon (or
324
+ * tests) from static built-in schemas — `execute-pi-task` never hardcodes
325
+ * contracts. See #1106.
326
+ */
327
+ subagentContractRegistry?: SubagentContractRegistry;
295
328
  }
296
329
 
297
330
  /**
@@ -527,6 +560,29 @@ export declare interface SandboxConfig {
527
560
  /** Extract snapshot-specific config for backwards compat with ensureSnapshot. */
528
561
  export declare type SnapshotConfig = NonNullable<SandboxConfig['snapshot']>;
529
562
 
563
/**
 * One named output contract a subagent can be asked to satisfy.
 */
declare interface SubagentOutputContract {
    /**
     * Stable identifier the parent uses to reference this contract.
     * By convention lower-snake-case, e.g. `judge_eval_variant_result`.
     */
    readonly name: string;
    /**
     * Human-readable description. Shown in the subagent tool's help text
     * and in the inner session's submit-tool description, which helps a
     * parent LLM pick between several contracts.
     */
    readonly description: string;
    /**
     * TypeBox schema the subagent's submit-tool args MUST validate
     * against. The args themselves are the output payload — there is no
     * `{ output: ... }` wrapper — so the LLM receives field-level
     * guidance directly.
     */
    readonly parametersSchema: TSchema;
}

/**
 * Read-only lookup surface over a set of subagent output contracts.
 */
declare interface SubagentContractRegistry {
    /** Look up a contract by its name; unknown names yield `null`. */
    get(name: string): SubagentOutputContract | null;
    /** Return every contract held by the registry. */
    list(): SubagentOutputContract[];
}
585
+
530
586
  export declare interface SubagentToolHandle {
531
587
  /** ToolDefinition to register via `customTools` on the parent session. */
532
588
  readonly tool: ToolDefinition;
@@ -769,9 +825,10 @@ export declare interface VmCredentials {
769
825
  agentEnvRaw: string;
770
826
  /**
771
827
  * Pi OAuth/API-key auth blob. Null when neither `~/.pi/agent/auth.json`
772
- * (or its `PI_AUTH_PATH` override) is present — in that case the daemon
773
- * relies on Pi's env-var providers (`ANTHROPIC_API_KEY`, etc.) carried
774
- * via `agentEnv` and the host environment instead. CI uses this path.
828
+ * (resolved via `PI_CODING_AGENT_DIR` when set) is present — in that
829
+ * case the daemon relies on Pi's env-var providers (`ANTHROPIC_API_KEY`,
830
+ * etc.) carried via `agentEnv` and the host environment instead. CI uses
831
+ * this path.
775
832
  */
776
833
  piAuthJson: string | null;
777
834
  agentEnv: Record<string, string | undefined>;
package/dist/index.js CHANGED
@@ -8133,7 +8133,8 @@ function findMainWorktree() {
8133
8133
  function loadCredentials(agentDir) {
8134
8134
  const moltnetJson = readFileSync(path.join(agentDir, "moltnet.json"), "utf8");
8135
8135
  const agentEnvRaw = readFileSync(path.join(agentDir, "env"), "utf8");
8136
- const piAuthPath = process.env.PI_AUTH_PATH ?? path.join(process.env.HOME ?? "", ".pi", "agent", "auth.json");
8136
+ const piAgentDir = process.env.PI_CODING_AGENT_DIR ?? path.join(process.env.HOME ?? "", ".pi", "agent");
8137
+ const piAuthPath = path.join(piAgentDir, "auth.json");
8137
8138
  const piAuthJson = existsSync(piAuthPath) ? readFileSync(piAuthPath, "utf8") : null;
8138
8139
  const gitconfigPath = path.join(agentDir, "gitconfig");
8139
8140
  const gitconfig = existsSync(gitconfigPath) ? readFileSync(gitconfigPath, "utf8") : null;
@@ -8645,7 +8646,7 @@ async function buildAgentSession(args) {
8645
8646
  spanAttributes: args.otelSpanAttrs
8646
8647
  });
8647
8648
  const resourceLoader = new DefaultResourceLoader({
8648
- cwd: args.mountPath,
8649
+ cwd: args.cwdPath,
8649
8650
  agentDir: args.piAuthDir,
8650
8651
  extensionFactories: [piOtelExtension],
8651
8652
  appendSystemPrompt: args.appendSystemPrompt,
@@ -8653,12 +8654,12 @@ async function buildAgentSession(args) {
8653
8654
  });
8654
8655
  await resourceLoader.reload();
8655
8656
  const sessionManager = args.sessionPersistence ? await resolvePersistentSessionManager({
8656
- cwd: args.mountPath,
8657
+ cwd: args.cwdPath,
8657
8658
  sessionDir: args.sessionPersistence.sessionDir
8658
- }) : SessionManager.inMemory(args.mountPath);
8659
+ }) : SessionManager.inMemory(args.cwdPath);
8659
8660
  return (await createAgentSession({
8660
8661
  agentDir: args.piAuthDir,
8661
- cwd: args.mountPath,
8662
+ cwd: args.cwdPath,
8662
8663
  model: args.modelHandle,
8663
8664
  customTools: args.customTools,
8664
8665
  sessionManager,
@@ -8670,6 +8671,61 @@ async function resolvePersistentSessionManager(args) {
8670
8671
  return SessionManager.continueRecent(args.cwd, args.sessionDir);
8671
8672
  }
8672
8673
  //#endregion
8674
+ //#region ../agent-runtime/src/context-bindings.ts
8675
+ var PROMPT_SEPARATOR = "\n\n---\n\n";
8676
+ /**
8677
+ * Resolve `task.input.context[]` into delivered side-effects (skills
8678
+ * persisted via `deliver.skill`) and prompt fragments
8679
+ * (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
8680
+ * built prompt.
8681
+ *
8682
+ * Per-binding semantics (V1):
8683
+ * - `skill` → `deliver.skill({ slug, content })` once per ref.
8684
+ * Slug collisions on distinct contents are
8685
+ * refused loudly.
8686
+ * - `prompt_prefix` → content appended to `systemPromptPrefix` with
8687
+ * the canonical `\n\n---\n\n` separator (in
8688
+ * declared order).
8689
+ * - `user_inline` → content appended to `userInlineSuffix` in
8690
+ * declared order, same separator.
8691
+ *
8692
+ * No fetching, no hashing — bytes are inlined in `ContextRef.content`,
8693
+ * and the task's `inputCid` already pins the entire input. The imposer
8694
+ * chose these bytes; the resolver just dispatches them.
8695
+ *
8696
+ * The function is pure with respect to its arguments: file writes are
8697
+ * confined to the injected `deliver` callback, which makes the
8698
+ * resolver trivial to test.
8699
+ */
8700
+ async function resolveTaskContext(args) {
8701
+ const promptParts = [];
8702
+ const userParts = [];
8703
+ const injected = [];
8704
+ const usedSlugs = /* @__PURE__ */ new Map();
8705
+ for (const ref of args.context) {
8706
+ if (ref.binding === "skill") {
8707
+ const prior = usedSlugs.get(ref.slug);
8708
+ if (prior !== void 0) {
8709
+ if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
8710
+ injected.push(ref);
8711
+ continue;
8712
+ }
8713
+ usedSlugs.set(ref.slug, ref.content);
8714
+ await args.deliver.skill({
8715
+ slug: ref.slug,
8716
+ content: ref.content
8717
+ });
8718
+ } else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
8719
+ else userParts.push(ref.content);
8720
+ injected.push(ref);
8721
+ }
8722
+ return {
8723
+ injected,
8724
+ systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
8725
+ userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
8726
+ };
8727
+ }
8728
+ //#endregion
8673
8729
  //#region ../tasks/src/formats.ts
8674
8730
  /**
8675
8731
  * Register TypeBox string formats used across Task / TaskOutput / task-type
@@ -8841,10 +8897,10 @@ function validateRubricWeights(rubric) {
8841
8897
  * complementary places.
8842
8898
  *
8843
8899
  * Before this envelope existed, criteria were scattered: a vestigial
8844
- * `criteriaCid` column nobody resolved, an `acceptanceCriteria: string[]`
8845
- * field on `fulfill_brief.input` that was "interpreted by the claiming
8846
- * agent," and inline `rubric` / `criteria[]` fields on judgment-task
8847
- * inputs. None of those were machine-verifiable end-to-end.
8900
+ * `criteriaCid` column nobody resolved, free-form prose on
8901
+ * `fulfill_brief.input`, and inline `rubric` / `criteria[]` fields on
8902
+ * judgment-task inputs. None of those were machine-verifiable
8903
+ * end-to-end.
8848
8904
  *
8849
8905
  * This module defines a single, content-addressable envelope an imposer
8850
8906
  * attaches to any task type. It has four orthogonal sections — pick
@@ -9140,7 +9196,6 @@ var FULFILL_BRIEF_TYPE = "fulfill_brief";
9140
9196
  var FulfillBriefInput = Type$1.Object({
9141
9197
  brief: Type$1.String({ minLength: 1 }),
9142
9198
  title: Type$1.Optional(Type$1.String()),
9143
- acceptanceCriteria: Type$1.Optional(Type$1.Array(Type$1.String())),
9144
9199
  successCriteria: Type$1.Optional(SuccessCriteria),
9145
9200
  seedFiles: Type$1.Optional(Type$1.Array(Type$1.String())),
9146
9201
  scopeHint: Type$1.Optional(Type$1.String())
@@ -9564,6 +9619,72 @@ async function onCreateJudgeEvalVariant(input, ctx) {
9564
9619
  }];
9565
9620
  }
9566
9621
  //#endregion
9622
+ //#region ../tasks/src/task-types/pr-review.ts
9623
/** Task-type name for PR review judgment tasks. */
var PR_REVIEW_TYPE = "pr_review";
/**
 * The thing under review: a non-empty title and summary, plus optional
 * lists of resource URLs and inspection hints (each a non-empty string).
 */
var PrReviewSubject = Type$1.Object({
	title: Type$1.String({ minLength: 1 }),
	summary: Type$1.String({ minLength: 1 }),
	resourceUrls: Type$1.Optional(Type$1.Array(Type$1.String({ minLength: 1 }))),
	inspectionHints: Type$1.Optional(Type$1.Array(Type$1.String({ minLength: 1 })))
}, { $id: "PrReviewSubject", additionalProperties: false });
/**
 * Task input: the subject, an optional free-form task prompt, and the
 * success-criteria envelope.
 */
var PrReviewInput = Type$1.Object({
	subject: PrReviewSubject,
	taskPrompt: Type$1.Optional(Type$1.String({ minLength: 1 })),
	successCriteria: SuccessCriteria
}, { $id: "PrReviewInput", additionalProperties: false });
/** One binary (0 or 1) score for a single criterion, with its rationale. */
var PrReviewScore = Type$1.Object({
	criterionId: Type$1.String({ minLength: 1 }),
	score: Type$1.Union([Type$1.Literal(0), Type$1.Literal(1)]),
	rationale: Type$1.String({ minLength: 1 })
}, { $id: "PrReviewScore", additionalProperties: false });
/**
 * Task output: at least one score, a composite in the range [0, 1], and
 * a non-empty verdict.
 */
var PrReviewOutput = Type$1.Object({
	scores: Type$1.Array(PrReviewScore, { minItems: 1 }),
	composite: Type$1.Number({ minimum: 0, maximum: 1 }),
	verdict: Type$1.String({ minLength: 1 })
}, { $id: "PrReviewOutput", additionalProperties: false });
9660
/**
 * Check that every criterion in `rubric.criteria` uses boolean scoring.
 *
 * @param rubric Object with a `criteria` array of `{ id, scoring }` entries.
 * @returns An error message naming the first non-boolean criterion, or
 *   `null` when all criteria are boolean.
 */
function requireBooleanRubric(rubric) {
	const offender = rubric.criteria.find((entry) => entry.scoring !== "boolean");
	if (offender === void 0) return null;
	return `pr_review requires boolean scoring for every rubric criterion; criterion "${offender.id}" uses "${offender.scoring}"`;
}
9664
/**
 * Semantic validation for `pr_review` input.
 *
 * Requires `successCriteria` and `successCriteria.rubric` to be present,
 * then returns the first problem reported by `validateRubricWeights`,
 * falling back to `requireBooleanRubric`.
 *
 * @param input The task input object.
 * @returns An error message, or `null` when no check reports a problem.
 */
function validatePrReviewInput(input) {
	const criteria = input.successCriteria;
	if (!criteria) return "successCriteria is required for judgment tasks";
	const rubric = criteria.rubric;
	if (!rubric) return "successCriteria.rubric is required for judgment tasks";
	const weightProblem = validateRubricWeights(rubric);
	if (weightProblem !== null && weightProblem !== void 0) return weightProblem;
	return requireBooleanRubric(rubric);
}
9670
/**
 * Cross-check a `pr_review` output against the rubric carried by its input.
 *
 * Checks, in order:
 *  1. `output.scores` has exactly one entry per rubric criterion;
 *  2. each `scores[i].criterionId` equals the id of the rubric criterion
 *     at the same position;
 *  3. `output.composite` equals the weighted sum Σ(weight_i × score_i)
 *     within an absolute tolerance of 1e-6.
 *
 * @param output The task output (`scores`, `composite`, `verdict`).
 * @param input The task input; when it is missing, or carries no rubric,
 *   there is nothing to cross-check and `null` is returned.
 * @returns An error message describing the first mismatch, or `null`.
 */
function validatePrReviewOutput(output, input) {
	if (!input) return null;
	// Optional chaining: an input without `successCriteria` is treated like
	// an input without a rubric instead of raising a TypeError.
	const rubric = input.successCriteria?.rubric;
	if (!rubric) return null;
	const scores = output.scores;
	if (scores.length !== rubric.criteria.length) return `scores length ${scores.length} does not match rubric criteria length ${rubric.criteria.length}`;
	let composite = 0;
	for (let i = 0; i < rubric.criteria.length; i++) {
		const criterion = rubric.criteria[i];
		const score = scores[i];
		if (score.criterionId !== criterion.id) return `scores[${i}] has criterionId "${score.criterionId}" but rubric expects "${criterion.id}" in that position`;
		composite += criterion.weight * score.score;
	}
	const claimed = output.composite;
	// Written as a negated `<=` on purpose: every comparison involving NaN
	// is false, so `difference > 1e-6` would let a missing or NaN composite
	// (or a NaN weighted sum) pass as a match.
	if (!(Math.abs(claimed - composite) <= 1e-6)) return `composite ${claimed} does not match weighted sum ${composite.toFixed(6)}`;
	return null;
}
9687
+ //#endregion
9567
9688
  //#region ../tasks/src/task-types/render-pack.ts
9568
9689
  /**
9569
9690
  * `render_pack` — turn a context pack into a signed rendered artefact.
@@ -9731,6 +9852,18 @@ var BUILT_IN_TASK_TYPES = {
9731
9852
  validateInput: validateJudgmentInput,
9732
9853
  validateInputAsync: validateAssessBriefInputAsync
9733
9854
  },
9855
+ [PR_REVIEW_TYPE]: {
9856
+ name: PR_REVIEW_TYPE,
9857
+ inputSchema: PrReviewInput,
9858
+ outputSchema: PrReviewOutput,
9859
+ outputKind: "judgment",
9860
+ workspaceMode: "dedicated_worktree",
9861
+ workspaceScope: "attempt",
9862
+ sessionScope: "none",
9863
+ requiresReferences: false,
9864
+ validateInput: validatePrReviewInput,
9865
+ validateOutput: validatePrReviewOutput
9866
+ },
9734
9867
  [CURATE_PACK_TYPE]: {
9735
9868
  name: CURATE_PACK_TYPE,
9736
9869
  inputSchema: CuratePackInput,
@@ -10086,133 +10219,6 @@ Type$1.Object({
10086
10219
  additionalProperties: false
10087
10220
  });
10088
10221
  //#endregion
10089
- //#region ../agent-runtime/src/subagent-output-contracts.ts
10090
- var REGISTRY = /* @__PURE__ */ new Map();
10091
- /**
10092
- * Register a subagent output contract. Idempotent: re-registering the
10093
- * same name with a different schema throws — contracts are meant to
10094
- * be stable. Re-registering with the identical contract object (same
10095
- * reference) is a no-op for HMR and test convenience.
10096
- *
10097
- * Typically called at module-init time alongside task-type
10098
- * registration. See task-types/index.ts in @moltnet/tasks for the
10099
- * conventional pattern.
10100
- */
10101
- function registerSubagentOutputContract(contract) {
10102
- if (!contract.name || contract.name.trim().length === 0) throw new Error("subagent output contract name is required");
10103
- if (!/^[a-z][a-z0-9_]*$/.test(contract.name)) throw new Error(`subagent output contract name '${contract.name}' must be lower_snake_case (starts with a letter, then [a-z0-9_]+)`);
10104
- const existing = REGISTRY.get(contract.name);
10105
- if (existing && existing !== contract) {
10106
- if (existing.parametersSchema !== contract.parametersSchema) throw new Error(`subagent output contract '${contract.name}' is already registered with a different schema; refusing to override`);
10107
- }
10108
- REGISTRY.set(contract.name, contract);
10109
- }
10110
- /**
10111
- * Resolve a subagent output contract by name. Returns `null` for
10112
- * unknown names — callers (the subagent custom tool) decide whether
10113
- * that's a tool error the parent LLM can recover from or a hard fail.
10114
- */
10115
- function getSubagentOutputContract(name) {
10116
- return REGISTRY.get(name) ?? null;
10117
- }
10118
- /**
10119
- * List all registered contracts. Useful for diagnostics and for the
10120
- * subagent tool's parameter description so a parent LLM can see what
10121
- * contracts are available without enumerating them in its prompt.
10122
- */
10123
- function listSubagentOutputContracts() {
10124
- return [...REGISTRY.values()];
10125
- }
10126
- //#endregion
10127
- //#region ../agent-runtime/src/built-in-contract-registrations.ts
10128
- /**
10129
- * Built-in subagent output contracts (#1087, #943).
10130
- *
10131
- * Why this is an exported function and not a module-init side
10132
- * effect:
10133
- *
10134
- * - The registry is process-global. Module-init registration
10135
- * fires exactly once per Node process (ESM modules are cached
10136
- * by URL). Tests that call `__resetSubagentOutputContractsForTests()`
10137
- * to start from an empty registry have no way to repopulate
10138
- * the built-ins without re-evaluating the module — which the
10139
- * cache prevents. PR #1101 review M4.
10140
- * - An explicit `registerBuiltInSubagentContracts()` lets the
10141
- * package index call it once at module load AND lets test
10142
- * setup hooks call it again after `__reset...`.
10143
- * - `registerSubagentOutputContract` is itself idempotent for
10144
- * identical re-registrations, so calling this function twice
10145
- * in the same process is safe.
10146
- *
10147
- * Adding a new built-in: extend the body of this function. Do not
10148
- * call `registerSubagentOutputContract` from anywhere else in the
10149
- * package — keeping all built-ins in one function makes the set
10150
- * auditable.
10151
- */
10152
- function registerBuiltInSubagentContracts() {
10153
- registerSubagentOutputContract({
10154
- name: "judge_eval_variant_result",
10155
- description: "Per-variant grading result produced by a subagent of judge_eval_variant: scores against the shared rubric, composite, and a 1-3 sentence verdict for a single variant.",
10156
- parametersSchema: JudgeEvalVariantResult
10157
- });
10158
- }
10159
- registerBuiltInSubagentContracts();
10160
- //#endregion
10161
- //#region ../agent-runtime/src/context-bindings.ts
10162
- var PROMPT_SEPARATOR = "\n\n---\n\n";
10163
- /**
10164
- * Resolve `task.input.context[]` into delivered side-effects (skills
10165
- * persisted via `deliver.skill`) and prompt fragments
10166
- * (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
10167
- * built prompt.
10168
- *
10169
- * Per-binding semantics (V1):
10170
- * - `skill` → `deliver.skill({ slug, content })` once per ref.
10171
- * Slug collisions on distinct contents are
10172
- * refused loudly.
10173
- * - `prompt_prefix` → content appended to `systemPromptPrefix` with
10174
- * the canonical `\n\n---\n\n` separator (in
10175
- * declared order).
10176
- * - `user_inline` → content appended to `userInlineSuffix` in
10177
- * declared order, same separator.
10178
- *
10179
- * No fetching, no hashing — bytes are inlined in `ContextRef.content`,
10180
- * and the task's `inputCid` already pins the entire input. The imposer
10181
- * chose these bytes; the resolver just dispatches them.
10182
- *
10183
- * The function is pure with respect to its arguments: file writes are
10184
- * confined to the injected `deliver` callback, which makes the
10185
- * resolver trivial to test.
10186
- */
10187
- async function resolveTaskContext(args) {
10188
- const promptParts = [];
10189
- const userParts = [];
10190
- const injected = [];
10191
- const usedSlugs = /* @__PURE__ */ new Map();
10192
- for (const ref of args.context) {
10193
- if (ref.binding === "skill") {
10194
- const prior = usedSlugs.get(ref.slug);
10195
- if (prior !== void 0) {
10196
- if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
10197
- injected.push(ref);
10198
- continue;
10199
- }
10200
- usedSlugs.set(ref.slug, ref.content);
10201
- await args.deliver.skill({
10202
- slug: ref.slug,
10203
- content: ref.content
10204
- });
10205
- } else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
10206
- else userParts.push(ref.content);
10207
- injected.push(ref);
10208
- }
10209
- return {
10210
- injected,
10211
- systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
10212
- userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
10213
- };
10214
- }
10215
- //#endregion
10216
10222
  //#region ../agent-runtime/src/output-tools.ts
10217
10223
  /**
10218
10224
  * Submit-output tool contract.
@@ -10305,6 +10311,20 @@ function buildFinalOutputBlock(opts) {
10305
10311
  return lines.join("\n");
10306
10312
  }
10307
10313
  //#endregion
10314
+ //#region ../agent-runtime/src/prompts/rubric-common.ts
10315
/**
 * Render the rubric's criteria as a numbered Markdown list, one line per
 * criterion: position, bold id, weight, scoring mode, and description.
 *
 * @param rubric Object with a `criteria` array of
 *   `{ id, weight, scoring, description }` entries.
 * @returns The list lines joined with newlines ("" for no criteria).
 */
function renderRubricCriteriaList(rubric) {
	const formatCriterion = (criterion, index) => {
		const details = `(weight ${criterion.weight}, scoring: \`${criterion.scoring}\`)`;
		return `${index + 1}. **${criterion.id}** ${details} — ${criterion.description}`;
	};
	return rubric.criteria.map(formatCriterion).join("\n");
}
10318
/**
 * Render the optional rubric preamble as a Markdown section: a
 * `### Rubric preamble` heading, a blank line, the preamble text, and a
 * trailing newline.
 *
 * @param rubric Object that may carry a `preamble`.
 * @returns The section text, or `null` when the preamble is absent or empty.
 */
function renderRubricPreambleSection(rubric) {
	const { preamble } = rubric;
	return preamble ? `### Rubric preamble\n\n${preamble}\n` : null;
}
10327
+ //#endregion
10308
10328
  //#region ../agent-runtime/src/prompts/assess-brief.ts
10309
10329
  /**
10310
10330
  * Build the first user-message prompt for an `assess_brief` judge attempt.
@@ -10330,13 +10350,8 @@ function buildFinalOutputBlock(opts) {
10330
10350
  */
10331
10351
  function buildAssessBriefUserPrompt(input, ctx) {
10332
10352
  const rubric = input.successCriteria.rubric;
10333
- const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
10334
- const preambleSection = rubric.preamble ? [
10335
- "### Rubric preamble",
10336
- "",
10337
- rubric.preamble,
10338
- ""
10339
- ].join("\n") : "";
10353
+ const criteriaList = renderRubricCriteriaList(rubric);
10354
+ const preambleSection = renderRubricPreambleSection(rubric) ?? "";
10340
10355
  const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
10341
10356
  "### Workspace",
10342
10357
  "",
@@ -10617,13 +10632,7 @@ function buildCuratePackUserPrompt(input, ctx) {
10617
10632
  * is told to inspect them itself.
10618
10633
  */
10619
10634
  function buildFulfillBriefUserPrompt(input, ctx) {
10620
- const { brief, title, acceptanceCriteria, seedFiles, scopeHint } = input;
10621
- const criteriaSection = acceptanceCriteria?.length ? [
10622
- "### Acceptance criteria",
10623
- "",
10624
- ...acceptanceCriteria.map((c) => `- ${c}`),
10625
- ""
10626
- ].join("\n") : "";
10635
+ const { brief, title, seedFiles, scopeHint } = input;
10627
10636
  const seedSection = seedFiles?.length ? [
10628
10637
  "### Seed files",
10629
10638
  "",
@@ -10671,7 +10680,6 @@ function buildFulfillBriefUserPrompt(input, ctx) {
10671
10680
  "",
10672
10681
  brief,
10673
10682
  "",
10674
- criteriaSection,
10675
10683
  seedSection,
10676
10684
  correlationSection,
10677
10685
  workspaceSection,
@@ -10811,13 +10819,8 @@ function buildJudgeEvalVariantUserPrompt(input, ctx) {
10811
10819
  function buildJudgePackUserPrompt(input, ctx) {
10812
10820
  const { renderedPackId, sourcePackId, successCriteria } = input;
10813
10821
  const rubric = successCriteria.rubric;
10814
- const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
10815
- const preambleSection = rubric.preamble ? [
10816
- "### Rubric preamble",
10817
- "",
10818
- rubric.preamble,
10819
- ""
10820
- ].join("\n") : null;
10822
+ const criteriaList = renderRubricCriteriaList(rubric);
10823
+ const preambleSection = renderRubricPreambleSection(rubric);
10821
10824
  return [
10822
10825
  "# Judge Pack Agent",
10823
10826
  "",
@@ -10933,6 +10936,112 @@ function buildJudgePackUserPrompt(input, ctx) {
10933
10936
  ].filter((l) => l !== null).join("\n");
10934
10937
  }
10935
10938
  //#endregion
10939
+ //#region ../agent-runtime/src/prompts/pr-review.ts
10940
/**
 * Build the first user-message prompt for a `pr_review` judge attempt.
 *
 * The prompt is assembled from fixed instruction text plus optional
 * sections that appear only when the corresponding data is present:
 * resources (`input.subject.resourceUrls`), inspection hints
 * (`input.subject.inspectionHints`), workspace notes (when
 * `ctx.workspace.mode` is `"dedicated_worktree"`), task-specific
 * instructions (`input.taskPrompt`) and the rubric preamble.
 *
 * Absent optional sections are represented as `null` and are the only
 * entries dropped before joining. Empty strings in the line list are
 * intentional blank-line spacers and must survive: filtering on
 * truthiness would strip them and fuse headings, lists and paragraphs
 * together.
 *
 * @param input Validated `pr_review` input (`subject`, optional
 *   `taskPrompt`, `successCriteria.rubric`).
 * @param ctx `{ diaryId, taskId, workspace? }` for the current attempt.
 * @returns The prompt text, lines joined with "\n".
 */
function buildPrReviewUserPrompt(input, ctx) {
	const rubric = input.successCriteria.rubric;
	const criteriaList = renderRubricCriteriaList(rubric);
	const preambleSection = renderRubricPreambleSection(rubric);
	const taskPromptSection = input.taskPrompt ? [
		"## Task-specific instructions",
		"",
		input.taskPrompt,
		""
	].join("\n") : null;
	const resourceSection = input.subject.resourceUrls && input.subject.resourceUrls.length > 0 ? [
		"### Resources",
		"",
		...input.subject.resourceUrls.map((url) => `- ${url}`),
		""
	].join("\n") : null;
	const hintsSection = input.subject.inspectionHints && input.subject.inspectionHints.length > 0 ? [
		"### Inspection hints",
		"",
		...input.subject.inspectionHints.map((hint) => `- ${hint}`),
		""
	].join("\n") : null;
	const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
		"### Workspace",
		"",
		"This review attempt is running inside a dedicated disposable git",
		"worktree. Inspect and reason inside this workspace only.",
		ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`.` : "The current checkout is disposable and will be cleaned up when the task ends.",
		""
	].join("\n") : null;
	return [
		"# Review Agent",
		"",
		"You are an independent judge. You did NOT produce the subject under review.",
		"Assess it strictly against the rubric below and emit a structured judgment.",
		"You may inspect the local workspace and the referenced resources, but do NOT modify anything.",
		"",
		`Your diary ID is: ${ctx.diaryId}`,
		`This task's id is: ${ctx.taskId}`,
		"",
		"## Subject",
		"",
		`**Title:** ${input.subject.title}`,
		"",
		input.subject.summary,
		"",
		resourceSection,
		hintsSection,
		workspaceSection,
		"### Execution contract",
		"",
		"Treat the provided subject, resources, inspection hints, and any",
		"task-specific instructions as the full",
		"review contract for this task.",
		"",
		"If the task-specific instructions or inspection hints require an outward action tied to the review",
		"(for example publishing the judgment somewhere), perform that action as",
		"part of the task before reporting structured output.",
		"",
		"## Review workflow",
		"",
		"1. Read the subject summary, resources, inspection hints, and any",
		" task-specific instructions before scoring.",
		"2. Inspect the target artefact directly using the tools and resources the",
		" task makes available.",
		"3. If you are in a dedicated disposable worktree and need the review target",
		" checked out locally, do that work inside this disposable workspace only.",
		"4. Apply the rubric strictly. This task is about complexity and",
		" reviewability, not correctness or feature desirability.",
		"5. Perform any required outward action before emitting the final",
		" structured output.",
		"",
		taskPromptSection,
		preambleSection,
		"## Criteria",
		"",
		criteriaList,
		"",
		"### Scoring rules",
		"",
		"- Every criterion uses binary scoring only.",
		"- Score `1` when the subject clearly clears the criterion.",
		"- Score `0` when it does not, or when the evidence is ambiguous.",
		"- `rationale` is REQUIRED for every score. Keep it concrete and audit-friendly.",
		"- Compute `composite = Σ(weight_i × score_i)` exactly; the runtime rejects mismatches.",
		"",
		"Write a signed diary entry (tags: `judgment`, `pr_review`) capturing the rationale before reporting structured output.",
		"",
		buildFinalOutputBlock({
			taskType: "pr_review",
			outputSchemaName: "PrReviewOutput",
			shapeSketch: [
				"{",
				" \"scores\": [",
				" { \"criterionId\": \"...\", \"score\": 0, \"rationale\": \"...\" }",
				" ],",
				" \"composite\": <sum-of-weighted-binary-scores>,",
				" \"verdict\": \"<1-3 sentence overall>\"",
				"}"
			].join("\n"),
			extraNotes: ["`scores` MUST stay in the same order as the rubric criteria.", "`score` MUST be exactly `0` or `1` for every criterion."]
		})
	// Drop only absent optional sections; "" entries are blank-line spacers.
	].filter((part) => part !== null).join("\n");
}
11044
+ //#endregion
10936
11045
  //#region ../agent-runtime/src/prompts/render-pack.ts
10937
11046
  /**
10938
11047
  * Build the first user-message prompt for a `render_pack` task. Almost mechanical:
@@ -11115,6 +11224,16 @@ function buildTaskUserPrompt(task, ctx) {
11115
11224
  diaryId: ctx.diaryId,
11116
11225
  taskId: ctx.taskId
11117
11226
  });
11227
+ case PR_REVIEW_TYPE:
11228
+ if (!Value.Check(PrReviewInput, task.input)) {
11229
+ const errors = [...Value.Errors(PrReviewInput, task.input)];
11230
+ throw new Error(`pr_review input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
11231
+ }
11232
+ return buildPrReviewUserPrompt(task.input, {
11233
+ diaryId: ctx.diaryId,
11234
+ taskId: ctx.taskId,
11235
+ workspace: ctx.workspace
11236
+ });
11118
11237
  case JUDGE_EVAL_VARIANT_TYPE:
11119
11238
  if (!Value.Check(JudgeEvalVariantInput, task.input)) {
11120
11239
  const errors = [...Value.Errors(JudgeEvalVariantInput, task.input)];
@@ -14838,6 +14957,7 @@ var DEFAULT_SUBAGENT_TIMEOUT_MS = 300 * 1e3;
14838
14957
  */
14839
14958
  function createSubagentTool(args) {
14840
14959
  const buildSession = args.buildAgentSession ?? buildAgentSession;
14960
+ const { contractRegistry } = args;
14841
14961
  let callCount = 0;
14842
14962
  return {
14843
14963
  tool: defineTool({
@@ -14848,8 +14968,8 @@ function createSubagentTool(args) {
14848
14968
  async execute(_id, params) {
14849
14969
  if (!Value.Check(SubagentToolParameters, params)) return toolError(`subagent: invalid parameters: ${JSON.stringify([...Value.Errors(SubagentToolParameters, params)].slice(0, 3))}`);
14850
14970
  const { task, output_schema } = params;
14851
- const contract = getSubagentOutputContract(output_schema);
14852
- if (!contract) return toolError(`subagent: unknown output_schema "${output_schema}". Registered contracts: [${listSubagentOutputContracts().map((c) => c.name).join(", ")}]`);
14971
+ const contract = contractRegistry.get(output_schema);
14972
+ if (!contract) return toolError(`subagent: unknown output_schema "${output_schema}". Registered contracts: [${contractRegistry.list().map((c) => c.name).join(", ")}]`);
14853
14973
  callCount += 1;
14854
14974
  const callIndex = callCount;
14855
14975
  let captured = null;
@@ -14879,6 +14999,7 @@ function createSubagentTool(args) {
14879
14999
  });
14880
15000
  const session = await buildSession({
14881
15001
  mountPath: args.mountPath,
15002
+ cwdPath: args.cwdPath ?? args.mountPath,
14882
15003
  piAuthDir: args.piAuthDir,
14883
15004
  modelHandle: args.modelHandle,
14884
15005
  agentName: args.agentName,
@@ -15219,6 +15340,7 @@ function prepareTaskWorkspace(task, requestedMountPath, executionPlan) {
15219
15340
  const branch = executionPlan?.worktreeBranch ?? null;
15220
15341
  if (!branch) return {
15221
15342
  mountPath: requestedMountPath,
15343
+ cwdPath: requestedMountPath,
15222
15344
  mode: "shared_mount",
15223
15345
  branch: null,
15224
15346
  cleanup: () => {}
@@ -15226,7 +15348,7 @@ function prepareTaskWorkspace(task, requestedMountPath, executionPlan) {
15226
15348
  const mainRepo = findMainWorktree();
15227
15349
  const worktreeDir = resolveTaskWorktreePath(mainRepo, executionPlan?.workspaceId ?? `task-${task.id}`);
15228
15350
  const relMount = relative(mainRepo, requestedMountPath);
15229
- const mountPath = relMount === "" || relMount.startsWith("..") ? worktreeDir : join(worktreeDir, relMount);
15351
+ const cwdPath = relMount === "" || relMount.startsWith("..") ? worktreeDir : join(worktreeDir, relMount);
15230
15352
  const keepWorkspace = executionPlan?.workspaceScope === "session" && executionPlan.sessionKey !== null;
15231
15353
  if (keepWorkspace) ensureReusableTaskWorktree(mainRepo, worktreeDir, branch);
15232
15354
  else {
@@ -15234,7 +15356,8 @@ function prepareTaskWorkspace(task, requestedMountPath, executionPlan) {
15234
15356
  addTaskWorktree(mainRepo, worktreeDir, branch);
15235
15357
  }
15236
15358
  return {
15237
- mountPath,
15359
+ mountPath: mainRepo,
15360
+ cwdPath,
15238
15361
  mode: "dedicated_worktree",
15239
15362
  branch,
15240
15363
  cleanup: keepWorkspace ? () => {} : () => {
@@ -15347,15 +15470,24 @@ var noopTurnEventHandler = () => {};
15347
15470
  function createPiTaskExecutor(opts) {
15348
15471
  let cachedCheckpoint = opts.checkpointPath ?? null;
15349
15472
  return async (claimedTask, reporter) => {
15350
- if (!cachedCheckpoint) cachedCheckpoint = await ensureSnapshot({
15351
- config: opts.sandboxConfig?.snapshot,
15352
- onProgress: opts.onSnapshotProgress ?? ((m) => {
15353
- process.stderr.write(`[snapshot] ${m}\n`);
15354
- })
15473
+ const reporterWasOpened = !reporter.cancelSignal.aborted;
15474
+ if (reporterWasOpened) await reporter.open({
15475
+ taskId: claimedTask.task.id,
15476
+ attemptN: claimedTask.attemptN
15355
15477
  });
15356
15478
  return executePiTask(claimedTask, reporter, {
15357
15479
  ...opts,
15358
- checkpointPath: cachedCheckpoint
15480
+ checkpointPath: cachedCheckpoint ?? void 0,
15481
+ resolveCheckpointPath: async () => {
15482
+ if (!cachedCheckpoint) cachedCheckpoint = await ensureSnapshot({
15483
+ config: opts.sandboxConfig?.snapshot,
15484
+ onProgress: opts.onSnapshotProgress ?? ((m) => {
15485
+ process.stderr.write(`[snapshot] ${m}\n`);
15486
+ })
15487
+ });
15488
+ return cachedCheckpoint;
15489
+ },
15490
+ reporterAlreadyOpened: reporterWasOpened
15359
15491
  });
15360
15492
  };
15361
15493
  }
@@ -15371,8 +15503,9 @@ async function executePiTask(claimedTask, reporter, opts) {
15371
15503
  const startTime = Date.now();
15372
15504
  const requestedMountPath = opts.mountPath ?? process.cwd();
15373
15505
  const executionPlan = opts.makeExecutionPlan?.(claimedTask) ?? null;
15374
- const workspace = prepareTaskWorkspace(task, requestedMountPath, executionPlan);
15375
- const mountPath = workspace.mountPath;
15506
+ let workspace = null;
15507
+ let mountPath = requestedMountPath;
15508
+ let cwdPath = requestedMountPath;
15376
15509
  if (reporter.cancelSignal.aborted) return {
15377
15510
  taskId: task.id,
15378
15511
  attemptN,
@@ -15387,33 +15520,8 @@ async function executePiTask(claimedTask, reporter, opts) {
15387
15520
  retryable: false
15388
15521
  }
15389
15522
  };
15390
- const checkpointPath = opts.checkpointPath ?? await ensureSnapshot({
15391
- config: opts.sandboxConfig?.snapshot,
15392
- onProgress: opts.onSnapshotProgress ?? ((m) => {
15393
- process.stderr.write(`[snapshot] ${m}\n`);
15394
- })
15395
- });
15396
- const mainRepoForRepair = findMainWorktree();
15397
- try {
15398
- execFileSync("git", [
15399
- "-C",
15400
- mainRepoForRepair,
15401
- "worktree",
15402
- "repair",
15403
- "--relative-paths"
15404
- ], { stdio: "pipe" });
15405
- } catch {}
15523
+ let reporterOpen = opts.reporterAlreadyOpened ?? false;
15406
15524
  let managed = null;
15407
- managed = await resumeVm({
15408
- checkpointPath,
15409
- agentName: opts.agentName,
15410
- mountPath,
15411
- extraAllowedHosts: opts.extraAllowedHosts,
15412
- sandboxConfig: opts.sandboxConfig
15413
- });
15414
- const diaryId = task.diaryId ?? "";
15415
- const taskTeamId = task.teamId ?? "";
15416
- let reporterOpen = false;
15417
15525
  let session = null;
15418
15526
  let subagentHandle = null;
15419
15527
  const finalUsage = emptyUsage(opts.provider, opts.model);
@@ -15432,41 +15540,103 @@ async function executePiTask(claimedTask, reporter, opts) {
15432
15540
  retryable: false
15433
15541
  }
15434
15542
  });
15543
+ let onTurnEvent;
15544
+ if (opts.makeOnTurnEvent) try {
15545
+ onTurnEvent = opts.makeOnTurnEvent(claimedTask);
15546
+ } catch (err) {
15547
+ process.stderr.write(`[emit] makeOnTurnEvent threw: ${err instanceof Error ? err.message : String(err)}\n`);
15548
+ onTurnEvent = noopTurnEventHandler;
15549
+ }
15550
+ else onTurnEvent = opts.onTurnEvent ?? noopTurnEventHandler;
15551
+ const emit = (kind, payload) => {
15552
+ try {
15553
+ onTurnEvent(kind, summarizePayloadForLog(kind, payload));
15554
+ } catch (err) {
15555
+ process.stderr.write(`[emit] onTurnEvent threw for kind="${kind}": ${err instanceof Error ? err.message : String(err)}\n`);
15556
+ }
15557
+ return reporter.record({
15558
+ kind,
15559
+ payload
15560
+ });
15561
+ };
15562
+ const emitError = async (phase, message, extra = {}) => {
15563
+ await emit("error", {
15564
+ phase,
15565
+ message,
15566
+ ...extra
15567
+ });
15568
+ };
15435
15569
  try {
15436
- const mainRepo = findMainWorktree();
15437
- activateAgentEnv(managed.credentials.agentEnv, mainRepo);
15438
- await reporter.open({
15570
+ if (!opts.reporterAlreadyOpened) await reporter.open({
15439
15571
  taskId: task.id,
15440
15572
  attemptN
15441
15573
  });
15442
15574
  reporterOpen = true;
15443
- let onTurnEvent;
15444
- if (opts.makeOnTurnEvent) try {
15445
- onTurnEvent = opts.makeOnTurnEvent(claimedTask);
15575
+ let checkpointPath;
15576
+ try {
15577
+ checkpointPath = opts.checkpointPath ?? (opts.resolveCheckpointPath ? await opts.resolveCheckpointPath() : await ensureSnapshot({
15578
+ config: opts.sandboxConfig?.snapshot,
15579
+ onProgress: opts.onSnapshotProgress ?? ((m) => {
15580
+ process.stderr.write(`[snapshot] ${m}\n`);
15581
+ })
15582
+ }));
15583
+ } catch (err) {
15584
+ const message = err instanceof Error ? err.message : String(err);
15585
+ await emitError("snapshot", message);
15586
+ return makeFailedOutput("snapshot_failed", message);
15587
+ }
15588
+ try {
15589
+ workspace = prepareTaskWorkspace(task, requestedMountPath, executionPlan);
15590
+ mountPath = workspace.mountPath;
15591
+ cwdPath = workspace.cwdPath;
15446
15592
  } catch (err) {
15447
- process.stderr.write(`[emit] makeOnTurnEvent threw: ${err instanceof Error ? err.message : String(err)}\n`);
15448
- onTurnEvent = noopTurnEventHandler;
15593
+ const message = err instanceof Error ? err.message : String(err);
15594
+ await emitError("worktree_setup", message);
15595
+ return makeFailedOutput("worktree_setup_failed", message);
15449
15596
  }
15450
- else onTurnEvent = opts.onTurnEvent ?? noopTurnEventHandler;
15451
- const emit = (kind, payload) => {
15597
+ try {
15598
+ const mainRepoForRepair = findMainWorktree();
15452
15599
  try {
15453
- onTurnEvent(kind, summarizePayloadForLog(kind, payload));
15454
- } catch (err) {
15455
- process.stderr.write(`[emit] onTurnEvent threw for kind="${kind}": ${err instanceof Error ? err.message : String(err)}\n`);
15456
- }
15457
- return reporter.record({
15458
- kind,
15459
- payload
15600
+ execFileSync("git", [
15601
+ "-C",
15602
+ mainRepoForRepair,
15603
+ "worktree",
15604
+ "repair",
15605
+ "--relative-paths"
15606
+ ], { stdio: "pipe" });
15607
+ } catch {}
15608
+ } catch (err) {
15609
+ const message = err instanceof Error ? err.message : String(err);
15610
+ await emitError("worktree_setup", message);
15611
+ return makeFailedOutput("worktree_setup_failed", message);
15612
+ }
15613
+ try {
15614
+ managed = await resumeVm({
15615
+ checkpointPath,
15616
+ agentName: opts.agentName,
15617
+ mountPath,
15618
+ extraAllowedHosts: opts.extraAllowedHosts,
15619
+ sandboxConfig: opts.sandboxConfig
15460
15620
  });
15461
- };
15621
+ } catch (err) {
15622
+ const message = err instanceof Error ? err.message : String(err);
15623
+ await emitError("vm_resume", message);
15624
+ return makeFailedOutput("vm_resume_failed", message);
15625
+ }
15626
+ const diaryId = task.diaryId ?? "";
15627
+ const taskTeamId = task.teamId ?? "";
15628
+ const mainRepo = findMainWorktree();
15629
+ activateAgentEnv(managed.credentials.agentEnv, mainRepo);
15630
+ const activeWorkspace = workspace;
15631
+ if (!activeWorkspace) throw new Error("task workspace not prepared");
15462
15632
  await emit("info", {
15463
15633
  event: "execute_start",
15464
15634
  taskType: task.taskType,
15465
15635
  teamId: task.teamId,
15466
15636
  provider: opts.provider,
15467
15637
  model: opts.model,
15468
- workspaceMode: workspace.mode,
15469
- workspaceBranch: workspace.branch
15638
+ workspaceMode: activeWorkspace.mode,
15639
+ workspaceBranch: activeWorkspace.branch
15470
15640
  });
15471
15641
  let taskPrompt;
15472
15642
  try {
@@ -15474,8 +15644,8 @@ async function executePiTask(claimedTask, reporter, opts) {
15474
15644
  diaryId,
15475
15645
  taskId: task.id,
15476
15646
  workspace: {
15477
- mode: workspace.mode,
15478
- branch: workspace.branch
15647
+ mode: activeWorkspace.mode,
15648
+ branch: activeWorkspace.branch
15479
15649
  },
15480
15650
  extras: opts.promptExtras
15481
15651
  });
@@ -15527,7 +15697,7 @@ async function executePiTask(claimedTask, reporter, opts) {
15527
15697
  getTeamId: () => taskTeamId,
15528
15698
  getSessionErrors: () => [],
15529
15699
  clearSessionErrors: () => {},
15530
- getHostCwd: () => mountPath,
15700
+ getHostCwd: () => cwdPath,
15531
15701
  hostExecBaseEnv: new Set([...HOST_EXEC_DEFAULT_BASE_ENV, ...Object.keys(managed.credentials.agentEnv)]),
15532
15702
  hostExecAutoApprove: opts.hostExecAutoApprove ?? opts.sandboxConfig?.hostExec?.autoApprove ?? false,
15533
15703
  getTaskContext: () => ({
@@ -15555,6 +15725,7 @@ async function executePiTask(claimedTask, reporter, opts) {
15555
15725
  if (taskTypeUsesSubagents(task.taskType)) {
15556
15726
  subagentHandle = createSubagentTool({
15557
15727
  mountPath,
15728
+ cwdPath,
15558
15729
  piAuthDir,
15559
15730
  modelHandle,
15560
15731
  agentName: opts.agentName,
@@ -15563,12 +15734,14 @@ async function executePiTask(claimedTask, reporter, opts) {
15563
15734
  parentTaskId: task.id,
15564
15735
  parentTaskType: task.taskType,
15565
15736
  parentAttemptN: attemptN,
15737
+ contractRegistry: opts.subagentContractRegistry,
15566
15738
  parentCancelSignal: reporter.cancelSignal
15567
15739
  });
15568
15740
  parentSubagentTools.push(subagentHandle.tool);
15569
15741
  }
15570
15742
  session = await buildAgentSession({
15571
15743
  mountPath,
15744
+ cwdPath,
15572
15745
  piAuthDir,
15573
15746
  modelHandle,
15574
15747
  agentName: opts.agentName,
@@ -15653,6 +15826,10 @@ async function executePiTask(claimedTask, reporter, opts) {
15653
15826
  is_error: event.isError,
15654
15827
  result: event.isError ? truncateForWire(event.result) : void 0
15655
15828
  }));
15829
+ if (event.isError) track(emitError("tool_call_error", describeToolErrorMessage(event.result), {
15830
+ tool: event.toolName,
15831
+ result: truncateForWire(event.result)
15832
+ }));
15656
15833
  if (maxBashTimeouts > 0 && event.toolName === "bash" && event.isError && isBashTimeoutResult(event.result)) {
15657
15834
  bashTimeoutCount += 1;
15658
15835
  if (bashTimeoutCount >= maxBashTimeouts) triggerCapAbort("max_bash_timeouts_exceeded", `Aborted after ${bashTimeoutCount} bash timeouts in this attempt (cap ${maxBashTimeouts}).`);
@@ -15808,7 +15985,7 @@ async function executePiTask(claimedTask, reporter, opts) {
15808
15985
  }
15809
15986
  }
15810
15987
  if (managed) await managed.vm.close();
15811
- try {
15988
+ if (workspace) try {
15812
15989
  workspace.cleanup();
15813
15990
  } catch (err) {
15814
15991
  const detail = err instanceof Error ? err.message : String(err);
@@ -15919,6 +16096,23 @@ function truncateForWire(value) {
15919
16096
  };
15920
16097
  }
15921
16098
  }
16099
+ function describeToolErrorMessage(result) {
16100
+ if (typeof result === "string" && result.trim().length > 0) return result.trim();
16101
+ if (result && typeof result === "object") {
16102
+ const content = result.content;
16103
+ if (Array.isArray(content)) {
16104
+ for (const item of content) if (item && typeof item === "object" && typeof item.text === "string") {
16105
+ const text = item.text.trim();
16106
+ if (text.length > 0) return text;
16107
+ }
16108
+ }
16109
+ }
16110
+ try {
16111
+ return JSON.stringify(truncateForWire(result));
16112
+ } catch {
16113
+ return "Tool call failed";
16114
+ }
16115
+ }
15922
16116
  //#endregion
15923
16117
  //#region src/index.ts
15924
16118
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@themoltnet/pi-extension",
3
- "version": "0.17.0",
3
+ "version": "0.18.1",
4
4
  "type": "module",
5
5
  "description": "MoltNet pi extension — sandboxed tool execution in Gondolin VMs with MoltNet identity and persistent memory",
6
6
  "license": "MIT",
@@ -31,8 +31,8 @@
31
31
  "@earendil-works/gondolin": "^0.9.1",
32
32
  "@opentelemetry/api": "^1.9.0",
33
33
  "@sinclair/typebox": "^0.34.0",
34
- "@themoltnet/sdk": "0.102.0",
35
- "@themoltnet/agent-runtime": "0.15.1"
34
+ "@themoltnet/agent-runtime": "0.16.0",
35
+ "@themoltnet/sdk": "0.102.0"
36
36
  },
37
37
  "peerDependencies": {
38
38
  "@earendil-works/pi-coding-agent": ">=0.74.0",