@themoltnet/pi-extension 0.17.0 → 0.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +60 -3
- package/dist/index.js +415 -221
- package/package.json +3 -3
package/dist/index.d.ts
CHANGED
|
@@ -19,6 +19,7 @@ import { TObject } from '@sinclair/typebox';
|
|
|
19
19
|
import { ToolDefinition } from '@earendil-works/pi-coding-agent';
|
|
20
20
|
import { TOptional } from '@sinclair/typebox';
|
|
21
21
|
import { TRecord } from '@sinclair/typebox';
|
|
22
|
+
import { TSchema } from '@sinclair/typebox';
|
|
22
23
|
import { TString } from '@sinclair/typebox';
|
|
23
24
|
import { TUnion } from '@sinclair/typebox';
|
|
24
25
|
import { TUnknown } from '@sinclair/typebox';
|
|
@@ -43,6 +44,8 @@ export declare function buildAgentSession(args: BuildAgentSessionArgs): Promise<
|
|
|
43
44
|
declare interface BuildAgentSessionArgs {
|
|
44
45
|
/** Host directory mounted at /workspace inside the VM. */
|
|
45
46
|
mountPath: string;
|
|
47
|
+
/** Host working directory where the agent session should start. */
|
|
48
|
+
cwdPath: string;
|
|
46
49
|
/** pi auth directory (resolved from `PI_CODING_AGENT_DIR` or `~/.pi/agent`). */
|
|
47
50
|
piAuthDir: string;
|
|
48
51
|
/** Resolved pi model handle (provider + model id). */
|
|
@@ -133,6 +136,8 @@ export declare function createSubagentTool(args: CreateSubagentToolArgs): Subage
|
|
|
133
136
|
export declare interface CreateSubagentToolArgs {
|
|
134
137
|
/** Host directory mounted at /workspace inside the VM. */
|
|
135
138
|
mountPath: string;
|
|
139
|
+
/** Host working directory the subagent should start in. Defaults to mountPath. */
|
|
140
|
+
cwdPath?: string;
|
|
136
141
|
/** pi auth directory the parent resolved. */
|
|
137
142
|
piAuthDir: string;
|
|
138
143
|
/** Resolved pi model handle — subagents share it. */
|
|
@@ -188,6 +193,16 @@ export declare interface CreateSubagentToolArgs {
|
|
|
188
193
|
* exercise the tool's logic without booting a VM.
|
|
189
194
|
*/
|
|
190
195
|
buildAgentSession?: (args: BuildAgentSessionArgs) => Promise<AgentSession>;
|
|
196
|
+
/**
|
|
197
|
+
* Contract registry for resolving output_schema names to TypeBox
|
|
198
|
+
* schemas at call time. The subagent tool reads ONLY via `.get()`
|
|
199
|
+
* and `.list()` — the registry is immutable after construction.
|
|
200
|
+
*
|
|
201
|
+
* Production callers (executePiTask) create the registry with
|
|
202
|
+
* built-in contracts at session-setup; tests inject a registry
|
|
203
|
+
* with whatever stubs they need.
|
|
204
|
+
*/
|
|
205
|
+
contractRegistry: SubagentContractRegistry;
|
|
191
206
|
}
|
|
192
207
|
|
|
193
208
|
/**
|
|
@@ -240,6 +255,17 @@ export declare interface ExecutePiTaskOptions {
|
|
|
240
255
|
* across tasks.
|
|
241
256
|
*/
|
|
242
257
|
checkpointPath?: string;
|
|
258
|
+
/**
|
|
259
|
+
* Lazy checkpoint resolver used by `createPiTaskExecutor` so snapshot
|
|
260
|
+
* creation can happen after the reporter has been opened and can surface
|
|
261
|
+
* setup failures as task messages.
|
|
262
|
+
*/
|
|
263
|
+
resolveCheckpointPath?: () => Promise<string>;
|
|
264
|
+
/**
|
|
265
|
+
* Set when the caller already opened the reporter before handing control
|
|
266
|
+
* to `executePiTask`.
|
|
267
|
+
*/
|
|
268
|
+
reporterAlreadyOpened?: boolean;
|
|
243
269
|
/**
|
|
244
270
|
* Optional callback invoked alongside every `reporter.record()` so
|
|
245
271
|
* the daemon can mirror task messages into its local logger.
|
|
@@ -292,6 +318,13 @@ export declare interface ExecutePiTaskOptions {
|
|
|
292
318
|
* file-backed Pi sessions for selected task classes.
|
|
293
319
|
*/
|
|
294
320
|
makeExecutionPlan?: PiTaskExecutionPlanFactory;
|
|
321
|
+
/**
|
|
322
|
+
* Immutable subagent contract registry used to resolve `output_schema`
|
|
323
|
+
* names at subagent tool call time. Constructed by the daemon (or
|
|
324
|
+
* tests) from static built-in schemas — `execute-pi-task` never hardcodes
|
|
325
|
+
* contracts. See #1106.
|
|
326
|
+
*/
|
|
327
|
+
subagentContractRegistry?: SubagentContractRegistry;
|
|
295
328
|
}
|
|
296
329
|
|
|
297
330
|
/**
|
|
@@ -527,6 +560,29 @@ export declare interface SandboxConfig {
|
|
|
527
560
|
/** Extract snapshot-specific config for backwards compat with ensureSnapshot. */
|
|
528
561
|
export declare type SnapshotConfig = NonNullable<SandboxConfig['snapshot']>;
|
|
529
562
|
|
|
563
|
+
declare interface SubagentContractRegistry {
|
|
564
|
+
/** Resolve a contract by name. Returns `null` for unknown names. */
|
|
565
|
+
get(name: string): SubagentOutputContract | null;
|
|
566
|
+
/** List all registered contracts. */
|
|
567
|
+
list(): SubagentOutputContract[];
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
declare interface SubagentOutputContract {
|
|
571
|
+
/** Stable identifier the parent uses to reference this contract.
|
|
572
|
+
* Lower-snake-case by convention (e.g. `judge_eval_variant_result`). */
|
|
573
|
+
readonly name: string;
|
|
574
|
+
/** Human-readable description shown in the subagent tool's help text
|
|
575
|
+
* and in the inner session's submit-tool description. Useful when a
|
|
576
|
+
* parent LLM has multiple contracts to choose from. */
|
|
577
|
+
readonly description: string;
|
|
578
|
+
/**
|
|
579
|
+
* TypeBox schema the subagent's submit-tool args MUST validate
|
|
580
|
+
* against. The args ARE the output payload (no `{ output: ... }`
|
|
581
|
+
* wrapping), so the LLM gets field-level guidance directly.
|
|
582
|
+
*/
|
|
583
|
+
readonly parametersSchema: TSchema;
|
|
584
|
+
}
|
|
585
|
+
|
|
530
586
|
export declare interface SubagentToolHandle {
|
|
531
587
|
/** ToolDefinition to register via `customTools` on the parent session. */
|
|
532
588
|
readonly tool: ToolDefinition;
|
|
@@ -769,9 +825,10 @@ export declare interface VmCredentials {
|
|
|
769
825
|
agentEnvRaw: string;
|
|
770
826
|
/**
|
|
771
827
|
* Pi OAuth/API-key auth blob. Null when neither `~/.pi/agent/auth.json`
|
|
772
|
-
* (
|
|
773
|
-
* relies on Pi's env-var providers (`ANTHROPIC_API_KEY`,
|
|
774
|
-
* via `agentEnv` and the host environment instead. CI uses
|
|
828
|
+
* (resolved via `PI_CODING_AGENT_DIR` when set) is present — in that
|
|
829
|
+
* case the daemon relies on Pi's env-var providers (`ANTHROPIC_API_KEY`,
|
|
830
|
+
* etc.) carried via `agentEnv` and the host environment instead. CI uses
|
|
831
|
+
* this path.
|
|
775
832
|
*/
|
|
776
833
|
piAuthJson: string | null;
|
|
777
834
|
agentEnv: Record<string, string | undefined>;
|
package/dist/index.js
CHANGED
|
@@ -8133,7 +8133,8 @@ function findMainWorktree() {
|
|
|
8133
8133
|
function loadCredentials(agentDir) {
|
|
8134
8134
|
const moltnetJson = readFileSync(path.join(agentDir, "moltnet.json"), "utf8");
|
|
8135
8135
|
const agentEnvRaw = readFileSync(path.join(agentDir, "env"), "utf8");
|
|
8136
|
-
const
|
|
8136
|
+
const piAgentDir = process.env.PI_CODING_AGENT_DIR ?? path.join(process.env.HOME ?? "", ".pi", "agent");
|
|
8137
|
+
const piAuthPath = path.join(piAgentDir, "auth.json");
|
|
8137
8138
|
const piAuthJson = existsSync(piAuthPath) ? readFileSync(piAuthPath, "utf8") : null;
|
|
8138
8139
|
const gitconfigPath = path.join(agentDir, "gitconfig");
|
|
8139
8140
|
const gitconfig = existsSync(gitconfigPath) ? readFileSync(gitconfigPath, "utf8") : null;
|
|
@@ -8645,7 +8646,7 @@ async function buildAgentSession(args) {
|
|
|
8645
8646
|
spanAttributes: args.otelSpanAttrs
|
|
8646
8647
|
});
|
|
8647
8648
|
const resourceLoader = new DefaultResourceLoader({
|
|
8648
|
-
cwd: args.
|
|
8649
|
+
cwd: args.cwdPath,
|
|
8649
8650
|
agentDir: args.piAuthDir,
|
|
8650
8651
|
extensionFactories: [piOtelExtension],
|
|
8651
8652
|
appendSystemPrompt: args.appendSystemPrompt,
|
|
@@ -8653,12 +8654,12 @@ async function buildAgentSession(args) {
|
|
|
8653
8654
|
});
|
|
8654
8655
|
await resourceLoader.reload();
|
|
8655
8656
|
const sessionManager = args.sessionPersistence ? await resolvePersistentSessionManager({
|
|
8656
|
-
cwd: args.
|
|
8657
|
+
cwd: args.cwdPath,
|
|
8657
8658
|
sessionDir: args.sessionPersistence.sessionDir
|
|
8658
|
-
}) : SessionManager.inMemory(args.
|
|
8659
|
+
}) : SessionManager.inMemory(args.cwdPath);
|
|
8659
8660
|
return (await createAgentSession({
|
|
8660
8661
|
agentDir: args.piAuthDir,
|
|
8661
|
-
cwd: args.
|
|
8662
|
+
cwd: args.cwdPath,
|
|
8662
8663
|
model: args.modelHandle,
|
|
8663
8664
|
customTools: args.customTools,
|
|
8664
8665
|
sessionManager,
|
|
@@ -8670,6 +8671,61 @@ async function resolvePersistentSessionManager(args) {
|
|
|
8670
8671
|
return SessionManager.continueRecent(args.cwd, args.sessionDir);
|
|
8671
8672
|
}
|
|
8672
8673
|
//#endregion
|
|
8674
|
+
//#region ../agent-runtime/src/context-bindings.ts
|
|
8675
|
+
var PROMPT_SEPARATOR = "\n\n---\n\n";
|
|
8676
|
+
/**
|
|
8677
|
+
* Resolve `task.input.context[]` into delivered side-effects (skills
|
|
8678
|
+
* persisted via `deliver.skill`) and prompt fragments
|
|
8679
|
+
* (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
|
|
8680
|
+
* built prompt.
|
|
8681
|
+
*
|
|
8682
|
+
* Per-binding semantics (V1):
|
|
8683
|
+
* - `skill` → `deliver.skill({ slug, content })` once per ref.
|
|
8684
|
+
* Slug collisions on distinct contents are
|
|
8685
|
+
* refused loudly.
|
|
8686
|
+
* - `prompt_prefix` → content appended to `systemPromptPrefix` with
|
|
8687
|
+
* the canonical `\n\n---\n\n` separator (in
|
|
8688
|
+
* declared order).
|
|
8689
|
+
* - `user_inline` → content appended to `userInlineSuffix` in
|
|
8690
|
+
* declared order, same separator.
|
|
8691
|
+
*
|
|
8692
|
+
* No fetching, no hashing — bytes are inlined in `ContextRef.content`,
|
|
8693
|
+
* and the task's `inputCid` already pins the entire input. The imposer
|
|
8694
|
+
* chose these bytes; the resolver just dispatches them.
|
|
8695
|
+
*
|
|
8696
|
+
* The function is pure with respect to its arguments: file writes are
|
|
8697
|
+
* confined to the injected `deliver` callback, which makes the
|
|
8698
|
+
* resolver trivial to test.
|
|
8699
|
+
*/
|
|
8700
|
+
async function resolveTaskContext(args) {
|
|
8701
|
+
const promptParts = [];
|
|
8702
|
+
const userParts = [];
|
|
8703
|
+
const injected = [];
|
|
8704
|
+
const usedSlugs = /* @__PURE__ */ new Map();
|
|
8705
|
+
for (const ref of args.context) {
|
|
8706
|
+
if (ref.binding === "skill") {
|
|
8707
|
+
const prior = usedSlugs.get(ref.slug);
|
|
8708
|
+
if (prior !== void 0) {
|
|
8709
|
+
if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
|
|
8710
|
+
injected.push(ref);
|
|
8711
|
+
continue;
|
|
8712
|
+
}
|
|
8713
|
+
usedSlugs.set(ref.slug, ref.content);
|
|
8714
|
+
await args.deliver.skill({
|
|
8715
|
+
slug: ref.slug,
|
|
8716
|
+
content: ref.content
|
|
8717
|
+
});
|
|
8718
|
+
} else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
|
|
8719
|
+
else userParts.push(ref.content);
|
|
8720
|
+
injected.push(ref);
|
|
8721
|
+
}
|
|
8722
|
+
return {
|
|
8723
|
+
injected,
|
|
8724
|
+
systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
|
|
8725
|
+
userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
|
|
8726
|
+
};
|
|
8727
|
+
}
|
|
8728
|
+
//#endregion
|
|
8673
8729
|
//#region ../tasks/src/formats.ts
|
|
8674
8730
|
/**
|
|
8675
8731
|
* Register TypeBox string formats used across Task / TaskOutput / task-type
|
|
@@ -8841,10 +8897,10 @@ function validateRubricWeights(rubric) {
|
|
|
8841
8897
|
* complementary places.
|
|
8842
8898
|
*
|
|
8843
8899
|
* Before this envelope existed, criteria were scattered: a vestigial
|
|
8844
|
-
* `criteriaCid` column nobody resolved,
|
|
8845
|
-
*
|
|
8846
|
-
*
|
|
8847
|
-
*
|
|
8900
|
+
* `criteriaCid` column nobody resolved, free-form prose on
|
|
8901
|
+
* `fulfill_brief.input`, and inline `rubric` / `criteria[]` fields on
|
|
8902
|
+
* judgment-task inputs. None of those were machine-verifiable
|
|
8903
|
+
* end-to-end.
|
|
8848
8904
|
*
|
|
8849
8905
|
* This module defines a single, content-addressable envelope an imposer
|
|
8850
8906
|
* attaches to any task type. It has four orthogonal sections — pick
|
|
@@ -9140,7 +9196,6 @@ var FULFILL_BRIEF_TYPE = "fulfill_brief";
|
|
|
9140
9196
|
var FulfillBriefInput = Type$1.Object({
|
|
9141
9197
|
brief: Type$1.String({ minLength: 1 }),
|
|
9142
9198
|
title: Type$1.Optional(Type$1.String()),
|
|
9143
|
-
acceptanceCriteria: Type$1.Optional(Type$1.Array(Type$1.String())),
|
|
9144
9199
|
successCriteria: Type$1.Optional(SuccessCriteria),
|
|
9145
9200
|
seedFiles: Type$1.Optional(Type$1.Array(Type$1.String())),
|
|
9146
9201
|
scopeHint: Type$1.Optional(Type$1.String())
|
|
@@ -9564,6 +9619,72 @@ async function onCreateJudgeEvalVariant(input, ctx) {
|
|
|
9564
9619
|
}];
|
|
9565
9620
|
}
|
|
9566
9621
|
//#endregion
|
|
9622
|
+
//#region ../tasks/src/task-types/pr-review.ts
|
|
9623
|
+
var PR_REVIEW_TYPE = "pr_review";
|
|
9624
|
+
var PrReviewSubject = Type$1.Object({
|
|
9625
|
+
title: Type$1.String({ minLength: 1 }),
|
|
9626
|
+
summary: Type$1.String({ minLength: 1 }),
|
|
9627
|
+
resourceUrls: Type$1.Optional(Type$1.Array(Type$1.String({ minLength: 1 }))),
|
|
9628
|
+
inspectionHints: Type$1.Optional(Type$1.Array(Type$1.String({ minLength: 1 })))
|
|
9629
|
+
}, {
|
|
9630
|
+
$id: "PrReviewSubject",
|
|
9631
|
+
additionalProperties: false
|
|
9632
|
+
});
|
|
9633
|
+
var PrReviewInput = Type$1.Object({
|
|
9634
|
+
subject: PrReviewSubject,
|
|
9635
|
+
taskPrompt: Type$1.Optional(Type$1.String({ minLength: 1 })),
|
|
9636
|
+
successCriteria: SuccessCriteria
|
|
9637
|
+
}, {
|
|
9638
|
+
$id: "PrReviewInput",
|
|
9639
|
+
additionalProperties: false
|
|
9640
|
+
});
|
|
9641
|
+
var PrReviewScore = Type$1.Object({
|
|
9642
|
+
criterionId: Type$1.String({ minLength: 1 }),
|
|
9643
|
+
score: Type$1.Union([Type$1.Literal(0), Type$1.Literal(1)]),
|
|
9644
|
+
rationale: Type$1.String({ minLength: 1 })
|
|
9645
|
+
}, {
|
|
9646
|
+
$id: "PrReviewScore",
|
|
9647
|
+
additionalProperties: false
|
|
9648
|
+
});
|
|
9649
|
+
var PrReviewOutput = Type$1.Object({
|
|
9650
|
+
scores: Type$1.Array(PrReviewScore, { minItems: 1 }),
|
|
9651
|
+
composite: Type$1.Number({
|
|
9652
|
+
minimum: 0,
|
|
9653
|
+
maximum: 1
|
|
9654
|
+
}),
|
|
9655
|
+
verdict: Type$1.String({ minLength: 1 })
|
|
9656
|
+
}, {
|
|
9657
|
+
$id: "PrReviewOutput",
|
|
9658
|
+
additionalProperties: false
|
|
9659
|
+
});
|
|
9660
|
+
function requireBooleanRubric(rubric) {
|
|
9661
|
+
for (const criterion of rubric.criteria) if (criterion.scoring !== "boolean") return `pr_review requires boolean scoring for every rubric criterion; criterion "${criterion.id}" uses "${criterion.scoring}"`;
|
|
9662
|
+
return null;
|
|
9663
|
+
}
|
|
9664
|
+
function validatePrReviewInput(input) {
|
|
9665
|
+
const sc = input.successCriteria;
|
|
9666
|
+
if (!sc) return "successCriteria is required for judgment tasks";
|
|
9667
|
+
if (!sc.rubric) return "successCriteria.rubric is required for judgment tasks";
|
|
9668
|
+
return validateRubricWeights(sc.rubric) ?? requireBooleanRubric(sc.rubric);
|
|
9669
|
+
}
|
|
9670
|
+
function validatePrReviewOutput(output, input) {
|
|
9671
|
+
if (!input) return null;
|
|
9672
|
+
const scores = output.scores;
|
|
9673
|
+
const rubric = input.successCriteria.rubric;
|
|
9674
|
+
if (!rubric) return null;
|
|
9675
|
+
if (scores.length !== rubric.criteria.length) return `scores length ${scores.length} does not match rubric criteria length ${rubric.criteria.length}`;
|
|
9676
|
+
let composite = 0;
|
|
9677
|
+
for (let i = 0; i < rubric.criteria.length; i++) {
|
|
9678
|
+
const criterion = rubric.criteria[i];
|
|
9679
|
+
const score = scores[i];
|
|
9680
|
+
if (score.criterionId !== criterion.id) return `scores[${i}] has criterionId "${score.criterionId}" but rubric expects "${criterion.id}" in that position`;
|
|
9681
|
+
composite += criterion.weight * score.score;
|
|
9682
|
+
}
|
|
9683
|
+
const claimed = output.composite;
|
|
9684
|
+
if (Math.abs(claimed - composite) > 1e-6) return `composite ${claimed} does not match weighted sum ${composite.toFixed(6)}`;
|
|
9685
|
+
return null;
|
|
9686
|
+
}
|
|
9687
|
+
//#endregion
|
|
9567
9688
|
//#region ../tasks/src/task-types/render-pack.ts
|
|
9568
9689
|
/**
|
|
9569
9690
|
* `render_pack` — turn a context pack into a signed rendered artefact.
|
|
@@ -9731,6 +9852,18 @@ var BUILT_IN_TASK_TYPES = {
|
|
|
9731
9852
|
validateInput: validateJudgmentInput,
|
|
9732
9853
|
validateInputAsync: validateAssessBriefInputAsync
|
|
9733
9854
|
},
|
|
9855
|
+
[PR_REVIEW_TYPE]: {
|
|
9856
|
+
name: PR_REVIEW_TYPE,
|
|
9857
|
+
inputSchema: PrReviewInput,
|
|
9858
|
+
outputSchema: PrReviewOutput,
|
|
9859
|
+
outputKind: "judgment",
|
|
9860
|
+
workspaceMode: "dedicated_worktree",
|
|
9861
|
+
workspaceScope: "attempt",
|
|
9862
|
+
sessionScope: "none",
|
|
9863
|
+
requiresReferences: false,
|
|
9864
|
+
validateInput: validatePrReviewInput,
|
|
9865
|
+
validateOutput: validatePrReviewOutput
|
|
9866
|
+
},
|
|
9734
9867
|
[CURATE_PACK_TYPE]: {
|
|
9735
9868
|
name: CURATE_PACK_TYPE,
|
|
9736
9869
|
inputSchema: CuratePackInput,
|
|
@@ -10086,133 +10219,6 @@ Type$1.Object({
|
|
|
10086
10219
|
additionalProperties: false
|
|
10087
10220
|
});
|
|
10088
10221
|
//#endregion
|
|
10089
|
-
//#region ../agent-runtime/src/subagent-output-contracts.ts
|
|
10090
|
-
var REGISTRY = /* @__PURE__ */ new Map();
|
|
10091
|
-
/**
|
|
10092
|
-
* Register a subagent output contract. Idempotent: re-registering the
|
|
10093
|
-
* same name with a different schema throws — contracts are meant to
|
|
10094
|
-
* be stable. Re-registering with the identical contract object (same
|
|
10095
|
-
* reference) is a no-op for HMR and test convenience.
|
|
10096
|
-
*
|
|
10097
|
-
* Typically called at module-init time alongside task-type
|
|
10098
|
-
* registration. See task-types/index.ts in @moltnet/tasks for the
|
|
10099
|
-
* conventional pattern.
|
|
10100
|
-
*/
|
|
10101
|
-
function registerSubagentOutputContract(contract) {
|
|
10102
|
-
if (!contract.name || contract.name.trim().length === 0) throw new Error("subagent output contract name is required");
|
|
10103
|
-
if (!/^[a-z][a-z0-9_]*$/.test(contract.name)) throw new Error(`subagent output contract name '${contract.name}' must be lower_snake_case (starts with a letter, then [a-z0-9_]+)`);
|
|
10104
|
-
const existing = REGISTRY.get(contract.name);
|
|
10105
|
-
if (existing && existing !== contract) {
|
|
10106
|
-
if (existing.parametersSchema !== contract.parametersSchema) throw new Error(`subagent output contract '${contract.name}' is already registered with a different schema; refusing to override`);
|
|
10107
|
-
}
|
|
10108
|
-
REGISTRY.set(contract.name, contract);
|
|
10109
|
-
}
|
|
10110
|
-
/**
|
|
10111
|
-
* Resolve a subagent output contract by name. Returns `null` for
|
|
10112
|
-
* unknown names — callers (the subagent custom tool) decide whether
|
|
10113
|
-
* that's a tool error the parent LLM can recover from or a hard fail.
|
|
10114
|
-
*/
|
|
10115
|
-
function getSubagentOutputContract(name) {
|
|
10116
|
-
return REGISTRY.get(name) ?? null;
|
|
10117
|
-
}
|
|
10118
|
-
/**
|
|
10119
|
-
* List all registered contracts. Useful for diagnostics and for the
|
|
10120
|
-
* subagent tool's parameter description so a parent LLM can see what
|
|
10121
|
-
* contracts are available without enumerating them in its prompt.
|
|
10122
|
-
*/
|
|
10123
|
-
function listSubagentOutputContracts() {
|
|
10124
|
-
return [...REGISTRY.values()];
|
|
10125
|
-
}
|
|
10126
|
-
//#endregion
|
|
10127
|
-
//#region ../agent-runtime/src/built-in-contract-registrations.ts
|
|
10128
|
-
/**
|
|
10129
|
-
* Built-in subagent output contracts (#1087, #943).
|
|
10130
|
-
*
|
|
10131
|
-
* Why this is an exported function and not a module-init side
|
|
10132
|
-
* effect:
|
|
10133
|
-
*
|
|
10134
|
-
* - The registry is process-global. Module-init registration
|
|
10135
|
-
* fires exactly once per Node process (ESM modules are cached
|
|
10136
|
-
* by URL). Tests that call `__resetSubagentOutputContractsForTests()`
|
|
10137
|
-
* to start from an empty registry have no way to repopulate
|
|
10138
|
-
* the built-ins without re-evaluating the module — which the
|
|
10139
|
-
* cache prevents. PR #1101 review M4.
|
|
10140
|
-
* - An explicit `registerBuiltInSubagentContracts()` lets the
|
|
10141
|
-
* package index call it once at module load AND lets test
|
|
10142
|
-
* setup hooks call it again after `__reset...`.
|
|
10143
|
-
* - `registerSubagentOutputContract` is itself idempotent for
|
|
10144
|
-
* identical re-registrations, so calling this function twice
|
|
10145
|
-
* in the same process is safe.
|
|
10146
|
-
*
|
|
10147
|
-
* Adding a new built-in: extend the body of this function. Do not
|
|
10148
|
-
* call `registerSubagentOutputContract` from anywhere else in the
|
|
10149
|
-
* package — keeping all built-ins in one function makes the set
|
|
10150
|
-
* auditable.
|
|
10151
|
-
*/
|
|
10152
|
-
function registerBuiltInSubagentContracts() {
|
|
10153
|
-
registerSubagentOutputContract({
|
|
10154
|
-
name: "judge_eval_variant_result",
|
|
10155
|
-
description: "Per-variant grading result produced by a subagent of judge_eval_variant: scores against the shared rubric, composite, and a 1-3 sentence verdict for a single variant.",
|
|
10156
|
-
parametersSchema: JudgeEvalVariantResult
|
|
10157
|
-
});
|
|
10158
|
-
}
|
|
10159
|
-
registerBuiltInSubagentContracts();
|
|
10160
|
-
//#endregion
|
|
10161
|
-
//#region ../agent-runtime/src/context-bindings.ts
|
|
10162
|
-
var PROMPT_SEPARATOR = "\n\n---\n\n";
|
|
10163
|
-
/**
|
|
10164
|
-
* Resolve `task.input.context[]` into delivered side-effects (skills
|
|
10165
|
-
* persisted via `deliver.skill`) and prompt fragments
|
|
10166
|
-
* (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
|
|
10167
|
-
* built prompt.
|
|
10168
|
-
*
|
|
10169
|
-
* Per-binding semantics (V1):
|
|
10170
|
-
* - `skill` → `deliver.skill({ slug, content })` once per ref.
|
|
10171
|
-
* Slug collisions on distinct contents are
|
|
10172
|
-
* refused loudly.
|
|
10173
|
-
* - `prompt_prefix` → content appended to `systemPromptPrefix` with
|
|
10174
|
-
* the canonical `\n\n---\n\n` separator (in
|
|
10175
|
-
* declared order).
|
|
10176
|
-
* - `user_inline` → content appended to `userInlineSuffix` in
|
|
10177
|
-
* declared order, same separator.
|
|
10178
|
-
*
|
|
10179
|
-
* No fetching, no hashing — bytes are inlined in `ContextRef.content`,
|
|
10180
|
-
* and the task's `inputCid` already pins the entire input. The imposer
|
|
10181
|
-
* chose these bytes; the resolver just dispatches them.
|
|
10182
|
-
*
|
|
10183
|
-
* The function is pure with respect to its arguments: file writes are
|
|
10184
|
-
* confined to the injected `deliver` callback, which makes the
|
|
10185
|
-
* resolver trivial to test.
|
|
10186
|
-
*/
|
|
10187
|
-
async function resolveTaskContext(args) {
|
|
10188
|
-
const promptParts = [];
|
|
10189
|
-
const userParts = [];
|
|
10190
|
-
const injected = [];
|
|
10191
|
-
const usedSlugs = /* @__PURE__ */ new Map();
|
|
10192
|
-
for (const ref of args.context) {
|
|
10193
|
-
if (ref.binding === "skill") {
|
|
10194
|
-
const prior = usedSlugs.get(ref.slug);
|
|
10195
|
-
if (prior !== void 0) {
|
|
10196
|
-
if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
|
|
10197
|
-
injected.push(ref);
|
|
10198
|
-
continue;
|
|
10199
|
-
}
|
|
10200
|
-
usedSlugs.set(ref.slug, ref.content);
|
|
10201
|
-
await args.deliver.skill({
|
|
10202
|
-
slug: ref.slug,
|
|
10203
|
-
content: ref.content
|
|
10204
|
-
});
|
|
10205
|
-
} else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
|
|
10206
|
-
else userParts.push(ref.content);
|
|
10207
|
-
injected.push(ref);
|
|
10208
|
-
}
|
|
10209
|
-
return {
|
|
10210
|
-
injected,
|
|
10211
|
-
systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
|
|
10212
|
-
userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
|
|
10213
|
-
};
|
|
10214
|
-
}
|
|
10215
|
-
//#endregion
|
|
10216
10222
|
//#region ../agent-runtime/src/output-tools.ts
|
|
10217
10223
|
/**
|
|
10218
10224
|
* Submit-output tool contract.
|
|
@@ -10305,6 +10311,20 @@ function buildFinalOutputBlock(opts) {
|
|
|
10305
10311
|
return lines.join("\n");
|
|
10306
10312
|
}
|
|
10307
10313
|
//#endregion
|
|
10314
|
+
//#region ../agent-runtime/src/prompts/rubric-common.ts
|
|
10315
|
+
function renderRubricCriteriaList(rubric) {
|
|
10316
|
+
return rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
|
|
10317
|
+
}
|
|
10318
|
+
function renderRubricPreambleSection(rubric) {
|
|
10319
|
+
if (!rubric.preamble) return null;
|
|
10320
|
+
return [
|
|
10321
|
+
"### Rubric preamble",
|
|
10322
|
+
"",
|
|
10323
|
+
rubric.preamble,
|
|
10324
|
+
""
|
|
10325
|
+
].join("\n");
|
|
10326
|
+
}
|
|
10327
|
+
//#endregion
|
|
10308
10328
|
//#region ../agent-runtime/src/prompts/assess-brief.ts
|
|
10309
10329
|
/**
|
|
10310
10330
|
* Build the first user-message prompt for an `assess_brief` judge attempt.
|
|
@@ -10330,13 +10350,8 @@ function buildFinalOutputBlock(opts) {
|
|
|
10330
10350
|
*/
|
|
10331
10351
|
function buildAssessBriefUserPrompt(input, ctx) {
|
|
10332
10352
|
const rubric = input.successCriteria.rubric;
|
|
10333
|
-
const criteriaList = rubric
|
|
10334
|
-
const preambleSection = rubric
|
|
10335
|
-
"### Rubric preamble",
|
|
10336
|
-
"",
|
|
10337
|
-
rubric.preamble,
|
|
10338
|
-
""
|
|
10339
|
-
].join("\n") : "";
|
|
10353
|
+
const criteriaList = renderRubricCriteriaList(rubric);
|
|
10354
|
+
const preambleSection = renderRubricPreambleSection(rubric) ?? "";
|
|
10340
10355
|
const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
|
|
10341
10356
|
"### Workspace",
|
|
10342
10357
|
"",
|
|
@@ -10617,13 +10632,7 @@ function buildCuratePackUserPrompt(input, ctx) {
|
|
|
10617
10632
|
* is told to inspect them itself.
|
|
10618
10633
|
*/
|
|
10619
10634
|
function buildFulfillBriefUserPrompt(input, ctx) {
|
|
10620
|
-
const { brief, title,
|
|
10621
|
-
const criteriaSection = acceptanceCriteria?.length ? [
|
|
10622
|
-
"### Acceptance criteria",
|
|
10623
|
-
"",
|
|
10624
|
-
...acceptanceCriteria.map((c) => `- ${c}`),
|
|
10625
|
-
""
|
|
10626
|
-
].join("\n") : "";
|
|
10635
|
+
const { brief, title, seedFiles, scopeHint } = input;
|
|
10627
10636
|
const seedSection = seedFiles?.length ? [
|
|
10628
10637
|
"### Seed files",
|
|
10629
10638
|
"",
|
|
@@ -10671,7 +10680,6 @@ function buildFulfillBriefUserPrompt(input, ctx) {
|
|
|
10671
10680
|
"",
|
|
10672
10681
|
brief,
|
|
10673
10682
|
"",
|
|
10674
|
-
criteriaSection,
|
|
10675
10683
|
seedSection,
|
|
10676
10684
|
correlationSection,
|
|
10677
10685
|
workspaceSection,
|
|
@@ -10811,13 +10819,8 @@ function buildJudgeEvalVariantUserPrompt(input, ctx) {
|
|
|
10811
10819
|
function buildJudgePackUserPrompt(input, ctx) {
|
|
10812
10820
|
const { renderedPackId, sourcePackId, successCriteria } = input;
|
|
10813
10821
|
const rubric = successCriteria.rubric;
|
|
10814
|
-
const criteriaList = rubric
|
|
10815
|
-
const preambleSection = rubric
|
|
10816
|
-
"### Rubric preamble",
|
|
10817
|
-
"",
|
|
10818
|
-
rubric.preamble,
|
|
10819
|
-
""
|
|
10820
|
-
].join("\n") : null;
|
|
10822
|
+
const criteriaList = renderRubricCriteriaList(rubric);
|
|
10823
|
+
const preambleSection = renderRubricPreambleSection(rubric);
|
|
10821
10824
|
return [
|
|
10822
10825
|
"# Judge Pack Agent",
|
|
10823
10826
|
"",
|
|
@@ -10933,6 +10936,112 @@ function buildJudgePackUserPrompt(input, ctx) {
|
|
|
10933
10936
|
].filter((l) => l !== null).join("\n");
|
|
10934
10937
|
}
|
|
10935
10938
|
//#endregion
|
|
10939
|
+
//#region ../agent-runtime/src/prompts/pr-review.ts
|
|
10940
|
+
function buildPrReviewUserPrompt(input, ctx) {
|
|
10941
|
+
const rubric = input.successCriteria.rubric;
|
|
10942
|
+
const criteriaList = renderRubricCriteriaList(rubric);
|
|
10943
|
+
const preambleSection = renderRubricPreambleSection(rubric);
|
|
10944
|
+
const taskPromptSection = input.taskPrompt ? [
|
|
10945
|
+
"## Task-specific instructions",
|
|
10946
|
+
"",
|
|
10947
|
+
input.taskPrompt,
|
|
10948
|
+
""
|
|
10949
|
+
].join("\n") : "";
|
|
10950
|
+
const resourceSection = input.subject.resourceUrls && input.subject.resourceUrls.length > 0 ? [
|
|
10951
|
+
"### Resources",
|
|
10952
|
+
"",
|
|
10953
|
+
...input.subject.resourceUrls.map((url) => `- ${url}`),
|
|
10954
|
+
""
|
|
10955
|
+
].join("\n") : "";
|
|
10956
|
+
const hintsSection = input.subject.inspectionHints && input.subject.inspectionHints.length > 0 ? [
|
|
10957
|
+
"### Inspection hints",
|
|
10958
|
+
"",
|
|
10959
|
+
...input.subject.inspectionHints.map((hint) => `- ${hint}`),
|
|
10960
|
+
""
|
|
10961
|
+
].join("\n") : "";
|
|
10962
|
+
const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
|
|
10963
|
+
"### Workspace",
|
|
10964
|
+
"",
|
|
10965
|
+
"This review attempt is running inside a dedicated disposable git",
|
|
10966
|
+
"worktree. Inspect and reason inside this workspace only.",
|
|
10967
|
+
ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`.` : "The current checkout is disposable and will be cleaned up when the task ends.",
|
|
10968
|
+
""
|
|
10969
|
+
].join("\n") : "";
|
|
10970
|
+
return [
|
|
10971
|
+
"# Review Agent",
|
|
10972
|
+
"",
|
|
10973
|
+
"You are an independent judge. You did NOT produce the subject under review.",
|
|
10974
|
+
"Assess it strictly against the rubric below and emit a structured judgment.",
|
|
10975
|
+
"You may inspect the local workspace and the referenced resources, but do NOT modify anything.",
|
|
10976
|
+
"",
|
|
10977
|
+
`Your diary ID is: ${ctx.diaryId}`,
|
|
10978
|
+
`This task's id is: ${ctx.taskId}`,
|
|
10979
|
+
"",
|
|
10980
|
+
"## Subject",
|
|
10981
|
+
"",
|
|
10982
|
+
`**Title:** ${input.subject.title}`,
|
|
10983
|
+
"",
|
|
10984
|
+
input.subject.summary,
|
|
10985
|
+
"",
|
|
10986
|
+
resourceSection,
|
|
10987
|
+
hintsSection,
|
|
10988
|
+
workspaceSection,
|
|
10989
|
+
"### Execution contract",
|
|
10990
|
+
"",
|
|
10991
|
+
"Treat the provided subject, resources, inspection hints, and any",
|
|
10992
|
+
"task-specific instructions as the full",
|
|
10993
|
+
"review contract for this task.",
|
|
10994
|
+
"",
|
|
10995
|
+
"If the task-specific instructions or inspection hints require an outward action tied to the review",
|
|
10996
|
+
"(for example publishing the judgment somewhere), perform that action as",
|
|
10997
|
+
"part of the task before reporting structured output.",
|
|
10998
|
+
"",
|
|
10999
|
+
"## Review workflow",
|
|
11000
|
+
"",
|
|
11001
|
+
"1. Read the subject summary, resources, inspection hints, and any",
|
|
11002
|
+
" task-specific instructions before scoring.",
|
|
11003
|
+
"2. Inspect the target artefact directly using the tools and resources the",
|
|
11004
|
+
" task makes available.",
|
|
11005
|
+
"3. If you are in a dedicated disposable worktree and need the review target",
|
|
11006
|
+
" checked out locally, do that work inside this disposable workspace only.",
|
|
11007
|
+
"4. Apply the rubric strictly. This task is about complexity and",
|
|
11008
|
+
" reviewability, not correctness or feature desirability.",
|
|
11009
|
+
"5. Perform any required outward action before emitting the final",
|
|
11010
|
+
" structured output.",
|
|
11011
|
+
"",
|
|
11012
|
+
taskPromptSection,
|
|
11013
|
+
preambleSection,
|
|
11014
|
+
"## Criteria",
|
|
11015
|
+
"",
|
|
11016
|
+
criteriaList,
|
|
11017
|
+
"",
|
|
11018
|
+
"### Scoring rules",
|
|
11019
|
+
"",
|
|
11020
|
+
"- Every criterion uses binary scoring only.",
|
|
11021
|
+
"- Score `1` when the subject clearly clears the criterion.",
|
|
11022
|
+
"- Score `0` when it does not, or when the evidence is ambiguous.",
|
|
11023
|
+
"- `rationale` is REQUIRED for every score. Keep it concrete and audit-friendly.",
|
|
11024
|
+
"- Compute `composite = Σ(weight_i × score_i)` exactly; the runtime rejects mismatches.",
|
|
11025
|
+
"",
|
|
11026
|
+
"Write a signed diary entry (tags: `judgment`, `pr_review`) capturing the rationale before reporting structured output.",
|
|
11027
|
+
"",
|
|
11028
|
+
buildFinalOutputBlock({
|
|
11029
|
+
taskType: "pr_review",
|
|
11030
|
+
outputSchemaName: "PrReviewOutput",
|
|
11031
|
+
shapeSketch: [
|
|
11032
|
+
"{",
|
|
11033
|
+
" \"scores\": [",
|
|
11034
|
+
" { \"criterionId\": \"...\", \"score\": 0, \"rationale\": \"...\" }",
|
|
11035
|
+
" ],",
|
|
11036
|
+
" \"composite\": <sum-of-weighted-binary-scores>,",
|
|
11037
|
+
" \"verdict\": \"<1-3 sentence overall>\"",
|
|
11038
|
+
"}"
|
|
11039
|
+
].join("\n"),
|
|
11040
|
+
extraNotes: ["`scores` MUST stay in the same order as the rubric criteria.", "`score` MUST be exactly `0` or `1` for every criterion."]
|
|
11041
|
+
})
|
|
11042
|
+
].filter(Boolean).join("\n");
|
|
11043
|
+
}
|
|
11044
|
+
//#endregion
|
|
10936
11045
|
//#region ../agent-runtime/src/prompts/render-pack.ts
|
|
10937
11046
|
/**
|
|
10938
11047
|
* Build the first user-message prompt for a `render_pack` task. Almost mechanical:
|
|
@@ -11115,6 +11224,16 @@ function buildTaskUserPrompt(task, ctx) {
|
|
|
11115
11224
|
diaryId: ctx.diaryId,
|
|
11116
11225
|
taskId: ctx.taskId
|
|
11117
11226
|
});
|
|
11227
|
+
case PR_REVIEW_TYPE:
|
|
11228
|
+
if (!Value.Check(PrReviewInput, task.input)) {
|
|
11229
|
+
const errors = [...Value.Errors(PrReviewInput, task.input)];
|
|
11230
|
+
throw new Error(`pr_review input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
|
|
11231
|
+
}
|
|
11232
|
+
return buildPrReviewUserPrompt(task.input, {
|
|
11233
|
+
diaryId: ctx.diaryId,
|
|
11234
|
+
taskId: ctx.taskId,
|
|
11235
|
+
workspace: ctx.workspace
|
|
11236
|
+
});
|
|
11118
11237
|
case JUDGE_EVAL_VARIANT_TYPE:
|
|
11119
11238
|
if (!Value.Check(JudgeEvalVariantInput, task.input)) {
|
|
11120
11239
|
const errors = [...Value.Errors(JudgeEvalVariantInput, task.input)];
|
|
@@ -14838,6 +14957,7 @@ var DEFAULT_SUBAGENT_TIMEOUT_MS = 300 * 1e3;
|
|
|
14838
14957
|
*/
|
|
14839
14958
|
function createSubagentTool(args) {
|
|
14840
14959
|
const buildSession = args.buildAgentSession ?? buildAgentSession;
|
|
14960
|
+
const { contractRegistry } = args;
|
|
14841
14961
|
let callCount = 0;
|
|
14842
14962
|
return {
|
|
14843
14963
|
tool: defineTool({
|
|
@@ -14848,8 +14968,8 @@ function createSubagentTool(args) {
|
|
|
14848
14968
|
async execute(_id, params) {
|
|
14849
14969
|
if (!Value.Check(SubagentToolParameters, params)) return toolError(`subagent: invalid parameters: ${JSON.stringify([...Value.Errors(SubagentToolParameters, params)].slice(0, 3))}`);
|
|
14850
14970
|
const { task, output_schema } = params;
|
|
14851
|
-
const contract =
|
|
14852
|
-
if (!contract) return toolError(`subagent: unknown output_schema "${output_schema}". Registered contracts: [${
|
|
14971
|
+
const contract = contractRegistry.get(output_schema);
|
|
14972
|
+
if (!contract) return toolError(`subagent: unknown output_schema "${output_schema}". Registered contracts: [${contractRegistry.list().map((c) => c.name).join(", ")}]`);
|
|
14853
14973
|
callCount += 1;
|
|
14854
14974
|
const callIndex = callCount;
|
|
14855
14975
|
let captured = null;
|
|
@@ -14879,6 +14999,7 @@ function createSubagentTool(args) {
|
|
|
14879
14999
|
});
|
|
14880
15000
|
const session = await buildSession({
|
|
14881
15001
|
mountPath: args.mountPath,
|
|
15002
|
+
cwdPath: args.cwdPath ?? args.mountPath,
|
|
14882
15003
|
piAuthDir: args.piAuthDir,
|
|
14883
15004
|
modelHandle: args.modelHandle,
|
|
14884
15005
|
agentName: args.agentName,
|
|
@@ -15219,6 +15340,7 @@ function prepareTaskWorkspace(task, requestedMountPath, executionPlan) {
|
|
|
15219
15340
|
const branch = executionPlan?.worktreeBranch ?? null;
|
|
15220
15341
|
if (!branch) return {
|
|
15221
15342
|
mountPath: requestedMountPath,
|
|
15343
|
+
cwdPath: requestedMountPath,
|
|
15222
15344
|
mode: "shared_mount",
|
|
15223
15345
|
branch: null,
|
|
15224
15346
|
cleanup: () => {}
|
|
@@ -15226,7 +15348,7 @@ function prepareTaskWorkspace(task, requestedMountPath, executionPlan) {
|
|
|
15226
15348
|
const mainRepo = findMainWorktree();
|
|
15227
15349
|
const worktreeDir = resolveTaskWorktreePath(mainRepo, executionPlan?.workspaceId ?? `task-${task.id}`);
|
|
15228
15350
|
const relMount = relative(mainRepo, requestedMountPath);
|
|
15229
|
-
const
|
|
15351
|
+
const cwdPath = relMount === "" || relMount.startsWith("..") ? worktreeDir : join(worktreeDir, relMount);
|
|
15230
15352
|
const keepWorkspace = executionPlan?.workspaceScope === "session" && executionPlan.sessionKey !== null;
|
|
15231
15353
|
if (keepWorkspace) ensureReusableTaskWorktree(mainRepo, worktreeDir, branch);
|
|
15232
15354
|
else {
|
|
@@ -15234,7 +15356,8 @@ function prepareTaskWorkspace(task, requestedMountPath, executionPlan) {
|
|
|
15234
15356
|
addTaskWorktree(mainRepo, worktreeDir, branch);
|
|
15235
15357
|
}
|
|
15236
15358
|
return {
|
|
15237
|
-
mountPath,
|
|
15359
|
+
mountPath: mainRepo,
|
|
15360
|
+
cwdPath,
|
|
15238
15361
|
mode: "dedicated_worktree",
|
|
15239
15362
|
branch,
|
|
15240
15363
|
cleanup: keepWorkspace ? () => {} : () => {
|
|
@@ -15347,15 +15470,24 @@ var noopTurnEventHandler = () => {};
|
|
|
15347
15470
|
function createPiTaskExecutor(opts) {
|
|
15348
15471
|
let cachedCheckpoint = opts.checkpointPath ?? null;
|
|
15349
15472
|
return async (claimedTask, reporter) => {
|
|
15350
|
-
|
|
15351
|
-
|
|
15352
|
-
|
|
15353
|
-
|
|
15354
|
-
})
|
|
15473
|
+
const reporterWasOpened = !reporter.cancelSignal.aborted;
|
|
15474
|
+
if (reporterWasOpened) await reporter.open({
|
|
15475
|
+
taskId: claimedTask.task.id,
|
|
15476
|
+
attemptN: claimedTask.attemptN
|
|
15355
15477
|
});
|
|
15356
15478
|
return executePiTask(claimedTask, reporter, {
|
|
15357
15479
|
...opts,
|
|
15358
|
-
checkpointPath: cachedCheckpoint
|
|
15480
|
+
checkpointPath: cachedCheckpoint ?? void 0,
|
|
15481
|
+
resolveCheckpointPath: async () => {
|
|
15482
|
+
if (!cachedCheckpoint) cachedCheckpoint = await ensureSnapshot({
|
|
15483
|
+
config: opts.sandboxConfig?.snapshot,
|
|
15484
|
+
onProgress: opts.onSnapshotProgress ?? ((m) => {
|
|
15485
|
+
process.stderr.write(`[snapshot] ${m}\n`);
|
|
15486
|
+
})
|
|
15487
|
+
});
|
|
15488
|
+
return cachedCheckpoint;
|
|
15489
|
+
},
|
|
15490
|
+
reporterAlreadyOpened: reporterWasOpened
|
|
15359
15491
|
});
|
|
15360
15492
|
};
|
|
15361
15493
|
}
|
|
@@ -15371,8 +15503,9 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15371
15503
|
const startTime = Date.now();
|
|
15372
15504
|
const requestedMountPath = opts.mountPath ?? process.cwd();
|
|
15373
15505
|
const executionPlan = opts.makeExecutionPlan?.(claimedTask) ?? null;
|
|
15374
|
-
|
|
15375
|
-
|
|
15506
|
+
let workspace = null;
|
|
15507
|
+
let mountPath = requestedMountPath;
|
|
15508
|
+
let cwdPath = requestedMountPath;
|
|
15376
15509
|
if (reporter.cancelSignal.aborted) return {
|
|
15377
15510
|
taskId: task.id,
|
|
15378
15511
|
attemptN,
|
|
@@ -15387,33 +15520,8 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15387
15520
|
retryable: false
|
|
15388
15521
|
}
|
|
15389
15522
|
};
|
|
15390
|
-
|
|
15391
|
-
config: opts.sandboxConfig?.snapshot,
|
|
15392
|
-
onProgress: opts.onSnapshotProgress ?? ((m) => {
|
|
15393
|
-
process.stderr.write(`[snapshot] ${m}\n`);
|
|
15394
|
-
})
|
|
15395
|
-
});
|
|
15396
|
-
const mainRepoForRepair = findMainWorktree();
|
|
15397
|
-
try {
|
|
15398
|
-
execFileSync("git", [
|
|
15399
|
-
"-C",
|
|
15400
|
-
mainRepoForRepair,
|
|
15401
|
-
"worktree",
|
|
15402
|
-
"repair",
|
|
15403
|
-
"--relative-paths"
|
|
15404
|
-
], { stdio: "pipe" });
|
|
15405
|
-
} catch {}
|
|
15523
|
+
let reporterOpen = opts.reporterAlreadyOpened ?? false;
|
|
15406
15524
|
let managed = null;
|
|
15407
|
-
managed = await resumeVm({
|
|
15408
|
-
checkpointPath,
|
|
15409
|
-
agentName: opts.agentName,
|
|
15410
|
-
mountPath,
|
|
15411
|
-
extraAllowedHosts: opts.extraAllowedHosts,
|
|
15412
|
-
sandboxConfig: opts.sandboxConfig
|
|
15413
|
-
});
|
|
15414
|
-
const diaryId = task.diaryId ?? "";
|
|
15415
|
-
const taskTeamId = task.teamId ?? "";
|
|
15416
|
-
let reporterOpen = false;
|
|
15417
15525
|
let session = null;
|
|
15418
15526
|
let subagentHandle = null;
|
|
15419
15527
|
const finalUsage = emptyUsage(opts.provider, opts.model);
|
|
@@ -15432,41 +15540,103 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15432
15540
|
retryable: false
|
|
15433
15541
|
}
|
|
15434
15542
|
});
|
|
15543
|
+
let onTurnEvent;
|
|
15544
|
+
if (opts.makeOnTurnEvent) try {
|
|
15545
|
+
onTurnEvent = opts.makeOnTurnEvent(claimedTask);
|
|
15546
|
+
} catch (err) {
|
|
15547
|
+
process.stderr.write(`[emit] makeOnTurnEvent threw: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
15548
|
+
onTurnEvent = noopTurnEventHandler;
|
|
15549
|
+
}
|
|
15550
|
+
else onTurnEvent = opts.onTurnEvent ?? noopTurnEventHandler;
|
|
15551
|
+
const emit = (kind, payload) => {
|
|
15552
|
+
try {
|
|
15553
|
+
onTurnEvent(kind, summarizePayloadForLog(kind, payload));
|
|
15554
|
+
} catch (err) {
|
|
15555
|
+
process.stderr.write(`[emit] onTurnEvent threw for kind="${kind}": ${err instanceof Error ? err.message : String(err)}\n`);
|
|
15556
|
+
}
|
|
15557
|
+
return reporter.record({
|
|
15558
|
+
kind,
|
|
15559
|
+
payload
|
|
15560
|
+
});
|
|
15561
|
+
};
|
|
15562
|
+
const emitError = async (phase, message, extra = {}) => {
|
|
15563
|
+
await emit("error", {
|
|
15564
|
+
phase,
|
|
15565
|
+
message,
|
|
15566
|
+
...extra
|
|
15567
|
+
});
|
|
15568
|
+
};
|
|
15435
15569
|
try {
|
|
15436
|
-
|
|
15437
|
-
activateAgentEnv(managed.credentials.agentEnv, mainRepo);
|
|
15438
|
-
await reporter.open({
|
|
15570
|
+
if (!opts.reporterAlreadyOpened) await reporter.open({
|
|
15439
15571
|
taskId: task.id,
|
|
15440
15572
|
attemptN
|
|
15441
15573
|
});
|
|
15442
15574
|
reporterOpen = true;
|
|
15443
|
-
let
|
|
15444
|
-
|
|
15445
|
-
|
|
15575
|
+
let checkpointPath;
|
|
15576
|
+
try {
|
|
15577
|
+
checkpointPath = opts.checkpointPath ?? (opts.resolveCheckpointPath ? await opts.resolveCheckpointPath() : await ensureSnapshot({
|
|
15578
|
+
config: opts.sandboxConfig?.snapshot,
|
|
15579
|
+
onProgress: opts.onSnapshotProgress ?? ((m) => {
|
|
15580
|
+
process.stderr.write(`[snapshot] ${m}\n`);
|
|
15581
|
+
})
|
|
15582
|
+
}));
|
|
15583
|
+
} catch (err) {
|
|
15584
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
15585
|
+
await emitError("snapshot", message);
|
|
15586
|
+
return makeFailedOutput("snapshot_failed", message);
|
|
15587
|
+
}
|
|
15588
|
+
try {
|
|
15589
|
+
workspace = prepareTaskWorkspace(task, requestedMountPath, executionPlan);
|
|
15590
|
+
mountPath = workspace.mountPath;
|
|
15591
|
+
cwdPath = workspace.cwdPath;
|
|
15446
15592
|
} catch (err) {
|
|
15447
|
-
|
|
15448
|
-
|
|
15593
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
15594
|
+
await emitError("worktree_setup", message);
|
|
15595
|
+
return makeFailedOutput("worktree_setup_failed", message);
|
|
15449
15596
|
}
|
|
15450
|
-
|
|
15451
|
-
|
|
15597
|
+
try {
|
|
15598
|
+
const mainRepoForRepair = findMainWorktree();
|
|
15452
15599
|
try {
|
|
15453
|
-
|
|
15454
|
-
|
|
15455
|
-
|
|
15456
|
-
|
|
15457
|
-
|
|
15458
|
-
|
|
15459
|
-
|
|
15600
|
+
execFileSync("git", [
|
|
15601
|
+
"-C",
|
|
15602
|
+
mainRepoForRepair,
|
|
15603
|
+
"worktree",
|
|
15604
|
+
"repair",
|
|
15605
|
+
"--relative-paths"
|
|
15606
|
+
], { stdio: "pipe" });
|
|
15607
|
+
} catch {}
|
|
15608
|
+
} catch (err) {
|
|
15609
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
15610
|
+
await emitError("worktree_setup", message);
|
|
15611
|
+
return makeFailedOutput("worktree_setup_failed", message);
|
|
15612
|
+
}
|
|
15613
|
+
try {
|
|
15614
|
+
managed = await resumeVm({
|
|
15615
|
+
checkpointPath,
|
|
15616
|
+
agentName: opts.agentName,
|
|
15617
|
+
mountPath,
|
|
15618
|
+
extraAllowedHosts: opts.extraAllowedHosts,
|
|
15619
|
+
sandboxConfig: opts.sandboxConfig
|
|
15460
15620
|
});
|
|
15461
|
-
}
|
|
15621
|
+
} catch (err) {
|
|
15622
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
15623
|
+
await emitError("vm_resume", message);
|
|
15624
|
+
return makeFailedOutput("vm_resume_failed", message);
|
|
15625
|
+
}
|
|
15626
|
+
const diaryId = task.diaryId ?? "";
|
|
15627
|
+
const taskTeamId = task.teamId ?? "";
|
|
15628
|
+
const mainRepo = findMainWorktree();
|
|
15629
|
+
activateAgentEnv(managed.credentials.agentEnv, mainRepo);
|
|
15630
|
+
const activeWorkspace = workspace;
|
|
15631
|
+
if (!activeWorkspace) throw new Error("task workspace not prepared");
|
|
15462
15632
|
await emit("info", {
|
|
15463
15633
|
event: "execute_start",
|
|
15464
15634
|
taskType: task.taskType,
|
|
15465
15635
|
teamId: task.teamId,
|
|
15466
15636
|
provider: opts.provider,
|
|
15467
15637
|
model: opts.model,
|
|
15468
|
-
workspaceMode:
|
|
15469
|
-
workspaceBranch:
|
|
15638
|
+
workspaceMode: activeWorkspace.mode,
|
|
15639
|
+
workspaceBranch: activeWorkspace.branch
|
|
15470
15640
|
});
|
|
15471
15641
|
let taskPrompt;
|
|
15472
15642
|
try {
|
|
@@ -15474,8 +15644,8 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15474
15644
|
diaryId,
|
|
15475
15645
|
taskId: task.id,
|
|
15476
15646
|
workspace: {
|
|
15477
|
-
mode:
|
|
15478
|
-
branch:
|
|
15647
|
+
mode: activeWorkspace.mode,
|
|
15648
|
+
branch: activeWorkspace.branch
|
|
15479
15649
|
},
|
|
15480
15650
|
extras: opts.promptExtras
|
|
15481
15651
|
});
|
|
@@ -15527,7 +15697,7 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15527
15697
|
getTeamId: () => taskTeamId,
|
|
15528
15698
|
getSessionErrors: () => [],
|
|
15529
15699
|
clearSessionErrors: () => {},
|
|
15530
|
-
getHostCwd: () =>
|
|
15700
|
+
getHostCwd: () => cwdPath,
|
|
15531
15701
|
hostExecBaseEnv: new Set([...HOST_EXEC_DEFAULT_BASE_ENV, ...Object.keys(managed.credentials.agentEnv)]),
|
|
15532
15702
|
hostExecAutoApprove: opts.hostExecAutoApprove ?? opts.sandboxConfig?.hostExec?.autoApprove ?? false,
|
|
15533
15703
|
getTaskContext: () => ({
|
|
@@ -15555,6 +15725,7 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15555
15725
|
if (taskTypeUsesSubagents(task.taskType)) {
|
|
15556
15726
|
subagentHandle = createSubagentTool({
|
|
15557
15727
|
mountPath,
|
|
15728
|
+
cwdPath,
|
|
15558
15729
|
piAuthDir,
|
|
15559
15730
|
modelHandle,
|
|
15560
15731
|
agentName: opts.agentName,
|
|
@@ -15563,12 +15734,14 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15563
15734
|
parentTaskId: task.id,
|
|
15564
15735
|
parentTaskType: task.taskType,
|
|
15565
15736
|
parentAttemptN: attemptN,
|
|
15737
|
+
contractRegistry: opts.subagentContractRegistry,
|
|
15566
15738
|
parentCancelSignal: reporter.cancelSignal
|
|
15567
15739
|
});
|
|
15568
15740
|
parentSubagentTools.push(subagentHandle.tool);
|
|
15569
15741
|
}
|
|
15570
15742
|
session = await buildAgentSession({
|
|
15571
15743
|
mountPath,
|
|
15744
|
+
cwdPath,
|
|
15572
15745
|
piAuthDir,
|
|
15573
15746
|
modelHandle,
|
|
15574
15747
|
agentName: opts.agentName,
|
|
@@ -15653,6 +15826,10 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15653
15826
|
is_error: event.isError,
|
|
15654
15827
|
result: event.isError ? truncateForWire(event.result) : void 0
|
|
15655
15828
|
}));
|
|
15829
|
+
if (event.isError) track(emitError("tool_call_error", describeToolErrorMessage(event.result), {
|
|
15830
|
+
tool: event.toolName,
|
|
15831
|
+
result: truncateForWire(event.result)
|
|
15832
|
+
}));
|
|
15656
15833
|
if (maxBashTimeouts > 0 && event.toolName === "bash" && event.isError && isBashTimeoutResult(event.result)) {
|
|
15657
15834
|
bashTimeoutCount += 1;
|
|
15658
15835
|
if (bashTimeoutCount >= maxBashTimeouts) triggerCapAbort("max_bash_timeouts_exceeded", `Aborted after ${bashTimeoutCount} bash timeouts in this attempt (cap ${maxBashTimeouts}).`);
|
|
@@ -15808,7 +15985,7 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15808
15985
|
}
|
|
15809
15986
|
}
|
|
15810
15987
|
if (managed) await managed.vm.close();
|
|
15811
|
-
try {
|
|
15988
|
+
if (workspace) try {
|
|
15812
15989
|
workspace.cleanup();
|
|
15813
15990
|
} catch (err) {
|
|
15814
15991
|
const detail = err instanceof Error ? err.message : String(err);
|
|
@@ -15919,6 +16096,23 @@ function truncateForWire(value) {
|
|
|
15919
16096
|
};
|
|
15920
16097
|
}
|
|
15921
16098
|
}
|
|
16099
|
+
function describeToolErrorMessage(result) {
|
|
16100
|
+
if (typeof result === "string" && result.trim().length > 0) return result.trim();
|
|
16101
|
+
if (result && typeof result === "object") {
|
|
16102
|
+
const content = result.content;
|
|
16103
|
+
if (Array.isArray(content)) {
|
|
16104
|
+
for (const item of content) if (item && typeof item === "object" && typeof item.text === "string") {
|
|
16105
|
+
const text = item.text.trim();
|
|
16106
|
+
if (text.length > 0) return text;
|
|
16107
|
+
}
|
|
16108
|
+
}
|
|
16109
|
+
}
|
|
16110
|
+
try {
|
|
16111
|
+
return JSON.stringify(truncateForWire(result));
|
|
16112
|
+
} catch {
|
|
16113
|
+
return "Tool call failed";
|
|
16114
|
+
}
|
|
16115
|
+
}
|
|
15922
16116
|
//#endregion
|
|
15923
16117
|
//#region src/index.ts
|
|
15924
16118
|
/**
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@themoltnet/pi-extension",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.18.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "MoltNet pi extension — sandboxed tool execution in Gondolin VMs with MoltNet identity and persistent memory",
|
|
6
6
|
"license": "MIT",
|
|
@@ -31,8 +31,8 @@
|
|
|
31
31
|
"@earendil-works/gondolin": "^0.9.1",
|
|
32
32
|
"@opentelemetry/api": "^1.9.0",
|
|
33
33
|
"@sinclair/typebox": "^0.34.0",
|
|
34
|
-
"@themoltnet/
|
|
35
|
-
"@themoltnet/
|
|
34
|
+
"@themoltnet/agent-runtime": "0.16.0",
|
|
35
|
+
"@themoltnet/sdk": "0.102.0"
|
|
36
36
|
},
|
|
37
37
|
"peerDependencies": {
|
|
38
38
|
"@earendil-works/pi-coding-agent": ">=0.74.0",
|