@themoltnet/pi-extension 0.16.2 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +114 -6
- package/dist/index.js +340 -319
- package/package.json +3 -3
package/dist/index.d.ts
CHANGED
|
@@ -19,6 +19,7 @@ import { TObject } from '@sinclair/typebox';
|
|
|
19
19
|
import { ToolDefinition } from '@earendil-works/pi-coding-agent';
|
|
20
20
|
import { TOptional } from '@sinclair/typebox';
|
|
21
21
|
import { TRecord } from '@sinclair/typebox';
|
|
22
|
+
import { TSchema } from '@sinclair/typebox';
|
|
22
23
|
import { TString } from '@sinclair/typebox';
|
|
23
24
|
import { TUnion } from '@sinclair/typebox';
|
|
24
25
|
import { TUnknown } from '@sinclair/typebox';
|
|
@@ -32,15 +33,19 @@ import { WriteOperations } from '@earendil-works/pi-coding-agent';
|
|
|
32
33
|
export declare function activateAgentEnv(agentEnv: Record<string, string | undefined>, repoRoot: string): void;
|
|
33
34
|
|
|
34
35
|
/**
|
|
35
|
-
* Construct an
|
|
36
|
-
*
|
|
37
|
-
*
|
|
36
|
+
* Construct an `AgentSession`. By default it is in-memory; callers may opt
|
|
37
|
+
* parent sessions into daemon-owned file persistence via `sessionPersistence`.
|
|
38
|
+
* The caller is responsible for eventually invoking `session.prompt(...)` and
|
|
39
|
+
* for tearing down — the helper does no lifecycle management beyond
|
|
40
|
+
* construction.
|
|
38
41
|
*/
|
|
39
42
|
export declare function buildAgentSession(args: BuildAgentSessionArgs): Promise<AgentSession>;
|
|
40
43
|
|
|
41
44
|
declare interface BuildAgentSessionArgs {
|
|
42
45
|
/** Host directory mounted at /workspace inside the VM. */
|
|
43
46
|
mountPath: string;
|
|
47
|
+
/** Host working directory where the agent session should start. */
|
|
48
|
+
cwdPath: string;
|
|
44
49
|
/** pi auth directory (resolved from `PI_CODING_AGENT_DIR` or `~/.pi/agent`). */
|
|
45
50
|
piAuthDir: string;
|
|
46
51
|
/** Resolved pi model handle (provider + model id). */
|
|
@@ -56,6 +61,13 @@ declare interface BuildAgentSessionArgs {
|
|
|
56
61
|
otelSpanAttrs: Record<string, string | number | boolean>;
|
|
57
62
|
/** Agent name for `gen_ai.agent.name` on the root span. */
|
|
58
63
|
agentName: string;
|
|
64
|
+
/**
|
|
65
|
+
* Parent sessions may persist their conversation history in a daemon-owned
|
|
66
|
+
* directory. Subagents should leave this unset and stay in-memory.
|
|
67
|
+
*/
|
|
68
|
+
sessionPersistence?: {
|
|
69
|
+
sessionDir: string;
|
|
70
|
+
};
|
|
59
71
|
}
|
|
60
72
|
|
|
61
73
|
declare interface ClaimedTask {
|
|
@@ -124,6 +136,8 @@ export declare function createSubagentTool(args: CreateSubagentToolArgs): Subage
|
|
|
124
136
|
export declare interface CreateSubagentToolArgs {
|
|
125
137
|
/** Host directory mounted at /workspace inside the VM. */
|
|
126
138
|
mountPath: string;
|
|
139
|
+
/** Host working directory the subagent should start in. Defaults to mountPath. */
|
|
140
|
+
cwdPath?: string;
|
|
127
141
|
/** pi auth directory the parent resolved. */
|
|
128
142
|
piAuthDir: string;
|
|
129
143
|
/** Resolved pi model handle — subagents share it. */
|
|
@@ -179,6 +193,16 @@ export declare interface CreateSubagentToolArgs {
|
|
|
179
193
|
* exercise the tool's logic without booting a VM.
|
|
180
194
|
*/
|
|
181
195
|
buildAgentSession?: (args: BuildAgentSessionArgs) => Promise<AgentSession>;
|
|
196
|
+
/**
|
|
197
|
+
* Contract registry for resolving output_schema names to TypeBox
|
|
198
|
+
* schemas at call time. The subagent tool reads ONLY via `.get()`
|
|
199
|
+
* and `.list()` — the registry is immutable after construction.
|
|
200
|
+
*
|
|
201
|
+
* Production callers (executePiTask) create the registry with
|
|
202
|
+
* built-in contracts at session-setup; tests inject a registry
|
|
203
|
+
* with whatever stubs they need.
|
|
204
|
+
*/
|
|
205
|
+
contractRegistry: SubagentContractRegistry;
|
|
182
206
|
}
|
|
183
207
|
|
|
184
208
|
/**
|
|
@@ -231,6 +255,17 @@ export declare interface ExecutePiTaskOptions {
|
|
|
231
255
|
* across tasks.
|
|
232
256
|
*/
|
|
233
257
|
checkpointPath?: string;
|
|
258
|
+
/**
|
|
259
|
+
* Lazy checkpoint resolver used by `createPiTaskExecutor` so snapshot
|
|
260
|
+
* creation can happen after the reporter has been opened and can surface
|
|
261
|
+
* setup failures as task messages.
|
|
262
|
+
*/
|
|
263
|
+
resolveCheckpointPath?: () => Promise<string>;
|
|
264
|
+
/**
|
|
265
|
+
* Set when the caller already opened the reporter before handing control
|
|
266
|
+
* to `executePiTask`.
|
|
267
|
+
*/
|
|
268
|
+
reporterAlreadyOpened?: boolean;
|
|
234
269
|
/**
|
|
235
270
|
* Optional callback invoked alongside every `reporter.record()` so
|
|
236
271
|
* the daemon can mirror task messages into its local logger.
|
|
@@ -277,6 +312,19 @@ export declare interface ExecutePiTaskOptions {
|
|
|
277
312
|
* after HOST_EXEC_ALLOWED; an array limits auto-approval to matching rules.
|
|
278
313
|
*/
|
|
279
314
|
hostExecAutoApprove?: HostExecAutoApproveConfig;
|
|
315
|
+
/**
|
|
316
|
+
* Optional daemon-supplied execution plan. Keeps task semantics out of
|
|
317
|
+
* `pi-extension` while still letting callers opt into stable worktrees and
|
|
318
|
+
* file-backed Pi sessions for selected task classes.
|
|
319
|
+
*/
|
|
320
|
+
makeExecutionPlan?: PiTaskExecutionPlanFactory;
|
|
321
|
+
/**
|
|
322
|
+
* Immutable subagent contract registry used to resolve `output_schema`
|
|
323
|
+
* names at subagent tool call time. Constructed by the daemon (or
|
|
324
|
+
* tests) from static built-in schemas — `execute-pi-task` never hardcodes
|
|
325
|
+
* contracts. See #1106.
|
|
326
|
+
*/
|
|
327
|
+
subagentContractRegistry?: SubagentContractRegistry;
|
|
280
328
|
}
|
|
281
329
|
|
|
282
330
|
/**
|
|
@@ -415,6 +463,42 @@ export declare interface PiOtelOptions {
|
|
|
415
463
|
spanAttributes?: Record<string, string | number | boolean>;
|
|
416
464
|
}
|
|
417
465
|
|
|
466
|
+
export declare interface PiSessionPersistencePlan {
|
|
467
|
+
sessionDir: string;
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
export declare interface PiTaskExecutionPlan {
|
|
471
|
+
/**
|
|
472
|
+
* Daemon-local reuse key. When set alongside `workspaceScope: 'session'`,
|
|
473
|
+
* dedicated worktrees may be retained and reopened across related tasks.
|
|
474
|
+
*/
|
|
475
|
+
sessionKey: string | null;
|
|
476
|
+
/**
|
|
477
|
+
* Workspace identity selected by the daemon. `null` means the task should
|
|
478
|
+
* run against the shared mount path.
|
|
479
|
+
*/
|
|
480
|
+
workspaceId: string | null;
|
|
481
|
+
/**
|
|
482
|
+
* Branch to create or reopen for the workspace. `null` means no dedicated
|
|
483
|
+
* worktree is required.
|
|
484
|
+
*/
|
|
485
|
+
worktreeBranch: string | null;
|
|
486
|
+
/**
|
|
487
|
+
* Lifetime of the task workspace from the daemon's point of view.
|
|
488
|
+
* `attempt` = disposable; `session` = keep stable for the reuse key.
|
|
489
|
+
*/
|
|
490
|
+
workspaceScope: 'attempt' | 'session';
|
|
491
|
+
/**
|
|
492
|
+
* Optional location for file-backed Pi session history. When omitted,
|
|
493
|
+
* the executor keeps the conversation in memory for this attempt only.
|
|
494
|
+
*/
|
|
495
|
+
sessionPersistence?: PiSessionPersistencePlan | null;
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
export declare type PiTaskExecutionPlanFactory = (claimedTask: ClaimedTask) => PiTaskExecutionPlan | null;
|
|
499
|
+
|
|
500
|
+
export declare function resolveTaskWorktreePath(mainRepo: string, workspaceId: string): string;
|
|
501
|
+
|
|
418
502
|
/**
|
|
419
503
|
* Resume a VM from a checkpoint, inject credentials, configure egress +
|
|
420
504
|
* TLS. Returns the managed VM handle.
|
|
@@ -476,6 +560,29 @@ export declare interface SandboxConfig {
|
|
|
476
560
|
/** Extract snapshot-specific config for backwards compat with ensureSnapshot. */
|
|
477
561
|
export declare type SnapshotConfig = NonNullable<SandboxConfig['snapshot']>;
|
|
478
562
|
|
|
563
|
+
declare interface SubagentContractRegistry {
|
|
564
|
+
/** Resolve a contract by name. Returns `null` for unknown names. */
|
|
565
|
+
get(name: string): SubagentOutputContract | null;
|
|
566
|
+
/** List all registered contracts. */
|
|
567
|
+
list(): SubagentOutputContract[];
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
declare interface SubagentOutputContract {
|
|
571
|
+
/** Stable identifier the parent uses to reference this contract.
|
|
572
|
+
* Lower-snake-case by convention (e.g. `judge_eval_variant_result`). */
|
|
573
|
+
readonly name: string;
|
|
574
|
+
/** Human-readable description shown in the subagent tool's help text
|
|
575
|
+
* and in the inner session's submit-tool description. Useful when a
|
|
576
|
+
* parent LLM has multiple contracts to choose from. */
|
|
577
|
+
readonly description: string;
|
|
578
|
+
/**
|
|
579
|
+
* TypeBox schema the subagent's submit-tool args MUST validate
|
|
580
|
+
* against. The args ARE the output payload (no `{ output: ... }`
|
|
581
|
+
* wrapping), so the LLM gets field-level guidance directly.
|
|
582
|
+
*/
|
|
583
|
+
readonly parametersSchema: TSchema;
|
|
584
|
+
}
|
|
585
|
+
|
|
479
586
|
export declare interface SubagentToolHandle {
|
|
480
587
|
/** ToolDefinition to register via `customTools` on the parent session. */
|
|
481
588
|
readonly tool: ToolDefinition;
|
|
@@ -718,9 +825,10 @@ export declare interface VmCredentials {
|
|
|
718
825
|
agentEnvRaw: string;
|
|
719
826
|
/**
|
|
720
827
|
* Pi OAuth/API-key auth blob. Null when neither `~/.pi/agent/auth.json`
|
|
721
|
-
* (
|
|
722
|
-
* relies on Pi's env-var providers (`ANTHROPIC_API_KEY`,
|
|
723
|
-
* via `agentEnv` and the host environment instead. CI uses
|
|
828
|
+
* (resolved via `PI_CODING_AGENT_DIR` when set) is present — in that
|
|
829
|
+
* case the daemon relies on Pi's env-var providers (`ANTHROPIC_API_KEY`,
|
|
830
|
+
* etc.) carried via `agentEnv` and the host environment instead. CI uses
|
|
831
|
+
* this path.
|
|
724
832
|
*/
|
|
725
833
|
piAuthJson: string | null;
|
|
726
834
|
agentEnv: Record<string, string | undefined>;
|
package/dist/index.js
CHANGED
|
@@ -8133,7 +8133,8 @@ function findMainWorktree() {
|
|
|
8133
8133
|
function loadCredentials(agentDir) {
|
|
8134
8134
|
const moltnetJson = readFileSync(path.join(agentDir, "moltnet.json"), "utf8");
|
|
8135
8135
|
const agentEnvRaw = readFileSync(path.join(agentDir, "env"), "utf8");
|
|
8136
|
-
const
|
|
8136
|
+
const piAgentDir = process.env.PI_CODING_AGENT_DIR ?? path.join(process.env.HOME ?? "", ".pi", "agent");
|
|
8137
|
+
const piAuthPath = path.join(piAgentDir, "auth.json");
|
|
8137
8138
|
const piAuthJson = existsSync(piAuthPath) ? readFileSync(piAuthPath, "utf8") : null;
|
|
8138
8139
|
const gitconfigPath = path.join(agentDir, "gitconfig");
|
|
8139
8140
|
const gitconfig = existsSync(gitconfigPath) ? readFileSync(gitconfigPath, "utf8") : null;
|
|
@@ -8633,9 +8634,11 @@ var NO_SKILLS = () => ({
|
|
|
8633
8634
|
diagnostics: []
|
|
8634
8635
|
});
|
|
8635
8636
|
/**
|
|
8636
|
-
* Construct an
|
|
8637
|
-
*
|
|
8638
|
-
*
|
|
8637
|
+
* Construct an `AgentSession`. By default it is in-memory; callers may opt
|
|
8638
|
+
* parent sessions into daemon-owned file persistence via `sessionPersistence`.
|
|
8639
|
+
* The caller is responsible for eventually invoking `session.prompt(...)` and
|
|
8640
|
+
* for tearing down — the helper does no lifecycle management beyond
|
|
8641
|
+
* construction.
|
|
8639
8642
|
*/
|
|
8640
8643
|
async function buildAgentSession(args) {
|
|
8641
8644
|
const piOtelExtension = createPiOtelExtension({
|
|
@@ -8643,22 +8646,85 @@ async function buildAgentSession(args) {
|
|
|
8643
8646
|
spanAttributes: args.otelSpanAttrs
|
|
8644
8647
|
});
|
|
8645
8648
|
const resourceLoader = new DefaultResourceLoader({
|
|
8646
|
-
cwd: args.
|
|
8649
|
+
cwd: args.cwdPath,
|
|
8647
8650
|
agentDir: args.piAuthDir,
|
|
8648
8651
|
extensionFactories: [piOtelExtension],
|
|
8649
8652
|
appendSystemPrompt: args.appendSystemPrompt,
|
|
8650
8653
|
skillsOverride: args.skillsOverride ?? NO_SKILLS
|
|
8651
8654
|
});
|
|
8652
8655
|
await resourceLoader.reload();
|
|
8656
|
+
const sessionManager = args.sessionPersistence ? await resolvePersistentSessionManager({
|
|
8657
|
+
cwd: args.cwdPath,
|
|
8658
|
+
sessionDir: args.sessionPersistence.sessionDir
|
|
8659
|
+
}) : SessionManager.inMemory(args.cwdPath);
|
|
8653
8660
|
return (await createAgentSession({
|
|
8654
8661
|
agentDir: args.piAuthDir,
|
|
8655
|
-
cwd: args.
|
|
8662
|
+
cwd: args.cwdPath,
|
|
8656
8663
|
model: args.modelHandle,
|
|
8657
8664
|
customTools: args.customTools,
|
|
8658
|
-
sessionManager
|
|
8665
|
+
sessionManager,
|
|
8659
8666
|
resourceLoader
|
|
8660
8667
|
})).session;
|
|
8661
8668
|
}
|
|
8669
|
+
async function resolvePersistentSessionManager(args) {
|
|
8670
|
+
await SessionManager.list(args.cwd, args.sessionDir);
|
|
8671
|
+
return SessionManager.continueRecent(args.cwd, args.sessionDir);
|
|
8672
|
+
}
|
|
8673
|
+
//#endregion
|
|
8674
|
+
//#region ../agent-runtime/src/context-bindings.ts
|
|
8675
|
+
var PROMPT_SEPARATOR = "\n\n---\n\n";
|
|
8676
|
+
/**
|
|
8677
|
+
* Resolve `task.input.context[]` into delivered side-effects (skills
|
|
8678
|
+
* persisted via `deliver.skill`) and prompt fragments
|
|
8679
|
+
* (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
|
|
8680
|
+
* built prompt.
|
|
8681
|
+
*
|
|
8682
|
+
* Per-binding semantics (V1):
|
|
8683
|
+
* - `skill` → `deliver.skill({ slug, content })` once per ref.
|
|
8684
|
+
* Slug collisions on distinct contents are
|
|
8685
|
+
* refused loudly.
|
|
8686
|
+
* - `prompt_prefix` → content appended to `systemPromptPrefix` with
|
|
8687
|
+
* the canonical `\n\n---\n\n` separator (in
|
|
8688
|
+
* declared order).
|
|
8689
|
+
* - `user_inline` → content appended to `userInlineSuffix` in
|
|
8690
|
+
* declared order, same separator.
|
|
8691
|
+
*
|
|
8692
|
+
* No fetching, no hashing — bytes are inlined in `ContextRef.content`,
|
|
8693
|
+
* and the task's `inputCid` already pins the entire input. The imposer
|
|
8694
|
+
* chose these bytes; the resolver just dispatches them.
|
|
8695
|
+
*
|
|
8696
|
+
* The function is pure with respect to its arguments: file writes are
|
|
8697
|
+
* confined to the injected `deliver` callback, which makes the
|
|
8698
|
+
* resolver trivial to test.
|
|
8699
|
+
*/
|
|
8700
|
+
async function resolveTaskContext(args) {
|
|
8701
|
+
const promptParts = [];
|
|
8702
|
+
const userParts = [];
|
|
8703
|
+
const injected = [];
|
|
8704
|
+
const usedSlugs = /* @__PURE__ */ new Map();
|
|
8705
|
+
for (const ref of args.context) {
|
|
8706
|
+
if (ref.binding === "skill") {
|
|
8707
|
+
const prior = usedSlugs.get(ref.slug);
|
|
8708
|
+
if (prior !== void 0) {
|
|
8709
|
+
if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
|
|
8710
|
+
injected.push(ref);
|
|
8711
|
+
continue;
|
|
8712
|
+
}
|
|
8713
|
+
usedSlugs.set(ref.slug, ref.content);
|
|
8714
|
+
await args.deliver.skill({
|
|
8715
|
+
slug: ref.slug,
|
|
8716
|
+
content: ref.content
|
|
8717
|
+
});
|
|
8718
|
+
} else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
|
|
8719
|
+
else userParts.push(ref.content);
|
|
8720
|
+
injected.push(ref);
|
|
8721
|
+
}
|
|
8722
|
+
return {
|
|
8723
|
+
injected,
|
|
8724
|
+
systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
|
|
8725
|
+
userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
|
|
8726
|
+
};
|
|
8727
|
+
}
|
|
8662
8728
|
//#endregion
|
|
8663
8729
|
//#region ../tasks/src/formats.ts
|
|
8664
8730
|
/**
|
|
@@ -8831,10 +8897,10 @@ function validateRubricWeights(rubric) {
|
|
|
8831
8897
|
* complementary places.
|
|
8832
8898
|
*
|
|
8833
8899
|
* Before this envelope existed, criteria were scattered: a vestigial
|
|
8834
|
-
* `criteriaCid` column nobody resolved,
|
|
8835
|
-
*
|
|
8836
|
-
*
|
|
8837
|
-
*
|
|
8900
|
+
* `criteriaCid` column nobody resolved, free-form prose on
|
|
8901
|
+
* `fulfill_brief.input`, and inline `rubric` / `criteria[]` fields on
|
|
8902
|
+
* judgment-task inputs. None of those were machine-verifiable
|
|
8903
|
+
* end-to-end.
|
|
8838
8904
|
*
|
|
8839
8905
|
* This module defines a single, content-addressable envelope an imposer
|
|
8840
8906
|
* attaches to any task type. It has four orthogonal sections — pick
|
|
@@ -9130,7 +9196,6 @@ var FULFILL_BRIEF_TYPE = "fulfill_brief";
|
|
|
9130
9196
|
var FulfillBriefInput = Type$1.Object({
|
|
9131
9197
|
brief: Type$1.String({ minLength: 1 }),
|
|
9132
9198
|
title: Type$1.Optional(Type$1.String()),
|
|
9133
|
-
acceptanceCriteria: Type$1.Optional(Type$1.Array(Type$1.String())),
|
|
9134
9199
|
successCriteria: Type$1.Optional(SuccessCriteria),
|
|
9135
9200
|
seedFiles: Type$1.Optional(Type$1.Array(Type$1.String())),
|
|
9136
9201
|
scopeHint: Type$1.Optional(Type$1.String())
|
|
@@ -9702,7 +9767,10 @@ var BUILT_IN_TASK_TYPES = {
|
|
|
9702
9767
|
inputSchema: FulfillBriefInput,
|
|
9703
9768
|
outputSchema: FulfillBriefOutput,
|
|
9704
9769
|
outputKind: "artifact",
|
|
9770
|
+
resumable: true,
|
|
9705
9771
|
workspaceMode: "dedicated_worktree",
|
|
9772
|
+
workspaceScope: "session",
|
|
9773
|
+
sessionScope: "correlation",
|
|
9706
9774
|
requiresReferences: false,
|
|
9707
9775
|
validateOutput: requireVerificationWhenCriteriaPresent
|
|
9708
9776
|
},
|
|
@@ -9712,6 +9780,8 @@ var BUILT_IN_TASK_TYPES = {
|
|
|
9712
9780
|
outputSchema: AssessBriefOutput,
|
|
9713
9781
|
outputKind: "judgment",
|
|
9714
9782
|
workspaceMode: "dedicated_worktree",
|
|
9783
|
+
workspaceScope: "attempt",
|
|
9784
|
+
sessionScope: "none",
|
|
9715
9785
|
requiresReferences: true,
|
|
9716
9786
|
validateInput: validateJudgmentInput,
|
|
9717
9787
|
validateInputAsync: validateAssessBriefInputAsync
|
|
@@ -9721,6 +9791,8 @@ var BUILT_IN_TASK_TYPES = {
|
|
|
9721
9791
|
inputSchema: CuratePackInput,
|
|
9722
9792
|
outputSchema: CuratePackOutput,
|
|
9723
9793
|
outputKind: "artifact",
|
|
9794
|
+
workspaceScope: "attempt",
|
|
9795
|
+
sessionScope: "none",
|
|
9724
9796
|
requiresReferences: false,
|
|
9725
9797
|
validateOutput: requireVerificationWhenCriteriaPresent
|
|
9726
9798
|
},
|
|
@@ -9729,6 +9801,8 @@ var BUILT_IN_TASK_TYPES = {
|
|
|
9729
9801
|
inputSchema: RenderPackInput,
|
|
9730
9802
|
outputSchema: RenderPackOutput,
|
|
9731
9803
|
outputKind: "artifact",
|
|
9804
|
+
workspaceScope: "attempt",
|
|
9805
|
+
sessionScope: "none",
|
|
9732
9806
|
requiresReferences: false,
|
|
9733
9807
|
validateOutput: requireVerificationWhenCriteriaPresent,
|
|
9734
9808
|
validateInputAsync: validateRenderPackInputAsync
|
|
@@ -9738,6 +9812,8 @@ var BUILT_IN_TASK_TYPES = {
|
|
|
9738
9812
|
inputSchema: JudgePackInput,
|
|
9739
9813
|
outputSchema: JudgePackOutput,
|
|
9740
9814
|
outputKind: "judgment",
|
|
9815
|
+
workspaceScope: "attempt",
|
|
9816
|
+
sessionScope: "none",
|
|
9741
9817
|
requiresReferences: true,
|
|
9742
9818
|
validateInput: validateJudgmentInput,
|
|
9743
9819
|
validateOutput: validateJudgePackOutput,
|
|
@@ -9748,6 +9824,8 @@ var BUILT_IN_TASK_TYPES = {
|
|
|
9748
9824
|
inputSchema: RunEvalInput,
|
|
9749
9825
|
outputSchema: RunEvalOutput,
|
|
9750
9826
|
outputKind: "artifact",
|
|
9827
|
+
workspaceScope: "attempt",
|
|
9828
|
+
sessionScope: "custom",
|
|
9751
9829
|
requiresReferences: false,
|
|
9752
9830
|
validateOutput: validateRunEvalOutput
|
|
9753
9831
|
},
|
|
@@ -9756,6 +9834,8 @@ var BUILT_IN_TASK_TYPES = {
|
|
|
9756
9834
|
inputSchema: JudgeEvalVariantInput,
|
|
9757
9835
|
outputSchema: JudgeEvalVariantOutput,
|
|
9758
9836
|
outputKind: "judgment",
|
|
9837
|
+
workspaceScope: "attempt",
|
|
9838
|
+
sessionScope: "custom",
|
|
9759
9839
|
requiresReferences: false,
|
|
9760
9840
|
validateInput: validateJudgeEvalVariantInput,
|
|
9761
9841
|
validateOutput: validateJudgeEvalVariantOutput,
|
|
@@ -9825,15 +9905,6 @@ function getTaskOutputSchema(taskType) {
|
|
|
9825
9905
|
function taskTypeUsesSubagents(taskType) {
|
|
9826
9906
|
return getTaskTypeEntry(taskType)?.usesSubagents === true;
|
|
9827
9907
|
}
|
|
9828
|
-
/**
|
|
9829
|
-
* Filesystem isolation policy requested by the task type.
|
|
9830
|
-
*
|
|
9831
|
-
* Unknown task types and task types without an explicit policy default to the
|
|
9832
|
-
* legacy/shared behaviour.
|
|
9833
|
-
*/
|
|
9834
|
-
function taskTypeWorkspaceMode(taskType) {
|
|
9835
|
-
return getTaskTypeEntry(taskType)?.workspaceMode ?? "shared_mount";
|
|
9836
|
-
}
|
|
9837
9908
|
//#endregion
|
|
9838
9909
|
//#region ../tasks/src/wire.ts
|
|
9839
9910
|
/**
|
|
@@ -10070,133 +10141,6 @@ Type$1.Object({
|
|
|
10070
10141
|
additionalProperties: false
|
|
10071
10142
|
});
|
|
10072
10143
|
//#endregion
|
|
10073
|
-
//#region ../agent-runtime/src/subagent-output-contracts.ts
|
|
10074
|
-
var REGISTRY = /* @__PURE__ */ new Map();
|
|
10075
|
-
/**
|
|
10076
|
-
* Register a subagent output contract. Idempotent: re-registering the
|
|
10077
|
-
* same name with a different schema throws — contracts are meant to
|
|
10078
|
-
* be stable. Re-registering with the identical contract object (same
|
|
10079
|
-
* reference) is a no-op for HMR and test convenience.
|
|
10080
|
-
*
|
|
10081
|
-
* Typically called at module-init time alongside task-type
|
|
10082
|
-
* registration. See task-types/index.ts in @moltnet/tasks for the
|
|
10083
|
-
* conventional pattern.
|
|
10084
|
-
*/
|
|
10085
|
-
function registerSubagentOutputContract(contract) {
|
|
10086
|
-
if (!contract.name || contract.name.trim().length === 0) throw new Error("subagent output contract name is required");
|
|
10087
|
-
if (!/^[a-z][a-z0-9_]*$/.test(contract.name)) throw new Error(`subagent output contract name '${contract.name}' must be lower_snake_case (starts with a letter, then [a-z0-9_]+)`);
|
|
10088
|
-
const existing = REGISTRY.get(contract.name);
|
|
10089
|
-
if (existing && existing !== contract) {
|
|
10090
|
-
if (existing.parametersSchema !== contract.parametersSchema) throw new Error(`subagent output contract '${contract.name}' is already registered with a different schema; refusing to override`);
|
|
10091
|
-
}
|
|
10092
|
-
REGISTRY.set(contract.name, contract);
|
|
10093
|
-
}
|
|
10094
|
-
/**
|
|
10095
|
-
* Resolve a subagent output contract by name. Returns `null` for
|
|
10096
|
-
* unknown names — callers (the subagent custom tool) decide whether
|
|
10097
|
-
* that's a tool error the parent LLM can recover from or a hard fail.
|
|
10098
|
-
*/
|
|
10099
|
-
function getSubagentOutputContract(name) {
|
|
10100
|
-
return REGISTRY.get(name) ?? null;
|
|
10101
|
-
}
|
|
10102
|
-
/**
|
|
10103
|
-
* List all registered contracts. Useful for diagnostics and for the
|
|
10104
|
-
* subagent tool's parameter description so a parent LLM can see what
|
|
10105
|
-
* contracts are available without enumerating them in its prompt.
|
|
10106
|
-
*/
|
|
10107
|
-
function listSubagentOutputContracts() {
|
|
10108
|
-
return [...REGISTRY.values()];
|
|
10109
|
-
}
|
|
10110
|
-
//#endregion
|
|
10111
|
-
//#region ../agent-runtime/src/built-in-contract-registrations.ts
|
|
10112
|
-
/**
|
|
10113
|
-
* Built-in subagent output contracts (#1087, #943).
|
|
10114
|
-
*
|
|
10115
|
-
* Why this is an exported function and not a module-init side
|
|
10116
|
-
* effect:
|
|
10117
|
-
*
|
|
10118
|
-
* - The registry is process-global. Module-init registration
|
|
10119
|
-
* fires exactly once per Node process (ESM modules are cached
|
|
10120
|
-
* by URL). Tests that call `__resetSubagentOutputContractsForTests()`
|
|
10121
|
-
* to start from an empty registry have no way to repopulate
|
|
10122
|
-
* the built-ins without re-evaluating the module — which the
|
|
10123
|
-
* cache prevents. PR #1101 review M4.
|
|
10124
|
-
* - An explicit `registerBuiltInSubagentContracts()` lets the
|
|
10125
|
-
* package index call it once at module load AND lets test
|
|
10126
|
-
* setup hooks call it again after `__reset...`.
|
|
10127
|
-
* - `registerSubagentOutputContract` is itself idempotent for
|
|
10128
|
-
* identical re-registrations, so calling this function twice
|
|
10129
|
-
* in the same process is safe.
|
|
10130
|
-
*
|
|
10131
|
-
* Adding a new built-in: extend the body of this function. Do not
|
|
10132
|
-
* call `registerSubagentOutputContract` from anywhere else in the
|
|
10133
|
-
* package — keeping all built-ins in one function makes the set
|
|
10134
|
-
* auditable.
|
|
10135
|
-
*/
|
|
10136
|
-
function registerBuiltInSubagentContracts() {
|
|
10137
|
-
registerSubagentOutputContract({
|
|
10138
|
-
name: "judge_eval_variant_result",
|
|
10139
|
-
description: "Per-variant grading result produced by a subagent of judge_eval_variant: scores against the shared rubric, composite, and a 1-3 sentence verdict for a single variant.",
|
|
10140
|
-
parametersSchema: JudgeEvalVariantResult
|
|
10141
|
-
});
|
|
10142
|
-
}
|
|
10143
|
-
registerBuiltInSubagentContracts();
|
|
10144
|
-
//#endregion
|
|
10145
|
-
//#region ../agent-runtime/src/context-bindings.ts
|
|
10146
|
-
var PROMPT_SEPARATOR = "\n\n---\n\n";
|
|
10147
|
-
/**
|
|
10148
|
-
* Resolve `task.input.context[]` into delivered side-effects (skills
|
|
10149
|
-
* persisted via `deliver.skill`) and prompt fragments
|
|
10150
|
-
* (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
|
|
10151
|
-
* built prompt.
|
|
10152
|
-
*
|
|
10153
|
-
* Per-binding semantics (V1):
|
|
10154
|
-
* - `skill` → `deliver.skill({ slug, content })` once per ref.
|
|
10155
|
-
* Slug collisions on distinct contents are
|
|
10156
|
-
* refused loudly.
|
|
10157
|
-
* - `prompt_prefix` → content appended to `systemPromptPrefix` with
|
|
10158
|
-
* the canonical `\n\n---\n\n` separator (in
|
|
10159
|
-
* declared order).
|
|
10160
|
-
* - `user_inline` → content appended to `userInlineSuffix` in
|
|
10161
|
-
* declared order, same separator.
|
|
10162
|
-
*
|
|
10163
|
-
* No fetching, no hashing — bytes are inlined in `ContextRef.content`,
|
|
10164
|
-
* and the task's `inputCid` already pins the entire input. The imposer
|
|
10165
|
-
* chose these bytes; the resolver just dispatches them.
|
|
10166
|
-
*
|
|
10167
|
-
* The function is pure with respect to its arguments: file writes are
|
|
10168
|
-
* confined to the injected `deliver` callback, which makes the
|
|
10169
|
-
* resolver trivial to test.
|
|
10170
|
-
*/
|
|
10171
|
-
async function resolveTaskContext(args) {
|
|
10172
|
-
const promptParts = [];
|
|
10173
|
-
const userParts = [];
|
|
10174
|
-
const injected = [];
|
|
10175
|
-
const usedSlugs = /* @__PURE__ */ new Map();
|
|
10176
|
-
for (const ref of args.context) {
|
|
10177
|
-
if (ref.binding === "skill") {
|
|
10178
|
-
const prior = usedSlugs.get(ref.slug);
|
|
10179
|
-
if (prior !== void 0) {
|
|
10180
|
-
if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
|
|
10181
|
-
injected.push(ref);
|
|
10182
|
-
continue;
|
|
10183
|
-
}
|
|
10184
|
-
usedSlugs.set(ref.slug, ref.content);
|
|
10185
|
-
await args.deliver.skill({
|
|
10186
|
-
slug: ref.slug,
|
|
10187
|
-
content: ref.content
|
|
10188
|
-
});
|
|
10189
|
-
} else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
|
|
10190
|
-
else userParts.push(ref.content);
|
|
10191
|
-
injected.push(ref);
|
|
10192
|
-
}
|
|
10193
|
-
return {
|
|
10194
|
-
injected,
|
|
10195
|
-
systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
|
|
10196
|
-
userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
|
|
10197
|
-
};
|
|
10198
|
-
}
|
|
10199
|
-
//#endregion
|
|
10200
10144
|
//#region ../agent-runtime/src/output-tools.ts
|
|
10201
10145
|
/**
|
|
10202
10146
|
* Submit-output tool contract.
|
|
@@ -10601,13 +10545,7 @@ function buildCuratePackUserPrompt(input, ctx) {
|
|
|
10601
10545
|
* is told to inspect them itself.
|
|
10602
10546
|
*/
|
|
10603
10547
|
function buildFulfillBriefUserPrompt(input, ctx) {
|
|
10604
|
-
const { brief, title,
|
|
10605
|
-
const criteriaSection = acceptanceCriteria?.length ? [
|
|
10606
|
-
"### Acceptance criteria",
|
|
10607
|
-
"",
|
|
10608
|
-
...acceptanceCriteria.map((c) => `- ${c}`),
|
|
10609
|
-
""
|
|
10610
|
-
].join("\n") : "";
|
|
10548
|
+
const { brief, title, seedFiles, scopeHint } = input;
|
|
10611
10549
|
const seedSection = seedFiles?.length ? [
|
|
10612
10550
|
"### Seed files",
|
|
10613
10551
|
"",
|
|
@@ -10655,7 +10593,6 @@ function buildFulfillBriefUserPrompt(input, ctx) {
|
|
|
10655
10593
|
"",
|
|
10656
10594
|
brief,
|
|
10657
10595
|
"",
|
|
10658
|
-
criteriaSection,
|
|
10659
10596
|
seedSection,
|
|
10660
10597
|
correlationSection,
|
|
10661
10598
|
workspaceSection,
|
|
@@ -14822,6 +14759,7 @@ var DEFAULT_SUBAGENT_TIMEOUT_MS = 300 * 1e3;
|
|
|
14822
14759
|
*/
|
|
14823
14760
|
function createSubagentTool(args) {
|
|
14824
14761
|
const buildSession = args.buildAgentSession ?? buildAgentSession;
|
|
14762
|
+
const { contractRegistry } = args;
|
|
14825
14763
|
let callCount = 0;
|
|
14826
14764
|
return {
|
|
14827
14765
|
tool: defineTool({
|
|
@@ -14832,8 +14770,8 @@ function createSubagentTool(args) {
|
|
|
14832
14770
|
async execute(_id, params) {
|
|
14833
14771
|
if (!Value.Check(SubagentToolParameters, params)) return toolError(`subagent: invalid parameters: ${JSON.stringify([...Value.Errors(SubagentToolParameters, params)].slice(0, 3))}`);
|
|
14834
14772
|
const { task, output_schema } = params;
|
|
14835
|
-
const contract =
|
|
14836
|
-
if (!contract) return toolError(`subagent: unknown output_schema "${output_schema}". Registered contracts: [${
|
|
14773
|
+
const contract = contractRegistry.get(output_schema);
|
|
14774
|
+
if (!contract) return toolError(`subagent: unknown output_schema "${output_schema}". Registered contracts: [${contractRegistry.list().map((c) => c.name).join(", ")}]`);
|
|
14837
14775
|
callCount += 1;
|
|
14838
14776
|
const callIndex = callCount;
|
|
14839
14777
|
let captured = null;
|
|
@@ -14863,6 +14801,7 @@ function createSubagentTool(args) {
|
|
|
14863
14801
|
});
|
|
14864
14802
|
const session = await buildSession({
|
|
14865
14803
|
mountPath: args.mountPath,
|
|
14804
|
+
cwdPath: args.cwdPath ?? args.mountPath,
|
|
14866
14805
|
piAuthDir: args.piAuthDir,
|
|
14867
14806
|
modelHandle: args.modelHandle,
|
|
14868
14807
|
agentName: args.agentName,
|
|
@@ -15198,6 +15137,115 @@ function resolveSubmitTools(taskType, opts = {}) {
|
|
|
15198
15137
|
};
|
|
15199
15138
|
}
|
|
15200
15139
|
//#endregion
|
|
15140
|
+
//#region src/runtime/task-workspace.ts
|
|
15141
|
+
function prepareTaskWorkspace(task, requestedMountPath, executionPlan) {
|
|
15142
|
+
const branch = executionPlan?.worktreeBranch ?? null;
|
|
15143
|
+
if (!branch) return {
|
|
15144
|
+
mountPath: requestedMountPath,
|
|
15145
|
+
cwdPath: requestedMountPath,
|
|
15146
|
+
mode: "shared_mount",
|
|
15147
|
+
branch: null,
|
|
15148
|
+
cleanup: () => {}
|
|
15149
|
+
};
|
|
15150
|
+
const mainRepo = findMainWorktree();
|
|
15151
|
+
const worktreeDir = resolveTaskWorktreePath(mainRepo, executionPlan?.workspaceId ?? `task-${task.id}`);
|
|
15152
|
+
const relMount = relative(mainRepo, requestedMountPath);
|
|
15153
|
+
const cwdPath = relMount === "" || relMount.startsWith("..") ? worktreeDir : join(worktreeDir, relMount);
|
|
15154
|
+
const keepWorkspace = executionPlan?.workspaceScope === "session" && executionPlan.sessionKey !== null;
|
|
15155
|
+
if (keepWorkspace) ensureReusableTaskWorktree(mainRepo, worktreeDir, branch);
|
|
15156
|
+
else {
|
|
15157
|
+
removeExistingTaskWorktree(mainRepo, worktreeDir);
|
|
15158
|
+
addTaskWorktree(mainRepo, worktreeDir, branch);
|
|
15159
|
+
}
|
|
15160
|
+
return {
|
|
15161
|
+
mountPath: mainRepo,
|
|
15162
|
+
cwdPath,
|
|
15163
|
+
mode: "dedicated_worktree",
|
|
15164
|
+
branch,
|
|
15165
|
+
cleanup: keepWorkspace ? () => {} : () => {
|
|
15166
|
+
execFileSync("git", [
|
|
15167
|
+
"-C",
|
|
15168
|
+
mainRepo,
|
|
15169
|
+
"worktree",
|
|
15170
|
+
"remove",
|
|
15171
|
+
"--force",
|
|
15172
|
+
worktreeDir
|
|
15173
|
+
], { stdio: "pipe" });
|
|
15174
|
+
}
|
|
15175
|
+
};
|
|
15176
|
+
}
|
|
15177
|
+
function resolveTaskWorktreePath(mainRepo, workspaceId) {
|
|
15178
|
+
return join(mainRepo, ".worktrees", workspaceId);
|
|
15179
|
+
}
|
|
15180
|
+
function ensureReusableTaskWorktree(mainRepo, worktreeDir, branch) {
|
|
15181
|
+
if (isRegisteredWorktree(mainRepo, worktreeDir)) return;
|
|
15182
|
+
if (existsSync(worktreeDir)) throw new Error(`Expected reusable worktree ${worktreeDir} to be git-managed, but it exists outside git worktree metadata.`);
|
|
15183
|
+
addTaskWorktree(mainRepo, worktreeDir, branch);
|
|
15184
|
+
}
|
|
15185
|
+
function addTaskWorktree(mainRepo, worktreeDir, branch) {
|
|
15186
|
+
const baseRef = resolveWorktreeBaseRef(mainRepo);
|
|
15187
|
+
execFileSync("git", gitRefExists(mainRepo, `refs/heads/${branch}`) ? [
|
|
15188
|
+
"-C",
|
|
15189
|
+
mainRepo,
|
|
15190
|
+
"worktree",
|
|
15191
|
+
"add",
|
|
15192
|
+
worktreeDir,
|
|
15193
|
+
branch
|
|
15194
|
+
] : [
|
|
15195
|
+
"-C",
|
|
15196
|
+
mainRepo,
|
|
15197
|
+
"worktree",
|
|
15198
|
+
"add",
|
|
15199
|
+
"-b",
|
|
15200
|
+
branch,
|
|
15201
|
+
worktreeDir,
|
|
15202
|
+
baseRef
|
|
15203
|
+
], { stdio: "pipe" });
|
|
15204
|
+
}
|
|
15205
|
+
function removeExistingTaskWorktree(mainRepo, worktreeDir) {
|
|
15206
|
+
if (!existsSync(worktreeDir) || !isRegisteredWorktree(mainRepo, worktreeDir)) return;
|
|
15207
|
+
execFileSync("git", [
|
|
15208
|
+
"-C",
|
|
15209
|
+
mainRepo,
|
|
15210
|
+
"worktree",
|
|
15211
|
+
"remove",
|
|
15212
|
+
"--force",
|
|
15213
|
+
worktreeDir
|
|
15214
|
+
], { stdio: "pipe" });
|
|
15215
|
+
}
|
|
15216
|
+
function isRegisteredWorktree(mainRepo, worktreeDir) {
|
|
15217
|
+
const list = execFileSync("git", [
|
|
15218
|
+
"-C",
|
|
15219
|
+
mainRepo,
|
|
15220
|
+
"worktree",
|
|
15221
|
+
"list",
|
|
15222
|
+
"--porcelain"
|
|
15223
|
+
], {
|
|
15224
|
+
encoding: "utf8",
|
|
15225
|
+
stdio: "pipe"
|
|
15226
|
+
});
|
|
15227
|
+
const marker = `worktree ${worktreeDir}\n`;
|
|
15228
|
+
return list.includes(marker) || list.endsWith(`worktree ${worktreeDir}`);
|
|
15229
|
+
}
|
|
15230
|
+
function resolveWorktreeBaseRef(mainRepo) {
|
|
15231
|
+
return gitRefExists(mainRepo, "refs/heads/main") ? "main" : "HEAD";
|
|
15232
|
+
}
|
|
15233
|
+
function gitRefExists(mainRepo, ref) {
|
|
15234
|
+
try {
|
|
15235
|
+
execFileSync("git", [
|
|
15236
|
+
"-C",
|
|
15237
|
+
mainRepo,
|
|
15238
|
+
"show-ref",
|
|
15239
|
+
"--verify",
|
|
15240
|
+
"--quiet",
|
|
15241
|
+
ref
|
|
15242
|
+
], { stdio: "pipe" });
|
|
15243
|
+
return true;
|
|
15244
|
+
} catch {
|
|
15245
|
+
return false;
|
|
15246
|
+
}
|
|
15247
|
+
}
|
|
15248
|
+
//#endregion
|
|
15201
15249
|
//#region src/runtime/execute-pi-task.ts
|
|
15202
15250
|
/**
|
|
15203
15251
|
* executePiTask — run a single Task attempt using pi-coding-agent inside a
|
|
@@ -15224,15 +15272,24 @@ var noopTurnEventHandler = () => {};
|
|
|
15224
15272
|
function createPiTaskExecutor(opts) {
|
|
15225
15273
|
let cachedCheckpoint = opts.checkpointPath ?? null;
|
|
15226
15274
|
return async (claimedTask, reporter) => {
|
|
15227
|
-
|
|
15228
|
-
|
|
15229
|
-
|
|
15230
|
-
|
|
15231
|
-
})
|
|
15275
|
+
const reporterWasOpened = !reporter.cancelSignal.aborted;
|
|
15276
|
+
if (reporterWasOpened) await reporter.open({
|
|
15277
|
+
taskId: claimedTask.task.id,
|
|
15278
|
+
attemptN: claimedTask.attemptN
|
|
15232
15279
|
});
|
|
15233
15280
|
return executePiTask(claimedTask, reporter, {
|
|
15234
15281
|
...opts,
|
|
15235
|
-
checkpointPath: cachedCheckpoint
|
|
15282
|
+
checkpointPath: cachedCheckpoint ?? void 0,
|
|
15283
|
+
resolveCheckpointPath: async () => {
|
|
15284
|
+
if (!cachedCheckpoint) cachedCheckpoint = await ensureSnapshot({
|
|
15285
|
+
config: opts.sandboxConfig?.snapshot,
|
|
15286
|
+
onProgress: opts.onSnapshotProgress ?? ((m) => {
|
|
15287
|
+
process.stderr.write(`[snapshot] ${m}\n`);
|
|
15288
|
+
})
|
|
15289
|
+
});
|
|
15290
|
+
return cachedCheckpoint;
|
|
15291
|
+
},
|
|
15292
|
+
reporterAlreadyOpened: reporterWasOpened
|
|
15236
15293
|
});
|
|
15237
15294
|
};
|
|
15238
15295
|
}
|
|
@@ -15246,8 +15303,11 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15246
15303
|
const task = claimedTask.task;
|
|
15247
15304
|
const attemptN = claimedTask.attemptN;
|
|
15248
15305
|
const startTime = Date.now();
|
|
15249
|
-
const
|
|
15250
|
-
const
|
|
15306
|
+
const requestedMountPath = opts.mountPath ?? process.cwd();
|
|
15307
|
+
const executionPlan = opts.makeExecutionPlan?.(claimedTask) ?? null;
|
|
15308
|
+
let workspace = null;
|
|
15309
|
+
let mountPath = requestedMountPath;
|
|
15310
|
+
let cwdPath = requestedMountPath;
|
|
15251
15311
|
if (reporter.cancelSignal.aborted) return {
|
|
15252
15312
|
taskId: task.id,
|
|
15253
15313
|
attemptN,
|
|
@@ -15262,33 +15322,8 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15262
15322
|
retryable: false
|
|
15263
15323
|
}
|
|
15264
15324
|
};
|
|
15265
|
-
|
|
15266
|
-
config: opts.sandboxConfig?.snapshot,
|
|
15267
|
-
onProgress: opts.onSnapshotProgress ?? ((m) => {
|
|
15268
|
-
process.stderr.write(`[snapshot] ${m}\n`);
|
|
15269
|
-
})
|
|
15270
|
-
});
|
|
15271
|
-
const mainRepoForRepair = findMainWorktree();
|
|
15272
|
-
try {
|
|
15273
|
-
execFileSync("git", [
|
|
15274
|
-
"-C",
|
|
15275
|
-
mainRepoForRepair,
|
|
15276
|
-
"worktree",
|
|
15277
|
-
"repair",
|
|
15278
|
-
"--relative-paths"
|
|
15279
|
-
], { stdio: "pipe" });
|
|
15280
|
-
} catch {}
|
|
15325
|
+
let reporterOpen = opts.reporterAlreadyOpened ?? false;
|
|
15281
15326
|
let managed = null;
|
|
15282
|
-
managed = await resumeVm({
|
|
15283
|
-
checkpointPath,
|
|
15284
|
-
agentName: opts.agentName,
|
|
15285
|
-
mountPath,
|
|
15286
|
-
extraAllowedHosts: opts.extraAllowedHosts,
|
|
15287
|
-
sandboxConfig: opts.sandboxConfig
|
|
15288
|
-
});
|
|
15289
|
-
const diaryId = task.diaryId ?? "";
|
|
15290
|
-
const taskTeamId = task.teamId ?? "";
|
|
15291
|
-
let reporterOpen = false;
|
|
15292
15327
|
let session = null;
|
|
15293
15328
|
let subagentHandle = null;
|
|
15294
15329
|
const finalUsage = emptyUsage(opts.provider, opts.model);
|
|
@@ -15307,41 +15342,103 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15307
15342
|
retryable: false
|
|
15308
15343
|
}
|
|
15309
15344
|
});
|
|
15345
|
+
let onTurnEvent;
|
|
15346
|
+
if (opts.makeOnTurnEvent) try {
|
|
15347
|
+
onTurnEvent = opts.makeOnTurnEvent(claimedTask);
|
|
15348
|
+
} catch (err) {
|
|
15349
|
+
process.stderr.write(`[emit] makeOnTurnEvent threw: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
15350
|
+
onTurnEvent = noopTurnEventHandler;
|
|
15351
|
+
}
|
|
15352
|
+
else onTurnEvent = opts.onTurnEvent ?? noopTurnEventHandler;
|
|
15353
|
+
const emit = (kind, payload) => {
|
|
15354
|
+
try {
|
|
15355
|
+
onTurnEvent(kind, summarizePayloadForLog(kind, payload));
|
|
15356
|
+
} catch (err) {
|
|
15357
|
+
process.stderr.write(`[emit] onTurnEvent threw for kind="${kind}": ${err instanceof Error ? err.message : String(err)}\n`);
|
|
15358
|
+
}
|
|
15359
|
+
return reporter.record({
|
|
15360
|
+
kind,
|
|
15361
|
+
payload
|
|
15362
|
+
});
|
|
15363
|
+
};
|
|
15364
|
+
const emitError = async (phase, message, extra = {}) => {
|
|
15365
|
+
await emit("error", {
|
|
15366
|
+
phase,
|
|
15367
|
+
message,
|
|
15368
|
+
...extra
|
|
15369
|
+
});
|
|
15370
|
+
};
|
|
15310
15371
|
try {
|
|
15311
|
-
|
|
15312
|
-
activateAgentEnv(managed.credentials.agentEnv, mainRepo);
|
|
15313
|
-
await reporter.open({
|
|
15372
|
+
if (!opts.reporterAlreadyOpened) await reporter.open({
|
|
15314
15373
|
taskId: task.id,
|
|
15315
15374
|
attemptN
|
|
15316
15375
|
});
|
|
15317
15376
|
reporterOpen = true;
|
|
15318
|
-
let
|
|
15319
|
-
|
|
15320
|
-
|
|
15377
|
+
let checkpointPath;
|
|
15378
|
+
try {
|
|
15379
|
+
checkpointPath = opts.checkpointPath ?? (opts.resolveCheckpointPath ? await opts.resolveCheckpointPath() : await ensureSnapshot({
|
|
15380
|
+
config: opts.sandboxConfig?.snapshot,
|
|
15381
|
+
onProgress: opts.onSnapshotProgress ?? ((m) => {
|
|
15382
|
+
process.stderr.write(`[snapshot] ${m}\n`);
|
|
15383
|
+
})
|
|
15384
|
+
}));
|
|
15321
15385
|
} catch (err) {
|
|
15322
|
-
|
|
15323
|
-
|
|
15386
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
15387
|
+
await emitError("snapshot", message);
|
|
15388
|
+
return makeFailedOutput("snapshot_failed", message);
|
|
15324
15389
|
}
|
|
15325
|
-
|
|
15326
|
-
|
|
15390
|
+
try {
|
|
15391
|
+
workspace = prepareTaskWorkspace(task, requestedMountPath, executionPlan);
|
|
15392
|
+
mountPath = workspace.mountPath;
|
|
15393
|
+
cwdPath = workspace.cwdPath;
|
|
15394
|
+
} catch (err) {
|
|
15395
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
15396
|
+
await emitError("worktree_setup", message);
|
|
15397
|
+
return makeFailedOutput("worktree_setup_failed", message);
|
|
15398
|
+
}
|
|
15399
|
+
try {
|
|
15400
|
+
const mainRepoForRepair = findMainWorktree();
|
|
15327
15401
|
try {
|
|
15328
|
-
|
|
15329
|
-
|
|
15330
|
-
|
|
15331
|
-
|
|
15332
|
-
|
|
15333
|
-
|
|
15334
|
-
|
|
15402
|
+
execFileSync("git", [
|
|
15403
|
+
"-C",
|
|
15404
|
+
mainRepoForRepair,
|
|
15405
|
+
"worktree",
|
|
15406
|
+
"repair",
|
|
15407
|
+
"--relative-paths"
|
|
15408
|
+
], { stdio: "pipe" });
|
|
15409
|
+
} catch {}
|
|
15410
|
+
} catch (err) {
|
|
15411
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
15412
|
+
await emitError("worktree_setup", message);
|
|
15413
|
+
return makeFailedOutput("worktree_setup_failed", message);
|
|
15414
|
+
}
|
|
15415
|
+
try {
|
|
15416
|
+
managed = await resumeVm({
|
|
15417
|
+
checkpointPath,
|
|
15418
|
+
agentName: opts.agentName,
|
|
15419
|
+
mountPath,
|
|
15420
|
+
extraAllowedHosts: opts.extraAllowedHosts,
|
|
15421
|
+
sandboxConfig: opts.sandboxConfig
|
|
15335
15422
|
});
|
|
15336
|
-
}
|
|
15423
|
+
} catch (err) {
|
|
15424
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
15425
|
+
await emitError("vm_resume", message);
|
|
15426
|
+
return makeFailedOutput("vm_resume_failed", message);
|
|
15427
|
+
}
|
|
15428
|
+
const diaryId = task.diaryId ?? "";
|
|
15429
|
+
const taskTeamId = task.teamId ?? "";
|
|
15430
|
+
const mainRepo = findMainWorktree();
|
|
15431
|
+
activateAgentEnv(managed.credentials.agentEnv, mainRepo);
|
|
15432
|
+
const activeWorkspace = workspace;
|
|
15433
|
+
if (!activeWorkspace) throw new Error("task workspace not prepared");
|
|
15337
15434
|
await emit("info", {
|
|
15338
15435
|
event: "execute_start",
|
|
15339
15436
|
taskType: task.taskType,
|
|
15340
15437
|
teamId: task.teamId,
|
|
15341
15438
|
provider: opts.provider,
|
|
15342
15439
|
model: opts.model,
|
|
15343
|
-
workspaceMode:
|
|
15344
|
-
workspaceBranch:
|
|
15440
|
+
workspaceMode: activeWorkspace.mode,
|
|
15441
|
+
workspaceBranch: activeWorkspace.branch
|
|
15345
15442
|
});
|
|
15346
15443
|
let taskPrompt;
|
|
15347
15444
|
try {
|
|
@@ -15349,8 +15446,8 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15349
15446
|
diaryId,
|
|
15350
15447
|
taskId: task.id,
|
|
15351
15448
|
workspace: {
|
|
15352
|
-
mode:
|
|
15353
|
-
branch:
|
|
15449
|
+
mode: activeWorkspace.mode,
|
|
15450
|
+
branch: activeWorkspace.branch
|
|
15354
15451
|
},
|
|
15355
15452
|
extras: opts.promptExtras
|
|
15356
15453
|
});
|
|
@@ -15402,7 +15499,7 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15402
15499
|
getTeamId: () => taskTeamId,
|
|
15403
15500
|
getSessionErrors: () => [],
|
|
15404
15501
|
clearSessionErrors: () => {},
|
|
15405
|
-
getHostCwd: () =>
|
|
15502
|
+
getHostCwd: () => cwdPath,
|
|
15406
15503
|
hostExecBaseEnv: new Set([...HOST_EXEC_DEFAULT_BASE_ENV, ...Object.keys(managed.credentials.agentEnv)]),
|
|
15407
15504
|
hostExecAutoApprove: opts.hostExecAutoApprove ?? opts.sandboxConfig?.hostExec?.autoApprove ?? false,
|
|
15408
15505
|
getTaskContext: () => ({
|
|
@@ -15430,6 +15527,7 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15430
15527
|
if (taskTypeUsesSubagents(task.taskType)) {
|
|
15431
15528
|
subagentHandle = createSubagentTool({
|
|
15432
15529
|
mountPath,
|
|
15530
|
+
cwdPath,
|
|
15433
15531
|
piAuthDir,
|
|
15434
15532
|
modelHandle,
|
|
15435
15533
|
agentName: opts.agentName,
|
|
@@ -15438,12 +15536,14 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15438
15536
|
parentTaskId: task.id,
|
|
15439
15537
|
parentTaskType: task.taskType,
|
|
15440
15538
|
parentAttemptN: attemptN,
|
|
15539
|
+
contractRegistry: opts.subagentContractRegistry,
|
|
15441
15540
|
parentCancelSignal: reporter.cancelSignal
|
|
15442
15541
|
});
|
|
15443
15542
|
parentSubagentTools.push(subagentHandle.tool);
|
|
15444
15543
|
}
|
|
15445
15544
|
session = await buildAgentSession({
|
|
15446
15545
|
mountPath,
|
|
15546
|
+
cwdPath,
|
|
15447
15547
|
piAuthDir,
|
|
15448
15548
|
modelHandle,
|
|
15449
15549
|
agentName: opts.agentName,
|
|
@@ -15462,7 +15562,8 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15462
15562
|
"moltnet.task.id": task.id,
|
|
15463
15563
|
"moltnet.task.attempt": attemptN,
|
|
15464
15564
|
"moltnet.task.type": task.taskType
|
|
15465
|
-
}
|
|
15565
|
+
},
|
|
15566
|
+
sessionPersistence: executionPlan?.sessionPersistence ?? void 0
|
|
15466
15567
|
});
|
|
15467
15568
|
} catch (err) {
|
|
15468
15569
|
const message = err instanceof Error ? err.message : String(err);
|
|
@@ -15527,6 +15628,10 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15527
15628
|
is_error: event.isError,
|
|
15528
15629
|
result: event.isError ? truncateForWire(event.result) : void 0
|
|
15529
15630
|
}));
|
|
15631
|
+
if (event.isError) track(emitError("tool_call_error", describeToolErrorMessage(event.result), {
|
|
15632
|
+
tool: event.toolName,
|
|
15633
|
+
result: truncateForWire(event.result)
|
|
15634
|
+
}));
|
|
15530
15635
|
if (maxBashTimeouts > 0 && event.toolName === "bash" && event.isError && isBashTimeoutResult(event.result)) {
|
|
15531
15636
|
bashTimeoutCount += 1;
|
|
15532
15637
|
if (bashTimeoutCount >= maxBashTimeouts) triggerCapAbort("max_bash_timeouts_exceeded", `Aborted after ${bashTimeoutCount} bash timeouts in this attempt (cap ${maxBashTimeouts}).`);
|
|
@@ -15682,7 +15787,7 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15682
15787
|
}
|
|
15683
15788
|
}
|
|
15684
15789
|
if (managed) await managed.vm.close();
|
|
15685
|
-
try {
|
|
15790
|
+
if (workspace) try {
|
|
15686
15791
|
workspace.cleanup();
|
|
15687
15792
|
} catch (err) {
|
|
15688
15793
|
const detail = err instanceof Error ? err.message : String(err);
|
|
@@ -15690,107 +15795,6 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15690
15795
|
}
|
|
15691
15796
|
}
|
|
15692
15797
|
}
|
|
15693
|
-
function resolveTaskWorktreeBranch(task) {
|
|
15694
|
-
if (taskTypeWorkspaceMode(task.taskType) !== "dedicated_worktree") return null;
|
|
15695
|
-
if (task.taskType === "fulfill_brief") {
|
|
15696
|
-
const input = task.input;
|
|
15697
|
-
const slug = slugifyBranchComponent(typeof input.title === "string" && input.title.trim().length > 0 ? input.title : typeof input.brief === "string" && input.brief.trim().length > 0 ? input.brief : task.taskType) || "task";
|
|
15698
|
-
if (task.correlationId) return `moltnet/${task.correlationId}/${slug}`;
|
|
15699
|
-
return `feat/${(typeof input.scopeHint === "string" && input.scopeHint.trim().length > 0 ? slugifyBranchComponent(input.scopeHint) : "task") || "task"}-${slug}`;
|
|
15700
|
-
}
|
|
15701
|
-
return `task/${slugifyBranchComponent(task.taskType) || "task"}-${task.id.slice(0, 8)}`;
|
|
15702
|
-
}
|
|
15703
|
-
function slugifyBranchComponent(input) {
|
|
15704
|
-
return input.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 60).replace(/-+$/g, "");
|
|
15705
|
-
}
|
|
15706
|
-
function prepareTaskWorkspace(task, requestedMountPath) {
|
|
15707
|
-
const branch = resolveTaskWorktreeBranch(task);
|
|
15708
|
-
if (!branch) return {
|
|
15709
|
-
mountPath: requestedMountPath,
|
|
15710
|
-
mode: "shared_mount",
|
|
15711
|
-
branch: null,
|
|
15712
|
-
cleanup: () => {}
|
|
15713
|
-
};
|
|
15714
|
-
const mainRepo = findMainWorktree();
|
|
15715
|
-
const worktreeDir = join(mainRepo, ".worktrees", `task-${task.id}`);
|
|
15716
|
-
removeExistingTaskWorktree(mainRepo, worktreeDir);
|
|
15717
|
-
const relMount = relative(mainRepo, requestedMountPath);
|
|
15718
|
-
const mountPath = relMount === "" || relMount.startsWith("..") ? worktreeDir : join(worktreeDir, relMount);
|
|
15719
|
-
const baseRef = resolveWorktreeBaseRef(mainRepo);
|
|
15720
|
-
execFileSync("git", gitRefExists(mainRepo, `refs/heads/${branch}`) ? [
|
|
15721
|
-
"-C",
|
|
15722
|
-
mainRepo,
|
|
15723
|
-
"worktree",
|
|
15724
|
-
"add",
|
|
15725
|
-
worktreeDir,
|
|
15726
|
-
branch
|
|
15727
|
-
] : [
|
|
15728
|
-
"-C",
|
|
15729
|
-
mainRepo,
|
|
15730
|
-
"worktree",
|
|
15731
|
-
"add",
|
|
15732
|
-
"-b",
|
|
15733
|
-
branch,
|
|
15734
|
-
worktreeDir,
|
|
15735
|
-
baseRef
|
|
15736
|
-
], { stdio: "pipe" });
|
|
15737
|
-
return {
|
|
15738
|
-
mountPath,
|
|
15739
|
-
mode: "dedicated_worktree",
|
|
15740
|
-
branch,
|
|
15741
|
-
cleanup: () => {
|
|
15742
|
-
execFileSync("git", [
|
|
15743
|
-
"-C",
|
|
15744
|
-
mainRepo,
|
|
15745
|
-
"worktree",
|
|
15746
|
-
"remove",
|
|
15747
|
-
"--force",
|
|
15748
|
-
worktreeDir
|
|
15749
|
-
], { stdio: "pipe" });
|
|
15750
|
-
}
|
|
15751
|
-
};
|
|
15752
|
-
}
|
|
15753
|
-
function removeExistingTaskWorktree(mainRepo, worktreeDir) {
|
|
15754
|
-
if (!existsSync(worktreeDir)) return;
|
|
15755
|
-
const list = execFileSync("git", [
|
|
15756
|
-
"-C",
|
|
15757
|
-
mainRepo,
|
|
15758
|
-
"worktree",
|
|
15759
|
-
"list",
|
|
15760
|
-
"--porcelain"
|
|
15761
|
-
], {
|
|
15762
|
-
encoding: "utf8",
|
|
15763
|
-
stdio: "pipe"
|
|
15764
|
-
});
|
|
15765
|
-
const marker = `worktree ${worktreeDir}\n`;
|
|
15766
|
-
if (!list.includes(marker) && !list.endsWith(`worktree ${worktreeDir}`)) return;
|
|
15767
|
-
execFileSync("git", [
|
|
15768
|
-
"-C",
|
|
15769
|
-
mainRepo,
|
|
15770
|
-
"worktree",
|
|
15771
|
-
"remove",
|
|
15772
|
-
"--force",
|
|
15773
|
-
worktreeDir
|
|
15774
|
-
], { stdio: "pipe" });
|
|
15775
|
-
}
|
|
15776
|
-
function resolveWorktreeBaseRef(mainRepo) {
|
|
15777
|
-
return gitRefExists(mainRepo, "refs/heads/main") ? "main" : "HEAD";
|
|
15778
|
-
}
|
|
15779
|
-
function gitRefExists(mainRepo, ref) {
|
|
15780
|
-
try {
|
|
15781
|
-
execFileSync("git", [
|
|
15782
|
-
"-C",
|
|
15783
|
-
mainRepo,
|
|
15784
|
-
"show-ref",
|
|
15785
|
-
"--verify",
|
|
15786
|
-
"--quiet",
|
|
15787
|
-
ref
|
|
15788
|
-
], { stdio: "pipe" });
|
|
15789
|
-
return true;
|
|
15790
|
-
} catch {
|
|
15791
|
-
return false;
|
|
15792
|
-
}
|
|
15793
|
-
}
|
|
15794
15798
|
function emptyUsage(provider, model) {
|
|
15795
15799
|
return {
|
|
15796
15800
|
inputTokens: 0,
|
|
@@ -15894,6 +15898,23 @@ function truncateForWire(value) {
|
|
|
15894
15898
|
};
|
|
15895
15899
|
}
|
|
15896
15900
|
}
|
|
15901
|
+
function describeToolErrorMessage(result) {
|
|
15902
|
+
if (typeof result === "string" && result.trim().length > 0) return result.trim();
|
|
15903
|
+
if (result && typeof result === "object") {
|
|
15904
|
+
const content = result.content;
|
|
15905
|
+
if (Array.isArray(content)) {
|
|
15906
|
+
for (const item of content) if (item && typeof item === "object" && typeof item.text === "string") {
|
|
15907
|
+
const text = item.text.trim();
|
|
15908
|
+
if (text.length > 0) return text;
|
|
15909
|
+
}
|
|
15910
|
+
}
|
|
15911
|
+
}
|
|
15912
|
+
try {
|
|
15913
|
+
return JSON.stringify(truncateForWire(result));
|
|
15914
|
+
} catch {
|
|
15915
|
+
return "Tool call failed";
|
|
15916
|
+
}
|
|
15917
|
+
}
|
|
15897
15918
|
//#endregion
|
|
15898
15919
|
//#region src/index.ts
|
|
15899
15920
|
/**
|
|
@@ -16170,4 +16191,4 @@ function moltnetExtension(pi) {
|
|
|
16170
16191
|
registerMoltnetReflectCommand(pi, state);
|
|
16171
16192
|
}
|
|
16172
16193
|
//#endregion
|
|
16173
|
-
export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, buildAgentSession, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, createSubagentTool, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, injectTaskContext, loadCredentials, resumeVm, toGuestPath };
|
|
16194
|
+
export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, buildAgentSession, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, createSubagentTool, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, injectTaskContext, loadCredentials, resolveTaskWorktreePath, resumeVm, toGuestPath };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@themoltnet/pi-extension",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.18.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "MoltNet pi extension — sandboxed tool execution in Gondolin VMs with MoltNet identity and persistent memory",
|
|
6
6
|
"license": "MIT",
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
"@opentelemetry/api": "^1.9.0",
|
|
33
33
|
"@sinclair/typebox": "^0.34.0",
|
|
34
34
|
"@themoltnet/sdk": "0.102.0",
|
|
35
|
-
"@themoltnet/agent-runtime": "0.15.
|
|
35
|
+
"@themoltnet/agent-runtime": "0.15.2"
|
|
36
36
|
},
|
|
37
37
|
"peerDependencies": {
|
|
38
38
|
"@earendil-works/pi-coding-agent": ">=0.74.0",
|
|
@@ -51,7 +51,7 @@
|
|
|
51
51
|
"@earendil-works/pi-coding-agent": "^0.74.0",
|
|
52
52
|
"@opentelemetry/sdk-metrics": "^2.5.1",
|
|
53
53
|
"@opentelemetry/sdk-trace-base": "^2.5.1",
|
|
54
|
-
"@types/node": "^
|
|
54
|
+
"@types/node": "^22.19.0",
|
|
55
55
|
"typescript": "^5.3.3",
|
|
56
56
|
"vite": "^8.0.0",
|
|
57
57
|
"vite-plugin-dts": "^4.5.4",
|