@themoltnet/pi-extension 0.19.3 → 0.19.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +875 -476
- package/package.json +3 -3
package/dist/index.js
CHANGED
|
@@ -850,6 +850,98 @@ var createDiaryGrant = (options) => (options.client ?? client).post({
|
|
|
850
850
|
}
|
|
851
851
|
});
|
|
852
852
|
/**
|
|
853
|
+
* Initiate a diary transfer to another team. Requires diary manage permission.
|
|
854
|
+
*/
|
|
855
|
+
var initiateTransfer = (options) => (options.client ?? client).post({
|
|
856
|
+
security: [
|
|
857
|
+
{
|
|
858
|
+
scheme: "bearer",
|
|
859
|
+
type: "http"
|
|
860
|
+
},
|
|
861
|
+
{
|
|
862
|
+
name: "X-Moltnet-Session-Token",
|
|
863
|
+
type: "apiKey"
|
|
864
|
+
},
|
|
865
|
+
{
|
|
866
|
+
in: "cookie",
|
|
867
|
+
name: "ory_kratos_session",
|
|
868
|
+
type: "apiKey"
|
|
869
|
+
}
|
|
870
|
+
],
|
|
871
|
+
url: "/diaries/{id}/transfer",
|
|
872
|
+
...options,
|
|
873
|
+
headers: {
|
|
874
|
+
"Content-Type": "application/json",
|
|
875
|
+
...options.headers
|
|
876
|
+
}
|
|
877
|
+
});
|
|
878
|
+
/**
|
|
879
|
+
* List pending transfers where the caller is destination team owner.
|
|
880
|
+
*/
|
|
881
|
+
var listPendingTransfers = (options) => (options?.client ?? client).get({
|
|
882
|
+
security: [
|
|
883
|
+
{
|
|
884
|
+
scheme: "bearer",
|
|
885
|
+
type: "http"
|
|
886
|
+
},
|
|
887
|
+
{
|
|
888
|
+
name: "X-Moltnet-Session-Token",
|
|
889
|
+
type: "apiKey"
|
|
890
|
+
},
|
|
891
|
+
{
|
|
892
|
+
in: "cookie",
|
|
893
|
+
name: "ory_kratos_session",
|
|
894
|
+
type: "apiKey"
|
|
895
|
+
}
|
|
896
|
+
],
|
|
897
|
+
url: "/transfers",
|
|
898
|
+
...options
|
|
899
|
+
});
|
|
900
|
+
/**
|
|
901
|
+
* Accept a pending diary transfer. Caller must be destination team owner.
|
|
902
|
+
*/
|
|
903
|
+
var acceptTransfer = (options) => (options.client ?? client).post({
|
|
904
|
+
security: [
|
|
905
|
+
{
|
|
906
|
+
scheme: "bearer",
|
|
907
|
+
type: "http"
|
|
908
|
+
},
|
|
909
|
+
{
|
|
910
|
+
name: "X-Moltnet-Session-Token",
|
|
911
|
+
type: "apiKey"
|
|
912
|
+
},
|
|
913
|
+
{
|
|
914
|
+
in: "cookie",
|
|
915
|
+
name: "ory_kratos_session",
|
|
916
|
+
type: "apiKey"
|
|
917
|
+
}
|
|
918
|
+
],
|
|
919
|
+
url: "/transfers/{transferId}/accept",
|
|
920
|
+
...options
|
|
921
|
+
});
|
|
922
|
+
/**
|
|
923
|
+
* Reject a pending diary transfer.
|
|
924
|
+
*/
|
|
925
|
+
var rejectTransfer = (options) => (options.client ?? client).post({
|
|
926
|
+
security: [
|
|
927
|
+
{
|
|
928
|
+
scheme: "bearer",
|
|
929
|
+
type: "http"
|
|
930
|
+
},
|
|
931
|
+
{
|
|
932
|
+
name: "X-Moltnet-Session-Token",
|
|
933
|
+
type: "apiKey"
|
|
934
|
+
},
|
|
935
|
+
{
|
|
936
|
+
in: "cookie",
|
|
937
|
+
name: "ory_kratos_session",
|
|
938
|
+
type: "apiKey"
|
|
939
|
+
}
|
|
940
|
+
],
|
|
941
|
+
url: "/transfers/{transferId}/reject",
|
|
942
|
+
...options
|
|
943
|
+
});
|
|
944
|
+
/**
|
|
853
945
|
* List diary entries for a specific diary.
|
|
854
946
|
*/
|
|
855
947
|
var listDiaryEntries = (options) => (options.client ?? client).get({
|
|
@@ -2648,6 +2740,41 @@ function createDiaryGrantsNamespace(context) {
|
|
|
2648
2740
|
};
|
|
2649
2741
|
}
|
|
2650
2742
|
//#endregion
|
|
2743
|
+
//#region ../sdk/src/namespaces/diary-transfers.ts
|
|
2744
|
+
function createDiaryTransfersNamespace(context) {
|
|
2745
|
+
const { client, auth } = context;
|
|
2746
|
+
return {
|
|
2747
|
+
async initiate(diaryId, body) {
|
|
2748
|
+
return unwrapResult(await initiateTransfer({
|
|
2749
|
+
client,
|
|
2750
|
+
auth,
|
|
2751
|
+
path: { id: diaryId },
|
|
2752
|
+
body
|
|
2753
|
+
}));
|
|
2754
|
+
},
|
|
2755
|
+
async listPending() {
|
|
2756
|
+
return unwrapResult(await listPendingTransfers({
|
|
2757
|
+
client,
|
|
2758
|
+
auth
|
|
2759
|
+
}));
|
|
2760
|
+
},
|
|
2761
|
+
async accept(transferId) {
|
|
2762
|
+
return unwrapResult(await acceptTransfer({
|
|
2763
|
+
client,
|
|
2764
|
+
auth,
|
|
2765
|
+
path: { transferId }
|
|
2766
|
+
}));
|
|
2767
|
+
},
|
|
2768
|
+
async reject(transferId) {
|
|
2769
|
+
return unwrapResult(await rejectTransfer({
|
|
2770
|
+
client,
|
|
2771
|
+
auth,
|
|
2772
|
+
path: { transferId }
|
|
2773
|
+
}));
|
|
2774
|
+
}
|
|
2775
|
+
};
|
|
2776
|
+
}
|
|
2777
|
+
//#endregion
|
|
2651
2778
|
//#region ../../node_modules/.pnpm/@noble+hashes@1.8.0/node_modules/@noble/hashes/esm/utils.js
|
|
2652
2779
|
/** Checks if something is Uint8Array. Be careful: nodejs Buffer will return true. */
|
|
2653
2780
|
function isBytes$1(a) {
|
|
@@ -4963,6 +5090,7 @@ function createAgent(options) {
|
|
|
4963
5090
|
return {
|
|
4964
5091
|
diaries: createDiariesNamespace(context),
|
|
4965
5092
|
diaryGrants: createDiaryGrantsNamespace(context),
|
|
5093
|
+
diaryTransfers: createDiaryTransfersNamespace(context),
|
|
4966
5094
|
packs: createPacksNamespace(context),
|
|
4967
5095
|
entries: createEntriesNamespace(context),
|
|
4968
5096
|
agents: createAgentsNamespace(context),
|
|
@@ -9060,7 +9188,7 @@ function validateRubricWeights(rubric) {
|
|
|
9060
9188
|
* attaches to any task type. It has four orthogonal sections — pick
|
|
9061
9189
|
* whichever apply per task type:
|
|
9062
9190
|
*
|
|
9063
|
-
* - `gates`
|
|
9191
|
+
* - `gates` Promise-level structural/process checks
|
|
9064
9192
|
* - `assertions` Declarative claims about output JSON
|
|
9065
9193
|
* - `rubric` Weighted-criteria scoring instrument, reused
|
|
9066
9194
|
* verbatim from `./rubric.ts`.
|
|
@@ -9105,17 +9233,27 @@ var CidEqualsSpec = Type$1.Object({
|
|
|
9105
9233
|
path: Type$1.String({ minLength: 1 }),
|
|
9106
9234
|
expected: Type$1.String({ minLength: 1 })
|
|
9107
9235
|
}, { additionalProperties: false });
|
|
9108
|
-
var
|
|
9236
|
+
var SubmitToolCallGate = Type$1.Object({
|
|
9109
9237
|
id: Type$1.String({ minLength: 1 }),
|
|
9110
|
-
kind: Type$1.Literal("
|
|
9111
|
-
|
|
9112
|
-
required: Type$1.Boolean()
|
|
9113
|
-
}, { additionalProperties: false }), Type$1.Object({
|
|
9114
|
-
id: Type$1.String({ minLength: 1 }),
|
|
9115
|
-
kind: Type$1.Literal("cid-equals"),
|
|
9116
|
-
spec: CidEqualsSpec,
|
|
9238
|
+
kind: Type$1.Literal("submit-tool-call"),
|
|
9239
|
+
description: Type$1.String({ minLength: 1 }),
|
|
9117
9240
|
required: Type$1.Boolean()
|
|
9118
|
-
}, { additionalProperties: false })
|
|
9241
|
+
}, { additionalProperties: false });
|
|
9242
|
+
var Gate = Type$1.Union([
|
|
9243
|
+
SubmitToolCallGate,
|
|
9244
|
+
Type$1.Object({
|
|
9245
|
+
id: Type$1.String({ minLength: 1 }),
|
|
9246
|
+
kind: Type$1.Literal("schema-check"),
|
|
9247
|
+
spec: SchemaCheckSpec,
|
|
9248
|
+
required: Type$1.Boolean()
|
|
9249
|
+
}, { additionalProperties: false }),
|
|
9250
|
+
Type$1.Object({
|
|
9251
|
+
id: Type$1.String({ minLength: 1 }),
|
|
9252
|
+
kind: Type$1.Literal("cid-equals"),
|
|
9253
|
+
spec: CidEqualsSpec,
|
|
9254
|
+
required: Type$1.Boolean()
|
|
9255
|
+
}, { additionalProperties: false })
|
|
9256
|
+
], { $id: "Gate" });
|
|
9119
9257
|
var AssertionOp = Type$1.Union([
|
|
9120
9258
|
Type$1.Literal("exists"),
|
|
9121
9259
|
Type$1.Literal("equals"),
|
|
@@ -10342,6 +10480,32 @@ function submitOutputToolName(taskType) {
|
|
|
10342
10480
|
return `submit_${taskType}_output`;
|
|
10343
10481
|
}
|
|
10344
10482
|
//#endregion
|
|
10483
|
+
//#region ../agent-runtime/src/prompts/assemble.ts
|
|
10484
|
+
/**
|
|
10485
|
+
* Render a `PromptSection[]` into final text + structured trace.
|
|
10486
|
+
* Single source of truth for inter-section spacing and header
|
|
10487
|
+
* rendering across all task types.
|
|
10488
|
+
*/
|
|
10489
|
+
function assembleTaskPrompt(taskType, sections) {
|
|
10490
|
+
const trace = [];
|
|
10491
|
+
const rendered = [];
|
|
10492
|
+
for (const section of sections) {
|
|
10493
|
+
trace.push({
|
|
10494
|
+
id: section.id,
|
|
10495
|
+
source: section.source,
|
|
10496
|
+
header: section.header,
|
|
10497
|
+
char_count: section.body.length
|
|
10498
|
+
});
|
|
10499
|
+
if (section.body === "") continue;
|
|
10500
|
+
rendered.push(section.header ? `## ${section.header}\n\n${section.body}` : section.body);
|
|
10501
|
+
}
|
|
10502
|
+
return {
|
|
10503
|
+
text: rendered.join("\n\n"),
|
|
10504
|
+
trace,
|
|
10505
|
+
taskType
|
|
10506
|
+
};
|
|
10507
|
+
}
|
|
10508
|
+
//#endregion
|
|
10345
10509
|
//#region ../agent-runtime/src/prompts/final-output.ts
|
|
10346
10510
|
function buildFinalOutputBlock(opts) {
|
|
10347
10511
|
const { taskType, outputSchemaName, shapeSketch, extraNotes } = opts;
|
|
@@ -10356,7 +10520,8 @@ function buildFinalOutputBlock(opts) {
|
|
|
10356
10520
|
`The runtime captures the validated arguments and ends the session.`,
|
|
10357
10521
|
`Do NOT emit the output as plain assistant text. Do NOT rely on a`,
|
|
10358
10522
|
`JSON-in-message fallback. If you do not call \`${submitTool}\`, the`,
|
|
10359
|
-
`attempt
|
|
10523
|
+
`attempt is recorded as failing the promised submit-output criterion`,
|
|
10524
|
+
`even if the underlying work succeeded.`,
|
|
10360
10525
|
"",
|
|
10361
10526
|
`Your final assistant text before that tool call may explain your work,`,
|
|
10362
10527
|
`but the submit-tool call itself must be your VERY LAST action.`,
|
|
@@ -10394,37 +10559,17 @@ function renderRubricPreambleSection(rubric) {
|
|
|
10394
10559
|
*
|
|
10395
10560
|
* Design note — no pre-resolved `target` projection
|
|
10396
10561
|
* --------------------------------------------------
|
|
10397
|
-
* Earlier drafts hand-wired a `target` bundle (branch, PR url,
|
|
10398
|
-
*
|
|
10399
|
-
*
|
|
10400
|
-
*
|
|
10401
|
-
*
|
|
10402
|
-
*
|
|
10403
|
-
*
|
|
10404
|
-
* fetching their own data.
|
|
10405
|
-
*
|
|
10406
|
-
* Now: the prompt tells the judge the `targetTaskId` and instructs
|
|
10407
|
-
* it to call `moltnet_get_task` + `moltnet_list_task_attempts`
|
|
10408
|
-
* itself. The judge sees whatever the producer's accepted attempt
|
|
10409
|
-
* actually wrote — no projection, no lossiness, no daemon-side
|
|
10410
|
-
* type knowledge required. Different producers (fulfill_brief,
|
|
10411
|
-
* future task types whose products are docs / configs / changes /
|
|
10412
|
-
* anything) work without any code path here.
|
|
10562
|
+
* Earlier drafts hand-wired a `target` bundle (branch, PR url, commits,
|
|
10563
|
+
* summary, diary entry ids) into the prompt before the judge started.
|
|
10564
|
+
* That coupled the daemon to one specific producer shape, forced every
|
|
10565
|
+
* executor to know how to project it, and went stale every time a
|
|
10566
|
+
* producer task type grew a field. Now: the prompt tells the judge
|
|
10567
|
+
* the `targetTaskId` and instructs it to call `moltnet_get_task` +
|
|
10568
|
+
* `moltnet_list_task_attempts` itself.
|
|
10413
10569
|
*/
|
|
10414
10570
|
function buildAssessBriefUserPrompt(input, ctx) {
|
|
10415
10571
|
const rubric = input.successCriteria.rubric;
|
|
10416
|
-
const
|
|
10417
|
-
const preambleSection = renderRubricPreambleSection(rubric) ?? "";
|
|
10418
|
-
const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
|
|
10419
|
-
"### Workspace",
|
|
10420
|
-
"",
|
|
10421
|
-
"This review attempt is running inside a dedicated disposable git",
|
|
10422
|
-
"worktree created for this task. If you need to check out the target",
|
|
10423
|
-
"branch or inspect refs locally, do it only inside this worktree.",
|
|
10424
|
-
ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`. You may replace it with the target branch locally if that helps your inspection.` : "The current checkout is disposable and will be cleaned up when the task ends.",
|
|
10425
|
-
""
|
|
10426
|
-
].join("\n") : "";
|
|
10427
|
-
return [
|
|
10572
|
+
const header = [
|
|
10428
10573
|
"# Assess Brief Judge",
|
|
10429
10574
|
"",
|
|
10430
10575
|
"You are an independent judge. You did NOT produce the work under review.",
|
|
@@ -10432,10 +10577,9 @@ function buildAssessBriefUserPrompt(input, ctx) {
|
|
|
10432
10577
|
"You may read code, commits, and diary entries — but do NOT modify anything.",
|
|
10433
10578
|
"",
|
|
10434
10579
|
`Your diary ID is: ${ctx.diaryId}`,
|
|
10435
|
-
`This task's id is: ${ctx.taskId}
|
|
10436
|
-
|
|
10437
|
-
|
|
10438
|
-
"",
|
|
10580
|
+
`This task's id is: ${ctx.taskId}`
|
|
10581
|
+
].join("\n");
|
|
10582
|
+
const target = [
|
|
10439
10583
|
`**Producer task id:** \`${input.targetTaskId}\``,
|
|
10440
10584
|
"",
|
|
10441
10585
|
"Investigate the producer task before scoring:",
|
|
@@ -10448,10 +10592,9 @@ function buildAssessBriefUserPrompt(input, ctx) {
|
|
|
10448
10592
|
" - `commits[].sha` listed → use `git show <sha>` for individual commits.",
|
|
10449
10593
|
" - `diaryEntryIds[]` listed → fetch each via `moltnet_get_entry` to read the producer's reasoning.",
|
|
10450
10594
|
" - `summary` set → use as orientation, not as ground truth.",
|
|
10451
|
-
"Adapt your investigation to whatever the output actually contains. Score conservatively when the producer's output is opaque or thin."
|
|
10452
|
-
|
|
10453
|
-
|
|
10454
|
-
"",
|
|
10595
|
+
"Adapt your investigation to whatever the output actually contains. Score conservatively when the producer's output is opaque or thin."
|
|
10596
|
+
].join("\n");
|
|
10597
|
+
const diaryQuery = [
|
|
10455
10598
|
`Beyond the explicit \`diaryEntryIds[]\` from step 3, the producer's`,
|
|
10456
10599
|
"attempts auto-tag every entry with the `task:*` provenance namespace.",
|
|
10457
10600
|
"You can pull the full set without enumerating ids by passing the",
|
|
@@ -10462,38 +10605,84 @@ function buildAssessBriefUserPrompt(input, ctx) {
|
|
|
10462
10605
|
"- Just the accepted attempt: add `attemptN: <acceptedAttemptN>`.",
|
|
10463
10606
|
"- The producer plus any prior chain (when a correlationId was set):",
|
|
10464
10607
|
" read it from the task you fetched in step 1 and pass",
|
|
10465
|
-
" `taskFilter: { correlationId: \"<id>\" }`."
|
|
10466
|
-
|
|
10467
|
-
|
|
10468
|
-
|
|
10469
|
-
"
|
|
10470
|
-
"",
|
|
10471
|
-
|
|
10472
|
-
|
|
10473
|
-
|
|
10474
|
-
|
|
10608
|
+
" `taskFilter: { correlationId: \"<id>\" }`."
|
|
10609
|
+
].join("\n");
|
|
10610
|
+
const workspace = ctx.workspace?.mode === "dedicated_worktree" ? [
|
|
10611
|
+
"This review attempt is running inside a dedicated disposable git",
|
|
10612
|
+
"worktree created for this task. If you need to check out the target",
|
|
10613
|
+
"branch or inspect refs locally, do it only inside this worktree.",
|
|
10614
|
+
ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`. You may replace it with the target branch locally if that helps your inspection.` : "The current checkout is disposable and will be cleaned up when the task ends."
|
|
10615
|
+
].join("\n") : "";
|
|
10616
|
+
const preamble = renderRubricPreambleSection(rubric) ?? "";
|
|
10617
|
+
const criteria = renderRubricCriteriaList(rubric);
|
|
10618
|
+
const scoring = [
|
|
10475
10619
|
"- `llm_score`: score 0..1 continuous. `rationale` REQUIRED (2–4 sentences).",
|
|
10476
10620
|
"- `boolean`: score exactly 0 or 1. `rationale` optional.",
|
|
10477
10621
|
"- `deterministic_signature_check`: run `moltnet entry verify` on every diary entry returned by step 3 above AND `git verify-commit` on every commit. Score 1 iff ALL signatures are valid; otherwise 0. Populate `evidence.commitsVerified`, `evidence.commitsTotal`, `evidence.signatureFailures`.",
|
|
10478
10622
|
"",
|
|
10479
|
-
"Write a signed diary entry (tags: \"judgment\", \"assess_brief\") capturing the rationale before reporting structured output."
|
|
10480
|
-
|
|
10481
|
-
|
|
10482
|
-
|
|
10483
|
-
|
|
10484
|
-
|
|
10485
|
-
|
|
10486
|
-
|
|
10487
|
-
|
|
10488
|
-
|
|
10489
|
-
|
|
10490
|
-
|
|
10491
|
-
|
|
10492
|
-
|
|
10493
|
-
|
|
10494
|
-
|
|
10495
|
-
|
|
10496
|
-
|
|
10623
|
+
"Write a signed diary entry (tags: \"judgment\", \"assess_brief\") capturing the rationale before reporting structured output."
|
|
10624
|
+
].join("\n");
|
|
10625
|
+
return assembleTaskPrompt("assess_brief", [
|
|
10626
|
+
{
|
|
10627
|
+
id: "assess_brief.header",
|
|
10628
|
+
source: "header",
|
|
10629
|
+
body: header
|
|
10630
|
+
},
|
|
10631
|
+
{
|
|
10632
|
+
id: "assess_brief.target",
|
|
10633
|
+
source: "task_input",
|
|
10634
|
+
header: "Target of assessment",
|
|
10635
|
+
body: target
|
|
10636
|
+
},
|
|
10637
|
+
{
|
|
10638
|
+
id: "assess_brief.diary_query",
|
|
10639
|
+
source: "static",
|
|
10640
|
+
header: "Querying the producer's diary entries",
|
|
10641
|
+
body: diaryQuery
|
|
10642
|
+
},
|
|
10643
|
+
{
|
|
10644
|
+
id: "assess_brief.workspace",
|
|
10645
|
+
source: "workspace",
|
|
10646
|
+
header: "Workspace",
|
|
10647
|
+
body: workspace
|
|
10648
|
+
},
|
|
10649
|
+
{
|
|
10650
|
+
id: "assess_brief.preamble",
|
|
10651
|
+
source: "rubric_judge",
|
|
10652
|
+
body: preamble
|
|
10653
|
+
},
|
|
10654
|
+
{
|
|
10655
|
+
id: "assess_brief.criteria",
|
|
10656
|
+
source: "rubric_judge",
|
|
10657
|
+
header: "Criteria",
|
|
10658
|
+
body: criteria
|
|
10659
|
+
},
|
|
10660
|
+
{
|
|
10661
|
+
id: "assess_brief.scoring",
|
|
10662
|
+
source: "rubric_judge",
|
|
10663
|
+
header: "Scoring rules",
|
|
10664
|
+
body: scoring
|
|
10665
|
+
},
|
|
10666
|
+
{
|
|
10667
|
+
id: "assess_brief.final_output",
|
|
10668
|
+
source: "final_output",
|
|
10669
|
+
body: buildFinalOutputBlock({
|
|
10670
|
+
taskType: "assess_brief",
|
|
10671
|
+
outputSchemaName: "AssessBriefOutput",
|
|
10672
|
+
shapeSketch: [
|
|
10673
|
+
"{",
|
|
10674
|
+
" \"scores\": [",
|
|
10675
|
+
" { \"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {} }",
|
|
10676
|
+
" ],",
|
|
10677
|
+
" \"composite\": <sum>,",
|
|
10678
|
+
" \"verdict\": \"<1-3 sentence overall>\",",
|
|
10679
|
+
" \"judgeModel\": \"<provider:model>\"",
|
|
10680
|
+
"}"
|
|
10681
|
+
].join("\n"),
|
|
10682
|
+
extraNotes: ["`composite` = Σ(weight_i × score_i) recomputed. The runtime rejects a mismatch."]
|
|
10683
|
+
})
|
|
10684
|
+
}
|
|
10685
|
+
]);
|
|
10497
10686
|
}
|
|
10498
10687
|
//#endregion
|
|
10499
10688
|
//#region ../agent-runtime/src/prompts/self-verification.ts
|
|
@@ -10502,11 +10691,11 @@ function buildSelfVerificationBlock(taskId, criteriaField = "successCriteria") {
|
|
|
10502
10691
|
"## Self-verification",
|
|
10503
10692
|
"",
|
|
10504
10693
|
`If \`input.${criteriaField}\` is set on this task, your final output MUST`,
|
|
10505
|
-
"include a `verification` block.
|
|
10506
|
-
|
|
10507
|
-
"
|
|
10508
|
-
"
|
|
10509
|
-
"
|
|
10694
|
+
"include a `verification` block. Treat every item in those criteria as",
|
|
10695
|
+
"part of the promise you made when you claimed the task. That includes",
|
|
10696
|
+
"the built-in submit-output gate when present. Do not call the submit",
|
|
10697
|
+
"tool until you have computed the verification payload you can honestly",
|
|
10698
|
+
"stand behind.",
|
|
10510
10699
|
"",
|
|
10511
10700
|
`Call \`moltnet_get_task\` with task id \`${taskId}\` and read \`input.${criteriaField}\`.`,
|
|
10512
10701
|
"",
|
|
@@ -10570,22 +10759,13 @@ function buildSelfVerificationBlock(taskId, criteriaField = "successCriteria") {
|
|
|
10570
10759
|
* TODO(#885): add a `moltnet_parallel_explore` custom tool that spawns
|
|
10571
10760
|
* N isolated `createAgentSession` children (one per tag cluster or
|
|
10572
10761
|
* entry_type axis the curator picks after recon), each with a narrow
|
|
10573
|
-
* tool subset and a turn cap, and returns compressed summaries.
|
|
10574
|
-
* curator keeps a warm context and only sees {candidateIds, notes}
|
|
10575
|
-
* per probe — mirrors the fan-out pattern pi-mono SDK example #13
|
|
10576
|
-
* (session runtime) + #05 (custom tools) makes possible. Until that
|
|
10577
|
-
* lands, the `checkpoints[]` output field is the fallback: curator
|
|
10578
|
-
* emits pruned state at phase boundaries so a follow-up session can
|
|
10579
|
-
* resume without replaying the tool history.
|
|
10762
|
+
* tool subset and a turn cap, and returns compressed summaries.
|
|
10580
10763
|
*/
|
|
10581
10764
|
function buildCuratePackUserPrompt(input, ctx) {
|
|
10582
10765
|
const { diaryId, taskPrompt, entryTypes, tagFilters, tokenBudget, recipe } = input;
|
|
10583
10766
|
const entryTypesPinned = Boolean(entryTypes);
|
|
10584
10767
|
const resolvedRecipe = recipe ?? "topic-focused-v1";
|
|
10585
|
-
const
|
|
10586
|
-
const excludeLine = tagFilters?.exclude?.length ? `- Hard exclude (drop if ANY present): ${tagFilters.exclude.map((t) => `\`${t}\``).join(", ")}` : null;
|
|
10587
|
-
const prefixLine = tagFilters?.prefix ? `- Tag prefix hint when inventorying: \`${tagFilters.prefix}\`` : null;
|
|
10588
|
-
return [
|
|
10768
|
+
const header = [
|
|
10589
10769
|
"# Curate Pack Agent",
|
|
10590
10770
|
"",
|
|
10591
10771
|
"You are the curator. Step 1 of the three-session attribution loop:",
|
|
@@ -10593,40 +10773,29 @@ function buildCuratePackUserPrompt(input, ctx) {
|
|
|
10593
10773
|
"will judge. Your output IS the pack — nobody downstream will re-rank.",
|
|
10594
10774
|
"",
|
|
10595
10775
|
`Your agent-session diary ID is: ${ctx.diaryId}`,
|
|
10596
|
-
`This task's id is: ${ctx.taskId}
|
|
10597
|
-
|
|
10598
|
-
|
|
10599
|
-
"",
|
|
10776
|
+
`This task's id is: ${ctx.taskId}`
|
|
10777
|
+
].join("\n");
|
|
10778
|
+
const goal = [
|
|
10600
10779
|
`Build a pack from diary \`${diaryId}\` that faithfully serves this`,
|
|
10601
|
-
|
|
10780
|
+
"prompt:",
|
|
10602
10781
|
"",
|
|
10603
10782
|
`> ${taskPrompt}`,
|
|
10604
10783
|
"",
|
|
10605
10784
|
"What \"faithfully\" means is your call. A broad prompt may warrant 20",
|
|
10606
10785
|
"entries spanning clusters; a sharp one may resolve to 4 high-signal",
|
|
10607
10786
|
"entries. Trust your own judgment on breadth vs. depth — but be able",
|
|
10608
|
-
"to defend it in the summary."
|
|
10609
|
-
|
|
10610
|
-
|
|
10611
|
-
|
|
10612
|
-
|
|
10613
|
-
|
|
10614
|
-
|
|
10615
|
-
|
|
10616
|
-
|
|
10617
|
-
|
|
10618
|
-
|
|
10619
|
-
|
|
10620
|
-
entryTypesPinned ? null : " style content (e.g., \"what shipped this week\"). State your choice",
|
|
10621
|
-
entryTypesPinned ? null : " briefly in the final `summary`.",
|
|
10622
|
-
`- Recipe tag: \`${resolvedRecipe}\` (recorded on pack params)`,
|
|
10623
|
-
tokenBudget ? `- Token budget (soft cap on final pack): ${tokenBudget}. Pick entry count so the pack fits — estimate ~300 tok/entry as a starting heuristic, tighten after inspecting actual content lengths.` : "- No token budget — size the pack to match the prompt, not an arbitrary target.",
|
|
10624
|
-
includeLine,
|
|
10625
|
-
excludeLine,
|
|
10626
|
-
prefixLine,
|
|
10627
|
-
"",
|
|
10628
|
-
"## Tools available (not a recipe — use what the situation calls for)",
|
|
10629
|
-
"",
|
|
10787
|
+
"to defend it in the summary."
|
|
10788
|
+
].join("\n");
|
|
10789
|
+
const constraintsLines = [];
|
|
10790
|
+
if (entryTypesPinned) constraintsLines.push(`- Entry types pinned by imposer (do not widen): ${entryTypes.map((t) => `\`${t}\``).join(", ")}`);
|
|
10791
|
+
else constraintsLines.push("- Entry types: **you choose**. The diary contains three kinds:", " - `episodic` — incident reports, \"what happened and how we fixed it\" narratives.", " - `semantic` — durable decisions, patterns, design rationale.", " - `procedural` — commit audit trails / changelog-style provenance.", " Pick the subset that fits the prompt. For \"failures and workarounds\"", " or \"decisions we made\" you generally do NOT want `procedural` — those", " entries are append-only commit logs and produce changelog-shaped packs.", " Include `procedural` only when the prompt explicitly asks for changelog-", " style content (e.g., \"what shipped this week\"). State your choice", " briefly in the final `summary`.");
|
|
10792
|
+
constraintsLines.push(`- Recipe tag: \`${resolvedRecipe}\` (recorded on pack params)`);
|
|
10793
|
+
constraintsLines.push(tokenBudget ? `- Token budget (soft cap on final pack): ${tokenBudget}. Pick entry count so the pack fits — estimate ~300 tok/entry as a starting heuristic, tighten after inspecting actual content lengths.` : "- No token budget — size the pack to match the prompt, not an arbitrary target.");
|
|
10794
|
+
if (tagFilters?.include?.length) constraintsLines.push(`- Hard include (ALL must be present on an entry): ${tagFilters.include.map((t) => `\`${t}\``).join(", ")}`);
|
|
10795
|
+
if (tagFilters?.exclude?.length) constraintsLines.push(`- Hard exclude (drop if ANY present): ${tagFilters.exclude.map((t) => `\`${t}\``).join(", ")}`);
|
|
10796
|
+
if (tagFilters?.prefix) constraintsLines.push(`- Tag prefix hint when inventorying: \`${tagFilters.prefix}\``);
|
|
10797
|
+
const constraints = constraintsLines.join("\n");
|
|
10798
|
+
const tools = [
|
|
10630
10799
|
"- `moltnet_diary_tags` — tag inventory with counts. Cheap reconnaissance",
|
|
10631
10800
|
" when the prompt implies a scope but not a tag. Pass",
|
|
10632
10801
|
" `prefix: \"task:\"` to enumerate task-provenance tags only",
|
|
@@ -10639,10 +10808,9 @@ function buildCuratePackUserPrompt(input, ctx) {
|
|
|
10639
10808
|
"- `moltnet_list_entries` — multi-tag (AND) listing with optional",
|
|
10640
10809
|
" `excludeTags`, `entryType`, and the same `taskFilter` shorthand.",
|
|
10641
10810
|
"- `moltnet_get_entry` — full entry read, for disambiguation.",
|
|
10642
|
-
"- `moltnet_pack_create` — terminal call that persists the pack."
|
|
10643
|
-
|
|
10644
|
-
|
|
10645
|
-
"",
|
|
10811
|
+
"- `moltnet_pack_create` — terminal call that persists the pack."
|
|
10812
|
+
].join("\n");
|
|
10813
|
+
const exploration = [
|
|
10646
10814
|
"Context is finite. Treat every tool call as buying information against",
|
|
10647
10815
|
"a budget. Some heuristics that tend to work:",
|
|
10648
10816
|
"",
|
|
@@ -10659,57 +10827,110 @@ function buildCuratePackUserPrompt(input, ctx) {
|
|
|
10659
10827
|
"- **Emit a checkpoint if your working set exceeds ~30 candidates.**",
|
|
10660
10828
|
" Write one to the `checkpoints` array (see Output) listing the ids",
|
|
10661
10829
|
" you're keeping and dropping, plus a note explaining the cut. This",
|
|
10662
|
-
" lets a follow-up session resume without replaying your tool history."
|
|
10663
|
-
|
|
10664
|
-
|
|
10665
|
-
"",
|
|
10830
|
+
" lets a follow-up session resume without replaying your tool history."
|
|
10831
|
+
].join("\n");
|
|
10832
|
+
const ranking = [
|
|
10666
10833
|
"Assign integer ranks 1..N, lower = more prominent. Rank reflects",
|
|
10667
10834
|
"relevance to the prompt, NOT recency or entry popularity. Each entry",
|
|
10668
10835
|
"in the output must carry a short `rationale` — one sentence pointing",
|
|
10669
|
-
"at what in its content earned the rank."
|
|
10670
|
-
|
|
10671
|
-
|
|
10672
|
-
"",
|
|
10836
|
+
"at what in its content earned the rank."
|
|
10837
|
+
].join("\n");
|
|
10838
|
+
const persisting = [
|
|
10673
10839
|
"Call `moltnet_pack_create` with:",
|
|
10674
10840
|
"- `entries`: `[{ entryId, rank }]` for each selected entry.",
|
|
10675
|
-
|
|
10841
|
+
`- \`params\`: \`{ recipe: "${resolvedRecipe}", prompt: <the task prompt>, selection_rationale: "<2-sentence summary>" }\`.`,
|
|
10676
10842
|
tokenBudget ? `- \`tokenBudget\`: ${tokenBudget}.` : "- `tokenBudget`: omit.",
|
|
10677
10843
|
"- `pinned: false` (packs in this pipeline are ephemeral by design).",
|
|
10678
10844
|
"",
|
|
10679
10845
|
"The tool returns a JSON payload whose top-level fields are `packId` and",
|
|
10680
10846
|
"`packCid` (NOT `id`). Copy those exact UUID/CID strings verbatim into",
|
|
10681
10847
|
"`packId` and `packCid` in your final output — do not substitute an",
|
|
10682
|
-
"entry id, do not reformat, do not fabricate a UUID."
|
|
10683
|
-
|
|
10684
|
-
|
|
10685
|
-
"",
|
|
10848
|
+
"entry id, do not reformat, do not fabricate a UUID."
|
|
10849
|
+
].join("\n");
|
|
10850
|
+
const hardConstraints = [
|
|
10686
10851
|
"- Do NOT call `moltnet_pack_render` — that belongs to the next session.",
|
|
10687
10852
|
"- Do NOT write diary entries unless curation surfaces a genuine",
|
|
10688
10853
|
" incident worth recording. The curation reasoning lives in the task",
|
|
10689
10854
|
" output, not in the diary.",
|
|
10690
|
-
"- Respect hard include/exclude filters literally."
|
|
10691
|
-
|
|
10692
|
-
|
|
10693
|
-
|
|
10694
|
-
|
|
10695
|
-
|
|
10696
|
-
|
|
10697
|
-
|
|
10698
|
-
|
|
10699
|
-
|
|
10700
|
-
|
|
10701
|
-
|
|
10702
|
-
|
|
10703
|
-
|
|
10704
|
-
|
|
10705
|
-
|
|
10706
|
-
|
|
10707
|
-
|
|
10708
|
-
|
|
10709
|
-
|
|
10710
|
-
|
|
10711
|
-
|
|
10712
|
-
|
|
10855
|
+
"- Respect hard include/exclude filters literally."
|
|
10856
|
+
].join("\n");
|
|
10857
|
+
return assembleTaskPrompt("curate_pack", [
|
|
10858
|
+
{
|
|
10859
|
+
id: "curate_pack.header",
|
|
10860
|
+
source: "header",
|
|
10861
|
+
body: header
|
|
10862
|
+
},
|
|
10863
|
+
{
|
|
10864
|
+
id: "curate_pack.goal",
|
|
10865
|
+
source: "task_input",
|
|
10866
|
+
header: "Goal",
|
|
10867
|
+
body: goal
|
|
10868
|
+
},
|
|
10869
|
+
{
|
|
10870
|
+
id: "curate_pack.constraints",
|
|
10871
|
+
source: "task_input",
|
|
10872
|
+
header: "Constraints",
|
|
10873
|
+
body: constraints
|
|
10874
|
+
},
|
|
10875
|
+
{
|
|
10876
|
+
id: "curate_pack.tools",
|
|
10877
|
+
source: "static",
|
|
10878
|
+
header: "Tools available (not a recipe — use what the situation calls for)",
|
|
10879
|
+
body: tools
|
|
10880
|
+
},
|
|
10881
|
+
{
|
|
10882
|
+
id: "curate_pack.exploration",
|
|
10883
|
+
source: "static",
|
|
10884
|
+
header: "Exploration discipline",
|
|
10885
|
+
body: exploration
|
|
10886
|
+
},
|
|
10887
|
+
{
|
|
10888
|
+
id: "curate_pack.ranking",
|
|
10889
|
+
source: "static",
|
|
10890
|
+
header: "Ranking",
|
|
10891
|
+
body: ranking
|
|
10892
|
+
},
|
|
10893
|
+
{
|
|
10894
|
+
id: "curate_pack.persisting",
|
|
10895
|
+
source: "static",
|
|
10896
|
+
header: "Persisting the pack",
|
|
10897
|
+
body: persisting
|
|
10898
|
+
},
|
|
10899
|
+
{
|
|
10900
|
+
id: "curate_pack.hard_constraints",
|
|
10901
|
+
source: "static",
|
|
10902
|
+
header: "Hard constraints",
|
|
10903
|
+
body: hardConstraints
|
|
10904
|
+
},
|
|
10905
|
+
{
|
|
10906
|
+
id: "curate_pack.verification",
|
|
10907
|
+
source: "verification",
|
|
10908
|
+
body: buildSelfVerificationBlock(ctx.taskId)
|
|
10909
|
+
},
|
|
10910
|
+
{
|
|
10911
|
+
id: "curate_pack.final_output",
|
|
10912
|
+
source: "final_output",
|
|
10913
|
+
body: buildFinalOutputBlock({
|
|
10914
|
+
taskType: "curate_pack",
|
|
10915
|
+
outputSchemaName: "CuratePackOutput",
|
|
10916
|
+
shapeSketch: [
|
|
10917
|
+
"{",
|
|
10918
|
+
" \"packId\": \"<uuid>\",",
|
|
10919
|
+
" \"packCid\": \"<cid>\",",
|
|
10920
|
+
" \"entries\": [",
|
|
10921
|
+
" { \"entryId\": \"<uuid>\", \"rank\": 1, \"rationale\": \"<why>\" }",
|
|
10922
|
+
" ],",
|
|
10923
|
+
" \"recipeParams\": { \"recipe\": \"...\", \"prompt\": \"...\", ... },",
|
|
10924
|
+
" \"checkpoints\": [",
|
|
10925
|
+
" { \"phase\": \"recon\", \"candidateIds\": [...], \"droppedIds\": [...], \"notes\": \"...\" }",
|
|
10926
|
+
" ],",
|
|
10927
|
+
" \"summary\": \"<2-4 sentences: what you looked for, how you narrowed, what defines the final set>\",",
|
|
10928
|
+
" \"verification\": <required iff input.successCriteria; see Self-verification>",
|
|
10929
|
+
"}"
|
|
10930
|
+
].join("\n")
|
|
10931
|
+
})
|
|
10932
|
+
}
|
|
10933
|
+
]);
|
|
10713
10934
|
}
|
|
10714
10935
|
//#endregion
|
|
10715
10936
|
//#region ../agent-runtime/src/prompts/fulfill-brief.ts
|
|
@@ -10722,17 +10943,22 @@ function buildCuratePackUserPrompt(input, ctx) {
|
|
|
10722
10943
|
*/
|
|
10723
10944
|
function buildFulfillBriefUserPrompt(input, ctx) {
|
|
10724
10945
|
const { brief, title, seedFiles, scopeHint } = input;
|
|
10725
|
-
const
|
|
10726
|
-
"
|
|
10946
|
+
const header = [
|
|
10947
|
+
"# Fulfill Brief Agent",
|
|
10727
10948
|
"",
|
|
10728
|
-
"
|
|
10729
|
-
|
|
10730
|
-
""
|
|
10731
|
-
|
|
10732
|
-
|
|
10733
|
-
|
|
10734
|
-
"
|
|
10949
|
+
"You are a software engineering agent working in a sandboxed environment.",
|
|
10950
|
+
"Your workspace is at /workspace (mounted from the host repository).",
|
|
10951
|
+
"The MoltNet runtime instructor (above, in this system prompt) defines the",
|
|
10952
|
+
"invariants for this task: identity, gh authentication, diary discipline,",
|
|
10953
|
+
"and the accountable-commit shape. Follow it for every commit.",
|
|
10954
|
+
"",
|
|
10955
|
+
`## Task: ${title ?? "Fulfill brief"}`,
|
|
10735
10956
|
"",
|
|
10957
|
+
`Task id: \`${ctx.taskId}\``
|
|
10958
|
+
].join("\n");
|
|
10959
|
+
const seedFilesBody = seedFiles?.length ? ["Start by reading these files to ground yourself:", ...seedFiles.map((f) => `- \`${f}\``)].join("\n") : "";
|
|
10960
|
+
const branchSlug = ctx.correlationId ? `moltnet/${ctx.correlationId}/` : scopeHint ? `feat/${scopeHint}-` : "feat/";
|
|
10961
|
+
const correlation = ctx.correlationId ? [
|
|
10736
10962
|
`This task carries correlationId \`${ctx.correlationId}\`. You MUST:`,
|
|
10737
10963
|
"",
|
|
10738
10964
|
`1. Name your branch \`moltnet/${ctx.correlationId}/<short-slug>\` — use a`,
|
|
@@ -10741,39 +10967,14 @@ function buildFulfillBriefUserPrompt(input, ctx) {
|
|
|
10741
10967
|
" your **first** commit on that branch (subsequent commits do not need it).",
|
|
10742
10968
|
"",
|
|
10743
10969
|
"These are recovery anchors for the MoltNet mention-bot. Do not deviate",
|
|
10744
|
-
"from this branch naming scheme when correlationId is set."
|
|
10745
|
-
""
|
|
10970
|
+
"from this branch naming scheme when correlationId is set."
|
|
10746
10971
|
].join("\n") : "";
|
|
10747
|
-
const
|
|
10748
|
-
"### Workspace",
|
|
10749
|
-
"",
|
|
10972
|
+
const workspace = ctx.workspace?.mode === "dedicated_worktree" ? [
|
|
10750
10973
|
"This attempt is running inside a dedicated git worktree created",
|
|
10751
10974
|
"for this task. Do not repurpose or switch the primary checkout.",
|
|
10752
|
-
ctx.workspace.branch ? `The current branch is \`${ctx.workspace.branch}\`. Stay on this branch unless the runtime instructor explicitly tells you otherwise.` : "Stay on the branch that was pre-provisioned for this task."
|
|
10753
|
-
""
|
|
10975
|
+
ctx.workspace.branch ? `The current branch is \`${ctx.workspace.branch}\`. Stay on this branch unless the runtime instructor explicitly tells you otherwise.` : "Stay on the branch that was pre-provisioned for this task."
|
|
10754
10976
|
].join("\n") : "";
|
|
10755
|
-
|
|
10756
|
-
"# Fulfill Brief Agent",
|
|
10757
|
-
"",
|
|
10758
|
-
"You are a software engineering agent working in a sandboxed environment.",
|
|
10759
|
-
"Your workspace is at /workspace (mounted from the host repository).",
|
|
10760
|
-
"The MoltNet runtime instructor (above, in this system prompt) defines the",
|
|
10761
|
-
"invariants for this task: identity, gh authentication, diary discipline,",
|
|
10762
|
-
"and the accountable-commit shape. Follow it for every commit.",
|
|
10763
|
-
"",
|
|
10764
|
-
`## Task: ${title ?? "Fulfill brief"}`,
|
|
10765
|
-
"",
|
|
10766
|
-
`Task id: \`${ctx.taskId}\``,
|
|
10767
|
-
"",
|
|
10768
|
-
"### Brief",
|
|
10769
|
-
"",
|
|
10770
|
-
brief,
|
|
10771
|
-
"",
|
|
10772
|
-
seedSection,
|
|
10773
|
-
correlationSection,
|
|
10774
|
-
workspaceSection,
|
|
10775
|
-
"### Workflow",
|
|
10776
|
-
"",
|
|
10977
|
+
const workflow = [
|
|
10777
10978
|
ctx.workspace?.mode === "dedicated_worktree" ? `1. Use the already-provisioned dedicated worktree branch${ctx.workspace.branch ? ` (\`${ctx.workspace.branch}\`)` : ""}; do not create or switch the primary checkout.` : `1. Create a feature branch (starting prefix suggestion: \`${branchSlug}<short-slug>\`).`,
|
|
10778
10979
|
"2. Understand the problem — read relevant code; do not speculate.",
|
|
10779
10980
|
"3. Implement the change. Keep commits small and coherent.",
|
|
@@ -10781,24 +10982,68 @@ function buildFulfillBriefUserPrompt(input, ctx) {
|
|
|
10781
10982
|
"5. For every commit, create a signed diary entry first via",
|
|
10782
10983
|
" `moltnet_create_entry` and embed its id in the commit trailer",
|
|
10783
10984
|
" `MoltNet-Diary: <id>` (per the runtime instructor).",
|
|
10784
|
-
"6. Push the branch and open a PR."
|
|
10785
|
-
|
|
10786
|
-
|
|
10787
|
-
|
|
10788
|
-
|
|
10789
|
-
|
|
10790
|
-
|
|
10791
|
-
|
|
10792
|
-
|
|
10793
|
-
|
|
10794
|
-
|
|
10795
|
-
|
|
10796
|
-
|
|
10797
|
-
|
|
10798
|
-
|
|
10799
|
-
|
|
10800
|
-
|
|
10801
|
-
|
|
10985
|
+
"6. Push the branch and open a PR."
|
|
10986
|
+
].join("\n");
|
|
10987
|
+
return assembleTaskPrompt("fulfill_brief", [
|
|
10988
|
+
{
|
|
10989
|
+
id: "fulfill_brief.header",
|
|
10990
|
+
source: "header",
|
|
10991
|
+
body: header
|
|
10992
|
+
},
|
|
10993
|
+
{
|
|
10994
|
+
id: "fulfill_brief.brief",
|
|
10995
|
+
source: "task_input",
|
|
10996
|
+
header: "Brief",
|
|
10997
|
+
body: brief
|
|
10998
|
+
},
|
|
10999
|
+
{
|
|
11000
|
+
id: "fulfill_brief.seed_files",
|
|
11001
|
+
source: "task_input",
|
|
11002
|
+
header: "Seed files",
|
|
11003
|
+
body: seedFilesBody
|
|
11004
|
+
},
|
|
11005
|
+
{
|
|
11006
|
+
id: "fulfill_brief.correlation",
|
|
11007
|
+
source: "task_input",
|
|
11008
|
+
header: "Correlation",
|
|
11009
|
+
body: correlation
|
|
11010
|
+
},
|
|
11011
|
+
{
|
|
11012
|
+
id: "fulfill_brief.workspace",
|
|
11013
|
+
source: "workspace",
|
|
11014
|
+
header: "Workspace",
|
|
11015
|
+
body: workspace
|
|
11016
|
+
},
|
|
11017
|
+
{
|
|
11018
|
+
id: "fulfill_brief.workflow",
|
|
11019
|
+
source: "static",
|
|
11020
|
+
header: "Workflow",
|
|
11021
|
+
body: workflow
|
|
11022
|
+
},
|
|
11023
|
+
{
|
|
11024
|
+
id: "fulfill_brief.verification",
|
|
11025
|
+
source: "verification",
|
|
11026
|
+
body: buildSelfVerificationBlock(ctx.taskId)
|
|
11027
|
+
},
|
|
11028
|
+
{
|
|
11029
|
+
id: "fulfill_brief.final_output",
|
|
11030
|
+
source: "final_output",
|
|
11031
|
+
body: buildFinalOutputBlock({
|
|
11032
|
+
taskType: "fulfill_brief",
|
|
11033
|
+
outputSchemaName: "FulfillBriefOutput",
|
|
11034
|
+
shapeSketch: [
|
|
11035
|
+
"{",
|
|
11036
|
+
" \"branch\": \"<branch-name>\",",
|
|
11037
|
+
" \"commits\": [{ \"sha\": \"...\", \"message\": \"...\", \"diaryEntryId\": \"...\" }],",
|
|
11038
|
+
" \"pullRequestUrl\": \"<url-or-null>\",",
|
|
11039
|
+
" \"diaryEntryIds\": [\"...\"],",
|
|
11040
|
+
" \"summary\": \"<1-3 sentence recap>\",",
|
|
11041
|
+
" \"verification\": <required iff input.successCriteria; see Self-verification>",
|
|
11042
|
+
"}"
|
|
11043
|
+
].join("\n")
|
|
11044
|
+
})
|
|
11045
|
+
}
|
|
11046
|
+
]);
|
|
10802
11047
|
}
|
|
10803
11048
|
//#endregion
|
|
10804
11049
|
//#region ../agent-runtime/src/prompts/judge-eval-attempt.ts
|
|
@@ -10807,46 +11052,18 @@ function buildJudgeEvalAttemptUserPrompt(input, ctx) {
|
|
|
10807
11052
|
if (!rubric) throw new Error("judge_eval_attempt requires successCriteria.rubric — none present");
|
|
10808
11053
|
const escapeCell = (s) => s.replace(/\\/g, "\\\\").replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
|
|
10809
11054
|
const criteriaTable = rubric.criteria.map((c) => `| \`${c.id}\` | ${c.weight.toFixed(3)} | ${c.scoring} | ${escapeCell(c.description)} |`).join("\n");
|
|
10810
|
-
const
|
|
10811
|
-
|
|
10812
|
-
outputSchemaName: "JudgeEvalAttemptOutput",
|
|
10813
|
-
shapeSketch: [
|
|
10814
|
-
"{",
|
|
10815
|
-
` "targetTaskId": "${input.targetTaskId}",`,
|
|
10816
|
-
` "targetAttemptN": ${input.targetAttemptN},`,
|
|
10817
|
-
" \"variantLabel\": \"<from producer input>\",",
|
|
10818
|
-
" \"scores\": [ { \"criterionId\": \"...\", \"score\": 0..1, \"rationale\": \"...\", \"assertions\": [...]? } ],",
|
|
10819
|
-
" \"composite\": <Σ(weight × score), 0..1>,",
|
|
10820
|
-
" \"verdict\": \"<1-3 sentences>\",",
|
|
10821
|
-
" \"judgeModel\": \"<id>\", // optional",
|
|
10822
|
-
" \"traceparent\": \"<from claim>\"",
|
|
10823
|
-
"}"
|
|
10824
|
-
].join("\n")
|
|
10825
|
-
});
|
|
10826
|
-
const workspaceSection = ctx.workspace?.attached === true ? [
|
|
10827
|
-
"### Workspace",
|
|
11055
|
+
const header = [
|
|
11056
|
+
"# Judge Eval Attempt",
|
|
10828
11057
|
"",
|
|
10829
|
-
"Your current workspace is already attached to the producer attempt",
|
|
10830
|
-
"you are judging. Inspect files directly from the current workspace",
|
|
10831
|
-
"root instead of inventing synthetic `artifact_<taskId>` paths.",
|
|
10832
|
-
"If the accepted attempt output lists `artifacts[].path`, treat those",
|
|
10833
|
-
"paths as relative to the current workspace root unless the output",
|
|
10834
|
-
"explicitly says otherwise.",
|
|
10835
|
-
ctx.workspace.mode === "dedicated_worktree" ? `This attachment is a dedicated producer worktree${ctx.workspace.branch ? ` on branch \`${ctx.workspace.branch}\`` : ""}.` : ctx.workspace.mode === "scratch_mount" ? "This workspace is a fresh judge-owned scratch copy of the producer workspace." : "This attachment is the producer shared workspace mounted with shadow writes for safe inspection.",
|
|
10836
|
-
""
|
|
10837
|
-
].join("\n") : "";
|
|
10838
|
-
return [
|
|
10839
|
-
"# Judge Eval Attempt\n",
|
|
10840
11058
|
"You are grading one accepted `run_eval` producer attempt against a hidden",
|
|
10841
11059
|
"judge rubric. Do not delegate to subagents. Grade in this session only.",
|
|
10842
11060
|
"",
|
|
10843
11061
|
`Task id: \`${ctx.taskId}\``,
|
|
10844
11062
|
`Diary: \`${ctx.diaryId}\``,
|
|
10845
11063
|
`Producer task: \`${input.targetTaskId}\``,
|
|
10846
|
-
`Producer attempt: \`${input.targetAttemptN}
|
|
10847
|
-
|
|
10848
|
-
|
|
10849
|
-
"",
|
|
11064
|
+
`Producer attempt: \`${input.targetAttemptN}\``
|
|
11065
|
+
].join("\n");
|
|
11066
|
+
const evidence = [
|
|
10850
11067
|
`1. Call \`moltnet_get_task\` with taskId=\`${input.targetTaskId}\`.`,
|
|
10851
11068
|
`2. Call \`moltnet_list_task_attempts\` with taskId=\`${input.targetTaskId}\` and inspect the accepted attempt matching \`${input.targetAttemptN}\`.`,
|
|
10852
11069
|
`3. Call \`moltnet_list_task_messages\` with taskId=\`${input.targetTaskId}\`, attemptN=\`${input.targetAttemptN}\` to inspect the producer's turn-by-turn behavior.`,
|
|
@@ -10854,32 +11071,82 @@ function buildJudgeEvalAttemptUserPrompt(input, ctx) {
|
|
|
10854
11071
|
" artifacts or workspace evidence available in your environment.",
|
|
10855
11072
|
" Read artifact files from the mounted producer workspace when present;",
|
|
10856
11073
|
" do not assume detached `artifact_<taskId>` directories exist.",
|
|
10857
|
-
"5. Score strictly against the rubric below."
|
|
10858
|
-
|
|
10859
|
-
|
|
10860
|
-
"
|
|
10861
|
-
"",
|
|
10862
|
-
|
|
11074
|
+
"5. Score strictly against the rubric below."
|
|
11075
|
+
].join("\n");
|
|
11076
|
+
const workspace = ctx.workspace?.attached === true ? [
|
|
11077
|
+
"Your current workspace is already attached to the producer attempt",
|
|
11078
|
+
"you are judging. Inspect files directly from the current workspace",
|
|
11079
|
+
"root instead of inventing synthetic `artifact_<taskId>` paths.",
|
|
11080
|
+
"If the accepted attempt output lists `artifacts[].path`, treat those",
|
|
11081
|
+
"paths as relative to the current workspace root unless the output",
|
|
11082
|
+
"explicitly says otherwise.",
|
|
11083
|
+
ctx.workspace.mode === "dedicated_worktree" ? `This attachment is a dedicated producer worktree${ctx.workspace.branch ? ` on branch \`${ctx.workspace.branch}\`` : ""}.` : ctx.workspace.mode === "scratch_mount" ? "This workspace is a fresh judge-owned scratch copy of the producer workspace." : "This attachment is the producer shared workspace mounted with shadow writes for safe inspection."
|
|
11084
|
+
].join("\n") : "";
|
|
11085
|
+
const rubricBody = [
|
|
11086
|
+
rubric.preamble ?? "",
|
|
10863
11087
|
"| Criterion | Weight | Scoring | Description |",
|
|
10864
11088
|
"| --- | --- | --- | --- |",
|
|
10865
|
-
criteriaTable
|
|
10866
|
-
"",
|
|
10867
|
-
"### Composite arithmetic",
|
|
10868
|
-
"",
|
|
10869
|
-
"Your `composite` MUST equal `Σ(criterion.weight × score)` over the rubric",
|
|
10870
|
-
"criteria. Drift > 0.001 is rejected.",
|
|
10871
|
-
"",
|
|
10872
|
-
finalOutputBlock
|
|
11089
|
+
criteriaTable
|
|
10873
11090
|
].filter((s) => s !== "").join("\n");
|
|
11091
|
+
const composite = ["Your `composite` MUST equal `Σ(criterion.weight × score)` over the rubric", "criteria. Drift > 0.001 is rejected."].join("\n");
|
|
11092
|
+
return assembleTaskPrompt("judge_eval_attempt", [
|
|
11093
|
+
{
|
|
11094
|
+
id: "judge_eval_attempt.header",
|
|
11095
|
+
source: "header",
|
|
11096
|
+
body: header
|
|
11097
|
+
},
|
|
11098
|
+
{
|
|
11099
|
+
id: "judge_eval_attempt.evidence",
|
|
11100
|
+
source: "evidence",
|
|
11101
|
+
header: "Evidence gathering",
|
|
11102
|
+
body: evidence
|
|
11103
|
+
},
|
|
11104
|
+
{
|
|
11105
|
+
id: "judge_eval_attempt.workspace",
|
|
11106
|
+
source: "workspace",
|
|
11107
|
+
header: "Workspace",
|
|
11108
|
+
body: workspace
|
|
11109
|
+
},
|
|
11110
|
+
{
|
|
11111
|
+
id: "judge_eval_attempt.rubric",
|
|
11112
|
+
source: "rubric_judge",
|
|
11113
|
+
header: "Rubric",
|
|
11114
|
+
body: rubricBody
|
|
11115
|
+
},
|
|
11116
|
+
{
|
|
11117
|
+
id: "judge_eval_attempt.composite",
|
|
11118
|
+
source: "rubric_judge",
|
|
11119
|
+
header: "Composite arithmetic",
|
|
11120
|
+
body: composite
|
|
11121
|
+
},
|
|
11122
|
+
{
|
|
11123
|
+
id: "judge_eval_attempt.final_output",
|
|
11124
|
+
source: "final_output",
|
|
11125
|
+
body: buildFinalOutputBlock({
|
|
11126
|
+
taskType: "judge_eval_attempt",
|
|
11127
|
+
outputSchemaName: "JudgeEvalAttemptOutput",
|
|
11128
|
+
shapeSketch: [
|
|
11129
|
+
"{",
|
|
11130
|
+
` "targetTaskId": "${input.targetTaskId}",`,
|
|
11131
|
+
` "targetAttemptN": ${input.targetAttemptN},`,
|
|
11132
|
+
" \"variantLabel\": \"<from producer input>\",",
|
|
11133
|
+
" \"scores\": [ { \"criterionId\": \"...\", \"score\": 0..1, \"rationale\": \"...\", \"assertions\": [...]? } ],",
|
|
11134
|
+
" \"composite\": <Σ(weight × score), 0..1>,",
|
|
11135
|
+
" \"verdict\": \"<1-3 sentences>\",",
|
|
11136
|
+
" \"judgeModel\": \"<id>\", // optional",
|
|
11137
|
+
" \"traceparent\": \"<from claim>\"",
|
|
11138
|
+
"}"
|
|
11139
|
+
].join("\n")
|
|
11140
|
+
})
|
|
11141
|
+
}
|
|
11142
|
+
]);
|
|
10874
11143
|
}
|
|
10875
11144
|
//#endregion
|
|
10876
11145
|
//#region ../agent-runtime/src/prompts/judge-pack.ts
|
|
10877
11146
|
function buildJudgePackUserPrompt(input, ctx) {
|
|
10878
11147
|
const { renderedPackId, sourcePackId, successCriteria } = input;
|
|
10879
11148
|
const rubric = successCriteria.rubric;
|
|
10880
|
-
const
|
|
10881
|
-
const preambleSection = renderRubricPreambleSection(rubric);
|
|
10882
|
-
return [
|
|
11149
|
+
const header = [
|
|
10883
11150
|
"# Judge Pack Agent",
|
|
10884
11151
|
"",
|
|
10885
11152
|
"You are an independent judge. You did NOT curate or render the pack",
|
|
@@ -10888,17 +11155,15 @@ function buildJudgePackUserPrompt(input, ctx) {
|
|
|
10888
11155
|
"referenced entries — but do NOT modify anything.",
|
|
10889
11156
|
"",
|
|
10890
11157
|
`Your diary ID is: ${ctx.diaryId}`,
|
|
10891
|
-
`This task's id is: ${ctx.taskId}
|
|
10892
|
-
|
|
10893
|
-
|
|
10894
|
-
"",
|
|
11158
|
+
`This task's id is: ${ctx.taskId}`
|
|
11159
|
+
].join("\n");
|
|
11160
|
+
const target = [
|
|
10895
11161
|
`- **Rendered pack**: \`${renderedPackId}\``,
|
|
10896
11162
|
`- **Source pack**: \`${sourcePackId}\``,
|
|
10897
|
-
`- **Rubric**: \`${rubric.rubricId}\` v${rubric.version}
|
|
10898
|
-
|
|
10899
|
-
|
|
10900
|
-
|
|
10901
|
-
"",
|
|
11163
|
+
`- **Rubric**: \`${rubric.rubricId}\` v${rubric.version}`
|
|
11164
|
+
].join("\n");
|
|
11165
|
+
const preamble = renderRubricPreambleSection(rubric) ?? "";
|
|
11166
|
+
const workflow = [
|
|
10902
11167
|
"1. Call `moltnet_rendered_pack_get` for the rendered pack. Keep the",
|
|
10903
11168
|
" `content` string — you will score it.",
|
|
10904
11169
|
"2. Call `moltnet_pack_get` with `expandEntries: true` for the source",
|
|
@@ -10906,14 +11171,10 @@ function buildJudgePackUserPrompt(input, ctx) {
|
|
|
10906
11171
|
"3. For each criterion, score according to its `scoring` mode (see",
|
|
10907
11172
|
" Scoring rules below). Produce rationales where required.",
|
|
10908
11173
|
"4. Compute `composite = Σ(weight_i × score_i)` and sanity-check it",
|
|
10909
|
-
" equals the sum you will emit — the runtime rejects mismatches."
|
|
10910
|
-
|
|
10911
|
-
|
|
10912
|
-
|
|
10913
|
-
criteriaList,
|
|
10914
|
-
"",
|
|
10915
|
-
"### Scoring rules",
|
|
10916
|
-
"",
|
|
11174
|
+
" equals the sum you will emit — the runtime rejects mismatches."
|
|
11175
|
+
].join("\n");
|
|
11176
|
+
const criteria = renderRubricCriteriaList(rubric);
|
|
11177
|
+
const scoring = [
|
|
10917
11178
|
"- `llm_score`: score 0..1 continuous. `rationale` REQUIRED (2–4",
|
|
10918
11179
|
" sentences pointing at specific evidence in the rendered content or",
|
|
10919
11180
|
" the source entries). NOTE: this mode smooths individual failures",
|
|
@@ -10952,80 +11213,95 @@ function buildJudgePackUserPrompt(input, ctx) {
|
|
|
10952
11213
|
"- `deterministic_coverage_check`: for every source entry, check",
|
|
10953
11214
|
" whether its `entryId` (or a stable reference like title + CID",
|
|
10954
11215
|
" prefix) appears in the rendered `content`. Score 1 iff coverage is",
|
|
10955
|
-
" complete; otherwise 0. Populate `evidence` with `{ covered, total, missing: [entryIds] }`."
|
|
10956
|
-
|
|
10957
|
-
|
|
10958
|
-
"",
|
|
11216
|
+
" complete; otherwise 0. Populate `evidence` with `{ covered, total, missing: [entryIds] }`."
|
|
11217
|
+
].join("\n");
|
|
11218
|
+
const constraints = [
|
|
10959
11219
|
"- Do NOT call `moltnet_pack_create` or `moltnet_pack_render`.",
|
|
10960
11220
|
"- Do NOT fetch the curator's or renderer's task output directly — they",
|
|
10961
11221
|
" may leak guidance that biases judgment.",
|
|
10962
11222
|
"- Keep the session focused on scoring; no speculative exploration.",
|
|
10963
11223
|
"",
|
|
10964
|
-
`Write a signed diary entry (tags: \`judgment\`, \`judge_pack\`, \`rubric:${rubric.rubricId}\`) capturing the rationale before
|
|
10965
|
-
|
|
10966
|
-
|
|
10967
|
-
|
|
10968
|
-
|
|
10969
|
-
|
|
10970
|
-
|
|
10971
|
-
|
|
10972
|
-
|
|
10973
|
-
|
|
10974
|
-
|
|
10975
|
-
|
|
10976
|
-
|
|
10977
|
-
|
|
10978
|
-
|
|
10979
|
-
|
|
10980
|
-
|
|
10981
|
-
|
|
10982
|
-
|
|
10983
|
-
|
|
10984
|
-
|
|
10985
|
-
|
|
10986
|
-
|
|
10987
|
-
|
|
10988
|
-
|
|
10989
|
-
|
|
10990
|
-
|
|
10991
|
-
|
|
10992
|
-
|
|
10993
|
-
|
|
10994
|
-
|
|
11224
|
+
`Write a signed diary entry (tags: \`judgment\`, \`judge_pack\`, \`rubric:${rubric.rubricId}\`) capturing the rationale before reporting structured output.`
|
|
11225
|
+
].join("\n");
|
|
11226
|
+
return assembleTaskPrompt("judge_pack", [
|
|
11227
|
+
{
|
|
11228
|
+
id: "judge_pack.header",
|
|
11229
|
+
source: "header",
|
|
11230
|
+
body: header
|
|
11231
|
+
},
|
|
11232
|
+
{
|
|
11233
|
+
id: "judge_pack.target",
|
|
11234
|
+
source: "task_input",
|
|
11235
|
+
header: "Target",
|
|
11236
|
+
body: target
|
|
11237
|
+
},
|
|
11238
|
+
{
|
|
11239
|
+
id: "judge_pack.preamble",
|
|
11240
|
+
source: "rubric_judge",
|
|
11241
|
+
body: preamble
|
|
11242
|
+
},
|
|
11243
|
+
{
|
|
11244
|
+
id: "judge_pack.workflow",
|
|
11245
|
+
source: "static",
|
|
11246
|
+
header: "Workflow",
|
|
11247
|
+
body: workflow
|
|
11248
|
+
},
|
|
11249
|
+
{
|
|
11250
|
+
id: "judge_pack.criteria",
|
|
11251
|
+
source: "rubric_judge",
|
|
11252
|
+
header: "Criteria",
|
|
11253
|
+
body: criteria
|
|
11254
|
+
},
|
|
11255
|
+
{
|
|
11256
|
+
id: "judge_pack.scoring",
|
|
11257
|
+
source: "rubric_judge",
|
|
11258
|
+
header: "Scoring rules",
|
|
11259
|
+
body: scoring
|
|
11260
|
+
},
|
|
11261
|
+
{
|
|
11262
|
+
id: "judge_pack.constraints",
|
|
11263
|
+
source: "static",
|
|
11264
|
+
header: "Constraints",
|
|
11265
|
+
body: constraints
|
|
11266
|
+
},
|
|
11267
|
+
{
|
|
11268
|
+
id: "judge_pack.final_output",
|
|
11269
|
+
source: "final_output",
|
|
11270
|
+
body: buildFinalOutputBlock({
|
|
11271
|
+
taskType: "judge_pack",
|
|
11272
|
+
outputSchemaName: "JudgePackOutput",
|
|
11273
|
+
shapeSketch: [
|
|
11274
|
+
"{",
|
|
11275
|
+
" \"scores\": [",
|
|
11276
|
+
" { \"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {} },",
|
|
11277
|
+
" {",
|
|
11278
|
+
" \"criterionId\": \"<llm_checklist criterion>\",",
|
|
11279
|
+
" \"score\": 0, // 1 iff every assertion passed",
|
|
11280
|
+
" \"assertions\": [",
|
|
11281
|
+
" { \"id\": \"claim-1\", \"text\": \"...\", \"passed\": false, \"evidence\": \"...\" }",
|
|
11282
|
+
" ]",
|
|
11283
|
+
" }",
|
|
11284
|
+
" ],",
|
|
11285
|
+
" \"composite\": <sum-of-weighted-scores>,",
|
|
11286
|
+
" \"verdict\": \"<1-3 sentence overall>\",",
|
|
11287
|
+
" \"judgeModel\": \"<provider:model>\",",
|
|
11288
|
+
" \"rendererBinaryCid\": \"<cid-string-only-if-available>\"",
|
|
11289
|
+
"}"
|
|
11290
|
+
].join("\n"),
|
|
11291
|
+
extraNotes: [
|
|
11292
|
+
"Omit `rendererBinaryCid` entirely when no binary CID is exposed by",
|
|
11293
|
+
"`moltnet_rendered_pack_get`. Do NOT emit `null` — the field is",
|
|
11294
|
+
"optional and absence is the correct representation when unavailable."
|
|
11295
|
+
]
|
|
11296
|
+
})
|
|
11297
|
+
}
|
|
11298
|
+
]);
|
|
10995
11299
|
}
|
|
10996
11300
|
//#endregion
|
|
10997
11301
|
//#region ../agent-runtime/src/prompts/pr-review.ts
|
|
10998
11302
|
function buildPrReviewUserPrompt(input, ctx) {
|
|
10999
11303
|
const rubric = input.successCriteria.rubric;
|
|
11000
|
-
const
|
|
11001
|
-
const preambleSection = renderRubricPreambleSection(rubric);
|
|
11002
|
-
const taskPromptSection = input.taskPrompt ? [
|
|
11003
|
-
"## Task-specific instructions",
|
|
11004
|
-
"",
|
|
11005
|
-
input.taskPrompt,
|
|
11006
|
-
""
|
|
11007
|
-
].join("\n") : "";
|
|
11008
|
-
const resourceSection = input.subject.resourceUrls && input.subject.resourceUrls.length > 0 ? [
|
|
11009
|
-
"### Resources",
|
|
11010
|
-
"",
|
|
11011
|
-
...input.subject.resourceUrls.map((url) => `- ${url}`),
|
|
11012
|
-
""
|
|
11013
|
-
].join("\n") : "";
|
|
11014
|
-
const hintsSection = input.subject.inspectionHints && input.subject.inspectionHints.length > 0 ? [
|
|
11015
|
-
"### Inspection hints",
|
|
11016
|
-
"",
|
|
11017
|
-
...input.subject.inspectionHints.map((hint) => `- ${hint}`),
|
|
11018
|
-
""
|
|
11019
|
-
].join("\n") : "";
|
|
11020
|
-
const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
|
|
11021
|
-
"### Workspace",
|
|
11022
|
-
"",
|
|
11023
|
-
"This review attempt is running inside a dedicated disposable git",
|
|
11024
|
-
"worktree. Inspect and reason inside this workspace only.",
|
|
11025
|
-
ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`.` : "The current checkout is disposable and will be cleaned up when the task ends.",
|
|
11026
|
-
""
|
|
11027
|
-
].join("\n") : "";
|
|
11028
|
-
return [
|
|
11304
|
+
const header = [
|
|
11029
11305
|
"# Review Agent",
|
|
11030
11306
|
"",
|
|
11031
11307
|
"You are an independent judge. You did NOT produce the subject under review.",
|
|
@@ -11033,29 +11309,30 @@ function buildPrReviewUserPrompt(input, ctx) {
|
|
|
11033
11309
|
"You may inspect the local workspace and the referenced resources, but do NOT modify anything.",
|
|
11034
11310
|
"",
|
|
11035
11311
|
`Your diary ID is: ${ctx.diaryId}`,
|
|
11036
|
-
`This task's id is: ${ctx.taskId}
|
|
11037
|
-
|
|
11038
|
-
|
|
11039
|
-
"",
|
|
11312
|
+
`This task's id is: ${ctx.taskId}`
|
|
11313
|
+
].join("\n");
|
|
11314
|
+
const subject = [
|
|
11040
11315
|
`**Title:** ${input.subject.title}`,
|
|
11041
11316
|
"",
|
|
11042
|
-
input.subject.summary
|
|
11043
|
-
|
|
11044
|
-
|
|
11045
|
-
|
|
11046
|
-
|
|
11047
|
-
"
|
|
11048
|
-
"",
|
|
11317
|
+
input.subject.summary
|
|
11318
|
+
].join("\n");
|
|
11319
|
+
const resources = input.subject.resourceUrls && input.subject.resourceUrls.length > 0 ? input.subject.resourceUrls.map((url) => `- ${url}`).join("\n") : "";
|
|
11320
|
+
const hints = input.subject.inspectionHints && input.subject.inspectionHints.length > 0 ? input.subject.inspectionHints.map((hint) => `- ${hint}`).join("\n") : "";
|
|
11321
|
+
const workspace = ctx.workspace?.mode === "dedicated_worktree" ? [
|
|
11322
|
+
"This review attempt is running inside a dedicated disposable git",
|
|
11323
|
+
"worktree. Inspect and reason inside this workspace only.",
|
|
11324
|
+
ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`.` : "The current checkout is disposable and will be cleaned up when the task ends."
|
|
11325
|
+
].join("\n") : "";
|
|
11326
|
+
const executionContract = [
|
|
11049
11327
|
"Treat the provided subject, resources, inspection hints, and any",
|
|
11050
11328
|
"task-specific instructions as the full",
|
|
11051
11329
|
"review contract for this task.",
|
|
11052
11330
|
"",
|
|
11053
11331
|
"If the task-specific instructions or inspection hints require an outward action tied to the review",
|
|
11054
11332
|
"(for example publishing the judgment somewhere), perform that action as",
|
|
11055
|
-
"part of the task before reporting structured output."
|
|
11056
|
-
|
|
11057
|
-
|
|
11058
|
-
"",
|
|
11333
|
+
"part of the task before reporting structured output."
|
|
11334
|
+
].join("\n");
|
|
11335
|
+
const workflow = [
|
|
11059
11336
|
"1. Read the subject summary, resources, inspection hints, and any",
|
|
11060
11337
|
" task-specific instructions before scoring.",
|
|
11061
11338
|
"2. Inspect the target artefact directly using the tools and resources the",
|
|
@@ -11065,39 +11342,104 @@ function buildPrReviewUserPrompt(input, ctx) {
|
|
|
11065
11342
|
"4. Apply the rubric strictly. This task is about complexity and",
|
|
11066
11343
|
" reviewability, not correctness or feature desirability.",
|
|
11067
11344
|
"5. Perform any required outward action before emitting the final",
|
|
11068
|
-
" structured output."
|
|
11069
|
-
|
|
11070
|
-
|
|
11071
|
-
|
|
11072
|
-
|
|
11073
|
-
|
|
11074
|
-
criteriaList,
|
|
11075
|
-
"",
|
|
11076
|
-
"### Scoring rules",
|
|
11077
|
-
"",
|
|
11345
|
+
" structured output."
|
|
11346
|
+
].join("\n");
|
|
11347
|
+
const taskPromptSection = input.taskPrompt ?? "";
|
|
11348
|
+
const preamble = renderRubricPreambleSection(rubric) ?? "";
|
|
11349
|
+
const criteria = renderRubricCriteriaList(rubric);
|
|
11350
|
+
const scoring = [
|
|
11078
11351
|
"- Every criterion uses binary scoring only.",
|
|
11079
11352
|
"- Score `1` when the subject clearly clears the criterion.",
|
|
11080
11353
|
"- Score `0` when it does not, or when the evidence is ambiguous.",
|
|
11081
11354
|
"- `rationale` is REQUIRED for every score. Keep it concrete and audit-friendly.",
|
|
11082
11355
|
"- Compute `composite = Σ(weight_i × score_i)` exactly; the runtime rejects mismatches.",
|
|
11083
11356
|
"",
|
|
11084
|
-
"Write a signed diary entry (tags: `judgment`, `pr_review`) capturing the rationale before reporting structured output."
|
|
11085
|
-
|
|
11086
|
-
|
|
11087
|
-
|
|
11088
|
-
|
|
11089
|
-
|
|
11090
|
-
|
|
11091
|
-
|
|
11092
|
-
|
|
11093
|
-
|
|
11094
|
-
|
|
11095
|
-
|
|
11096
|
-
|
|
11097
|
-
|
|
11098
|
-
|
|
11099
|
-
|
|
11100
|
-
|
|
11357
|
+
"Write a signed diary entry (tags: `judgment`, `pr_review`) capturing the rationale before reporting structured output."
|
|
11358
|
+
].join("\n");
|
|
11359
|
+
return assembleTaskPrompt("pr_review", [
|
|
11360
|
+
{
|
|
11361
|
+
id: "pr_review.header",
|
|
11362
|
+
source: "header",
|
|
11363
|
+
body: header
|
|
11364
|
+
},
|
|
11365
|
+
{
|
|
11366
|
+
id: "pr_review.subject",
|
|
11367
|
+
source: "task_input",
|
|
11368
|
+
header: "Subject",
|
|
11369
|
+
body: subject
|
|
11370
|
+
},
|
|
11371
|
+
{
|
|
11372
|
+
id: "pr_review.resources",
|
|
11373
|
+
source: "task_input",
|
|
11374
|
+
header: "Resources",
|
|
11375
|
+
body: resources
|
|
11376
|
+
},
|
|
11377
|
+
{
|
|
11378
|
+
id: "pr_review.hints",
|
|
11379
|
+
source: "task_input",
|
|
11380
|
+
header: "Inspection hints",
|
|
11381
|
+
body: hints
|
|
11382
|
+
},
|
|
11383
|
+
{
|
|
11384
|
+
id: "pr_review.workspace",
|
|
11385
|
+
source: "workspace",
|
|
11386
|
+
header: "Workspace",
|
|
11387
|
+
body: workspace
|
|
11388
|
+
},
|
|
11389
|
+
{
|
|
11390
|
+
id: "pr_review.execution_contract",
|
|
11391
|
+
source: "static",
|
|
11392
|
+
header: "Execution contract",
|
|
11393
|
+
body: executionContract
|
|
11394
|
+
},
|
|
11395
|
+
{
|
|
11396
|
+
id: "pr_review.workflow",
|
|
11397
|
+
source: "static",
|
|
11398
|
+
header: "Review workflow",
|
|
11399
|
+
body: workflow
|
|
11400
|
+
},
|
|
11401
|
+
{
|
|
11402
|
+
id: "pr_review.task_prompt",
|
|
11403
|
+
source: "task_input",
|
|
11404
|
+
header: "Task-specific instructions",
|
|
11405
|
+
body: taskPromptSection
|
|
11406
|
+
},
|
|
11407
|
+
{
|
|
11408
|
+
id: "pr_review.preamble",
|
|
11409
|
+
source: "rubric_judge",
|
|
11410
|
+
body: preamble
|
|
11411
|
+
},
|
|
11412
|
+
{
|
|
11413
|
+
id: "pr_review.criteria",
|
|
11414
|
+
source: "rubric_judge",
|
|
11415
|
+
header: "Criteria",
|
|
11416
|
+
body: criteria
|
|
11417
|
+
},
|
|
11418
|
+
{
|
|
11419
|
+
id: "pr_review.scoring",
|
|
11420
|
+
source: "rubric_judge",
|
|
11421
|
+
header: "Scoring rules",
|
|
11422
|
+
body: scoring
|
|
11423
|
+
},
|
|
11424
|
+
{
|
|
11425
|
+
id: "pr_review.final_output",
|
|
11426
|
+
source: "final_output",
|
|
11427
|
+
body: buildFinalOutputBlock({
|
|
11428
|
+
taskType: "pr_review",
|
|
11429
|
+
outputSchemaName: "PrReviewOutput",
|
|
11430
|
+
shapeSketch: [
|
|
11431
|
+
"{",
|
|
11432
|
+
" \"scores\": [",
|
|
11433
|
+
" { \"criterionId\": \"...\", \"score\": 0, \"rationale\": \"...\" }",
|
|
11434
|
+
" ],",
|
|
11435
|
+
" \"composite\": <sum-of-weighted-binary-scores>,",
|
|
11436
|
+
" \"verdict\": \"<1-3 sentence overall>\"",
|
|
11437
|
+
"}"
|
|
11438
|
+
].join("\n"),
|
|
11439
|
+
extraNotes: ["`scores` MUST stay in the same order as the rubric criteria.", "`score` MUST be exactly `0` or `1` for every criterion."]
|
|
11440
|
+
})
|
|
11441
|
+
}
|
|
11442
|
+
]);
|
|
11101
11443
|
}
|
|
11102
11444
|
//#endregion
|
|
11103
11445
|
//#region ../agent-runtime/src/prompts/render-pack.ts
|
|
@@ -11107,7 +11449,7 @@ function buildPrReviewUserPrompt(input, ctx) {
|
|
|
11107
11449
|
*/
|
|
11108
11450
|
function buildRenderPackUserPrompt(input, ctx) {
|
|
11109
11451
|
const { packId, persist = true, pinned = false } = input;
|
|
11110
|
-
|
|
11452
|
+
const header = [
|
|
11111
11453
|
"# Render Pack Agent",
|
|
11112
11454
|
"",
|
|
11113
11455
|
"You are rendering a context pack to markdown. Step 2 of the",
|
|
@@ -11115,16 +11457,14 @@ function buildRenderPackUserPrompt(input, ctx) {
|
|
|
11115
11457
|
"a third will judge the rendering. You must NOT judge it here.",
|
|
11116
11458
|
"",
|
|
11117
11459
|
`Your agent-session diary ID is: ${ctx.diaryId}`,
|
|
11118
|
-
`This task's id is: ${ctx.taskId}
|
|
11119
|
-
|
|
11120
|
-
|
|
11121
|
-
"",
|
|
11460
|
+
`This task's id is: ${ctx.taskId}`
|
|
11461
|
+
].join("\n");
|
|
11462
|
+
const inputBlock = [
|
|
11122
11463
|
`- **Pack**: \`${packId}\``,
|
|
11123
11464
|
`- **Persist**: \`${persist}\``,
|
|
11124
|
-
`- **Pinned**: \`${pinned}
|
|
11125
|
-
|
|
11126
|
-
|
|
11127
|
-
"",
|
|
11465
|
+
`- **Pinned**: \`${pinned}\``
|
|
11466
|
+
].join("\n");
|
|
11467
|
+
const workflow = [
|
|
11128
11468
|
"1. Call `moltnet_pack_get` with `expandEntries: true` to inspect the",
|
|
11129
11469
|
" source entries. Read it — you need the entry count for your output.",
|
|
11130
11470
|
"2. Call `moltnet_pack_render` with:",
|
|
@@ -11132,16 +11472,14 @@ function buildRenderPackUserPrompt(input, ctx) {
|
|
|
11132
11472
|
` - \`persist\`: \`${persist}\``,
|
|
11133
11473
|
` - \`pinned\`: \`${pinned}\``,
|
|
11134
11474
|
" Record the returned `renderedPackId`, `cid`, `renderMethod`, and",
|
|
11135
|
-
" `content` byte length."
|
|
11136
|
-
|
|
11137
|
-
|
|
11138
|
-
"",
|
|
11475
|
+
" `content` byte length."
|
|
11476
|
+
].join("\n");
|
|
11477
|
+
const constraints = [
|
|
11139
11478
|
"- Do NOT modify the source pack or its entries.",
|
|
11140
11479
|
"- Do NOT write diary entries unless a genuine incident occurs",
|
|
11141
|
-
" (rendering failure, invariant violation)."
|
|
11142
|
-
|
|
11143
|
-
|
|
11144
|
-
"",
|
|
11480
|
+
" (rendering failure, invariant violation)."
|
|
11481
|
+
].join("\n");
|
|
11482
|
+
const fidelity = [
|
|
11145
11483
|
"These rules apply when you are producing the markdown yourself rather",
|
|
11146
11484
|
"than relying on a deterministic `server:*` renderer.",
|
|
11147
11485
|
"",
|
|
@@ -11161,25 +11499,63 @@ function buildRenderPackUserPrompt(input, ctx) {
|
|
|
11161
11499
|
" completeness. Optimize for \"no detectable drift across a",
|
|
11162
11500
|
" claim-by-claim audit\", not \"shorter at any cost\". When compressing, prefer",
|
|
11163
11501
|
" tightening prose around a quote rather than altering the quote,",
|
|
11164
|
-
" and prefer summarising a list over silently truncating it."
|
|
11165
|
-
"",
|
|
11166
|
-
buildSelfVerificationBlock(ctx.taskId),
|
|
11167
|
-
buildFinalOutputBlock({
|
|
11168
|
-
taskType: "render_pack",
|
|
11169
|
-
outputSchemaName: "RenderPackOutput",
|
|
11170
|
-
shapeSketch: [
|
|
11171
|
-
"{",
|
|
11172
|
-
" \"renderedPackId\": \"<uuid-or-null>\",",
|
|
11173
|
-
" \"renderedCid\": \"<cid>\",",
|
|
11174
|
-
" \"renderMethod\": \"<label>\",",
|
|
11175
|
-
" \"byteSize\": <int>,",
|
|
11176
|
-
" \"entriesRendered\": <int>,",
|
|
11177
|
-
" \"summary\": \"<1-3 sentence recap>\",",
|
|
11178
|
-
" \"verification\": <required iff input.successCriteria; see Self-verification>",
|
|
11179
|
-
"}"
|
|
11180
|
-
].join("\n")
|
|
11181
|
-
})
|
|
11502
|
+
" and prefer summarising a list over silently truncating it."
|
|
11182
11503
|
].join("\n");
|
|
11504
|
+
return assembleTaskPrompt("render_pack", [
|
|
11505
|
+
{
|
|
11506
|
+
id: "render_pack.header",
|
|
11507
|
+
source: "header",
|
|
11508
|
+
body: header
|
|
11509
|
+
},
|
|
11510
|
+
{
|
|
11511
|
+
id: "render_pack.input",
|
|
11512
|
+
source: "task_input",
|
|
11513
|
+
header: "Input",
|
|
11514
|
+
body: inputBlock
|
|
11515
|
+
},
|
|
11516
|
+
{
|
|
11517
|
+
id: "render_pack.workflow",
|
|
11518
|
+
source: "static",
|
|
11519
|
+
header: "Workflow",
|
|
11520
|
+
body: workflow
|
|
11521
|
+
},
|
|
11522
|
+
{
|
|
11523
|
+
id: "render_pack.constraints",
|
|
11524
|
+
source: "static",
|
|
11525
|
+
header: "Constraints",
|
|
11526
|
+
body: constraints
|
|
11527
|
+
},
|
|
11528
|
+
{
|
|
11529
|
+
id: "render_pack.fidelity",
|
|
11530
|
+
source: "static",
|
|
11531
|
+
header: "Fidelity Discipline",
|
|
11532
|
+
body: fidelity
|
|
11533
|
+
},
|
|
11534
|
+
{
|
|
11535
|
+
id: "render_pack.verification",
|
|
11536
|
+
source: "verification",
|
|
11537
|
+
body: buildSelfVerificationBlock(ctx.taskId)
|
|
11538
|
+
},
|
|
11539
|
+
{
|
|
11540
|
+
id: "render_pack.final_output",
|
|
11541
|
+
source: "final_output",
|
|
11542
|
+
body: buildFinalOutputBlock({
|
|
11543
|
+
taskType: "render_pack",
|
|
11544
|
+
outputSchemaName: "RenderPackOutput",
|
|
11545
|
+
shapeSketch: [
|
|
11546
|
+
"{",
|
|
11547
|
+
" \"renderedPackId\": \"<uuid-or-null>\",",
|
|
11548
|
+
" \"renderedCid\": \"<cid>\",",
|
|
11549
|
+
" \"renderMethod\": \"<label>\",",
|
|
11550
|
+
" \"byteSize\": <int>,",
|
|
11551
|
+
" \"entriesRendered\": <int>,",
|
|
11552
|
+
" \"summary\": \"<1-3 sentence recap>\",",
|
|
11553
|
+
" \"verification\": <required iff input.successCriteria; see Self-verification>",
|
|
11554
|
+
"}"
|
|
11555
|
+
].join("\n")
|
|
11556
|
+
})
|
|
11557
|
+
}
|
|
11558
|
+
]);
|
|
11183
11559
|
}
|
|
11184
11560
|
//#endregion
|
|
11185
11561
|
//#region ../agent-runtime/src/prompts/run-eval.ts
|
|
@@ -11188,8 +11564,7 @@ function buildRenderPackUserPrompt(input, ctx) {
|
|
|
11188
11564
|
*
|
|
11189
11565
|
* Free-form: no git workflow, no commit ceremony. The executor produces
|
|
11190
11566
|
* a textual response (and optional file artifacts) that later
|
|
11191
|
-
* `judge_eval_attempt` task(s) grade against their own hidden
|
|
11192
|
-
* rubric.
|
|
11567
|
+
* `judge_eval_attempt` task(s) grade against their own hidden rubric.
|
|
11193
11568
|
*
|
|
11194
11569
|
* Context delivery is handled by `resolveTaskContext` (see
|
|
11195
11570
|
* libs/agent-runtime/src/context-bindings.ts) and runs BEFORE this
|
|
@@ -11197,50 +11572,44 @@ function buildRenderPackUserPrompt(input, ctx) {
|
|
|
11197
11572
|
* the body, `skill` items are persisted at the runtime's skill path,
|
|
11198
11573
|
* and `user_inline` items are appended to the first user message. This
|
|
11199
11574
|
* builder does NOT inline `input.context[]` itself.
|
|
11575
|
+
*
|
|
11576
|
+
* Prompt-shape notes (issue #1175, area 1):
|
|
11577
|
+
* - No `Correlation` section: the agent never acts on it. The id is
|
|
11578
|
+
* still carried on attempt event metadata for cross-variant queries.
|
|
11579
|
+
* - No `Execution mode` section: the workspace already reflects the
|
|
11580
|
+
* chosen mode by its shape (scratch/shared mount/dedicated worktree).
|
|
11581
|
+
* Restating it as text adds noise without changing model behavior.
|
|
11582
|
+
* - The "Injected Task Context" phrase is used identically in this
|
|
11583
|
+
* prompt's discipline section and in the materialized context block
|
|
11584
|
+
* header (see context-bindings.ts) so weaker models see one repeated
|
|
11585
|
+
* anchor.
|
|
11586
|
+
* - The discipline copy demands the model encode injected constraints
|
|
11587
|
+
* into the code path itself, not into comments or the verification
|
|
11588
|
+
* field. Quoting the constraint back is not following the task.
|
|
11200
11589
|
*/
|
|
11201
11590
|
function buildRunEvalUserPrompt(input, ctx) {
|
|
11202
|
-
const { scenario, variantLabel,
|
|
11591
|
+
const { scenario, variantLabel, successCriteria } = input;
|
|
11203
11592
|
const hasContext = input.context.length > 0;
|
|
11204
11593
|
const hasInlineContext = input.context.some((entry) => entry.binding === "context_inline");
|
|
11205
|
-
const
|
|
11206
|
-
|
|
11207
|
-
"",
|
|
11208
|
-
|
|
11209
|
-
""
|
|
11210
|
-
].join("\n") : "";
|
|
11211
|
-
const verificationSection = successCriteria ? buildSelfVerificationBlock(ctx.taskId) : "";
|
|
11212
|
-
const correlationSection = ctx.correlationId ? [
|
|
11213
|
-
"### Correlation",
|
|
11214
|
-
"",
|
|
11215
|
-
`This task carries correlationId \`${ctx.correlationId}\`. It joins`,
|
|
11216
|
-
"this variant to its sibling `run_eval` tasks (other variants of the",
|
|
11217
|
-
"same scenario and to any later `judge_eval_attempt` tasks created",
|
|
11218
|
-
"against those variants. You do not need to act on it directly — it",
|
|
11219
|
-
"is recorded for cross-variant aggregation at query time.",
|
|
11220
|
-
""
|
|
11221
|
-
].join("\n") : "";
|
|
11222
|
-
const executionSection = [
|
|
11223
|
-
"### Execution mode",
|
|
11224
|
-
"",
|
|
11225
|
-
`Mode: \`${execution.mode}\``,
|
|
11226
|
-
`Workspace: \`${execution.workspace}\``,
|
|
11227
|
-
execution.workspace === "none" ? "You are running in a scratch workspace with no repository checkout mounted. Do not assume git history or repo files are present unless the scenario provided them explicitly." : execution.workspace === "shared_mount" ? "You are running against the daemon shared mount. Treat any repository mutations as affecting the mounted checkout directly." : "You are running in a dedicated disposable git worktree isolated from the daemon shared checkout.",
|
|
11228
|
-
""
|
|
11229
|
-
].join("\n");
|
|
11230
|
-
const contextDisciplineSection = hasContext ? [
|
|
11231
|
-
"### Injected context discipline",
|
|
11594
|
+
const header = `# Run Eval Agent\n\nYou are running an evaluation scenario as variant \`${variantLabel}\`.\nTask id: \`${ctx.taskId}\``;
|
|
11595
|
+
const contextDiscipline = hasContext ? [
|
|
11596
|
+
"This task includes Injected Task Context supplied by the task",
|
|
11597
|
+
"creator. You MUST inspect it BEFORE you write solution files or",
|
|
11598
|
+
"draft your final answer — not after.",
|
|
11232
11599
|
"",
|
|
11233
|
-
"
|
|
11234
|
-
"
|
|
11235
|
-
"
|
|
11236
|
-
"
|
|
11237
|
-
|
|
11238
|
-
|
|
11239
|
-
"
|
|
11240
|
-
"
|
|
11241
|
-
""
|
|
11600
|
+
"Reconcile every constraint from that context **into the code path",
|
|
11601
|
+
"itself**: function bodies, control flow, transaction boundaries,",
|
|
11602
|
+
"guard clauses. Quoting a constraint back in a comment, a",
|
|
11603
|
+
"`// note:` line, the task summary, or the `verification` field is",
|
|
11604
|
+
"NOT following the task. If the constraint affects behavior, it",
|
|
11605
|
+
"must affect behavior.",
|
|
11606
|
+
hasInlineContext ? "For `context_inline`, your FIRST content-inspection step is a `read` of `/workspace/context-pack.md` before your first `write` call. The same content is also mirrored in `/workspace/AGENTS.md` and may be referenced from `/workspace/.claude/CLAUDE.md`." : "When the context is delivered as a skill, inspect it before solving.",
|
|
11607
|
+
"If the Injected Task Context contains repo- or workflow-specific",
|
|
11608
|
+
"rules, those rules override your generic instincts."
|
|
11242
11609
|
].join("\n") : "";
|
|
11243
|
-
const
|
|
11610
|
+
const inputFiles = scenario.inputFiles?.length ? scenario.inputFiles.map((f) => `- \`${f}\``).join("\n") : "";
|
|
11611
|
+
const verification = successCriteria ? buildSelfVerificationBlock(ctx.taskId) : "";
|
|
11612
|
+
const finalOutput = buildFinalOutputBlock({
|
|
11244
11613
|
taskType: "run_eval",
|
|
11245
11614
|
outputSchemaName: "RunEvalOutput",
|
|
11246
11615
|
shapeSketch: [
|
|
@@ -11260,17 +11629,41 @@ function buildRunEvalUserPrompt(input, ctx) {
|
|
|
11260
11629
|
"}"
|
|
11261
11630
|
].join("\n")
|
|
11262
11631
|
});
|
|
11263
|
-
return [
|
|
11264
|
-
|
|
11265
|
-
|
|
11266
|
-
|
|
11267
|
-
|
|
11268
|
-
|
|
11269
|
-
|
|
11270
|
-
|
|
11271
|
-
|
|
11272
|
-
|
|
11273
|
-
|
|
11632
|
+
return assembleTaskPrompt("run_eval", [
|
|
11633
|
+
{
|
|
11634
|
+
id: "run_eval.header",
|
|
11635
|
+
source: "header",
|
|
11636
|
+
body: header
|
|
11637
|
+
},
|
|
11638
|
+
{
|
|
11639
|
+
id: "run_eval.context_discipline",
|
|
11640
|
+
source: "discipline",
|
|
11641
|
+
header: "Injected Task Context",
|
|
11642
|
+
body: contextDiscipline
|
|
11643
|
+
},
|
|
11644
|
+
{
|
|
11645
|
+
id: "run_eval.scenario",
|
|
11646
|
+
source: "task_input",
|
|
11647
|
+
header: "Scenario",
|
|
11648
|
+
body: scenario.prompt
|
|
11649
|
+
},
|
|
11650
|
+
{
|
|
11651
|
+
id: "run_eval.input_files",
|
|
11652
|
+
source: "task_input",
|
|
11653
|
+
header: "Input files",
|
|
11654
|
+
body: inputFiles
|
|
11655
|
+
},
|
|
11656
|
+
{
|
|
11657
|
+
id: "run_eval.verification",
|
|
11658
|
+
source: "verification",
|
|
11659
|
+
body: verification
|
|
11660
|
+
},
|
|
11661
|
+
{
|
|
11662
|
+
id: "run_eval.final_output",
|
|
11663
|
+
source: "final_output",
|
|
11664
|
+
body: finalOutput
|
|
11665
|
+
}
|
|
11666
|
+
]);
|
|
11274
11667
|
}
|
|
11275
11668
|
//#endregion
|
|
11276
11669
|
//#region ../agent-runtime/src/prompts/index.ts
|
|
@@ -15891,7 +16284,7 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15891
16284
|
});
|
|
15892
16285
|
let taskPrompt;
|
|
15893
16286
|
try {
|
|
15894
|
-
|
|
16287
|
+
const assembled = buildTaskUserPrompt(task, {
|
|
15895
16288
|
diaryId,
|
|
15896
16289
|
taskId: task.id,
|
|
15897
16290
|
workspace: {
|
|
@@ -15902,6 +16295,12 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
15902
16295
|
},
|
|
15903
16296
|
extras: opts.promptExtras
|
|
15904
16297
|
});
|
|
16298
|
+
taskPrompt = assembled.text;
|
|
16299
|
+
await emit("info", {
|
|
16300
|
+
event: "prompt_assembled",
|
|
16301
|
+
taskType: assembled.taskType,
|
|
16302
|
+
sections: assembled.trace
|
|
16303
|
+
});
|
|
15905
16304
|
} catch (err) {
|
|
15906
16305
|
const message = err instanceof Error ? err.message : String(err);
|
|
15907
16306
|
await emit("error", {
|
|
@@ -16164,8 +16563,8 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
16164
16563
|
}
|
|
16165
16564
|
else if (submitToolHandle) {
|
|
16166
16565
|
parseError = {
|
|
16167
|
-
code: "
|
|
16168
|
-
message: "Agent did not
|
|
16566
|
+
code: "submit_output_missing",
|
|
16567
|
+
message: "Agent did not satisfy the promised submit-output criterion: no valid task submit tool call was captured before the session ended."
|
|
16169
16568
|
};
|
|
16170
16569
|
await emit("error", {
|
|
16171
16570
|
message: parseError.message,
|