@themoltnet/pi-extension 0.19.3 → 0.19.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +875 -476
  2. package/package.json +3 -3
package/dist/index.js CHANGED
@@ -850,6 +850,98 @@ var createDiaryGrant = (options) => (options.client ?? client).post({
850
850
  }
851
851
  });
852
852
  /**
853
+ * Initiate a diary transfer to another team. Requires diary manage permission.
854
+ */
855
+ var initiateTransfer = (options) => (options.client ?? client).post({
856
+ security: [
857
+ {
858
+ scheme: "bearer",
859
+ type: "http"
860
+ },
861
+ {
862
+ name: "X-Moltnet-Session-Token",
863
+ type: "apiKey"
864
+ },
865
+ {
866
+ in: "cookie",
867
+ name: "ory_kratos_session",
868
+ type: "apiKey"
869
+ }
870
+ ],
871
+ url: "/diaries/{id}/transfer",
872
+ ...options,
873
+ headers: {
874
+ "Content-Type": "application/json",
875
+ ...options.headers
876
+ }
877
+ });
878
+ /**
879
+ * List pending transfers where the caller is destination team owner.
880
+ */
881
+ var listPendingTransfers = (options) => (options?.client ?? client).get({
882
+ security: [
883
+ {
884
+ scheme: "bearer",
885
+ type: "http"
886
+ },
887
+ {
888
+ name: "X-Moltnet-Session-Token",
889
+ type: "apiKey"
890
+ },
891
+ {
892
+ in: "cookie",
893
+ name: "ory_kratos_session",
894
+ type: "apiKey"
895
+ }
896
+ ],
897
+ url: "/transfers",
898
+ ...options
899
+ });
900
+ /**
901
+ * Accept a pending diary transfer. Caller must be destination team owner.
902
+ */
903
+ var acceptTransfer = (options) => (options.client ?? client).post({
904
+ security: [
905
+ {
906
+ scheme: "bearer",
907
+ type: "http"
908
+ },
909
+ {
910
+ name: "X-Moltnet-Session-Token",
911
+ type: "apiKey"
912
+ },
913
+ {
914
+ in: "cookie",
915
+ name: "ory_kratos_session",
916
+ type: "apiKey"
917
+ }
918
+ ],
919
+ url: "/transfers/{transferId}/accept",
920
+ ...options
921
+ });
922
+ /**
923
+ * Reject a pending diary transfer.
924
+ */
925
+ var rejectTransfer = (options) => (options.client ?? client).post({
926
+ security: [
927
+ {
928
+ scheme: "bearer",
929
+ type: "http"
930
+ },
931
+ {
932
+ name: "X-Moltnet-Session-Token",
933
+ type: "apiKey"
934
+ },
935
+ {
936
+ in: "cookie",
937
+ name: "ory_kratos_session",
938
+ type: "apiKey"
939
+ }
940
+ ],
941
+ url: "/transfers/{transferId}/reject",
942
+ ...options
943
+ });
944
+ /**
853
945
  * List diary entries for a specific diary.
854
946
  */
855
947
  var listDiaryEntries = (options) => (options.client ?? client).get({
@@ -2648,6 +2740,41 @@ function createDiaryGrantsNamespace(context) {
2648
2740
  };
2649
2741
  }
2650
2742
  //#endregion
2743
+ //#region ../sdk/src/namespaces/diary-transfers.ts
2744
+ function createDiaryTransfersNamespace(context) {
2745
+ const { client, auth } = context;
2746
+ return {
2747
+ async initiate(diaryId, body) {
2748
+ return unwrapResult(await initiateTransfer({
2749
+ client,
2750
+ auth,
2751
+ path: { id: diaryId },
2752
+ body
2753
+ }));
2754
+ },
2755
+ async listPending() {
2756
+ return unwrapResult(await listPendingTransfers({
2757
+ client,
2758
+ auth
2759
+ }));
2760
+ },
2761
+ async accept(transferId) {
2762
+ return unwrapResult(await acceptTransfer({
2763
+ client,
2764
+ auth,
2765
+ path: { transferId }
2766
+ }));
2767
+ },
2768
+ async reject(transferId) {
2769
+ return unwrapResult(await rejectTransfer({
2770
+ client,
2771
+ auth,
2772
+ path: { transferId }
2773
+ }));
2774
+ }
2775
+ };
2776
+ }
2777
+ //#endregion
2651
2778
  //#region ../../node_modules/.pnpm/@noble+hashes@1.8.0/node_modules/@noble/hashes/esm/utils.js
2652
2779
  /** Checks if something is Uint8Array. Be careful: nodejs Buffer will return true. */
2653
2780
  function isBytes$1(a) {
@@ -4963,6 +5090,7 @@ function createAgent(options) {
4963
5090
  return {
4964
5091
  diaries: createDiariesNamespace(context),
4965
5092
  diaryGrants: createDiaryGrantsNamespace(context),
5093
+ diaryTransfers: createDiaryTransfersNamespace(context),
4966
5094
  packs: createPacksNamespace(context),
4967
5095
  entries: createEntriesNamespace(context),
4968
5096
  agents: createAgentsNamespace(context),
@@ -9060,7 +9188,7 @@ function validateRubricWeights(rubric) {
9060
9188
  * attaches to any task type. It has four orthogonal sections — pick
9061
9189
  * whichever apply per task type:
9062
9190
  *
9063
- * - `gates` Deterministic structural checks (CID/schema match)
9191
+ * - `gates` Promise-level structural/process checks
9064
9192
  * - `assertions` Declarative claims about output JSON
9065
9193
  * - `rubric` Weighted-criteria scoring instrument, reused
9066
9194
  * verbatim from `./rubric.ts`.
@@ -9105,17 +9233,27 @@ var CidEqualsSpec = Type$1.Object({
9105
9233
  path: Type$1.String({ minLength: 1 }),
9106
9234
  expected: Type$1.String({ minLength: 1 })
9107
9235
  }, { additionalProperties: false });
9108
- var Gate = Type$1.Union([Type$1.Object({
9236
+ var SubmitToolCallGate = Type$1.Object({
9109
9237
  id: Type$1.String({ minLength: 1 }),
9110
- kind: Type$1.Literal("schema-check"),
9111
- spec: SchemaCheckSpec,
9112
- required: Type$1.Boolean()
9113
- }, { additionalProperties: false }), Type$1.Object({
9114
- id: Type$1.String({ minLength: 1 }),
9115
- kind: Type$1.Literal("cid-equals"),
9116
- spec: CidEqualsSpec,
9238
+ kind: Type$1.Literal("submit-tool-call"),
9239
+ description: Type$1.String({ minLength: 1 }),
9117
9240
  required: Type$1.Boolean()
9118
- }, { additionalProperties: false })], { $id: "Gate" });
9241
+ }, { additionalProperties: false });
9242
+ var Gate = Type$1.Union([
9243
+ SubmitToolCallGate,
9244
+ Type$1.Object({
9245
+ id: Type$1.String({ minLength: 1 }),
9246
+ kind: Type$1.Literal("schema-check"),
9247
+ spec: SchemaCheckSpec,
9248
+ required: Type$1.Boolean()
9249
+ }, { additionalProperties: false }),
9250
+ Type$1.Object({
9251
+ id: Type$1.String({ minLength: 1 }),
9252
+ kind: Type$1.Literal("cid-equals"),
9253
+ spec: CidEqualsSpec,
9254
+ required: Type$1.Boolean()
9255
+ }, { additionalProperties: false })
9256
+ ], { $id: "Gate" });
9119
9257
  var AssertionOp = Type$1.Union([
9120
9258
  Type$1.Literal("exists"),
9121
9259
  Type$1.Literal("equals"),
@@ -10342,6 +10480,32 @@ function submitOutputToolName(taskType) {
10342
10480
  return `submit_${taskType}_output`;
10343
10481
  }
10344
10482
  //#endregion
10483
+ //#region ../agent-runtime/src/prompts/assemble.ts
10484
+ /**
10485
+ * Render a `PromptSection[]` into final text + structured trace.
10486
+ * Single source of truth for inter-section spacing and header
10487
+ * rendering across all task types.
10488
+ */
10489
+ function assembleTaskPrompt(taskType, sections) {
10490
+ const trace = [];
10491
+ const rendered = [];
10492
+ for (const section of sections) {
10493
+ trace.push({
10494
+ id: section.id,
10495
+ source: section.source,
10496
+ header: section.header,
10497
+ char_count: section.body.length
10498
+ });
10499
+ if (section.body === "") continue;
10500
+ rendered.push(section.header ? `## ${section.header}\n\n${section.body}` : section.body);
10501
+ }
10502
+ return {
10503
+ text: rendered.join("\n\n"),
10504
+ trace,
10505
+ taskType
10506
+ };
10507
+ }
10508
+ //#endregion
10345
10509
  //#region ../agent-runtime/src/prompts/final-output.ts
10346
10510
  function buildFinalOutputBlock(opts) {
10347
10511
  const { taskType, outputSchemaName, shapeSketch, extraNotes } = opts;
@@ -10356,7 +10520,8 @@ function buildFinalOutputBlock(opts) {
10356
10520
  `The runtime captures the validated arguments and ends the session.`,
10357
10521
  `Do NOT emit the output as plain assistant text. Do NOT rely on a`,
10358
10522
  `JSON-in-message fallback. If you do not call \`${submitTool}\`, the`,
10359
- `attempt fails even if the underlying work succeeded.`,
10523
+ `attempt is recorded as failing the promised submit-output criterion`,
10524
+ `even if the underlying work succeeded.`,
10360
10525
  "",
10361
10526
  `Your final assistant text before that tool call may explain your work,`,
10362
10527
  `but the submit-tool call itself must be your VERY LAST action.`,
@@ -10394,37 +10559,17 @@ function renderRubricPreambleSection(rubric) {
10394
10559
  *
10395
10560
  * Design note — no pre-resolved `target` projection
10396
10561
  * --------------------------------------------------
10397
- * Earlier drafts hand-wired a `target` bundle (branch, PR url,
10398
- * commits, summary, diary entry ids) into the prompt before the
10399
- * judge started. That coupled the daemon to one specific producer
10400
- * shape (`FulfillBriefOutput`), forced every executor to know how
10401
- * to project it, and went stale every time a producer task type
10402
- * grew a field. Trade-off was wrong: the runtime is meant to be
10403
- * task-type-agnostic, and judges are perfectly capable of
10404
- * fetching their own data.
10405
- *
10406
- * Now: the prompt tells the judge the `targetTaskId` and instructs
10407
- * it to call `moltnet_get_task` + `moltnet_list_task_attempts`
10408
- * itself. The judge sees whatever the producer's accepted attempt
10409
- * actually wrote — no projection, no lossiness, no daemon-side
10410
- * type knowledge required. Different producers (fulfill_brief,
10411
- * future task types whose products are docs / configs / changes /
10412
- * anything) work without any code path here.
10562
+ * Earlier drafts hand-wired a `target` bundle (branch, PR url, commits,
10563
+ * summary, diary entry ids) into the prompt before the judge started.
10564
+ * That coupled the daemon to one specific producer shape, forced every
10565
+ * executor to know how to project it, and went stale every time a
10566
+ * producer task type grew a field. Now: the prompt tells the judge
10567
+ * the `targetTaskId` and instructs it to call `moltnet_get_task` +
10568
+ * `moltnet_list_task_attempts` itself.
10413
10569
  */
10414
10570
  function buildAssessBriefUserPrompt(input, ctx) {
10415
10571
  const rubric = input.successCriteria.rubric;
10416
- const criteriaList = renderRubricCriteriaList(rubric);
10417
- const preambleSection = renderRubricPreambleSection(rubric) ?? "";
10418
- const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
10419
- "### Workspace",
10420
- "",
10421
- "This review attempt is running inside a dedicated disposable git",
10422
- "worktree created for this task. If you need to check out the target",
10423
- "branch or inspect refs locally, do it only inside this worktree.",
10424
- ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`. You may replace it with the target branch locally if that helps your inspection.` : "The current checkout is disposable and will be cleaned up when the task ends.",
10425
- ""
10426
- ].join("\n") : "";
10427
- return [
10572
+ const header = [
10428
10573
  "# Assess Brief Judge",
10429
10574
  "",
10430
10575
  "You are an independent judge. You did NOT produce the work under review.",
@@ -10432,10 +10577,9 @@ function buildAssessBriefUserPrompt(input, ctx) {
10432
10577
  "You may read code, commits, and diary entries — but do NOT modify anything.",
10433
10578
  "",
10434
10579
  `Your diary ID is: ${ctx.diaryId}`,
10435
- `This task's id is: ${ctx.taskId}`,
10436
- "",
10437
- "## Target of assessment",
10438
- "",
10580
+ `This task's id is: ${ctx.taskId}`
10581
+ ].join("\n");
10582
+ const target = [
10439
10583
  `**Producer task id:** \`${input.targetTaskId}\``,
10440
10584
  "",
10441
10585
  "Investigate the producer task before scoring:",
@@ -10448,10 +10592,9 @@ function buildAssessBriefUserPrompt(input, ctx) {
10448
10592
  " - `commits[].sha` listed → use `git show <sha>` for individual commits.",
10449
10593
  " - `diaryEntryIds[]` listed → fetch each via `moltnet_get_entry` to read the producer's reasoning.",
10450
10594
  " - `summary` set → use as orientation, not as ground truth.",
10451
- "Adapt your investigation to whatever the output actually contains. Score conservatively when the producer's output is opaque or thin.",
10452
- "",
10453
- "### Querying the producer's diary entries",
10454
- "",
10595
+ "Adapt your investigation to whatever the output actually contains. Score conservatively when the producer's output is opaque or thin."
10596
+ ].join("\n");
10597
+ const diaryQuery = [
10455
10598
  `Beyond the explicit \`diaryEntryIds[]\` from step 3, the producer's`,
10456
10599
  "attempts auto-tag every entry with the `task:*` provenance namespace.",
10457
10600
  "You can pull the full set without enumerating ids by passing the",
@@ -10462,38 +10605,84 @@ function buildAssessBriefUserPrompt(input, ctx) {
10462
10605
  "- Just the accepted attempt: add `attemptN: <acceptedAttemptN>`.",
10463
10606
  "- The producer plus any prior chain (when a correlationId was set):",
10464
10607
  " read it from the task you fetched in step 1 and pass",
10465
- " `taskFilter: { correlationId: \"<id>\" }`.",
10466
- "",
10467
- workspaceSection,
10468
- preambleSection,
10469
- "## Criteria",
10470
- "",
10471
- criteriaList,
10472
- "",
10473
- "### Scoring rules",
10474
- "",
10608
+ " `taskFilter: { correlationId: \"<id>\" }`."
10609
+ ].join("\n");
10610
+ const workspace = ctx.workspace?.mode === "dedicated_worktree" ? [
10611
+ "This review attempt is running inside a dedicated disposable git",
10612
+ "worktree created for this task. If you need to check out the target",
10613
+ "branch or inspect refs locally, do it only inside this worktree.",
10614
+ ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`. You may replace it with the target branch locally if that helps your inspection.` : "The current checkout is disposable and will be cleaned up when the task ends."
10615
+ ].join("\n") : "";
10616
+ const preamble = renderRubricPreambleSection(rubric) ?? "";
10617
+ const criteria = renderRubricCriteriaList(rubric);
10618
+ const scoring = [
10475
10619
  "- `llm_score`: score 0..1 continuous. `rationale` REQUIRED (2–4 sentences).",
10476
10620
  "- `boolean`: score exactly 0 or 1. `rationale` optional.",
10477
10621
  "- `deterministic_signature_check`: run `moltnet entry verify` on every diary entry returned by step 3 above AND `git verify-commit` on every commit. Score 1 iff ALL signatures are valid; otherwise 0. Populate `evidence.commitsVerified`, `evidence.commitsTotal`, `evidence.signatureFailures`.",
10478
10622
  "",
10479
- "Write a signed diary entry (tags: \"judgment\", \"assess_brief\") capturing the rationale before reporting structured output.",
10480
- "",
10481
- buildFinalOutputBlock({
10482
- taskType: "assess_brief",
10483
- outputSchemaName: "AssessBriefOutput",
10484
- shapeSketch: [
10485
- "{",
10486
- " \"scores\": [",
10487
- " { \"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {} }",
10488
- " ],",
10489
- " \"composite\": <sum>,",
10490
- " \"verdict\": \"<1-3 sentence overall>\",",
10491
- " \"judgeModel\": \"<provider:model>\"",
10492
- "}"
10493
- ].join("\n"),
10494
- extraNotes: ["`composite` = Σ(weight_i × score_i) recomputed. The runtime rejects a mismatch."]
10495
- })
10496
- ].filter(Boolean).join("\n");
10623
+ "Write a signed diary entry (tags: \"judgment\", \"assess_brief\") capturing the rationale before reporting structured output."
10624
+ ].join("\n");
10625
+ return assembleTaskPrompt("assess_brief", [
10626
+ {
10627
+ id: "assess_brief.header",
10628
+ source: "header",
10629
+ body: header
10630
+ },
10631
+ {
10632
+ id: "assess_brief.target",
10633
+ source: "task_input",
10634
+ header: "Target of assessment",
10635
+ body: target
10636
+ },
10637
+ {
10638
+ id: "assess_brief.diary_query",
10639
+ source: "static",
10640
+ header: "Querying the producer's diary entries",
10641
+ body: diaryQuery
10642
+ },
10643
+ {
10644
+ id: "assess_brief.workspace",
10645
+ source: "workspace",
10646
+ header: "Workspace",
10647
+ body: workspace
10648
+ },
10649
+ {
10650
+ id: "assess_brief.preamble",
10651
+ source: "rubric_judge",
10652
+ body: preamble
10653
+ },
10654
+ {
10655
+ id: "assess_brief.criteria",
10656
+ source: "rubric_judge",
10657
+ header: "Criteria",
10658
+ body: criteria
10659
+ },
10660
+ {
10661
+ id: "assess_brief.scoring",
10662
+ source: "rubric_judge",
10663
+ header: "Scoring rules",
10664
+ body: scoring
10665
+ },
10666
+ {
10667
+ id: "assess_brief.final_output",
10668
+ source: "final_output",
10669
+ body: buildFinalOutputBlock({
10670
+ taskType: "assess_brief",
10671
+ outputSchemaName: "AssessBriefOutput",
10672
+ shapeSketch: [
10673
+ "{",
10674
+ " \"scores\": [",
10675
+ " { \"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {} }",
10676
+ " ],",
10677
+ " \"composite\": <sum>,",
10678
+ " \"verdict\": \"<1-3 sentence overall>\",",
10679
+ " \"judgeModel\": \"<provider:model>\"",
10680
+ "}"
10681
+ ].join("\n"),
10682
+ extraNotes: ["`composite` = Σ(weight_i × score_i) recomputed. The runtime rejects a mismatch."]
10683
+ })
10684
+ }
10685
+ ]);
10497
10686
  }
10498
10687
  //#endregion
10499
10688
  //#region ../agent-runtime/src/prompts/self-verification.ts
@@ -10502,11 +10691,11 @@ function buildSelfVerificationBlock(taskId, criteriaField = "successCriteria") {
10502
10691
  "## Self-verification",
10503
10692
  "",
10504
10693
  `If \`input.${criteriaField}\` is set on this task, your final output MUST`,
10505
- "include a `verification` block. **The runtime/server rejects task",
10506
- `submission without \`verification\` when \`${criteriaField}\` is present**`,
10507
- "the request fails validation and the attempt is discarded, even if the",
10508
- "underlying work succeeded. Do not call the submit tool until you have",
10509
- "computed the verification payload.",
10694
+ "include a `verification` block. Treat every item in those criteria as",
10695
+ "part of the promise you made when you claimed the task. That includes",
10696
+ "the built-in submit-output gate when present. Do not call the submit",
10697
+ "tool until you have computed the verification payload you can honestly",
10698
+ "stand behind.",
10510
10699
  "",
10511
10700
  `Call \`moltnet_get_task\` with task id \`${taskId}\` and read \`input.${criteriaField}\`.`,
10512
10701
  "",
@@ -10570,22 +10759,13 @@ function buildSelfVerificationBlock(taskId, criteriaField = "successCriteria") {
10570
10759
  * TODO(#885): add a `moltnet_parallel_explore` custom tool that spawns
10571
10760
  * N isolated `createAgentSession` children (one per tag cluster or
10572
10761
  * entry_type axis the curator picks after recon), each with a narrow
10573
- * tool subset and a turn cap, and returns compressed summaries. Parent
10574
- * curator keeps a warm context and only sees {candidateIds, notes}
10575
- * per probe — mirrors the fan-out pattern pi-mono SDK example #13
10576
- * (session runtime) + #05 (custom tools) makes possible. Until that
10577
- * lands, the `checkpoints[]` output field is the fallback: curator
10578
- * emits pruned state at phase boundaries so a follow-up session can
10579
- * resume without replaying the tool history.
10762
+ * tool subset and a turn cap, and returns compressed summaries.
10580
10763
  */
10581
10764
  function buildCuratePackUserPrompt(input, ctx) {
10582
10765
  const { diaryId, taskPrompt, entryTypes, tagFilters, tokenBudget, recipe } = input;
10583
10766
  const entryTypesPinned = Boolean(entryTypes);
10584
10767
  const resolvedRecipe = recipe ?? "topic-focused-v1";
10585
- const includeLine = tagFilters?.include?.length ? `- Hard include (ALL must be present on an entry): ${tagFilters.include.map((t) => `\`${t}\``).join(", ")}` : null;
10586
- const excludeLine = tagFilters?.exclude?.length ? `- Hard exclude (drop if ANY present): ${tagFilters.exclude.map((t) => `\`${t}\``).join(", ")}` : null;
10587
- const prefixLine = tagFilters?.prefix ? `- Tag prefix hint when inventorying: \`${tagFilters.prefix}\`` : null;
10588
- return [
10768
+ const header = [
10589
10769
  "# Curate Pack Agent",
10590
10770
  "",
10591
10771
  "You are the curator. Step 1 of the three-session attribution loop:",
@@ -10593,40 +10773,29 @@ function buildCuratePackUserPrompt(input, ctx) {
10593
10773
  "will judge. Your output IS the pack — nobody downstream will re-rank.",
10594
10774
  "",
10595
10775
  `Your agent-session diary ID is: ${ctx.diaryId}`,
10596
- `This task's id is: ${ctx.taskId}`,
10597
- "",
10598
- "## Goal",
10599
- "",
10776
+ `This task's id is: ${ctx.taskId}`
10777
+ ].join("\n");
10778
+ const goal = [
10600
10779
  `Build a pack from diary \`${diaryId}\` that faithfully serves this`,
10601
- `prompt:`,
10780
+ "prompt:",
10602
10781
  "",
10603
10782
  `> ${taskPrompt}`,
10604
10783
  "",
10605
10784
  "What \"faithfully\" means is your call. A broad prompt may warrant 20",
10606
10785
  "entries spanning clusters; a sharp one may resolve to 4 high-signal",
10607
10786
  "entries. Trust your own judgment on breadth vs. depth — but be able",
10608
- "to defend it in the summary.",
10609
- "",
10610
- "## Constraints",
10611
- "",
10612
- entryTypesPinned ? `- Entry types pinned by imposer (do not widen): ${entryTypes.map((t) => `\`${t}\``).join(", ")}` : "- Entry types: **you choose**. The diary contains three kinds:",
10613
- entryTypesPinned ? null : " - `episodic` incident reports, \"what happened and how we fixed it\" narratives.",
10614
- entryTypesPinned ? null : " - `semantic`durable decisions, patterns, design rationale.",
10615
- entryTypesPinned ? null : " - `procedural` commit audit trails / changelog-style provenance.",
10616
- entryTypesPinned ? null : " Pick the subset that fits the prompt. For \"failures and workarounds\"",
10617
- entryTypesPinned ? null : " or \"decisions we made\" you generally do NOT want `procedural` — those",
10618
- entryTypesPinned ? null : " entries are append-only commit logs and produce changelog-shaped packs.",
10619
- entryTypesPinned ? null : " Include `procedural` only when the prompt explicitly asks for changelog-",
10620
- entryTypesPinned ? null : " style content (e.g., \"what shipped this week\"). State your choice",
10621
- entryTypesPinned ? null : " briefly in the final `summary`.",
10622
- `- Recipe tag: \`${resolvedRecipe}\` (recorded on pack params)`,
10623
- tokenBudget ? `- Token budget (soft cap on final pack): ${tokenBudget}. Pick entry count so the pack fits — estimate ~300 tok/entry as a starting heuristic, tighten after inspecting actual content lengths.` : "- No token budget — size the pack to match the prompt, not an arbitrary target.",
10624
- includeLine,
10625
- excludeLine,
10626
- prefixLine,
10627
- "",
10628
- "## Tools available (not a recipe — use what the situation calls for)",
10629
- "",
10787
+ "to defend it in the summary."
10788
+ ].join("\n");
10789
+ const constraintsLines = [];
10790
+ if (entryTypesPinned) constraintsLines.push(`- Entry types pinned by imposer (do not widen): ${entryTypes.map((t) => `\`${t}\``).join(", ")}`);
10791
+ else constraintsLines.push("- Entry types: **you choose**. The diary contains three kinds:", " - `episodic` — incident reports, \"what happened and how we fixed it\" narratives.", " - `semantic` durable decisions, patterns, design rationale.", " - `procedural` commit audit trails / changelog-style provenance.", " Pick the subset that fits the prompt. For \"failures and workarounds\"", " or \"decisions we made\" you generally do NOT want `procedural` — those", " entries are append-only commit logs and produce changelog-shaped packs.", " Include `procedural` only when the prompt explicitly asks for changelog-", " style content (e.g., \"what shipped this week\"). State your choice", " briefly in the final `summary`.");
10792
+ constraintsLines.push(`- Recipe tag: \`${resolvedRecipe}\` (recorded on pack params)`);
10793
+ constraintsLines.push(tokenBudget ? `- Token budget (soft cap on final pack): ${tokenBudget}. Pick entry count so the pack fits — estimate ~300 tok/entry as a starting heuristic, tighten after inspecting actual content lengths.` : "- No token budget size the pack to match the prompt, not an arbitrary target.");
10794
+ if (tagFilters?.include?.length) constraintsLines.push(`- Hard include (ALL must be present on an entry): ${tagFilters.include.map((t) => `\`${t}\``).join(", ")}`);
10795
+ if (tagFilters?.exclude?.length) constraintsLines.push(`- Hard exclude (drop if ANY present): ${tagFilters.exclude.map((t) => `\`${t}\``).join(", ")}`);
10796
+ if (tagFilters?.prefix) constraintsLines.push(`- Tag prefix hint when inventorying: \`${tagFilters.prefix}\``);
10797
+ const constraints = constraintsLines.join("\n");
10798
+ const tools = [
10630
10799
  "- `moltnet_diary_tags` — tag inventory with counts. Cheap reconnaissance",
10631
10800
  " when the prompt implies a scope but not a tag. Pass",
10632
10801
  " `prefix: \"task:\"` to enumerate task-provenance tags only",
@@ -10639,10 +10808,9 @@ function buildCuratePackUserPrompt(input, ctx) {
10639
10808
  "- `moltnet_list_entries` — multi-tag (AND) listing with optional",
10640
10809
  " `excludeTags`, `entryType`, and the same `taskFilter` shorthand.",
10641
10810
  "- `moltnet_get_entry` — full entry read, for disambiguation.",
10642
- "- `moltnet_pack_create` — terminal call that persists the pack.",
10643
- "",
10644
- "## Exploration discipline",
10645
- "",
10811
+ "- `moltnet_pack_create` — terminal call that persists the pack."
10812
+ ].join("\n");
10813
+ const exploration = [
10646
10814
  "Context is finite. Treat every tool call as buying information against",
10647
10815
  "a budget. Some heuristics that tend to work:",
10648
10816
  "",
@@ -10659,57 +10827,110 @@ function buildCuratePackUserPrompt(input, ctx) {
10659
10827
  "- **Emit a checkpoint if your working set exceeds ~30 candidates.**",
10660
10828
  " Write one to the `checkpoints` array (see Output) listing the ids",
10661
10829
  " you're keeping and dropping, plus a note explaining the cut. This",
10662
- " lets a follow-up session resume without replaying your tool history.",
10663
- "",
10664
- "## Ranking",
10665
- "",
10830
+ " lets a follow-up session resume without replaying your tool history."
10831
+ ].join("\n");
10832
+ const ranking = [
10666
10833
  "Assign integer ranks 1..N, lower = more prominent. Rank reflects",
10667
10834
  "relevance to the prompt, NOT recency or entry popularity. Each entry",
10668
10835
  "in the output must carry a short `rationale` — one sentence pointing",
10669
- "at what in its content earned the rank.",
10670
- "",
10671
- "## Persisting the pack",
10672
- "",
10836
+ "at what in its content earned the rank."
10837
+ ].join("\n");
10838
+ const persisting = [
10673
10839
  "Call `moltnet_pack_create` with:",
10674
10840
  "- `entries`: `[{ entryId, rank }]` for each selected entry.",
10675
- "- `params`: `{ recipe: \"" + resolvedRecipe + "\", prompt: <the task prompt>, selection_rationale: \"<2-sentence summary>\" }`.",
10841
+ `- \`params\`: \`{ recipe: "${resolvedRecipe}", prompt: <the task prompt>, selection_rationale: "<2-sentence summary>" }\`.`,
10676
10842
  tokenBudget ? `- \`tokenBudget\`: ${tokenBudget}.` : "- `tokenBudget`: omit.",
10677
10843
  "- `pinned: false` (packs in this pipeline are ephemeral by design).",
10678
10844
  "",
10679
10845
  "The tool returns a JSON payload whose top-level fields are `packId` and",
10680
10846
  "`packCid` (NOT `id`). Copy those exact UUID/CID strings verbatim into",
10681
10847
  "`packId` and `packCid` in your final output — do not substitute an",
10682
- "entry id, do not reformat, do not fabricate a UUID.",
10683
- "",
10684
- "## Hard constraints",
10685
- "",
10848
+ "entry id, do not reformat, do not fabricate a UUID."
10849
+ ].join("\n");
10850
+ const hardConstraints = [
10686
10851
  "- Do NOT call `moltnet_pack_render` — that belongs to the next session.",
10687
10852
  "- Do NOT write diary entries unless curation surfaces a genuine",
10688
10853
  " incident worth recording. The curation reasoning lives in the task",
10689
10854
  " output, not in the diary.",
10690
- "- Respect hard include/exclude filters literally.",
10691
- "",
10692
- buildSelfVerificationBlock(ctx.taskId),
10693
- buildFinalOutputBlock({
10694
- taskType: "curate_pack",
10695
- outputSchemaName: "CuratePackOutput",
10696
- shapeSketch: [
10697
- "{",
10698
- " \"packId\": \"<uuid>\",",
10699
- " \"packCid\": \"<cid>\",",
10700
- " \"entries\": [",
10701
- " { \"entryId\": \"<uuid>\", \"rank\": 1, \"rationale\": \"<why>\" }",
10702
- " ],",
10703
- " \"recipeParams\": { \"recipe\": \"...\", \"prompt\": \"...\", ... },",
10704
- " \"checkpoints\": [",
10705
- " { \"phase\": \"recon\", \"candidateIds\": [...], \"droppedIds\": [...], \"notes\": \"...\" }",
10706
- " ],",
10707
- " \"summary\": \"<2-4 sentences: what you looked for, how you narrowed, what defines the final set>\",",
10708
- " \"verification\": <required iff input.successCriteria; see Self-verification>",
10709
- "}"
10710
- ].join("\n")
10711
- })
10712
- ].filter((l) => l !== null).join("\n");
10855
+ "- Respect hard include/exclude filters literally."
10856
+ ].join("\n");
10857
+ return assembleTaskPrompt("curate_pack", [
10858
+ {
10859
+ id: "curate_pack.header",
10860
+ source: "header",
10861
+ body: header
10862
+ },
10863
+ {
10864
+ id: "curate_pack.goal",
10865
+ source: "task_input",
10866
+ header: "Goal",
10867
+ body: goal
10868
+ },
10869
+ {
10870
+ id: "curate_pack.constraints",
10871
+ source: "task_input",
10872
+ header: "Constraints",
10873
+ body: constraints
10874
+ },
10875
+ {
10876
+ id: "curate_pack.tools",
10877
+ source: "static",
10878
+ header: "Tools available (not a recipe — use what the situation calls for)",
10879
+ body: tools
10880
+ },
10881
+ {
10882
+ id: "curate_pack.exploration",
10883
+ source: "static",
10884
+ header: "Exploration discipline",
10885
+ body: exploration
10886
+ },
10887
+ {
10888
+ id: "curate_pack.ranking",
10889
+ source: "static",
10890
+ header: "Ranking",
10891
+ body: ranking
10892
+ },
10893
+ {
10894
+ id: "curate_pack.persisting",
10895
+ source: "static",
10896
+ header: "Persisting the pack",
10897
+ body: persisting
10898
+ },
10899
+ {
10900
+ id: "curate_pack.hard_constraints",
10901
+ source: "static",
10902
+ header: "Hard constraints",
10903
+ body: hardConstraints
10904
+ },
10905
+ {
10906
+ id: "curate_pack.verification",
10907
+ source: "verification",
10908
+ body: buildSelfVerificationBlock(ctx.taskId)
10909
+ },
10910
+ {
10911
+ id: "curate_pack.final_output",
10912
+ source: "final_output",
10913
+ body: buildFinalOutputBlock({
10914
+ taskType: "curate_pack",
10915
+ outputSchemaName: "CuratePackOutput",
10916
+ shapeSketch: [
10917
+ "{",
10918
+ " \"packId\": \"<uuid>\",",
10919
+ " \"packCid\": \"<cid>\",",
10920
+ " \"entries\": [",
10921
+ " { \"entryId\": \"<uuid>\", \"rank\": 1, \"rationale\": \"<why>\" }",
10922
+ " ],",
10923
+ " \"recipeParams\": { \"recipe\": \"...\", \"prompt\": \"...\", ... },",
10924
+ " \"checkpoints\": [",
10925
+ " { \"phase\": \"recon\", \"candidateIds\": [...], \"droppedIds\": [...], \"notes\": \"...\" }",
10926
+ " ],",
10927
+ " \"summary\": \"<2-4 sentences: what you looked for, how you narrowed, what defines the final set>\",",
10928
+ " \"verification\": <required iff input.successCriteria; see Self-verification>",
10929
+ "}"
10930
+ ].join("\n")
10931
+ })
10932
+ }
10933
+ ]);
10713
10934
  }
10714
10935
  //#endregion
10715
10936
  //#region ../agent-runtime/src/prompts/fulfill-brief.ts
@@ -10722,17 +10943,22 @@ function buildCuratePackUserPrompt(input, ctx) {
10722
10943
  */
10723
10944
  function buildFulfillBriefUserPrompt(input, ctx) {
10724
10945
  const { brief, title, seedFiles, scopeHint } = input;
10725
- const seedSection = seedFiles?.length ? [
10726
- "### Seed files",
10946
+ const header = [
10947
+ "# Fulfill Brief Agent",
10727
10948
  "",
10728
- "Start by reading these files to ground yourself:",
10729
- ...seedFiles.map((f) => `- \`${f}\``),
10730
- ""
10731
- ].join("\n") : "";
10732
- const branchSlug = ctx.correlationId ? `moltnet/${ctx.correlationId}/` : scopeHint ? `feat/${scopeHint}-` : "feat/";
10733
- const correlationSection = ctx.correlationId ? [
10734
- "### Correlation",
10949
+ "You are a software engineering agent working in a sandboxed environment.",
10950
+ "Your workspace is at /workspace (mounted from the host repository).",
10951
+ "The MoltNet runtime instructor (above, in this system prompt) defines the",
10952
+ "invariants for this task: identity, gh authentication, diary discipline,",
10953
+ "and the accountable-commit shape. Follow it for every commit.",
10954
+ "",
10955
+ `## Task: ${title ?? "Fulfill brief"}`,
10735
10956
  "",
10957
+ `Task id: \`${ctx.taskId}\``
10958
+ ].join("\n");
10959
+ const seedFilesBody = seedFiles?.length ? ["Start by reading these files to ground yourself:", ...seedFiles.map((f) => `- \`${f}\``)].join("\n") : "";
10960
+ const branchSlug = ctx.correlationId ? `moltnet/${ctx.correlationId}/` : scopeHint ? `feat/${scopeHint}-` : "feat/";
10961
+ const correlation = ctx.correlationId ? [
10736
10962
  `This task carries correlationId \`${ctx.correlationId}\`. You MUST:`,
10737
10963
  "",
10738
10964
  `1. Name your branch \`moltnet/${ctx.correlationId}/<short-slug>\` — use a`,
@@ -10741,39 +10967,14 @@ function buildFulfillBriefUserPrompt(input, ctx) {
10741
10967
  " your **first** commit on that branch (subsequent commits do not need it).",
10742
10968
  "",
10743
10969
  "These are recovery anchors for the MoltNet mention-bot. Do not deviate",
10744
- "from this branch naming scheme when correlationId is set.",
10745
- ""
10970
+ "from this branch naming scheme when correlationId is set."
10746
10971
  ].join("\n") : "";
10747
- const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
10748
- "### Workspace",
10749
- "",
10972
+ const workspace = ctx.workspace?.mode === "dedicated_worktree" ? [
10750
10973
  "This attempt is running inside a dedicated git worktree created",
10751
10974
  "for this task. Do not repurpose or switch the primary checkout.",
10752
- ctx.workspace.branch ? `The current branch is \`${ctx.workspace.branch}\`. Stay on this branch unless the runtime instructor explicitly tells you otherwise.` : "Stay on the branch that was pre-provisioned for this task.",
10753
- ""
10975
+ ctx.workspace.branch ? `The current branch is \`${ctx.workspace.branch}\`. Stay on this branch unless the runtime instructor explicitly tells you otherwise.` : "Stay on the branch that was pre-provisioned for this task."
10754
10976
  ].join("\n") : "";
10755
- return [
10756
- "# Fulfill Brief Agent",
10757
- "",
10758
- "You are a software engineering agent working in a sandboxed environment.",
10759
- "Your workspace is at /workspace (mounted from the host repository).",
10760
- "The MoltNet runtime instructor (above, in this system prompt) defines the",
10761
- "invariants for this task: identity, gh authentication, diary discipline,",
10762
- "and the accountable-commit shape. Follow it for every commit.",
10763
- "",
10764
- `## Task: ${title ?? "Fulfill brief"}`,
10765
- "",
10766
- `Task id: \`${ctx.taskId}\``,
10767
- "",
10768
- "### Brief",
10769
- "",
10770
- brief,
10771
- "",
10772
- seedSection,
10773
- correlationSection,
10774
- workspaceSection,
10775
- "### Workflow",
10776
- "",
10977
+ const workflow = [
10777
10978
  ctx.workspace?.mode === "dedicated_worktree" ? `1. Use the already-provisioned dedicated worktree branch${ctx.workspace.branch ? ` (\`${ctx.workspace.branch}\`)` : ""}; do not create or switch the primary checkout.` : `1. Create a feature branch (starting prefix suggestion: \`${branchSlug}<short-slug>\`).`,
10778
10979
  "2. Understand the problem — read relevant code; do not speculate.",
10779
10980
  "3. Implement the change. Keep commits small and coherent.",
@@ -10781,24 +10982,68 @@ function buildFulfillBriefUserPrompt(input, ctx) {
10781
10982
  "5. For every commit, create a signed diary entry first via",
10782
10983
  " `moltnet_create_entry` and embed its id in the commit trailer",
10783
10984
  " `MoltNet-Diary: <id>` (per the runtime instructor).",
10784
- "6. Push the branch and open a PR.",
10785
- "",
10786
- buildSelfVerificationBlock(ctx.taskId),
10787
- buildFinalOutputBlock({
10788
- taskType: "fulfill_brief",
10789
- outputSchemaName: "FulfillBriefOutput",
10790
- shapeSketch: [
10791
- "{",
10792
- " \"branch\": \"<branch-name>\",",
10793
- " \"commits\": [{ \"sha\": \"...\", \"message\": \"...\", \"diaryEntryId\": \"...\" }],",
10794
- " \"pullRequestUrl\": \"<url-or-null>\",",
10795
- " \"diaryEntryIds\": [\"...\"],",
10796
- " \"summary\": \"<1-3 sentence recap>\",",
10797
- " \"verification\": <required iff input.successCriteria; see Self-verification>",
10798
- "}"
10799
- ].join("\n")
10800
- })
10801
- ].filter(Boolean).join("\n");
10985
+ "6. Push the branch and open a PR."
10986
+ ].join("\n");
10987
+ return assembleTaskPrompt("fulfill_brief", [
10988
+ {
10989
+ id: "fulfill_brief.header",
10990
+ source: "header",
10991
+ body: header
10992
+ },
10993
+ {
10994
+ id: "fulfill_brief.brief",
10995
+ source: "task_input",
10996
+ header: "Brief",
10997
+ body: brief
10998
+ },
10999
+ {
11000
+ id: "fulfill_brief.seed_files",
11001
+ source: "task_input",
11002
+ header: "Seed files",
11003
+ body: seedFilesBody
11004
+ },
11005
+ {
11006
+ id: "fulfill_brief.correlation",
11007
+ source: "task_input",
11008
+ header: "Correlation",
11009
+ body: correlation
11010
+ },
11011
+ {
11012
+ id: "fulfill_brief.workspace",
11013
+ source: "workspace",
11014
+ header: "Workspace",
11015
+ body: workspace
11016
+ },
11017
+ {
11018
+ id: "fulfill_brief.workflow",
11019
+ source: "static",
11020
+ header: "Workflow",
11021
+ body: workflow
11022
+ },
11023
+ {
11024
+ id: "fulfill_brief.verification",
11025
+ source: "verification",
11026
+ body: buildSelfVerificationBlock(ctx.taskId)
11027
+ },
11028
+ {
11029
+ id: "fulfill_brief.final_output",
11030
+ source: "final_output",
11031
+ body: buildFinalOutputBlock({
11032
+ taskType: "fulfill_brief",
11033
+ outputSchemaName: "FulfillBriefOutput",
11034
+ shapeSketch: [
11035
+ "{",
11036
+ " \"branch\": \"<branch-name>\",",
11037
+ " \"commits\": [{ \"sha\": \"...\", \"message\": \"...\", \"diaryEntryId\": \"...\" }],",
11038
+ " \"pullRequestUrl\": \"<url-or-null>\",",
11039
+ " \"diaryEntryIds\": [\"...\"],",
11040
+ " \"summary\": \"<1-3 sentence recap>\",",
11041
+ " \"verification\": <required iff input.successCriteria; see Self-verification>",
11042
+ "}"
11043
+ ].join("\n")
11044
+ })
11045
+ }
11046
+ ]);
10802
11047
  }
10803
11048
  //#endregion
10804
11049
  //#region ../agent-runtime/src/prompts/judge-eval-attempt.ts
@@ -10807,46 +11052,18 @@ function buildJudgeEvalAttemptUserPrompt(input, ctx) {
10807
11052
  if (!rubric) throw new Error("judge_eval_attempt requires successCriteria.rubric — none present");
10808
11053
  const escapeCell = (s) => s.replace(/\\/g, "\\\\").replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
10809
11054
  const criteriaTable = rubric.criteria.map((c) => `| \`${c.id}\` | ${c.weight.toFixed(3)} | ${c.scoring} | ${escapeCell(c.description)} |`).join("\n");
10810
- const finalOutputBlock = buildFinalOutputBlock({
10811
- taskType: "judge_eval_attempt",
10812
- outputSchemaName: "JudgeEvalAttemptOutput",
10813
- shapeSketch: [
10814
- "{",
10815
- ` "targetTaskId": "${input.targetTaskId}",`,
10816
- ` "targetAttemptN": ${input.targetAttemptN},`,
10817
- " \"variantLabel\": \"<from producer input>\",",
10818
- " \"scores\": [ { \"criterionId\": \"...\", \"score\": 0..1, \"rationale\": \"...\", \"assertions\": [...]? } ],",
10819
- " \"composite\": <Σ(weight × score), 0..1>,",
10820
- " \"verdict\": \"<1-3 sentences>\",",
10821
- " \"judgeModel\": \"<id>\", // optional",
10822
- " \"traceparent\": \"<from claim>\"",
10823
- "}"
10824
- ].join("\n")
10825
- });
10826
- const workspaceSection = ctx.workspace?.attached === true ? [
10827
- "### Workspace",
11055
+ const header = [
11056
+ "# Judge Eval Attempt",
10828
11057
  "",
10829
- "Your current workspace is already attached to the producer attempt",
10830
- "you are judging. Inspect files directly from the current workspace",
10831
- "root instead of inventing synthetic `artifact_<taskId>` paths.",
10832
- "If the accepted attempt output lists `artifacts[].path`, treat those",
10833
- "paths as relative to the current workspace root unless the output",
10834
- "explicitly says otherwise.",
10835
- ctx.workspace.mode === "dedicated_worktree" ? `This attachment is a dedicated producer worktree${ctx.workspace.branch ? ` on branch \`${ctx.workspace.branch}\`` : ""}.` : ctx.workspace.mode === "scratch_mount" ? "This workspace is a fresh judge-owned scratch copy of the producer workspace." : "This attachment is the producer shared workspace mounted with shadow writes for safe inspection.",
10836
- ""
10837
- ].join("\n") : "";
10838
- return [
10839
- "# Judge Eval Attempt\n",
10840
11058
  "You are grading one accepted `run_eval` producer attempt against a hidden",
10841
11059
  "judge rubric. Do not delegate to subagents. Grade in this session only.",
10842
11060
  "",
10843
11061
  `Task id: \`${ctx.taskId}\``,
10844
11062
  `Diary: \`${ctx.diaryId}\``,
10845
11063
  `Producer task: \`${input.targetTaskId}\``,
10846
- `Producer attempt: \`${input.targetAttemptN}\``,
10847
- "",
10848
- "### Evidence gathering",
10849
- "",
11064
+ `Producer attempt: \`${input.targetAttemptN}\``
11065
+ ].join("\n");
11066
+ const evidence = [
10850
11067
  `1. Call \`moltnet_get_task\` with taskId=\`${input.targetTaskId}\`.`,
10851
11068
  `2. Call \`moltnet_list_task_attempts\` with taskId=\`${input.targetTaskId}\` and inspect the accepted attempt matching \`${input.targetAttemptN}\`.`,
10852
11069
  `3. Call \`moltnet_list_task_messages\` with taskId=\`${input.targetTaskId}\`, attemptN=\`${input.targetAttemptN}\` to inspect the producer's turn-by-turn behavior.`,
@@ -10854,32 +11071,82 @@ function buildJudgeEvalAttemptUserPrompt(input, ctx) {
10854
11071
  " artifacts or workspace evidence available in your environment.",
10855
11072
  " Read artifact files from the mounted producer workspace when present;",
10856
11073
  " do not assume detached `artifact_<taskId>` directories exist.",
10857
- "5. Score strictly against the rubric below.",
10858
- "",
10859
- workspaceSection,
10860
- "### Rubric",
10861
- "",
10862
- rubric.preamble ? `${rubric.preamble}\n` : "",
11074
+ "5. Score strictly against the rubric below."
11075
+ ].join("\n");
11076
+ const workspace = ctx.workspace?.attached === true ? [
11077
+ "Your current workspace is already attached to the producer attempt",
11078
+ "you are judging. Inspect files directly from the current workspace",
11079
+ "root instead of inventing synthetic `artifact_<taskId>` paths.",
11080
+ "If the accepted attempt output lists `artifacts[].path`, treat those",
11081
+ "paths as relative to the current workspace root unless the output",
11082
+ "explicitly says otherwise.",
11083
+ ctx.workspace.mode === "dedicated_worktree" ? `This attachment is a dedicated producer worktree${ctx.workspace.branch ? ` on branch \`${ctx.workspace.branch}\`` : ""}.` : ctx.workspace.mode === "scratch_mount" ? "This workspace is a fresh judge-owned scratch copy of the producer workspace." : "This attachment is the producer shared workspace mounted with shadow writes for safe inspection."
11084
+ ].join("\n") : "";
11085
+ const rubricBody = [
11086
+ rubric.preamble ?? "",
10863
11087
  "| Criterion | Weight | Scoring | Description |",
10864
11088
  "| --- | --- | --- | --- |",
10865
- criteriaTable,
10866
- "",
10867
- "### Composite arithmetic",
10868
- "",
10869
- "Your `composite` MUST equal `Σ(criterion.weight × score)` over the rubric",
10870
- "criteria. Drift > 0.001 is rejected.",
10871
- "",
10872
- finalOutputBlock
11089
+ criteriaTable
10873
11090
  ].filter((s) => s !== "").join("\n");
11091
+ const composite = ["Your `composite` MUST equal `Σ(criterion.weight × score)` over the rubric", "criteria. Drift > 0.001 is rejected."].join("\n");
11092
+ return assembleTaskPrompt("judge_eval_attempt", [
11093
+ {
11094
+ id: "judge_eval_attempt.header",
11095
+ source: "header",
11096
+ body: header
11097
+ },
11098
+ {
11099
+ id: "judge_eval_attempt.evidence",
11100
+ source: "evidence",
11101
+ header: "Evidence gathering",
11102
+ body: evidence
11103
+ },
11104
+ {
11105
+ id: "judge_eval_attempt.workspace",
11106
+ source: "workspace",
11107
+ header: "Workspace",
11108
+ body: workspace
11109
+ },
11110
+ {
11111
+ id: "judge_eval_attempt.rubric",
11112
+ source: "rubric_judge",
11113
+ header: "Rubric",
11114
+ body: rubricBody
11115
+ },
11116
+ {
11117
+ id: "judge_eval_attempt.composite",
11118
+ source: "rubric_judge",
11119
+ header: "Composite arithmetic",
11120
+ body: composite
11121
+ },
11122
+ {
11123
+ id: "judge_eval_attempt.final_output",
11124
+ source: "final_output",
11125
+ body: buildFinalOutputBlock({
11126
+ taskType: "judge_eval_attempt",
11127
+ outputSchemaName: "JudgeEvalAttemptOutput",
11128
+ shapeSketch: [
11129
+ "{",
11130
+ ` "targetTaskId": "${input.targetTaskId}",`,
11131
+ ` "targetAttemptN": ${input.targetAttemptN},`,
11132
+ " \"variantLabel\": \"<from producer input>\",",
11133
+ " \"scores\": [ { \"criterionId\": \"...\", \"score\": 0..1, \"rationale\": \"...\", \"assertions\": [...]? } ],",
11134
+ " \"composite\": <Σ(weight × score), 0..1>,",
11135
+ " \"verdict\": \"<1-3 sentences>\",",
11136
+ " \"judgeModel\": \"<id>\", // optional",
11137
+ " \"traceparent\": \"<from claim>\"",
11138
+ "}"
11139
+ ].join("\n")
11140
+ })
11141
+ }
11142
+ ]);
10874
11143
  }
10875
11144
  //#endregion
10876
11145
  //#region ../agent-runtime/src/prompts/judge-pack.ts
10877
11146
  function buildJudgePackUserPrompt(input, ctx) {
10878
11147
  const { renderedPackId, sourcePackId, successCriteria } = input;
10879
11148
  const rubric = successCriteria.rubric;
10880
- const criteriaList = renderRubricCriteriaList(rubric);
10881
- const preambleSection = renderRubricPreambleSection(rubric);
10882
- return [
11149
+ const header = [
10883
11150
  "# Judge Pack Agent",
10884
11151
  "",
10885
11152
  "You are an independent judge. You did NOT curate or render the pack",
@@ -10888,17 +11155,15 @@ function buildJudgePackUserPrompt(input, ctx) {
10888
11155
  "referenced entries — but do NOT modify anything.",
10889
11156
  "",
10890
11157
  `Your diary ID is: ${ctx.diaryId}`,
10891
- `This task's id is: ${ctx.taskId}`,
10892
- "",
10893
- "## Target",
10894
- "",
11158
+ `This task's id is: ${ctx.taskId}`
11159
+ ].join("\n");
11160
+ const target = [
10895
11161
  `- **Rendered pack**: \`${renderedPackId}\``,
10896
11162
  `- **Source pack**: \`${sourcePackId}\``,
10897
- `- **Rubric**: \`${rubric.rubricId}\` v${rubric.version}`,
10898
- "",
10899
- preambleSection,
10900
- "## Workflow",
10901
- "",
11163
+ `- **Rubric**: \`${rubric.rubricId}\` v${rubric.version}`
11164
+ ].join("\n");
11165
+ const preamble = renderRubricPreambleSection(rubric) ?? "";
11166
+ const workflow = [
10902
11167
  "1. Call `moltnet_rendered_pack_get` for the rendered pack. Keep the",
10903
11168
  " `content` string — you will score it.",
10904
11169
  "2. Call `moltnet_pack_get` with `expandEntries: true` for the source",
@@ -10906,14 +11171,10 @@ function buildJudgePackUserPrompt(input, ctx) {
10906
11171
  "3. For each criterion, score according to its `scoring` mode (see",
10907
11172
  " Scoring rules below). Produce rationales where required.",
10908
11173
  "4. Compute `composite = Σ(weight_i × score_i)` and sanity-check it",
10909
- " equals the sum you will emit — the runtime rejects mismatches.",
10910
- "",
10911
- "## Criteria",
10912
- "",
10913
- criteriaList,
10914
- "",
10915
- "### Scoring rules",
10916
- "",
11174
+ " equals the sum you will emit — the runtime rejects mismatches."
11175
+ ].join("\n");
11176
+ const criteria = renderRubricCriteriaList(rubric);
11177
+ const scoring = [
10917
11178
  "- `llm_score`: score 0..1 continuous. `rationale` REQUIRED (2–4",
10918
11179
  " sentences pointing at specific evidence in the rendered content or",
10919
11180
  " the source entries). NOTE: this mode smooths individual failures",
@@ -10952,80 +11213,95 @@ function buildJudgePackUserPrompt(input, ctx) {
10952
11213
  "- `deterministic_coverage_check`: for every source entry, check",
10953
11214
  " whether its `entryId` (or a stable reference like title + CID",
10954
11215
  " prefix) appears in the rendered `content`. Score 1 iff coverage is",
10955
- " complete; otherwise 0. Populate `evidence` with `{ covered, total, missing: [entryIds] }`.",
10956
- "",
10957
- "## Constraints",
10958
- "",
11216
+ " complete; otherwise 0. Populate `evidence` with `{ covered, total, missing: [entryIds] }`."
11217
+ ].join("\n");
11218
+ const constraints = [
10959
11219
  "- Do NOT call `moltnet_pack_create` or `moltnet_pack_render`.",
10960
11220
  "- Do NOT fetch the curator's or renderer's task output directly — they",
10961
11221
  " may leak guidance that biases judgment.",
10962
11222
  "- Keep the session focused on scoring; no speculative exploration.",
10963
11223
  "",
10964
- `Write a signed diary entry (tags: \`judgment\`, \`judge_pack\`, \`rubric:${rubric.rubricId}\`) capturing the rationale before`,
10965
- "reporting structured output.",
10966
- "",
10967
- buildFinalOutputBlock({
10968
- taskType: "judge_pack",
10969
- outputSchemaName: "JudgePackOutput",
10970
- shapeSketch: [
10971
- "{",
10972
- " \"scores\": [",
10973
- " { \"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {} },",
10974
- " {",
10975
- " \"criterionId\": \"<llm_checklist criterion>\",",
10976
- " \"score\": 0, // 1 iff every assertion passed",
10977
- " \"assertions\": [",
10978
- " { \"id\": \"claim-1\", \"text\": \"...\", \"passed\": false, \"evidence\": \"...\" }",
10979
- " ]",
10980
- " }",
10981
- " ],",
10982
- " \"composite\": <sum-of-weighted-scores>,",
10983
- " \"verdict\": \"<1-3 sentence overall>\",",
10984
- " \"judgeModel\": \"<provider:model>\",",
10985
- " \"rendererBinaryCid\": \"<cid-string-only-if-available>\"",
10986
- "}"
10987
- ].join("\n"),
10988
- extraNotes: [
10989
- "Omit `rendererBinaryCid` entirely when no binary CID is exposed by",
10990
- "`moltnet_rendered_pack_get`. Do NOT emit `null` — the field is",
10991
- "optional and absence is the correct representation when unavailable."
10992
- ]
10993
- })
10994
- ].filter((l) => l !== null).join("\n");
11224
+ `Write a signed diary entry (tags: \`judgment\`, \`judge_pack\`, \`rubric:${rubric.rubricId}\`) capturing the rationale before reporting structured output.`
11225
+ ].join("\n");
11226
+ return assembleTaskPrompt("judge_pack", [
11227
+ {
11228
+ id: "judge_pack.header",
11229
+ source: "header",
11230
+ body: header
11231
+ },
11232
+ {
11233
+ id: "judge_pack.target",
11234
+ source: "task_input",
11235
+ header: "Target",
11236
+ body: target
11237
+ },
11238
+ {
11239
+ id: "judge_pack.preamble",
11240
+ source: "rubric_judge",
11241
+ body: preamble
11242
+ },
11243
+ {
11244
+ id: "judge_pack.workflow",
11245
+ source: "static",
11246
+ header: "Workflow",
11247
+ body: workflow
11248
+ },
11249
+ {
11250
+ id: "judge_pack.criteria",
11251
+ source: "rubric_judge",
11252
+ header: "Criteria",
11253
+ body: criteria
11254
+ },
11255
+ {
11256
+ id: "judge_pack.scoring",
11257
+ source: "rubric_judge",
11258
+ header: "Scoring rules",
11259
+ body: scoring
11260
+ },
11261
+ {
11262
+ id: "judge_pack.constraints",
11263
+ source: "static",
11264
+ header: "Constraints",
11265
+ body: constraints
11266
+ },
11267
+ {
11268
+ id: "judge_pack.final_output",
11269
+ source: "final_output",
11270
+ body: buildFinalOutputBlock({
11271
+ taskType: "judge_pack",
11272
+ outputSchemaName: "JudgePackOutput",
11273
+ shapeSketch: [
11274
+ "{",
11275
+ " \"scores\": [",
11276
+ " { \"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {} },",
11277
+ " {",
11278
+ " \"criterionId\": \"<llm_checklist criterion>\",",
11279
+ " \"score\": 0, // 1 iff every assertion passed",
11280
+ " \"assertions\": [",
11281
+ " { \"id\": \"claim-1\", \"text\": \"...\", \"passed\": false, \"evidence\": \"...\" }",
11282
+ " ]",
11283
+ " }",
11284
+ " ],",
11285
+ " \"composite\": <sum-of-weighted-scores>,",
11286
+ " \"verdict\": \"<1-3 sentence overall>\",",
11287
+ " \"judgeModel\": \"<provider:model>\",",
11288
+ " \"rendererBinaryCid\": \"<cid-string-only-if-available>\"",
11289
+ "}"
11290
+ ].join("\n"),
11291
+ extraNotes: [
11292
+ "Omit `rendererBinaryCid` entirely when no binary CID is exposed by",
11293
+ "`moltnet_rendered_pack_get`. Do NOT emit `null` — the field is",
11294
+ "optional and absence is the correct representation when unavailable."
11295
+ ]
11296
+ })
11297
+ }
11298
+ ]);
10995
11299
  }
10996
11300
  //#endregion
10997
11301
  //#region ../agent-runtime/src/prompts/pr-review.ts
10998
11302
  function buildPrReviewUserPrompt(input, ctx) {
10999
11303
  const rubric = input.successCriteria.rubric;
11000
- const criteriaList = renderRubricCriteriaList(rubric);
11001
- const preambleSection = renderRubricPreambleSection(rubric);
11002
- const taskPromptSection = input.taskPrompt ? [
11003
- "## Task-specific instructions",
11004
- "",
11005
- input.taskPrompt,
11006
- ""
11007
- ].join("\n") : "";
11008
- const resourceSection = input.subject.resourceUrls && input.subject.resourceUrls.length > 0 ? [
11009
- "### Resources",
11010
- "",
11011
- ...input.subject.resourceUrls.map((url) => `- ${url}`),
11012
- ""
11013
- ].join("\n") : "";
11014
- const hintsSection = input.subject.inspectionHints && input.subject.inspectionHints.length > 0 ? [
11015
- "### Inspection hints",
11016
- "",
11017
- ...input.subject.inspectionHints.map((hint) => `- ${hint}`),
11018
- ""
11019
- ].join("\n") : "";
11020
- const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
11021
- "### Workspace",
11022
- "",
11023
- "This review attempt is running inside a dedicated disposable git",
11024
- "worktree. Inspect and reason inside this workspace only.",
11025
- ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`.` : "The current checkout is disposable and will be cleaned up when the task ends.",
11026
- ""
11027
- ].join("\n") : "";
11028
- return [
11304
+ const header = [
11029
11305
  "# Review Agent",
11030
11306
  "",
11031
11307
  "You are an independent judge. You did NOT produce the subject under review.",
@@ -11033,29 +11309,30 @@ function buildPrReviewUserPrompt(input, ctx) {
11033
11309
  "You may inspect the local workspace and the referenced resources, but do NOT modify anything.",
11034
11310
  "",
11035
11311
  `Your diary ID is: ${ctx.diaryId}`,
11036
- `This task's id is: ${ctx.taskId}`,
11037
- "",
11038
- "## Subject",
11039
- "",
11312
+ `This task's id is: ${ctx.taskId}`
11313
+ ].join("\n");
11314
+ const subject = [
11040
11315
  `**Title:** ${input.subject.title}`,
11041
11316
  "",
11042
- input.subject.summary,
11043
- "",
11044
- resourceSection,
11045
- hintsSection,
11046
- workspaceSection,
11047
- "### Execution contract",
11048
- "",
11317
+ input.subject.summary
11318
+ ].join("\n");
11319
+ const resources = input.subject.resourceUrls && input.subject.resourceUrls.length > 0 ? input.subject.resourceUrls.map((url) => `- ${url}`).join("\n") : "";
11320
+ const hints = input.subject.inspectionHints && input.subject.inspectionHints.length > 0 ? input.subject.inspectionHints.map((hint) => `- ${hint}`).join("\n") : "";
11321
+ const workspace = ctx.workspace?.mode === "dedicated_worktree" ? [
11322
+ "This review attempt is running inside a dedicated disposable git",
11323
+ "worktree. Inspect and reason inside this workspace only.",
11324
+ ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`.` : "The current checkout is disposable and will be cleaned up when the task ends."
11325
+ ].join("\n") : "";
11326
+ const executionContract = [
11049
11327
  "Treat the provided subject, resources, inspection hints, and any",
11050
11328
  "task-specific instructions as the full",
11051
11329
  "review contract for this task.",
11052
11330
  "",
11053
11331
  "If the task-specific instructions or inspection hints require an outward action tied to the review",
11054
11332
  "(for example publishing the judgment somewhere), perform that action as",
11055
- "part of the task before reporting structured output.",
11056
- "",
11057
- "## Review workflow",
11058
- "",
11333
+ "part of the task before reporting structured output."
11334
+ ].join("\n");
11335
+ const workflow = [
11059
11336
  "1. Read the subject summary, resources, inspection hints, and any",
11060
11337
  " task-specific instructions before scoring.",
11061
11338
  "2. Inspect the target artefact directly using the tools and resources the",
@@ -11065,39 +11342,104 @@ function buildPrReviewUserPrompt(input, ctx) {
11065
11342
  "4. Apply the rubric strictly. This task is about complexity and",
11066
11343
  " reviewability, not correctness or feature desirability.",
11067
11344
  "5. Perform any required outward action before emitting the final",
11068
- " structured output.",
11069
- "",
11070
- taskPromptSection,
11071
- preambleSection,
11072
- "## Criteria",
11073
- "",
11074
- criteriaList,
11075
- "",
11076
- "### Scoring rules",
11077
- "",
11345
+ " structured output."
11346
+ ].join("\n");
11347
+ const taskPromptSection = input.taskPrompt ?? "";
11348
+ const preamble = renderRubricPreambleSection(rubric) ?? "";
11349
+ const criteria = renderRubricCriteriaList(rubric);
11350
+ const scoring = [
11078
11351
  "- Every criterion uses binary scoring only.",
11079
11352
  "- Score `1` when the subject clearly clears the criterion.",
11080
11353
  "- Score `0` when it does not, or when the evidence is ambiguous.",
11081
11354
  "- `rationale` is REQUIRED for every score. Keep it concrete and audit-friendly.",
11082
11355
  "- Compute `composite = Σ(weight_i × score_i)` exactly; the runtime rejects mismatches.",
11083
11356
  "",
11084
- "Write a signed diary entry (tags: `judgment`, `pr_review`) capturing the rationale before reporting structured output.",
11085
- "",
11086
- buildFinalOutputBlock({
11087
- taskType: "pr_review",
11088
- outputSchemaName: "PrReviewOutput",
11089
- shapeSketch: [
11090
- "{",
11091
- " \"scores\": [",
11092
- " { \"criterionId\": \"...\", \"score\": 0, \"rationale\": \"...\" }",
11093
- " ],",
11094
- " \"composite\": <sum-of-weighted-binary-scores>,",
11095
- " \"verdict\": \"<1-3 sentence overall>\"",
11096
- "}"
11097
- ].join("\n"),
11098
- extraNotes: ["`scores` MUST stay in the same order as the rubric criteria.", "`score` MUST be exactly `0` or `1` for every criterion."]
11099
- })
11100
- ].filter(Boolean).join("\n");
11357
+ "Write a signed diary entry (tags: `judgment`, `pr_review`) capturing the rationale before reporting structured output."
11358
+ ].join("\n");
11359
+ return assembleTaskPrompt("pr_review", [
11360
+ {
11361
+ id: "pr_review.header",
11362
+ source: "header",
11363
+ body: header
11364
+ },
11365
+ {
11366
+ id: "pr_review.subject",
11367
+ source: "task_input",
11368
+ header: "Subject",
11369
+ body: subject
11370
+ },
11371
+ {
11372
+ id: "pr_review.resources",
11373
+ source: "task_input",
11374
+ header: "Resources",
11375
+ body: resources
11376
+ },
11377
+ {
11378
+ id: "pr_review.hints",
11379
+ source: "task_input",
11380
+ header: "Inspection hints",
11381
+ body: hints
11382
+ },
11383
+ {
11384
+ id: "pr_review.workspace",
11385
+ source: "workspace",
11386
+ header: "Workspace",
11387
+ body: workspace
11388
+ },
11389
+ {
11390
+ id: "pr_review.execution_contract",
11391
+ source: "static",
11392
+ header: "Execution contract",
11393
+ body: executionContract
11394
+ },
11395
+ {
11396
+ id: "pr_review.workflow",
11397
+ source: "static",
11398
+ header: "Review workflow",
11399
+ body: workflow
11400
+ },
11401
+ {
11402
+ id: "pr_review.task_prompt",
11403
+ source: "task_input",
11404
+ header: "Task-specific instructions",
11405
+ body: taskPromptSection
11406
+ },
11407
+ {
11408
+ id: "pr_review.preamble",
11409
+ source: "rubric_judge",
11410
+ body: preamble
11411
+ },
11412
+ {
11413
+ id: "pr_review.criteria",
11414
+ source: "rubric_judge",
11415
+ header: "Criteria",
11416
+ body: criteria
11417
+ },
11418
+ {
11419
+ id: "pr_review.scoring",
11420
+ source: "rubric_judge",
11421
+ header: "Scoring rules",
11422
+ body: scoring
11423
+ },
11424
+ {
11425
+ id: "pr_review.final_output",
11426
+ source: "final_output",
11427
+ body: buildFinalOutputBlock({
11428
+ taskType: "pr_review",
11429
+ outputSchemaName: "PrReviewOutput",
11430
+ shapeSketch: [
11431
+ "{",
11432
+ " \"scores\": [",
11433
+ " { \"criterionId\": \"...\", \"score\": 0, \"rationale\": \"...\" }",
11434
+ " ],",
11435
+ " \"composite\": <sum-of-weighted-binary-scores>,",
11436
+ " \"verdict\": \"<1-3 sentence overall>\"",
11437
+ "}"
11438
+ ].join("\n"),
11439
+ extraNotes: ["`scores` MUST stay in the same order as the rubric criteria.", "`score` MUST be exactly `0` or `1` for every criterion."]
11440
+ })
11441
+ }
11442
+ ]);
11101
11443
  }
11102
11444
  //#endregion
11103
11445
  //#region ../agent-runtime/src/prompts/render-pack.ts
@@ -11107,7 +11449,7 @@ function buildPrReviewUserPrompt(input, ctx) {
11107
11449
  */
11108
11450
  function buildRenderPackUserPrompt(input, ctx) {
11109
11451
  const { packId, persist = true, pinned = false } = input;
11110
- return [
11452
+ const header = [
11111
11453
  "# Render Pack Agent",
11112
11454
  "",
11113
11455
  "You are rendering a context pack to markdown. Step 2 of the",
@@ -11115,16 +11457,14 @@ function buildRenderPackUserPrompt(input, ctx) {
11115
11457
  "a third will judge the rendering. You must NOT judge it here.",
11116
11458
  "",
11117
11459
  `Your agent-session diary ID is: ${ctx.diaryId}`,
11118
- `This task's id is: ${ctx.taskId}`,
11119
- "",
11120
- "## Input",
11121
- "",
11460
+ `This task's id is: ${ctx.taskId}`
11461
+ ].join("\n");
11462
+ const inputBlock = [
11122
11463
  `- **Pack**: \`${packId}\``,
11123
11464
  `- **Persist**: \`${persist}\``,
11124
- `- **Pinned**: \`${pinned}\``,
11125
- "",
11126
- "## Workflow",
11127
- "",
11465
+ `- **Pinned**: \`${pinned}\``
11466
+ ].join("\n");
11467
+ const workflow = [
11128
11468
  "1. Call `moltnet_pack_get` with `expandEntries: true` to inspect the",
11129
11469
  " source entries. Read it — you need the entry count for your output.",
11130
11470
  "2. Call `moltnet_pack_render` with:",
@@ -11132,16 +11472,14 @@ function buildRenderPackUserPrompt(input, ctx) {
11132
11472
  ` - \`persist\`: \`${persist}\``,
11133
11473
  ` - \`pinned\`: \`${pinned}\``,
11134
11474
  " Record the returned `renderedPackId`, `cid`, `renderMethod`, and",
11135
- " `content` byte length.",
11136
- "",
11137
- "## Constraints",
11138
- "",
11475
+ " `content` byte length."
11476
+ ].join("\n");
11477
+ const constraints = [
11139
11478
  "- Do NOT modify the source pack or its entries.",
11140
11479
  "- Do NOT write diary entries unless a genuine incident occurs",
11141
- " (rendering failure, invariant violation).",
11142
- "",
11143
- "## Fidelity Discipline",
11144
- "",
11480
+ " (rendering failure, invariant violation)."
11481
+ ].join("\n");
11482
+ const fidelity = [
11145
11483
  "These rules apply when you are producing the markdown yourself rather",
11146
11484
  "than relying on a deterministic `server:*` renderer.",
11147
11485
  "",
@@ -11161,25 +11499,63 @@ function buildRenderPackUserPrompt(input, ctx) {
11161
11499
  " completeness. Optimize for \"no detectable drift across a",
11162
11500
  " claim-by-claim audit\", not \"shorter at any cost\". When compressing, prefer",
11163
11501
  " tightening prose around a quote rather than altering the quote,",
11164
- " and prefer summarising a list over silently truncating it.",
11165
- "",
11166
- buildSelfVerificationBlock(ctx.taskId),
11167
- buildFinalOutputBlock({
11168
- taskType: "render_pack",
11169
- outputSchemaName: "RenderPackOutput",
11170
- shapeSketch: [
11171
- "{",
11172
- " \"renderedPackId\": \"<uuid-or-null>\",",
11173
- " \"renderedCid\": \"<cid>\",",
11174
- " \"renderMethod\": \"<label>\",",
11175
- " \"byteSize\": <int>,",
11176
- " \"entriesRendered\": <int>,",
11177
- " \"summary\": \"<1-3 sentence recap>\",",
11178
- " \"verification\": <required iff input.successCriteria; see Self-verification>",
11179
- "}"
11180
- ].join("\n")
11181
- })
11502
+ " and prefer summarising a list over silently truncating it."
11182
11503
  ].join("\n");
11504
+ return assembleTaskPrompt("render_pack", [
11505
+ {
11506
+ id: "render_pack.header",
11507
+ source: "header",
11508
+ body: header
11509
+ },
11510
+ {
11511
+ id: "render_pack.input",
11512
+ source: "task_input",
11513
+ header: "Input",
11514
+ body: inputBlock
11515
+ },
11516
+ {
11517
+ id: "render_pack.workflow",
11518
+ source: "static",
11519
+ header: "Workflow",
11520
+ body: workflow
11521
+ },
11522
+ {
11523
+ id: "render_pack.constraints",
11524
+ source: "static",
11525
+ header: "Constraints",
11526
+ body: constraints
11527
+ },
11528
+ {
11529
+ id: "render_pack.fidelity",
11530
+ source: "static",
11531
+ header: "Fidelity Discipline",
11532
+ body: fidelity
11533
+ },
11534
+ {
11535
+ id: "render_pack.verification",
11536
+ source: "verification",
11537
+ body: buildSelfVerificationBlock(ctx.taskId)
11538
+ },
11539
+ {
11540
+ id: "render_pack.final_output",
11541
+ source: "final_output",
11542
+ body: buildFinalOutputBlock({
11543
+ taskType: "render_pack",
11544
+ outputSchemaName: "RenderPackOutput",
11545
+ shapeSketch: [
11546
+ "{",
11547
+ " \"renderedPackId\": \"<uuid-or-null>\",",
11548
+ " \"renderedCid\": \"<cid>\",",
11549
+ " \"renderMethod\": \"<label>\",",
11550
+ " \"byteSize\": <int>,",
11551
+ " \"entriesRendered\": <int>,",
11552
+ " \"summary\": \"<1-3 sentence recap>\",",
11553
+ " \"verification\": <required iff input.successCriteria; see Self-verification>",
11554
+ "}"
11555
+ ].join("\n")
11556
+ })
11557
+ }
11558
+ ]);
11183
11559
  }
11184
11560
  //#endregion
11185
11561
  //#region ../agent-runtime/src/prompts/run-eval.ts
@@ -11188,8 +11564,7 @@ function buildRenderPackUserPrompt(input, ctx) {
11188
11564
  *
11189
11565
  * Free-form: no git workflow, no commit ceremony. The executor produces
11190
11566
  * a textual response (and optional file artifacts) that later
11191
- * `judge_eval_attempt` task(s) grade against their own hidden
11192
- * rubric.
11567
+ * `judge_eval_attempt` task(s) grade against their own hidden rubric.
11193
11568
  *
11194
11569
  * Context delivery is handled by `resolveTaskContext` (see
11195
11570
  * libs/agent-runtime/src/context-bindings.ts) and runs BEFORE this
@@ -11197,50 +11572,44 @@ function buildRenderPackUserPrompt(input, ctx) {
11197
11572
  * the body, `skill` items are persisted at the runtime's skill path,
11198
11573
  * and `user_inline` items are appended to the first user message. This
11199
11574
  * builder does NOT inline `input.context[]` itself.
11575
+ *
11576
+ * Prompt-shape notes (issue #1175, area 1):
11577
+ * - No `Correlation` section: the agent never acts on it. The id is
11578
+ * still carried on attempt event metadata for cross-variant queries.
11579
+ * - No `Execution mode` section: the workspace already reflects the
11580
+ * chosen mode by its shape (scratch/shared mount/dedicated worktree).
11581
+ * Restating it as text adds noise without changing model behavior.
11582
+ * - The "Injected Task Context" phrase is used identically in this
11583
+ * prompt's discipline section and in the materialized context block
11584
+ * header (see context-bindings.ts) so weaker models see one repeated
11585
+ * anchor.
11586
+ * - The discipline copy demands the model encode injected constraints
11587
+ * into the code path itself, not into comments or the verification
11588
+ * field. Quoting the constraint back is not following the task.
11200
11589
  */
11201
11590
  function buildRunEvalUserPrompt(input, ctx) {
11202
- const { scenario, variantLabel, execution, successCriteria } = input;
11591
+ const { scenario, variantLabel, successCriteria } = input;
11203
11592
  const hasContext = input.context.length > 0;
11204
11593
  const hasInlineContext = input.context.some((entry) => entry.binding === "context_inline");
11205
- const inputFilesSection = scenario.inputFiles?.length ? [
11206
- "### Input files",
11207
- "",
11208
- ...scenario.inputFiles.map((f) => `- \`${f}\``),
11209
- ""
11210
- ].join("\n") : "";
11211
- const verificationSection = successCriteria ? buildSelfVerificationBlock(ctx.taskId) : "";
11212
- const correlationSection = ctx.correlationId ? [
11213
- "### Correlation",
11214
- "",
11215
- `This task carries correlationId \`${ctx.correlationId}\`. It joins`,
11216
- "this variant to its sibling `run_eval` tasks (other variants of the",
11217
- "same scenario and to any later `judge_eval_attempt` tasks created",
11218
- "against those variants. You do not need to act on it directly — it",
11219
- "is recorded for cross-variant aggregation at query time.",
11220
- ""
11221
- ].join("\n") : "";
11222
- const executionSection = [
11223
- "### Execution mode",
11224
- "",
11225
- `Mode: \`${execution.mode}\``,
11226
- `Workspace: \`${execution.workspace}\``,
11227
- execution.workspace === "none" ? "You are running in a scratch workspace with no repository checkout mounted. Do not assume git history or repo files are present unless the scenario provided them explicitly." : execution.workspace === "shared_mount" ? "You are running against the daemon shared mount. Treat any repository mutations as affecting the mounted checkout directly." : "You are running in a dedicated disposable git worktree isolated from the daemon shared checkout.",
11228
- ""
11229
- ].join("\n");
11230
- const contextDisciplineSection = hasContext ? [
11231
- "### Injected context discipline",
11594
+ const header = `# Run Eval Agent\n\nYou are running an evaluation scenario as variant \`${variantLabel}\`.\nTask id: \`${ctx.taskId}\``;
11595
+ const contextDiscipline = hasContext ? [
11596
+ "This task includes Injected Task Context supplied by the task",
11597
+ "creator. You MUST inspect it BEFORE you write solution files or",
11598
+ "draft your final answer — not after.",
11232
11599
  "",
11233
- "This task includes extra injected context from the task creator.",
11234
- "You MUST inspect and use that context BEFORE you write solution",
11235
- "files or draft your final answer.",
11236
- "Do not solve first and only review the context afterward.",
11237
- hasInlineContext ? "For `context_inline`, your FIRST content-inspection step should be a `read` of `/workspace/context-pack.md` before your first `write` call. The same content is also mirrored in `/workspace/AGENTS.md` and may be referenced from `/workspace/.claude/CLAUDE.md`." : "If injected context was provided as a skill, inspect that task-injected context before solving.",
11238
- hasInlineContext ? "If `/workspace/context-pack.md` exists and you skip reading it before writing solution files, you are not following the task instructions." : "Do not rely on memory alone when task-injected context is available; inspect it first.",
11239
- "If the injected context contains repo- or workflow-specific rules,",
11240
- "those rules override your generic instincts.",
11241
- ""
11600
+ "Reconcile every constraint from that context **into the code path",
11601
+ "itself**: function bodies, control flow, transaction boundaries,",
11602
+ "guard clauses. Quoting a constraint back in a comment, a",
11603
+ "`// note:` line, the task summary, or the `verification` field is",
11604
+ "NOT following the task. If the constraint affects behavior, it",
11605
+ "must affect behavior.",
11606
+ hasInlineContext ? "For `context_inline`, your FIRST content-inspection step is a `read` of `/workspace/context-pack.md` before your first `write` call. The same content is also mirrored in `/workspace/AGENTS.md` and may be referenced from `/workspace/.claude/CLAUDE.md`." : "When the context is delivered as a skill, inspect it before solving.",
11607
+ "If the Injected Task Context contains repo- or workflow-specific",
11608
+ "rules, those rules override your generic instincts."
11242
11609
  ].join("\n") : "";
11243
- const finalOutputBlock = buildFinalOutputBlock({
11610
+ const inputFiles = scenario.inputFiles?.length ? scenario.inputFiles.map((f) => `- \`${f}\``).join("\n") : "";
11611
+ const verification = successCriteria ? buildSelfVerificationBlock(ctx.taskId) : "";
11612
+ const finalOutput = buildFinalOutputBlock({
11244
11613
  taskType: "run_eval",
11245
11614
  outputSchemaName: "RunEvalOutput",
11246
11615
  shapeSketch: [
@@ -11260,17 +11629,41 @@ function buildRunEvalUserPrompt(input, ctx) {
11260
11629
  "}"
11261
11630
  ].join("\n")
11262
11631
  });
11263
- return [
11264
- "# Run Eval Agent\n",
11265
- `You are running an evaluation scenario as variant \`${variantLabel}\`.\nTask id: \`${ctx.taskId}\`\n`,
11266
- correlationSection,
11267
- executionSection,
11268
- contextDisciplineSection,
11269
- `### Scenario\n\n${scenario.prompt}\n`,
11270
- inputFilesSection,
11271
- verificationSection,
11272
- finalOutputBlock
11273
- ].filter((s) => s !== "").join("\n");
11632
+ return assembleTaskPrompt("run_eval", [
11633
+ {
11634
+ id: "run_eval.header",
11635
+ source: "header",
11636
+ body: header
11637
+ },
11638
+ {
11639
+ id: "run_eval.context_discipline",
11640
+ source: "discipline",
11641
+ header: "Injected Task Context",
11642
+ body: contextDiscipline
11643
+ },
11644
+ {
11645
+ id: "run_eval.scenario",
11646
+ source: "task_input",
11647
+ header: "Scenario",
11648
+ body: scenario.prompt
11649
+ },
11650
+ {
11651
+ id: "run_eval.input_files",
11652
+ source: "task_input",
11653
+ header: "Input files",
11654
+ body: inputFiles
11655
+ },
11656
+ {
11657
+ id: "run_eval.verification",
11658
+ source: "verification",
11659
+ body: verification
11660
+ },
11661
+ {
11662
+ id: "run_eval.final_output",
11663
+ source: "final_output",
11664
+ body: finalOutput
11665
+ }
11666
+ ]);
11274
11667
  }
11275
11668
  //#endregion
11276
11669
  //#region ../agent-runtime/src/prompts/index.ts
@@ -15891,7 +16284,7 @@ async function executePiTask(claimedTask, reporter, opts) {
15891
16284
  });
15892
16285
  let taskPrompt;
15893
16286
  try {
15894
- taskPrompt = buildTaskUserPrompt(task, {
16287
+ const assembled = buildTaskUserPrompt(task, {
15895
16288
  diaryId,
15896
16289
  taskId: task.id,
15897
16290
  workspace: {
@@ -15902,6 +16295,12 @@ async function executePiTask(claimedTask, reporter, opts) {
15902
16295
  },
15903
16296
  extras: opts.promptExtras
15904
16297
  });
16298
+ taskPrompt = assembled.text;
16299
+ await emit("info", {
16300
+ event: "prompt_assembled",
16301
+ taskType: assembled.taskType,
16302
+ sections: assembled.trace
16303
+ });
15905
16304
  } catch (err) {
15906
16305
  const message = err instanceof Error ? err.message : String(err);
15907
16306
  await emit("error", {
@@ -16164,8 +16563,8 @@ async function executePiTask(claimedTask, reporter, opts) {
16164
16563
  }
16165
16564
  else if (submitToolHandle) {
16166
16565
  parseError = {
16167
- code: "output_missing",
16168
- message: "Agent did not submit output through the task submit tool. A valid submit tool call is required to complete this task type."
16566
+ code: "submit_output_missing",
16567
+ message: "Agent did not satisfy the promised submit-output criterion: no valid task submit tool call was captured before the session ended."
16169
16568
  };
16170
16569
  await emit("error", {
16171
16570
  message: parseError.message,