@themoltnet/pi-extension 0.19.4 → 0.19.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +747 -476
- package/package.json +3 -3
package/dist/index.js
CHANGED
|
@@ -9188,7 +9188,7 @@ function validateRubricWeights(rubric) {
|
|
|
9188
9188
|
* attaches to any task type. It has four orthogonal sections — pick
|
|
9189
9189
|
* whichever apply per task type:
|
|
9190
9190
|
*
|
|
9191
|
-
* - `gates`
|
|
9191
|
+
* - `gates` Promise-level structural/process checks
|
|
9192
9192
|
* - `assertions` Declarative claims about output JSON
|
|
9193
9193
|
* - `rubric` Weighted-criteria scoring instrument, reused
|
|
9194
9194
|
* verbatim from `./rubric.ts`.
|
|
@@ -9233,17 +9233,27 @@ var CidEqualsSpec = Type$1.Object({
|
|
|
9233
9233
|
path: Type$1.String({ minLength: 1 }),
|
|
9234
9234
|
expected: Type$1.String({ minLength: 1 })
|
|
9235
9235
|
}, { additionalProperties: false });
|
|
9236
|
-
var
|
|
9236
|
+
var SubmitToolCallGate = Type$1.Object({
|
|
9237
9237
|
id: Type$1.String({ minLength: 1 }),
|
|
9238
|
-
kind: Type$1.Literal("
|
|
9239
|
-
|
|
9240
|
-
required: Type$1.Boolean()
|
|
9241
|
-
}, { additionalProperties: false }), Type$1.Object({
|
|
9242
|
-
id: Type$1.String({ minLength: 1 }),
|
|
9243
|
-
kind: Type$1.Literal("cid-equals"),
|
|
9244
|
-
spec: CidEqualsSpec,
|
|
9238
|
+
kind: Type$1.Literal("submit-tool-call"),
|
|
9239
|
+
description: Type$1.String({ minLength: 1 }),
|
|
9245
9240
|
required: Type$1.Boolean()
|
|
9246
|
-
}, { additionalProperties: false })
|
|
9241
|
+
}, { additionalProperties: false });
|
|
9242
|
+
var Gate = Type$1.Union([
|
|
9243
|
+
SubmitToolCallGate,
|
|
9244
|
+
Type$1.Object({
|
|
9245
|
+
id: Type$1.String({ minLength: 1 }),
|
|
9246
|
+
kind: Type$1.Literal("schema-check"),
|
|
9247
|
+
spec: SchemaCheckSpec,
|
|
9248
|
+
required: Type$1.Boolean()
|
|
9249
|
+
}, { additionalProperties: false }),
|
|
9250
|
+
Type$1.Object({
|
|
9251
|
+
id: Type$1.String({ minLength: 1 }),
|
|
9252
|
+
kind: Type$1.Literal("cid-equals"),
|
|
9253
|
+
spec: CidEqualsSpec,
|
|
9254
|
+
required: Type$1.Boolean()
|
|
9255
|
+
}, { additionalProperties: false })
|
|
9256
|
+
], { $id: "Gate" });
|
|
9247
9257
|
var AssertionOp = Type$1.Union([
|
|
9248
9258
|
Type$1.Literal("exists"),
|
|
9249
9259
|
Type$1.Literal("equals"),
|
|
@@ -10470,6 +10480,32 @@ function submitOutputToolName(taskType) {
|
|
|
10470
10480
|
return `submit_${taskType}_output`;
|
|
10471
10481
|
}
|
|
10472
10482
|
//#endregion
|
|
10483
|
+
//#region ../agent-runtime/src/prompts/assemble.ts
|
|
10484
|
+
/**
|
|
10485
|
+
* Render a `PromptSection[]` into final text + structured trace.
|
|
10486
|
+
* Single source of truth for inter-section spacing and header
|
|
10487
|
+
* rendering across all task types.
|
|
10488
|
+
*/
|
|
10489
|
+
function assembleTaskPrompt(taskType, sections) {
|
|
10490
|
+
const trace = [];
|
|
10491
|
+
const rendered = [];
|
|
10492
|
+
for (const section of sections) {
|
|
10493
|
+
trace.push({
|
|
10494
|
+
id: section.id,
|
|
10495
|
+
source: section.source,
|
|
10496
|
+
header: section.header,
|
|
10497
|
+
char_count: section.body.length
|
|
10498
|
+
});
|
|
10499
|
+
if (section.body === "") continue;
|
|
10500
|
+
rendered.push(section.header ? `## ${section.header}\n\n${section.body}` : section.body);
|
|
10501
|
+
}
|
|
10502
|
+
return {
|
|
10503
|
+
text: rendered.join("\n\n"),
|
|
10504
|
+
trace,
|
|
10505
|
+
taskType
|
|
10506
|
+
};
|
|
10507
|
+
}
|
|
10508
|
+
//#endregion
|
|
10473
10509
|
//#region ../agent-runtime/src/prompts/final-output.ts
|
|
10474
10510
|
function buildFinalOutputBlock(opts) {
|
|
10475
10511
|
const { taskType, outputSchemaName, shapeSketch, extraNotes } = opts;
|
|
@@ -10484,7 +10520,8 @@ function buildFinalOutputBlock(opts) {
|
|
|
10484
10520
|
`The runtime captures the validated arguments and ends the session.`,
|
|
10485
10521
|
`Do NOT emit the output as plain assistant text. Do NOT rely on a`,
|
|
10486
10522
|
`JSON-in-message fallback. If you do not call \`${submitTool}\`, the`,
|
|
10487
|
-
`attempt
|
|
10523
|
+
`attempt is recorded as failing the promised submit-output criterion`,
|
|
10524
|
+
`even if the underlying work succeeded.`,
|
|
10488
10525
|
"",
|
|
10489
10526
|
`Your final assistant text before that tool call may explain your work,`,
|
|
10490
10527
|
`but the submit-tool call itself must be your VERY LAST action.`,
|
|
@@ -10522,37 +10559,17 @@ function renderRubricPreambleSection(rubric) {
|
|
|
10522
10559
|
*
|
|
10523
10560
|
* Design note — no pre-resolved `target` projection
|
|
10524
10561
|
* --------------------------------------------------
|
|
10525
|
-
* Earlier drafts hand-wired a `target` bundle (branch, PR url,
|
|
10526
|
-
*
|
|
10527
|
-
*
|
|
10528
|
-
*
|
|
10529
|
-
*
|
|
10530
|
-
*
|
|
10531
|
-
*
|
|
10532
|
-
* fetching their own data.
|
|
10533
|
-
*
|
|
10534
|
-
* Now: the prompt tells the judge the `targetTaskId` and instructs
|
|
10535
|
-
* it to call `moltnet_get_task` + `moltnet_list_task_attempts`
|
|
10536
|
-
* itself. The judge sees whatever the producer's accepted attempt
|
|
10537
|
-
* actually wrote — no projection, no lossiness, no daemon-side
|
|
10538
|
-
* type knowledge required. Different producers (fulfill_brief,
|
|
10539
|
-
* future task types whose products are docs / configs / changes /
|
|
10540
|
-
* anything) work without any code path here.
|
|
10562
|
+
* Earlier drafts hand-wired a `target` bundle (branch, PR url, commits,
|
|
10563
|
+
* summary, diary entry ids) into the prompt before the judge started.
|
|
10564
|
+
* That coupled the daemon to one specific producer shape, forced every
|
|
10565
|
+
* executor to know how to project it, and went stale every time a
|
|
10566
|
+
* producer task type grew a field. Now: the prompt tells the judge
|
|
10567
|
+
* the `targetTaskId` and instructs it to call `moltnet_get_task` +
|
|
10568
|
+
* `moltnet_list_task_attempts` itself.
|
|
10541
10569
|
*/
|
|
10542
10570
|
function buildAssessBriefUserPrompt(input, ctx) {
|
|
10543
10571
|
const rubric = input.successCriteria.rubric;
|
|
10544
|
-
const
|
|
10545
|
-
const preambleSection = renderRubricPreambleSection(rubric) ?? "";
|
|
10546
|
-
const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
|
|
10547
|
-
"### Workspace",
|
|
10548
|
-
"",
|
|
10549
|
-
"This review attempt is running inside a dedicated disposable git",
|
|
10550
|
-
"worktree created for this task. If you need to check out the target",
|
|
10551
|
-
"branch or inspect refs locally, do it only inside this worktree.",
|
|
10552
|
-
ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`. You may replace it with the target branch locally if that helps your inspection.` : "The current checkout is disposable and will be cleaned up when the task ends.",
|
|
10553
|
-
""
|
|
10554
|
-
].join("\n") : "";
|
|
10555
|
-
return [
|
|
10572
|
+
const header = [
|
|
10556
10573
|
"# Assess Brief Judge",
|
|
10557
10574
|
"",
|
|
10558
10575
|
"You are an independent judge. You did NOT produce the work under review.",
|
|
@@ -10560,10 +10577,9 @@ function buildAssessBriefUserPrompt(input, ctx) {
|
|
|
10560
10577
|
"You may read code, commits, and diary entries — but do NOT modify anything.",
|
|
10561
10578
|
"",
|
|
10562
10579
|
`Your diary ID is: ${ctx.diaryId}`,
|
|
10563
|
-
`This task's id is: ${ctx.taskId}
|
|
10564
|
-
|
|
10565
|
-
|
|
10566
|
-
"",
|
|
10580
|
+
`This task's id is: ${ctx.taskId}`
|
|
10581
|
+
].join("\n");
|
|
10582
|
+
const target = [
|
|
10567
10583
|
`**Producer task id:** \`${input.targetTaskId}\``,
|
|
10568
10584
|
"",
|
|
10569
10585
|
"Investigate the producer task before scoring:",
|
|
@@ -10576,10 +10592,9 @@ function buildAssessBriefUserPrompt(input, ctx) {
|
|
|
10576
10592
|
" - `commits[].sha` listed → use `git show <sha>` for individual commits.",
|
|
10577
10593
|
" - `diaryEntryIds[]` listed → fetch each via `moltnet_get_entry` to read the producer's reasoning.",
|
|
10578
10594
|
" - `summary` set → use as orientation, not as ground truth.",
|
|
10579
|
-
"Adapt your investigation to whatever the output actually contains. Score conservatively when the producer's output is opaque or thin."
|
|
10580
|
-
|
|
10581
|
-
|
|
10582
|
-
"",
|
|
10595
|
+
"Adapt your investigation to whatever the output actually contains. Score conservatively when the producer's output is opaque or thin."
|
|
10596
|
+
].join("\n");
|
|
10597
|
+
const diaryQuery = [
|
|
10583
10598
|
`Beyond the explicit \`diaryEntryIds[]\` from step 3, the producer's`,
|
|
10584
10599
|
"attempts auto-tag every entry with the `task:*` provenance namespace.",
|
|
10585
10600
|
"You can pull the full set without enumerating ids by passing the",
|
|
@@ -10590,38 +10605,84 @@ function buildAssessBriefUserPrompt(input, ctx) {
|
|
|
10590
10605
|
"- Just the accepted attempt: add `attemptN: <acceptedAttemptN>`.",
|
|
10591
10606
|
"- The producer plus any prior chain (when a correlationId was set):",
|
|
10592
10607
|
" read it from the task you fetched in step 1 and pass",
|
|
10593
|
-
" `taskFilter: { correlationId: \"<id>\" }`."
|
|
10594
|
-
|
|
10595
|
-
|
|
10596
|
-
|
|
10597
|
-
"
|
|
10598
|
-
"",
|
|
10599
|
-
|
|
10600
|
-
|
|
10601
|
-
|
|
10602
|
-
|
|
10608
|
+
" `taskFilter: { correlationId: \"<id>\" }`."
|
|
10609
|
+
].join("\n");
|
|
10610
|
+
const workspace = ctx.workspace?.mode === "dedicated_worktree" ? [
|
|
10611
|
+
"This review attempt is running inside a dedicated disposable git",
|
|
10612
|
+
"worktree created for this task. If you need to check out the target",
|
|
10613
|
+
"branch or inspect refs locally, do it only inside this worktree.",
|
|
10614
|
+
ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`. You may replace it with the target branch locally if that helps your inspection.` : "The current checkout is disposable and will be cleaned up when the task ends."
|
|
10615
|
+
].join("\n") : "";
|
|
10616
|
+
const preamble = renderRubricPreambleSection(rubric) ?? "";
|
|
10617
|
+
const criteria = renderRubricCriteriaList(rubric);
|
|
10618
|
+
const scoring = [
|
|
10603
10619
|
"- `llm_score`: score 0..1 continuous. `rationale` REQUIRED (2–4 sentences).",
|
|
10604
10620
|
"- `boolean`: score exactly 0 or 1. `rationale` optional.",
|
|
10605
10621
|
"- `deterministic_signature_check`: run `moltnet entry verify` on every diary entry returned by step 3 above AND `git verify-commit` on every commit. Score 1 iff ALL signatures are valid; otherwise 0. Populate `evidence.commitsVerified`, `evidence.commitsTotal`, `evidence.signatureFailures`.",
|
|
10606
10622
|
"",
|
|
10607
|
-
"Write a signed diary entry (tags: \"judgment\", \"assess_brief\") capturing the rationale before reporting structured output."
|
|
10608
|
-
|
|
10609
|
-
|
|
10610
|
-
|
|
10611
|
-
|
|
10612
|
-
|
|
10613
|
-
|
|
10614
|
-
|
|
10615
|
-
|
|
10616
|
-
|
|
10617
|
-
|
|
10618
|
-
|
|
10619
|
-
|
|
10620
|
-
|
|
10621
|
-
|
|
10622
|
-
|
|
10623
|
-
|
|
10624
|
-
|
|
10623
|
+
"Write a signed diary entry (tags: \"judgment\", \"assess_brief\") capturing the rationale before reporting structured output."
|
|
10624
|
+
].join("\n");
|
|
10625
|
+
return assembleTaskPrompt("assess_brief", [
|
|
10626
|
+
{
|
|
10627
|
+
id: "assess_brief.header",
|
|
10628
|
+
source: "header",
|
|
10629
|
+
body: header
|
|
10630
|
+
},
|
|
10631
|
+
{
|
|
10632
|
+
id: "assess_brief.target",
|
|
10633
|
+
source: "task_input",
|
|
10634
|
+
header: "Target of assessment",
|
|
10635
|
+
body: target
|
|
10636
|
+
},
|
|
10637
|
+
{
|
|
10638
|
+
id: "assess_brief.diary_query",
|
|
10639
|
+
source: "static",
|
|
10640
|
+
header: "Querying the producer's diary entries",
|
|
10641
|
+
body: diaryQuery
|
|
10642
|
+
},
|
|
10643
|
+
{
|
|
10644
|
+
id: "assess_brief.workspace",
|
|
10645
|
+
source: "workspace",
|
|
10646
|
+
header: "Workspace",
|
|
10647
|
+
body: workspace
|
|
10648
|
+
},
|
|
10649
|
+
{
|
|
10650
|
+
id: "assess_brief.preamble",
|
|
10651
|
+
source: "rubric_judge",
|
|
10652
|
+
body: preamble
|
|
10653
|
+
},
|
|
10654
|
+
{
|
|
10655
|
+
id: "assess_brief.criteria",
|
|
10656
|
+
source: "rubric_judge",
|
|
10657
|
+
header: "Criteria",
|
|
10658
|
+
body: criteria
|
|
10659
|
+
},
|
|
10660
|
+
{
|
|
10661
|
+
id: "assess_brief.scoring",
|
|
10662
|
+
source: "rubric_judge",
|
|
10663
|
+
header: "Scoring rules",
|
|
10664
|
+
body: scoring
|
|
10665
|
+
},
|
|
10666
|
+
{
|
|
10667
|
+
id: "assess_brief.final_output",
|
|
10668
|
+
source: "final_output",
|
|
10669
|
+
body: buildFinalOutputBlock({
|
|
10670
|
+
taskType: "assess_brief",
|
|
10671
|
+
outputSchemaName: "AssessBriefOutput",
|
|
10672
|
+
shapeSketch: [
|
|
10673
|
+
"{",
|
|
10674
|
+
" \"scores\": [",
|
|
10675
|
+
" { \"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {} }",
|
|
10676
|
+
" ],",
|
|
10677
|
+
" \"composite\": <sum>,",
|
|
10678
|
+
" \"verdict\": \"<1-3 sentence overall>\",",
|
|
10679
|
+
" \"judgeModel\": \"<provider:model>\"",
|
|
10680
|
+
"}"
|
|
10681
|
+
].join("\n"),
|
|
10682
|
+
extraNotes: ["`composite` = Σ(weight_i × score_i) recomputed. The runtime rejects a mismatch."]
|
|
10683
|
+
})
|
|
10684
|
+
}
|
|
10685
|
+
]);
|
|
10625
10686
|
}
|
|
10626
10687
|
//#endregion
|
|
10627
10688
|
//#region ../agent-runtime/src/prompts/self-verification.ts
|
|
@@ -10630,11 +10691,11 @@ function buildSelfVerificationBlock(taskId, criteriaField = "successCriteria") {
|
|
|
10630
10691
|
"## Self-verification",
|
|
10631
10692
|
"",
|
|
10632
10693
|
`If \`input.${criteriaField}\` is set on this task, your final output MUST`,
|
|
10633
|
-
"include a `verification` block.
|
|
10634
|
-
|
|
10635
|
-
"
|
|
10636
|
-
"
|
|
10637
|
-
"
|
|
10694
|
+
"include a `verification` block. Treat every item in those criteria as",
|
|
10695
|
+
"part of the promise you made when you claimed the task. That includes",
|
|
10696
|
+
"the built-in submit-output gate when present. Do not call the submit",
|
|
10697
|
+
"tool until you have computed the verification payload you can honestly",
|
|
10698
|
+
"stand behind.",
|
|
10638
10699
|
"",
|
|
10639
10700
|
`Call \`moltnet_get_task\` with task id \`${taskId}\` and read \`input.${criteriaField}\`.`,
|
|
10640
10701
|
"",
|
|
@@ -10698,22 +10759,13 @@ function buildSelfVerificationBlock(taskId, criteriaField = "successCriteria") {
|
|
|
10698
10759
|
* TODO(#885): add a `moltnet_parallel_explore` custom tool that spawns
|
|
10699
10760
|
* N isolated `createAgentSession` children (one per tag cluster or
|
|
10700
10761
|
* entry_type axis the curator picks after recon), each with a narrow
|
|
10701
|
-
* tool subset and a turn cap, and returns compressed summaries.
|
|
10702
|
-
* curator keeps a warm context and only sees {candidateIds, notes}
|
|
10703
|
-
* per probe — mirrors the fan-out pattern pi-mono SDK example #13
|
|
10704
|
-
* (session runtime) + #05 (custom tools) makes possible. Until that
|
|
10705
|
-
* lands, the `checkpoints[]` output field is the fallback: curator
|
|
10706
|
-
* emits pruned state at phase boundaries so a follow-up session can
|
|
10707
|
-
* resume without replaying the tool history.
|
|
10762
|
+
* tool subset and a turn cap, and returns compressed summaries.
|
|
10708
10763
|
*/
|
|
10709
10764
|
function buildCuratePackUserPrompt(input, ctx) {
|
|
10710
10765
|
const { diaryId, taskPrompt, entryTypes, tagFilters, tokenBudget, recipe } = input;
|
|
10711
10766
|
const entryTypesPinned = Boolean(entryTypes);
|
|
10712
10767
|
const resolvedRecipe = recipe ?? "topic-focused-v1";
|
|
10713
|
-
const
|
|
10714
|
-
const excludeLine = tagFilters?.exclude?.length ? `- Hard exclude (drop if ANY present): ${tagFilters.exclude.map((t) => `\`${t}\``).join(", ")}` : null;
|
|
10715
|
-
const prefixLine = tagFilters?.prefix ? `- Tag prefix hint when inventorying: \`${tagFilters.prefix}\`` : null;
|
|
10716
|
-
return [
|
|
10768
|
+
const header = [
|
|
10717
10769
|
"# Curate Pack Agent",
|
|
10718
10770
|
"",
|
|
10719
10771
|
"You are the curator. Step 1 of the three-session attribution loop:",
|
|
@@ -10721,40 +10773,29 @@ function buildCuratePackUserPrompt(input, ctx) {
|
|
|
10721
10773
|
"will judge. Your output IS the pack — nobody downstream will re-rank.",
|
|
10722
10774
|
"",
|
|
10723
10775
|
`Your agent-session diary ID is: ${ctx.diaryId}`,
|
|
10724
|
-
`This task's id is: ${ctx.taskId}
|
|
10725
|
-
|
|
10726
|
-
|
|
10727
|
-
"",
|
|
10776
|
+
`This task's id is: ${ctx.taskId}`
|
|
10777
|
+
].join("\n");
|
|
10778
|
+
const goal = [
|
|
10728
10779
|
`Build a pack from diary \`${diaryId}\` that faithfully serves this`,
|
|
10729
|
-
|
|
10780
|
+
"prompt:",
|
|
10730
10781
|
"",
|
|
10731
10782
|
`> ${taskPrompt}`,
|
|
10732
10783
|
"",
|
|
10733
10784
|
"What \"faithfully\" means is your call. A broad prompt may warrant 20",
|
|
10734
10785
|
"entries spanning clusters; a sharp one may resolve to 4 high-signal",
|
|
10735
10786
|
"entries. Trust your own judgment on breadth vs. depth — but be able",
|
|
10736
|
-
"to defend it in the summary."
|
|
10737
|
-
|
|
10738
|
-
|
|
10739
|
-
|
|
10740
|
-
|
|
10741
|
-
|
|
10742
|
-
|
|
10743
|
-
|
|
10744
|
-
|
|
10745
|
-
|
|
10746
|
-
|
|
10747
|
-
|
|
10748
|
-
entryTypesPinned ? null : " style content (e.g., \"what shipped this week\"). State your choice",
|
|
10749
|
-
entryTypesPinned ? null : " briefly in the final `summary`.",
|
|
10750
|
-
`- Recipe tag: \`${resolvedRecipe}\` (recorded on pack params)`,
|
|
10751
|
-
tokenBudget ? `- Token budget (soft cap on final pack): ${tokenBudget}. Pick entry count so the pack fits — estimate ~300 tok/entry as a starting heuristic, tighten after inspecting actual content lengths.` : "- No token budget — size the pack to match the prompt, not an arbitrary target.",
|
|
10752
|
-
includeLine,
|
|
10753
|
-
excludeLine,
|
|
10754
|
-
prefixLine,
|
|
10755
|
-
"",
|
|
10756
|
-
"## Tools available (not a recipe — use what the situation calls for)",
|
|
10757
|
-
"",
|
|
10787
|
+
"to defend it in the summary."
|
|
10788
|
+
].join("\n");
|
|
10789
|
+
const constraintsLines = [];
|
|
10790
|
+
if (entryTypesPinned) constraintsLines.push(`- Entry types pinned by imposer (do not widen): ${entryTypes.map((t) => `\`${t}\``).join(", ")}`);
|
|
10791
|
+
else constraintsLines.push("- Entry types: **you choose**. The diary contains three kinds:", " - `episodic` — incident reports, \"what happened and how we fixed it\" narratives.", " - `semantic` — durable decisions, patterns, design rationale.", " - `procedural` — commit audit trails / changelog-style provenance.", " Pick the subset that fits the prompt. For \"failures and workarounds\"", " or \"decisions we made\" you generally do NOT want `procedural` — those", " entries are append-only commit logs and produce changelog-shaped packs.", " Include `procedural` only when the prompt explicitly asks for changelog-", " style content (e.g., \"what shipped this week\"). State your choice", " briefly in the final `summary`.");
|
|
10792
|
+
constraintsLines.push(`- Recipe tag: \`${resolvedRecipe}\` (recorded on pack params)`);
|
|
10793
|
+
constraintsLines.push(tokenBudget ? `- Token budget (soft cap on final pack): ${tokenBudget}. Pick entry count so the pack fits — estimate ~300 tok/entry as a starting heuristic, tighten after inspecting actual content lengths.` : "- No token budget — size the pack to match the prompt, not an arbitrary target.");
|
|
10794
|
+
if (tagFilters?.include?.length) constraintsLines.push(`- Hard include (ALL must be present on an entry): ${tagFilters.include.map((t) => `\`${t}\``).join(", ")}`);
|
|
10795
|
+
if (tagFilters?.exclude?.length) constraintsLines.push(`- Hard exclude (drop if ANY present): ${tagFilters.exclude.map((t) => `\`${t}\``).join(", ")}`);
|
|
10796
|
+
if (tagFilters?.prefix) constraintsLines.push(`- Tag prefix hint when inventorying: \`${tagFilters.prefix}\``);
|
|
10797
|
+
const constraints = constraintsLines.join("\n");
|
|
10798
|
+
const tools = [
|
|
10758
10799
|
"- `moltnet_diary_tags` — tag inventory with counts. Cheap reconnaissance",
|
|
10759
10800
|
" when the prompt implies a scope but not a tag. Pass",
|
|
10760
10801
|
" `prefix: \"task:\"` to enumerate task-provenance tags only",
|
|
@@ -10767,10 +10808,9 @@ function buildCuratePackUserPrompt(input, ctx) {
|
|
|
10767
10808
|
"- `moltnet_list_entries` — multi-tag (AND) listing with optional",
|
|
10768
10809
|
" `excludeTags`, `entryType`, and the same `taskFilter` shorthand.",
|
|
10769
10810
|
"- `moltnet_get_entry` — full entry read, for disambiguation.",
|
|
10770
|
-
"- `moltnet_pack_create` — terminal call that persists the pack."
|
|
10771
|
-
|
|
10772
|
-
|
|
10773
|
-
"",
|
|
10811
|
+
"- `moltnet_pack_create` — terminal call that persists the pack."
|
|
10812
|
+
].join("\n");
|
|
10813
|
+
const exploration = [
|
|
10774
10814
|
"Context is finite. Treat every tool call as buying information against",
|
|
10775
10815
|
"a budget. Some heuristics that tend to work:",
|
|
10776
10816
|
"",
|
|
@@ -10787,57 +10827,110 @@ function buildCuratePackUserPrompt(input, ctx) {
|
|
|
10787
10827
|
"- **Emit a checkpoint if your working set exceeds ~30 candidates.**",
|
|
10788
10828
|
" Write one to the `checkpoints` array (see Output) listing the ids",
|
|
10789
10829
|
" you're keeping and dropping, plus a note explaining the cut. This",
|
|
10790
|
-
" lets a follow-up session resume without replaying your tool history."
|
|
10791
|
-
|
|
10792
|
-
|
|
10793
|
-
"",
|
|
10830
|
+
" lets a follow-up session resume without replaying your tool history."
|
|
10831
|
+
].join("\n");
|
|
10832
|
+
const ranking = [
|
|
10794
10833
|
"Assign integer ranks 1..N, lower = more prominent. Rank reflects",
|
|
10795
10834
|
"relevance to the prompt, NOT recency or entry popularity. Each entry",
|
|
10796
10835
|
"in the output must carry a short `rationale` — one sentence pointing",
|
|
10797
|
-
"at what in its content earned the rank."
|
|
10798
|
-
|
|
10799
|
-
|
|
10800
|
-
"",
|
|
10836
|
+
"at what in its content earned the rank."
|
|
10837
|
+
].join("\n");
|
|
10838
|
+
const persisting = [
|
|
10801
10839
|
"Call `moltnet_pack_create` with:",
|
|
10802
10840
|
"- `entries`: `[{ entryId, rank }]` for each selected entry.",
|
|
10803
|
-
|
|
10841
|
+
`- \`params\`: \`{ recipe: "${resolvedRecipe}", prompt: <the task prompt>, selection_rationale: "<2-sentence summary>" }\`.`,
|
|
10804
10842
|
tokenBudget ? `- \`tokenBudget\`: ${tokenBudget}.` : "- `tokenBudget`: omit.",
|
|
10805
10843
|
"- `pinned: false` (packs in this pipeline are ephemeral by design).",
|
|
10806
10844
|
"",
|
|
10807
10845
|
"The tool returns a JSON payload whose top-level fields are `packId` and",
|
|
10808
10846
|
"`packCid` (NOT `id`). Copy those exact UUID/CID strings verbatim into",
|
|
10809
10847
|
"`packId` and `packCid` in your final output — do not substitute an",
|
|
10810
|
-
"entry id, do not reformat, do not fabricate a UUID."
|
|
10811
|
-
|
|
10812
|
-
|
|
10813
|
-
"",
|
|
10848
|
+
"entry id, do not reformat, do not fabricate a UUID."
|
|
10849
|
+
].join("\n");
|
|
10850
|
+
const hardConstraints = [
|
|
10814
10851
|
"- Do NOT call `moltnet_pack_render` — that belongs to the next session.",
|
|
10815
10852
|
"- Do NOT write diary entries unless curation surfaces a genuine",
|
|
10816
10853
|
" incident worth recording. The curation reasoning lives in the task",
|
|
10817
10854
|
" output, not in the diary.",
|
|
10818
|
-
"- Respect hard include/exclude filters literally."
|
|
10819
|
-
|
|
10820
|
-
|
|
10821
|
-
|
|
10822
|
-
|
|
10823
|
-
|
|
10824
|
-
|
|
10825
|
-
|
|
10826
|
-
|
|
10827
|
-
|
|
10828
|
-
|
|
10829
|
-
|
|
10830
|
-
|
|
10831
|
-
|
|
10832
|
-
|
|
10833
|
-
|
|
10834
|
-
|
|
10835
|
-
|
|
10836
|
-
|
|
10837
|
-
|
|
10838
|
-
|
|
10839
|
-
|
|
10840
|
-
|
|
10855
|
+
"- Respect hard include/exclude filters literally."
|
|
10856
|
+
].join("\n");
|
|
10857
|
+
return assembleTaskPrompt("curate_pack", [
|
|
10858
|
+
{
|
|
10859
|
+
id: "curate_pack.header",
|
|
10860
|
+
source: "header",
|
|
10861
|
+
body: header
|
|
10862
|
+
},
|
|
10863
|
+
{
|
|
10864
|
+
id: "curate_pack.goal",
|
|
10865
|
+
source: "task_input",
|
|
10866
|
+
header: "Goal",
|
|
10867
|
+
body: goal
|
|
10868
|
+
},
|
|
10869
|
+
{
|
|
10870
|
+
id: "curate_pack.constraints",
|
|
10871
|
+
source: "task_input",
|
|
10872
|
+
header: "Constraints",
|
|
10873
|
+
body: constraints
|
|
10874
|
+
},
|
|
10875
|
+
{
|
|
10876
|
+
id: "curate_pack.tools",
|
|
10877
|
+
source: "static",
|
|
10878
|
+
header: "Tools available (not a recipe — use what the situation calls for)",
|
|
10879
|
+
body: tools
|
|
10880
|
+
},
|
|
10881
|
+
{
|
|
10882
|
+
id: "curate_pack.exploration",
|
|
10883
|
+
source: "static",
|
|
10884
|
+
header: "Exploration discipline",
|
|
10885
|
+
body: exploration
|
|
10886
|
+
},
|
|
10887
|
+
{
|
|
10888
|
+
id: "curate_pack.ranking",
|
|
10889
|
+
source: "static",
|
|
10890
|
+
header: "Ranking",
|
|
10891
|
+
body: ranking
|
|
10892
|
+
},
|
|
10893
|
+
{
|
|
10894
|
+
id: "curate_pack.persisting",
|
|
10895
|
+
source: "static",
|
|
10896
|
+
header: "Persisting the pack",
|
|
10897
|
+
body: persisting
|
|
10898
|
+
},
|
|
10899
|
+
{
|
|
10900
|
+
id: "curate_pack.hard_constraints",
|
|
10901
|
+
source: "static",
|
|
10902
|
+
header: "Hard constraints",
|
|
10903
|
+
body: hardConstraints
|
|
10904
|
+
},
|
|
10905
|
+
{
|
|
10906
|
+
id: "curate_pack.verification",
|
|
10907
|
+
source: "verification",
|
|
10908
|
+
body: buildSelfVerificationBlock(ctx.taskId)
|
|
10909
|
+
},
|
|
10910
|
+
{
|
|
10911
|
+
id: "curate_pack.final_output",
|
|
10912
|
+
source: "final_output",
|
|
10913
|
+
body: buildFinalOutputBlock({
|
|
10914
|
+
taskType: "curate_pack",
|
|
10915
|
+
outputSchemaName: "CuratePackOutput",
|
|
10916
|
+
shapeSketch: [
|
|
10917
|
+
"{",
|
|
10918
|
+
" \"packId\": \"<uuid>\",",
|
|
10919
|
+
" \"packCid\": \"<cid>\",",
|
|
10920
|
+
" \"entries\": [",
|
|
10921
|
+
" { \"entryId\": \"<uuid>\", \"rank\": 1, \"rationale\": \"<why>\" }",
|
|
10922
|
+
" ],",
|
|
10923
|
+
" \"recipeParams\": { \"recipe\": \"...\", \"prompt\": \"...\", ... },",
|
|
10924
|
+
" \"checkpoints\": [",
|
|
10925
|
+
" { \"phase\": \"recon\", \"candidateIds\": [...], \"droppedIds\": [...], \"notes\": \"...\" }",
|
|
10926
|
+
" ],",
|
|
10927
|
+
" \"summary\": \"<2-4 sentences: what you looked for, how you narrowed, what defines the final set>\",",
|
|
10928
|
+
" \"verification\": <required iff input.successCriteria; see Self-verification>",
|
|
10929
|
+
"}"
|
|
10930
|
+
].join("\n")
|
|
10931
|
+
})
|
|
10932
|
+
}
|
|
10933
|
+
]);
|
|
10841
10934
|
}
|
|
10842
10935
|
//#endregion
|
|
10843
10936
|
//#region ../agent-runtime/src/prompts/fulfill-brief.ts
|
|
@@ -10850,17 +10943,22 @@ function buildCuratePackUserPrompt(input, ctx) {
|
|
|
10850
10943
|
*/
|
|
10851
10944
|
function buildFulfillBriefUserPrompt(input, ctx) {
|
|
10852
10945
|
const { brief, title, seedFiles, scopeHint } = input;
|
|
10853
|
-
const
|
|
10854
|
-
"
|
|
10946
|
+
const header = [
|
|
10947
|
+
"# Fulfill Brief Agent",
|
|
10855
10948
|
"",
|
|
10856
|
-
"
|
|
10857
|
-
|
|
10858
|
-
""
|
|
10859
|
-
|
|
10860
|
-
|
|
10861
|
-
|
|
10862
|
-
"
|
|
10949
|
+
"You are a software engineering agent working in a sandboxed environment.",
|
|
10950
|
+
"Your workspace is at /workspace (mounted from the host repository).",
|
|
10951
|
+
"The MoltNet runtime instructor (above, in this system prompt) defines the",
|
|
10952
|
+
"invariants for this task: identity, gh authentication, diary discipline,",
|
|
10953
|
+
"and the accountable-commit shape. Follow it for every commit.",
|
|
10954
|
+
"",
|
|
10955
|
+
`## Task: ${title ?? "Fulfill brief"}`,
|
|
10863
10956
|
"",
|
|
10957
|
+
`Task id: \`${ctx.taskId}\``
|
|
10958
|
+
].join("\n");
|
|
10959
|
+
const seedFilesBody = seedFiles?.length ? ["Start by reading these files to ground yourself:", ...seedFiles.map((f) => `- \`${f}\``)].join("\n") : "";
|
|
10960
|
+
const branchSlug = ctx.correlationId ? `moltnet/${ctx.correlationId}/` : scopeHint ? `feat/${scopeHint}-` : "feat/";
|
|
10961
|
+
const correlation = ctx.correlationId ? [
|
|
10864
10962
|
`This task carries correlationId \`${ctx.correlationId}\`. You MUST:`,
|
|
10865
10963
|
"",
|
|
10866
10964
|
`1. Name your branch \`moltnet/${ctx.correlationId}/<short-slug>\` — use a`,
|
|
@@ -10869,39 +10967,14 @@ function buildFulfillBriefUserPrompt(input, ctx) {
|
|
|
10869
10967
|
" your **first** commit on that branch (subsequent commits do not need it).",
|
|
10870
10968
|
"",
|
|
10871
10969
|
"These are recovery anchors for the MoltNet mention-bot. Do not deviate",
|
|
10872
|
-
"from this branch naming scheme when correlationId is set."
|
|
10873
|
-
""
|
|
10970
|
+
"from this branch naming scheme when correlationId is set."
|
|
10874
10971
|
].join("\n") : "";
|
|
10875
|
-
const
|
|
10876
|
-
"### Workspace",
|
|
10877
|
-
"",
|
|
10972
|
+
const workspace = ctx.workspace?.mode === "dedicated_worktree" ? [
|
|
10878
10973
|
"This attempt is running inside a dedicated git worktree created",
|
|
10879
10974
|
"for this task. Do not repurpose or switch the primary checkout.",
|
|
10880
|
-
ctx.workspace.branch ? `The current branch is \`${ctx.workspace.branch}\`. Stay on this branch unless the runtime instructor explicitly tells you otherwise.` : "Stay on the branch that was pre-provisioned for this task."
|
|
10881
|
-
""
|
|
10975
|
+
ctx.workspace.branch ? `The current branch is \`${ctx.workspace.branch}\`. Stay on this branch unless the runtime instructor explicitly tells you otherwise.` : "Stay on the branch that was pre-provisioned for this task."
|
|
10882
10976
|
].join("\n") : "";
|
|
10883
|
-
|
|
10884
|
-
"# Fulfill Brief Agent",
|
|
10885
|
-
"",
|
|
10886
|
-
"You are a software engineering agent working in a sandboxed environment.",
|
|
10887
|
-
"Your workspace is at /workspace (mounted from the host repository).",
|
|
10888
|
-
"The MoltNet runtime instructor (above, in this system prompt) defines the",
|
|
10889
|
-
"invariants for this task: identity, gh authentication, diary discipline,",
|
|
10890
|
-
"and the accountable-commit shape. Follow it for every commit.",
|
|
10891
|
-
"",
|
|
10892
|
-
`## Task: ${title ?? "Fulfill brief"}`,
|
|
10893
|
-
"",
|
|
10894
|
-
`Task id: \`${ctx.taskId}\``,
|
|
10895
|
-
"",
|
|
10896
|
-
"### Brief",
|
|
10897
|
-
"",
|
|
10898
|
-
brief,
|
|
10899
|
-
"",
|
|
10900
|
-
seedSection,
|
|
10901
|
-
correlationSection,
|
|
10902
|
-
workspaceSection,
|
|
10903
|
-
"### Workflow",
|
|
10904
|
-
"",
|
|
10977
|
+
const workflow = [
|
|
10905
10978
|
ctx.workspace?.mode === "dedicated_worktree" ? `1. Use the already-provisioned dedicated worktree branch${ctx.workspace.branch ? ` (\`${ctx.workspace.branch}\`)` : ""}; do not create or switch the primary checkout.` : `1. Create a feature branch (starting prefix suggestion: \`${branchSlug}<short-slug>\`).`,
|
|
10906
10979
|
"2. Understand the problem — read relevant code; do not speculate.",
|
|
10907
10980
|
"3. Implement the change. Keep commits small and coherent.",
|
|
@@ -10909,24 +10982,68 @@ function buildFulfillBriefUserPrompt(input, ctx) {
|
|
|
10909
10982
|
"5. For every commit, create a signed diary entry first via",
|
|
10910
10983
|
" `moltnet_create_entry` and embed its id in the commit trailer",
|
|
10911
10984
|
" `MoltNet-Diary: <id>` (per the runtime instructor).",
|
|
10912
|
-
"6. Push the branch and open a PR."
|
|
10913
|
-
|
|
10914
|
-
|
|
10915
|
-
|
|
10916
|
-
|
|
10917
|
-
|
|
10918
|
-
|
|
10919
|
-
|
|
10920
|
-
|
|
10921
|
-
|
|
10922
|
-
|
|
10923
|
-
|
|
10924
|
-
|
|
10925
|
-
|
|
10926
|
-
|
|
10927
|
-
|
|
10928
|
-
|
|
10929
|
-
|
|
10985
|
+
"6. Push the branch and open a PR."
|
|
10986
|
+
].join("\n");
|
|
10987
|
+
return assembleTaskPrompt("fulfill_brief", [
|
|
10988
|
+
{
|
|
10989
|
+
id: "fulfill_brief.header",
|
|
10990
|
+
source: "header",
|
|
10991
|
+
body: header
|
|
10992
|
+
},
|
|
10993
|
+
{
|
|
10994
|
+
id: "fulfill_brief.brief",
|
|
10995
|
+
source: "task_input",
|
|
10996
|
+
header: "Brief",
|
|
10997
|
+
body: brief
|
|
10998
|
+
},
|
|
10999
|
+
{
|
|
11000
|
+
id: "fulfill_brief.seed_files",
|
|
11001
|
+
source: "task_input",
|
|
11002
|
+
header: "Seed files",
|
|
11003
|
+
body: seedFilesBody
|
|
11004
|
+
},
|
|
11005
|
+
{
|
|
11006
|
+
id: "fulfill_brief.correlation",
|
|
11007
|
+
source: "task_input",
|
|
11008
|
+
header: "Correlation",
|
|
11009
|
+
body: correlation
|
|
11010
|
+
},
|
|
11011
|
+
{
|
|
11012
|
+
id: "fulfill_brief.workspace",
|
|
11013
|
+
source: "workspace",
|
|
11014
|
+
header: "Workspace",
|
|
11015
|
+
body: workspace
|
|
11016
|
+
},
|
|
11017
|
+
{
|
|
11018
|
+
id: "fulfill_brief.workflow",
|
|
11019
|
+
source: "static",
|
|
11020
|
+
header: "Workflow",
|
|
11021
|
+
body: workflow
|
|
11022
|
+
},
|
|
11023
|
+
{
|
|
11024
|
+
id: "fulfill_brief.verification",
|
|
11025
|
+
source: "verification",
|
|
11026
|
+
body: buildSelfVerificationBlock(ctx.taskId)
|
|
11027
|
+
},
|
|
11028
|
+
{
|
|
11029
|
+
id: "fulfill_brief.final_output",
|
|
11030
|
+
source: "final_output",
|
|
11031
|
+
body: buildFinalOutputBlock({
|
|
11032
|
+
taskType: "fulfill_brief",
|
|
11033
|
+
outputSchemaName: "FulfillBriefOutput",
|
|
11034
|
+
shapeSketch: [
|
|
11035
|
+
"{",
|
|
11036
|
+
" \"branch\": \"<branch-name>\",",
|
|
11037
|
+
" \"commits\": [{ \"sha\": \"...\", \"message\": \"...\", \"diaryEntryId\": \"...\" }],",
|
|
11038
|
+
" \"pullRequestUrl\": \"<url-or-null>\",",
|
|
11039
|
+
" \"diaryEntryIds\": [\"...\"],",
|
|
11040
|
+
" \"summary\": \"<1-3 sentence recap>\",",
|
|
11041
|
+
" \"verification\": <required iff input.successCriteria; see Self-verification>",
|
|
11042
|
+
"}"
|
|
11043
|
+
].join("\n")
|
|
11044
|
+
})
|
|
11045
|
+
}
|
|
11046
|
+
]);
|
|
10930
11047
|
}
|
|
10931
11048
|
//#endregion
|
|
10932
11049
|
//#region ../agent-runtime/src/prompts/judge-eval-attempt.ts
|
|
@@ -10935,46 +11052,18 @@ function buildJudgeEvalAttemptUserPrompt(input, ctx) {
|
|
|
10935
11052
|
if (!rubric) throw new Error("judge_eval_attempt requires successCriteria.rubric — none present");
|
|
10936
11053
|
const escapeCell = (s) => s.replace(/\\/g, "\\\\").replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
|
|
10937
11054
|
const criteriaTable = rubric.criteria.map((c) => `| \`${c.id}\` | ${c.weight.toFixed(3)} | ${c.scoring} | ${escapeCell(c.description)} |`).join("\n");
|
|
10938
|
-
const
|
|
10939
|
-
|
|
10940
|
-
outputSchemaName: "JudgeEvalAttemptOutput",
|
|
10941
|
-
shapeSketch: [
|
|
10942
|
-
"{",
|
|
10943
|
-
` "targetTaskId": "${input.targetTaskId}",`,
|
|
10944
|
-
` "targetAttemptN": ${input.targetAttemptN},`,
|
|
10945
|
-
" \"variantLabel\": \"<from producer input>\",",
|
|
10946
|
-
" \"scores\": [ { \"criterionId\": \"...\", \"score\": 0..1, \"rationale\": \"...\", \"assertions\": [...]? } ],",
|
|
10947
|
-
" \"composite\": <Σ(weight × score), 0..1>,",
|
|
10948
|
-
" \"verdict\": \"<1-3 sentences>\",",
|
|
10949
|
-
" \"judgeModel\": \"<id>\", // optional",
|
|
10950
|
-
" \"traceparent\": \"<from claim>\"",
|
|
10951
|
-
"}"
|
|
10952
|
-
].join("\n")
|
|
10953
|
-
});
|
|
10954
|
-
const workspaceSection = ctx.workspace?.attached === true ? [
|
|
10955
|
-
"### Workspace",
|
|
11055
|
+
const header = [
|
|
11056
|
+
"# Judge Eval Attempt",
|
|
10956
11057
|
"",
|
|
10957
|
-
"Your current workspace is already attached to the producer attempt",
|
|
10958
|
-
"you are judging. Inspect files directly from the current workspace",
|
|
10959
|
-
"root instead of inventing synthetic `artifact_<taskId>` paths.",
|
|
10960
|
-
"If the accepted attempt output lists `artifacts[].path`, treat those",
|
|
10961
|
-
"paths as relative to the current workspace root unless the output",
|
|
10962
|
-
"explicitly says otherwise.",
|
|
10963
|
-
ctx.workspace.mode === "dedicated_worktree" ? `This attachment is a dedicated producer worktree${ctx.workspace.branch ? ` on branch \`${ctx.workspace.branch}\`` : ""}.` : ctx.workspace.mode === "scratch_mount" ? "This workspace is a fresh judge-owned scratch copy of the producer workspace." : "This attachment is the producer shared workspace mounted with shadow writes for safe inspection.",
|
|
10964
|
-
""
|
|
10965
|
-
].join("\n") : "";
|
|
10966
|
-
return [
|
|
10967
|
-
"# Judge Eval Attempt\n",
|
|
10968
11058
|
"You are grading one accepted `run_eval` producer attempt against a hidden",
|
|
10969
11059
|
"judge rubric. Do not delegate to subagents. Grade in this session only.",
|
|
10970
11060
|
"",
|
|
10971
11061
|
`Task id: \`${ctx.taskId}\``,
|
|
10972
11062
|
`Diary: \`${ctx.diaryId}\``,
|
|
10973
11063
|
`Producer task: \`${input.targetTaskId}\``,
|
|
10974
|
-
`Producer attempt: \`${input.targetAttemptN}
|
|
10975
|
-
|
|
10976
|
-
|
|
10977
|
-
"",
|
|
11064
|
+
`Producer attempt: \`${input.targetAttemptN}\``
|
|
11065
|
+
].join("\n");
|
|
11066
|
+
const evidence = [
|
|
10978
11067
|
`1. Call \`moltnet_get_task\` with taskId=\`${input.targetTaskId}\`.`,
|
|
10979
11068
|
`2. Call \`moltnet_list_task_attempts\` with taskId=\`${input.targetTaskId}\` and inspect the accepted attempt matching \`${input.targetAttemptN}\`.`,
|
|
10980
11069
|
`3. Call \`moltnet_list_task_messages\` with taskId=\`${input.targetTaskId}\`, attemptN=\`${input.targetAttemptN}\` to inspect the producer's turn-by-turn behavior.`,
|
|
@@ -10982,32 +11071,82 @@ function buildJudgeEvalAttemptUserPrompt(input, ctx) {
|
|
|
10982
11071
|
" artifacts or workspace evidence available in your environment.",
|
|
10983
11072
|
" Read artifact files from the mounted producer workspace when present;",
|
|
10984
11073
|
" do not assume detached `artifact_<taskId>` directories exist.",
|
|
10985
|
-
"5. Score strictly against the rubric below."
|
|
10986
|
-
|
|
10987
|
-
|
|
10988
|
-
"
|
|
10989
|
-
"",
|
|
10990
|
-
|
|
11074
|
+
"5. Score strictly against the rubric below."
|
|
11075
|
+
].join("\n");
|
|
11076
|
+
const workspace = ctx.workspace?.attached === true ? [
|
|
11077
|
+
"Your current workspace is already attached to the producer attempt",
|
|
11078
|
+
"you are judging. Inspect files directly from the current workspace",
|
|
11079
|
+
"root instead of inventing synthetic `artifact_<taskId>` paths.",
|
|
11080
|
+
"If the accepted attempt output lists `artifacts[].path`, treat those",
|
|
11081
|
+
"paths as relative to the current workspace root unless the output",
|
|
11082
|
+
"explicitly says otherwise.",
|
|
11083
|
+
ctx.workspace.mode === "dedicated_worktree" ? `This attachment is a dedicated producer worktree${ctx.workspace.branch ? ` on branch \`${ctx.workspace.branch}\`` : ""}.` : ctx.workspace.mode === "scratch_mount" ? "This workspace is a fresh judge-owned scratch copy of the producer workspace." : "This attachment is the producer shared workspace mounted with shadow writes for safe inspection."
|
|
11084
|
+
].join("\n") : "";
|
|
11085
|
+
const rubricBody = [
|
|
11086
|
+
rubric.preamble ?? "",
|
|
10991
11087
|
"| Criterion | Weight | Scoring | Description |",
|
|
10992
11088
|
"| --- | --- | --- | --- |",
|
|
10993
|
-
criteriaTable
|
|
10994
|
-
"",
|
|
10995
|
-
"### Composite arithmetic",
|
|
10996
|
-
"",
|
|
10997
|
-
"Your `composite` MUST equal `Σ(criterion.weight × score)` over the rubric",
|
|
10998
|
-
"criteria. Drift > 0.001 is rejected.",
|
|
10999
|
-
"",
|
|
11000
|
-
finalOutputBlock
|
|
11089
|
+
criteriaTable
|
|
11001
11090
|
].filter((s) => s !== "").join("\n");
|
|
11091
|
+
const composite = ["Your `composite` MUST equal `Σ(criterion.weight × score)` over the rubric", "criteria. Drift > 0.001 is rejected."].join("\n");
|
|
11092
|
+
return assembleTaskPrompt("judge_eval_attempt", [
|
|
11093
|
+
{
|
|
11094
|
+
id: "judge_eval_attempt.header",
|
|
11095
|
+
source: "header",
|
|
11096
|
+
body: header
|
|
11097
|
+
},
|
|
11098
|
+
{
|
|
11099
|
+
id: "judge_eval_attempt.evidence",
|
|
11100
|
+
source: "evidence",
|
|
11101
|
+
header: "Evidence gathering",
|
|
11102
|
+
body: evidence
|
|
11103
|
+
},
|
|
11104
|
+
{
|
|
11105
|
+
id: "judge_eval_attempt.workspace",
|
|
11106
|
+
source: "workspace",
|
|
11107
|
+
header: "Workspace",
|
|
11108
|
+
body: workspace
|
|
11109
|
+
},
|
|
11110
|
+
{
|
|
11111
|
+
id: "judge_eval_attempt.rubric",
|
|
11112
|
+
source: "rubric_judge",
|
|
11113
|
+
header: "Rubric",
|
|
11114
|
+
body: rubricBody
|
|
11115
|
+
},
|
|
11116
|
+
{
|
|
11117
|
+
id: "judge_eval_attempt.composite",
|
|
11118
|
+
source: "rubric_judge",
|
|
11119
|
+
header: "Composite arithmetic",
|
|
11120
|
+
body: composite
|
|
11121
|
+
},
|
|
11122
|
+
{
|
|
11123
|
+
id: "judge_eval_attempt.final_output",
|
|
11124
|
+
source: "final_output",
|
|
11125
|
+
body: buildFinalOutputBlock({
|
|
11126
|
+
taskType: "judge_eval_attempt",
|
|
11127
|
+
outputSchemaName: "JudgeEvalAttemptOutput",
|
|
11128
|
+
shapeSketch: [
|
|
11129
|
+
"{",
|
|
11130
|
+
` "targetTaskId": "${input.targetTaskId}",`,
|
|
11131
|
+
` "targetAttemptN": ${input.targetAttemptN},`,
|
|
11132
|
+
" \"variantLabel\": \"<from producer input>\",",
|
|
11133
|
+
" \"scores\": [ { \"criterionId\": \"...\", \"score\": 0..1, \"rationale\": \"...\", \"assertions\": [...]? } ],",
|
|
11134
|
+
" \"composite\": <Σ(weight × score), 0..1>,",
|
|
11135
|
+
" \"verdict\": \"<1-3 sentences>\",",
|
|
11136
|
+
" \"judgeModel\": \"<id>\", // optional",
|
|
11137
|
+
" \"traceparent\": \"<from claim>\"",
|
|
11138
|
+
"}"
|
|
11139
|
+
].join("\n")
|
|
11140
|
+
})
|
|
11141
|
+
}
|
|
11142
|
+
]);
|
|
11002
11143
|
}
|
|
11003
11144
|
//#endregion
|
|
11004
11145
|
//#region ../agent-runtime/src/prompts/judge-pack.ts
|
|
11005
11146
|
function buildJudgePackUserPrompt(input, ctx) {
|
|
11006
11147
|
const { renderedPackId, sourcePackId, successCriteria } = input;
|
|
11007
11148
|
const rubric = successCriteria.rubric;
|
|
11008
|
-
const
|
|
11009
|
-
const preambleSection = renderRubricPreambleSection(rubric);
|
|
11010
|
-
return [
|
|
11149
|
+
const header = [
|
|
11011
11150
|
"# Judge Pack Agent",
|
|
11012
11151
|
"",
|
|
11013
11152
|
"You are an independent judge. You did NOT curate or render the pack",
|
|
@@ -11016,17 +11155,15 @@ function buildJudgePackUserPrompt(input, ctx) {
|
|
|
11016
11155
|
"referenced entries — but do NOT modify anything.",
|
|
11017
11156
|
"",
|
|
11018
11157
|
`Your diary ID is: ${ctx.diaryId}`,
|
|
11019
|
-
`This task's id is: ${ctx.taskId}
|
|
11020
|
-
|
|
11021
|
-
|
|
11022
|
-
"",
|
|
11158
|
+
`This task's id is: ${ctx.taskId}`
|
|
11159
|
+
].join("\n");
|
|
11160
|
+
const target = [
|
|
11023
11161
|
`- **Rendered pack**: \`${renderedPackId}\``,
|
|
11024
11162
|
`- **Source pack**: \`${sourcePackId}\``,
|
|
11025
|
-
`- **Rubric**: \`${rubric.rubricId}\` v${rubric.version}
|
|
11026
|
-
|
|
11027
|
-
|
|
11028
|
-
|
|
11029
|
-
"",
|
|
11163
|
+
`- **Rubric**: \`${rubric.rubricId}\` v${rubric.version}`
|
|
11164
|
+
].join("\n");
|
|
11165
|
+
const preamble = renderRubricPreambleSection(rubric) ?? "";
|
|
11166
|
+
const workflow = [
|
|
11030
11167
|
"1. Call `moltnet_rendered_pack_get` for the rendered pack. Keep the",
|
|
11031
11168
|
" `content` string — you will score it.",
|
|
11032
11169
|
"2. Call `moltnet_pack_get` with `expandEntries: true` for the source",
|
|
@@ -11034,14 +11171,10 @@ function buildJudgePackUserPrompt(input, ctx) {
|
|
|
11034
11171
|
"3. For each criterion, score according to its `scoring` mode (see",
|
|
11035
11172
|
" Scoring rules below). Produce rationales where required.",
|
|
11036
11173
|
"4. Compute `composite = Σ(weight_i × score_i)` and sanity-check it",
|
|
11037
|
-
" equals the sum you will emit — the runtime rejects mismatches."
|
|
11038
|
-
|
|
11039
|
-
|
|
11040
|
-
|
|
11041
|
-
criteriaList,
|
|
11042
|
-
"",
|
|
11043
|
-
"### Scoring rules",
|
|
11044
|
-
"",
|
|
11174
|
+
" equals the sum you will emit — the runtime rejects mismatches."
|
|
11175
|
+
].join("\n");
|
|
11176
|
+
const criteria = renderRubricCriteriaList(rubric);
|
|
11177
|
+
const scoring = [
|
|
11045
11178
|
"- `llm_score`: score 0..1 continuous. `rationale` REQUIRED (2–4",
|
|
11046
11179
|
" sentences pointing at specific evidence in the rendered content or",
|
|
11047
11180
|
" the source entries). NOTE: this mode smooths individual failures",
|
|
@@ -11080,80 +11213,95 @@ function buildJudgePackUserPrompt(input, ctx) {
|
|
|
11080
11213
|
"- `deterministic_coverage_check`: for every source entry, check",
|
|
11081
11214
|
" whether its `entryId` (or a stable reference like title + CID",
|
|
11082
11215
|
" prefix) appears in the rendered `content`. Score 1 iff coverage is",
|
|
11083
|
-
" complete; otherwise 0. Populate `evidence` with `{ covered, total, missing: [entryIds] }`."
|
|
11084
|
-
|
|
11085
|
-
|
|
11086
|
-
"",
|
|
11216
|
+
" complete; otherwise 0. Populate `evidence` with `{ covered, total, missing: [entryIds] }`."
|
|
11217
|
+
].join("\n");
|
|
11218
|
+
const constraints = [
|
|
11087
11219
|
"- Do NOT call `moltnet_pack_create` or `moltnet_pack_render`.",
|
|
11088
11220
|
"- Do NOT fetch the curator's or renderer's task output directly — they",
|
|
11089
11221
|
" may leak guidance that biases judgment.",
|
|
11090
11222
|
"- Keep the session focused on scoring; no speculative exploration.",
|
|
11091
11223
|
"",
|
|
11092
|
-
`Write a signed diary entry (tags: \`judgment\`, \`judge_pack\`, \`rubric:${rubric.rubricId}\`) capturing the rationale before
|
|
11093
|
-
|
|
11094
|
-
|
|
11095
|
-
|
|
11096
|
-
|
|
11097
|
-
|
|
11098
|
-
|
|
11099
|
-
|
|
11100
|
-
|
|
11101
|
-
|
|
11102
|
-
|
|
11103
|
-
|
|
11104
|
-
|
|
11105
|
-
|
|
11106
|
-
|
|
11107
|
-
|
|
11108
|
-
|
|
11109
|
-
|
|
11110
|
-
|
|
11111
|
-
|
|
11112
|
-
|
|
11113
|
-
|
|
11114
|
-
|
|
11115
|
-
|
|
11116
|
-
|
|
11117
|
-
|
|
11118
|
-
|
|
11119
|
-
|
|
11120
|
-
|
|
11121
|
-
|
|
11122
|
-
|
|
11224
|
+
`Write a signed diary entry (tags: \`judgment\`, \`judge_pack\`, \`rubric:${rubric.rubricId}\`) capturing the rationale before reporting structured output.`
|
|
11225
|
+
].join("\n");
|
|
11226
|
+
return assembleTaskPrompt("judge_pack", [
|
|
11227
|
+
{
|
|
11228
|
+
id: "judge_pack.header",
|
|
11229
|
+
source: "header",
|
|
11230
|
+
body: header
|
|
11231
|
+
},
|
|
11232
|
+
{
|
|
11233
|
+
id: "judge_pack.target",
|
|
11234
|
+
source: "task_input",
|
|
11235
|
+
header: "Target",
|
|
11236
|
+
body: target
|
|
11237
|
+
},
|
|
11238
|
+
{
|
|
11239
|
+
id: "judge_pack.preamble",
|
|
11240
|
+
source: "rubric_judge",
|
|
11241
|
+
body: preamble
|
|
11242
|
+
},
|
|
11243
|
+
{
|
|
11244
|
+
id: "judge_pack.workflow",
|
|
11245
|
+
source: "static",
|
|
11246
|
+
header: "Workflow",
|
|
11247
|
+
body: workflow
|
|
11248
|
+
},
|
|
11249
|
+
{
|
|
11250
|
+
id: "judge_pack.criteria",
|
|
11251
|
+
source: "rubric_judge",
|
|
11252
|
+
header: "Criteria",
|
|
11253
|
+
body: criteria
|
|
11254
|
+
},
|
|
11255
|
+
{
|
|
11256
|
+
id: "judge_pack.scoring",
|
|
11257
|
+
source: "rubric_judge",
|
|
11258
|
+
header: "Scoring rules",
|
|
11259
|
+
body: scoring
|
|
11260
|
+
},
|
|
11261
|
+
{
|
|
11262
|
+
id: "judge_pack.constraints",
|
|
11263
|
+
source: "static",
|
|
11264
|
+
header: "Constraints",
|
|
11265
|
+
body: constraints
|
|
11266
|
+
},
|
|
11267
|
+
{
|
|
11268
|
+
id: "judge_pack.final_output",
|
|
11269
|
+
source: "final_output",
|
|
11270
|
+
body: buildFinalOutputBlock({
|
|
11271
|
+
taskType: "judge_pack",
|
|
11272
|
+
outputSchemaName: "JudgePackOutput",
|
|
11273
|
+
shapeSketch: [
|
|
11274
|
+
"{",
|
|
11275
|
+
" \"scores\": [",
|
|
11276
|
+
" { \"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {} },",
|
|
11277
|
+
" {",
|
|
11278
|
+
" \"criterionId\": \"<llm_checklist criterion>\",",
|
|
11279
|
+
" \"score\": 0, // 1 iff every assertion passed",
|
|
11280
|
+
" \"assertions\": [",
|
|
11281
|
+
" { \"id\": \"claim-1\", \"text\": \"...\", \"passed\": false, \"evidence\": \"...\" }",
|
|
11282
|
+
" ]",
|
|
11283
|
+
" }",
|
|
11284
|
+
" ],",
|
|
11285
|
+
" \"composite\": <sum-of-weighted-scores>,",
|
|
11286
|
+
" \"verdict\": \"<1-3 sentence overall>\",",
|
|
11287
|
+
" \"judgeModel\": \"<provider:model>\",",
|
|
11288
|
+
" \"rendererBinaryCid\": \"<cid-string-only-if-available>\"",
|
|
11289
|
+
"}"
|
|
11290
|
+
].join("\n"),
|
|
11291
|
+
extraNotes: [
|
|
11292
|
+
"Omit `rendererBinaryCid` entirely when no binary CID is exposed by",
|
|
11293
|
+
"`moltnet_rendered_pack_get`. Do NOT emit `null` — the field is",
|
|
11294
|
+
"optional and absence is the correct representation when unavailable."
|
|
11295
|
+
]
|
|
11296
|
+
})
|
|
11297
|
+
}
|
|
11298
|
+
]);
|
|
11123
11299
|
}
|
|
11124
11300
|
//#endregion
|
|
11125
11301
|
//#region ../agent-runtime/src/prompts/pr-review.ts
|
|
11126
11302
|
function buildPrReviewUserPrompt(input, ctx) {
|
|
11127
11303
|
const rubric = input.successCriteria.rubric;
|
|
11128
|
-
const
|
|
11129
|
-
const preambleSection = renderRubricPreambleSection(rubric);
|
|
11130
|
-
const taskPromptSection = input.taskPrompt ? [
|
|
11131
|
-
"## Task-specific instructions",
|
|
11132
|
-
"",
|
|
11133
|
-
input.taskPrompt,
|
|
11134
|
-
""
|
|
11135
|
-
].join("\n") : "";
|
|
11136
|
-
const resourceSection = input.subject.resourceUrls && input.subject.resourceUrls.length > 0 ? [
|
|
11137
|
-
"### Resources",
|
|
11138
|
-
"",
|
|
11139
|
-
...input.subject.resourceUrls.map((url) => `- ${url}`),
|
|
11140
|
-
""
|
|
11141
|
-
].join("\n") : "";
|
|
11142
|
-
const hintsSection = input.subject.inspectionHints && input.subject.inspectionHints.length > 0 ? [
|
|
11143
|
-
"### Inspection hints",
|
|
11144
|
-
"",
|
|
11145
|
-
...input.subject.inspectionHints.map((hint) => `- ${hint}`),
|
|
11146
|
-
""
|
|
11147
|
-
].join("\n") : "";
|
|
11148
|
-
const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
|
|
11149
|
-
"### Workspace",
|
|
11150
|
-
"",
|
|
11151
|
-
"This review attempt is running inside a dedicated disposable git",
|
|
11152
|
-
"worktree. Inspect and reason inside this workspace only.",
|
|
11153
|
-
ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`.` : "The current checkout is disposable and will be cleaned up when the task ends.",
|
|
11154
|
-
""
|
|
11155
|
-
].join("\n") : "";
|
|
11156
|
-
return [
|
|
11304
|
+
const header = [
|
|
11157
11305
|
"# Review Agent",
|
|
11158
11306
|
"",
|
|
11159
11307
|
"You are an independent judge. You did NOT produce the subject under review.",
|
|
@@ -11161,29 +11309,30 @@ function buildPrReviewUserPrompt(input, ctx) {
|
|
|
11161
11309
|
"You may inspect the local workspace and the referenced resources, but do NOT modify anything.",
|
|
11162
11310
|
"",
|
|
11163
11311
|
`Your diary ID is: ${ctx.diaryId}`,
|
|
11164
|
-
`This task's id is: ${ctx.taskId}
|
|
11165
|
-
|
|
11166
|
-
|
|
11167
|
-
"",
|
|
11312
|
+
`This task's id is: ${ctx.taskId}`
|
|
11313
|
+
].join("\n");
|
|
11314
|
+
const subject = [
|
|
11168
11315
|
`**Title:** ${input.subject.title}`,
|
|
11169
11316
|
"",
|
|
11170
|
-
input.subject.summary
|
|
11171
|
-
|
|
11172
|
-
|
|
11173
|
-
|
|
11174
|
-
|
|
11175
|
-
"
|
|
11176
|
-
"",
|
|
11317
|
+
input.subject.summary
|
|
11318
|
+
].join("\n");
|
|
11319
|
+
const resources = input.subject.resourceUrls && input.subject.resourceUrls.length > 0 ? input.subject.resourceUrls.map((url) => `- ${url}`).join("\n") : "";
|
|
11320
|
+
const hints = input.subject.inspectionHints && input.subject.inspectionHints.length > 0 ? input.subject.inspectionHints.map((hint) => `- ${hint}`).join("\n") : "";
|
|
11321
|
+
const workspace = ctx.workspace?.mode === "dedicated_worktree" ? [
|
|
11322
|
+
"This review attempt is running inside a dedicated disposable git",
|
|
11323
|
+
"worktree. Inspect and reason inside this workspace only.",
|
|
11324
|
+
ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`.` : "The current checkout is disposable and will be cleaned up when the task ends."
|
|
11325
|
+
].join("\n") : "";
|
|
11326
|
+
const executionContract = [
|
|
11177
11327
|
"Treat the provided subject, resources, inspection hints, and any",
|
|
11178
11328
|
"task-specific instructions as the full",
|
|
11179
11329
|
"review contract for this task.",
|
|
11180
11330
|
"",
|
|
11181
11331
|
"If the task-specific instructions or inspection hints require an outward action tied to the review",
|
|
11182
11332
|
"(for example publishing the judgment somewhere), perform that action as",
|
|
11183
|
-
"part of the task before reporting structured output."
|
|
11184
|
-
|
|
11185
|
-
|
|
11186
|
-
"",
|
|
11333
|
+
"part of the task before reporting structured output."
|
|
11334
|
+
].join("\n");
|
|
11335
|
+
const workflow = [
|
|
11187
11336
|
"1. Read the subject summary, resources, inspection hints, and any",
|
|
11188
11337
|
" task-specific instructions before scoring.",
|
|
11189
11338
|
"2. Inspect the target artefact directly using the tools and resources the",
|
|
@@ -11193,39 +11342,104 @@ function buildPrReviewUserPrompt(input, ctx) {
|
|
|
11193
11342
|
"4. Apply the rubric strictly. This task is about complexity and",
|
|
11194
11343
|
" reviewability, not correctness or feature desirability.",
|
|
11195
11344
|
"5. Perform any required outward action before emitting the final",
|
|
11196
|
-
" structured output."
|
|
11197
|
-
|
|
11198
|
-
|
|
11199
|
-
|
|
11200
|
-
|
|
11201
|
-
|
|
11202
|
-
criteriaList,
|
|
11203
|
-
"",
|
|
11204
|
-
"### Scoring rules",
|
|
11205
|
-
"",
|
|
11345
|
+
" structured output."
|
|
11346
|
+
].join("\n");
|
|
11347
|
+
const taskPromptSection = input.taskPrompt ?? "";
|
|
11348
|
+
const preamble = renderRubricPreambleSection(rubric) ?? "";
|
|
11349
|
+
const criteria = renderRubricCriteriaList(rubric);
|
|
11350
|
+
const scoring = [
|
|
11206
11351
|
"- Every criterion uses binary scoring only.",
|
|
11207
11352
|
"- Score `1` when the subject clearly clears the criterion.",
|
|
11208
11353
|
"- Score `0` when it does not, or when the evidence is ambiguous.",
|
|
11209
11354
|
"- `rationale` is REQUIRED for every score. Keep it concrete and audit-friendly.",
|
|
11210
11355
|
"- Compute `composite = Σ(weight_i × score_i)` exactly; the runtime rejects mismatches.",
|
|
11211
11356
|
"",
|
|
11212
|
-
"Write a signed diary entry (tags: `judgment`, `pr_review`) capturing the rationale before reporting structured output."
|
|
11213
|
-
|
|
11214
|
-
|
|
11215
|
-
|
|
11216
|
-
|
|
11217
|
-
|
|
11218
|
-
|
|
11219
|
-
|
|
11220
|
-
|
|
11221
|
-
|
|
11222
|
-
|
|
11223
|
-
|
|
11224
|
-
|
|
11225
|
-
|
|
11226
|
-
|
|
11227
|
-
|
|
11228
|
-
|
|
11357
|
+
"Write a signed diary entry (tags: `judgment`, `pr_review`) capturing the rationale before reporting structured output."
|
|
11358
|
+
].join("\n");
|
|
11359
|
+
return assembleTaskPrompt("pr_review", [
|
|
11360
|
+
{
|
|
11361
|
+
id: "pr_review.header",
|
|
11362
|
+
source: "header",
|
|
11363
|
+
body: header
|
|
11364
|
+
},
|
|
11365
|
+
{
|
|
11366
|
+
id: "pr_review.subject",
|
|
11367
|
+
source: "task_input",
|
|
11368
|
+
header: "Subject",
|
|
11369
|
+
body: subject
|
|
11370
|
+
},
|
|
11371
|
+
{
|
|
11372
|
+
id: "pr_review.resources",
|
|
11373
|
+
source: "task_input",
|
|
11374
|
+
header: "Resources",
|
|
11375
|
+
body: resources
|
|
11376
|
+
},
|
|
11377
|
+
{
|
|
11378
|
+
id: "pr_review.hints",
|
|
11379
|
+
source: "task_input",
|
|
11380
|
+
header: "Inspection hints",
|
|
11381
|
+
body: hints
|
|
11382
|
+
},
|
|
11383
|
+
{
|
|
11384
|
+
id: "pr_review.workspace",
|
|
11385
|
+
source: "workspace",
|
|
11386
|
+
header: "Workspace",
|
|
11387
|
+
body: workspace
|
|
11388
|
+
},
|
|
11389
|
+
{
|
|
11390
|
+
id: "pr_review.execution_contract",
|
|
11391
|
+
source: "static",
|
|
11392
|
+
header: "Execution contract",
|
|
11393
|
+
body: executionContract
|
|
11394
|
+
},
|
|
11395
|
+
{
|
|
11396
|
+
id: "pr_review.workflow",
|
|
11397
|
+
source: "static",
|
|
11398
|
+
header: "Review workflow",
|
|
11399
|
+
body: workflow
|
|
11400
|
+
},
|
|
11401
|
+
{
|
|
11402
|
+
id: "pr_review.task_prompt",
|
|
11403
|
+
source: "task_input",
|
|
11404
|
+
header: "Task-specific instructions",
|
|
11405
|
+
body: taskPromptSection
|
|
11406
|
+
},
|
|
11407
|
+
{
|
|
11408
|
+
id: "pr_review.preamble",
|
|
11409
|
+
source: "rubric_judge",
|
|
11410
|
+
body: preamble
|
|
11411
|
+
},
|
|
11412
|
+
{
|
|
11413
|
+
id: "pr_review.criteria",
|
|
11414
|
+
source: "rubric_judge",
|
|
11415
|
+
header: "Criteria",
|
|
11416
|
+
body: criteria
|
|
11417
|
+
},
|
|
11418
|
+
{
|
|
11419
|
+
id: "pr_review.scoring",
|
|
11420
|
+
source: "rubric_judge",
|
|
11421
|
+
header: "Scoring rules",
|
|
11422
|
+
body: scoring
|
|
11423
|
+
},
|
|
11424
|
+
{
|
|
11425
|
+
id: "pr_review.final_output",
|
|
11426
|
+
source: "final_output",
|
|
11427
|
+
body: buildFinalOutputBlock({
|
|
11428
|
+
taskType: "pr_review",
|
|
11429
|
+
outputSchemaName: "PrReviewOutput",
|
|
11430
|
+
shapeSketch: [
|
|
11431
|
+
"{",
|
|
11432
|
+
" \"scores\": [",
|
|
11433
|
+
" { \"criterionId\": \"...\", \"score\": 0, \"rationale\": \"...\" }",
|
|
11434
|
+
" ],",
|
|
11435
|
+
" \"composite\": <sum-of-weighted-binary-scores>,",
|
|
11436
|
+
" \"verdict\": \"<1-3 sentence overall>\"",
|
|
11437
|
+
"}"
|
|
11438
|
+
].join("\n"),
|
|
11439
|
+
extraNotes: ["`scores` MUST stay in the same order as the rubric criteria.", "`score` MUST be exactly `0` or `1` for every criterion."]
|
|
11440
|
+
})
|
|
11441
|
+
}
|
|
11442
|
+
]);
|
|
11229
11443
|
}
|
|
11230
11444
|
//#endregion
|
|
11231
11445
|
//#region ../agent-runtime/src/prompts/render-pack.ts
|
|
@@ -11235,7 +11449,7 @@ function buildPrReviewUserPrompt(input, ctx) {
|
|
|
11235
11449
|
*/
|
|
11236
11450
|
function buildRenderPackUserPrompt(input, ctx) {
|
|
11237
11451
|
const { packId, persist = true, pinned = false } = input;
|
|
11238
|
-
|
|
11452
|
+
const header = [
|
|
11239
11453
|
"# Render Pack Agent",
|
|
11240
11454
|
"",
|
|
11241
11455
|
"You are rendering a context pack to markdown. Step 2 of the",
|
|
@@ -11243,16 +11457,14 @@ function buildRenderPackUserPrompt(input, ctx) {
|
|
|
11243
11457
|
"a third will judge the rendering. You must NOT judge it here.",
|
|
11244
11458
|
"",
|
|
11245
11459
|
`Your agent-session diary ID is: ${ctx.diaryId}`,
|
|
11246
|
-
`This task's id is: ${ctx.taskId}
|
|
11247
|
-
|
|
11248
|
-
|
|
11249
|
-
"",
|
|
11460
|
+
`This task's id is: ${ctx.taskId}`
|
|
11461
|
+
].join("\n");
|
|
11462
|
+
const inputBlock = [
|
|
11250
11463
|
`- **Pack**: \`${packId}\``,
|
|
11251
11464
|
`- **Persist**: \`${persist}\``,
|
|
11252
|
-
`- **Pinned**: \`${pinned}
|
|
11253
|
-
|
|
11254
|
-
|
|
11255
|
-
"",
|
|
11465
|
+
`- **Pinned**: \`${pinned}\``
|
|
11466
|
+
].join("\n");
|
|
11467
|
+
const workflow = [
|
|
11256
11468
|
"1. Call `moltnet_pack_get` with `expandEntries: true` to inspect the",
|
|
11257
11469
|
" source entries. Read it — you need the entry count for your output.",
|
|
11258
11470
|
"2. Call `moltnet_pack_render` with:",
|
|
@@ -11260,16 +11472,14 @@ function buildRenderPackUserPrompt(input, ctx) {
|
|
|
11260
11472
|
` - \`persist\`: \`${persist}\``,
|
|
11261
11473
|
` - \`pinned\`: \`${pinned}\``,
|
|
11262
11474
|
" Record the returned `renderedPackId`, `cid`, `renderMethod`, and",
|
|
11263
|
-
" `content` byte length."
|
|
11264
|
-
|
|
11265
|
-
|
|
11266
|
-
"",
|
|
11475
|
+
" `content` byte length."
|
|
11476
|
+
].join("\n");
|
|
11477
|
+
const constraints = [
|
|
11267
11478
|
"- Do NOT modify the source pack or its entries.",
|
|
11268
11479
|
"- Do NOT write diary entries unless a genuine incident occurs",
|
|
11269
|
-
" (rendering failure, invariant violation)."
|
|
11270
|
-
|
|
11271
|
-
|
|
11272
|
-
"",
|
|
11480
|
+
" (rendering failure, invariant violation)."
|
|
11481
|
+
].join("\n");
|
|
11482
|
+
const fidelity = [
|
|
11273
11483
|
"These rules apply when you are producing the markdown yourself rather",
|
|
11274
11484
|
"than relying on a deterministic `server:*` renderer.",
|
|
11275
11485
|
"",
|
|
@@ -11289,25 +11499,63 @@ function buildRenderPackUserPrompt(input, ctx) {
|
|
|
11289
11499
|
" completeness. Optimize for \"no detectable drift across a",
|
|
11290
11500
|
" claim-by-claim audit\", not \"shorter at any cost\". When compressing, prefer",
|
|
11291
11501
|
" tightening prose around a quote rather than altering the quote,",
|
|
11292
|
-
" and prefer summarising a list over silently truncating it."
|
|
11293
|
-
"",
|
|
11294
|
-
buildSelfVerificationBlock(ctx.taskId),
|
|
11295
|
-
buildFinalOutputBlock({
|
|
11296
|
-
taskType: "render_pack",
|
|
11297
|
-
outputSchemaName: "RenderPackOutput",
|
|
11298
|
-
shapeSketch: [
|
|
11299
|
-
"{",
|
|
11300
|
-
" \"renderedPackId\": \"<uuid-or-null>\",",
|
|
11301
|
-
" \"renderedCid\": \"<cid>\",",
|
|
11302
|
-
" \"renderMethod\": \"<label>\",",
|
|
11303
|
-
" \"byteSize\": <int>,",
|
|
11304
|
-
" \"entriesRendered\": <int>,",
|
|
11305
|
-
" \"summary\": \"<1-3 sentence recap>\",",
|
|
11306
|
-
" \"verification\": <required iff input.successCriteria; see Self-verification>",
|
|
11307
|
-
"}"
|
|
11308
|
-
].join("\n")
|
|
11309
|
-
})
|
|
11502
|
+
" and prefer summarising a list over silently truncating it."
|
|
11310
11503
|
].join("\n");
|
|
11504
|
+
return assembleTaskPrompt("render_pack", [
|
|
11505
|
+
{
|
|
11506
|
+
id: "render_pack.header",
|
|
11507
|
+
source: "header",
|
|
11508
|
+
body: header
|
|
11509
|
+
},
|
|
11510
|
+
{
|
|
11511
|
+
id: "render_pack.input",
|
|
11512
|
+
source: "task_input",
|
|
11513
|
+
header: "Input",
|
|
11514
|
+
body: inputBlock
|
|
11515
|
+
},
|
|
11516
|
+
{
|
|
11517
|
+
id: "render_pack.workflow",
|
|
11518
|
+
source: "static",
|
|
11519
|
+
header: "Workflow",
|
|
11520
|
+
body: workflow
|
|
11521
|
+
},
|
|
11522
|
+
{
|
|
11523
|
+
id: "render_pack.constraints",
|
|
11524
|
+
source: "static",
|
|
11525
|
+
header: "Constraints",
|
|
11526
|
+
body: constraints
|
|
11527
|
+
},
|
|
11528
|
+
{
|
|
11529
|
+
id: "render_pack.fidelity",
|
|
11530
|
+
source: "static",
|
|
11531
|
+
header: "Fidelity Discipline",
|
|
11532
|
+
body: fidelity
|
|
11533
|
+
},
|
|
11534
|
+
{
|
|
11535
|
+
id: "render_pack.verification",
|
|
11536
|
+
source: "verification",
|
|
11537
|
+
body: buildSelfVerificationBlock(ctx.taskId)
|
|
11538
|
+
},
|
|
11539
|
+
{
|
|
11540
|
+
id: "render_pack.final_output",
|
|
11541
|
+
source: "final_output",
|
|
11542
|
+
body: buildFinalOutputBlock({
|
|
11543
|
+
taskType: "render_pack",
|
|
11544
|
+
outputSchemaName: "RenderPackOutput",
|
|
11545
|
+
shapeSketch: [
|
|
11546
|
+
"{",
|
|
11547
|
+
" \"renderedPackId\": \"<uuid-or-null>\",",
|
|
11548
|
+
" \"renderedCid\": \"<cid>\",",
|
|
11549
|
+
" \"renderMethod\": \"<label>\",",
|
|
11550
|
+
" \"byteSize\": <int>,",
|
|
11551
|
+
" \"entriesRendered\": <int>,",
|
|
11552
|
+
" \"summary\": \"<1-3 sentence recap>\",",
|
|
11553
|
+
" \"verification\": <required iff input.successCriteria; see Self-verification>",
|
|
11554
|
+
"}"
|
|
11555
|
+
].join("\n")
|
|
11556
|
+
})
|
|
11557
|
+
}
|
|
11558
|
+
]);
|
|
11311
11559
|
}
|
|
11312
11560
|
//#endregion
|
|
11313
11561
|
//#region ../agent-runtime/src/prompts/run-eval.ts
|
|
@@ -11316,8 +11564,7 @@ function buildRenderPackUserPrompt(input, ctx) {
|
|
|
11316
11564
|
*
|
|
11317
11565
|
* Free-form: no git workflow, no commit ceremony. The executor produces
|
|
11318
11566
|
* a textual response (and optional file artifacts) that later
|
|
11319
|
-
* `judge_eval_attempt` task(s) grade against their own hidden
|
|
11320
|
-
* rubric.
|
|
11567
|
+
* `judge_eval_attempt` task(s) grade against their own hidden rubric.
|
|
11321
11568
|
*
|
|
11322
11569
|
* Context delivery is handled by `resolveTaskContext` (see
|
|
11323
11570
|
* libs/agent-runtime/src/context-bindings.ts) and runs BEFORE this
|
|
@@ -11325,50 +11572,44 @@ function buildRenderPackUserPrompt(input, ctx) {
|
|
|
11325
11572
|
* the body, `skill` items are persisted at the runtime's skill path,
|
|
11326
11573
|
* and `user_inline` items are appended to the first user message. This
|
|
11327
11574
|
* builder does NOT inline `input.context[]` itself.
|
|
11575
|
+
*
|
|
11576
|
+
* Prompt-shape notes (issue #1175, area 1):
|
|
11577
|
+
* - No `Correlation` section: the agent never acts on it. The id is
|
|
11578
|
+
* still carried on attempt event metadata for cross-variant queries.
|
|
11579
|
+
* - No `Execution mode` section: the workspace already reflects the
|
|
11580
|
+
* chosen mode by its shape (scratch/shared mount/dedicated worktree).
|
|
11581
|
+
* Restating it as text adds noise without changing model behavior.
|
|
11582
|
+
* - The "Injected Task Context" phrase is used identically in this
|
|
11583
|
+
* prompt's discipline section and in the materialized context block
|
|
11584
|
+
* header (see context-bindings.ts) so weaker models see one repeated
|
|
11585
|
+
* anchor.
|
|
11586
|
+
* - The discipline copy demands the model encode injected constraints
|
|
11587
|
+
* into the code path itself, not into comments or the verification
|
|
11588
|
+
* field. Quoting the constraint back is not following the task.
|
|
11328
11589
|
*/
|
|
11329
11590
|
function buildRunEvalUserPrompt(input, ctx) {
|
|
11330
|
-
const { scenario, variantLabel,
|
|
11591
|
+
const { scenario, variantLabel, successCriteria } = input;
|
|
11331
11592
|
const hasContext = input.context.length > 0;
|
|
11332
11593
|
const hasInlineContext = input.context.some((entry) => entry.binding === "context_inline");
|
|
11333
|
-
const
|
|
11334
|
-
|
|
11594
|
+
const header = `# Run Eval Agent\n\nYou are running an evaluation scenario as variant \`${variantLabel}\`.\nTask id: \`${ctx.taskId}\``;
|
|
11595
|
+
const contextDiscipline = hasContext ? [
|
|
11596
|
+
"This task includes Injected Task Context supplied by the task",
|
|
11597
|
+
"creator. You MUST inspect it BEFORE you write solution files or",
|
|
11598
|
+
"draft your final answer — not after.",
|
|
11335
11599
|
"",
|
|
11336
|
-
|
|
11337
|
-
""
|
|
11600
|
+
"Reconcile every constraint from that context **into the code path",
|
|
11601
|
+
"itself**: function bodies, control flow, transaction boundaries,",
|
|
11602
|
+
"guard clauses. Quoting a constraint back in a comment, a",
|
|
11603
|
+
"`// note:` line, the task summary, or the `verification` field is",
|
|
11604
|
+
"NOT following the task. If the constraint affects behavior, it",
|
|
11605
|
+
"must affect behavior.",
|
|
11606
|
+
hasInlineContext ? "For `context_inline`, your FIRST content-inspection step is a `read` of `/workspace/context-pack.md` before your first `write` call. The same content is also mirrored in `/workspace/AGENTS.md` and may be referenced from `/workspace/.claude/CLAUDE.md`." : "When the context is delivered as a skill, inspect it before solving.",
|
|
11607
|
+
"If the Injected Task Context contains repo- or workflow-specific",
|
|
11608
|
+
"rules, those rules override your generic instincts."
|
|
11338
11609
|
].join("\n") : "";
|
|
11339
|
-
const
|
|
11340
|
-
const
|
|
11341
|
-
|
|
11342
|
-
"",
|
|
11343
|
-
`This task carries correlationId \`${ctx.correlationId}\`. It joins`,
|
|
11344
|
-
"this variant to its sibling `run_eval` tasks (other variants of the",
|
|
11345
|
-
"same scenario and to any later `judge_eval_attempt` tasks created",
|
|
11346
|
-
"against those variants. You do not need to act on it directly — it",
|
|
11347
|
-
"is recorded for cross-variant aggregation at query time.",
|
|
11348
|
-
""
|
|
11349
|
-
].join("\n") : "";
|
|
11350
|
-
const executionSection = [
|
|
11351
|
-
"### Execution mode",
|
|
11352
|
-
"",
|
|
11353
|
-
`Mode: \`${execution.mode}\``,
|
|
11354
|
-
`Workspace: \`${execution.workspace}\``,
|
|
11355
|
-
execution.workspace === "none" ? "You are running in a scratch workspace with no repository checkout mounted. Do not assume git history or repo files are present unless the scenario provided them explicitly." : execution.workspace === "shared_mount" ? "You are running against the daemon shared mount. Treat any repository mutations as affecting the mounted checkout directly." : "You are running in a dedicated disposable git worktree isolated from the daemon shared checkout.",
|
|
11356
|
-
""
|
|
11357
|
-
].join("\n");
|
|
11358
|
-
const contextDisciplineSection = hasContext ? [
|
|
11359
|
-
"### Injected context discipline",
|
|
11360
|
-
"",
|
|
11361
|
-
"This task includes extra injected context from the task creator.",
|
|
11362
|
-
"You MUST inspect and use that context BEFORE you write solution",
|
|
11363
|
-
"files or draft your final answer.",
|
|
11364
|
-
"Do not solve first and only review the context afterward.",
|
|
11365
|
-
hasInlineContext ? "For `context_inline`, your FIRST content-inspection step should be a `read` of `/workspace/context-pack.md` before your first `write` call. The same content is also mirrored in `/workspace/AGENTS.md` and may be referenced from `/workspace/.claude/CLAUDE.md`." : "If injected context was provided as a skill, inspect that task-injected context before solving.",
|
|
11366
|
-
hasInlineContext ? "If `/workspace/context-pack.md` exists and you skip reading it before writing solution files, you are not following the task instructions." : "Do not rely on memory alone when task-injected context is available; inspect it first.",
|
|
11367
|
-
"If the injected context contains repo- or workflow-specific rules,",
|
|
11368
|
-
"those rules override your generic instincts.",
|
|
11369
|
-
""
|
|
11370
|
-
].join("\n") : "";
|
|
11371
|
-
const finalOutputBlock = buildFinalOutputBlock({
|
|
11610
|
+
const inputFiles = scenario.inputFiles?.length ? scenario.inputFiles.map((f) => `- \`${f}\``).join("\n") : "";
|
|
11611
|
+
const verification = successCriteria ? buildSelfVerificationBlock(ctx.taskId) : "";
|
|
11612
|
+
const finalOutput = buildFinalOutputBlock({
|
|
11372
11613
|
taskType: "run_eval",
|
|
11373
11614
|
outputSchemaName: "RunEvalOutput",
|
|
11374
11615
|
shapeSketch: [
|
|
@@ -11388,17 +11629,41 @@ function buildRunEvalUserPrompt(input, ctx) {
|
|
|
11388
11629
|
"}"
|
|
11389
11630
|
].join("\n")
|
|
11390
11631
|
});
|
|
11391
|
-
return [
|
|
11392
|
-
|
|
11393
|
-
|
|
11394
|
-
|
|
11395
|
-
|
|
11396
|
-
|
|
11397
|
-
|
|
11398
|
-
|
|
11399
|
-
|
|
11400
|
-
|
|
11401
|
-
|
|
11632
|
+
return assembleTaskPrompt("run_eval", [
|
|
11633
|
+
{
|
|
11634
|
+
id: "run_eval.header",
|
|
11635
|
+
source: "header",
|
|
11636
|
+
body: header
|
|
11637
|
+
},
|
|
11638
|
+
{
|
|
11639
|
+
id: "run_eval.context_discipline",
|
|
11640
|
+
source: "discipline",
|
|
11641
|
+
header: "Injected Task Context",
|
|
11642
|
+
body: contextDiscipline
|
|
11643
|
+
},
|
|
11644
|
+
{
|
|
11645
|
+
id: "run_eval.scenario",
|
|
11646
|
+
source: "task_input",
|
|
11647
|
+
header: "Scenario",
|
|
11648
|
+
body: scenario.prompt
|
|
11649
|
+
},
|
|
11650
|
+
{
|
|
11651
|
+
id: "run_eval.input_files",
|
|
11652
|
+
source: "task_input",
|
|
11653
|
+
header: "Input files",
|
|
11654
|
+
body: inputFiles
|
|
11655
|
+
},
|
|
11656
|
+
{
|
|
11657
|
+
id: "run_eval.verification",
|
|
11658
|
+
source: "verification",
|
|
11659
|
+
body: verification
|
|
11660
|
+
},
|
|
11661
|
+
{
|
|
11662
|
+
id: "run_eval.final_output",
|
|
11663
|
+
source: "final_output",
|
|
11664
|
+
body: finalOutput
|
|
11665
|
+
}
|
|
11666
|
+
]);
|
|
11402
11667
|
}
|
|
11403
11668
|
//#endregion
|
|
11404
11669
|
//#region ../agent-runtime/src/prompts/index.ts
|
|
@@ -16019,7 +16284,7 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
16019
16284
|
});
|
|
16020
16285
|
let taskPrompt;
|
|
16021
16286
|
try {
|
|
16022
|
-
|
|
16287
|
+
const assembled = buildTaskUserPrompt(task, {
|
|
16023
16288
|
diaryId,
|
|
16024
16289
|
taskId: task.id,
|
|
16025
16290
|
workspace: {
|
|
@@ -16030,6 +16295,12 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
16030
16295
|
},
|
|
16031
16296
|
extras: opts.promptExtras
|
|
16032
16297
|
});
|
|
16298
|
+
taskPrompt = assembled.text;
|
|
16299
|
+
await emit("info", {
|
|
16300
|
+
event: "prompt_assembled",
|
|
16301
|
+
taskType: assembled.taskType,
|
|
16302
|
+
sections: assembled.trace
|
|
16303
|
+
});
|
|
16033
16304
|
} catch (err) {
|
|
16034
16305
|
const message = err instanceof Error ? err.message : String(err);
|
|
16035
16306
|
await emit("error", {
|
|
@@ -16292,8 +16563,8 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
16292
16563
|
}
|
|
16293
16564
|
else if (submitToolHandle) {
|
|
16294
16565
|
parseError = {
|
|
16295
|
-
code: "
|
|
16296
|
-
message: "Agent did not
|
|
16566
|
+
code: "submit_output_missing",
|
|
16567
|
+
message: "Agent did not satisfy the promised submit-output criterion: no valid task submit tool call was captured before the session ended."
|
|
16297
16568
|
};
|
|
16298
16569
|
await emit("error", {
|
|
16299
16570
|
message: parseError.message,
|