@themoltnet/pi-extension 0.18.0 → 0.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +212 -14
- package/package.json +3 -3
package/dist/index.js
CHANGED
|
@@ -9619,6 +9619,72 @@ async function onCreateJudgeEvalVariant(input, ctx) {
|
|
|
9619
9619
|
}];
|
|
9620
9620
|
}
|
|
9621
9621
|
//#endregion
|
|
9622
|
+
//#region ../tasks/src/task-types/pr-review.ts
|
|
9623
|
+
var PR_REVIEW_TYPE = "pr_review";
|
|
9624
|
+
var PrReviewSubject = Type$1.Object({
|
|
9625
|
+
title: Type$1.String({ minLength: 1 }),
|
|
9626
|
+
summary: Type$1.String({ minLength: 1 }),
|
|
9627
|
+
resourceUrls: Type$1.Optional(Type$1.Array(Type$1.String({ minLength: 1 }))),
|
|
9628
|
+
inspectionHints: Type$1.Optional(Type$1.Array(Type$1.String({ minLength: 1 })))
|
|
9629
|
+
}, {
|
|
9630
|
+
$id: "PrReviewSubject",
|
|
9631
|
+
additionalProperties: false
|
|
9632
|
+
});
|
|
9633
|
+
var PrReviewInput = Type$1.Object({
|
|
9634
|
+
subject: PrReviewSubject,
|
|
9635
|
+
taskPrompt: Type$1.Optional(Type$1.String({ minLength: 1 })),
|
|
9636
|
+
successCriteria: SuccessCriteria
|
|
9637
|
+
}, {
|
|
9638
|
+
$id: "PrReviewInput",
|
|
9639
|
+
additionalProperties: false
|
|
9640
|
+
});
|
|
9641
|
+
var PrReviewScore = Type$1.Object({
|
|
9642
|
+
criterionId: Type$1.String({ minLength: 1 }),
|
|
9643
|
+
score: Type$1.Union([Type$1.Literal(0), Type$1.Literal(1)]),
|
|
9644
|
+
rationale: Type$1.String({ minLength: 1 })
|
|
9645
|
+
}, {
|
|
9646
|
+
$id: "PrReviewScore",
|
|
9647
|
+
additionalProperties: false
|
|
9648
|
+
});
|
|
9649
|
+
var PrReviewOutput = Type$1.Object({
|
|
9650
|
+
scores: Type$1.Array(PrReviewScore, { minItems: 1 }),
|
|
9651
|
+
composite: Type$1.Number({
|
|
9652
|
+
minimum: 0,
|
|
9653
|
+
maximum: 1
|
|
9654
|
+
}),
|
|
9655
|
+
verdict: Type$1.String({ minLength: 1 })
|
|
9656
|
+
}, {
|
|
9657
|
+
$id: "PrReviewOutput",
|
|
9658
|
+
additionalProperties: false
|
|
9659
|
+
});
|
|
9660
|
+
function requireBooleanRubric(rubric) {
|
|
9661
|
+
for (const criterion of rubric.criteria) if (criterion.scoring !== "boolean") return `pr_review requires boolean scoring for every rubric criterion; criterion "${criterion.id}" uses "${criterion.scoring}"`;
|
|
9662
|
+
return null;
|
|
9663
|
+
}
|
|
9664
|
+
function validatePrReviewInput(input) {
|
|
9665
|
+
const sc = input.successCriteria;
|
|
9666
|
+
if (!sc) return "successCriteria is required for judgment tasks";
|
|
9667
|
+
if (!sc.rubric) return "successCriteria.rubric is required for judgment tasks";
|
|
9668
|
+
return validateRubricWeights(sc.rubric) ?? requireBooleanRubric(sc.rubric);
|
|
9669
|
+
}
|
|
9670
|
+
function validatePrReviewOutput(output, input) {
|
|
9671
|
+
if (!input) return null;
|
|
9672
|
+
const scores = output.scores;
|
|
9673
|
+
const rubric = input.successCriteria.rubric;
|
|
9674
|
+
if (!rubric) return null;
|
|
9675
|
+
if (scores.length !== rubric.criteria.length) return `scores length ${scores.length} does not match rubric criteria length ${rubric.criteria.length}`;
|
|
9676
|
+
let composite = 0;
|
|
9677
|
+
for (let i = 0; i < rubric.criteria.length; i++) {
|
|
9678
|
+
const criterion = rubric.criteria[i];
|
|
9679
|
+
const score = scores[i];
|
|
9680
|
+
if (score.criterionId !== criterion.id) return `scores[${i}] has criterionId "${score.criterionId}" but rubric expects "${criterion.id}" in that position`;
|
|
9681
|
+
composite += criterion.weight * score.score;
|
|
9682
|
+
}
|
|
9683
|
+
const claimed = output.composite;
|
|
9684
|
+
if (Math.abs(claimed - composite) > 1e-6) return `composite ${claimed} does not match weighted sum ${composite.toFixed(6)}`;
|
|
9685
|
+
return null;
|
|
9686
|
+
}
|
|
9687
|
+
//#endregion
|
|
9622
9688
|
//#region ../tasks/src/task-types/render-pack.ts
|
|
9623
9689
|
/**
|
|
9624
9690
|
* `render_pack` — turn a context pack into a signed rendered artefact.
|
|
@@ -9786,6 +9852,18 @@ var BUILT_IN_TASK_TYPES = {
|
|
|
9786
9852
|
validateInput: validateJudgmentInput,
|
|
9787
9853
|
validateInputAsync: validateAssessBriefInputAsync
|
|
9788
9854
|
},
|
|
9855
|
+
[PR_REVIEW_TYPE]: {
|
|
9856
|
+
name: PR_REVIEW_TYPE,
|
|
9857
|
+
inputSchema: PrReviewInput,
|
|
9858
|
+
outputSchema: PrReviewOutput,
|
|
9859
|
+
outputKind: "judgment",
|
|
9860
|
+
workspaceMode: "dedicated_worktree",
|
|
9861
|
+
workspaceScope: "attempt",
|
|
9862
|
+
sessionScope: "none",
|
|
9863
|
+
requiresReferences: false,
|
|
9864
|
+
validateInput: validatePrReviewInput,
|
|
9865
|
+
validateOutput: validatePrReviewOutput
|
|
9866
|
+
},
|
|
9789
9867
|
[CURATE_PACK_TYPE]: {
|
|
9790
9868
|
name: CURATE_PACK_TYPE,
|
|
9791
9869
|
inputSchema: CuratePackInput,
|
|
@@ -10233,6 +10311,20 @@ function buildFinalOutputBlock(opts) {
|
|
|
10233
10311
|
return lines.join("\n");
|
|
10234
10312
|
}
|
|
10235
10313
|
//#endregion
|
|
10314
|
+
//#region ../agent-runtime/src/prompts/rubric-common.ts
|
|
10315
|
+
function renderRubricCriteriaList(rubric) {
|
|
10316
|
+
return rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
|
|
10317
|
+
}
|
|
10318
|
+
function renderRubricPreambleSection(rubric) {
|
|
10319
|
+
if (!rubric.preamble) return null;
|
|
10320
|
+
return [
|
|
10321
|
+
"### Rubric preamble",
|
|
10322
|
+
"",
|
|
10323
|
+
rubric.preamble,
|
|
10324
|
+
""
|
|
10325
|
+
].join("\n");
|
|
10326
|
+
}
|
|
10327
|
+
//#endregion
|
|
10236
10328
|
//#region ../agent-runtime/src/prompts/assess-brief.ts
|
|
10237
10329
|
/**
|
|
10238
10330
|
* Build the first user-message prompt for an `assess_brief` judge attempt.
|
|
@@ -10258,13 +10350,8 @@ function buildFinalOutputBlock(opts) {
|
|
|
10258
10350
|
*/
|
|
10259
10351
|
function buildAssessBriefUserPrompt(input, ctx) {
|
|
10260
10352
|
const rubric = input.successCriteria.rubric;
|
|
10261
|
-
const criteriaList = rubric
|
|
10262
|
-
const preambleSection = rubric.preamble ? [
|
|
10263
|
-
"### Rubric preamble",
|
|
10264
|
-
"",
|
|
10265
|
-
rubric.preamble,
|
|
10266
|
-
""
|
|
10267
|
-
].join("\n") : "";
|
|
10353
|
+
const criteriaList = renderRubricCriteriaList(rubric);
|
|
10354
|
+
const preambleSection = renderRubricPreambleSection(rubric) ?? "";
|
|
10268
10355
|
const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
|
|
10269
10356
|
"### Workspace",
|
|
10270
10357
|
"",
|
|
@@ -10732,13 +10819,8 @@ function buildJudgeEvalVariantUserPrompt(input, ctx) {
|
|
|
10732
10819
|
function buildJudgePackUserPrompt(input, ctx) {
|
|
10733
10820
|
const { renderedPackId, sourcePackId, successCriteria } = input;
|
|
10734
10821
|
const rubric = successCriteria.rubric;
|
|
10735
|
-
const criteriaList = rubric
|
|
10736
|
-
const preambleSection = rubric.preamble ? [
|
|
10737
|
-
"### Rubric preamble",
|
|
10738
|
-
"",
|
|
10739
|
-
rubric.preamble,
|
|
10740
|
-
""
|
|
10741
|
-
].join("\n") : null;
|
|
10822
|
+
const criteriaList = renderRubricCriteriaList(rubric);
|
|
10823
|
+
const preambleSection = renderRubricPreambleSection(rubric);
|
|
10742
10824
|
return [
|
|
10743
10825
|
"# Judge Pack Agent",
|
|
10744
10826
|
"",
|
|
@@ -10854,6 +10936,112 @@ function buildJudgePackUserPrompt(input, ctx) {
|
|
|
10854
10936
|
].filter((l) => l !== null).join("\n");
|
|
10855
10937
|
}
|
|
10856
10938
|
//#endregion
|
|
10939
|
+
//#region ../agent-runtime/src/prompts/pr-review.ts
|
|
10940
|
+
function buildPrReviewUserPrompt(input, ctx) {
|
|
10941
|
+
const rubric = input.successCriteria.rubric;
|
|
10942
|
+
const criteriaList = renderRubricCriteriaList(rubric);
|
|
10943
|
+
const preambleSection = renderRubricPreambleSection(rubric);
|
|
10944
|
+
const taskPromptSection = input.taskPrompt ? [
|
|
10945
|
+
"## Task-specific instructions",
|
|
10946
|
+
"",
|
|
10947
|
+
input.taskPrompt,
|
|
10948
|
+
""
|
|
10949
|
+
].join("\n") : "";
|
|
10950
|
+
const resourceSection = input.subject.resourceUrls && input.subject.resourceUrls.length > 0 ? [
|
|
10951
|
+
"### Resources",
|
|
10952
|
+
"",
|
|
10953
|
+
...input.subject.resourceUrls.map((url) => `- ${url}`),
|
|
10954
|
+
""
|
|
10955
|
+
].join("\n") : "";
|
|
10956
|
+
const hintsSection = input.subject.inspectionHints && input.subject.inspectionHints.length > 0 ? [
|
|
10957
|
+
"### Inspection hints",
|
|
10958
|
+
"",
|
|
10959
|
+
...input.subject.inspectionHints.map((hint) => `- ${hint}`),
|
|
10960
|
+
""
|
|
10961
|
+
].join("\n") : "";
|
|
10962
|
+
const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
|
|
10963
|
+
"### Workspace",
|
|
10964
|
+
"",
|
|
10965
|
+
"This review attempt is running inside a dedicated disposable git",
|
|
10966
|
+
"worktree. Inspect and reason inside this workspace only.",
|
|
10967
|
+
ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`.` : "The current checkout is disposable and will be cleaned up when the task ends.",
|
|
10968
|
+
""
|
|
10969
|
+
].join("\n") : "";
|
|
10970
|
+
return [
|
|
10971
|
+
"# Review Agent",
|
|
10972
|
+
"",
|
|
10973
|
+
"You are an independent judge. You did NOT produce the subject under review.",
|
|
10974
|
+
"Assess it strictly against the rubric below and emit a structured judgment.",
|
|
10975
|
+
"You may inspect the local workspace and the referenced resources, but do NOT modify anything.",
|
|
10976
|
+
"",
|
|
10977
|
+
`Your diary ID is: ${ctx.diaryId}`,
|
|
10978
|
+
`This task's id is: ${ctx.taskId}`,
|
|
10979
|
+
"",
|
|
10980
|
+
"## Subject",
|
|
10981
|
+
"",
|
|
10982
|
+
`**Title:** ${input.subject.title}`,
|
|
10983
|
+
"",
|
|
10984
|
+
input.subject.summary,
|
|
10985
|
+
"",
|
|
10986
|
+
resourceSection,
|
|
10987
|
+
hintsSection,
|
|
10988
|
+
workspaceSection,
|
|
10989
|
+
"### Execution contract",
|
|
10990
|
+
"",
|
|
10991
|
+
"Treat the provided subject, resources, inspection hints, and any",
|
|
10992
|
+
"task-specific instructions as the full",
|
|
10993
|
+
"review contract for this task.",
|
|
10994
|
+
"",
|
|
10995
|
+
"If the task-specific instructions or inspection hints require an outward action tied to the review",
|
|
10996
|
+
"(for example publishing the judgment somewhere), perform that action as",
|
|
10997
|
+
"part of the task before reporting structured output.",
|
|
10998
|
+
"",
|
|
10999
|
+
"## Review workflow",
|
|
11000
|
+
"",
|
|
11001
|
+
"1. Read the subject summary, resources, inspection hints, and any",
|
|
11002
|
+
" task-specific instructions before scoring.",
|
|
11003
|
+
"2. Inspect the target artefact directly using the tools and resources the",
|
|
11004
|
+
" task makes available.",
|
|
11005
|
+
"3. If you are in a dedicated disposable worktree and need the review target",
|
|
11006
|
+
" checked out locally, do that work inside this disposable workspace only.",
|
|
11007
|
+
"4. Apply the rubric strictly. This task is about complexity and",
|
|
11008
|
+
" reviewability, not correctness or feature desirability.",
|
|
11009
|
+
"5. Perform any required outward action before emitting the final",
|
|
11010
|
+
" structured output.",
|
|
11011
|
+
"",
|
|
11012
|
+
taskPromptSection,
|
|
11013
|
+
preambleSection,
|
|
11014
|
+
"## Criteria",
|
|
11015
|
+
"",
|
|
11016
|
+
criteriaList,
|
|
11017
|
+
"",
|
|
11018
|
+
"### Scoring rules",
|
|
11019
|
+
"",
|
|
11020
|
+
"- Every criterion uses binary scoring only.",
|
|
11021
|
+
"- Score `1` when the subject clearly clears the criterion.",
|
|
11022
|
+
"- Score `0` when it does not, or when the evidence is ambiguous.",
|
|
11023
|
+
"- `rationale` is REQUIRED for every score. Keep it concrete and audit-friendly.",
|
|
11024
|
+
"- Compute `composite = Σ(weight_i × score_i)` exactly; the runtime rejects mismatches.",
|
|
11025
|
+
"",
|
|
11026
|
+
"Write a signed diary entry (tags: `judgment`, `pr_review`) capturing the rationale before reporting structured output.",
|
|
11027
|
+
"",
|
|
11028
|
+
buildFinalOutputBlock({
|
|
11029
|
+
taskType: "pr_review",
|
|
11030
|
+
outputSchemaName: "PrReviewOutput",
|
|
11031
|
+
shapeSketch: [
|
|
11032
|
+
"{",
|
|
11033
|
+
" \"scores\": [",
|
|
11034
|
+
" { \"criterionId\": \"...\", \"score\": 0, \"rationale\": \"...\" }",
|
|
11035
|
+
" ],",
|
|
11036
|
+
" \"composite\": <sum-of-weighted-binary-scores>,",
|
|
11037
|
+
" \"verdict\": \"<1-3 sentence overall>\"",
|
|
11038
|
+
"}"
|
|
11039
|
+
].join("\n"),
|
|
11040
|
+
extraNotes: ["`scores` MUST stay in the same order as the rubric criteria.", "`score` MUST be exactly `0` or `1` for every criterion."]
|
|
11041
|
+
})
|
|
11042
|
+
].filter(Boolean).join("\n");
|
|
11043
|
+
}
|
|
11044
|
+
//#endregion
|
|
10857
11045
|
//#region ../agent-runtime/src/prompts/render-pack.ts
|
|
10858
11046
|
/**
|
|
10859
11047
|
* Build the first user-message prompt for a `render_pack` task. Almost mechanical:
|
|
@@ -11036,6 +11224,16 @@ function buildTaskUserPrompt(task, ctx) {
|
|
|
11036
11224
|
diaryId: ctx.diaryId,
|
|
11037
11225
|
taskId: ctx.taskId
|
|
11038
11226
|
});
|
|
11227
|
+
case PR_REVIEW_TYPE:
|
|
11228
|
+
if (!Value.Check(PrReviewInput, task.input)) {
|
|
11229
|
+
const errors = [...Value.Errors(PrReviewInput, task.input)];
|
|
11230
|
+
throw new Error(`pr_review input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
|
|
11231
|
+
}
|
|
11232
|
+
return buildPrReviewUserPrompt(task.input, {
|
|
11233
|
+
diaryId: ctx.diaryId,
|
|
11234
|
+
taskId: ctx.taskId,
|
|
11235
|
+
workspace: ctx.workspace
|
|
11236
|
+
});
|
|
11039
11237
|
case JUDGE_EVAL_VARIANT_TYPE:
|
|
11040
11238
|
if (!Value.Check(JudgeEvalVariantInput, task.input)) {
|
|
11041
11239
|
const errors = [...Value.Errors(JudgeEvalVariantInput, task.input)];
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@themoltnet/pi-extension",
|
|
3
|
-
"version": "0.18.0",
|
|
3
|
+
"version": "0.18.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "MoltNet pi extension — sandboxed tool execution in Gondolin VMs with MoltNet identity and persistent memory",
|
|
6
6
|
"license": "MIT",
|
|
@@ -31,8 +31,8 @@
|
|
|
31
31
|
"@earendil-works/gondolin": "^0.9.1",
|
|
32
32
|
"@opentelemetry/api": "^1.9.0",
|
|
33
33
|
"@sinclair/typebox": "^0.34.0",
|
|
34
|
-
"@themoltnet/
|
|
35
|
-
"@themoltnet/
|
|
34
|
+
"@themoltnet/agent-runtime": "0.16.0",
|
|
35
|
+
"@themoltnet/sdk": "0.102.0"
|
|
36
36
|
},
|
|
37
37
|
"peerDependencies": {
|
|
38
38
|
"@earendil-works/pi-coding-agent": ">=0.74.0",
|