npm - @themoltnet/pi-extension - Versions diffs - 0.18.0 → 0.18.1 - Mend

@themoltnet/pi-extension 0.18.0 → 0.18.1

This diff shows the differences between two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (2)

package/dist/index.js +212 -14
package/package.json +3 -3

package/dist/index.js CHANGED Viewed

@@ -9619,6 +9619,72 @@ async function onCreateJudgeEvalVariant(input, ctx) {
 	}];
 }
 //#endregion
+//#region ../tasks/src/task-types/pr-review.ts
+var PR_REVIEW_TYPE = "pr_review";
+var PrReviewSubject = Type$1.Object({
+	title: Type$1.String({ minLength: 1 }),
+	summary: Type$1.String({ minLength: 1 }),
+	resourceUrls: Type$1.Optional(Type$1.Array(Type$1.String({ minLength: 1 }))),
+	inspectionHints: Type$1.Optional(Type$1.Array(Type$1.String({ minLength: 1 })))
+}, {
+	$id: "PrReviewSubject",
+	additionalProperties: false
+});
+var PrReviewInput = Type$1.Object({
+	subject: PrReviewSubject,
+	taskPrompt: Type$1.Optional(Type$1.String({ minLength: 1 })),
+	successCriteria: SuccessCriteria
+}, {
+	$id: "PrReviewInput",
+	additionalProperties: false
+});
+var PrReviewScore = Type$1.Object({
+	criterionId: Type$1.String({ minLength: 1 }),
+	score: Type$1.Union([Type$1.Literal(0), Type$1.Literal(1)]),
+	rationale: Type$1.String({ minLength: 1 })
+}, {
+	$id: "PrReviewScore",
+	additionalProperties: false
+});
+var PrReviewOutput = Type$1.Object({
+	scores: Type$1.Array(PrReviewScore, { minItems: 1 }),
+	composite: Type$1.Number({
+		minimum: 0,
+		maximum: 1
+	}),
+	verdict: Type$1.String({ minLength: 1 })
+}, {
+	$id: "PrReviewOutput",
+	additionalProperties: false
+});
+function requireBooleanRubric(rubric) {
+	for (const criterion of rubric.criteria) if (criterion.scoring !== "boolean") return `pr_review requires boolean scoring for every rubric criterion; criterion "${criterion.id}" uses "${criterion.scoring}"`;
+	return null;
+}
+function validatePrReviewInput(input) {
+	const sc = input.successCriteria;
+	if (!sc) return "successCriteria is required for judgment tasks";
+	if (!sc.rubric) return "successCriteria.rubric is required for judgment tasks";
+	return validateRubricWeights(sc.rubric) ?? requireBooleanRubric(sc.rubric);
+}
+function validatePrReviewOutput(output, input) {
+	if (!input) return null;
+	const scores = output.scores;
+	const rubric = input.successCriteria.rubric;
+	if (!rubric) return null;
+	if (scores.length !== rubric.criteria.length) return `scores length ${scores.length} does not match rubric criteria length ${rubric.criteria.length}`;
+	let composite = 0;
+	for (let i = 0; i < rubric.criteria.length; i++) {
+		const criterion = rubric.criteria[i];
+		const score = scores[i];
+		if (score.criterionId !== criterion.id) return `scores[${i}] has criterionId "${score.criterionId}" but rubric expects "${criterion.id}" in that position`;
+		composite += criterion.weight * score.score;
+	}
+	const claimed = output.composite;
+	if (Math.abs(claimed - composite) > 1e-6) return `composite ${claimed} does not match weighted sum ${composite.toFixed(6)}`;
+	return null;
+}
+//#endregion
 //#region ../tasks/src/task-types/render-pack.ts
 /**
 * `render_pack` — turn a context pack into a signed rendered artefact.
@@ -9786,6 +9852,18 @@ var BUILT_IN_TASK_TYPES = {
 		validateInput: validateJudgmentInput,
 		validateInputAsync: validateAssessBriefInputAsync
 	},
+	[PR_REVIEW_TYPE]: {
+		name: PR_REVIEW_TYPE,
+		inputSchema: PrReviewInput,
+		outputSchema: PrReviewOutput,
+		outputKind: "judgment",
+		workspaceMode: "dedicated_worktree",
+		workspaceScope: "attempt",
+		sessionScope: "none",
+		requiresReferences: false,
+		validateInput: validatePrReviewInput,
+		validateOutput: validatePrReviewOutput
+	},
 	[CURATE_PACK_TYPE]: {
 		name: CURATE_PACK_TYPE,
 		inputSchema: CuratePackInput,
@@ -10233,6 +10311,20 @@ function buildFinalOutputBlock(opts) {
 	return lines.join("\n");
 }
 //#endregion
+//#region ../agent-runtime/src/prompts/rubric-common.ts
+function renderRubricCriteriaList(rubric) {
+	return rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
+}
+function renderRubricPreambleSection(rubric) {
+	if (!rubric.preamble) return null;
+	return [
+		"### Rubric preamble",
+		"",
+		rubric.preamble,
+		""
+	].join("\n");
+}
+//#endregion
 //#region ../agent-runtime/src/prompts/assess-brief.ts
 /**
 * Build the first user-message prompt for an `assess_brief` judge attempt.
@@ -10258,13 +10350,8 @@ function buildFinalOutputBlock(opts) {
 */
 function buildAssessBriefUserPrompt(input, ctx) {
 	const rubric = input.successCriteria.rubric;
-	const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
-	const preambleSection = rubric.preamble ? [
-		"### Rubric preamble",
-		"",
-		rubric.preamble,
-		""
-	].join("\n") : "";
+	const criteriaList = renderRubricCriteriaList(rubric);
+	const preambleSection = renderRubricPreambleSection(rubric) ?? "";
 	const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
 		"### Workspace",
 		"",
@@ -10732,13 +10819,8 @@ function buildJudgeEvalVariantUserPrompt(input, ctx) {
 function buildJudgePackUserPrompt(input, ctx) {
 	const { renderedPackId, sourcePackId, successCriteria } = input;
 	const rubric = successCriteria.rubric;
-	const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
-	const preambleSection = rubric.preamble ? [
-		"### Rubric preamble",
-		"",
-		rubric.preamble,
-		""
-	].join("\n") : null;
+	const criteriaList = renderRubricCriteriaList(rubric);
+	const preambleSection = renderRubricPreambleSection(rubric);
 	return [
 		"# Judge Pack Agent",
 		"",
@@ -10854,6 +10936,112 @@ function buildJudgePackUserPrompt(input, ctx) {
 	].filter((l) => l !== null).join("\n");
 }
 //#endregion
+//#region ../agent-runtime/src/prompts/pr-review.ts
+function buildPrReviewUserPrompt(input, ctx) {
+	const rubric = input.successCriteria.rubric;
+	const criteriaList = renderRubricCriteriaList(rubric);
+	const preambleSection = renderRubricPreambleSection(rubric);
+	const taskPromptSection = input.taskPrompt ? [
+		"## Task-specific instructions",
+		"",
+		input.taskPrompt,
+		""
+	].join("\n") : "";
+	const resourceSection = input.subject.resourceUrls && input.subject.resourceUrls.length > 0 ? [
+		"### Resources",
+		"",
+		...input.subject.resourceUrls.map((url) => `- ${url}`),
+		""
+	].join("\n") : "";
+	const hintsSection = input.subject.inspectionHints && input.subject.inspectionHints.length > 0 ? [
+		"### Inspection hints",
+		"",
+		...input.subject.inspectionHints.map((hint) => `- ${hint}`),
+		""
+	].join("\n") : "";
+	const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
+		"### Workspace",
+		"",
+		"This review attempt is running inside a dedicated disposable git",
+		"worktree. Inspect and reason inside this workspace only.",
+		ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`.` : "The current checkout is disposable and will be cleaned up when the task ends.",
+		""
+	].join("\n") : "";
+	return [
+		"# Review Agent",
+		"",
+		"You are an independent judge. You did NOT produce the subject under review.",
+		"Assess it strictly against the rubric below and emit a structured judgment.",
+		"You may inspect the local workspace and the referenced resources, but do NOT modify anything.",
+		"",
+		`Your diary ID is: ${ctx.diaryId}`,
+		`This task's id is: ${ctx.taskId}`,
+		"",
+		"## Subject",
+		"",
+		`**Title:** ${input.subject.title}`,
+		"",
+		input.subject.summary,
+		"",
+		resourceSection,
+		hintsSection,
+		workspaceSection,
+		"### Execution contract",
+		"",
+		"Treat the provided subject, resources, inspection hints, and any",
+		"task-specific instructions as the full",
+		"review contract for this task.",
+		"",
+		"If the task-specific instructions or inspection hints require an outward action tied to the review",
+		"(for example publishing the judgment somewhere), perform that action as",
+		"part of the task before reporting structured output.",
+		"",
+		"## Review workflow",
+		"",
+		"1. Read the subject summary, resources, inspection hints, and any",
+		"   task-specific instructions before scoring.",
+		"2. Inspect the target artefact directly using the tools and resources the",
+		"   task makes available.",
+		"3. If you are in a dedicated disposable worktree and need the review target",
+		"   checked out locally, do that work inside this disposable workspace only.",
+		"4. Apply the rubric strictly. This task is about complexity and",
+		"   reviewability, not correctness or feature desirability.",
+		"5. Perform any required outward action before emitting the final",
+		"   structured output.",
+		"",
+		taskPromptSection,
+		preambleSection,
+		"## Criteria",
+		"",
+		criteriaList,
+		"",
+		"### Scoring rules",
+		"",
+		"- Every criterion uses binary scoring only.",
+		"- Score `1` when the subject clearly clears the criterion.",
+		"- Score `0` when it does not, or when the evidence is ambiguous.",
+		"- `rationale` is REQUIRED for every score. Keep it concrete and audit-friendly.",
+		"- Compute `composite = Σ(weight_i × score_i)` exactly; the runtime rejects mismatches.",
+		"",
+		"Write a signed diary entry (tags: `judgment`, `pr_review`) capturing the rationale before reporting structured output.",
+		"",
+		buildFinalOutputBlock({
+			taskType: "pr_review",
+			outputSchemaName: "PrReviewOutput",
+			shapeSketch: [
+				"{",
+				"  \"scores\": [",
+				"    { \"criterionId\": \"...\", \"score\": 0, \"rationale\": \"...\" }",
+				"  ],",
+				"  \"composite\": <sum-of-weighted-binary-scores>,",
+				"  \"verdict\": \"<1-3 sentence overall>\"",
+				"}"
+			].join("\n"),
+			extraNotes: ["`scores` MUST stay in the same order as the rubric criteria.", "`score` MUST be exactly `0` or `1` for every criterion."]
+		})
+	].filter(Boolean).join("\n");
+}
+//#endregion
 //#region ../agent-runtime/src/prompts/render-pack.ts
 /**
 * Build the first user-message prompt for a `render_pack` task. Almost mechanical:
@@ -11036,6 +11224,16 @@ function buildTaskUserPrompt(task, ctx) {
 				diaryId: ctx.diaryId,
 				taskId: ctx.taskId
 			});
+		case PR_REVIEW_TYPE:
+			if (!Value.Check(PrReviewInput, task.input)) {
+				const errors = [...Value.Errors(PrReviewInput, task.input)];
+				throw new Error(`pr_review input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
+			}
+			return buildPrReviewUserPrompt(task.input, {
+				diaryId: ctx.diaryId,
+				taskId: ctx.taskId,
+				workspace: ctx.workspace
+			});
 		case JUDGE_EVAL_VARIANT_TYPE:
 			if (!Value.Check(JudgeEvalVariantInput, task.input)) {
 				const errors = [...Value.Errors(JudgeEvalVariantInput, task.input)];

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@themoltnet/pi-extension",
-  "version": "0.18.0",
+  "version": "0.18.1",
   "type": "module",
   "description": "MoltNet pi extension — sandboxed tool execution in Gondolin VMs with MoltNet identity and persistent memory",
   "license": "MIT",
@@ -31,8 +31,8 @@
     "@earendil-works/gondolin": "^0.9.1",
     "@opentelemetry/api": "^1.9.0",
     "@sinclair/typebox": "^0.34.0",
-    "@themoltnet/sdk": "0.102.0",
-    "@themoltnet/agent-runtime": "0.15.2"
+    "@themoltnet/agent-runtime": "0.16.0",
+    "@themoltnet/sdk": "0.102.0"
   },
   "peerDependencies": {
     "@earendil-works/pi-coding-agent": ">=0.74.0",