npm - @infinitedusky/indusk-mcp - Versions diffs - 1.11.0 → 1.11.1 - Mend

@infinitedusky/indusk-mcp 1.11.0 → 1.11.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (5)

package/dist/lib/eval/judge-runner.js +12 -21
package/dist/lib/eval/prompt-builder.d.ts +5 -3
package/dist/lib/eval/prompt-builder.js +7 -8
package/hooks/eval-trigger.js +5 -3
package/package.json +1 -1

package/dist/lib/eval/judge-runner.js CHANGED Viewed

@@ -5,7 +5,7 @@
  * The judge is a detached child process so the calling hook can exit immediately.
  * Results appear asynchronously in `.indusk/eval/results.log`.
  */
-import { execSync, spawn } from "node:child_process";
+import { spawn } from "node:child_process";
 import { join } from "node:path";
 import { getProjectGroupId } from "../config.js";
 import { EvalLogWriter } from "./log-writer.js";
@@ -14,14 +14,6 @@ import { V1_RUBRIC } from "./rubric.js";
 function getEvalLogPath(projectRoot) {
     return join(projectRoot, ".indusk", "eval", "results.log");
 }
-function getDiff(changeId) {
-    try {
-        return execSync(`jj diff -r ${changeId}`, { encoding: "utf8", maxBuffer: 10 * 1024 * 1024 });
-    }
-    catch {
-        return "(diff unavailable)";
-    }
-}
 async function postTelemetry(endpoint, scorecard) {
     try {
         const controller = new AbortController();
@@ -46,13 +38,11 @@ async function postTelemetry(endpoint, scorecard) {
  * If anything fails, logs an error entry instead of silently dropping.
  */
 export function runJudgeBackground(opts) {
-    const diff = getDiff(opts.changeId);
     const projectGroup = getProjectGroupId(opts.projectRoot);
     const prompt = buildJudgePrompt({
         rubric: V1_RUBRIC,
         changeId: opts.changeId,
         transcriptPath: opts.transcriptPath,
-        diff,
         mode: opts.mode,
         projectGroup,
     });
@@ -75,16 +65,18 @@ export function runJudgeBackground(opts) {
         "--permission-mode",
         "acceptEdits",
         "--allowed-tools",
-        ...allowedTools,
-        prompt,
+        allowedTools.join(","),
     ];
+    // Not detached — the eval-trigger hook already spawns this in a separate
+    // node process. Detaching + unref causes the close handler to never fire.
     const child = spawn("claude", args, {
         cwd: opts.projectRoot,
-        stdio: ["ignore", "pipe", "pipe"],
-        detached: true,
+        stdio: ["pipe", "pipe", "pipe"],
         env: { ...process.env },
     });
-    child.unref();
+    // Pipe the prompt via stdin (too large for CLI arg)
+    child.stdin?.write(prompt);
+    child.stdin?.end();
     let stdout = "";
     let stderr = "";
     child.stdout?.on("data", (chunk) => {
@@ -140,13 +132,11 @@ export function runJudgeBackground(opts) {
  * Returns the scorecard or error entry.
  */
 export async function runJudgeSync(opts) {
-    const diff = getDiff(opts.changeId);
     const projectGroup = getProjectGroupId(opts.projectRoot);
     const prompt = buildJudgePrompt({
         rubric: V1_RUBRIC,
         changeId: opts.changeId,
         transcriptPath: opts.transcriptPath,
-        diff,
         mode: opts.mode,
         projectGroup,
     });
@@ -169,15 +159,16 @@ export async function runJudgeSync(opts) {
         "--permission-mode",
         "acceptEdits",
         "--allowed-tools",
-        ...allowedTools,
-        prompt,
+        allowedTools.join(","),
     ];
     return new Promise((resolve) => {
         const child = spawn("claude", args, {
             cwd: opts.projectRoot,
-            stdio: ["ignore", "pipe", "pipe"],
+            stdio: ["pipe", "pipe", "pipe"],
             env: { ...process.env },
         });
+        child.stdin?.write(prompt);
+        child.stdin?.end();
         let stdout = "";
         let stderr = "";
         child.stdout?.on("data", (chunk) => {

package/dist/lib/eval/prompt-builder.d.ts CHANGED Viewed

@@ -2,15 +2,17 @@
  * Builds the judge agent's system prompt.
  *
  * The prompt instructs the judge to: do catchup, read the transcript, read the
- * diff, answer each rubric question, write findings to Graphiti (eval mode
- * only), and output a JSON scorecard.
+ * diff itself via jj, answer each rubric question, write findings to Graphiti
+ * (eval mode only), and output a JSON scorecard.
+ *
+ * The diff is NOT embedded in the prompt — the judge reads it via tool calls.
+ * This keeps the prompt small regardless of commit size.
  */
 import type { RubricQuestion } from "./types.js";
 export interface PromptBuilderOptions {
     rubric: RubricQuestion[];
     changeId: string;
     transcriptPath: string;
-    diff: string;
     mode: "eval" | "baseline";
     projectGroup: string;
 }

package/dist/lib/eval/prompt-builder.js CHANGED Viewed

@@ -2,8 +2,11 @@
  * Builds the judge agent's system prompt.
  *
  * The prompt instructs the judge to: do catchup, read the transcript, read the
- * diff, answer each rubric question, write findings to Graphiti (eval mode
- * only), and output a JSON scorecard.
+ * diff itself via jj, answer each rubric question, write findings to Graphiti
+ * (eval mode only), and output a JSON scorecard.
+ *
+ * The diff is NOT embedded in the prompt — the judge reads it via tool calls.
+ * This keeps the prompt small regardless of commit size.
  */
 export function buildJudgePrompt(opts) {
     const questionsBlock = opts.rubric
@@ -54,13 +57,9 @@ This is the JSONL record of the working agent's session. Read it to understand:
 ### Step 3: Read the diff
-Here is the diff of the committed work:
-\`\`\`
-${opts.diff}
-\`\`\`
+Run \`jj diff -r ${opts.changeId}\` to see what was committed. This is the work being evaluated.
-This is what was actually built. Cross-reference with the transcript to understand the journey from task to result.
+Then read the specific files that were changed to understand the full context — not just the diff lines, but the surrounding code.
 ### Step 4: Answer the evaluation questions

package/hooks/eval-trigger.js CHANGED Viewed

@@ -90,17 +90,18 @@ const transcriptPath =
 	"(transcript unavailable)";
 // Spawn the judge runner as a detached background process.
-// We use a small inline node script that imports and calls runJudgeBackground.
-// This avoids needing the compiled dist/ to exist at hook time.
+// Spawn a detached node process that calls runJudgeSync (which awaits completion).
+// runJudgeSync keeps the process alive until claude --print finishes and logs the result.
 const judgeScript = `
 import("${resolve(projectRoot, "apps/indusk-mcp/dist/lib/eval/judge-runner.js")}")
-  .then(m => m.runJudgeBackground({
+  .then(m => m.runJudgeSync({
     projectRoot: ${JSON.stringify(projectRoot)},
     changeId: ${JSON.stringify(changeId)},
     transcriptPath: ${JSON.stringify(transcriptPath)},
     mode: "eval",
     evalEndpoint: ${JSON.stringify(evalConfig.endpoint)},
   }))
+  .then(() => process.exit(0))
   .catch(err => {
     const fs = require("fs");
     const path = require("path");
@@ -115,6 +116,7 @@ import("${resolve(projectRoot, "apps/indusk-mcp/dist/lib/eval/judge-runner.js")}
       message: err.message || String(err),
     });
     fs.appendFileSync(logPath, entry + "\\n", "utf8");
+    process.exit(1);
   });
 `;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@infinitedusky/indusk-mcp",
-	"version": "1.11.0",
+	"version": "1.11.1",
 	"description": "InDusk development system — skills, MCP tools, and CLI for structured AI-assisted development",
 	"type": "module",
 	"files": [