@infinitedusky/indusk-mcp 1.11.0 → 1.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@
5
5
  * The judge is a detached child process so the calling hook can exit immediately.
6
6
  * Results appear asynchronously in `.indusk/eval/results.log`.
7
7
  */
8
- import { execSync, spawn } from "node:child_process";
8
+ import { spawn } from "node:child_process";
9
9
  import { join } from "node:path";
10
10
  import { getProjectGroupId } from "../config.js";
11
11
  import { EvalLogWriter } from "./log-writer.js";
@@ -14,14 +14,6 @@ import { V1_RUBRIC } from "./rubric.js";
14
14
  function getEvalLogPath(projectRoot) {
15
15
  return join(projectRoot, ".indusk", "eval", "results.log");
16
16
  }
17
- function getDiff(changeId) {
18
- try {
19
- return execSync(`jj diff -r ${changeId}`, { encoding: "utf8", maxBuffer: 10 * 1024 * 1024 });
20
- }
21
- catch {
22
- return "(diff unavailable)";
23
- }
24
- }
25
17
  async function postTelemetry(endpoint, scorecard) {
26
18
  try {
27
19
  const controller = new AbortController();
@@ -46,13 +38,11 @@ async function postTelemetry(endpoint, scorecard) {
46
38
  * If anything fails, logs an error entry instead of silently dropping.
47
39
  */
48
40
  export function runJudgeBackground(opts) {
49
- const diff = getDiff(opts.changeId);
50
41
  const projectGroup = getProjectGroupId(opts.projectRoot);
51
42
  const prompt = buildJudgePrompt({
52
43
  rubric: V1_RUBRIC,
53
44
  changeId: opts.changeId,
54
45
  transcriptPath: opts.transcriptPath,
55
- diff,
56
46
  mode: opts.mode,
57
47
  projectGroup,
58
48
  });
@@ -75,16 +65,18 @@ export function runJudgeBackground(opts) {
75
65
  "--permission-mode",
76
66
  "acceptEdits",
77
67
  "--allowed-tools",
78
- ...allowedTools,
79
- prompt,
68
+ allowedTools.join(","),
80
69
  ];
70
+ // Not detached — the eval-trigger hook already spawns this in a separate
71
+ // node process. Detaching + unref causes the close handler to never fire.
81
72
  const child = spawn("claude", args, {
82
73
  cwd: opts.projectRoot,
83
- stdio: ["ignore", "pipe", "pipe"],
84
- detached: true,
74
+ stdio: ["pipe", "pipe", "pipe"],
85
75
  env: { ...process.env },
86
76
  });
87
- child.unref();
77
+ // Pipe the prompt via stdin (too large for CLI arg)
78
+ child.stdin?.write(prompt);
79
+ child.stdin?.end();
88
80
  let stdout = "";
89
81
  let stderr = "";
90
82
  child.stdout?.on("data", (chunk) => {
@@ -140,13 +132,11 @@ export function runJudgeBackground(opts) {
140
132
  * Returns the scorecard or error entry.
141
133
  */
142
134
  export async function runJudgeSync(opts) {
143
- const diff = getDiff(opts.changeId);
144
135
  const projectGroup = getProjectGroupId(opts.projectRoot);
145
136
  const prompt = buildJudgePrompt({
146
137
  rubric: V1_RUBRIC,
147
138
  changeId: opts.changeId,
148
139
  transcriptPath: opts.transcriptPath,
149
- diff,
150
140
  mode: opts.mode,
151
141
  projectGroup,
152
142
  });
@@ -169,15 +159,16 @@ export async function runJudgeSync(opts) {
169
159
  "--permission-mode",
170
160
  "acceptEdits",
171
161
  "--allowed-tools",
172
- ...allowedTools,
173
- prompt,
162
+ allowedTools.join(","),
174
163
  ];
175
164
  return new Promise((resolve) => {
176
165
  const child = spawn("claude", args, {
177
166
  cwd: opts.projectRoot,
178
- stdio: ["ignore", "pipe", "pipe"],
167
+ stdio: ["pipe", "pipe", "pipe"],
179
168
  env: { ...process.env },
180
169
  });
170
+ child.stdin?.write(prompt);
171
+ child.stdin?.end();
181
172
  let stdout = "";
182
173
  let stderr = "";
183
174
  child.stdout?.on("data", (chunk) => {
@@ -2,15 +2,17 @@
2
2
  * Builds the judge agent's system prompt.
3
3
  *
4
4
  * The prompt instructs the judge to: do catchup, read the transcript, read the
5
- * diff, answer each rubric question, write findings to Graphiti (eval mode
6
- * only), and output a JSON scorecard.
5
+ * diff itself via jj, answer each rubric question, write findings to Graphiti
6
+ * (eval mode only), and output a JSON scorecard.
7
+ *
8
+ * The diff is NOT embedded in the prompt — the judge reads it via tool calls.
9
+ * This keeps the prompt small regardless of commit size.
7
10
  */
8
11
  import type { RubricQuestion } from "./types.js";
9
12
  export interface PromptBuilderOptions {
10
13
  rubric: RubricQuestion[];
11
14
  changeId: string;
12
15
  transcriptPath: string;
13
- diff: string;
14
16
  mode: "eval" | "baseline";
15
17
  projectGroup: string;
16
18
  }
@@ -2,8 +2,11 @@
2
2
  * Builds the judge agent's system prompt.
3
3
  *
4
4
  * The prompt instructs the judge to: do catchup, read the transcript, read the
5
- * diff, answer each rubric question, write findings to Graphiti (eval mode
6
- * only), and output a JSON scorecard.
5
+ * diff itself via jj, answer each rubric question, write findings to Graphiti
6
+ * (eval mode only), and output a JSON scorecard.
7
+ *
8
+ * The diff is NOT embedded in the prompt — the judge reads it via tool calls.
9
+ * This keeps the prompt small regardless of commit size.
7
10
  */
8
11
  export function buildJudgePrompt(opts) {
9
12
  const questionsBlock = opts.rubric
@@ -54,13 +57,9 @@ This is the JSONL record of the working agent's session. Read it to understand:
54
57
 
55
58
  ### Step 3: Read the diff
56
59
 
57
- Here is the diff of the committed work:
58
-
59
- \`\`\`
60
- ${opts.diff}
61
- \`\`\`
60
+ Run \`jj diff -r ${opts.changeId}\` to see what was committed. This is the work being evaluated.
62
61
 
63
- This is what was actually built. Cross-reference with the transcript to understand the journey from task to result.
62
+ Then read the specific files that were changed to understand the full context — not just the diff lines, but the surrounding code.
64
63
 
65
64
  ### Step 4: Answer the evaluation questions
66
65
 
@@ -90,17 +90,18 @@ const transcriptPath =
90
90
  "(transcript unavailable)";
91
91
 
92
92
  // Spawn the judge runner as a detached background process.
93
- // We use a small inline node script that imports and calls runJudgeBackground.
94
- // This avoids needing the compiled dist/ to exist at hook time.
93
+ // Spawn a detached node process that calls runJudgeSync (which awaits completion).
94
+ // runJudgeSync keeps the process alive until claude --print finishes and logs the result.
95
95
  const judgeScript = `
96
96
  import("${resolve(projectRoot, "apps/indusk-mcp/dist/lib/eval/judge-runner.js")}")
97
- .then(m => m.runJudgeBackground({
97
+ .then(m => m.runJudgeSync({
98
98
  projectRoot: ${JSON.stringify(projectRoot)},
99
99
  changeId: ${JSON.stringify(changeId)},
100
100
  transcriptPath: ${JSON.stringify(transcriptPath)},
101
101
  mode: "eval",
102
102
  evalEndpoint: ${JSON.stringify(evalConfig.endpoint)},
103
103
  }))
104
+ .then(() => process.exit(0))
104
105
  .catch(err => {
105
106
  const fs = require("fs");
106
107
  const path = require("path");
@@ -115,6 +116,7 @@ import("${resolve(projectRoot, "apps/indusk-mcp/dist/lib/eval/judge-runner.js")}
115
116
  message: err.message || String(err),
116
117
  });
117
118
  fs.appendFileSync(logPath, entry + "\\n", "utf8");
119
+ process.exit(1);
118
120
  });
119
121
  `;
120
122
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@infinitedusky/indusk-mcp",
3
- "version": "1.11.0",
3
+ "version": "1.11.1",
4
4
  "description": "InDusk development system — skills, MCP tools, and CLI for structured AI-assisted development",
5
5
  "type": "module",
6
6
  "files": [