@infinitedusky/indusk-mcp 1.11.0 → 1.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -23,7 +23,10 @@ const GITIGNORE_ENTRIES = [
|
|
|
23
23
|
{ comment: "# Session-specific handoff (not project knowledge)", pattern: ".claude/handoff.md" },
|
|
24
24
|
{ comment: "# Semantic graph event log (large, local-only)", pattern: ".indusk/graph/" },
|
|
25
25
|
{ comment: "# Eval results (local-only)", pattern: ".indusk/eval/" },
|
|
26
|
-
{
|
|
26
|
+
{
|
|
27
|
+
comment: "# Extension manifests are package-owned; env files contain secrets",
|
|
28
|
+
pattern: ".indusk/extensions/",
|
|
29
|
+
},
|
|
27
30
|
];
|
|
28
31
|
const GITIGNORE_MARKER = "# InDusk managed";
|
|
29
32
|
export function ensureGitignore(projectRoot) {
|
|
@@ -36,12 +39,7 @@ export function ensureGitignore(projectRoot) {
|
|
|
36
39
|
return;
|
|
37
40
|
}
|
|
38
41
|
// Build the block to append
|
|
39
|
-
const block = [
|
|
40
|
-
"",
|
|
41
|
-
GITIGNORE_MARKER,
|
|
42
|
-
...missing.flatMap((e) => [e.comment, e.pattern]),
|
|
43
|
-
"",
|
|
44
|
-
].join("\n");
|
|
42
|
+
const block = ["", GITIGNORE_MARKER, ...missing.flatMap((e) => [e.comment, e.pattern]), ""].join("\n");
|
|
45
43
|
writeFileSync(gitignorePath, `${content.trimEnd()}${block}`);
|
|
46
44
|
const verb = content.length > 0 ? "updated" : "created";
|
|
47
45
|
console.info(` ${verb}: .gitignore (added ${missing.map((e) => e.pattern).join(", ")})`);
|
|
@@ -634,6 +632,10 @@ export async function init(projectRoot, options = {}) {
|
|
|
634
632
|
matcher: "Edit|Write",
|
|
635
633
|
hooks: [{ type: "command", command: "node .claude/hooks/gate-reminder.js" }],
|
|
636
634
|
},
|
|
635
|
+
{
|
|
636
|
+
matcher: "Bash",
|
|
637
|
+
hooks: [{ type: "command", command: "node .claude/hooks/eval-trigger.js" }],
|
|
638
|
+
},
|
|
637
639
|
],
|
|
638
640
|
};
|
|
639
641
|
if (existsSync(claudeSettingsPath)) {
|
|
@@ -191,6 +191,7 @@ export async function update(projectRoot) {
|
|
|
191
191
|
"gate-reminder.js",
|
|
192
192
|
"validate-impl-structure.js",
|
|
193
193
|
"check-catchup.js",
|
|
194
|
+
"eval-trigger.js",
|
|
194
195
|
];
|
|
195
196
|
for (const file of hookFiles) {
|
|
196
197
|
const sourceFile = join(hooksSource, file);
|
|
@@ -210,6 +211,32 @@ export async function update(projectRoot) {
|
|
|
210
211
|
}
|
|
211
212
|
}
|
|
212
213
|
console.info(`\n ${hooksUpdated} updated, ${hooksCurrent} current.`);
|
|
214
|
+
// Ensure eval hook is registered in settings.json
|
|
215
|
+
const settingsPath = join(projectRoot, ".claude/settings.json");
|
|
216
|
+
if (existsSync(settingsPath)) {
|
|
217
|
+
try {
|
|
218
|
+
const settings = JSON.parse(readFileSync(settingsPath, "utf-8"));
|
|
219
|
+
const postHooks = settings.hooks?.PostToolUse ?? [];
|
|
220
|
+
const hasBashEvalHook = postHooks.some((entry) => entry.matcher === "Bash" &&
|
|
221
|
+
entry.hooks?.some((h) => h.command?.includes("eval-trigger")));
|
|
222
|
+
if (!hasBashEvalHook) {
|
|
223
|
+
if (!settings.hooks)
|
|
224
|
+
settings.hooks = {};
|
|
225
|
+
if (!settings.hooks.PostToolUse)
|
|
226
|
+
settings.hooks.PostToolUse = [];
|
|
227
|
+
settings.hooks.PostToolUse.push({
|
|
228
|
+
matcher: "Bash",
|
|
229
|
+
hooks: [{ type: "command", command: "node .claude/hooks/eval-trigger.js" }],
|
|
230
|
+
});
|
|
231
|
+
const { writeFileSync } = await import("node:fs");
|
|
232
|
+
writeFileSync(settingsPath, `${JSON.stringify(settings, null, 2)}\n`);
|
|
233
|
+
console.info(" registered eval-trigger hook in settings.json");
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
catch {
|
|
237
|
+
console.info(" could not register eval hook in settings.json");
|
|
238
|
+
}
|
|
239
|
+
}
|
|
213
240
|
}
|
|
214
241
|
else {
|
|
215
242
|
console.info(" not installed (run init to install)");
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* The judge is a detached child process so the calling hook can exit immediately.
|
|
6
6
|
* Results appear asynchronously in `.indusk/eval/results.log`.
|
|
7
7
|
*/
|
|
8
|
-
import {
|
|
8
|
+
import { spawn } from "node:child_process";
|
|
9
9
|
import { join } from "node:path";
|
|
10
10
|
import { getProjectGroupId } from "../config.js";
|
|
11
11
|
import { EvalLogWriter } from "./log-writer.js";
|
|
@@ -14,14 +14,6 @@ import { V1_RUBRIC } from "./rubric.js";
|
|
|
14
14
|
function getEvalLogPath(projectRoot) {
|
|
15
15
|
return join(projectRoot, ".indusk", "eval", "results.log");
|
|
16
16
|
}
|
|
17
|
-
function getDiff(changeId) {
|
|
18
|
-
try {
|
|
19
|
-
return execSync(`jj diff -r ${changeId}`, { encoding: "utf8", maxBuffer: 10 * 1024 * 1024 });
|
|
20
|
-
}
|
|
21
|
-
catch {
|
|
22
|
-
return "(diff unavailable)";
|
|
23
|
-
}
|
|
24
|
-
}
|
|
25
17
|
async function postTelemetry(endpoint, scorecard) {
|
|
26
18
|
try {
|
|
27
19
|
const controller = new AbortController();
|
|
@@ -46,13 +38,11 @@ async function postTelemetry(endpoint, scorecard) {
|
|
|
46
38
|
* If anything fails, logs an error entry instead of silently dropping.
|
|
47
39
|
*/
|
|
48
40
|
export function runJudgeBackground(opts) {
|
|
49
|
-
const diff = getDiff(opts.changeId);
|
|
50
41
|
const projectGroup = getProjectGroupId(opts.projectRoot);
|
|
51
42
|
const prompt = buildJudgePrompt({
|
|
52
43
|
rubric: V1_RUBRIC,
|
|
53
44
|
changeId: opts.changeId,
|
|
54
45
|
transcriptPath: opts.transcriptPath,
|
|
55
|
-
diff,
|
|
56
46
|
mode: opts.mode,
|
|
57
47
|
projectGroup,
|
|
58
48
|
});
|
|
@@ -75,16 +65,18 @@ export function runJudgeBackground(opts) {
|
|
|
75
65
|
"--permission-mode",
|
|
76
66
|
"acceptEdits",
|
|
77
67
|
"--allowed-tools",
|
|
78
|
-
|
|
79
|
-
prompt,
|
|
68
|
+
allowedTools.join(","),
|
|
80
69
|
];
|
|
70
|
+
// Not detached — the eval-trigger hook already spawns this in a separate
|
|
71
|
+
// node process. Detaching + unref causes the close handler to never fire.
|
|
81
72
|
const child = spawn("claude", args, {
|
|
82
73
|
cwd: opts.projectRoot,
|
|
83
|
-
stdio: ["
|
|
84
|
-
detached: true,
|
|
74
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
85
75
|
env: { ...process.env },
|
|
86
76
|
});
|
|
87
|
-
|
|
77
|
+
// Pipe the prompt via stdin (too large for CLI arg)
|
|
78
|
+
child.stdin?.write(prompt);
|
|
79
|
+
child.stdin?.end();
|
|
88
80
|
let stdout = "";
|
|
89
81
|
let stderr = "";
|
|
90
82
|
child.stdout?.on("data", (chunk) => {
|
|
@@ -140,13 +132,11 @@ export function runJudgeBackground(opts) {
|
|
|
140
132
|
* Returns the scorecard or error entry.
|
|
141
133
|
*/
|
|
142
134
|
export async function runJudgeSync(opts) {
|
|
143
|
-
const diff = getDiff(opts.changeId);
|
|
144
135
|
const projectGroup = getProjectGroupId(opts.projectRoot);
|
|
145
136
|
const prompt = buildJudgePrompt({
|
|
146
137
|
rubric: V1_RUBRIC,
|
|
147
138
|
changeId: opts.changeId,
|
|
148
139
|
transcriptPath: opts.transcriptPath,
|
|
149
|
-
diff,
|
|
150
140
|
mode: opts.mode,
|
|
151
141
|
projectGroup,
|
|
152
142
|
});
|
|
@@ -169,15 +159,16 @@ export async function runJudgeSync(opts) {
|
|
|
169
159
|
"--permission-mode",
|
|
170
160
|
"acceptEdits",
|
|
171
161
|
"--allowed-tools",
|
|
172
|
-
|
|
173
|
-
prompt,
|
|
162
|
+
allowedTools.join(","),
|
|
174
163
|
];
|
|
175
164
|
return new Promise((resolve) => {
|
|
176
165
|
const child = spawn("claude", args, {
|
|
177
166
|
cwd: opts.projectRoot,
|
|
178
|
-
stdio: ["
|
|
167
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
179
168
|
env: { ...process.env },
|
|
180
169
|
});
|
|
170
|
+
child.stdin?.write(prompt);
|
|
171
|
+
child.stdin?.end();
|
|
181
172
|
let stdout = "";
|
|
182
173
|
let stderr = "";
|
|
183
174
|
child.stdout?.on("data", (chunk) => {
|
|
@@ -2,15 +2,17 @@
|
|
|
2
2
|
* Builds the judge agent's system prompt.
|
|
3
3
|
*
|
|
4
4
|
* The prompt instructs the judge to: do catchup, read the transcript, read the
|
|
5
|
-
* diff, answer each rubric question, write findings to Graphiti
|
|
6
|
-
* only), and output a JSON scorecard.
|
|
5
|
+
* diff itself via jj, answer each rubric question, write findings to Graphiti
|
|
6
|
+
* (eval mode only), and output a JSON scorecard.
|
|
7
|
+
*
|
|
8
|
+
* The diff is NOT embedded in the prompt — the judge reads it via tool calls.
|
|
9
|
+
* This keeps the prompt small regardless of commit size.
|
|
7
10
|
*/
|
|
8
11
|
import type { RubricQuestion } from "./types.js";
|
|
9
12
|
export interface PromptBuilderOptions {
|
|
10
13
|
rubric: RubricQuestion[];
|
|
11
14
|
changeId: string;
|
|
12
15
|
transcriptPath: string;
|
|
13
|
-
diff: string;
|
|
14
16
|
mode: "eval" | "baseline";
|
|
15
17
|
projectGroup: string;
|
|
16
18
|
}
|
|
@@ -2,8 +2,11 @@
|
|
|
2
2
|
* Builds the judge agent's system prompt.
|
|
3
3
|
*
|
|
4
4
|
* The prompt instructs the judge to: do catchup, read the transcript, read the
|
|
5
|
-
* diff, answer each rubric question, write findings to Graphiti
|
|
6
|
-
* only), and output a JSON scorecard.
|
|
5
|
+
* diff itself via jj, answer each rubric question, write findings to Graphiti
|
|
6
|
+
* (eval mode only), and output a JSON scorecard.
|
|
7
|
+
*
|
|
8
|
+
* The diff is NOT embedded in the prompt — the judge reads it via tool calls.
|
|
9
|
+
* This keeps the prompt small regardless of commit size.
|
|
7
10
|
*/
|
|
8
11
|
export function buildJudgePrompt(opts) {
|
|
9
12
|
const questionsBlock = opts.rubric
|
|
@@ -54,13 +57,9 @@ This is the JSONL record of the working agent's session. Read it to understand:
|
|
|
54
57
|
|
|
55
58
|
### Step 3: Read the diff
|
|
56
59
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
\`\`\`
|
|
60
|
-
${opts.diff}
|
|
61
|
-
\`\`\`
|
|
60
|
+
Run \`jj diff -r ${opts.changeId}\` to see what was committed. This is the work being evaluated.
|
|
62
61
|
|
|
63
|
-
|
|
62
|
+
Then read the specific files that were changed to understand the full context — not just the diff lines, but the surrounding code.
|
|
64
63
|
|
|
65
64
|
### Step 4: Answer the evaluation questions
|
|
66
65
|
|
package/hooks/eval-trigger.js
CHANGED
|
@@ -90,17 +90,18 @@ const transcriptPath =
|
|
|
90
90
|
"(transcript unavailable)";
|
|
91
91
|
|
|
92
92
|
// Spawn the judge runner as a detached background process.
|
|
93
|
-
//
|
|
94
|
-
//
|
|
93
|
+
// Spawn a detached node process that calls runJudgeSync (which awaits completion).
|
|
94
|
+
// runJudgeSync keeps the process alive until claude --print finishes and logs the result.
|
|
95
95
|
const judgeScript = `
|
|
96
96
|
import("${resolve(projectRoot, "apps/indusk-mcp/dist/lib/eval/judge-runner.js")}")
|
|
97
|
-
.then(m => m.
|
|
97
|
+
.then(m => m.runJudgeSync({
|
|
98
98
|
projectRoot: ${JSON.stringify(projectRoot)},
|
|
99
99
|
changeId: ${JSON.stringify(changeId)},
|
|
100
100
|
transcriptPath: ${JSON.stringify(transcriptPath)},
|
|
101
101
|
mode: "eval",
|
|
102
102
|
evalEndpoint: ${JSON.stringify(evalConfig.endpoint)},
|
|
103
103
|
}))
|
|
104
|
+
.then(() => process.exit(0))
|
|
104
105
|
.catch(err => {
|
|
105
106
|
const fs = require("fs");
|
|
106
107
|
const path = require("path");
|
|
@@ -115,6 +116,7 @@ import("${resolve(projectRoot, "apps/indusk-mcp/dist/lib/eval/judge-runner.js")}
|
|
|
115
116
|
message: err.message || String(err),
|
|
116
117
|
});
|
|
117
118
|
fs.appendFileSync(logPath, entry + "\\n", "utf8");
|
|
119
|
+
process.exit(1);
|
|
118
120
|
});
|
|
119
121
|
`;
|
|
120
122
|
|