@infinitedusky/indusk-mcp 1.11.7 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/commands/eval.js +9 -0
- package/dist/lib/eval/judge-runner.js +30 -2
- package/dist/lib/eval/types.d.ts +9 -0
- package/dist/server/index.js +36 -13
- package/hooks/eval-trigger.js +61 -9
- package/package.json +1 -1
package/dist/bin/commands/eval.js
CHANGED
|
@@ -48,6 +48,12 @@ export async function evalSummary(projectRoot, opts) {
|
|
|
48
48
|
console.info(` ${id.padEnd(20)} ${bar} ${(rate * 100).toFixed(0)}%`);
|
|
49
49
|
}
|
|
50
50
|
console.info(`\nGraphiti writes: ${summary.totalGraphitiWrites}`);
|
|
51
|
+
if (summary.totalCostUsd > 0) {
|
|
52
|
+
console.info(`\nCost:`);
|
|
53
|
+
console.info(` total: $${summary.totalCostUsd.toFixed(2)}`);
|
|
54
|
+
console.info(` per eval: $${(summary.totalCostUsd / scorecards.length).toFixed(2)}`);
|
|
55
|
+
console.info(` tokens: ${summary.totalInputTokens.toLocaleString()} in / ${summary.totalOutputTokens.toLocaleString()} out`);
|
|
56
|
+
}
|
|
51
57
|
if (summary.evalCount >= 10) {
|
|
52
58
|
console.info(`\nTrend (last 10 vs previous 10):`);
|
|
53
59
|
for (const [id, delta] of Object.entries(summary.trend)) {
|
|
@@ -94,6 +100,9 @@ function computeSummary(scorecards) {
|
|
|
94
100
|
baselineCount: baselineCards.length,
|
|
95
101
|
passRates,
|
|
96
102
|
totalGraphitiWrites: scorecards.reduce((sum, s) => sum + s.graphitiWrites, 0),
|
|
103
|
+
totalCostUsd: scorecards.reduce((sum, s) => sum + (s.usage?.costUsd ?? 0), 0),
|
|
104
|
+
totalInputTokens: scorecards.reduce((sum, s) => sum + (s.usage?.inputTokens ?? 0) + (s.usage?.cacheReadTokens ?? 0), 0),
|
|
105
|
+
totalOutputTokens: scorecards.reduce((sum, s) => sum + (s.usage?.outputTokens ?? 0), 0),
|
|
97
106
|
trend,
|
|
98
107
|
};
|
|
99
108
|
}
|
package/dist/lib/eval/judge-runner.js
CHANGED
|
@@ -91,12 +91,24 @@ export function runJudgeBackground(opts) {
|
|
|
91
91
|
if (code !== 0) {
|
|
92
92
|
throw new Error(`claude exited with code ${code}: ${stderr.slice(0, 500)}`);
|
|
93
93
|
}
|
|
94
|
-
// --output-format json wraps the result; extract the text content
|
|
94
|
+
// --output-format json wraps the result; extract the text content and usage
|
|
95
95
|
let scorecardText = stdout;
|
|
96
|
+
let usage;
|
|
96
97
|
try {
|
|
97
98
|
const jsonOutput = JSON.parse(stdout);
|
|
98
|
-
// claude --print --output-format json returns { result: string } or similar
|
|
99
99
|
scorecardText = jsonOutput.result ?? jsonOutput.text ?? jsonOutput.content ?? stdout;
|
|
100
|
+
// Capture usage data from claude --print output
|
|
101
|
+
if (jsonOutput.total_cost_usd !== undefined || jsonOutput.usage) {
|
|
102
|
+
const u = jsonOutput.usage ?? {};
|
|
103
|
+
usage = {
|
|
104
|
+
costUsd: jsonOutput.total_cost_usd ?? 0,
|
|
105
|
+
inputTokens: u.input_tokens ?? 0,
|
|
106
|
+
outputTokens: u.output_tokens ?? 0,
|
|
107
|
+
cacheCreationTokens: u.cache_creation_input_tokens ?? 0,
|
|
108
|
+
cacheReadTokens: u.cache_read_input_tokens ?? 0,
|
|
109
|
+
durationMs: jsonOutput.duration_ms ?? 0,
|
|
110
|
+
};
|
|
111
|
+
}
|
|
100
112
|
}
|
|
101
113
|
catch {
|
|
102
114
|
// stdout might be raw JSON scorecard already
|
|
@@ -107,6 +119,8 @@ export function runJudgeBackground(opts) {
|
|
|
107
119
|
scorecardText = jsonMatch[1];
|
|
108
120
|
}
|
|
109
121
|
const scorecard = JSON.parse(scorecardText.trim());
|
|
122
|
+
if (usage)
|
|
123
|
+
scorecard.usage = usage;
|
|
110
124
|
scorecard.telemetryPosted = false;
|
|
111
125
|
if (opts.evalEndpoint) {
|
|
112
126
|
await postTelemetry(opts.evalEndpoint, scorecard);
|
|
@@ -184,9 +198,21 @@ export async function runJudgeSync(opts) {
|
|
|
184
198
|
throw new Error(`claude exited with code ${code}: ${stderr.slice(0, 500)}`);
|
|
185
199
|
}
|
|
186
200
|
let scorecardText = stdout;
|
|
201
|
+
let syncUsage;
|
|
187
202
|
try {
|
|
188
203
|
const jsonOutput = JSON.parse(stdout);
|
|
189
204
|
scorecardText = jsonOutput.result ?? jsonOutput.text ?? jsonOutput.content ?? stdout;
|
|
205
|
+
if (jsonOutput.total_cost_usd !== undefined || jsonOutput.usage) {
|
|
206
|
+
const u = jsonOutput.usage ?? {};
|
|
207
|
+
syncUsage = {
|
|
208
|
+
costUsd: jsonOutput.total_cost_usd ?? 0,
|
|
209
|
+
inputTokens: u.input_tokens ?? 0,
|
|
210
|
+
outputTokens: u.output_tokens ?? 0,
|
|
211
|
+
cacheCreationTokens: u.cache_creation_input_tokens ?? 0,
|
|
212
|
+
cacheReadTokens: u.cache_read_input_tokens ?? 0,
|
|
213
|
+
durationMs: jsonOutput.duration_ms ?? 0,
|
|
214
|
+
};
|
|
215
|
+
}
|
|
190
216
|
}
|
|
191
217
|
catch {
|
|
192
218
|
// raw JSON
|
|
@@ -196,6 +222,8 @@ export async function runJudgeSync(opts) {
|
|
|
196
222
|
scorecardText = jsonMatch[1];
|
|
197
223
|
}
|
|
198
224
|
const scorecard = JSON.parse(scorecardText.trim());
|
|
225
|
+
if (syncUsage)
|
|
226
|
+
scorecard.usage = syncUsage;
|
|
199
227
|
scorecard.telemetryPosted = false;
|
|
200
228
|
if (opts.evalEndpoint) {
|
|
201
229
|
await postTelemetry(opts.evalEndpoint, scorecard);
|
package/dist/lib/eval/types.d.ts
CHANGED
|
@@ -17,6 +17,14 @@ export interface EvalQuestion {
|
|
|
17
17
|
evidence: string;
|
|
18
18
|
finding: string;
|
|
19
19
|
}
|
|
20
|
+
export interface EvalUsage {
|
|
21
|
+
costUsd: number;
|
|
22
|
+
inputTokens: number;
|
|
23
|
+
outputTokens: number;
|
|
24
|
+
cacheCreationTokens: number;
|
|
25
|
+
cacheReadTokens: number;
|
|
26
|
+
durationMs: number;
|
|
27
|
+
}
|
|
20
28
|
export interface EvalScorecard {
|
|
21
29
|
version: 1;
|
|
22
30
|
timestamp: string;
|
|
@@ -27,6 +35,7 @@ export interface EvalScorecard {
|
|
|
27
35
|
summary: string;
|
|
28
36
|
graphitiWrites: number;
|
|
29
37
|
telemetryPosted: boolean;
|
|
38
|
+
usage?: EvalUsage;
|
|
30
39
|
}
|
|
31
40
|
export interface EvalErrorEntry {
|
|
32
41
|
version: 1;
|
package/dist/server/index.js
CHANGED
|
@@ -1,3 +1,13 @@
|
|
|
1
|
+
process.on("uncaughtException", (err) => {
|
|
2
|
+
console.error(`[indusk] uncaught exception: ${err.message}`);
|
|
3
|
+
if (err.stack)
|
|
4
|
+
console.error(err.stack);
|
|
5
|
+
process.exit(1);
|
|
6
|
+
});
|
|
7
|
+
process.on("unhandledRejection", (reason) => {
|
|
8
|
+
console.error(`[indusk] unhandled rejection: ${reason}`);
|
|
9
|
+
process.exit(1);
|
|
10
|
+
});
|
|
1
11
|
import { readFileSync } from "node:fs";
|
|
2
12
|
import { dirname, join, resolve } from "node:path";
|
|
3
13
|
import { fileURLToPath } from "node:url";
|
|
@@ -32,19 +42,32 @@ function checkForUpdates(currentVersion) {
|
|
|
32
42
|
export async function startServer() {
|
|
33
43
|
const projectRoot = resolve(process.env.PROJECT_ROOT ?? ".");
|
|
34
44
|
const version = getLocalVersion();
|
|
45
|
+
console.error(`[indusk] v${version} starting (project: ${projectRoot})`);
|
|
35
46
|
// Non-blocking version check
|
|
36
47
|
checkForUpdates(version);
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
48
|
+
try {
|
|
49
|
+
const server = new McpServer({
|
|
50
|
+
name: "indusk",
|
|
51
|
+
version,
|
|
52
|
+
});
|
|
53
|
+
console.error("[indusk] registering tools...");
|
|
54
|
+
registerPlanTools(server, projectRoot);
|
|
55
|
+
registerContextTools(server, projectRoot);
|
|
56
|
+
registerQualityTools(server, projectRoot);
|
|
57
|
+
registerDocumentTools(server, projectRoot);
|
|
58
|
+
registerSystemTools(server, projectRoot);
|
|
59
|
+
registerGraphTools(server, projectRoot);
|
|
60
|
+
registerLessonTools(server, projectRoot);
|
|
61
|
+
console.error("[indusk] tools registered");
|
|
62
|
+
const transport = new StdioServerTransport();
|
|
63
|
+
await server.connect(transport);
|
|
64
|
+
console.error("[indusk] connected via stdio");
|
|
65
|
+
}
|
|
66
|
+
catch (err) {
|
|
67
|
+
console.error(`[indusk] FATAL: ${err instanceof Error ? err.message : String(err)}`);
|
|
68
|
+
if (err instanceof Error && err.stack) {
|
|
69
|
+
console.error(err.stack);
|
|
70
|
+
}
|
|
71
|
+
process.exit(1);
|
|
72
|
+
}
|
|
50
73
|
}
|
package/hooks/eval-trigger.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
|
|
2
3
|
/**
|
|
3
4
|
* PostToolUse hook: triggers the eval judge after `jj describe`.
|
|
4
5
|
*
|
|
@@ -9,9 +10,9 @@
|
|
|
9
10
|
* Exit 0 always — this is advisory, not blocking.
|
|
10
11
|
*/
|
|
11
12
|
|
|
12
|
-
import { existsSync, readFileSync } from "node:fs";
|
|
13
13
|
import { execSync, spawn } from "node:child_process";
|
|
14
|
-
import {
|
|
14
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
15
|
+
import { dirname, resolve } from "node:path";
|
|
15
16
|
import { fileURLToPath } from "node:url";
|
|
16
17
|
|
|
17
18
|
// Read hook input from stdin
|
|
@@ -85,15 +86,64 @@ try {
|
|
|
85
86
|
// Claude Code provides CLAUDE_TRANSCRIPT_PATH in the environment when hooks run,
|
|
86
87
|
// or we can search for the most recent transcript.
|
|
87
88
|
const transcriptPath =
|
|
88
|
-
process.env.CLAUDE_TRANSCRIPT_PATH ??
|
|
89
|
-
|
|
90
|
-
|
|
89
|
+
process.env.CLAUDE_TRANSCRIPT_PATH ?? process.env.TRANSCRIPT_PATH ?? "(transcript unavailable)";
|
|
90
|
+
|
|
91
|
+
// Find the indusk-mcp package — resolve from the hook's own location.
|
|
92
|
+
// The hook lives at .claude/hooks/eval-trigger.js but was copied from the package's hooks/ dir.
|
|
93
|
+
// Try multiple resolution strategies:
|
|
94
|
+
// 1. Relative to the hook's original package location (when run from the package source)
|
|
95
|
+
// 2. Via npx cache / global install
|
|
96
|
+
// 3. Via the project's node_modules
|
|
97
|
+
const hookDir = dirname(fileURLToPath(import.meta.url));
|
|
98
|
+
const candidates = [
|
|
99
|
+
// Source repo (apps/indusk-mcp/hooks/ → apps/indusk-mcp/dist/)
|
|
100
|
+
resolve(hookDir, "../dist/lib/eval/judge-runner.js"),
|
|
101
|
+
// Installed package (hooks/ → dist/)
|
|
102
|
+
resolve(hookDir, "../../node_modules/@infinitedusky/indusk-mcp/dist/lib/eval/judge-runner.js"),
|
|
103
|
+
// Global npx cache
|
|
104
|
+
...(() => {
|
|
105
|
+
try {
|
|
106
|
+
const which = execSync("which indusk", { encoding: "utf8" }).trim();
|
|
107
|
+
if (which)
|
|
108
|
+
return [
|
|
109
|
+
resolve(
|
|
110
|
+
dirname(which),
|
|
111
|
+
"../lib/node_modules/@infinitedusky/indusk-mcp/dist/lib/eval/judge-runner.js",
|
|
112
|
+
),
|
|
113
|
+
];
|
|
114
|
+
} catch {}
|
|
115
|
+
return [];
|
|
116
|
+
})(),
|
|
117
|
+
];
|
|
118
|
+
let judgeRunnerPath = null;
|
|
119
|
+
for (const c of candidates) {
|
|
120
|
+
if (existsSync(c)) {
|
|
121
|
+
judgeRunnerPath = c;
|
|
122
|
+
break;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
if (!judgeRunnerPath) {
|
|
127
|
+
// Can't find the package — log error and exit
|
|
128
|
+
const { mkdirSync, appendFileSync } = await import("node:fs");
|
|
129
|
+
const logPath = resolve(projectRoot, ".indusk", "eval", "results.log");
|
|
130
|
+
mkdirSync(dirname(logPath), { recursive: true });
|
|
131
|
+
const entry = JSON.stringify({
|
|
132
|
+
version: 1,
|
|
133
|
+
timestamp: new Date().toISOString(),
|
|
134
|
+
mode: "eval",
|
|
135
|
+
changeId,
|
|
136
|
+
error: true,
|
|
137
|
+
message:
|
|
138
|
+
"Could not find @infinitedusky/indusk-mcp package — eval judge not available. Run: npm i -g @infinitedusky/indusk-mcp",
|
|
139
|
+
});
|
|
140
|
+
appendFileSync(logPath, entry + "\n", "utf8");
|
|
141
|
+
process.exit(0);
|
|
142
|
+
}
|
|
91
143
|
|
|
92
|
-
// Spawn the judge runner as a detached background process.
|
|
93
144
|
// Spawn a detached node process that calls runJudgeSync (which awaits completion).
|
|
94
|
-
// runJudgeSync keeps the process alive until claude --print finishes and logs the result.
|
|
95
145
|
const judgeScript = `
|
|
96
|
-
import("${
|
|
146
|
+
import("${judgeRunnerPath}")
|
|
97
147
|
.then(m => m.runJudgeSync({
|
|
98
148
|
projectRoot: ${JSON.stringify(projectRoot)},
|
|
99
149
|
changeId: ${JSON.stringify(changeId)},
|
|
@@ -137,6 +187,8 @@ const output = JSON.stringify({
|
|
|
137
187
|
},
|
|
138
188
|
});
|
|
139
189
|
process.stdout.write(output);
|
|
140
|
-
process.stderr.write(
|
|
190
|
+
process.stderr.write(
|
|
191
|
+
`📊 Eval judge spawned in background for ${changeId.slice(0, 8)}. Results will appear in .indusk/eval/results.log\n`,
|
|
192
|
+
);
|
|
141
193
|
|
|
142
194
|
process.exit(0);
|