@infinitedusky/indusk-mcp 1.10.2 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/cli.js +121 -0
- package/dist/bin/commands/eval.d.ts +15 -0
- package/dist/bin/commands/eval.js +225 -0
- package/dist/bin/commands/init.d.ts +1 -0
- package/dist/bin/commands/init.js +26 -14
- package/dist/bin/commands/update.js +5 -1
- package/dist/lib/config.d.ts +18 -2
- package/dist/lib/config.js +21 -3
- package/dist/lib/eval/judge-runner.d.ts +28 -0
- package/dist/lib/eval/judge-runner.js +230 -0
- package/dist/lib/eval/log-reader.d.ts +15 -0
- package/dist/lib/eval/log-reader.js +53 -0
- package/dist/lib/eval/log-writer.d.ts +14 -0
- package/dist/lib/eval/log-writer.js +27 -0
- package/dist/lib/eval/prompt-builder.d.ts +17 -0
- package/dist/lib/eval/prompt-builder.js +107 -0
- package/dist/lib/eval/rubric.d.ts +8 -0
- package/dist/lib/eval/rubric.js +28 -0
- package/dist/lib/eval/types.d.ts +41 -0
- package/dist/lib/eval/types.js +12 -0
- package/dist/lib/semantic-graph/adapter.d.ts +55 -0
- package/dist/lib/semantic-graph/adapter.js +12 -0
- package/dist/lib/semantic-graph/adapters/cgc.d.ts +28 -0
- package/dist/lib/semantic-graph/adapters/cgc.js +163 -0
- package/dist/lib/semantic-graph/events.d.ts +156 -0
- package/dist/lib/semantic-graph/events.js +93 -0
- package/dist/lib/semantic-graph/graphiti-log-wrapper.d.ts +39 -0
- package/dist/lib/semantic-graph/graphiti-log-wrapper.js +113 -0
- package/dist/lib/semantic-graph/index.d.ts +14 -0
- package/dist/lib/semantic-graph/index.js +13 -0
- package/dist/lib/semantic-graph/jj.d.ts +42 -0
- package/dist/lib/semantic-graph/jj.js +98 -0
- package/dist/lib/semantic-graph/log-reader.d.ts +22 -0
- package/dist/lib/semantic-graph/log-reader.js +50 -0
- package/dist/lib/semantic-graph/log-writer.d.ts +24 -0
- package/dist/lib/semantic-graph/log-writer.js +38 -0
- package/dist/lib/semantic-graph/paths.d.ts +13 -0
- package/dist/lib/semantic-graph/paths.js +18 -0
- package/dist/lib/semantic-graph/replay.d.ts +52 -0
- package/dist/lib/semantic-graph/replay.js +58 -0
- package/dist/lib/semantic-graph/runtime-client.d.ts +58 -0
- package/dist/lib/semantic-graph/runtime-client.js +208 -0
- package/dist/lib/semantic-graph/sync-engine.d.ts +33 -0
- package/dist/lib/semantic-graph/sync-engine.js +166 -0
- package/dist/tools/graph-tools.js +187 -1
- package/extensions/sigma/manifest.json +6 -0
- package/extensions/sigma/skill.md +290 -0
- package/hooks/eval-trigger.js +140 -0
- package/package.json +2 -1
- package/skills/eval-review.md +37 -0
- package/skills/planner.md +2 -2
- package/skills/retrospective.md +1 -1
- package/skills/work.md +2 -2
package/dist/bin/cli.js
CHANGED
|
@@ -131,6 +131,93 @@ infra
|
|
|
131
131
|
const { infraStatus } = await import("./commands/infra.js");
|
|
132
132
|
await infraStatus();
|
|
133
133
|
});
|
|
134
|
+
// `indusk graph` — parent command that groups the semantic-graph subcommands.
const graph = program
    .command("graph")
    .description("Manage the semantic graph (sync, rebuild, status)");
// `indusk graph sync` — feeds the CGC adapter through the sync engine for the
// project in the current working directory and prints the resulting counters.
graph
    .command("sync")
    .description("Sync CGC structural data into the semantic graph")
    .action(async () => {
        // Dependencies are imported inside the action, as the other commands in this file do.
        const { basename } = await import("node:path");
        const { CgcAdapter } = await import("../lib/semantic-graph/adapters/cgc.js");
        const { LogWriter } = await import("../lib/semantic-graph/log-writer.js");
        const { getLogPath } = await import("../lib/semantic-graph/paths.js");
        const { SemanticGraphClient } = await import("../lib/semantic-graph/runtime-client.js");
        const { runSync } = await import("../lib/semantic-graph/sync-engine.js");
        const projectRoot = process.cwd();
        // The runtime client is keyed by the directory name of the project.
        const projectName = basename(projectRoot);
        const adapter = new CgcAdapter();
        const logWriter = new LogWriter(getLogPath(projectRoot));
        const client = new SemanticGraphClient(projectName);
        await client.ensureConnection();
        let result;
        try {
            console.info("Syncing semantic graph...");
            result = await runSync(adapter, projectRoot, logWriter, client);
        }
        finally {
            // Always release the connection, even when the sync itself fails.
            await client.close();
        }
        console.info(`Created: ${result.created}, Moved: ${result.moved}, Tombstoned: ${result.tombstoned}, Edges: ${result.edges_attached}, Unchanged: ${result.unchanged}`);
        console.info(`Duration: ${result.duration_ms}ms`);
    });
|
|
159
|
+
// `indusk graph rebuild` — wipes the runtime graph for the current project and
// replays the on-disk event log into it, then prints the replay counters.
graph
    .command("rebuild")
    .description("Clear and rebuild the semantic graph runtime from the event log")
    .action(async () => {
        const { basename } = await import("node:path");
        const { getLogPath } = await import("../lib/semantic-graph/paths.js");
        const { replay } = await import("../lib/semantic-graph/replay.js");
        const { SemanticGraphClient } = await import("../lib/semantic-graph/runtime-client.js");
        const projectRoot = process.cwd();
        const projectName = basename(projectRoot);
        const logPath = getLogPath(projectRoot);
        // Phase 1: clear the runtime on its own connection.
        const client = new SemanticGraphClient(projectName);
        await client.ensureConnection();
        try {
            console.info("Clearing runtime...");
            await client.clearGraph();
        }
        finally {
            // Release the connection whether or not the clear succeeded.
            await client.close();
        }
        // Phase 2: replay on a separate client, keeping the original two-connection flow.
        // NOTE(review): presumably a fresh client is required after clearGraph() — confirm.
        const freshClient = new SemanticGraphClient(projectName);
        await freshClient.ensureConnection();
        let result;
        try {
            console.info("Replaying log...");
            result = await replay(logPath, freshClient);
        }
        finally {
            await freshClient.close();
        }
        console.info(`Total: ${result.total}, Applied: ${result.applied}, Skipped: ${result.skipped}, Errors: ${result.errors}`);
    });
|
|
182
|
+
// `indusk graph status` — reports on the event log (event count, size, last
// completed sync) and, when the runtime is reachable, its anchor/edge counts.
graph
    .command("status")
    .description("Show semantic graph status")
    .action(async () => {
        const { basename } = await import("node:path");
        const { existsSync, statSync } = await import("node:fs");
        const { getLogPath } = await import("../lib/semantic-graph/paths.js");
        const { readAllEvents } = await import("../lib/semantic-graph/log-reader.js");
        const { SemanticGraphClient } = await import("../lib/semantic-graph/runtime-client.js");
        const projectRoot = process.cwd();
        const projectName = basename(projectRoot);
        const logPath = getLogPath(projectRoot);
        console.info(`Project: ${projectName}`);
        console.info(`Log: ${logPath}`);
        if (existsSync(logPath)) {
            const stat = statSync(logPath);
            const events = await readAllEvents(logPath);
            console.info(` Events: ${events.length}`);
            console.info(` Size: ${(stat.size / 1024).toFixed(1)}KB`);
            // Scan a reversed copy so the most recent completed sync is found first.
            const lastSync = [...events].reverse().find((e) => e.type === "sync.completed");
            if (lastSync) {
                console.info(` Last sync: ${lastSync.ts}`);
            }
        }
        else {
            console.info(" (no log file — run 'indusk graph sync' first)");
        }
        // Runtime section: any failure is reported as "not available" rather than thrown.
        let client;
        let connected = false;
        try {
            client = new SemanticGraphClient(projectName);
            await client.ensureConnection();
            connected = true;
            const anchors = await client.countAnchors();
            const edges = await client.countEdges();
            console.info(`Runtime: ${anchors} anchors, ${edges} edges`);
        }
        catch {
            console.info("Runtime: FalkorDB not available");
        }
        finally {
            // Close only a connection that was actually opened, so a failed count
            // query no longer leaves it dangling.
            if (connected) {
                try {
                    await client.close();
                }
                catch {
                    // Best-effort cleanup: the status has already been printed.
                }
            }
        }
    });
|
|
134
221
|
program
|
|
135
222
|
.command("pr-clean")
|
|
136
223
|
.description("Strip InDusk settings overlay before a PR")
|
|
@@ -147,6 +234,40 @@ program
|
|
|
147
234
|
applyOverlay(process.cwd());
|
|
148
235
|
console.info("Re-applied InDusk overlay to .claude/settings.json");
|
|
149
236
|
});
|
|
237
|
+
// `indusk install <names...>` — shorthand for `extensions enable` (built-in
// extensions) or, with `--from`, `extensions add` (a third-party extension).
program
    .command("install <names...>")
    .description("Install extensions (shorthand for extensions enable / add)")
    .option("--from <source>", "Source for third-party extension (npm:pkg, github:user/repo, URL, or path)")
    .action(async (names, opts) => {
        const { extensionsAdd, extensionsEnable } = await import("./commands/extensions.js");
        if (opts.from) {
            // A single --from source can only describe one extension; say so
            // instead of silently dropping the remaining names.
            if (names.length > 1) {
                console.warn(`--from installs a single extension; ignoring: ${names.slice(1).join(", ")}`);
            }
            await extensionsAdd(process.cwd(), names[0], opts.from);
            return;
        }
        await extensionsEnable(process.cwd(), names);
    });
|
|
251
|
+
// `indusk eval` — parent command for the context-evaluation subcommands.
const eval_ = program
    .command("eval")
    .description("Context evaluation and quality scoring");
// `indusk eval summary` — hands the parsed options to evalSummary() for the
// project in the current working directory.
eval_
    .command("summary")
    .description("Aggregate eval scores and trends")
    .option("--mode <mode>", "Filter by mode (eval, baseline)")
    .option("--since <date>", "Show results since date")
    .option("--json", "Output as JSON")
    .action(async (opts) => {
        const evalCommands = await import("./commands/eval.js");
        await evalCommands.evalSummary(process.cwd(), opts);
    });
|
|
262
|
+
// `indusk eval baseline` — hands the parsed options to evalBaseline() for the
// project in the current working directory. `--task` is mandatory.
eval_
    .command("baseline")
    .description("Run baseline evaluation with vanilla agent")
    .requiredOption("--task <path>", "Path to task prompt file")
    .option("--keep", "Keep baseline worktree after eval")
    .action(async (opts) => {
        const evalCommands = await import("./commands/eval.js");
        await evalCommands.evalBaseline(process.cwd(), opts);
    });
|
|
150
271
|
program
|
|
151
272
|
.command("serve")
|
|
152
273
|
.description("Start the MCP server (used by Claude Code via .mcp.json)")
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI commands for the eval system.
|
|
3
|
+
*
|
|
4
|
+
* `indusk eval summary` — aggregate scores and trends
|
|
5
|
+
* `indusk eval baseline` — run baseline evaluation with vanilla agent
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Print an aggregate view of the project's eval results log.
 *
 * @param projectRoot - Project directory; the log is read from
 *   `.indusk/eval/results.log` beneath it.
 * @param opts - Output and filter options:
 *   `mode` restricts results to `"eval"` or `"baseline"`;
 *   `since` is a date string passed to `new Date(...)`;
 *   `json` prints machine-readable JSON instead of the text report.
 * @returns A promise that settles once the report has been printed.
 */
export declare function evalSummary(projectRoot: string, opts: {
    mode?: string;
    since?: string;
    json?: boolean;
}): Promise<void>;
|
|
12
|
+
/**
 * Run a task prompt with a stripped-down ("vanilla") agent in a throw-away
 * worktree and evaluate the result in `"baseline"` mode.
 *
 * @param projectRoot - Project directory; the worktree is created at
 *   `.indusk/eval/baseline-worktree` beneath it.
 * @param opts - `task` is the path of the task prompt file (required);
 *   `keep` leaves the worktree in place after the evaluation.
 * @returns A promise that settles once the run has finished.
 */
export declare function evalBaseline(projectRoot: string, opts: {
    task: string;
    keep?: boolean;
}): Promise<void>;
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI commands for the eval system.
|
|
3
|
+
*
|
|
4
|
+
* `indusk eval summary` — aggregate scores and trends
|
|
5
|
+
* `indusk eval baseline` — run baseline evaluation with vanilla agent
|
|
6
|
+
*/
|
|
7
|
+
import { existsSync } from "node:fs";
|
|
8
|
+
import { join } from "node:path";
|
|
9
|
+
import { readAllEntries } from "../../lib/eval/log-reader.js";
|
|
10
|
+
import { isScorecard } from "../../lib/eval/types.js";
|
|
11
|
+
/** Location of the eval results log for the given project root. */
function getEvalLogPath(projectRoot) {
    return join(projectRoot, ".indusk", "eval", "results.log");
}
/**
 * Print an aggregate view of the eval results log: entry counts, mode
 * breakdown, per-question pass rates, Graphiti write total and, when enough
 * history exists, the pass-rate trend.
 *
 * @param projectRoot Project directory containing `.indusk/eval/results.log`.
 * @param opts `mode` ("eval" | "baseline") and `since` (date string) filter
 *   the entries; `json` switches to JSON output on stdout.
 * @returns Resolves after printing. An unparseable `since` is reported on
 *   stderr and sets `process.exitCode` to 1 instead of silently producing an
 *   Invalid Date filter.
 */
export async function evalSummary(projectRoot, opts) {
    const logPath = getEvalLogPath(projectRoot);
    if (!existsSync(logPath)) {
        console.info("No eval results yet. Results appear after jj describe triggers the eval hook.");
        return;
    }
    const filterOpts = {};
    if (opts.mode === "eval" || opts.mode === "baseline") {
        filterOpts.mode = opts.mode;
    }
    else if (opts.mode !== undefined) {
        // Written to stderr so that --json output on stdout stays parseable.
        console.warn(`Ignoring unknown --mode "${opts.mode}" (expected "eval" or "baseline")`);
    }
    if (opts.since) {
        const since = new Date(opts.since);
        // new Date("garbage") yields an Invalid Date whose comparisons are all false.
        if (Number.isNaN(since.getTime())) {
            console.error(`Invalid --since date: ${opts.since}`);
            process.exitCode = 1;
            return;
        }
        filterOpts.since = since;
    }
    const entries = await readAllEntries(logPath, filterOpts);
    if (entries.length === 0) {
        console.info("No eval results match the filter criteria.");
        return;
    }
    // Anything in the log that is not a scorecard is reported as an error entry.
    const scorecards = entries.filter(isScorecard);
    const errors = entries.filter((e) => !isScorecard(e));
    const summary = computeSummary(scorecards);
    if (opts.json) {
        console.info(JSON.stringify({ scorecards, errors, summary }, null, 2));
        return;
    }
    // Text output
    console.info(`\n📊 Eval Summary`);
    console.info(`${"─".repeat(50)}`);
    console.info(`Total evaluations: ${entries.length} (${scorecards.length} scorecards, ${errors.length} errors)`);
    if (scorecards.length > 0) {
        console.info(`\nMode breakdown:`);
        console.info(` eval: ${summary.evalCount}`);
        console.info(` baseline: ${summary.baselineCount}`);
        console.info(`\nPer-question pass rates:`);
        for (const [id, rate] of Object.entries(summary.passRates)) {
            // 20-cell bar: filled cells are proportional to the pass rate.
            const filled = Math.round(rate * 20);
            const bar = "█".repeat(filled) + "░".repeat(20 - filled);
            console.info(` ${id.padEnd(20)} ${bar} ${(rate * 100).toFixed(0)}%`);
        }
        console.info(`\nGraphiti writes: ${summary.totalGraphitiWrites}`);
        // Only print the heading when a trend was actually computed; it stays
        // empty until there are enough earlier eval scorecards to compare with.
        const trendRows = Object.entries(summary.trend);
        if (trendRows.length > 0) {
            console.info(`\nTrend (last 10 vs previous 10):`);
            for (const [id, delta] of trendRows) {
                const arrow = delta > 0 ? "↑" : delta < 0 ? "↓" : "→";
                console.info(` ${id.padEnd(20)} ${arrow} ${delta > 0 ? "+" : ""}${(delta * 100).toFixed(0)}%`);
            }
        }
    }
    console.info("");
}
|
|
61
|
+
function computeSummary(scorecards) {
|
|
62
|
+
const evalCards = scorecards.filter((s) => s.mode === "eval");
|
|
63
|
+
const baselineCards = scorecards.filter((s) => s.mode === "baseline");
|
|
64
|
+
// Pass rates per question
|
|
65
|
+
const passRates = {};
|
|
66
|
+
const questionCounts = {};
|
|
67
|
+
for (const card of scorecards) {
|
|
68
|
+
for (const q of card.questions) {
|
|
69
|
+
if (!questionCounts[q.id])
|
|
70
|
+
questionCounts[q.id] = { pass: 0, total: 0 };
|
|
71
|
+
questionCounts[q.id].total++;
|
|
72
|
+
if (q.answer === "yes")
|
|
73
|
+
questionCounts[q.id].pass++;
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
for (const [id, counts] of Object.entries(questionCounts)) {
|
|
77
|
+
passRates[id] = counts.total > 0 ? counts.pass / counts.total : 0;
|
|
78
|
+
}
|
|
79
|
+
// Trend: compare last 10 eval scorecards vs previous 10
|
|
80
|
+
const trend = {};
|
|
81
|
+
if (evalCards.length >= 10) {
|
|
82
|
+
const recent = evalCards.slice(-10);
|
|
83
|
+
const previous = evalCards.slice(-20, -10);
|
|
84
|
+
if (previous.length >= 5) {
|
|
85
|
+
const recentRates = computePassRates(recent);
|
|
86
|
+
const previousRates = computePassRates(previous);
|
|
87
|
+
for (const id of Object.keys(recentRates)) {
|
|
88
|
+
trend[id] = (recentRates[id] ?? 0) - (previousRates[id] ?? 0);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
return {
|
|
93
|
+
evalCount: evalCards.length,
|
|
94
|
+
baselineCount: baselineCards.length,
|
|
95
|
+
passRates,
|
|
96
|
+
totalGraphitiWrites: scorecards.reduce((sum, s) => sum + s.graphitiWrites, 0),
|
|
97
|
+
trend,
|
|
98
|
+
};
|
|
99
|
+
}
|
|
100
|
+
function computePassRates(cards) {
|
|
101
|
+
const counts = {};
|
|
102
|
+
for (const card of cards) {
|
|
103
|
+
for (const q of card.questions) {
|
|
104
|
+
if (!counts[q.id])
|
|
105
|
+
counts[q.id] = { pass: 0, total: 0 };
|
|
106
|
+
counts[q.id].total++;
|
|
107
|
+
if (q.answer === "yes")
|
|
108
|
+
counts[q.id].pass++;
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
const rates = {};
|
|
112
|
+
for (const [id, c] of Object.entries(counts)) {
|
|
113
|
+
rates[id] = c.total > 0 ? c.pass / c.total : 0;
|
|
114
|
+
}
|
|
115
|
+
return rates;
|
|
116
|
+
}
|
|
117
|
+
/**
 * Run a task prompt with a stripped-down ("vanilla") agent and score the result.
 *
 * Steps: create (or reuse) a detached worktree at
 * `.indusk/eval/baseline-worktree`, remove `.claude/skills`, overwrite
 * `CLAUDE.md` and `.mcp.json` with minimal content, run `claude --print` with
 * the task prompt, record the work with `jj`, run the judge in "baseline" mode
 * and print its scorecard. The worktree is removed afterwards unless
 * `opts.keep` is set — including when one of the steps throws.
 *
 * All external commands are spawned without a shell, so the task file name and
 * the worktree path cannot be interpreted as shell syntax.
 *
 * @param projectRoot Project directory the worktree is created from.
 * @param opts `task`: path of the task prompt file; `keep`: keep the worktree.
 * @returns Resolves when the run is complete. Exits the process with code 1
 *   when the task file is missing or the worktree cannot be created.
 */
export async function evalBaseline(projectRoot, opts) {
    const { execFileSync, spawnSync } = await import("node:child_process");
    const { readFileSync, rmSync, writeFileSync } = await import("node:fs");
    const { basename } = await import("node:path");
    const taskPath = opts.task;
    if (!existsSync(taskPath)) {
        console.error(`Task file not found: ${taskPath}`);
        process.exit(1);
    }
    const taskPrompt = readFileSync(taskPath, "utf-8");
    // Task name = file name without a ".md" suffix, whitespace collapsed to "-".
    const taskName = basename(taskPath, ".md").replace(/\s+/g, "-");
    const worktreePath = join(projectRoot, ".indusk", "eval", "baseline-worktree");
    console.info("Setting up baseline worktree...");
    // Create worktree from current HEAD
    try {
        execFileSync("git", ["worktree", "add", worktreePath, "HEAD", "--detach"], {
            cwd: projectRoot,
            stdio: "pipe",
        });
    }
    catch {
        // Worktree may already exist
        if (existsSync(worktreePath)) {
            console.info("Baseline worktree already exists, reusing...");
        }
        else {
            console.error("Failed to create baseline worktree");
            process.exit(1);
        }
    }
    try {
        // Strip the worktree for vanilla agent
        console.info("Stripping worktree for baseline agent...");
        const skillsPath = join(worktreePath, ".claude", "skills");
        if (existsSync(skillsPath)) {
            rmSync(skillsPath, { recursive: true, force: true });
        }
        // Write minimal CLAUDE.md
        const projectName = basename(projectRoot);
        writeFileSync(join(worktreePath, "CLAUDE.md"), `# ${projectName}\n\nThis is a software project. No special instructions.\n`);
        // Write empty .mcp.json
        writeFileSync(join(worktreePath, ".mcp.json"), JSON.stringify({ mcpServers: {} }, null, 2));
        console.info(`Running baseline agent with task: ${taskName}...`);
        // Spawn vanilla claude --print in the baseline worktree
        const result = spawnSync("claude", ["--print", "--model", "opus", taskPrompt], {
            cwd: worktreePath,
            stdio: "inherit",
            timeout: 10 * 60 * 1000, // 10 minutes
            env: { ...process.env },
        });
        // A spawn failure or timeout sets `error`, and a killed child has a null
        // status; report those explicitly instead of "exited with code null".
        // The evaluation still proceeds against whatever state exists.
        if (result.error) {
            console.error(`Baseline agent failed to run: ${result.error.message}`);
        }
        else if (result.signal) {
            console.error(`Baseline agent terminated by signal ${result.signal}`);
        }
        else if (result.status !== 0) {
            console.error(`Baseline agent exited with code ${result.status}`);
        }
        // Commit the baseline work
        // NOTE(review): the worktree is created with git but recorded with jj —
        // confirm jj operates as intended inside a git worktree.
        console.info("Committing baseline work...");
        try {
            execFileSync("jj", ["new"], { cwd: worktreePath, stdio: "pipe" });
            execFileSync("jj", ["describe", "-m", `baseline: ${taskName}`], { cwd: worktreePath, stdio: "pipe" });
        }
        catch {
            console.info("Note: jj commit may have failed — evaluating current state anyway");
        }
        // Run the smart evaluator against the baseline
        console.info("Running smart evaluator against baseline...");
        const { runJudgeSync } = await import("../../lib/eval/judge-runner.js");
        let changeId;
        try {
            changeId = execFileSync("jj", ["log", "-r", "@", "--no-graph", "-T", "change_id"], {
                cwd: worktreePath,
                encoding: "utf8",
            }).trim();
        }
        catch {
            // Placeholder id so the judge can still run without jj metadata.
            changeId = "baseline-unknown";
        }
        const evalResult = await runJudgeSync({
            projectRoot: worktreePath,
            changeId,
            transcriptPath: "(baseline — no transcript)",
            mode: "baseline",
        });
        if ("error" in evalResult) {
            console.error(`Eval failed: ${evalResult.message}`);
        }
        else {
            console.info(`\nBaseline scorecard for ${taskName}:`);
            console.info(` Summary: ${evalResult.summary}`);
            for (const q of evalResult.questions) {
                const icon = q.answer === "yes" ? "✓" : q.answer === "no" ? "✗" : "~";
                console.info(` ${icon} ${q.id}: ${q.finding}`);
            }
        }
    }
    finally {
        // Cleanup runs even when a step above throws, so a failed run does not
        // leave a stale worktree behind for the next invocation to reuse.
        if (!opts.keep) {
            console.info("Cleaning up baseline worktree...");
            try {
                execFileSync("git", ["worktree", "remove", worktreePath, "--force"], {
                    cwd: projectRoot,
                    stdio: "pipe",
                });
            }
            catch {
                console.info("Note: worktree cleanup may need manual removal");
            }
        }
        else {
            console.info(`Baseline worktree kept at: ${worktreePath}`);
        }
    }
}
|
|
@@ -17,22 +17,34 @@ function run(cmd, options) {
|
|
|
17
17
|
return "";
|
|
18
18
|
}
|
|
19
19
|
}
|
|
20
|
-
|
|
20
|
+
/** Paths that should be gitignored in ALL modes (full + local). */
const GITIGNORE_ENTRIES = [
    { comment: "# MCP config (contains auth tokens)", pattern: ".mcp.json" },
    { comment: "# Session-specific handoff (not project knowledge)", pattern: ".claude/handoff.md" },
    { comment: "# Semantic graph event log (large, local-only)", pattern: ".indusk/graph/" },
    { comment: "# Eval results (local-only)", pattern: ".indusk/eval/" },
    { comment: "# Extension manifests are package-owned; env files contain secrets", pattern: ".indusk/extensions/" },
];
/** Heading written above every block of entries appended by InDusk. */
const GITIGNORE_MARKER = "# InDusk managed";
/**
 * Reduce an ignore line to a comparable form: surrounding whitespace and
 * leading/trailing slashes are dropped, so `/.indusk/graph`, `.indusk/graph`
 * and `.indusk/graph/` all compare equal.
 */
function normalizeIgnoreLine(line) {
    return line.trim().replace(/^\/+/, "").replace(/\/+$/, "");
}
/**
 * Make sure the project's `.gitignore` contains every InDusk-required entry,
 * appending the missing ones (each preceded by its explanatory comment) under
 * the "# InDusk managed" heading. Creates the file when it does not exist.
 *
 * An entry counts as present only when an active line matches it. Comment
 * lines, negated (`!`) lines and longer paths that merely contain the pattern
 * as a substring do not count.
 *
 * @param projectRoot Directory whose `.gitignore` is checked.
 */
export function ensureGitignore(projectRoot) {
    const gitignorePath = join(projectRoot, ".gitignore");
    const content = existsSync(gitignorePath) ? readFileSync(gitignorePath, "utf-8") : "";
    // Active patterns already in the file (comments and negations excluded).
    const active = new Set(content
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter((line) => line !== "" && !line.startsWith("#") && !line.startsWith("!"))
        .map(normalizeIgnoreLine));
    // Collect entries that are missing from the current .gitignore
    const missing = GITIGNORE_ENTRIES.filter((e) => !active.has(normalizeIgnoreLine(e.pattern)));
    if (missing.length === 0) {
        console.info(" skip: .gitignore (all InDusk entries present)");
        return;
    }
    // Build the block to append
    const block = [
        GITIGNORE_MARKER,
        ...missing.flatMap((e) => [e.comment, e.pattern]),
        "",
    ].join("\n");
    // Separate from existing content with a newline, but do not start a
    // brand-new file with a blank line.
    const existing = content.trimEnd();
    const prefix = existing.length > 0 ? `${existing}\n` : "";
    writeFileSync(gitignorePath, `${prefix}${block}`);
    const verb = content.length > 0 ? "updated" : "created";
    console.info(` ${verb}: .gitignore (added ${missing.map((e) => e.pattern).join(", ")})`);
}
|
|
37
49
|
function createCgcIgnore(projectRoot) {
|
|
38
50
|
const ignorePath = join(projectRoot, ".cgcignore");
|
|
@@ -323,7 +323,11 @@ export async function update(projectRoot) {
|
|
|
323
323
|
catch {
|
|
324
324
|
console.info(" could not check third-party extensions");
|
|
325
325
|
}
|
|
326
|
-
// 8.
|
|
326
|
+
// 8. Ensure .gitignore has all required entries
|
|
327
|
+
console.info("\n[Git Ignores]\n");
|
|
328
|
+
const { ensureGitignore } = await import("./init.js");
|
|
329
|
+
ensureGitignore(projectRoot);
|
|
330
|
+
// 9. Respect local mode: re-apply overlay, refresh excludes
|
|
327
331
|
const { readConfig } = await import("../../lib/config.js");
|
|
328
332
|
const config = readConfig(projectRoot);
|
|
329
333
|
if (config?.mode === "local") {
|
package/dist/lib/config.d.ts
CHANGED
|
@@ -44,12 +44,28 @@ export declare function getConfigPath(projectRoot: string): string;
|
|
|
44
44
|
export declare function readConfig(projectRoot: string): InduskConfig | null;
|
|
45
45
|
export declare function writeConfig(projectRoot: string, config: InduskConfig): void;
|
|
46
46
|
export declare function getPlanningDir(projectRoot: string): string;
|
|
47
|
+
/**
 * Turn an arbitrary string into an identifier that is safe to use as a
 * Graphiti group id.
 *
 * Why: Graphiti is backed by RediSearch, where `-` is a token separator. A
 * group id such as `chitin-sportsbook` is parsed as "find chitin, exclude
 * sportsbook" and the query fails with `Syntax error at offset N near chitin`.
 *
 * How: every run of characters outside `[A-Za-z0-9_]` becomes a single `_`,
 * and underscores left at the start or end are dropped.
 *
 * @example
 * sanitizeGroupId("chitin-sportsbook"); // "chitin_sportsbook"
 * sanitizeGroupId("my.cool.project");   // "my_cool_project"
 * sanitizeGroupId("@scope/pkg");        // "scope_pkg"
 * sanitizeGroupId("indusk_already_ok"); // "indusk_already_ok" (unchanged)
 *
 * @param raw - The string to sanitize, e.g. a project directory name.
 * @returns The sanitized identifier.
 */
export declare function sanitizeGroupId(raw: string): string;
|
|
47
62
|
/**
|
|
48
63
|
* Get the Graphiti group id for project-specific episodes.
|
|
49
64
|
*
|
|
50
65
|
* Resolution order:
|
|
51
|
-
* 1. .indusk/config.json `graphiti.groupId` if set
|
|
52
|
-
*
|
|
66
|
+
* 1. .indusk/config.json `graphiti.groupId` if set (used as-is, not sanitized —
|
|
67
|
+
* explicit overrides are trusted; if you set a hyphenated id, that's on you)
|
|
68
|
+
* 2. Sanitized project directory basename (`-` → `_`, etc., for RediSearch safety)
|
|
53
69
|
*
|
|
54
70
|
* Use `[getProjectGroupId(root), "shared"]` as the default group_ids list when
|
|
55
71
|
* searching Graphiti — this gives both project-scoped and cross-project knowledge.
|
package/dist/lib/config.js
CHANGED
|
@@ -26,12 +26,30 @@ export function getPlanningDir(projectRoot) {
|
|
|
26
26
|
// Default to new path (will be created by init)
|
|
27
27
|
return newPath;
|
|
28
28
|
}
|
|
29
|
+
/**
 * Sanitize a string into a valid Graphiti group id.
 *
 * Graphiti uses RediSearch under the hood, which treats `-` as a token separator.
 * A query like `chitin-sportsbook` parses as "find chitin, exclude sportsbook" and
 * fails with `Syntax error at offset N near chitin`. Every run of characters that
 * isn't `[A-Za-z0-9_]` is replaced with a single `_`, and underscores left at the
 * start or end are removed.
 *
 * If nothing survives (the input has no ASCII letters or digits, e.g. `---` or a
 * non-Latin directory name), the result would be an empty id. In that case the id
 * is derived from the input's UTF-16 code units in hex, prefixed with `u`, so it is
 * still non-empty, deterministic and distinct per input.
 *
 * Examples:
 *   "chitin-sportsbook" → "chitin_sportsbook"
 *   "my.cool.project"   → "my_cool_project"
 *   "@scope/pkg"        → "scope_pkg"
 *   "indusk_already_ok" → "indusk_already_ok" (no change)
 *   "---"               → "u_2d_2d_2d" (fallback)
 *
 * @param raw String to sanitize (typically the project directory basename).
 * @returns A non-empty id made only of `[A-Za-z0-9_]`.
 */
export function sanitizeGroupId(raw) {
    const cleaned = raw.replace(/[^A-Za-z0-9_]+/g, "_").replace(/^_+|_+$/g, "");
    if (cleaned !== "") {
        return cleaned;
    }
    // Fallback: hex-encode each UTF-16 code unit so the id is never empty.
    const codeUnits = Array.from({ length: raw.length }, (_, i) => raw.charCodeAt(i).toString(16));
    return ["u", ...codeUnits].join("_");
}
|
|
29
46
|
/**
|
|
30
47
|
* Get the Graphiti group id for project-specific episodes.
|
|
31
48
|
*
|
|
32
49
|
* Resolution order:
|
|
33
|
-
* 1. .indusk/config.json `graphiti.groupId` if set
|
|
34
|
-
*
|
|
50
|
+
* 1. .indusk/config.json `graphiti.groupId` if set (used as-is, not sanitized —
|
|
51
|
+
* explicit overrides are trusted; if you set a hyphenated id, that's on you)
|
|
52
|
+
* 2. Sanitized project directory basename (`-` → `_`, etc., for RediSearch safety)
|
|
35
53
|
*
|
|
36
54
|
* Use `[getProjectGroupId(root), "shared"]` as the default group_ids list when
|
|
37
55
|
* searching Graphiti — this gives both project-scoped and cross-project knowledge.
|
|
@@ -40,7 +58,7 @@ export function getProjectGroupId(projectRoot) {
|
|
|
40
58
|
const config = readConfig(projectRoot);
|
|
41
59
|
if (config?.graphiti?.groupId)
|
|
42
60
|
return config.graphiti.groupId;
|
|
43
|
-
return basename(projectRoot);
|
|
61
|
+
return sanitizeGroupId(basename(projectRoot));
|
|
44
62
|
}
|
|
45
63
|
/**
|
|
46
64
|
* Whether the OTel gate should fire for this project.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Judge runner — spawns a background `claude --print` process that evaluates
|
|
3
|
+
* a commit and writes results to the eval log.
|
|
4
|
+
*
|
|
5
|
+
* The judge is a detached child process so the calling hook can exit immediately.
|
|
6
|
+
* Results appear asynchronously in `.indusk/eval/results.log`.
|
|
7
|
+
*/
|
|
8
|
+
import type { EvalErrorEntry, EvalScorecard } from "./types.js";
|
|
9
|
+
/** Options accepted by both judge entry points. */
export interface JudgeRunOptions {
    /** Directory the judge runs against. */
    projectRoot: string;
    /** Identifier of the change being evaluated (presumably a jj change id — confirm). */
    changeId: string;
    /** Path of the session transcript handed to the judge. */
    transcriptPath: string;
    /** Whether this is a regular evaluation or a baseline run. */
    mode: "eval" | "baseline";
    /** Optional evaluation endpoint. NOTE(review): semantics not visible here — confirm in judge-runner.js. */
    evalEndpoint?: string;
}
|
|
16
|
+
/**
 * Start the judge as a detached background process and return immediately.
 *
 * The judge is a `claude --print` invocation given the judge prompt and an
 * allowed-tools whitelist. Its stdout is collected, the scorecard JSON is
 * parsed, and the result is appended to the eval log. Failures are recorded
 * as error entries rather than being dropped silently.
 *
 * @param opts - What to evaluate and in which mode.
 */
export declare function runJudgeBackground(opts: JudgeRunOptions): void;
|
|
24
|
+
/**
 * Run the judge and wait for its result (intended for tests and manual runs).
 *
 * Despite the `Sync` suffix this returns a promise; callers must `await` it.
 *
 * @param opts - What to evaluate and in which mode.
 * @returns The scorecard on success, otherwise an error entry.
 */
export declare function runJudgeSync(opts: JudgeRunOptions): Promise<EvalScorecard | EvalErrorEntry>;
|