@infinitedusky/indusk-mcp 1.16.0 → 1.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/cli.js +48 -24
- package/dist/bin/commands/extensions.js +5 -2
- package/dist/bin/commands/init-docs.js +2 -2
- package/dist/lib/config.d.ts +17 -0
- package/dist/lib/config.js +29 -1
- package/dist/lib/eval/prompt-builder.js +31 -6
- package/dist/lib/falsification/log.d.ts +51 -0
- package/dist/lib/falsification/log.js +207 -0
- package/dist/lib/falsification/skip.d.ts +23 -0
- package/dist/lib/falsification/skip.js +37 -0
- package/dist/lib/highlights/highlights.d.ts +48 -0
- package/dist/lib/highlights/highlights.js +136 -0
- package/dist/lib/semantic-graph/index.d.ts +1 -1
- package/dist/lib/trajectory/audit.js +4 -4
- package/dist/server/index.js +2 -0
- package/dist/tools/highlight-tools.d.ts +18 -0
- package/dist/tools/highlight-tools.js +78 -0
- package/hooks/check-catchup.js +18 -7
- package/hooks/eval-trigger.js +66 -30
- package/hooks/gate-reminder.js +1 -3
- package/package.json +1 -1
- package/skills/handoff.md +14 -0
- package/skills/highlight.md +50 -0
- package/skills/planner.md +12 -16
- package/skills/retrospective.md +23 -17
- package/skills/work.md +8 -14
- package/templates/FullscreenDiagram.vue +3 -3
- package/templates/filtering-exporter.ts +3 -16
- package/templates/instrumentation.ts +4 -5
- package/templates/instrumentation.web.ts +19 -15
- package/templates/logger.ts +1 -1
package/dist/bin/cli.js
CHANGED
|
@@ -3,8 +3,31 @@ import { readFileSync } from "node:fs";
|
|
|
3
3
|
import { dirname, join } from "node:path";
|
|
4
4
|
import { fileURLToPath } from "node:url";
|
|
5
5
|
import { Command } from "commander";
|
|
6
|
+
import { resolveProjectRoot } from "../lib/config.js";
|
|
6
7
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
7
8
|
const pkg = JSON.parse(readFileSync(join(__dirname, "../../package.json"), "utf-8"));
|
|
9
|
+
/**
 * Resolve the InDusk project root for commands that operate on an existing
 * project.
 *
 * Walks up from the current working directory (via `resolveProjectRoot`)
 * looking for `.indusk/config.json`. If no marker is found, an explanation
 * is printed to stderr and the process exits with status 1 — this prevents
 * accidental writes to the wrong `.claude/` when invoked from a
 * sub-directory like `apps/indusk-mcp/`.
 *
 * Commands that CREATE the project root (currently only `init`) use
 * `process.cwd()` directly — init is responsible for creating the marker.
 *
 * @returns {string} The directory that contains `.indusk/config.json`.
 */
function rootOrExit() {
    const cwd = process.cwd();
    const root = resolveProjectRoot(cwd);
    if (root === null) {
        console.error(`Not inside an InDusk project (no .indusk/config.json found walking up from ${cwd}).\n` +
            "Run 'indusk init' here to initialize a new project, or cd to an existing one.");
        process.exit(1);
    }
    if (root !== cwd) {
        // Diagnostic only: written to stderr so stdout stays clean for
        // commands that emit machine-readable output (e.g. `eval summary --json`).
        console.error(`[indusk] Using project root: ${root}\n`);
    }
    return root;
}
|
|
8
31
|
const program = new Command();
|
|
9
32
|
program
|
|
10
33
|
.name("dev-system")
|
|
@@ -29,7 +52,7 @@ program
|
|
|
29
52
|
.description("Update skills from package without touching project content")
|
|
30
53
|
.action(async () => {
|
|
31
54
|
const { update } = await import("./commands/update.js");
|
|
32
|
-
await update(
|
|
55
|
+
await update(rootOrExit());
|
|
33
56
|
});
|
|
34
57
|
const ext = program
|
|
35
58
|
.command("extensions")
|
|
@@ -39,28 +62,28 @@ ext
|
|
|
39
62
|
.description("Show all available extensions")
|
|
40
63
|
.action(async () => {
|
|
41
64
|
const { extensionsList } = await import("./commands/extensions.js");
|
|
42
|
-
await extensionsList(
|
|
65
|
+
await extensionsList(rootOrExit());
|
|
43
66
|
});
|
|
44
67
|
ext
|
|
45
68
|
.command("status")
|
|
46
69
|
.description("Show enabled extensions with health")
|
|
47
70
|
.action(async () => {
|
|
48
71
|
const { extensionsStatus } = await import("./commands/extensions.js");
|
|
49
|
-
await extensionsStatus(
|
|
72
|
+
await extensionsStatus(rootOrExit());
|
|
50
73
|
});
|
|
51
74
|
ext
|
|
52
75
|
.command("enable <names...>")
|
|
53
76
|
.description("Enable extensions")
|
|
54
77
|
.action(async (names) => {
|
|
55
78
|
const { extensionsEnable } = await import("./commands/extensions.js");
|
|
56
|
-
await extensionsEnable(
|
|
79
|
+
await extensionsEnable(rootOrExit(), names);
|
|
57
80
|
});
|
|
58
81
|
ext
|
|
59
82
|
.command("disable <names...>")
|
|
60
83
|
.description("Disable extensions")
|
|
61
84
|
.action(async (names) => {
|
|
62
85
|
const { extensionsDisable } = await import("./commands/extensions.js");
|
|
63
|
-
await extensionsDisable(
|
|
86
|
+
await extensionsDisable(rootOrExit(), names);
|
|
64
87
|
});
|
|
65
88
|
ext
|
|
66
89
|
.command("add <name>")
|
|
@@ -68,35 +91,35 @@ ext
|
|
|
68
91
|
.requiredOption("--from <source>", "Source: npm:pkg, github:user/repo, URL, or local path")
|
|
69
92
|
.action(async (name, opts) => {
|
|
70
93
|
const { extensionsAdd } = await import("./commands/extensions.js");
|
|
71
|
-
await extensionsAdd(
|
|
94
|
+
await extensionsAdd(rootOrExit(), name, opts.from);
|
|
72
95
|
});
|
|
73
96
|
ext
|
|
74
97
|
.command("remove <names...>")
|
|
75
98
|
.description("Remove extensions")
|
|
76
99
|
.action(async (names) => {
|
|
77
100
|
const { extensionsRemove } = await import("./commands/extensions.js");
|
|
78
|
-
await extensionsRemove(
|
|
101
|
+
await extensionsRemove(rootOrExit(), names);
|
|
79
102
|
});
|
|
80
103
|
ext
|
|
81
104
|
.command("update [names...]")
|
|
82
105
|
.description("Update third-party extensions from their original source")
|
|
83
106
|
.action(async (names) => {
|
|
84
107
|
const { extensionsUpdate } = await import("./commands/extensions.js");
|
|
85
|
-
await extensionsUpdate(
|
|
108
|
+
await extensionsUpdate(rootOrExit(), names);
|
|
86
109
|
});
|
|
87
110
|
ext
|
|
88
111
|
.command("suggest")
|
|
89
112
|
.description("Recommend extensions based on project contents")
|
|
90
113
|
.action(async () => {
|
|
91
114
|
const { extensionsSuggest } = await import("./commands/extensions.js");
|
|
92
|
-
await extensionsSuggest(
|
|
115
|
+
await extensionsSuggest(rootOrExit());
|
|
93
116
|
});
|
|
94
117
|
program
|
|
95
118
|
.command("init-docs")
|
|
96
119
|
.description("Scaffold a VitePress documentation site with Mermaid, llms.txt, and FullscreenDiagram")
|
|
97
120
|
.action(async () => {
|
|
98
121
|
const { initDocs } = await import("./commands/init-docs.js");
|
|
99
|
-
await initDocs(
|
|
122
|
+
await initDocs(rootOrExit());
|
|
100
123
|
});
|
|
101
124
|
program
|
|
102
125
|
.command("check-gates")
|
|
@@ -105,7 +128,7 @@ program
|
|
|
105
128
|
.option("--phase <number>", "Check a specific phase number", Number.parseInt)
|
|
106
129
|
.action(async (opts) => {
|
|
107
130
|
const { checkGates } = await import("./commands/check-gates.js");
|
|
108
|
-
await checkGates(
|
|
131
|
+
await checkGates(rootOrExit(), { file: opts.file, phase: opts.phase });
|
|
109
132
|
});
|
|
110
133
|
const infra = program
|
|
111
134
|
.command("infra")
|
|
@@ -144,7 +167,7 @@ graph
|
|
|
144
167
|
const { getLogPath } = await import("../lib/semantic-graph/paths.js");
|
|
145
168
|
const { SemanticGraphClient } = await import("../lib/semantic-graph/runtime-client.js");
|
|
146
169
|
const { runSync } = await import("../lib/semantic-graph/sync-engine.js");
|
|
147
|
-
const projectRoot =
|
|
170
|
+
const projectRoot = rootOrExit();
|
|
148
171
|
const projectName = basename(projectRoot);
|
|
149
172
|
const adapter = new CgcAdapter();
|
|
150
173
|
const logWriter = new LogWriter(getLogPath(projectRoot));
|
|
@@ -164,7 +187,7 @@ graph
|
|
|
164
187
|
const { getLogPath } = await import("../lib/semantic-graph/paths.js");
|
|
165
188
|
const { replay } = await import("../lib/semantic-graph/replay.js");
|
|
166
189
|
const { SemanticGraphClient } = await import("../lib/semantic-graph/runtime-client.js");
|
|
167
|
-
const projectRoot =
|
|
190
|
+
const projectRoot = rootOrExit();
|
|
168
191
|
const projectName = basename(projectRoot);
|
|
169
192
|
const logPath = getLogPath(projectRoot);
|
|
170
193
|
const client = new SemanticGraphClient(projectName);
|
|
@@ -188,7 +211,7 @@ graph
|
|
|
188
211
|
const { getLogPath } = await import("../lib/semantic-graph/paths.js");
|
|
189
212
|
const { readAllEvents } = await import("../lib/semantic-graph/log-reader.js");
|
|
190
213
|
const { SemanticGraphClient } = await import("../lib/semantic-graph/runtime-client.js");
|
|
191
|
-
const projectRoot =
|
|
214
|
+
const projectRoot = rootOrExit();
|
|
192
215
|
const projectName = basename(projectRoot);
|
|
193
216
|
const logPath = getLogPath(projectRoot);
|
|
194
217
|
console.info(`Project: ${projectName}`);
|
|
@@ -223,7 +246,7 @@ program
|
|
|
223
246
|
.description("Strip InDusk settings overlay before a PR")
|
|
224
247
|
.action(async () => {
|
|
225
248
|
const { stripOverlay } = await import("../lib/settings-overlay.js");
|
|
226
|
-
stripOverlay(
|
|
249
|
+
stripOverlay(rootOrExit());
|
|
227
250
|
console.info("Stripped InDusk overlay from .claude/settings.json");
|
|
228
251
|
});
|
|
229
252
|
program
|
|
@@ -231,7 +254,7 @@ program
|
|
|
231
254
|
.description("Re-apply InDusk settings overlay after a PR")
|
|
232
255
|
.action(async () => {
|
|
233
256
|
const { applyOverlay } = await import("../lib/settings-overlay.js");
|
|
234
|
-
applyOverlay(
|
|
257
|
+
applyOverlay(rootOrExit());
|
|
235
258
|
console.info("Re-applied InDusk overlay to .claude/settings.json");
|
|
236
259
|
});
|
|
237
260
|
program
|
|
@@ -239,13 +262,14 @@ program
|
|
|
239
262
|
.description("Install extensions (shorthand for extensions enable / add)")
|
|
240
263
|
.option("--from <source>", "Source for third-party extension (npm:pkg, github:user/repo, URL, or path)")
|
|
241
264
|
.action(async (names, opts) => {
|
|
265
|
+
const root = rootOrExit();
|
|
242
266
|
if (opts.from) {
|
|
243
267
|
const { extensionsAdd } = await import("./commands/extensions.js");
|
|
244
|
-
await extensionsAdd(
|
|
268
|
+
await extensionsAdd(root, names[0], opts.from);
|
|
245
269
|
}
|
|
246
270
|
else {
|
|
247
271
|
const { extensionsEnable } = await import("./commands/extensions.js");
|
|
248
|
-
await extensionsEnable(
|
|
272
|
+
await extensionsEnable(root, names);
|
|
249
273
|
}
|
|
250
274
|
});
|
|
251
275
|
const eval_ = program.command("eval").description("Context evaluation and quality scoring");
|
|
@@ -257,7 +281,7 @@ eval_
|
|
|
257
281
|
.option("--json", "Output as JSON")
|
|
258
282
|
.action(async (opts) => {
|
|
259
283
|
const { evalSummary } = await import("./commands/eval.js");
|
|
260
|
-
await evalSummary(
|
|
284
|
+
await evalSummary(rootOrExit(), opts);
|
|
261
285
|
});
|
|
262
286
|
eval_
|
|
263
287
|
.command("findings")
|
|
@@ -265,21 +289,21 @@ eval_
|
|
|
265
289
|
.option("--all", "Show all findings including fixed/ignored")
|
|
266
290
|
.action(async (opts) => {
|
|
267
291
|
const { evalFindings } = await import("./commands/eval.js");
|
|
268
|
-
await evalFindings(
|
|
292
|
+
await evalFindings(rootOrExit(), opts);
|
|
269
293
|
});
|
|
270
294
|
eval_
|
|
271
295
|
.command("fix <key>")
|
|
272
296
|
.description("Mark an eval finding as fixed")
|
|
273
297
|
.action(async (key) => {
|
|
274
298
|
const { evalMark } = await import("./commands/eval.js");
|
|
275
|
-
await evalMark(
|
|
299
|
+
await evalMark(rootOrExit(), key, "fixed");
|
|
276
300
|
});
|
|
277
301
|
eval_
|
|
278
302
|
.command("ignore <key>")
|
|
279
303
|
.description("Mark an eval finding as ignored")
|
|
280
304
|
.action(async (key) => {
|
|
281
305
|
const { evalMark } = await import("./commands/eval.js");
|
|
282
|
-
await evalMark(
|
|
306
|
+
await evalMark(rootOrExit(), key, "ignored");
|
|
283
307
|
});
|
|
284
308
|
eval_
|
|
285
309
|
.command("baseline")
|
|
@@ -288,7 +312,7 @@ eval_
|
|
|
288
312
|
.option("--keep", "Keep baseline worktree after eval")
|
|
289
313
|
.action(async (opts) => {
|
|
290
314
|
const { evalBaseline } = await import("./commands/eval.js");
|
|
291
|
-
await evalBaseline(
|
|
315
|
+
await evalBaseline(rootOrExit(), opts);
|
|
292
316
|
});
|
|
293
317
|
program
|
|
294
318
|
.command("beam <file>")
|
|
@@ -299,7 +323,7 @@ program
|
|
|
299
323
|
const { runBeam } = await import("../lib/beam/runner.js");
|
|
300
324
|
const { formatBeamMarkdown, formatBeamTrace } = await import("../lib/beam/format.js");
|
|
301
325
|
const result = await runBeam({
|
|
302
|
-
projectRoot:
|
|
326
|
+
projectRoot: rootOrExit(),
|
|
303
327
|
targetPath: file,
|
|
304
328
|
trace: opts.trace ?? false,
|
|
305
329
|
});
|
|
@@ -374,7 +374,7 @@ export async function extensionsUpdate(projectRoot, names) {
|
|
|
374
374
|
continue;
|
|
375
375
|
try {
|
|
376
376
|
if (!ext.manifest._source) {
|
|
377
|
-
if (names
|
|
377
|
+
if (names?.includes(name)) {
|
|
378
378
|
console.info(` ${name}: built-in extension — updated via package update, not extensions update`);
|
|
379
379
|
}
|
|
380
380
|
continue;
|
|
@@ -633,7 +633,10 @@ function printMcpInstructions(name, manifest) {
|
|
|
633
633
|
const needsAuth = server.headers && Object.keys(server.headers).length > 0;
|
|
634
634
|
// Remove first, then add — ensures clean state
|
|
635
635
|
try {
|
|
636
|
-
execSync(`claude mcp remove -s project ${name}`, {
|
|
636
|
+
execSync(`claude mcp remove -s project ${name}`, {
|
|
637
|
+
timeout: 10000,
|
|
638
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
639
|
+
});
|
|
637
640
|
}
|
|
638
641
|
catch {
|
|
639
642
|
// not registered yet, fine
|
|
@@ -24,7 +24,7 @@ export async function initDocs(projectRoot) {
|
|
|
24
24
|
mkdirSync(join(docsDir, dir), { recursive: true });
|
|
25
25
|
}
|
|
26
26
|
// package.json
|
|
27
|
-
writeFileSync(join(docsDir, "package.json"), JSON.stringify({
|
|
27
|
+
writeFileSync(join(docsDir, "package.json"), `${JSON.stringify({
|
|
28
28
|
name: `${projectName}-docs`,
|
|
29
29
|
version: "0.1.0",
|
|
30
30
|
private: true,
|
|
@@ -42,7 +42,7 @@ export async function initDocs(projectRoot) {
|
|
|
42
42
|
"vitepress-plugin-mermaid": "^2.0.10",
|
|
43
43
|
vue: "^3.4.15",
|
|
44
44
|
},
|
|
45
|
-
}, null, "\t")
|
|
45
|
+
}, null, "\t")}\n`);
|
|
46
46
|
// .vitepress/config.ts
|
|
47
47
|
writeFileSync(join(docsDir, "src/.vitepress/config.ts"), `import { defineConfig } from "vitepress";
|
|
48
48
|
import llmstxt from "vitepress-plugin-llms";
|
package/dist/lib/config.d.ts
CHANGED
|
@@ -1,3 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resolve the InDusk project root by walking up from the given directory
|
|
3
|
+
* until `.indusk/config.json` is found. Returns the directory containing
|
|
4
|
+
* `.indusk/config.json`, or `null` if none is found up to the filesystem
|
|
5
|
+
* root.
|
|
6
|
+
*
|
|
7
|
+
* `.indusk/config.json` is the authoritative "this is an InDusk project"
|
|
8
|
+
* marker — created by `indusk init`, never by sub-apps that happen to
|
|
9
|
+
* have their own `.claude/` scaffolding. Walking up to find it prevents
|
|
10
|
+
* bugs like `indusk update` syncing to the wrong `.claude/` when the user
|
|
11
|
+
* runs it from a sub-directory (e.g. `apps/indusk-mcp/`).
|
|
12
|
+
*
|
|
13
|
+
* For `indusk init` itself, use the raw cwd — init creates the marker, so
|
|
14
|
+
* walk-up would either find nothing or (worse) match an ancestor project
|
|
15
|
+
* the user doesn't intend to re-init.
|
|
16
|
+
*/
|
|
17
|
+
export declare function resolveProjectRoot(startDir: string): string | null;
|
|
1
18
|
export interface VerifyToolConfig {
|
|
2
19
|
tool: string;
|
|
3
20
|
config: string;
|
package/dist/lib/config.js
CHANGED
|
@@ -1,5 +1,33 @@
|
|
|
1
1
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
-
import { basename, dirname, join } from "node:path";
|
|
2
|
+
import { basename, dirname, join, resolve } from "node:path";
|
|
3
|
+
/**
 * Resolve the InDusk project root by walking up from the given directory
 * until `.indusk/config.json` is found.
 *
 * `.indusk/config.json` is the authoritative "this is an InDusk project"
 * marker — created by `indusk init`, never by sub-apps that happen to
 * have their own `.claude/` scaffolding. Walking up to find it prevents
 * bugs like `indusk update` syncing to the wrong `.claude/` when the user
 * runs it from a sub-directory (e.g. `apps/indusk-mcp/`).
 *
 * For `indusk init` itself, use the raw cwd — init creates the marker, so
 * walk-up would either find nothing or (worse) match an ancestor project
 * the user doesn't intend to re-init.
 *
 * @param {string} startDir - Directory to start searching from.
 * @returns {string | null} The directory containing `.indusk/config.json`,
 *   or `null` if none is found up to the filesystem root.
 */
export function resolveProjectRoot(startDir) {
    let dir = startDir;
    // No depth cap: `resolve(dir, "..")` is a lexical operation that yields an
    // absolute path with one segment removed, so the walk always reaches the
    // filesystem root, where the parent resolves to the directory itself.
    for (;;) {
        if (existsSync(join(dir, ".indusk/config.json"))) {
            return dir;
        }
        const parent = resolve(dir, "..");
        if (parent === dir) {
            return null;
        }
        dir = parent;
    }
}
|
|
3
31
|
const CONFIG_PATH = ".indusk/config.json";
|
|
4
32
|
export function getConfigPath(projectRoot) {
|
|
5
33
|
return join(projectRoot, CONFIG_PATH);
|
|
@@ -12,9 +12,32 @@ export function buildJudgePrompt(opts) {
|
|
|
12
12
|
const questionsBlock = opts.rubric
|
|
13
13
|
.map((q, i) => `${i + 1}. **${q.id}**: ${q.question}\n Guidance: ${q.guidance}`)
|
|
14
14
|
.join("\n\n");
|
|
15
|
+
const highlightsInstructions = opts.mode === "eval"
|
|
16
|
+
? `### Step 4: Process unprocessed highlights
|
|
17
|
+
|
|
18
|
+
Before answering the rubric, process the working agent's highlights queue. Highlights are the working agent's flagged moments — brief acceptances, ADR acceptances, corrections, retrospective lessons — and the eval agent is responsible for materializing them into structured Graphiti episodes.
|
|
19
|
+
|
|
20
|
+
Call \`mcp__indusk__highlights_unprocessed\` to get the list. For each highlight, the level drives effort and Graphiti edge weight:
|
|
21
|
+
|
|
22
|
+
- **critical** (architectural decision, accepted ADR, accepted brief): extract full context from the transcript and the changed files, write a structured Graphiti episode with weight **1.0**.
|
|
23
|
+
- **important** (correction, retro lesson, confirmed pattern): extract context, write a Graphiti episode with weight **0.6**.
|
|
24
|
+
- **note** (observation, partially-formed thought): consider it. Write a low-weight (**0.3**) episode if it adds signal; skip if it's already captured in an existing episode.
|
|
25
|
+
|
|
26
|
+
Write each episode using \`mcp__indusk__graph_capture\` so it attaches to the relevant file anchor in the semantic graph — not raw \`mcp__graphiti__add_memory\`. Pick the group sensibly: \`${opts.projectGroup}\` for project-specific facts, \`shared\` for cross-project conventions (e.g., "always use pnpm ce"). Use the level to set the edge weight in the body's metadata section so downstream context-beam queries can rank by importance.
|
|
27
|
+
|
|
28
|
+
After processing each highlight (whether you wrote an episode or decided to skip), call \`mcp__indusk__highlight_mark_processed\` with the highlight ID and the action:
|
|
29
|
+
- \`action: "wrote-episode"\`, \`detail: "{episode name}"\` — if you wrote an episode.
|
|
30
|
+
- \`action: "skipped"\`, \`detail: "{brief reason}"\` — if you decided not to (e.g., already captured, or not meaningful enough).
|
|
31
|
+
|
|
32
|
+
**Highlights are additive context, not a constraint.** Continue reading the full transcript and inferring knowledge independently — highlights ensure important moments aren't missed, but they don't bound your analysis. The transcript may contain insights the working agent didn't flag.
|
|
33
|
+
|
|
34
|
+
If \`mcp__indusk__highlights_unprocessed\` is unavailable, skip this step silently and continue.`
|
|
35
|
+
: `### Step 4: Highlights (baseline mode)
|
|
36
|
+
|
|
37
|
+
Baseline mode — do NOT process highlights or write to Graphiti. Skip to Step 5.`;
|
|
15
38
|
const graphitiInstructions = opts.mode === "eval"
|
|
16
39
|
? `
|
|
17
|
-
|
|
40
|
+
### Step 6: Write findings to the knowledge graph
|
|
18
41
|
|
|
19
42
|
For each finding with severity "warning" or "critical", write it using \`mcp__indusk__graph_capture\`. This dual-writes to both Graphiti AND the semantic graph, connecting the finding to the existing file anchor — so the context beam can find it later.
|
|
20
43
|
|
|
@@ -33,10 +56,10 @@ mcp__indusk__graph_capture({
|
|
|
33
56
|
\`\`\`
|
|
34
57
|
|
|
35
58
|
Only write facts that would have changed the outcome. Be selective — quality over quantity.
|
|
36
|
-
Count how many graph_capture calls you made for the scorecard.
|
|
59
|
+
Count how many graph_capture calls you made for the scorecard (this count includes any highlight episodes written in Step 4).
|
|
37
60
|
If the tool is unavailable, skip silently and set graphitiWrites to 0.`
|
|
38
61
|
: `
|
|
39
|
-
|
|
62
|
+
### Step 6: Graphiti writes
|
|
40
63
|
|
|
41
64
|
Baseline mode — do NOT write to Graphiti. Set graphitiWrites to 0.`;
|
|
42
65
|
return `You are the InDusk evaluation judge. Your job is to evaluate the quality of work done by an AI agent on a software project.
|
|
@@ -65,7 +88,9 @@ Run \`jj diff -r ${opts.changeId}\` to see what was committed. This is the work
|
|
|
65
88
|
|
|
66
89
|
Then read the specific files that were changed to understand the full context — not just the diff lines, but the surrounding code.
|
|
67
90
|
|
|
68
|
-
|
|
91
|
+
${highlightsInstructions}
|
|
92
|
+
|
|
93
|
+
### Step 5: Answer the evaluation questions
|
|
69
94
|
|
|
70
95
|
For each question, investigate thoroughly using MCP tools — search the codebase, query the code graph, check Graphiti for relevant facts. Then answer with this exact JSON shape per question:
|
|
71
96
|
|
|
@@ -88,7 +113,7 @@ Questions:
|
|
|
88
113
|
${questionsBlock}
|
|
89
114
|
${graphitiInstructions}
|
|
90
115
|
|
|
91
|
-
|
|
116
|
+
### Step 7: Output the scorecard
|
|
92
117
|
|
|
93
118
|
After completing all steps, output ONLY the following JSON object. No markdown wrapping, no commentary before or after — just the JSON:
|
|
94
119
|
|
|
@@ -99,7 +124,7 @@ After completing all steps, output ONLY the following JSON object. No markdown w
|
|
|
99
124
|
"mode": "${opts.mode}",
|
|
100
125
|
"changeId": "${opts.changeId}",
|
|
101
126
|
"projectGroup": "${opts.projectGroup}",
|
|
102
|
-
"questions": [/* your answers from Step
|
|
127
|
+
"questions": [/* your answers from Step 5 */],
|
|
103
128
|
"summary": "{one paragraph overall assessment}",
|
|
104
129
|
"graphitiWrites": {number of Graphiti writes made},
|
|
105
130
|
"telemetryPosted": false
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
export type HypothesisOutcome = "fix-in-scope" | "spawn-plan" | "accept-finding";
|
|
2
|
+
export interface HypothesisEntry {
|
|
3
|
+
kind: "hypothesis";
|
|
4
|
+
hypothesis: string;
|
|
5
|
+
testPath: string | null;
|
|
6
|
+
outcome: HypothesisOutcome;
|
|
7
|
+
note?: string;
|
|
8
|
+
timestamp: string;
|
|
9
|
+
}
|
|
10
|
+
export interface TerminatorEntry {
|
|
11
|
+
kind: "terminator";
|
|
12
|
+
reason: string;
|
|
13
|
+
timestamp: string;
|
|
14
|
+
}
|
|
15
|
+
export type LogEntry = HypothesisEntry | TerminatorEntry;
|
|
16
|
+
export interface MalformedLine {
|
|
17
|
+
lineNumber: number;
|
|
18
|
+
content: string;
|
|
19
|
+
reason: string;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Append a confirmed-hypothesis entry to the plan's falsification log.
|
|
23
|
+
* Creates the log file with a header if it doesn't yet exist. Throws if
|
|
24
|
+
* the log is already terminated (a new hypothesis after a terminator is a
|
|
25
|
+
* sign the ritual was restarted incorrectly — see `isFalsificationComplete`
|
|
26
|
+
* and start a new plan or explicitly un-terminate first).
|
|
27
|
+
*/
|
|
28
|
+
export declare function appendHypothesis(planRoot: string, entry: Omit<HypothesisEntry, "kind" | "timestamp">): HypothesisEntry;
|
|
29
|
+
/**
|
|
30
|
+
* Append a terminator entry marking the falsification ritual complete for
|
|
31
|
+
* this plan. No further hypotheses can be appended after this. The reason
|
|
32
|
+
* is the user-confirmed rationale for termination (e.g., "investigated
|
|
33
|
+
* concurrency, race conditions, partial-write paths, and type-narrowing
|
|
34
|
+
* gaps; no in-scope failure remained").
|
|
35
|
+
*/
|
|
36
|
+
export declare function markTerminated(planRoot: string, reason: string): TerminatorEntry;
|
|
37
|
+
/**
|
|
38
|
+
* Read the falsification log for a plan. Returns an empty array if the log
|
|
39
|
+
* file does not exist. Malformed entries are skipped (not thrown) and
|
|
40
|
+
* surfaced via the optional `onMalformed` callback, matching the semantic
|
|
41
|
+
* graph event log's resilience pattern.
|
|
42
|
+
*/
|
|
43
|
+
export declare function readFalsificationLog(planRoot: string, opts?: {
|
|
44
|
+
onMalformed?: (malformed: MalformedLine) => void;
|
|
45
|
+
}): LogEntry[];
|
|
46
|
+
/**
|
|
47
|
+
* True iff the plan's falsification log exists AND its last entry is a
|
|
48
|
+
* terminator. False for a missing log, a log with only hypotheses (ritual
|
|
49
|
+
* started but not terminated), or an empty log file.
|
|
50
|
+
*/
|
|
51
|
+
export declare function isFalsificationComplete(planRoot: string): boolean;
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
import { appendFileSync, existsSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { basename, join } from "node:path";
|
|
3
|
+
// The outcome values a hypothesis entry may carry; used for membership checks
// when parsing log entries.
const VALID_OUTCOMES = new Set(["fix-in-scope", "spawn-plan", "accept-finding"]);
|
|
8
|
+
/**
 * Guard used at the library boundary to refuse multi-line field values.
 *
 * The log is stored as markdown sections whose fields are bold-labeled
 * single lines, and the field regexes run in /m mode (where `$` matches
 * before LF, CR, LS, and PS). A value containing any of those characters
 * would therefore be cut short when read back. Throwing makes the caller
 * decide: collapse the text to one line or split it over several entries.
 *
 * Characters rejected: LF (\n), CR (\r), LS (U+2028), PS (U+2029).
 */
const LINE_SEPARATOR_RE = /[\n\r\u2028\u2029]/;
function assertSingleLine(field, value) {
    const containsSeparator = LINE_SEPARATOR_RE.test(value);
    if (!containsSeparator) {
        return;
    }
    const message = `Falsification log ${field} must be single-line (got a value containing a line separator). Either collapse to one line (replace '\\n' with '; '), or split the content across multiple entries. The log's parser is line-oriented; any line-separator (LF, CR, LS, PS) would silently truncate.`;
    throw new Error(message);
}
|
|
24
|
+
/**
 * Location of a plan's falsification log: `falsification.md` directly under
 * `planRoot`.
 */
function logPath(planRoot) {
    const logFileName = "falsification.md";
    return join(planRoot, logFileName);
}
|
|
27
|
+
/**
 * Build the markdown preamble written once when a log file is created: a
 * title carrying the plan directory's base name, then a one-paragraph notice
 * that the file is append-only, each followed by a blank line.
 */
function headerFor(planRoot) {
    const title = `# Falsification Log — ${basename(planRoot)}`;
    const notice = "Append-only record of the /falsify bounty hunt for this plan. Never edit in place; entries are appended via `appendHypothesis` and `markTerminated` from `apps/indusk-mcp/src/lib/falsification/log.ts`.";
    return [title, "", notice, "", ""].join("\n");
}
|
|
30
|
+
/**
 * Append a confirmed-hypothesis entry to the plan's falsification log.
 * Creates the log file with a header if it doesn't yet exist.
 *
 * All input is validated BEFORE anything is written, so a rejected entry
 * never leaves a partially-created log behind:
 *  - `hypothesis` must be a non-empty, single-line string (an empty value
 *    would be written as a label-only line that cannot be read back);
 *  - `outcome` must be one of `VALID_OUTCOMES` (the reader discards entries
 *    with any other outcome as malformed);
 *  - `note` and `testPath`, when given, must be single-line. A missing or
 *    blank `testPath` is stored as `null`, which is what the reader returns
 *    for the "(not written)" placeholder.
 *
 * @param {string} planRoot - Directory that holds the plan's log file.
 * @param {{hypothesis: string, testPath: string | null, outcome: string, note?: string}} entry
 * @returns {{kind: "hypothesis", hypothesis: string, testPath: string | null, outcome: string, note: string | undefined, timestamp: string}}
 *   The entry as stored, stamped with the current ISO-8601 time.
 * @throws {Error} If validation fails, or if the log is already terminated
 *   (a new hypothesis after a terminator is a sign the ritual was restarted
 *   incorrectly — start a new plan or remove the terminator first).
 */
export function appendHypothesis(planRoot, entry) {
    if (typeof entry.hypothesis !== "string" || !entry.hypothesis.trim()) {
        throw new Error("appendHypothesis requires a non-empty hypothesis.");
    }
    assertSingleLine("hypothesis", entry.hypothesis);
    if (!VALID_OUTCOMES.has(entry.outcome)) {
        throw new Error(`Invalid outcome "${entry.outcome}"; must be one of ${[...VALID_OUTCOMES].join(", ")}`);
    }
    if (entry.note !== undefined) {
        assertSingleLine("note", entry.note);
    }
    // `== null` deliberately matches both null and undefined.
    const hasTestPath = entry.testPath != null && String(entry.testPath).trim() !== "";
    const testPath = hasTestPath ? entry.testPath : null;
    if (testPath !== null) {
        assertSingleLine("testPath", testPath);
    }
    const path = logPath(planRoot);
    // readFalsificationLog returns [] for a missing file, so no existence
    // check is needed before reading.
    const existing = readFalsificationLog(planRoot);
    const last = existing[existing.length - 1];
    if (last?.kind === "terminator") {
        throw new Error(`Falsification log at ${path} is already terminated. Start a new plan or remove the terminator before appending.`);
    }
    if (!existsSync(path)) {
        writeFileSync(path, headerFor(planRoot), "utf-8");
    }
    const stored = {
        kind: "hypothesis",
        hypothesis: entry.hypothesis,
        testPath,
        outcome: entry.outcome,
        note: entry.note,
        timestamp: new Date().toISOString(),
    };
    appendFileSync(path, renderHypothesis(stored), "utf-8");
    return stored;
}
|
|
60
|
+
/**
 * Append a terminator entry marking the falsification ritual complete for
 * this plan. No further hypotheses can be appended after this. The reason
 * is the user-confirmed rationale for termination (e.g., "investigated
 * concurrency, race conditions, partial-write paths, and type-narrowing
 * gaps; no in-scope failure remained").
 *
 * The reason is trimmed first and the trimmed text is what gets validated
 * and stored, so surrounding whitespace (including a trailing newline) is
 * tolerated while an embedded line separator is still rejected.
 *
 * @param {string} planRoot - Directory that holds the plan's log file.
 * @param {string} reason - Single-line rationale; must not be blank.
 * @returns {{kind: "terminator", reason: string, timestamp: string}} The
 *   entry as stored, stamped with the current ISO-8601 time.
 * @throws {Error} If the reason is blank or multi-line, or the log is
 *   already terminated.
 */
export function markTerminated(planRoot, reason) {
    const trimmedReason = reason.trim();
    if (!trimmedReason) {
        throw new Error("markTerminated requires a non-empty reason.");
    }
    assertSingleLine("reason", trimmedReason);
    const path = logPath(planRoot);
    // readFalsificationLog returns [] for a missing file, so no existence
    // check is needed before reading.
    const existing = readFalsificationLog(planRoot);
    const last = existing[existing.length - 1];
    if (last?.kind === "terminator") {
        throw new Error(`Falsification log at ${path} is already terminated.`);
    }
    if (!existsSync(path)) {
        writeFileSync(path, headerFor(planRoot), "utf-8");
    }
    const stored = {
        kind: "terminator",
        reason: trimmedReason,
        timestamp: new Date().toISOString(),
    };
    appendFileSync(path, renderTerminator(stored), "utf-8");
    return stored;
}
|
|
88
|
+
/**
 * Read the falsification log for a plan.
 *
 * The file is split into sections at `## Hypothesis <timestamp>` and
 * `## Terminated <timestamp>` headings; the text between one heading and
 * the next (or end of file) is that section's body. Malformed sections are
 * skipped (not thrown) and surfaced via the optional `onMalformed`
 * callback, with `lineNumber` set to the 1-based line of the section
 * heading so the offending entry can be located in the file.
 *
 * @param {string} planRoot - Directory that holds the plan's log file.
 * @param {{onMalformed?: (malformed: {lineNumber: number, content: string, reason: string}) => void}} [opts]
 * @returns {Array<object>} Parsed entries in file order; an empty array if
 *   the log file does not exist.
 */
export function readFalsificationLog(planRoot, opts) {
    const path = logPath(planRoot);
    if (!existsSync(path)) {
        return [];
    }
    const content = readFileSync(path, "utf-8");
    const sectionRegex = /^##\s+(Hypothesis|Terminated)\s+(.+?)\s*$/gm;
    const matches = [...content.matchAll(sectionRegex)];
    const entries = [];
    matches.forEach((match, i) => {
        const [heading, kind, timestamp] = match;
        const headingStart = match.index ?? 0;
        const bodyStart = headingStart + heading.length;
        const bodyEnd = matches[i + 1]?.index ?? content.length;
        const body = content.slice(bodyStart, bodyEnd).trim();
        // The heading regex only admits these two kinds.
        const parsed = kind === "Hypothesis"
            ? parseHypothesisBody(body, timestamp)
            : parseTerminatorBody(body, timestamp);
        if ("lineNumber" in parsed) {
            // The body parsers cannot know where the section sits in the file,
            // so the real position is filled in here.
            const lineNumber = content.slice(0, headingStart).split("\n").length;
            opts?.onMalformed?.({ ...parsed, lineNumber });
        }
        else {
            entries.push(parsed);
        }
    });
    return entries;
}
|
|
129
|
+
/**
 * Report whether the falsification ritual for a plan has been closed.
 *
 * @param {string} planRoot - Directory that holds the plan's log file.
 * @returns {boolean} `true` only when the log yields at least one entry and
 *   the final one is a terminator; `false` when the log yields no entries
 *   or ends on a hypothesis.
 */
export function isFalsificationComplete(planRoot) {
    const entries = readFalsificationLog(planRoot);
    const lastEntry = entries[entries.length - 1];
    return lastEntry !== undefined && lastEntry.kind === "terminator";
}
|
|
140
|
+
// ---------------------------------------------------------------
|
|
141
|
+
// Rendering (writing entries to markdown)
|
|
142
|
+
// ---------------------------------------------------------------
|
|
143
|
+
/**
 * Serialize a hypothesis entry as a markdown section: a `## Hypothesis`
 * heading carrying the timestamp, a blank line, one bold-labeled line per
 * field, and a trailing blank line. A null/undefined `testPath` is written
 * as "(not written)"; the Note line is emitted only for a truthy `note`.
 */
function renderHypothesis(entry) {
    const testLabel = entry.testPath ?? "(not written)";
    const optionalNote = entry.note ? [`**Note:** ${entry.note}`] : [];
    return [
        `## Hypothesis ${entry.timestamp}`,
        "",
        `**Hypothesis:** ${entry.hypothesis}`,
        `**Test:** ${testLabel}`,
        `**Outcome:** ${entry.outcome}`,
        ...optionalNote,
        "",
        "",
    ].join("\n");
}
|
|
157
|
+
/**
 * Serialize a terminator entry as a markdown section: a `## Terminated`
 * heading carrying the timestamp, a blank line, the bold-labeled reason,
 * and a trailing blank line.
 */
function renderTerminator(entry) {
    const { timestamp, reason } = entry;
    return `## Terminated ${timestamp}\n\n**Reason:** ${reason}\n\n`;
}
|
|
160
|
+
// ---------------------------------------------------------------
|
|
161
|
+
// Parsing (reading entries from markdown)
|
|
162
|
+
// ---------------------------------------------------------------
|
|
163
|
+
/**
 * Parse the body of a `## Hypothesis` section into an entry.
 *
 * Each field is a bold-labeled single line (`**Label:** value`). The label
 * and its value must sit on the SAME line: only spaces/tabs are accepted
 * between them, and the value must start with a non-whitespace character.
 * This keeps a label with an empty value from swallowing the following
 * line as its value (e.g. an empty `**Test:**` being read as
 * `"**Outcome:** fix-in-scope"`).
 *
 * @param {string} body - Section text between this heading and the next.
 * @param {string} timestamp - Timestamp captured from the section heading.
 * @returns {object} A hypothesis entry, or a malformed-line record
 *   (`{lineNumber, content, reason}`) when Hypothesis or Outcome is
 *   missing or the outcome is not in `VALID_OUTCOMES`. `lineNumber` is a
 *   placeholder 0 because the body's position in the file is not known here.
 */
function parseHypothesisBody(body, timestamp) {
    const hypothesisMatch = body.match(/^\*\*Hypothesis:\*\*[ \t]+(\S.*)$/m);
    const testMatch = body.match(/^\*\*Test:\*\*[ \t]+(\S.*)$/m);
    const outcomeMatch = body.match(/^\*\*Outcome:\*\*[ \t]+([a-z-]+)$/m);
    const noteMatch = body.match(/^\*\*Note:\*\*[ \t]+(\S.*)$/m);
    if (!hypothesisMatch || !outcomeMatch) {
        return {
            lineNumber: 0,
            content: body,
            reason: "Hypothesis entry missing required fields (hypothesis or outcome)",
        };
    }
    const outcome = outcomeMatch[1];
    if (!VALID_OUTCOMES.has(outcome)) {
        return {
            lineNumber: 0,
            content: body,
            reason: `Invalid outcome "${outcome}"; must be one of ${[...VALID_OUTCOMES].join(", ")}`,
        };
    }
    // A missing Test line and the "(not written)" placeholder both mean
    // that no test path was recorded.
    const testRaw = testMatch?.[1]?.trim() ?? "(not written)";
    return {
        kind: "hypothesis",
        hypothesis: hypothesisMatch[1].trim(),
        testPath: testRaw === "(not written)" ? null : testRaw,
        outcome,
        note: noteMatch?.[1]?.trim(),
        timestamp,
    };
}
|
|
193
|
+
/**
 * Parse the body of a `## Terminated` section into an entry.
 *
 * The reason must follow the `**Reason:**` label on the SAME line: only
 * spaces/tabs are accepted between them, and the value must start with a
 * non-whitespace character. A label with nothing after it is therefore
 * reported as malformed rather than taking its value from a later line.
 *
 * @param {string} body - Section text between this heading and the next.
 * @param {string} timestamp - Timestamp captured from the section heading.
 * @returns {object} A terminator entry, or a malformed-line record
 *   (`{lineNumber, content, reason}`) when no Reason value is present.
 *   `lineNumber` is a placeholder 0 because the body's position in the
 *   file is not known here.
 */
function parseTerminatorBody(body, timestamp) {
    const reasonMatch = body.match(/^\*\*Reason:\*\*[ \t]+(\S.*)$/m);
    if (!reasonMatch) {
        return {
            lineNumber: 0,
            content: body,
            reason: "Terminator entry missing required Reason field",
        };
    }
    return {
        kind: "terminator",
        reason: reasonMatch[1].trim(),
        timestamp,
    };
}
|