@infinitedusky/indusk-mcp 1.16.1 → 1.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/commands/extensions.js +5 -2
- package/dist/bin/commands/init-docs.js +2 -2
- package/dist/lib/eval/prompt-builder.js +31 -6
- package/dist/lib/highlights/highlights.d.ts +48 -0
- package/dist/lib/highlights/highlights.js +136 -0
- package/dist/lib/semantic-graph/index.d.ts +1 -1
- package/dist/lib/trajectory/audit.js +4 -4
- package/dist/server/index.js +2 -0
- package/dist/tools/highlight-tools.d.ts +18 -0
- package/dist/tools/highlight-tools.js +78 -0
- package/hooks/check-catchup.js +18 -7
- package/hooks/eval-trigger.js +66 -30
- package/hooks/gate-reminder.js +1 -3
- package/package.json +1 -1
- package/skills/handoff.md +14 -0
- package/skills/highlight.md +50 -0
- package/skills/planner.md +12 -16
- package/skills/retrospective.md +23 -17
- package/skills/work.md +8 -14
- package/templates/FullscreenDiagram.vue +3 -3
- package/templates/filtering-exporter.ts +3 -16
- package/templates/instrumentation.ts +4 -5
- package/templates/instrumentation.web.ts +19 -15
- package/templates/logger.ts +1 -1
|
@@ -374,7 +374,7 @@ export async function extensionsUpdate(projectRoot, names) {
|
|
|
374
374
|
continue;
|
|
375
375
|
try {
|
|
376
376
|
if (!ext.manifest._source) {
|
|
377
|
-
if (names
|
|
377
|
+
if (names?.includes(name)) {
|
|
378
378
|
console.info(` ${name}: built-in extension — updated via package update, not extensions update`);
|
|
379
379
|
}
|
|
380
380
|
continue;
|
|
@@ -633,7 +633,10 @@ function printMcpInstructions(name, manifest) {
|
|
|
633
633
|
const needsAuth = server.headers && Object.keys(server.headers).length > 0;
|
|
634
634
|
// Remove first, then add — ensures clean state
|
|
635
635
|
try {
|
|
636
|
-
execSync(`claude mcp remove -s project ${name}`, {
|
|
636
|
+
execSync(`claude mcp remove -s project ${name}`, {
|
|
637
|
+
timeout: 10000,
|
|
638
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
639
|
+
});
|
|
637
640
|
}
|
|
638
641
|
catch {
|
|
639
642
|
// not registered yet, fine
|
|
@@ -24,7 +24,7 @@ export async function initDocs(projectRoot) {
|
|
|
24
24
|
mkdirSync(join(docsDir, dir), { recursive: true });
|
|
25
25
|
}
|
|
26
26
|
// package.json
|
|
27
|
-
writeFileSync(join(docsDir, "package.json"), JSON.stringify({
|
|
27
|
+
writeFileSync(join(docsDir, "package.json"), `${JSON.stringify({
|
|
28
28
|
name: `${projectName}-docs`,
|
|
29
29
|
version: "0.1.0",
|
|
30
30
|
private: true,
|
|
@@ -42,7 +42,7 @@ export async function initDocs(projectRoot) {
|
|
|
42
42
|
"vitepress-plugin-mermaid": "^2.0.10",
|
|
43
43
|
vue: "^3.4.15",
|
|
44
44
|
},
|
|
45
|
-
}, null, "\t")
|
|
45
|
+
}, null, "\t")}\n`);
|
|
46
46
|
// .vitepress/config.ts
|
|
47
47
|
writeFileSync(join(docsDir, "src/.vitepress/config.ts"), `import { defineConfig } from "vitepress";
|
|
48
48
|
import llmstxt from "vitepress-plugin-llms";
|
|
@@ -12,9 +12,32 @@ export function buildJudgePrompt(opts) {
|
|
|
12
12
|
const questionsBlock = opts.rubric
|
|
13
13
|
.map((q, i) => `${i + 1}. **${q.id}**: ${q.question}\n Guidance: ${q.guidance}`)
|
|
14
14
|
.join("\n\n");
|
|
15
|
+
const highlightsInstructions = opts.mode === "eval"
|
|
16
|
+
? `### Step 4: Process unprocessed highlights
|
|
17
|
+
|
|
18
|
+
Before answering the rubric, process the working agent's highlights queue. Highlights are the working agent's flagged moments — brief acceptances, ADR acceptances, corrections, retrospective lessons — and the eval agent is responsible for materializing them into structured Graphiti episodes.
|
|
19
|
+
|
|
20
|
+
Call \`mcp__indusk__highlights_unprocessed\` to get the list. For each highlight, the level drives effort and Graphiti edge weight:
|
|
21
|
+
|
|
22
|
+
- **critical** (architectural decision, accepted ADR, accepted brief): extract full context from the transcript and the changed files, write a structured Graphiti episode with weight **1.0**.
|
|
23
|
+
- **important** (correction, retro lesson, confirmed pattern): extract context, write a Graphiti episode with weight **0.6**.
|
|
24
|
+
- **note** (observation, partially-formed thought): consider it. Write a low-weight (**0.3**) episode if it adds signal; skip if it's already captured in an existing episode.
|
|
25
|
+
|
|
26
|
+
Write each episode using \`mcp__indusk__graph_capture\` so it attaches to the relevant file anchor in the semantic graph — not raw \`mcp__graphiti__add_memory\`. Pick the group sensibly: \`${opts.projectGroup}\` for project-specific facts, \`shared\` for cross-project conventions (e.g., "always use pnpm ce"). Use the level to set the edge weight in the body's metadata section so downstream context-beam queries can rank by importance.
|
|
27
|
+
|
|
28
|
+
After processing each highlight (whether you wrote an episode or decided to skip), call \`mcp__indusk__highlight_mark_processed\` with the highlight ID and the action:
|
|
29
|
+
- \`action: "wrote-episode"\`, \`detail: "{episode name}"\` — if you wrote an episode.
|
|
30
|
+
- \`action: "skipped"\`, \`detail: "{brief reason}"\` — if you decided not to (e.g., already captured, or not meaningful enough).
|
|
31
|
+
|
|
32
|
+
**Highlights are additive context, not a constraint.** Continue reading the full transcript and inferring knowledge independently — highlights ensure important moments aren't missed, but they don't bound your analysis. The transcript may contain insights the working agent didn't flag.
|
|
33
|
+
|
|
34
|
+
If \`mcp__indusk__highlights_unprocessed\` is unavailable, skip this step silently and continue.`
|
|
35
|
+
: `### Step 4: Highlights (baseline mode)
|
|
36
|
+
|
|
37
|
+
Baseline mode — do NOT process highlights or write to Graphiti. Skip to Step 5.`;
|
|
15
38
|
const graphitiInstructions = opts.mode === "eval"
|
|
16
39
|
? `
|
|
17
|
-
|
|
40
|
+
### Step 6: Write findings to the knowledge graph
|
|
18
41
|
|
|
19
42
|
For each finding with severity "warning" or "critical", write it using \`mcp__indusk__graph_capture\`. This dual-writes to both Graphiti AND the semantic graph, connecting the finding to the existing file anchor — so the context beam can find it later.
|
|
20
43
|
|
|
@@ -33,10 +56,10 @@ mcp__indusk__graph_capture({
|
|
|
33
56
|
\`\`\`
|
|
34
57
|
|
|
35
58
|
Only write facts that would have changed the outcome. Be selective — quality over quantity.
|
|
36
|
-
Count how many graph_capture calls you made for the scorecard.
|
|
59
|
+
Count how many graph_capture calls you made for the scorecard (this count includes any highlight episodes written in Step 4).
|
|
37
60
|
If the tool is unavailable, skip silently and set graphitiWrites to 0.`
|
|
38
61
|
: `
|
|
39
|
-
|
|
62
|
+
### Step 6: Graphiti writes
|
|
40
63
|
|
|
41
64
|
Baseline mode — do NOT write to Graphiti. Set graphitiWrites to 0.`;
|
|
42
65
|
return `You are the InDusk evaluation judge. Your job is to evaluate the quality of work done by an AI agent on a software project.
|
|
@@ -65,7 +88,9 @@ Run \`jj diff -r ${opts.changeId}\` to see what was committed. This is the work
|
|
|
65
88
|
|
|
66
89
|
Then read the specific files that were changed to understand the full context — not just the diff lines, but the surrounding code.
|
|
67
90
|
|
|
68
|
-
|
|
91
|
+
${highlightsInstructions}
|
|
92
|
+
|
|
93
|
+
### Step 5: Answer the evaluation questions
|
|
69
94
|
|
|
70
95
|
For each question, investigate thoroughly using MCP tools — search the codebase, query the code graph, check Graphiti for relevant facts. Then answer with this exact JSON shape per question:
|
|
71
96
|
|
|
@@ -88,7 +113,7 @@ Questions:
|
|
|
88
113
|
${questionsBlock}
|
|
89
114
|
${graphitiInstructions}
|
|
90
115
|
|
|
91
|
-
|
|
116
|
+
### Step 7: Output the scorecard
|
|
92
117
|
|
|
93
118
|
After completing all steps, output ONLY the following JSON object. No markdown wrapping, no commentary before or after — just the JSON:
|
|
94
119
|
|
|
@@ -99,7 +124,7 @@ After completing all steps, output ONLY the following JSON object. No markdown w
|
|
|
99
124
|
"mode": "${opts.mode}",
|
|
100
125
|
"changeId": "${opts.changeId}",
|
|
101
126
|
"projectGroup": "${opts.projectGroup}",
|
|
102
|
-
"questions": [/* your answers from Step
|
|
127
|
+
"questions": [/* your answers from Step 5 */],
|
|
103
128
|
"summary": "{one paragraph overall assessment}",
|
|
104
129
|
"graphitiWrites": {number of Graphiti writes made},
|
|
105
130
|
"telemetryPosted": false
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
export type HighlightLevel = "critical" | "important" | "note";
|
|
2
|
+
export type ProcessedAction = "wrote-episode" | "skipped";
|
|
3
|
+
export interface Highlight {
|
|
4
|
+
id: string;
|
|
5
|
+
timestamp: string;
|
|
6
|
+
level: HighlightLevel;
|
|
7
|
+
tag: string;
|
|
8
|
+
note: string;
|
|
9
|
+
}
|
|
10
|
+
export interface ProcessedMark {
|
|
11
|
+
id: string;
|
|
12
|
+
processedAt: string;
|
|
13
|
+
action: ProcessedAction;
|
|
14
|
+
detail?: string;
|
|
15
|
+
}
|
|
16
|
+
export interface WriteHighlightInput {
|
|
17
|
+
tag: string;
|
|
18
|
+
note: string;
|
|
19
|
+
level: HighlightLevel;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Append a highlight to the plan-scoped `.indusk/highlights.jsonl` queue.
|
|
23
|
+
* Called by the working agent (via the `highlight` MCP tool) at trigger
|
|
24
|
+
* points — brief accepted, ADR accepted, correction confirmed, retro
|
|
25
|
+
* lesson, manual `/highlight` flag. The eval agent reads these later
|
|
26
|
+
* and turns them into structured Graphiti episodes.
|
|
27
|
+
*
|
|
28
|
+
* The ID is `h-{YYYYMMDD}-{seq}` where `seq` is a 3-digit counter that
|
|
29
|
+
* resets daily. The timestamp is ISO 8601 UTC.
|
|
30
|
+
*/
|
|
31
|
+
export declare function writeHighlight(projectRoot: string, input: WriteHighlightInput): Highlight;
|
|
32
|
+
/**
|
|
33
|
+
* Return all highlights whose IDs don't yet appear in the processed log.
|
|
34
|
+
* Used by the eval agent (via the `highlights_unprocessed` MCP tool) to
|
|
35
|
+
* find highlights that haven't been written to Graphiti yet.
|
|
36
|
+
*/
|
|
37
|
+
export declare function readUnprocessedHighlights(projectRoot: string): Highlight[];
|
|
38
|
+
/**
|
|
39
|
+
* Mark a highlight as processed. Called by the eval agent (via the
|
|
40
|
+
* `highlight_mark_processed` MCP tool) after handling a highlight —
|
|
41
|
+
* either writing a structured Graphiti episode (`wrote-episode`) or
|
|
42
|
+
* deciding the highlight doesn't warrant a new episode (`skipped`).
|
|
43
|
+
*
|
|
44
|
+
* The operation is idempotent by design — appending a processed mark
|
|
45
|
+
* for an already-processed ID just adds a duplicate entry, and
|
|
46
|
+
* `readUnprocessedHighlights` uses a Set so duplicates don't matter.
|
|
47
|
+
*/
|
|
48
|
+
export declare function markProcessed(projectRoot: string, id: string, action: ProcessedAction, detail?: string): ProcessedMark;
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import { appendFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
function highlightsPath(projectRoot) {
|
|
4
|
+
return join(projectRoot, ".indusk", "highlights.jsonl");
|
|
5
|
+
}
|
|
6
|
+
function processedPath(projectRoot) {
|
|
7
|
+
return join(projectRoot, ".indusk", "highlights-processed.jsonl");
|
|
8
|
+
}
|
|
9
|
+
function ensureInduskDir(projectRoot) {
|
|
10
|
+
mkdirSync(join(projectRoot, ".indusk"), { recursive: true });
|
|
11
|
+
}
|
|
12
|
+
function todayStamp() {
|
|
13
|
+
return new Date().toISOString().slice(0, 10).replace(/-/g, "");
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Read all highlights from the JSONL queue. Returns empty array if the
|
|
17
|
+
* file doesn't exist. Malformed lines are skipped with a silent warning
|
|
18
|
+
* (matching the semantic-graph / falsification log resilience pattern).
|
|
19
|
+
*/
|
|
20
|
+
function readAllHighlights(projectRoot) {
|
|
21
|
+
const path = highlightsPath(projectRoot);
|
|
22
|
+
if (!existsSync(path))
|
|
23
|
+
return [];
|
|
24
|
+
const content = readFileSync(path, "utf-8");
|
|
25
|
+
const lines = content.split("\n").filter((l) => l.length > 0);
|
|
26
|
+
const highlights = [];
|
|
27
|
+
for (const line of lines) {
|
|
28
|
+
try {
|
|
29
|
+
const parsed = JSON.parse(line);
|
|
30
|
+
if (parsed && typeof parsed.id === "string") {
|
|
31
|
+
highlights.push(parsed);
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
catch {
|
|
35
|
+
// skip malformed line
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
return highlights;
|
|
39
|
+
}
|
|
40
|
+
function readAllProcessed(projectRoot) {
|
|
41
|
+
const path = processedPath(projectRoot);
|
|
42
|
+
if (!existsSync(path))
|
|
43
|
+
return [];
|
|
44
|
+
const content = readFileSync(path, "utf-8");
|
|
45
|
+
const lines = content.split("\n").filter((l) => l.length > 0);
|
|
46
|
+
const marks = [];
|
|
47
|
+
for (const line of lines) {
|
|
48
|
+
try {
|
|
49
|
+
const parsed = JSON.parse(line);
|
|
50
|
+
if (parsed && typeof parsed.id === "string") {
|
|
51
|
+
marks.push(parsed);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
catch {
|
|
55
|
+
// skip malformed line
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
return marks;
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Compute the next sequence number for today's highlights. Reads all
|
|
62
|
+
* existing highlights, filters to entries whose ID starts with today's
|
|
63
|
+
* date, takes max seq, adds 1. Starts at 001 if none exist for today.
|
|
64
|
+
*/
|
|
65
|
+
function nextSeqForToday(projectRoot) {
|
|
66
|
+
const today = todayStamp();
|
|
67
|
+
const prefix = `h-${today}-`;
|
|
68
|
+
const highlights = readAllHighlights(projectRoot);
|
|
69
|
+
let maxSeq = 0;
|
|
70
|
+
for (const h of highlights) {
|
|
71
|
+
if (!h.id.startsWith(prefix))
|
|
72
|
+
continue;
|
|
73
|
+
const seqStr = h.id.slice(prefix.length);
|
|
74
|
+
const seq = Number.parseInt(seqStr, 10);
|
|
75
|
+
if (!Number.isNaN(seq) && seq > maxSeq) {
|
|
76
|
+
maxSeq = seq;
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
return maxSeq + 1;
|
|
80
|
+
}
|
|
81
|
+
/**
|
|
82
|
+
* Append a highlight to the plan-scoped `.indusk/highlights.jsonl` queue.
|
|
83
|
+
* Called by the working agent (via the `highlight` MCP tool) at trigger
|
|
84
|
+
* points — brief accepted, ADR accepted, correction confirmed, retro
|
|
85
|
+
* lesson, manual `/highlight` flag. The eval agent reads these later
|
|
86
|
+
* and turns them into structured Graphiti episodes.
|
|
87
|
+
*
|
|
88
|
+
* The ID is `h-{YYYYMMDD}-{seq}` where `seq` is a 3-digit counter that
|
|
89
|
+
* resets daily. The timestamp is ISO 8601 UTC.
|
|
90
|
+
*/
|
|
91
|
+
export function writeHighlight(projectRoot, input) {
|
|
92
|
+
ensureInduskDir(projectRoot);
|
|
93
|
+
const seq = nextSeqForToday(projectRoot);
|
|
94
|
+
const entry = {
|
|
95
|
+
id: `h-${todayStamp()}-${String(seq).padStart(3, "0")}`,
|
|
96
|
+
timestamp: new Date().toISOString(),
|
|
97
|
+
level: input.level,
|
|
98
|
+
tag: input.tag,
|
|
99
|
+
note: input.note,
|
|
100
|
+
};
|
|
101
|
+
appendFileSync(highlightsPath(projectRoot), `${JSON.stringify(entry)}\n`, "utf-8");
|
|
102
|
+
return entry;
|
|
103
|
+
}
|
|
104
|
+
/**
|
|
105
|
+
* Return all highlights whose IDs don't yet appear in the processed log.
|
|
106
|
+
* Used by the eval agent (via the `highlights_unprocessed` MCP tool) to
|
|
107
|
+
* find highlights that haven't been written to Graphiti yet.
|
|
108
|
+
*/
|
|
109
|
+
export function readUnprocessedHighlights(projectRoot) {
|
|
110
|
+
const highlights = readAllHighlights(projectRoot);
|
|
111
|
+
if (highlights.length === 0)
|
|
112
|
+
return [];
|
|
113
|
+
const processedIds = new Set(readAllProcessed(projectRoot).map((m) => m.id));
|
|
114
|
+
return highlights.filter((h) => !processedIds.has(h.id));
|
|
115
|
+
}
|
|
116
|
+
/**
|
|
117
|
+
* Mark a highlight as processed. Called by the eval agent (via the
|
|
118
|
+
* `highlight_mark_processed` MCP tool) after handling a highlight —
|
|
119
|
+
* either writing a structured Graphiti episode (`wrote-episode`) or
|
|
120
|
+
* deciding the highlight doesn't warrant a new episode (`skipped`).
|
|
121
|
+
*
|
|
122
|
+
* The operation is idempotent by design — appending a processed mark
|
|
123
|
+
* for an already-processed ID just adds a duplicate entry, and
|
|
124
|
+
* `readUnprocessedHighlights` uses a Set so duplicates don't matter.
|
|
125
|
+
*/
|
|
126
|
+
export function markProcessed(projectRoot, id, action, detail) {
|
|
127
|
+
ensureInduskDir(projectRoot);
|
|
128
|
+
const mark = {
|
|
129
|
+
id,
|
|
130
|
+
processedAt: new Date().toISOString(),
|
|
131
|
+
action,
|
|
132
|
+
detail,
|
|
133
|
+
};
|
|
134
|
+
appendFileSync(processedPath(projectRoot), `${JSON.stringify(mark)}\n`, "utf-8");
|
|
135
|
+
return mark;
|
|
136
|
+
}
|
|
@@ -11,4 +11,4 @@ export { LogWriter } from "./log-writer.js";
|
|
|
11
11
|
export * from "./paths.js";
|
|
12
12
|
export { type ReplayOptions, type ReplayResult, replay } from "./replay.js";
|
|
13
13
|
export { SemanticGraphClient, type SemanticGraphClientOptions } from "./runtime-client.js";
|
|
14
|
-
export { type SyncResult
|
|
14
|
+
export { runSync, type SyncResult } from "./sync-engine.js";
|
|
@@ -75,12 +75,12 @@ export function resolveTestIdCommand(trajectory, id) {
|
|
|
75
75
|
return null;
|
|
76
76
|
const backtickMatches = [...row.asserts.matchAll(/`([^`]+)`/g)].map((m) => m[1]);
|
|
77
77
|
const identifiers = [...row.asserts.matchAll(/\b[a-zA-Z][a-zA-Z0-9_]{3,}\b/g)].map((m) => m[0]);
|
|
78
|
-
const keyword = backtickMatches
|
|
79
|
-
.filter((s) => /[a-zA-Z]/.test(s))
|
|
80
|
-
.sort((a, b) => b.length - a.length)[0] ??
|
|
78
|
+
const keyword = backtickMatches.filter((s) => /[a-zA-Z]/.test(s)).sort((a, b) => b.length - a.length)[0] ??
|
|
81
79
|
identifiers.sort((a, b) => b.length - a.length)[0] ??
|
|
82
80
|
null;
|
|
83
|
-
const fileGlob = keyword
|
|
81
|
+
const fileGlob = keyword
|
|
82
|
+
? `**/*${keyword.toLowerCase().replace(/[^a-z0-9]/g, "")}*.test.ts`
|
|
83
|
+
: null;
|
|
84
84
|
const suggestedCommand = keyword
|
|
85
85
|
? `pnpm test -t "${keyword}"`
|
|
86
86
|
: `pnpm test -t "${row.asserts.slice(0, 40)}"`;
|
package/dist/server/index.js
CHANGED
|
@@ -16,6 +16,7 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
|
|
|
16
16
|
import { registerContextTools } from "../tools/context-tools.js";
|
|
17
17
|
import { registerDocumentTools } from "../tools/document-tools.js";
|
|
18
18
|
import { registerGraphTools } from "../tools/graph-tools.js";
|
|
19
|
+
import { registerHighlightTools } from "../tools/highlight-tools.js";
|
|
19
20
|
import { registerLessonTools } from "../tools/lesson-tools.js";
|
|
20
21
|
import { registerPlanTools } from "../tools/plan-tools.js";
|
|
21
22
|
import { registerQualityTools } from "../tools/quality-tools.js";
|
|
@@ -58,6 +59,7 @@ export async function startServer() {
|
|
|
58
59
|
registerSystemTools(server, projectRoot);
|
|
59
60
|
registerGraphTools(server, projectRoot);
|
|
60
61
|
registerLessonTools(server, projectRoot);
|
|
62
|
+
registerHighlightTools(server, projectRoot);
|
|
61
63
|
console.error("[indusk] tools registered");
|
|
62
64
|
const transport = new StdioServerTransport();
|
|
63
65
|
await server.connect(transport);
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
|
+
/**
|
|
3
|
+
* Register the three highlight MCP tools:
|
|
4
|
+
*
|
|
5
|
+
* - `highlight` — the working agent writes a highlight at trigger points
|
|
6
|
+
* (brief accepted, ADR accepted, correction confirmed, retro lesson,
|
|
7
|
+
* manual /highlight). The highlight is append-only to
|
|
8
|
+
* .indusk/highlights.jsonl.
|
|
9
|
+
* - `highlights_unprocessed` — the eval agent reads unprocessed
|
|
10
|
+
* highlights to decide what to turn into Graphiti episodes.
|
|
11
|
+
* - `highlight_mark_processed` — the eval agent marks a highlight as
|
|
12
|
+
* processed (either wrote-episode or skipped) to move it out of the
|
|
13
|
+
* unprocessed queue.
|
|
14
|
+
*
|
|
15
|
+
* See .indusk/planning/agent-roles/adr.md for the three-tier role model
|
|
16
|
+
* and the highlights-queue interface.
|
|
17
|
+
*/
|
|
18
|
+
export declare function registerHighlightTools(server: McpServer, projectRoot: string): void;
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { markProcessed, readUnprocessedHighlights, writeHighlight, } from "../lib/highlights/highlights.js";
|
|
3
|
+
/**
|
|
4
|
+
* Register the three highlight MCP tools:
|
|
5
|
+
*
|
|
6
|
+
* - `highlight` — the working agent writes a highlight at trigger points
|
|
7
|
+
* (brief accepted, ADR accepted, correction confirmed, retro lesson,
|
|
8
|
+
* manual /highlight). The highlight is append-only to
|
|
9
|
+
* .indusk/highlights.jsonl.
|
|
10
|
+
* - `highlights_unprocessed` — the eval agent reads unprocessed
|
|
11
|
+
* highlights to decide what to turn into Graphiti episodes.
|
|
12
|
+
* - `highlight_mark_processed` — the eval agent marks a highlight as
|
|
13
|
+
* processed (either wrote-episode or skipped) to move it out of the
|
|
14
|
+
* unprocessed queue.
|
|
15
|
+
*
|
|
16
|
+
* See .indusk/planning/agent-roles/adr.md for the three-tier role model
|
|
17
|
+
* and the highlights-queue interface.
|
|
18
|
+
*/
|
|
19
|
+
export function registerHighlightTools(server, projectRoot) {
|
|
20
|
+
server.registerTool("highlight", {
|
|
21
|
+
description: "Write a highlight to the queue. Called by the working agent at trigger points — brief/ADR acceptance, corrections, retro lessons, or explicit user flags. The eval agent processes these later into structured Graphiti episodes.",
|
|
22
|
+
inputSchema: {
|
|
23
|
+
tag: z
|
|
24
|
+
.string()
|
|
25
|
+
.describe("Short tag for the highlight type: brief-accepted, adr-accepted, correction, retro-lesson, observation, or a custom tag."),
|
|
26
|
+
note: z.string().describe("The highlight content — a single line describing what matters."),
|
|
27
|
+
level: z
|
|
28
|
+
.enum(["critical", "important", "note"])
|
|
29
|
+
.describe("Weight level: critical = architectural decision, important = correction/lesson, note = observation."),
|
|
30
|
+
},
|
|
31
|
+
}, async ({ tag, note, level }) => {
|
|
32
|
+
const entry = writeHighlight(projectRoot, { tag, note, level });
|
|
33
|
+
return {
|
|
34
|
+
content: [
|
|
35
|
+
{
|
|
36
|
+
type: "text",
|
|
37
|
+
text: JSON.stringify(entry, null, 2),
|
|
38
|
+
},
|
|
39
|
+
],
|
|
40
|
+
};
|
|
41
|
+
});
|
|
42
|
+
server.registerTool("highlights_unprocessed", {
|
|
43
|
+
description: "Return all highlights that haven't yet been marked as processed. Called by the eval agent to find highlights needing structured episodes.",
|
|
44
|
+
}, async () => {
|
|
45
|
+
const unprocessed = readUnprocessedHighlights(projectRoot);
|
|
46
|
+
return {
|
|
47
|
+
content: [
|
|
48
|
+
{
|
|
49
|
+
type: "text",
|
|
50
|
+
text: JSON.stringify(unprocessed, null, 2),
|
|
51
|
+
},
|
|
52
|
+
],
|
|
53
|
+
};
|
|
54
|
+
});
|
|
55
|
+
server.registerTool("highlight_mark_processed", {
|
|
56
|
+
description: "Mark a highlight as processed. Called by the eval agent after handling a highlight — either writing a structured Graphiti episode (wrote-episode) or deciding not to (skipped).",
|
|
57
|
+
inputSchema: {
|
|
58
|
+
id: z.string().describe("The highlight ID to mark (format: h-YYYYMMDD-NNN)."),
|
|
59
|
+
action: z
|
|
60
|
+
.enum(["wrote-episode", "skipped"])
|
|
61
|
+
.describe("What was done: wrote-episode = structured episode created; skipped = no episode."),
|
|
62
|
+
detail: z
|
|
63
|
+
.string()
|
|
64
|
+
.optional()
|
|
65
|
+
.describe("Optional detail — for wrote-episode, the episode name; for skipped, the reason."),
|
|
66
|
+
},
|
|
67
|
+
}, async ({ id, action, detail }) => {
|
|
68
|
+
const mark = markProcessed(projectRoot, id, action, detail);
|
|
69
|
+
return {
|
|
70
|
+
content: [
|
|
71
|
+
{
|
|
72
|
+
type: "text",
|
|
73
|
+
text: JSON.stringify(mark, null, 2),
|
|
74
|
+
},
|
|
75
|
+
],
|
|
76
|
+
};
|
|
77
|
+
});
|
|
78
|
+
}
|
package/hooks/check-catchup.js
CHANGED
|
@@ -12,8 +12,8 @@
|
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
14
|
import { execSync } from "node:child_process";
|
|
15
|
-
import { createConnection } from "node:net";
|
|
16
15
|
import { existsSync, readFileSync } from "node:fs";
|
|
16
|
+
import { createConnection } from "node:net";
|
|
17
17
|
import { join, resolve } from "node:path";
|
|
18
18
|
|
|
19
19
|
const input = JSON.parse(readFileSync("/dev/stdin", "utf-8"));
|
|
@@ -38,9 +38,15 @@ function checkTcp(host, port, timeoutMs = 3000) {
|
|
|
38
38
|
return new Promise((resolve) => {
|
|
39
39
|
const sock = createConnection({ host, port });
|
|
40
40
|
sock.setTimeout(timeoutMs);
|
|
41
|
-
sock.on("connect", () => {
|
|
41
|
+
sock.on("connect", () => {
|
|
42
|
+
sock.end();
|
|
43
|
+
resolve(true);
|
|
44
|
+
});
|
|
42
45
|
sock.on("error", () => resolve(false));
|
|
43
|
-
sock.on("timeout", () => {
|
|
46
|
+
sock.on("timeout", () => {
|
|
47
|
+
sock.destroy();
|
|
48
|
+
resolve(false);
|
|
49
|
+
});
|
|
44
50
|
});
|
|
45
51
|
}
|
|
46
52
|
|
|
@@ -61,7 +67,9 @@ if (filePath.endsWith("handoff.md")) {
|
|
|
61
67
|
// Check FalkorDB (indusk-infra container) on localhost:6379
|
|
62
68
|
const falkorUp = await checkTcp("localhost", 6379);
|
|
63
69
|
if (!falkorUp) {
|
|
64
|
-
errors.push(
|
|
70
|
+
errors.push(
|
|
71
|
+
"FalkorDB not reachable on localhost:6379 — is indusk-infra running? Try: docker start indusk-infra",
|
|
72
|
+
);
|
|
65
73
|
}
|
|
66
74
|
|
|
67
75
|
// Check Graphiti on localhost:8100
|
|
@@ -75,14 +83,17 @@ if (filePath.endsWith("handoff.md")) {
|
|
|
75
83
|
errors.push("Graphiti responded but not healthy on localhost:8100");
|
|
76
84
|
}
|
|
77
85
|
} catch {
|
|
78
|
-
errors.push(
|
|
86
|
+
errors.push(
|
|
87
|
+
"Graphiti MCP server not reachable on localhost:8100 — may still be starting (~90s after container restart)",
|
|
88
|
+
);
|
|
79
89
|
}
|
|
80
90
|
|
|
81
91
|
if (errors.length > 0) {
|
|
82
92
|
process.stderr.write(
|
|
83
93
|
`Cannot check off mcp-ready — infrastructure verification failed:\n` +
|
|
84
|
-
|
|
85
|
-
|
|
94
|
+
errors.map((e) => ` - ${e}`).join("\n") +
|
|
95
|
+
"\n" +
|
|
96
|
+
`Fix the issues above before proceeding with catchup.`,
|
|
86
97
|
);
|
|
87
98
|
process.exit(2);
|
|
88
99
|
}
|
package/hooks/eval-trigger.js
CHANGED
|
@@ -1,11 +1,17 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
|
-
*
|
|
4
|
+
* Dual-mode eval trigger.
|
|
5
5
|
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
6
|
+
* 1) PostToolUse hook mode (default): fires on Bash tool calls containing
|
|
7
|
+
* `jj describe`. Reads the hook event JSON from stdin. Spawns the judge
|
|
8
|
+
* runner as a detached background process.
|
|
9
|
+
*
|
|
10
|
+
* 2) CLI mode (`--source <tag>`): invoked manually by skills (e.g., handoff)
|
|
11
|
+
* at session end. No stdin read, no `jj describe` filter. Uses the current
|
|
12
|
+
* @ change and passes the source tag to the judge via INDUSK_EVAL_SOURCE.
|
|
13
|
+
* The judge may skip diff-based scoring when source != "commit" but still
|
|
14
|
+
* processes the highlights queue.
|
|
9
15
|
*
|
|
10
16
|
* Exit 0 always — this is advisory, not blocking.
|
|
11
17
|
*/
|
|
@@ -26,25 +32,46 @@ function syslog(projectRoot, msg) {
|
|
|
26
32
|
}
|
|
27
33
|
}
|
|
28
34
|
|
|
29
|
-
//
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
35
|
+
// Parse --source <tag> from argv. Returns null if not in CLI mode.
|
|
36
|
+
function parseSourceArg(argv) {
|
|
37
|
+
const idx = argv.indexOf("--source");
|
|
38
|
+
if (idx === -1 || idx === argv.length - 1) return null;
|
|
39
|
+
const value = argv[idx + 1];
|
|
40
|
+
if (!value || value.startsWith("--")) return null;
|
|
41
|
+
return value;
|
|
33
42
|
}
|
|
34
43
|
|
|
35
|
-
const
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
const cwd = event.cwd ?? process.cwd();
|
|
44
|
+
const cliSource = parseSourceArg(process.argv);
|
|
45
|
+
let cwd;
|
|
46
|
+
let command = "";
|
|
39
47
|
|
|
40
|
-
|
|
48
|
+
if (cliSource !== null) {
|
|
49
|
+
// CLI mode — no stdin, no jj describe filter
|
|
50
|
+
cwd = process.cwd();
|
|
51
|
+
syslog(cwd, `cli invocation — source: ${cliSource}`);
|
|
52
|
+
} else {
|
|
53
|
+
// Hook mode — read event from stdin
|
|
54
|
+
let input = "";
|
|
55
|
+
for await (const chunk of process.stdin) {
|
|
56
|
+
input += chunk;
|
|
57
|
+
}
|
|
41
58
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
process.
|
|
59
|
+
const event = JSON.parse(input);
|
|
60
|
+
const toolInput = event.tool_input ?? {};
|
|
61
|
+
command = toolInput.command ?? "";
|
|
62
|
+
cwd = event.cwd ?? process.cwd();
|
|
63
|
+
|
|
64
|
+
syslog(cwd, `hook fired — tool: ${event.tool_name}, command: ${command.slice(0, 100)}`);
|
|
65
|
+
|
|
66
|
+
// Fast path: not a jj describe command
|
|
67
|
+
if (!command.includes("jj describe")) {
|
|
68
|
+
syslog(cwd, "skip — no jj describe in command");
|
|
69
|
+
process.exit(0);
|
|
70
|
+
}
|
|
46
71
|
}
|
|
47
72
|
|
|
73
|
+
const source = cliSource ?? "commit";
|
|
74
|
+
|
|
48
75
|
/**
|
|
49
76
|
* Find the project root by walking up looking for .indusk/ or .claude/.
|
|
50
77
|
*/
|
|
@@ -157,7 +184,7 @@ if (!judgeRunnerPath) {
|
|
|
157
184
|
message:
|
|
158
185
|
"Could not find @infinitedusky/indusk-mcp package — eval judge not available. Run: npm i -g @infinitedusky/indusk-mcp",
|
|
159
186
|
});
|
|
160
|
-
appendFileSync(logPath, entry
|
|
187
|
+
appendFileSync(logPath, `${entry}\n`, "utf8");
|
|
161
188
|
process.exit(0);
|
|
162
189
|
}
|
|
163
190
|
|
|
@@ -238,21 +265,30 @@ const child = spawn("node", ["--input-type=module", "-e", judgeScript], {
|
|
|
238
265
|
cwd: projectRoot,
|
|
239
266
|
stdio: "ignore",
|
|
240
267
|
detached: true,
|
|
241
|
-
env: { ...process.env },
|
|
268
|
+
env: { ...process.env, INDUSK_EVAL_SOURCE: source },
|
|
242
269
|
});
|
|
243
270
|
|
|
244
271
|
child.unref();
|
|
245
272
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
273
|
+
syslog(projectRoot, `judge spawned — source: ${source}, pid: ${child.pid}`);
|
|
274
|
+
|
|
275
|
+
if (cliSource !== null) {
|
|
276
|
+
// CLI mode — write a brief notice to stderr and exit
|
|
277
|
+
process.stderr.write(
|
|
278
|
+
`📊 Eval judge spawned (source=${source}) for ${changeId.slice(0, 8)}. Results will appear in .indusk/eval/results.log\n`,
|
|
279
|
+
);
|
|
280
|
+
} else {
|
|
281
|
+
// Hook mode — output structured hook response
|
|
282
|
+
const output = JSON.stringify({
|
|
283
|
+
hookSpecificOutput: {
|
|
284
|
+
hookEventName: "PostToolUse",
|
|
285
|
+
message: `Eval judge spawned for change ${changeId.slice(0, 8)}`,
|
|
286
|
+
},
|
|
287
|
+
});
|
|
288
|
+
process.stdout.write(output);
|
|
289
|
+
process.stderr.write(
|
|
290
|
+
`📊 Eval judge spawned in background for ${changeId.slice(0, 8)}. Results will appear in .indusk/eval/results.log\n`,
|
|
291
|
+
);
|
|
292
|
+
}
|
|
257
293
|
|
|
258
294
|
process.exit(0);
|
package/hooks/gate-reminder.js
CHANGED
|
@@ -93,13 +93,11 @@ for (const phase of phases) {
|
|
|
93
93
|
const nextHasUnchecked = nextPhase.items.some((i) => !i.checked);
|
|
94
94
|
if (nextHasUnchecked) {
|
|
95
95
|
// This phase is complete and next phase hasn't started
|
|
96
|
-
const
|
|
96
|
+
const _result = {
|
|
97
97
|
hookSpecificOutput: {
|
|
98
98
|
hookEventName: "PostToolUse",
|
|
99
99
|
},
|
|
100
100
|
};
|
|
101
|
-
// Output reminder as JSON to stdout
|
|
102
|
-
console.log(JSON.stringify(result));
|
|
103
101
|
console.error(
|
|
104
102
|
`Phase ${phase.number} (${phase.name}) is fully complete. Call advance_plan to validate gates before starting Phase ${nextPhase.number}.`,
|
|
105
103
|
);
|
package/package.json
CHANGED
package/skills/handoff.md
CHANGED
|
@@ -52,6 +52,20 @@ Create or overwrite `.claude/handoff.md` with:
|
|
|
52
52
|
- When you're about to run out of context
|
|
53
53
|
- `/handoff` explicitly
|
|
54
54
|
|
|
55
|
+
## Fire the Eval Trigger
|
|
56
|
+
|
|
57
|
+
After writing the handoff file, fire the eval trigger with `--source handoff` so the eval agent processes any unprocessed highlights before the session ends. This matters because highlights written after the last `jj describe` would otherwise sit in the queue until the next session's first commit.
|
|
58
|
+
|
|
59
|
+
Run this from the project root:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
node .claude/hooks/eval-trigger.js --source handoff
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
The trigger spawns the judge in the background and returns immediately — it never blocks handoff. The judge processes the highlights queue and, because `INDUSK_EVAL_SOURCE=handoff` is set in the environment, may skip diff-based rubric scoring (there's no new commit). Highlights still get materialized into Graphiti episodes.
|
|
66
|
+
|
|
67
|
+
If the hook isn't installed or Node isn't on PATH, the handoff still succeeds — the highlights remain queued for the next `jj describe` in a future session.
|
|
68
|
+
|
|
55
69
|
## Rules
|
|
56
70
|
|
|
57
71
|
- **Be specific.** "Working on Phase 3" is useless. "Phase 3, item 4: refactored check_health to use extensions. extensions_status MCP tool created. Next: refactor init to remove hardcoded FalkorDB/CGC." is useful.
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: highlight
|
|
3
|
+
description: Flag a moment in the session as worth remembering. Writes a highlight to the queue so the eval agent can materialize it into a structured Graphiti episode.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
You are flagging something the user wants captured — an observation, a decision, a surprise, a lesson that isn't an official retrospective yet. Write a highlight to the queue. The eval agent will process it into a structured Graphiti episode on the next `jj describe` or at session end.
|
|
7
|
+
|
|
8
|
+
## Invocation
|
|
9
|
+
|
|
10
|
+
The user runs `/highlight {free-form text} [level: critical|important|note]`.
|
|
11
|
+
|
|
12
|
+
- If no level is specified, **default to `important`**.
|
|
13
|
+
- If the user's phrasing clearly signals weight (`critical`, `important`, `note`, or synonyms like `major`, `minor`), use that.
|
|
14
|
+
- If the user passes `level=critical` / `level=note` explicitly, honor it.
|
|
15
|
+
|
|
16
|
+
## Process
|
|
17
|
+
|
|
18
|
+
1. **Parse the input.** Extract the note text. If the user wrote `/highlight level=critical decide X over Y`, strip the entire `level=critical` token out of the note (not just the `level=` prefix), leaving `decide X over Y`.
|
|
19
|
+
|
|
20
|
+
2. **Choose a tag.** Tags categorize the highlight for the eval agent. Pick one of:
|
|
21
|
+
- `observation` — general flag (default)
|
|
22
|
+
- `decision` — an informal decision the user wants recorded
|
|
23
|
+
- `surprise` — something unexpected
|
|
24
|
+
- `correction` — a mid-session correction (but the work skill already does this on `context learn`; prefer that path)
|
|
25
|
+
- A user-supplied tag if the message contains one (`tag=architecture` etc.)
|
|
26
|
+
|
|
27
|
+
3. **Write the highlight** via the InDusk MCP:
|
|
28
|
+
|
|
29
|
+
```
|
|
30
|
+
mcp__indusk__highlight({
|
|
31
|
+
tag: "{chosen tag}",
|
|
32
|
+
note: "{the user's text, cleaned of any level= or tag= prefixes}",
|
|
33
|
+
level: "{critical|important|note}"
|
|
34
|
+
})
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
4. **Confirm to the user** with the generated ID and a one-line summary:
|
|
38
|
+
|
|
39
|
+
> "Highlighted as `h-20260417-007` (level: critical, tag: decision). The eval agent will pick this up on the next `jj describe` or at session end."
|
|
40
|
+
|
|
41
|
+
## Rules
|
|
42
|
+
|
|
43
|
+
- **Default level is `important`.** Only bump to `critical` if the user explicitly says so or the content is clearly an architectural / decision-level moment.
|
|
44
|
+
- **Do not write the Graphiti episode yourself.** The whole point of highlights is that the working agent flags and moves on; the eval agent handles materialization.
|
|
45
|
+
- **If `mcp__indusk__highlight` is unavailable**, degrade gracefully: tell the user "highlights queue unavailable — InDusk MCP may be down" and do not fail.
|
|
46
|
+
- **One highlight per invocation.** If the user flags multiple things at once, ask them to split or pick the most important one to flag.
|
|
47
|
+
|
|
48
|
+
## Cross-reference
|
|
49
|
+
|
|
50
|
+
See [`apps/indusk-docs/src/reference/tools/highlights.md`](../../indusk-docs/src/reference/tools/highlights.md) for the full highlights system — file format, level → Graphiti edge weight mapping, eval agent processing, and trigger points across other skills.
|
package/skills/planner.md
CHANGED
|
@@ -74,31 +74,27 @@ Workflow templates are in `templates/workflows/` in the package. They describe w
|
|
|
74
74
|
|
|
75
75
|
4. **If research is done**, write the brief. This is where a direction emerges from the research. The brief proposes what we're building and why, informed by what the research uncovered. **Consider creating a visual sketch** of the proposed architecture with Excalidraw (if the extension is enabled) — a hand-drawn diagram makes the proposal concrete and easier to discuss. **Present the brief and have a conversation about it.** Don't just ask "does this look good?" — walk the user through it: "Here's what I'm proposing we build. Does this match what you had in mind? Is there anything missing, or anything here you don't want?" Iterate until the user is genuinely happy with the direction, then mark it as `accepted`.
|
|
76
76
|
|
|
77
|
-
**When the brief moves from `draft` to `accepted`**,
|
|
77
|
+
**When the brief moves from `draft` to `accepted`**, write a highlight so the eval agent can turn it into a structured Graphiti episode:
|
|
78
78
|
```
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
source: "text",
|
|
84
|
-
source_description: "brief acceptance"
|
|
79
|
+
mcp__indusk__highlight({
|
|
80
|
+
tag: "brief-accepted",
|
|
81
|
+
note: "{plan-name}: {one-line summary of Proposed Direction}",
|
|
82
|
+
level: "critical"
|
|
85
83
|
})
|
|
86
84
|
```
|
|
87
|
-
The
|
|
85
|
+
The working agent does not write Graphiti episodes directly. The eval agent reads unprocessed highlights (via `highlights_unprocessed`), extracts the full Problem + Proposed Direction + Scope context from the transcript, writes a structured episode into the project group, and marks the highlight processed. Skip silently if `mcp__indusk__highlight` is unavailable — highlights are best-effort and must not fail brief acceptance. See [`apps/indusk-docs/src/reference/tools/highlights.md`](../../indusk-docs/src/reference/tools/highlights.md) for the full flow.
|
|
88
86
|
|
|
89
87
|
5. **If brief is accepted** and the workflow includes an ADR (feature only), write the ADR. The ADR formalizes the decisions that were discussed during research and led to the brief. It records what was chosen, what was rejected, and why. **After the ADR is accepted**, add a one-liner to CLAUDE.md's Key Decisions section per the context skill: `- {decision summary} — see .indusk/planning/{plan}/adr.md`
|
|
90
88
|
|
|
91
|
-
**When the ADR moves from `proposed` to `accepted`**,
|
|
89
|
+
**When the ADR moves from `proposed` to `accepted`**, write a highlight so the eval agent can turn it into a structured Y-statement episode:
|
|
92
90
|
```
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
source: "text",
|
|
98
|
-
source_description: "ADR acceptance"
|
|
91
|
+
mcp__indusk__highlight({
|
|
92
|
+
tag: "adr-accepted",
|
|
93
|
+
note: "{plan-name}: {chosen option} — rejected {primary alternative}",
|
|
94
|
+
level: "critical"
|
|
99
95
|
})
|
|
100
96
|
```
|
|
101
|
-
The Y-statement
|
|
97
|
+
The eval agent reads the highlight, pulls the full Y-statement from the ADR file, writes a structured episode into the project group, and marks it processed. Graphiti's entity extraction will pick up the chosen option, rejected alternatives, constraint, and rationale, and will detect contradictions if a later ADR overrides this one. The working agent does not write the episode directly. Skip silently on highlight unavailability — degrade gracefully.
|
|
102
98
|
|
|
103
99
|
6. **If ADR is accepted** (or brief is accepted for bugfix/refactor), write the impl. Break into phased checklists with concrete tasks. For refactor workflows, include a `## Boundary Map` section. For multi-phase impls of any type, consider adding a boundary map.
|
|
104
100
|
|
package/skills/retrospective.md
CHANGED
|
@@ -113,7 +113,17 @@ For each finding, act on it:
|
|
|
113
113
|
- **Deferred rows classified as `downstream-plan`** — verify the referenced plan exists and is either `accepted` or `in-progress`. If it's `draft` or missing, either accept the referenced plan now or pick a different mitigation.
|
|
114
114
|
- **Deferred rows classified as `telemetry-alert`** — verify the named metric actually exists in the codebase (grep for it). If the metric hasn't been wired up, the mitigation is aspirational — either wire it up now or change the mitigation.
|
|
115
115
|
|
|
116
|
-
|
|
116
|
+
Flag findings as a highlight — the eval agent reads it, writes the `retrospective-audit-{plan-slug}` Graphiti episode, and marks it processed:
|
|
117
|
+
|
|
118
|
+
```
|
|
119
|
+
mcp__indusk__highlight({
|
|
120
|
+
tag: "retro-audit",
|
|
121
|
+
note: "{plan-name}: {finding classification}; {what was done}; {warning if any}",
|
|
122
|
+
level: "important"
|
|
123
|
+
})
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Include the classification, the warning (if any), and what was done. This is the signal the eval agent uses to detect mitigation drift over time.
|
|
117
127
|
|
|
118
128
|
### Step 5: Quality Audit
|
|
119
129
|
|
|
@@ -140,35 +150,31 @@ If yes, call `add_lesson` for each one. These become personal lessons in `.claud
|
|
|
140
150
|
|
|
141
151
|
If no lessons emerged, that's fine — not every plan produces new knowledge. Move on.
|
|
142
152
|
|
|
143
|
-
**Also
|
|
153
|
+
**Also flag each retrospective insight as a highlight** so the eval agent can turn it into a structured Graphiti episode and surface it in future searches and contradiction detection.
|
|
144
154
|
|
|
145
155
|
For each item in the retrospective's **What We Learned** section:
|
|
146
156
|
```
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
source: "text",
|
|
152
|
-
source_description: "retrospective lesson"
|
|
157
|
+
mcp__indusk__highlight({
|
|
158
|
+
tag: "retro-lesson",
|
|
159
|
+
note: "{plan-name}: {the insight, with enough context for the eval agent to write a full episode}",
|
|
160
|
+
level: "important"
|
|
153
161
|
})
|
|
154
162
|
```
|
|
155
163
|
|
|
156
164
|
For each item in the retrospective's **What We'd Do Differently** section:
|
|
157
165
|
```
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
source: "text",
|
|
163
|
-
source_description: "retrospective hindsight"
|
|
166
|
+
mcp__indusk__highlight({
|
|
167
|
+
tag: "retro-hindsight",
|
|
168
|
+
note: "{plan-name}: {the hindsight item, with reasoning}",
|
|
169
|
+
level: "important"
|
|
164
170
|
})
|
|
165
171
|
```
|
|
166
172
|
|
|
167
|
-
|
|
173
|
+
The eval agent reads each highlight, writes the full Graphiti episode (project group by default; `shared` if the insight is clearly cross-project), and marks it processed. The working agent does not write the episode directly.
|
|
168
174
|
|
|
169
|
-
**Contradictions:** If the retrospective surfaces a moment where "we thought X but found Y",
|
|
175
|
+
**Contradictions:** If the retrospective surfaces a moment where "we thought X but found Y", write two highlights (one per fact). Graphiti's contradiction detection will invalidate the older fact when it sees the conflicting one once the eval agent materializes the episodes. This is one of Graphiti's most useful features — it remembers that a previous assumption was overturned, so the agent doesn't accidentally re-introduce it later.
|
|
170
176
|
|
|
171
|
-
Skip silently if `
|
|
177
|
+
Skip silently if `mcp__indusk__highlight` is unavailable — highlights are best-effort, and lesson recording via `add_lesson` remains the canonical local path. Highlight-driven Graphiti capture is supplementary.
|
|
172
178
|
|
|
173
179
|
### Step 7: Context Audit
|
|
174
180
|
|
package/skills/work.md
CHANGED
|
@@ -259,26 +259,20 @@ When you are corrected mid-work — the user says "no, not that way" or "don't d
|
|
|
259
259
|
|
|
260
260
|
Don't wait to be told. Corrections are the most valuable source of project knowledge.
|
|
261
261
|
|
|
262
|
-
**When the user confirms `context learn`, ALSO
|
|
262
|
+
**When the user confirms `context learn`, ALSO write a highlight so the eval agent can capture it in Graphiti:**
|
|
263
263
|
```
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
source: "text",
|
|
269
|
-
source_description: "user correction"
|
|
264
|
+
mcp__indusk__highlight({
|
|
265
|
+
tag: "correction",
|
|
266
|
+
note: "{short slug + lesson text}",
|
|
267
|
+
level: "important"
|
|
270
268
|
})
|
|
271
269
|
```
|
|
272
270
|
|
|
273
|
-
|
|
271
|
+
The working agent does not write the Graphiti episode directly. The eval agent reads the highlight, decides whether it's a cross-project convention (→ `shared` group) or a project-specific fact (→ project group), phrases the episode, and writes it. The working agent just flags the moment and keeps working.
|
|
274
272
|
|
|
275
|
-
**
|
|
276
|
-
- **`shared`**: tools, conventions, patterns that apply across projects. Examples: "always use pnpm ce", "never mock the database in integration tests", "use jj describe-then-do for commits". The lesson is generally true and a different project would benefit from it.
|
|
277
|
-
- **`{project-group}`**: facts specific to this project's code, data, or domain. Examples: "the impl-parser handles four gate types per phase", "graph_ensure auto-repairs the indusk-infra container". The lesson only makes sense in the context of this project.
|
|
273
|
+
**What to include in the `note`:** enough for the eval agent to reconstruct the lesson and classify its scope. Example: `pnpm-ce: always use pnpm ce, not npx — skill doc specifies pnpm and mixing causes cache drift`. The eval agent has the full transcript, so prefer concision over completeness.
|
|
278
274
|
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
Use `getProjectGroupId(projectRoot)` from `apps/indusk-mcp/src/lib/config.ts` to get the project group consistently. Skip silently if `mcp__graphiti__add_memory` is unavailable — Graphiti capture is best-effort, do not fail the work item. Prefer `mcp__indusk__graph_capture` over raw `mcp__graphiti__add_memory` — it dual-writes to both Graphiti and the semantic graph event log.
|
|
275
|
+
Skip silently if `mcp__indusk__highlight` is unavailable — highlights are best-effort and must not fail the work item or the `context learn` recording (which is the canonical, local copy of the lesson).
|
|
282
276
|
|
|
283
277
|
## Commits (jj)
|
|
284
278
|
|
|
@@ -63,7 +63,7 @@ const cloneDiagram = () => {
|
|
|
63
63
|
diagramHTML.value = diagramRef.value.innerHTML;
|
|
64
64
|
};
|
|
65
65
|
|
|
66
|
-
const
|
|
66
|
+
const _toggleExpand = () => {
|
|
67
67
|
if (!isExpanded.value) {
|
|
68
68
|
cloneDiagram();
|
|
69
69
|
}
|
|
@@ -97,7 +97,7 @@ const toggleExpand = () => {
|
|
|
97
97
|
}
|
|
98
98
|
};
|
|
99
99
|
|
|
100
|
-
const
|
|
100
|
+
const _zoomIn = () => {
|
|
101
101
|
if (panzoomInstance.value) {
|
|
102
102
|
const currentZoom = panzoomInstance.value.getTransform().scale;
|
|
103
103
|
if (currentZoom < 10) {
|
|
@@ -106,7 +106,7 @@ const zoomIn = () => {
|
|
|
106
106
|
}
|
|
107
107
|
};
|
|
108
108
|
|
|
109
|
-
const
|
|
109
|
+
const _zoomOut = () => {
|
|
110
110
|
if (panzoomInstance.value) {
|
|
111
111
|
const currentZoom = panzoomInstance.value.getTransform().scale;
|
|
112
112
|
if (currentZoom > 0.05) {
|
|
@@ -14,20 +14,10 @@
|
|
|
14
14
|
* If OTEL_ENABLED_CATEGORIES is not set, all categories are exported.
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
|
-
import type {
|
|
18
|
-
ExportResult,
|
|
19
|
-
ExportResultCode,
|
|
20
|
-
} from "@opentelemetry/core";
|
|
17
|
+
import type { ExportResult, ExportResultCode } from "@opentelemetry/core";
|
|
21
18
|
import type { ReadableSpan, SpanExporter } from "@opentelemetry/sdk-trace-base";
|
|
22
19
|
|
|
23
|
-
export const ALL_CATEGORIES = [
|
|
24
|
-
"http",
|
|
25
|
-
"db",
|
|
26
|
-
"business",
|
|
27
|
-
"inference",
|
|
28
|
-
"state",
|
|
29
|
-
"system",
|
|
30
|
-
] as const;
|
|
20
|
+
export const ALL_CATEGORIES = ["http", "db", "business", "inference", "state", "system"] as const;
|
|
31
21
|
|
|
32
22
|
export type OtelCategory = (typeof ALL_CATEGORIES)[number];
|
|
33
23
|
|
|
@@ -49,10 +39,7 @@ export class FilteringExporter implements SpanExporter {
|
|
|
49
39
|
this.enabledCategories = getEnabledCategories();
|
|
50
40
|
}
|
|
51
41
|
|
|
52
|
-
export(
|
|
53
|
-
spans: ReadableSpan[],
|
|
54
|
-
resultCallback: (result: ExportResult) => void,
|
|
55
|
-
): void {
|
|
42
|
+
export(spans: ReadableSpan[], resultCallback: (result: ExportResult) => void): void {
|
|
56
43
|
const filtered = spans.filter((span) => {
|
|
57
44
|
const category = span.attributes["otel.category"] as string | undefined;
|
|
58
45
|
// Spans without a category are always exported
|
|
@@ -15,15 +15,15 @@
|
|
|
15
15
|
* OTEL_ENABLED_CATEGORIES — comma-separated categories to export (default: all)
|
|
16
16
|
*/
|
|
17
17
|
|
|
18
|
-
import { NodeSDK } from "@opentelemetry/sdk-node";
|
|
19
18
|
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
|
|
20
19
|
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
|
|
20
|
+
import { resourceFromAttributes } from "@opentelemetry/resources";
|
|
21
|
+
import { NodeSDK } from "@opentelemetry/sdk-node";
|
|
21
22
|
import {
|
|
22
|
-
ConsoleSpanExporter,
|
|
23
23
|
BatchSpanProcessor,
|
|
24
|
+
ConsoleSpanExporter,
|
|
24
25
|
SimpleSpanProcessor,
|
|
25
26
|
} from "@opentelemetry/sdk-trace-base";
|
|
26
|
-
import { resourceFromAttributes } from "@opentelemetry/resources";
|
|
27
27
|
import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
|
|
28
28
|
import { FilteringExporter } from "./filtering-exporter";
|
|
29
29
|
|
|
@@ -41,8 +41,7 @@ function createSpanProcessor() {
|
|
|
41
41
|
|
|
42
42
|
const sdk = new NodeSDK({
|
|
43
43
|
resource: resourceFromAttributes({
|
|
44
|
-
[ATTR_SERVICE_NAME]:
|
|
45
|
-
process.env.OTEL_SERVICE_NAME ?? "unknown-service",
|
|
44
|
+
[ATTR_SERVICE_NAME]: process.env.OTEL_SERVICE_NAME ?? "unknown-service",
|
|
46
45
|
}),
|
|
47
46
|
spanProcessors: [createSpanProcessor()],
|
|
48
47
|
instrumentations: [
|
|
@@ -18,30 +18,34 @@
|
|
|
18
18
|
* pnpm add @opentelemetry/exporter-trace-otlp-http @opentelemetry/resources @opentelemetry/semantic-conventions
|
|
19
19
|
*/
|
|
20
20
|
|
|
21
|
-
import { WebTracerProvider } from "@opentelemetry/sdk-trace-web";
|
|
22
|
-
import { BatchSpanProcessor, SimpleSpanProcessor, ConsoleSpanExporter } from "@opentelemetry/sdk-trace-base";
|
|
23
21
|
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
|
|
24
|
-
import {
|
|
25
|
-
import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
|
|
26
|
-
import { FetchInstrumentation } from "@opentelemetry/instrumentation-fetch";
|
|
22
|
+
import { registerInstrumentations } from "@opentelemetry/instrumentation";
|
|
27
23
|
import { DocumentLoadInstrumentation } from "@opentelemetry/instrumentation-document-load";
|
|
24
|
+
import { FetchInstrumentation } from "@opentelemetry/instrumentation-fetch";
|
|
28
25
|
import { UserInteractionInstrumentation } from "@opentelemetry/instrumentation-user-interaction";
|
|
29
|
-
import {
|
|
26
|
+
import { resourceFromAttributes } from "@opentelemetry/resources";
|
|
27
|
+
import {
|
|
28
|
+
BatchSpanProcessor,
|
|
29
|
+
ConsoleSpanExporter,
|
|
30
|
+
SimpleSpanProcessor,
|
|
31
|
+
} from "@opentelemetry/sdk-trace-base";
|
|
32
|
+
import { WebTracerProvider } from "@opentelemetry/sdk-trace-web";
|
|
33
|
+
import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
|
|
30
34
|
|
|
31
35
|
const endpoint =
|
|
32
|
-
import.meta.env?.VITE_OTEL_EXPORTER_OTLP_ENDPOINT
|
|
33
|
-
|
|
34
|
-
|
|
36
|
+
import.meta.env?.VITE_OTEL_EXPORTER_OTLP_ENDPOINT ??
|
|
37
|
+
process.env.NEXT_PUBLIC_OTEL_EXPORTER_OTLP_ENDPOINT ??
|
|
38
|
+
"";
|
|
35
39
|
|
|
36
40
|
const headers =
|
|
37
|
-
import.meta.env?.VITE_OTEL_EXPORTER_OTLP_HEADERS
|
|
38
|
-
|
|
39
|
-
|
|
41
|
+
import.meta.env?.VITE_OTEL_EXPORTER_OTLP_HEADERS ??
|
|
42
|
+
process.env.NEXT_PUBLIC_OTEL_EXPORTER_OTLP_HEADERS ??
|
|
43
|
+
"";
|
|
40
44
|
|
|
41
45
|
const serviceName =
|
|
42
|
-
import.meta.env?.VITE_OTEL_SERVICE_NAME
|
|
43
|
-
|
|
44
|
-
|
|
46
|
+
import.meta.env?.VITE_OTEL_SERVICE_NAME ??
|
|
47
|
+
process.env.NEXT_PUBLIC_OTEL_SERVICE_NAME ??
|
|
48
|
+
"unknown-service";
|
|
45
49
|
|
|
46
50
|
// Parse headers string: "Key1=Value1,Key2=Value2" → { Key1: "Value1", Key2: "Value2" }
|
|
47
51
|
function parseHeaders(headerStr: string): Record<string, string> {
|
package/templates/logger.ts
CHANGED