@infinitedusky/indusk-mcp 1.12.0 → 1.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/cli.js CHANGED
@@ -259,6 +259,28 @@ eval_
259
259
  const { evalSummary } = await import("./commands/eval.js");
260
260
  await evalSummary(process.cwd(), opts);
261
261
  });
262
// `eval findings` — print findings that still need attention (every finding with --all).
eval_
    .command("findings")
    .description("List unresolved eval findings")
    .option("--all", "Show all findings including fixed/ignored")
    .action(async (opts) => {
        const evalCommands = await import("./commands/eval.js");
        await evalCommands.evalFindings(process.cwd(), opts);
    });
// `eval fix <key>` and `eval ignore <key>` both delegate to evalMark and differ
// only in the state they record, so they are registered from one table (in the
// same order as before: fix, then ignore).
for (const [usage, summary, targetState] of [
    ["fix <key>", "Mark an eval finding as fixed", "fixed"],
    ["ignore <key>", "Mark an eval finding as ignored", "ignored"],
]) {
    eval_
        .command(usage)
        .description(summary)
        .action(async (key) => {
            const evalCommands = await import("./commands/eval.js");
            await evalCommands.evalMark(process.cwd(), key, targetState);
        });
}
262
284
  eval_
263
285
  .command("baseline")
264
286
  .description("Run baseline evaluation with vanilla agent")
@@ -9,6 +9,10 @@ export declare function evalSummary(projectRoot: string, opts: {
9
9
  since?: string;
10
10
  json?: boolean;
11
11
  }): Promise<void>;
12
/**
 * Print the project's eval findings to the console.
 *
 * Only unresolved findings are listed unless `opts.all` is set, in which case
 * fixed and ignored findings are listed as well.
 */
export declare function evalFindings(projectRoot: string, opts: { all?: boolean }): Promise<void>;
/**
 * Record `state` for the finding identified by `key`.
 *
 * Logs a confirmation on success. When no finding with that key exists, an
 * error is logged and the process exits with status 1.
 */
export declare function evalMark(projectRoot: string, key: string, state: "fixed" | "ignored"): Promise<void>;
12
16
  export declare function evalBaseline(projectRoot: string, opts: {
13
17
  task: string;
14
18
  keep?: boolean;
@@ -6,6 +6,7 @@
6
6
  */
7
7
  import { existsSync } from "node:fs";
8
8
  import { join } from "node:path";
9
+ import { getAllFindings, getUnresolvedFindings, markFinding } from "../../lib/eval/findings.js";
9
10
  import { readAllEntries } from "../../lib/eval/log-reader.js";
10
11
  import { isScorecard } from "../../lib/eval/types.js";
11
12
  function getEvalLogPath(projectRoot) {
@@ -106,6 +107,30 @@ function computeSummary(scorecards) {
106
107
  trend,
107
108
  };
108
109
  }
110
/**
 * Print eval findings for a project to the console.
 *
 * @param projectRoot Project directory handed to the findings store.
 * @param opts When `opts.all` is truthy every finding is listed; otherwise
 *   only findings still in the "unresolved" state are listed.
 * @returns Resolves once all lines have been written with `console.info`.
 */
export async function evalFindings(projectRoot, opts) {
    const findings = opts.all ? getAllFindings(projectRoot) : getUnresolvedFindings(projectRoot);
    if (findings.length === 0) {
        console.info(opts.all ? "No eval findings." : "No unresolved findings.");
        return;
    }
    console.info(`\n${opts.all ? "All" : "Unresolved"} eval findings (${findings.length}):\n`);
    for (const f of findings) {
        const icon = f.state === "fixed" ? "✓" : f.state === "ignored" ? "–" : "●";
        // Entries come from a JSON file that is not schema-checked, and a
        // changeId that was undefined at ingest time is dropped on
        // serialization. Do not let one such entry abort the whole listing.
        const shortChange = typeof f.changeId === "string" ? f.changeId.slice(0, 8) : "unknown";
        console.info(` ${icon} [${f.severity}] ${f.questionId}: ${f.finding}`);
        console.info(` key: ${f.key} change: ${shortChange} state: ${f.state}`);
    }
    console.info("");
}
124
/**
 * Record a resolution state for one finding and report the outcome.
 *
 * @param projectRoot Project directory handed to the findings store.
 * @param key Key of the finding to update.
 * @param state State to record for the finding.
 * @returns Resolves after the confirmation is logged. When the key is not
 *   found, an error is logged and the process exits with status 1.
 */
export async function evalMark(projectRoot, key, state) {
    const wasMarked = markFinding(projectRoot, key, state);
    if (!wasMarked) {
        console.error(`Finding not found: ${key}`);
        process.exit(1);
        return;
    }
    console.info(`Marked ${key} as ${state}`);
}
109
134
  function computePassRates(cards) {
110
135
  const counts = {};
111
136
  for (const card of cards) {
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Tracks eval finding resolution state.
3
+ *
4
+ * Findings persist as "unresolved" until explicitly fixed or ignored.
5
+ * The eval hook surfaces unresolved findings on every jj describe.
6
+ */
7
+ import type { EvalScorecard } from "./types.js";
8
/** Resolution state of a tracked finding. */
export type FindingState = "unresolved" | "fixed" | "ignored";
/** One tracked finding, as stored under its key in the findings file. */
export interface FindingEntry {
    /** Current resolution state; new findings start as "unresolved". */
    state: FindingState;
    /** Id of the rubric question that produced the finding. */
    questionId: string;
    /** Severity copied from the scorecard question. */
    severity: string;
    /** Finding text copied from the scorecard question. */
    finding: string;
    /** Id of the change the scorecard was produced for. */
    changeId: string;
}
/** Return every stored finding whose state is "unresolved", each merged with its key. */
export declare function getUnresolvedFindings(projectRoot: string): ({ key: string } & FindingEntry)[];
/** Return every stored finding regardless of state, each merged with its key. */
export declare function getAllFindings(projectRoot: string): ({ key: string } & FindingEntry)[];
/**
 * Set the state of the finding stored under `key` and persist the change.
 *
 * @returns `false` when no finding is stored under `key`, `true` otherwise.
 */
export declare function markFinding(projectRoot: string, key: string, state: FindingState): boolean;
/**
 * Add an "unresolved" finding for each scorecard question not answered "yes"
 * that is not tracked yet, keyed as `<changeId>:<questionId>`.
 *
 * @returns The number of findings that were newly added.
 */
export declare function ingestScorecard(projectRoot: string, scorecard: EvalScorecard): number;
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Tracks eval finding resolution state.
3
+ *
4
+ * Findings persist as "unresolved" until explicitly fixed or ignored.
5
+ * The eval hook surfaces unresolved findings on every jj describe.
6
+ */
7
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
8
+ import { dirname, join } from "node:path";
9
/** Location of the per-project findings store: `<projectRoot>/.indusk/eval/findings.json`. */
function getFindingsPath(projectRoot) {
    return join(projectRoot, ".indusk", "eval", "findings.json");
}
/** True for non-null, non-array objects — the only shape the store and its entries may take. */
function isRecord(value) {
    return typeof value === "object" && value !== null && !Array.isArray(value);
}
/**
 * True when `key` is an own property of `findings` that holds an entry object.
 *
 * A plain truthiness test is not enough: keys such as "constructor" or
 * "toString" resolve through Object.prototype even on an empty store, which
 * would let callers "mark" a finding that does not exist and mutate a built-in.
 */
function hasFinding(findings, key) {
    return Object.prototype.hasOwnProperty.call(findings, key) && isRecord(findings[key]);
}
/**
 * Load the findings map (key → entry) for a project.
 *
 * Returns an empty map when the file is missing, cannot be parsed, or does
 * not contain a JSON object, so callers can always index and iterate the result.
 */
function readFindings(projectRoot) {
    const path = getFindingsPath(projectRoot);
    if (!existsSync(path)) {
        return {};
    }
    try {
        const parsed = JSON.parse(readFileSync(path, "utf8"));
        // JSON.parse also succeeds for `null`, arrays and scalars; none of
        // those is a usable findings map.
        return isRecord(parsed) ? parsed : {};
    }
    catch {
        return {};
    }
}
/** Persist the findings map as pretty-printed JSON, creating the directory if needed. */
function writeFindings(projectRoot, findings) {
    const path = getFindingsPath(projectRoot);
    mkdirSync(dirname(path), { recursive: true });
    writeFileSync(path, `${JSON.stringify(findings, null, 2)}\n`);
}
/**
 * Return every stored finding whose state is "unresolved".
 *
 * @param projectRoot Project directory containing `.indusk/eval/findings.json`.
 * @returns Array of entries, each merged with its `key`.
 */
export function getUnresolvedFindings(projectRoot) {
    return getAllFindings(projectRoot).filter((finding) => finding.state === "unresolved");
}
/**
 * Return every stored finding regardless of state.
 *
 * @param projectRoot Project directory containing `.indusk/eval/findings.json`.
 * @returns Array of entries, each merged with its `key`. Values in the store
 *   that are not objects are skipped rather than crashing the listing.
 */
export function getAllFindings(projectRoot) {
    const findings = readFindings(projectRoot);
    return Object.entries(findings)
        .filter(([, entry]) => isRecord(entry))
        .map(([key, entry]) => ({ key, ...entry }));
}
/**
 * Set the state of the finding stored under `key` and persist the store.
 *
 * @param projectRoot Project directory containing the findings store.
 * @param key Finding key, as reported by the getters.
 * @param state New state to record.
 * @returns `false` (and nothing is written) when no finding is stored under
 *   `key`; `true` after the updated store has been written.
 */
export function markFinding(projectRoot, key, state) {
    const findings = readFindings(projectRoot);
    if (!hasFinding(findings, key)) {
        return false;
    }
    findings[key].state = state;
    writeFindings(projectRoot, findings);
    return true;
}
/**
 * Record an "unresolved" finding for each scorecard question that was not
 * answered "yes" and is not tracked yet. Findings are keyed as
 * `<changeId>:<questionId>`; existing entries keep their current state.
 *
 * @param projectRoot Project directory containing the findings store.
 * @param scorecard Scorecard with `changeId` and a `questions` array.
 * @returns Number of findings added. The store is only rewritten when this is
 *   greater than zero. A scorecard without a `questions` array adds nothing.
 */
export function ingestScorecard(projectRoot, scorecard) {
    if (!Array.isArray(scorecard?.questions)) {
        return 0;
    }
    const findings = readFindings(projectRoot);
    let added = 0;
    for (const q of scorecard.questions) {
        if (q.answer === "yes") {
            continue; // no finding for passing questions
        }
        const key = `${scorecard.changeId}:${q.id}`;
        if (hasFinding(findings, key)) {
            continue; // already tracked — keep whatever state it has
        }
        findings[key] = {
            state: "unresolved",
            questionId: q.id,
            severity: q.severity,
            finding: q.finding,
            changeId: scorecard.changeId,
        };
        added++;
    }
    if (added > 0) {
        writeFindings(projectRoot, findings);
    }
    return added;
}
@@ -8,6 +8,7 @@
8
8
  import { spawn } from "node:child_process";
9
9
  import { join } from "node:path";
10
10
  import { getProjectGroupId } from "../config.js";
11
+ import { ingestScorecard } from "./findings.js";
11
12
  import { EvalLogWriter } from "./log-writer.js";
12
13
  import { buildJudgePrompt } from "./prompt-builder.js";
13
14
  import { V1_RUBRIC } from "./rubric.js";
@@ -127,6 +128,7 @@ export function runJudgeBackground(opts) {
127
128
  scorecard.telemetryPosted = true;
128
129
  }
129
130
  await logWriter.append(scorecard);
131
+ ingestScorecard(opts.projectRoot, scorecard);
130
132
  }
131
133
  catch (err) {
132
134
  const errorEntry = {
@@ -230,6 +232,7 @@ export async function runJudgeSync(opts) {
230
232
  scorecard.telemetryPosted = true;
231
233
  }
232
234
  await logWriter.append(scorecard);
235
+ ingestScorecard(opts.projectRoot, scorecard);
233
236
  resolve(scorecard);
234
237
  }
235
238
  catch (err) {
@@ -5,7 +5,7 @@
5
5
  * rubric, defined in rubric.ts and answered by the judge agent.
6
6
  */
7
7
/**
 * Type guard for scorecard log entries.
 *
 * An entry counts as a scorecard when it is an object without an `error`
 * property whose `questions` property is an array. Anything that is not an
 * object (null, undefined, strings, numbers) is rejected instead of throwing,
 * because the `in` operator raises a TypeError on non-object operands.
 *
 * @param entry Value read from the eval log.
 * @returns `true` when `entry` has the scorecard shape described above.
 */
export function isScorecard(entry) {
    if (typeof entry !== "object" || entry === null) {
        return false;
    }
    return !("error" in entry) && Array.isArray(entry.questions);
}
10
10
  export function isErrorEntry(entry) {
11
11
  return "error" in entry && entry.error === true;
@@ -141,6 +141,25 @@ if (!judgeRunnerPath) {
141
141
  process.exit(0);
142
142
  }
143
143
 
144
// Surface unresolved findings from previous evals on stderr before the judge
// is spawned. The findings module is looked up next to the judge runner; if it
// is missing or fails to load, this step is skipped.
const findingsPath = judgeRunnerPath.replace("judge-runner.js", "findings.js");
if (existsSync(findingsPath)) {
	try {
		const { getUnresolvedFindings } = await import(findingsPath);
		const unresolved = getUnresolvedFindings(projectRoot);
		if (unresolved.length > 0) {
			// Print the full key for each finding: the hint below tells the user to
			// pass <key> to `indusk eval fix|ignore`, and the key (change id plus
			// question id) cannot be rebuilt from a shortened change id.
			const lines = unresolved.map(
				(f) => ` [${f.severity}] ${f.questionId}: ${f.finding} (key: ${f.key})`,
			);
			process.stderr.write(
				`\n📊 Unresolved eval findings (${unresolved.length}):\n${lines.join("\n")}\nUse \`indusk eval fix <key>\` or \`indusk eval ignore <key>\` to resolve.\n\n`,
			);
		}
	} catch {
		// findings module not available — skip silently
	}
}
162
+
144
163
  // Spawn a detached node process that calls runJudgeSync (which awaits completion).
145
164
  const judgeScript = `
146
165
  import("${judgeRunnerPath}")
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@infinitedusky/indusk-mcp",
3
- "version": "1.12.0",
3
+ "version": "1.12.3",
4
4
  "description": "InDusk development system — skills, MCP tools, and CLI for structured AI-assisted development",
5
5
  "type": "module",
6
6
  "files": [
package/skills/planner.md CHANGED
@@ -25,7 +25,7 @@ Each document builds on the ones before it. Not every plan needs all five — us
25
25
 
26
26
  The order is always preserved — never write an ADR before the brief, or an impl before the ADR (when both exist).
27
27
 
28
- General-purpose research (insights useful across plans) also lives in `research/` at the repo root.
28
+ General-purpose research (insights useful across plans) also lives in `.indusk/research/`.
29
29
 
30
30
  ## Workflow Types
31
31
 
@@ -62,12 +62,12 @@ Workflow templates are in `templates/workflows/` in the package. They describe w
62
62
  - **refactor**: start with brief (includes boundary map)
63
63
  - **spike**: start with research (and stop there)
64
64
 
65
- **Check for existing research first.** Before writing new research, scan `research/` at the repo root for relevant standalone research docs. If one exists (e.g., `research/auth-options.md`), ask the user: "I found existing research at `research/auth-options.md`. Want to use this as the starting point?" If yes:
65
+ **Check for existing research first.** Before writing new research, scan `.indusk/research/` for relevant standalone research docs. If one exists (e.g., `.indusk/research/auth-options.md`), ask the user: "I found existing research at `.indusk/research/auth-options.md`. Want to use this as the starting point?" If yes:
66
66
  - Copy it to `.indusk/planning/{plan-name}/research.md`
67
67
  - Set the frontmatter status to `complete`
68
68
  - Move straight to the brief
69
69
 
70
- The `research/` directory is for standalone exploration that isn't tied to a plan yet. When it becomes a plan, it moves into the planning folder. The original in `research/` can be deleted or kept as a reference — user's choice.
70
+ The `.indusk/research/` directory is for standalone exploration that isn't tied to a plan yet. When it becomes a plan, it moves into the planning folder. The original in `.indusk/research/` can be deleted or kept as a reference — user's choice.
71
71
 
72
72
  For feature/spike workflows that need new research: Explore the problem space — read code, search the web, check Context7 for library docs. **Query the code graph before scoping** (see toolbelt "Before Modifying Code") — include structural findings in research.md with concrete numbers.
73
73
  Document what you find. The research doc records findings and analysis, but saves the recommendation for the brief.
@@ -336,7 +336,7 @@ date: {YYYY-MM-DD}
336
336
  - {Hindsight — decisions that could have been better, steps to skip or add}
337
337
 
338
338
  ## Insights Worth Carrying Forward
339
- {Takeaways for future plans. Save to research/ if broadly useful.}
339
+ {Takeaways for future plans. Save to .indusk/research/ if broadly useful.}
340
340
 
341
341
  ## Quality Ratchet
342
342
  {Could any mistakes in this plan have been caught automatically by a Biome rule? If yes, add the rule to biome.json and document it in biome-rationale.md. The quality ratchet only gets tighter.}
@@ -361,7 +361,7 @@ date: {YYYY-MM-DD}
361
361
  └── archive/
362
362
  └── {completed-plan}/
363
363
 
364
- research/ # Standalone insights useful across plans
364
+ .indusk/research/ # Standalone insights useful across plans
365
365
  ```
366
366
 
367
367
  - Kebab-case folder names
@@ -374,6 +374,6 @@ research/ # Standalone insights useful across plans
374
374
  - **Use the code graph for scoping.** Before writing a brief or impl, query `analyze_code_relationships` to understand what depends on what. "How many files import X?" and "What calls this function?" prevent underscoping.
375
375
  - Keep Y-statements concise but complete. Every field filled in.
376
376
  - Impl checklists: granular enough to track, not so granular they're busywork.
377
- - When research produces broadly useful insights, also save to `research/` at repo root.
377
+ - When research produces broadly useful insights, also save to `.indusk/research/`.
378
378
  - Cross-reference related plans by path whenever work overlaps between plans.
379
379
  - The user's input is: $ARGUMENTS