@sean.holung/minicode 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/README.md +48 -43
  2. package/dist/scripts/run-benchmarks.js +147 -0
  3. package/dist/src/agent/config.js +149 -40
  4. package/dist/src/agent/editable-config.js +314 -0
  5. package/dist/src/analysis/structural-analysis.js +379 -0
  6. package/dist/src/benchmark/evaluator.js +79 -0
  7. package/dist/src/benchmark/index.js +4 -0
  8. package/dist/src/benchmark/reporter.js +177 -0
  9. package/dist/src/benchmark/runner.js +100 -0
  10. package/dist/src/benchmark/task-loader.js +78 -0
  11. package/dist/src/benchmark/types.js +5 -0
  12. package/dist/src/cli/args.js +10 -0
  13. package/dist/src/cli/config-slash-command.js +135 -0
  14. package/dist/src/cli/plugin-install.js +69 -0
  15. package/dist/src/index.js +76 -6
  16. package/dist/src/indexer/cache.js +6 -4
  17. package/dist/src/indexer/code-map.js +41 -13
  18. package/dist/src/indexer/plugins/typescript.js +70 -23
  19. package/dist/src/indexer/project-index.js +175 -36
  20. package/dist/src/indexer/symbol-names.js +92 -0
  21. package/dist/src/model-utils.js +18 -0
  22. package/dist/src/serve/agent-bridge.js +203 -24
  23. package/dist/src/serve/mcp-server.js +405 -0
  24. package/dist/src/serve/server.js +165 -10
  25. package/dist/src/serve/websocket.js +8 -0
  26. package/dist/src/shared/graph-styles.js +119 -0
  27. package/dist/src/tools/find-path.js +75 -0
  28. package/dist/src/tools/find-references.js +7 -2
  29. package/dist/src/tools/get-dependencies.js +3 -2
  30. package/dist/src/tools/read-symbol.js +12 -5
  31. package/dist/src/tools/registry.js +3 -1
  32. package/dist/src/tools/search-code-map.js +4 -2
  33. package/dist/src/ui/app.js +1 -1
  34. package/dist/src/ui/cli-ink.js +79 -4
  35. package/dist/src/ui/components/header-bar.js +6 -2
  36. package/dist/src/ui/state/ui-store.js +5 -0
  37. package/dist/src/web/app.js +1124 -176
  38. package/dist/src/web/index.html +113 -3
  39. package/dist/src/web/style.css +973 -55
  40. package/dist/tests/agent.test.js +31 -0
  41. package/dist/tests/analysis-helpers.test.js +89 -0
  42. package/dist/tests/analysis-ui.test.js +29 -0
  43. package/dist/tests/benchmark-harness.test.js +527 -0
  44. package/dist/tests/config-api.test.js +143 -0
  45. package/dist/tests/config-integration.test.js +751 -0
  46. package/dist/tests/config-slash-command.test.js +106 -0
  47. package/dist/tests/config.test.js +42 -1
  48. package/dist/tests/context-indicator.test.js +220 -0
  49. package/dist/tests/editable-config.test.js +109 -0
  50. package/dist/tests/find-path.test.js +183 -0
  51. package/dist/tests/focus-tracker.test.js +62 -0
  52. package/dist/tests/graph-onboarding.test.js +55 -0
  53. package/dist/tests/graph-styles.test.js +65 -0
  54. package/dist/tests/indexer.test.js +137 -0
  55. package/dist/tests/mcp-and-plugin.test.js +186 -0
  56. package/dist/tests/model-client-openai.test.js +29 -0
  57. package/dist/tests/model-selection.test.js +136 -0
  58. package/dist/tests/model-utils.test.js +22 -0
  59. package/dist/tests/reasoning-effort.test.js +264 -0
  60. package/dist/tests/run-benchmarks.test.js +161 -0
  61. package/dist/tests/search-code-map.test.js +18 -0
  62. package/dist/tests/serve.integration.test.js +218 -2
  63. package/dist/tests/session-ui.test.js +21 -0
  64. package/dist/tests/session.test.js +50 -0
  65. package/dist/tests/settings-ui.test.js +30 -0
  66. package/dist/tests/structural-analysis.test.js +218 -0
  67. package/node_modules/@minicode/agent-sdk/README.md +80 -51
  68. package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.d.ts +16 -5
  69. package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.d.ts.map +1 -1
  70. package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.js +51 -33
  71. package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.js.map +1 -1
  72. package/node_modules/@minicode/agent-sdk/dist/src/agent/types.d.ts +14 -0
  73. package/node_modules/@minicode/agent-sdk/dist/src/agent/types.d.ts.map +1 -1
  74. package/node_modules/@minicode/agent-sdk/dist/src/index.d.ts +3 -2
  75. package/node_modules/@minicode/agent-sdk/dist/src/index.d.ts.map +1 -1
  76. package/node_modules/@minicode/agent-sdk/dist/src/index.js +2 -0
  77. package/node_modules/@minicode/agent-sdk/dist/src/index.js.map +1 -1
  78. package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.d.ts +35 -0
  79. package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.d.ts.map +1 -0
  80. package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.js +64 -0
  81. package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.js.map +1 -0
  82. package/node_modules/@minicode/agent-sdk/dist/src/indexer/types.d.ts +7 -0
  83. package/node_modules/@minicode/agent-sdk/dist/src/indexer/types.d.ts.map +1 -1
  84. package/node_modules/@minicode/agent-sdk/dist/src/model/client.d.ts +5 -1
  85. package/node_modules/@minicode/agent-sdk/dist/src/model/client.d.ts.map +1 -1
  86. package/node_modules/@minicode/agent-sdk/dist/src/model/client.js +83 -11
  87. package/node_modules/@minicode/agent-sdk/dist/src/model/client.js.map +1 -1
  88. package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.d.ts +1 -0
  89. package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.d.ts.map +1 -1
  90. package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.js +8 -1
  91. package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.js.map +1 -1
  92. package/node_modules/@minicode/agent-sdk/dist/src/session/session.d.ts.map +1 -1
  93. package/node_modules/@minicode/agent-sdk/dist/src/session/session.js +4 -1
  94. package/node_modules/@minicode/agent-sdk/dist/src/session/session.js.map +1 -1
  95. package/node_modules/@minicode/agent-sdk/dist/tests/agent.test.js +3 -1
  96. package/node_modules/@minicode/agent-sdk/dist/tests/agent.test.js.map +1 -1
  97. package/node_modules/@minicode/agent-sdk/dist/tests/guardrails.test.js +8 -2
  98. package/node_modules/@minicode/agent-sdk/dist/tests/guardrails.test.js.map +1 -1
  99. package/node_modules/@minicode/agent-sdk/dist/tsconfig.tsbuildinfo +1 -1
  100. package/package.json +9 -5
  101. package/plugin/.claude-plugin/plugin.json +12 -0
  102. package/plugin/.mcp.json +8 -0
  103. package/plugin/CLAUDE.md +26 -0
  104. package/plugin/skills/analyze/SKILL.md +12 -0
  105. package/plugin/skills/focus/SKILL.md +20 -0
  106. package/plugin/skills/graph/SKILL.md +13 -0
  107. package/plugin/skills/symbols/SKILL.md +13 -0
@@ -0,0 +1,79 @@
1
+ /**
2
+ * Evaluates a benchmark trace against a task's rubric.
3
+ */
4
/**
 * Evaluate a benchmark trace against the task rubric.
 *
 * Produces one check per rubric entry, in this order: expected output
 * patterns, expected files read, expected symbols queried, forbidden output
 * patterns. The task passes only when every check passes; the efficiency
 * budget is reported alongside but does not influence `passed`.
 *
 * @param {string} taskId - Identifier copied into the result.
 * @param {object} rubric - Rubric with optional `expectedOutputPatterns`,
 *   `expectedFilesRead`, `expectedSymbols`, `forbiddenPatterns`,
 *   `maxToolCalls` and `maxTotalTokens`.
 * @param {object} trace - Captured run: `response`, `filesRead`,
 *   `symbolsQueried`, `toolCalls`, `usage.totalTokens`, `durationMs`.
 * @returns {{taskId: string, passed: boolean, checks: object[], efficiency: object}}
 */
export function evaluate(taskId, rubric, trace) {
  const checks = [];
  // A missing response is treated as empty text, not as the string "undefined".
  const response = String(trace.response ?? "");

  // 1. Check expected output patterns
  for (const pattern of rubric.expectedOutputPatterns || []) {
    const { matched, error } = testPattern(pattern, response);
    checks.push({
      name: `output matches /${pattern}/i`,
      passed: matched,
      detail: describePatternFailure(matched, error, `Pattern not found in response`),
    });
  }

  // 2. Check expected files read
  for (const expectedFile of rubric.expectedFilesRead || []) {
    const found = trace.filesRead.some((f) => matchesExpectedFile(f, expectedFile));
    checks.push({
      name: `read file ${expectedFile}`,
      passed: found,
      detail: found ? undefined : `File was not read during the run`,
    });
  }

  // 3. Check expected symbols queried
  for (const sym of rubric.expectedSymbols || []) {
    const found = trace.symbolsQueried.some((s) => s === sym || s.includes(sym));
    checks.push({
      name: `queried symbol ${sym}`,
      passed: found,
      detail: found ? undefined : `Symbol was not queried`,
    });
  }

  // 4. Check forbidden patterns. A pattern that cannot be compiled cannot be
  // proven absent, so it is reported as a failed check.
  for (const pattern of rubric.forbiddenPatterns || []) {
    const { matched, error } = testPattern(pattern, response);
    const absent = !matched && error === undefined;
    checks.push({
      name: `output does NOT match /${pattern}/i`,
      passed: absent,
      detail: describePatternFailure(absent, error, `Forbidden pattern found in response`),
    });
  }

  // 5. Efficiency metrics
  const efficiency = computeEfficiency(rubric, trace);
  return {
    taskId,
    passed: checks.every((c) => c.passed),
    checks,
    efficiency,
  };
}

/**
 * Test `text` against a case-insensitive pattern exactly once.
 * An invalid pattern is reported through `error` instead of throwing, so one
 * bad rubric entry cannot abort the whole evaluation.
 */
function testPattern(pattern, text) {
  try {
    return { matched: new RegExp(pattern, "i").test(text), error: undefined };
  } catch (err) {
    return { matched: false, error: err instanceof Error ? err.message : String(err) };
  }
}

/** Build the `detail` text of a pattern check: undefined on success. */
function describePatternFailure(passed, error, fallback) {
  if (passed) {
    return undefined;
  }
  return error !== undefined ? `Invalid pattern: ${error}` : fallback;
}

/**
 * True when `actual` is `expected` or ends with it on a path-segment
 * boundary, so "src/myutils.ts" does not satisfy "utils.ts". Backslashes are
 * normalized so Windows-style paths compare equal to POSIX-style rubrics.
 */
function matchesExpectedFile(actual, expected) {
  const a = String(actual).replace(/\\/g, "/");
  const e = String(expected).replace(/\\/g, "/");
  return a === e || a.endsWith(e.startsWith("/") ? e : `/${e}`);
}

/**
 * Collect efficiency metrics for a trace and compare them with the optional
 * `maxToolCalls` / `maxTotalTokens` budgets; an unset budget always passes.
 */
function computeEfficiency(rubric, trace) {
  const toolCallCount = trace.toolCalls.length;
  const totalTokens = trace.usage.totalTokens;
  const withinToolBudget = rubric.maxToolCalls == null || toolCallCount <= rubric.maxToolCalls;
  const withinTokenBudget = rubric.maxTotalTokens == null || totalTokens <= rubric.maxTotalTokens;
  return {
    toolCallCount,
    totalTokens,
    durationMs: trace.durationMs,
    filesReadCount: trace.filesRead.length,
    symbolsQueriedCount: trace.symbolsQueried.length,
    withinBudget: withinToolBudget && withinTokenBudget,
  };
}
@@ -0,0 +1,4 @@
1
+ export { loadBenchmarkTask, loadBenchmarkTasks } from "./task-loader.js";
2
+ export { evaluate } from "./evaluator.js";
3
+ export { runBenchmarkTask, runBenchmarkSuite, } from "./runner.js";
4
+ export { buildReport, buildReportFromEvaluations, formatReport, compareReports, } from "./reporter.js";
@@ -0,0 +1,177 @@
1
+ /**
2
+ * Generates benchmark reports from evaluation results and traces.
3
+ */
4
+ import { evaluate } from "./evaluator.js";
5
/**
 * Evaluate every trace against its task definition and assemble the report.
 *
 * @param tasks   Task definitions; matched to traces through `id`.
 * @param traces  Captured traces, each carrying a `taskId`.
 * @param variant Label copied into the report.
 * @param model   Model name copied into the report.
 * @returns Report holding `variant`, `model`, `generatedAt` (ISO timestamp),
 *          the per-task `results` and the aggregated `summary`.
 * @throws {Error} When a trace refers to a task id that is not in `tasks`.
 */
export function buildReport(tasks, traces, variant, model) {
  const tasksById = new Map();
  for (const task of tasks) {
    tasksById.set(task.id, task);
  }

  const results = [];
  for (const trace of traces) {
    const task = tasksById.get(trace.taskId);
    if (!task) {
      throw new Error(`No task definition found for trace: ${trace.taskId}`);
    }
    results.push({
      taskId: task.id,
      category: task.category,
      evaluation: evaluate(task.id, task.rubric, trace),
      trace,
    });
  }

  const generatedAt = new Date().toISOString();
  const summary = computeSummary(results);
  return { variant, model, generatedAt, results, summary };
}
31
/**
 * Assemble a report from evaluations that were computed beforehand.
 *
 * Each trace is paired with the task and the evaluation sharing its
 * `taskId`; nothing is re-evaluated here.
 *
 * @throws {Error} When a trace has no matching task or no matching evaluation.
 */
export function buildReportFromEvaluations(evaluations, traces, tasks, variant, model) {
  const tasksById = new Map();
  for (const task of tasks) {
    tasksById.set(task.id, task);
  }
  const evaluationsById = new Map();
  for (const evaluation of evaluations) {
    evaluationsById.set(evaluation.taskId, evaluation);
  }

  const results = [];
  for (const trace of traces) {
    const task = tasksById.get(trace.taskId);
    const evaluation = evaluationsById.get(trace.taskId);
    if (!(task && evaluation)) {
      throw new Error(`Missing task or evaluation for: ${trace.taskId}`);
    }
    results.push({ taskId: task.id, category: task.category, evaluation, trace });
  }

  const generatedAt = new Date().toISOString();
  const summary = computeSummary(results);
  return { variant, model, generatedAt, results, summary };
}
58
/**
 * Aggregate per-task results into overall and per-category statistics.
 *
 * @param {Array<{category: string, evaluation: {passed: boolean,
 *   efficiency: {toolCallCount: number, totalTokens: number, durationMs: number}}}>} results
 * @returns {{totalTasks: number, passed: number, failed: number, passRate: number,
 *   byCategory: Object<string, {total: number, passed: number, passRate: number}>,
 *   avgToolCalls: number, avgTotalTokens: number, avgDurationMs: number}}
 *   All rates and averages are 0 for an empty input.
 */
function computeSummary(results) {
  const total = results.length;
  let passed = 0;
  let toolCallSum = 0;
  let tokenSum = 0;
  let durationSum = 0;
  // A Map keeps category names such as "toString" or "constructor" from
  // colliding with members inherited from Object.prototype.
  const statsByCategory = new Map();

  for (const { category, evaluation } of results) {
    let stats = statsByCategory.get(category);
    if (stats === undefined) {
      stats = { total: 0, passed: 0, passRate: 0 };
      statsByCategory.set(category, stats);
    }
    stats.total += 1;
    if (evaluation.passed) {
      stats.passed += 1;
      passed += 1;
    }
    toolCallSum += evaluation.efficiency.toolCallCount;
    tokenSum += evaluation.efficiency.totalTokens;
    durationSum += evaluation.efficiency.durationMs;
  }

  for (const stats of statsByCategory.values()) {
    // Every entry was created by at least one result, so total is >= 1.
    stats.passRate = stats.passed / stats.total;
  }

  const average = (sum) => (total > 0 ? sum / total : 0);
  return {
    totalTasks: total,
    passed,
    failed: total - passed,
    passRate: average(passed),
    // Object.fromEntries defines own data properties, so the result is still
    // a plain object for callers.
    byCategory: Object.fromEntries(statsByCategory),
    avgToolCalls: average(toolCallSum),
    avgTotalTokens: average(tokenSum),
    avgDurationMs: average(durationSum),
  };
}
96
/**
 * Render a report as plain text for the terminal.
 *
 * Sections, in order: header, overall result, per-category pass rates,
 * average efficiency figures, then one entry per task listing its checks
 * and its efficiency counters.
 *
 * @param report Report object as produced by the report builders.
 * @returns {string} Newline-joined text.
 */
export function formatReport(report) {
  const summary = report.summary;
  const asPercent = (rate) => (rate * 100).toFixed(1);

  const categoryLines = Object.entries(summary.byCategory).map(
    ([name, stats]) => ` ${name}: ${stats.passed}/${stats.total} (${asPercent(stats.passRate)}%)`,
  );

  const taskLines = report.results.flatMap((result) => {
    const { passed, checks, efficiency } = result.evaluation;
    const checkLines = checks.map((check) => {
      const suffix = check.detail ? ` (${check.detail})` : "";
      return ` [${check.passed ? "+" : "-"}] ${check.name}${suffix}`;
    });
    return [
      ` [${passed ? "PASS" : "FAIL"}] ${result.taskId}`,
      ...checkLines,
      ` tools: ${efficiency.toolCallCount}, tokens: ${efficiency.totalTokens}, files: ${efficiency.filesReadCount}, symbols: ${efficiency.symbolsQueriedCount}`,
    ];
  });

  return [
    `Benchmark Report: ${report.variant}`,
    `Model: ${report.model}`,
    `Generated: ${report.generatedAt}`,
    "",
    `Results: ${summary.passed}/${summary.totalTasks} passed (${asPercent(summary.passRate)}%)`,
    "",
    "By category:",
    ...categoryLines,
    "",
    "Efficiency (averages):",
    ` Tool calls: ${summary.avgToolCalls.toFixed(1)}`,
    ` Total tokens: ${summary.avgTotalTokens.toFixed(0)}`,
    ` Duration: ${summary.avgDurationMs.toFixed(0)}ms`,
    "",
    "Task details:",
    ...taskLines,
  ].join("\n");
}
136
/**
 * Compare two reports side by side.
 *
 * Prints the overall pass rate and average efficiency figures of both
 * reports with their deltas, followed by per-task changes: tasks that are
 * new in the candidate, tasks whose pass/fail status flipped, and tasks that
 * exist only in the baseline.
 *
 * @param baseline  Report used as the reference.
 * @param candidate Report being compared against the reference.
 * @returns {string} Newline-joined text.
 */
export function compareReports(baseline, candidate) {
  const lines = [];
  lines.push(`Comparison: "${baseline.variant}" vs "${candidate.variant}"`);
  lines.push(`Models: ${baseline.model} vs ${candidate.model}`);
  lines.push("");
  const bs = baseline.summary;
  const cs = candidate.summary;
  // Pass rates are fractions in [0, 1] but are printed as percentages, so the
  // delta is scaled to percentage points to match the numbers beside it.
  const passRateDelta = (cs.passRate - bs.passRate) * 100;
  lines.push("Overall:");
  lines.push(` Pass rate: ${(bs.passRate * 100).toFixed(1)}% -> ${(cs.passRate * 100).toFixed(1)}% (${formatDelta(passRateDelta, true)})`);
  lines.push(` Avg tool calls: ${bs.avgToolCalls.toFixed(1)} -> ${cs.avgToolCalls.toFixed(1)} (${formatDelta(cs.avgToolCalls - bs.avgToolCalls, false)})`);
  lines.push(` Avg tokens: ${bs.avgTotalTokens.toFixed(0)} -> ${cs.avgTotalTokens.toFixed(0)} (${formatDelta(cs.avgTotalTokens - bs.avgTotalTokens, false)})`);
  lines.push(` Avg duration: ${bs.avgDurationMs.toFixed(0)}ms -> ${cs.avgDurationMs.toFixed(0)}ms (${formatDelta(cs.avgDurationMs - bs.avgDurationMs, false)})`);
  lines.push("");
  // Per-task comparison
  const baseResults = new Map(baseline.results.map((r) => [r.taskId, r]));
  const candidateIds = new Set();
  lines.push("Per-task changes:");
  for (const cr of candidate.results) {
    candidateIds.add(cr.taskId);
    const br = baseResults.get(cr.taskId);
    if (!br) {
      lines.push(` [NEW] ${cr.taskId}: ${cr.evaluation.passed ? "PASS" : "FAIL"}`);
      continue;
    }
    if (br.evaluation.passed !== cr.evaluation.passed) {
      const change = cr.evaluation.passed ? "FIXED" : "REGRESSED";
      lines.push(` [${change}] ${cr.taskId}`);
    }
  }
  // Tasks that only the baseline ran would otherwise disappear silently.
  for (const br of baseline.results) {
    if (!candidateIds.has(br.taskId)) {
      lines.push(` [REMOVED] ${br.taskId}`);
    }
  }
  return lines.join("\n");
}

/**
 * Format a numeric delta with one decimal and a better/worse verdict.
 *
 * @param {number} delta - Candidate value minus baseline value.
 * @param {boolean} higherIsBetter - Whether an increase counts as an improvement.
 * @returns {string} "no change" when the delta rounds to 0.0, otherwise the
 *   signed delta followed by "better" or "worse".
 */
function formatDelta(delta, higherIsBetter) {
  const rounded = delta.toFixed(1);
  // Decide on the rounded value so the output never reads "+0.0 worse".
  if (Number(rounded) === 0) {
    return "no change";
  }
  const formatted = delta > 0 ? `+${rounded}` : rounded;
  const improved = higherIsBetter ? delta > 0 : delta < 0;
  return `${formatted} ${improved ? "better" : "worse"}`;
}
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Benchmark runner — executes tasks via the agent and captures traces.
3
+ *
4
+ * Uses CodingAgent directly (not the CLI subprocess) for full control
5
+ * over tool-call instrumentation and trace capture.
6
+ */
7
+ import { execSync } from "node:child_process";
8
+ import { CodingAgent, Session, ToolRegistry, } from "@minicode/agent-sdk";
9
/**
 * Return the commit SHA that HEAD points to, or "unknown" when it cannot be
 * determined (git missing, not a repository, or empty output).
 *
 * The command runs in the current working directory of the process.
 *
 * @returns {string}
 */
function getGitCommitSha() {
    try {
        const sha = execSync("git rev-parse HEAD", {
            encoding: "utf8",
            // Discard git's stderr so failures such as "not a git repository"
            // do not leak into the benchmark's terminal output.
            stdio: ["ignore", "pipe", "ignore"],
        }).trim();
        return sha.length > 0 ? sha : "unknown";
    }
    catch {
        return "unknown";
    }
}
17
// Names of the tools whose calls are inspected for a symbol or query argument.
const STRUCTURAL_TOOL_NAMES = ["read_symbol", "find_references", "get_dependencies", "search_code_map"];
const STRUCTURAL_TOOLS = new Set(STRUCTURAL_TOOL_NAMES);
23
/**
 * Run a single benchmark task and return the captured trace.
 *
 * Every tool in `options.tools` is wrapped so that each call is recorded
 * (name, input, possibly truncated output, duration) before the agent sees
 * the result. File paths and symbol names are collected from the tool inputs.
 *
 * @param task    Task with at least `id` and `prompt`.
 * @param options Run options: `tools`, `config` (its `model` is copied into
 *                the trace), `modelClient`, `variant`, and an optional
 *                `onTaskComplete(taskId, trace)` callback.
 * @returns The trace for this run. Errors from the agent turn propagate.
 */
export async function runBenchmarkTask(task, options) {
    const MAX_CAPTURED_OUTPUT = 2000;
    const captured = [];
    const filesRead = new Set();
    const symbolsQueried = new Set();
    // Wrap each tool to capture calls
    const instrumentedTools = options.tools.map((tool) => ({
        ...tool,
        execute: async (input, ...rest) => {
            const start = performance.now();
            let output;
            try {
                // Forward every argument so the wrapper stays transparent if
                // the caller passes more than the input.
                output = await tool.execute(input, ...rest);
            }
            catch (error) {
                // Record the failed call as well, then let the caller handle
                // the error exactly as it would without instrumentation.
                captured.push({
                    name: tool.name,
                    input,
                    output: `[error] ${error instanceof Error ? error.message : String(error)}`,
                    durationMs: performance.now() - start,
                });
                throw error;
            }
            const durationMs = performance.now() - start;
            // Only string output is truncated; anything else is stored as is
            // instead of crashing on a missing `.length` / `.slice`.
            const tooLong = typeof output === "string" && output.length > MAX_CAPTURED_OUTPUT;
            captured.push({
                name: tool.name,
                input,
                output: tooLong ? output.slice(0, MAX_CAPTURED_OUTPUT) + "…[truncated]" : output,
                durationMs,
            });
            // Track files read
            if (tool.name === "read_file" || tool.name === "read_symbol") {
                const filePath = input?.path ?? input?.file_path ?? input?.filePath;
                if (typeof filePath === "string") {
                    filesRead.add(filePath);
                }
            }
            // Track symbol queries
            if (STRUCTURAL_TOOLS.has(tool.name)) {
                const sym = input?.symbol ?? input?.symbolName ?? input?.name ?? input?.query;
                if (typeof sym === "string") {
                    symbolsQueried.add(sym);
                }
            }
            return output;
        },
    }));
    const registry = new ToolRegistry(instrumentedTools);
    const session = new Session();
    const agent = new CodingAgent({
        config: options.config,
        modelClient: options.modelClient,
        toolRegistry: registry,
        session,
    });
    const startedAt = new Date().toISOString();
    const start = performance.now();
    const { text, usage } = await agent.runTurn(task.prompt);
    const durationMs = performance.now() - start;
    // Missing usage figures count as zero.
    const inputTokens = usage?.inputTokens ?? 0;
    const outputTokens = usage?.outputTokens ?? 0;
    const trace = {
        taskId: task.id,
        model: options.config.model,
        variant: options.variant,
        commitSha: getGitCommitSha(),
        response: text,
        toolCalls: captured,
        filesRead: [...filesRead],
        symbolsQueried: [...symbolsQueried],
        usage: {
            inputTokens,
            outputTokens,
            totalTokens: inputTokens + outputTokens,
        },
        durationMs,
        startedAt,
    };
    options.onTaskComplete?.(task.id, trace);
    return trace;
}
90
/**
 * Execute the given benchmark tasks one after another.
 *
 * Each task is awaited before the next one starts.
 *
 * @returns The traces, in the same order as `tasks`.
 */
export async function runBenchmarkSuite(tasks, options) {
    const collected = [];
    for (const current of tasks) {
        collected.push(await runBenchmarkTask(current, options));
    }
    return collected;
}
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Loads benchmark tasks from the benchmarks/tasks/ directory.
3
+ *
4
+ * Each task lives in a category subdirectory and contains:
5
+ * - task.json — task metadata, prompt, and rubric
6
+ */
7
+ import { readFile, readdir, stat } from "node:fs/promises";
8
+ import path from "node:path";
9
// Directory names that are recognized as benchmark categories.
const VALID_CATEGORIES = new Set(["navigation", "editing", "refactors", "debugging", "planning"]);

/** Report whether `candidate` is one of the recognized category names. */
function isValidCategory(candidate) {
    const recognized = VALID_CATEGORIES.has(candidate);
    return recognized;
}
19
/**
 * Load all benchmark tasks from the given base directory.
 * Expects: `<baseDir>/<category>/<task-name>/task.json`
 *
 * Entries of `baseDir` that are not directories or not a recognized category
 * are ignored, as are task directories without a `task.json`.
 *
 * @param {string} baseDir - Directory containing the category subdirectories.
 * @returns {Promise<object[]>} Tasks sorted by id (`<category>/<task-name>`).
 * @throws {SyntaxError} When a `task.json` is not valid JSON; the message
 *   names the offending file.
 */
export async function loadBenchmarkTasks(baseDir) {
    const tasks = [];
    const entries = await readdir(baseDir, { withFileTypes: true });
    for (const entry of entries) {
        if (!entry.isDirectory() || !isValidCategory(entry.name)) {
            continue;
        }
        const category = entry.name;
        const categoryDir = path.join(baseDir, category);
        const taskDirs = await readdir(categoryDir, { withFileTypes: true });
        for (const taskDir of taskDirs) {
            if (!taskDir.isDirectory()) {
                continue;
            }
            const taskJsonPath = path.join(categoryDir, taskDir.name, "task.json");
            let raw;
            try {
                // Read directly instead of stat-then-read: one fewer syscall
                // and no window in which the file can vanish in between.
                raw = await readFile(taskJsonPath, "utf8");
            }
            catch (error) {
                // A directory without task.json is simply not a task; any
                // other failure (permissions, I/O) is a real problem.
                if (error?.code === "ENOENT") {
                    continue;
                }
                throw error;
            }
            let parsed;
            try {
                parsed = JSON.parse(raw);
            }
            catch (error) {
                const reason = error instanceof Error ? error.message : String(error);
                throw new SyntaxError(`Invalid JSON in ${taskJsonPath}: ${reason}`, { cause: error });
            }
            tasks.push({
                id: `${category}/${taskDir.name}`,
                title: parsed.title,
                category,
                prompt: parsed.prompt,
                workspaceRoot: parsed.workspaceRoot,
                rubric: parsed.rubric,
            });
        }
    }
    return tasks.sort((a, b) => a.id.localeCompare(b.id));
}
55
/**
 * Load a single benchmark task by its id (e.g. "navigation/find-symbol-definition").
 *
 * @param {string} baseDir - Directory containing the category subdirectories.
 * @param {string} taskId - `<category>/<task-name>`.
 * @returns {Promise<object|undefined>} The task, or `undefined` when the
 *   category is not recognized, the id points outside `baseDir`, or no
 *   `task.json` exists for it.
 * @throws {SyntaxError} When the `task.json` is not valid JSON; the message
 *   names the offending file.
 */
export async function loadBenchmarkTask(baseDir, taskId) {
    // Validate the id before touching the filesystem.
    const [category] = taskId.split("/");
    if (!category || !isValidCategory(category)) {
        return undefined;
    }
    // Reject ids such as "navigation/../../secret" that resolve outside baseDir.
    const root = path.resolve(baseDir);
    const relative = path.relative(root, path.resolve(root, taskId));
    const escapesRoot = relative === "" ||
        relative === ".." ||
        relative.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relative);
    if (escapesRoot) {
        return undefined;
    }
    const taskJsonPath = path.join(baseDir, taskId, "task.json");
    let raw;
    try {
        raw = await readFile(taskJsonPath, "utf8");
    }
    catch (error) {
        // Missing file or missing/non-directory path component: no such task.
        if (error?.code === "ENOENT" || error?.code === "ENOTDIR") {
            return undefined;
        }
        throw error;
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        throw new SyntaxError(`Invalid JSON in ${taskJsonPath}: ${reason}`, { cause: error });
    }
    return {
        id: taskId,
        title: parsed.title,
        category,
        prompt: parsed.prompt,
        workspaceRoot: parsed.workspaceRoot,
        rubric: parsed.rubric,
    };
}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Benchmark harness types for measuring agent quality, efficiency,
3
+ * and structural-tool usage across repeatable tasks.
4
+ */
5
+ export {};
@@ -11,6 +11,7 @@ export function parseCliArgs(argv) {
11
11
  let json = false;
12
12
  let outFile;
13
13
  let serve = false;
14
+ let pluginInstall = false;
14
15
  let port = 4567;
15
16
  const taskParts = [];
16
17
  for (let i = 0; i < args.length; i += 1) {
@@ -22,6 +23,11 @@ export function parseCliArgs(argv) {
22
23
  serve = true;
23
24
  continue;
24
25
  }
26
+ if (arg === "plugin" && args[i + 1] === "install") {
27
+ pluginInstall = true;
28
+ i += 1;
29
+ continue;
30
+ }
25
31
  if (arg === "--port") {
26
32
  const value = args[i + 1];
27
33
  if (!value || value.startsWith("-")) {
@@ -81,6 +87,7 @@ export function parseCliArgs(argv) {
81
87
  serve,
82
88
  port,
83
89
  task: taskParts.join(" ").trim(),
90
+ pluginInstall,
84
91
  };
85
92
  }
86
93
  export function validateCliArgs(args) {
@@ -93,4 +100,7 @@ export function validateCliArgs(args) {
93
100
  if (args.serve && (args.oneshot || args.json || args.outFile)) {
94
101
  throw new CliUsageError("serve mode is mutually exclusive with --oneshot, --json, and --out.");
95
102
  }
103
+ if (args.pluginInstall && (args.serve || args.oneshot)) {
104
+ throw new CliUsageError("plugin install is mutually exclusive with serve and --oneshot.");
105
+ }
96
106
  }
@@ -0,0 +1,135 @@
1
+ import { formatConfigForDisplay, MINICODE_HOME, resolveConfigEnv } from "../agent/config.js";
2
+ import { formatPersistedConfigValue, getEditableConfigDefinition, getEffectiveEditableConfigValue, isEditableConfigKey, listEditableConfigDefinitions, loadPersistedConfig, setPersistedConfigValue, unsetPersistedConfigValue, } from "../agent/editable-config.js";
3
/** Build the usage text listing every supported `/config` form. */
function renderUsage() {
    const forms = [
        "/config",
        "/config keys",
        "/config get <key>",
        "/config set <key> <value>",
        "/config unset <key>",
    ];
    const body = forms.map((form) => ` ${form}`);
    return ["Usage:", ...body].join("\n");
}
13
/**
 * Build the help text that lists every editable config key with its value
 * hint, description and overriding environment variable.
 */
function renderEditableKeys() {
    const describe = (definition) => {
        let valueHint;
        if (definition.type === "enum") {
            valueHint = `<${definition.values?.join("|")}>`;
        }
        else {
            valueHint = `<${definition.type}>`;
        }
        return ` ${definition.key} ${valueHint} — ${definition.description} (env: ${definition.envVar})`;
    };
    return [
        "Editable config keys (persisted in ~/.minicode/agent.config.json; environment variables take precedence):",
        ...Array.from(listEditableConfigDefinitions(), (definition) => describe(definition)),
        "",
        'Use "/config set <key> <value>" to update your global config.',
        "Secrets like API keys stay env-only for now.",
    ].join("\n");
}
28
/**
 * Describe one editable key: its effective value, the value stored in the
 * config file, and the value supplied through its environment variable.
 * Unknown keys yield an error message followed by the list of valid keys.
 */
async function renderConfigValue(key, context) {
    const known = isEditableConfigKey(key);
    if (!known) {
        return `Unknown editable config key "${key}".\n\n${renderEditableKeys()}`;
    }
    const home = context.minicodeHome ?? MINICODE_HOME;
    const definition = getEditableConfigDefinition(key);
    const stored = await loadPersistedConfig(home);
    const resolvedEnv = await resolveConfigEnv({ minicodeHome: home });
    const fromEnv = resolvedEnv.values[definition.envVar];

    const report = [];
    report.push(`${definition.key}`);
    report.push(` effective: ${getEffectiveEditableConfigValue(context.config, key)}`);
    report.push(` config file: ${formatPersistedConfigValue(stored[definition.fileKey])}`);
    report.push(` env override (${definition.envVar}): ${formatPersistedConfigValue(fromEnv)}`);
    return report.join("\n");
}
44
/**
 * Store a value for an editable key and describe the outcome.
 *
 * Returns a confirmation naming the stored value and file, plus a note when
 * the key's environment variable is set. A failure while saving is turned
 * into a "Failed to save config" message instead of being thrown.
 */
async function persistConfigValue(key, rawValue, context) {
    const known = isEditableConfigKey(key);
    if (!known) {
        return `Unknown editable config key "${key}".\n\n${renderEditableKeys()}`;
    }
    const home = context.minicodeHome ?? MINICODE_HOME;
    const definition = getEditableConfigDefinition(key);
    const resolvedEnv = await resolveConfigEnv({ minicodeHome: home });
    try {
        const outcome = await setPersistedConfigValue({ key, rawValue, minicodeHome: home });
        const saved = `Saved config: ${key} = ${formatPersistedConfigValue(outcome.storedValue)}`;
        const location = `File: ${outcome.path}`;
        const restartHint = "Restart minicode to pick up persisted config changes in a new session.";
        const overridden = resolvedEnv.values[definition.envVar] !== undefined;
        const report = overridden
            ? [
                saved,
                location,
                restartHint,
                `Note: ${definition.envVar} is currently set and will override this persisted value until it is unset.`,
            ]
            : [saved, location, restartHint];
        return report.join("\n");
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : "Unknown error";
        return `Failed to save config: ${reason}`;
    }
}
72
/**
 * Remove the persisted value of an editable key and describe the outcome.
 *
 * Unknown keys yield an error message followed by the list of valid keys.
 * A failure while removing the value is reported as a message rather than
 * thrown, matching how saving a value reports its failures.
 *
 * @param {string} key - Editable config key to remove.
 * @param {object} context - Command context; `minicodeHome` overrides the default home.
 * @returns {Promise<string>} Text to show to the user.
 */
async function removeConfigValue(key, context) {
    if (!isEditableConfigKey(key)) {
        return `Unknown editable config key "${key}".\n\n${renderEditableKeys()}`;
    }
    const minicodeHome = context.minicodeHome ?? MINICODE_HOME;
    const definition = getEditableConfigDefinition(key);
    const env = await resolveConfigEnv({ minicodeHome });
    try {
        await unsetPersistedConfigValue({
            key,
            minicodeHome,
        });
    }
    catch (error) {
        // Keep the slash command from rejecting on a filesystem error.
        const message = error instanceof Error ? error.message : "Unknown error";
        return `Failed to remove config: ${message}`;
    }
    const lines = [
        `Removed persisted value for "${key}".`,
        `File: ${minicodeHome}/agent.config.json`,
        "Restart minicode to ensure the updated config is applied in a new session.",
    ];
    if (env.values[definition.envVar] !== undefined) {
        lines.push(`Note: ${definition.envVar} is still set in the environment, so the effective value may remain unchanged.`);
    }
    return lines.join("\n");
}
93
/**
 * Handle the `/config` slash command.
 *
 * Input that is not `/config` (alone or followed by a space) is reported as
 * not handled. Otherwise the text after the command selects the action:
 * nothing shows the current config, `keys` lists editable keys, `get <key>`,
 * `set <key> <value...>` and `unset <key>` operate on one key, and anything
 * else (including a wrong argument count) shows the usage text.
 *
 * @returns {Promise<{handled: boolean, message?: string}>}
 */
export async function handleConfigSlashCommand(trimmed, context) {
    const COMMAND = "/config";
    const isConfigCommand = trimmed === COMMAND || trimmed.startsWith(`${COMMAND} `);
    if (!isConfigCommand) {
        return { handled: false };
    }
    const reply = (message) => ({ handled: true, message });
    const rest = trimmed.slice(COMMAND.length).trim();
    if (rest.length === 0) {
        return reply(formatConfigForDisplay(context.config));
    }
    const [subcommand, ...subArgs] = rest.split(/\s+/);
    switch (subcommand) {
        case "keys":
            return reply(renderEditableKeys());
        case "get":
            if (subArgs.length !== 1) {
                return reply(renderUsage());
            }
            return reply(await renderConfigValue(subArgs[0], context));
        case "set": {
            if (subArgs.length < 2) {
                return reply(renderUsage());
            }
            const [key, ...valueParts] = subArgs;
            return reply(await persistConfigValue(key, valueParts.join(" "), context));
        }
        case "unset":
            if (subArgs.length !== 1) {
                return reply(renderUsage());
            }
            return reply(await removeConfigValue(subArgs[0], context));
        default:
            return reply(renderUsage());
    }
}
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Install the minicode Claude Code plugin globally.
3
+ *
4
+ * Creates a symlink from ~/.claude/plugins/minicode → the plugin directory
5
+ * shipped alongside the minicode package.
6
+ */
7
+ import { mkdir, symlink, readlink, unlink, stat } from "node:fs/promises";
8
+ import path from "node:path";
9
+ import os from "node:os";
10
+ import { fileURLToPath } from "node:url";
11
const __dirname = path.dirname(fileURLToPath(import.meta.url));
/**
 * Locate the plugin directory relative to this module.
 *
 * When this module's directory path contains a `dist` segment the plugin
 * directory is taken three levels up, otherwise two levels up.
 */
function getPluginSourceDir() {
    const distSegment = `${path.sep}dist${path.sep}`;
    const relativePluginPath = __dirname.includes(distSegment)
        ? "../../../plugin"
        : "../../plugin";
    return path.resolve(__dirname, relativePluginPath);
}
23
/**
 * Symlink the bundled plugin directory to `~/.claude/plugins/minicode`.
 *
 * - Exits with code 1 when the plugin source (its `.claude-plugin/plugin.json`)
 *   cannot be found.
 * - Does nothing when the link already points at the plugin source.
 * - Replaces a symlink that points somewhere else.
 * - Exits with code 1 when the target exists but is not a symlink (directory
 *   or file); it is never removed automatically.
 *
 * @returns {Promise<void>}
 */
export async function installPlugin() {
    const pluginsDir = path.join(os.homedir(), ".claude", "plugins");
    const targetDir = path.join(pluginsDir, "minicode");
    const sourceDir = getPluginSourceDir();
    // Verify the plugin source exists
    try {
        await stat(path.join(sourceDir, ".claude-plugin", "plugin.json"));
    }
    catch {
        console.error(`Error: plugin source not found at ${sourceDir}`);
        console.error("Make sure you are running from a minicode installation.");
        process.exit(1);
        // Only reached if process.exit is stubbed; never continue installing.
        return;
    }
    // Create ~/.claude/plugins/ if it doesn't exist
    await mkdir(pluginsDir, { recursive: true });
    // Inspect the target. Only the readlink call is guarded, so a later
    // unlink failure surfaces as itself instead of being misreported.
    let existingLink;
    try {
        existingLink = await readlink(targetDir);
    }
    catch (error) {
        // ENOENT: nothing there. Any other failure means readlink could not
        // treat the target as a symlink; refuse to touch whatever exists.
        if (error?.code !== "ENOENT") {
            const occupied = await stat(targetDir).then(() => true, () => false);
            if (occupied) {
                console.error(`Error: ${targetDir} exists and is not a symlink.`);
                console.error("Remove it manually if you want to reinstall.");
                process.exit(1);
                return;
            }
        }
    }
    if (existingLink !== undefined) {
        // A relative link target is relative to the directory holding the link.
        if (path.resolve(pluginsDir, existingLink) === sourceDir) {
            console.log(`Plugin already installed at ${targetDir}`);
            console.log(` → ${sourceDir}`);
            return;
        }
        // Different target — remove and re-link
        await unlink(targetDir);
    }
    await symlink(sourceDir, targetDir, "dir");
    console.log("minicode plugin installed for Claude Code");
    console.log(` ${targetDir} → ${sourceDir}`);
    console.log("\nThe plugin will load automatically in Claude Code sessions.");
    console.log("Make sure minicode serve is running for the MCP tools to work:");
    console.log(" minicode serve");
}