@sean.holung/minicode 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/README.md +52 -42
  2. package/dist/scripts/run-benchmarks.js +147 -0
  3. package/dist/src/agent/config.js +149 -40
  4. package/dist/src/agent/editable-config.js +314 -0
  5. package/dist/src/analysis/structural-analysis.js +379 -0
  6. package/dist/src/benchmark/evaluator.js +79 -0
  7. package/dist/src/benchmark/index.js +4 -0
  8. package/dist/src/benchmark/reporter.js +177 -0
  9. package/dist/src/benchmark/runner.js +100 -0
  10. package/dist/src/benchmark/task-loader.js +78 -0
  11. package/dist/src/benchmark/types.js +5 -0
  12. package/dist/src/cli/args.js +10 -0
  13. package/dist/src/cli/config-slash-command.js +135 -0
  14. package/dist/src/cli/plugin-install.js +69 -0
  15. package/dist/src/index.js +76 -6
  16. package/dist/src/indexer/cache.js +6 -4
  17. package/dist/src/indexer/code-map.js +41 -13
  18. package/dist/src/indexer/plugins/typescript.js +70 -23
  19. package/dist/src/indexer/project-index.js +175 -36
  20. package/dist/src/indexer/symbol-names.js +92 -0
  21. package/dist/src/model-utils.js +18 -0
  22. package/dist/src/serve/agent-bridge.js +203 -24
  23. package/dist/src/serve/mcp-server.js +405 -0
  24. package/dist/src/serve/server.js +165 -10
  25. package/dist/src/serve/websocket.js +8 -0
  26. package/dist/src/shared/graph-styles.js +119 -0
  27. package/dist/src/tools/find-path.js +75 -0
  28. package/dist/src/tools/find-references.js +7 -2
  29. package/dist/src/tools/get-dependencies.js +3 -2
  30. package/dist/src/tools/read-symbol.js +12 -5
  31. package/dist/src/tools/registry.js +3 -1
  32. package/dist/src/tools/search-code-map.js +4 -2
  33. package/dist/src/ui/app.js +1 -1
  34. package/dist/src/ui/cli-ink.js +79 -4
  35. package/dist/src/ui/components/header-bar.js +6 -2
  36. package/dist/src/ui/state/ui-store.js +5 -0
  37. package/dist/src/web/app.js +1124 -176
  38. package/dist/src/web/index.html +113 -3
  39. package/dist/src/web/style.css +973 -55
  40. package/dist/tests/agent.test.js +31 -0
  41. package/dist/tests/analysis-helpers.test.js +89 -0
  42. package/dist/tests/analysis-ui.test.js +29 -0
  43. package/dist/tests/benchmark-harness.test.js +527 -0
  44. package/dist/tests/config-api.test.js +143 -0
  45. package/dist/tests/config-integration.test.js +751 -0
  46. package/dist/tests/config-slash-command.test.js +106 -0
  47. package/dist/tests/config.test.js +42 -1
  48. package/dist/tests/context-indicator.test.js +220 -0
  49. package/dist/tests/editable-config.test.js +109 -0
  50. package/dist/tests/find-path.test.js +183 -0
  51. package/dist/tests/focus-tracker.test.js +62 -0
  52. package/dist/tests/graph-onboarding.test.js +55 -0
  53. package/dist/tests/graph-styles.test.js +65 -0
  54. package/dist/tests/indexer.test.js +137 -0
  55. package/dist/tests/mcp-and-plugin.test.js +186 -0
  56. package/dist/tests/model-client-openai.test.js +29 -0
  57. package/dist/tests/model-selection.test.js +136 -0
  58. package/dist/tests/model-utils.test.js +22 -0
  59. package/dist/tests/reasoning-effort.test.js +264 -0
  60. package/dist/tests/run-benchmarks.test.js +161 -0
  61. package/dist/tests/search-code-map.test.js +18 -0
  62. package/dist/tests/serve.integration.test.js +218 -2
  63. package/dist/tests/session-ui.test.js +21 -0
  64. package/dist/tests/session.test.js +50 -0
  65. package/dist/tests/settings-ui.test.js +30 -0
  66. package/dist/tests/structural-analysis.test.js +218 -0
  67. package/node_modules/@minicode/agent-sdk/README.md +80 -51
  68. package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.d.ts +16 -5
  69. package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.d.ts.map +1 -1
  70. package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.js +51 -33
  71. package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.js.map +1 -1
  72. package/node_modules/@minicode/agent-sdk/dist/src/agent/types.d.ts +14 -0
  73. package/node_modules/@minicode/agent-sdk/dist/src/agent/types.d.ts.map +1 -1
  74. package/node_modules/@minicode/agent-sdk/dist/src/index.d.ts +3 -2
  75. package/node_modules/@minicode/agent-sdk/dist/src/index.d.ts.map +1 -1
  76. package/node_modules/@minicode/agent-sdk/dist/src/index.js +2 -0
  77. package/node_modules/@minicode/agent-sdk/dist/src/index.js.map +1 -1
  78. package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.d.ts +35 -0
  79. package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.d.ts.map +1 -0
  80. package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.js +64 -0
  81. package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.js.map +1 -0
  82. package/node_modules/@minicode/agent-sdk/dist/src/indexer/types.d.ts +7 -0
  83. package/node_modules/@minicode/agent-sdk/dist/src/indexer/types.d.ts.map +1 -1
  84. package/node_modules/@minicode/agent-sdk/dist/src/model/client.d.ts +5 -1
  85. package/node_modules/@minicode/agent-sdk/dist/src/model/client.d.ts.map +1 -1
  86. package/node_modules/@minicode/agent-sdk/dist/src/model/client.js +83 -11
  87. package/node_modules/@minicode/agent-sdk/dist/src/model/client.js.map +1 -1
  88. package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.d.ts +1 -0
  89. package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.d.ts.map +1 -1
  90. package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.js +8 -1
  91. package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.js.map +1 -1
  92. package/node_modules/@minicode/agent-sdk/dist/src/session/session.d.ts.map +1 -1
  93. package/node_modules/@minicode/agent-sdk/dist/src/session/session.js +4 -1
  94. package/node_modules/@minicode/agent-sdk/dist/src/session/session.js.map +1 -1
  95. package/node_modules/@minicode/agent-sdk/dist/tests/agent.test.js +3 -1
  96. package/node_modules/@minicode/agent-sdk/dist/tests/agent.test.js.map +1 -1
  97. package/node_modules/@minicode/agent-sdk/dist/tests/guardrails.test.js +8 -2
  98. package/node_modules/@minicode/agent-sdk/dist/tests/guardrails.test.js.map +1 -1
  99. package/node_modules/@minicode/agent-sdk/dist/tsconfig.tsbuildinfo +1 -1
  100. package/package.json +9 -5
  101. package/plugin/.claude-plugin/plugin.json +12 -0
  102. package/plugin/.mcp.json +8 -0
  103. package/plugin/CLAUDE.md +26 -0
  104. package/plugin/skills/analyze/SKILL.md +12 -0
  105. package/plugin/skills/focus/SKILL.md +20 -0
  106. package/plugin/skills/graph/SKILL.md +13 -0
  107. package/plugin/skills/symbols/SKILL.md +13 -0
package/README.md CHANGED
@@ -1,8 +1,13 @@
1
1
  # minicode
2
2
 
3
- A lightweight coding agent optimized for **local models** CLI-first with a built-in web UI. Provides AST-based intelligent context for smaller models running on consumer hardware.
3
+ A graph-native coding agent and code exploration environment built around structural context optimization. It started as a way to make local models viable under tighter context budgets, and it now also works well with hosted frontier models through the same runtime, web UI, and OpenAI-compatible serve mode.
4
4
 
5
- Read operations dominate token usage in typical agent sessions; minicode addresses this by optimizing for **specific languages** indexing your project at startup with language plugins (TypeScript/JavaScript built-in) and injecting a compact **code map** (signatures only) into the system prompt, plus symbol-level tools (`read_symbol`, `find_references`, `get_dependencies`) so the model reads only what it needs instead of entire files. This keeps prompts lean enough for smaller models in the 20B range, with faster inference and better attention over the relevant code.
5
+ _Run `minicode serve` to get the web UI on localhost: chat, tool activity, session controls, model switching, symbol focus, annotations, and a live dependency graph._
6
+
7
+ <img width="1723" height="920" alt="Screenshot 2026-03-26 at 6 30 23 PM" src="https://github.com/user-attachments/assets/499c8dc7-cc2b-4125-abd5-32b2fc9795ea" />
8
+
9
+
10
+ Read operations dominate token usage in typical agent sessions; minicode addresses this by optimizing for **specific languages**. It indexes your project at startup with language plugins, injects a compact **code map** (signatures only) into the system prompt, and exposes symbol-level tools (`read_symbol`, `find_references`, `get_dependencies`) so the model reads only what it needs instead of entire files. TypeScript and JavaScript support come built-in, with custom language plugins leaving room for broader language support over time.
6
11
 
7
12
  ## Quick Start (LM Studio)
8
13
 
@@ -12,22 +17,11 @@ Read operations dominate token usage in typical agent sessions; minicode address
12
17
  # 2. Install
13
18
  npm install -g @sean.holung/minicode
14
19
 
15
- # 3. Configure for local (no API key needed)
16
- mkdir -p ~/.minicode
20
+ # 3. Configure (~/.minicode/agent.config.json is auto-created on first run)
21
+ # Set your model name — minicode will prompt you if this is missing.
17
22
  cat > ~/.minicode/.env << 'EOF'
18
23
  MODEL_PROVIDER=openai-compatible
19
- MODEL=zai-org/glm-4.7-flash
20
- OPENAI_BASE_URL=http://localhost:1234/v1
21
- OPENAI_API_KEY=
22
- MAX_STEPS=50
23
- MAX_TOKENS=4096
24
- MAX_CONTEXT_TOKENS=24000
25
- WORKSPACE_ROOT=.
26
- COMMAND_TIMEOUT_MS=30000
27
- MAX_FILE_SIZE_BYTES=1000000
28
- CONFIRM_DESTRUCTIVE=true
29
- KEEP_RECENT_MESSAGES=12
30
- LOOP_DETECTION_WINDOW=6
24
+ MODEL=your-model-name
31
25
  EOF
32
26
  ```
33
27
 
@@ -92,7 +86,7 @@ npm run install:global
92
86
  - **Web UI** — `minicode serve` starts an HTTP + WebSocket server with a bundled chat client, real-time streaming, session management, and project graph data endpoints
93
87
  - **OpenAI-compatible API** — any client that speaks the OpenAI protocol can use minicode as a backend at `/v1/chat/completions`
94
88
  - **Context optimization:** Code map in system prompt, `read_symbol`, `find_references`, `get_dependencies`
95
- - **Plugin system:** Extensible language support (TypeScript built-in)
89
+ - **Plugin system:** Extensible language support (TypeScript/JavaScript built in today)
96
90
 
97
91
  ## Context Optimization
98
92
 
@@ -105,9 +99,9 @@ For the proposed reusable package architecture and public interfaces for a stand
105
99
  minicode reduces token usage by indexing your project and providing targeted tools:
106
100
 
107
101
  - **Code map** — A compact project skeleton (signatures only) is injected into the system prompt so the model can orient itself without reading full files.
108
- - `**read_symbol`** — Read a specific function or class by name, with referenced types.
109
- - `**find_references**` — Find all symbols that reference a given symbol.
110
- - `**get_dependencies**` — Get the dependency cone of a symbol.
102
+ - `read_symbol` — Read a specific function or class by name, with referenced types.
103
+ - `find_references` — Find all symbols that reference a given symbol.
104
+ - `get_dependencies` — Get the dependency cone of a symbol.
111
105
 
112
106
  The index is cached in `~/.minicode/cache/<workspace-hash>/` for faster startup on subsequent runs. Caches are global and keyed by workspace path, so nothing is stored inside your project directories.
113
107
 
@@ -170,9 +164,9 @@ See [docs/PLUGIN_SPEC.md](docs/PLUGIN_SPEC.md) for the full specification. Quick
170
164
 
171
165
  Configuration can come from (later sources override earlier):
172
166
 
173
- 1. `**~/.minicode/.env`** — User-level defaults (API keys, model, etc.)
174
- 2. `**~/.minicode/agent.config.json**` — User-level JSON config
175
- 3. **Project `.env`** and `**agent.config.json**` in workspace root
167
+ 1. `~/.minicode/.env` — User-level defaults (API keys, model, etc.)
168
+ 2. `~/.minicode/agent.config.json` — User-level JSON config
169
+ 3. Project `.env` and `agent.config.json` in workspace root
176
170
  4. Environment variables (highest precedence)
177
171
 
178
172
  Nothing is written inside your workspace; config and cache live under `~/.minicode/`.
@@ -183,45 +177,41 @@ Nothing is written inside your workspace; config and cache live under `~/.minico
183
177
  | Variable | Required | Default | Notes |
184
178
  | ----------------------- | --------------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
185
179
  | `MODEL_PROVIDER` | No | `openai-compatible` | `anthropic` or `openai-compatible` (aliases: `openai`, `lmstudio`, `lm-studio`) |
186
- | `MODEL` | No | `zai-org/glm-4.7-flash` | Model name for selected provider |
180
+ | `MODEL` | Yes | none | Model name for selected provider |
187
181
  | `ANTHROPIC_API_KEY` | Yes (Anthropic) | none | Required when `MODEL_PROVIDER=anthropic` |
188
182
  | `OPENAI_BASE_URL` | No | `http://localhost:1234/v1` | Base URL for OpenAI-compatible API (LM Studio, etc.) |
189
183
  | `OPENAI_API_KEY` | No | none | Optional for local servers; required if your endpoint enforces auth |
184
+ | `OPENROUTER_API_KEY` | No | none | Preferred key when `OPENAI_BASE_URL` points at OpenRouter; falls back to `OPENAI_API_KEY` if unset |
190
185
  | `MAX_STEPS` | No | `50` | Max agent loop iterations per user turn |
191
186
  | `MAX_TOKENS` | No | `4096` | Max model output tokens per model call |
192
- | `MAX_CONTEXT_TOKENS` | No | `120000` | Approximate session history trimming target. For small models (e.g. 8k context), set lower (e.g. `6000`) to leave room for responses. |
193
- | `MAX_TOOL_OUTPUT_CHARS` | No | `15000` | Max chars per tool result before truncation. Set to `0` to disable. |
194
- | `WORKSPACE_ROOT` | No | current working directory | Root directory tools are allowed to access |
187
+ | `MAX_CONTEXT_TOKENS` | No | `32000` | Approximate session history trimming target. For small models (e.g. 8k context), set lower (e.g. `6000`) to leave room for responses. |
188
+ | `MAX_TOOL_OUTPUT_CHARS` | No | `8000` | Max chars per tool result before truncation. Set to `0` to disable. |
189
+ | `WORKSPACE_ROOT` | No | current working directory | Root directory tools are allowed to access (set at runtime, not typically configured) |
195
190
  | `COMMAND_TIMEOUT_MS` | No | `30000` | Timeout for shell/search commands |
196
191
  | `MAX_FILE_SIZE_BYTES` | No | `1000000` | Read limit for `read_file` |
197
192
  | `CONFIRM_DESTRUCTIVE` | No | `true` | If `true`, blocks destructive shell commands unless confirmed |
198
193
  | `KEEP_RECENT_MESSAGES` | No | `12` | Minimum number of latest messages kept during trimming |
199
194
  | `LOOP_DETECTION_WINDOW` | No | `6` | Window for repeated tool-call loop detection |
195
+ | `ENABLE_FILE_READ_DEDUP` | No | `true` | Reuses earlier `read_file` results within a turn when the same file slice is still in context |
196
+ | `ENABLE_ADAPTIVE_KEEP_RECENT` | No | `true` | Scales `keepRecentMessages` down as context fills so trimming gets more aggressive when needed |
197
+ | `ENABLE_TOOL_OUTPUT_TRUNCATION` | No | `true` | Enables content-aware truncation strategies for tool output instead of simple head-only clipping |
200
198
  | `COMPACTION_THRESHOLD` | No | `0.8` | Context fullness ratio (0–1) at which auto-compaction triggers |
201
199
  | `COMPACTION_MODEL` | No | none | Model for LLM-based compaction summaries. When set, `/compact` and auto-compaction use this model instead of mechanical truncation. Use a small, fast model (e.g. your local model). |
200
+ | `REASONING_EFFORT` | No | unset | Reasoning level for providers that support it. Valid values: `xhigh`, `high`, `medium`, `low`, `minimal`, `none` |
202
201
 
203
202
 
204
203
  ### `agent.config.json`
205
204
 
206
- Create `agent.config.json` in `~/.minicode/` for user-level defaults, or in the project root for workspace-specific overrides:
205
+ A global `~/.minicode/agent.config.json` is auto-created on first run. Only set what you need — everything has sensible defaults:
207
206
 
208
207
  ```json
209
208
  {
210
209
  "modelProvider": "openai-compatible",
211
- "model": "zai-org/glm-4.7-flash",
210
+ "model": "your-model-name",
211
+ "openAiBaseUrl": "http://localhost:1234/v1",
212
212
  "maxSteps": 50,
213
213
  "maxTokens": 4096,
214
- "maxContextTokens": 120000,
215
- "workspaceRoot": ".",
216
- "commandTimeout": 30000,
217
- "commandDenylist": [],
218
- "confirmDestructive": true,
219
- "maxFileSizeBytes": 1000000,
220
- "keepRecentMessages": 12,
221
- "loopDetectionWindow": 6,
222
- "openAiBaseUrl": "http://localhost:1234/v1",
223
- "openAiApiKey": "",
224
- "compactionModel": ""
214
+ "maxContextTokens": 32000
225
215
  }
226
216
  ```
227
217
 
@@ -240,9 +230,14 @@ Field mapping:
240
230
  - `keepRecentMessages` ↔ `KEEP_RECENT_MESSAGES`
241
231
  - `loopDetectionWindow` ↔ `LOOP_DETECTION_WINDOW`
242
232
  - `openAiBaseUrl` ↔ `OPENAI_BASE_URL`
243
- - `openAiApiKey` ↔ `OPENAI_API_KEY`
233
+ - `openAiApiKey` ↔ `OPENAI_API_KEY` / `OPENROUTER_API_KEY` (when using OpenRouter)
234
+ - `maxToolOutputChars` ↔ `MAX_TOOL_OUTPUT_CHARS`
235
+ - `enableFileReadDedup` ↔ `ENABLE_FILE_READ_DEDUP`
236
+ - `enableAdaptiveKeepRecent` ↔ `ENABLE_ADAPTIVE_KEEP_RECENT`
237
+ - `enableToolOutputTruncation` ↔ `ENABLE_TOOL_OUTPUT_TRUNCATION`
244
238
  - `compactionThreshold` ↔ `COMPACTION_THRESHOLD`
245
239
  - `compactionModel` ↔ `COMPACTION_MODEL`
240
+ - `reasoningEffort` ↔ `REASONING_EFFORT`
246
241
 
247
242
  ## Usage
248
243
 
@@ -273,6 +268,19 @@ npm run dev -- --oneshot --json "Summarize TODOs"
273
268
  npm run dev -- --oneshot --out result.txt "Draft changelog"
274
269
  ```
275
270
 
271
+ Interactive slash commands:
272
+
273
+ - `/help`
274
+ - `/config`
275
+ - `/compact`
276
+ - `/reasoning [level]`
277
+ - `/models`
278
+ - `/model [name]`
279
+ - `/save [label]`
280
+ - `/load [label]`
281
+ - `/sessions`
282
+ - `/exit`
283
+
276
284
  ### Exit codes
277
285
 
278
286
  - `0`: Success
@@ -284,7 +292,9 @@ npm run dev -- --oneshot --out result.txt "Draft changelog"
284
292
  - `npm run dev` - start the CLI in TypeScript mode
285
293
  - `npm run dev:ink` - start with Ink UI (same as `dev` when in a TTY; use to override `CLI_UI_MODE=legacy`)
286
294
  - `npm run build` - compile TypeScript to `dist/`
295
+ - `npm run build:web` - build the bundled web client used by `minicode serve`
287
296
  - `npm start` - run compiled CLI
297
+ - `npm run install:global` - build and `npm link` the CLI locally
288
298
  - `npm run lint` - run ESLint on TypeScript source and tests
289
299
  - `npm test` - run Node test suite
290
-
300
+ - `npm run verify-index` - run the TypeScript index verification harness
@@ -0,0 +1,147 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * CLI entry point for running benchmark tasks.
4
+ *
5
+ * Usage:
6
+ * node --import tsx scripts/run-benchmarks.ts [options]
7
+ *
8
+ * Options:
9
+ * --category <name> Run only tasks in the given category
10
+ * --task <id> Run a single task by id (e.g. "navigation/find-symbol-definition")
11
+ * --variant <label> Variant label for the report (default: "ci")
12
+ * --out <path> Write the JSON report to a file
13
+ *
14
+ * Environment:
15
+ * MODEL_PROVIDER, MODEL, OPENAI_BASE_URL, OPENAI_API_KEY, ANTHROPIC_API_KEY
16
+ * — same as minicode runtime config.
17
+ */
18
+ import path from "node:path";
19
+ import { writeFile } from "node:fs/promises";
20
+ import { createModelClient, createReadFileTool, createWriteFileTool, createEditFileTool, createSearchTool, createListFilesTool, createRunCommandTool, } from "@minicode/agent-sdk";
21
+ import { loadBenchmarkTasks, loadBenchmarkTask } from "../src/benchmark/task-loader.js";
22
+ import { runBenchmarkSuite } from "../src/benchmark/runner.js";
23
+ import { buildReport, formatReport } from "../src/benchmark/reporter.js";
24
/**
 * Parse the benchmark runner's command-line options.
 *
 * Recognised options, each taking exactly one value: `--category`, `--task`,
 * `--variant`, `--out`. Unrecognised tokens are ignored. An option with no
 * value after it is ignored too.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{variant: string, category?: string, task?: string, out?: string}}
 *   Parsed options; `variant` defaults to "ci".
 */
export function parseArgs(argv) {
    // Option token -> property it populates on the result.
    const valueOptions = new Map([
        ["--category", "category"],
        ["--task", "task"],
        ["--variant", "variant"],
        ["--out", "out"],
    ]);
    const args = { variant: "ci" };
    for (let i = 0; i < argv.length; i++) {
        const field = valueOptions.get(argv[i]);
        if (field === undefined) {
            continue;
        }
        const next = argv[i + 1];
        // A following recognised option is the start of the next option, not a
        // value: `--category --out r.json` must still honour `--out`.
        if (!next || valueOptions.has(next)) {
            continue;
        }
        args[field] = next;
        i++; // the value has been consumed
    }
    return args;
}
48
+ /* ------------------------------------------------------------------ */
49
+ /* Config builder */
50
+ /* ------------------------------------------------------------------ */
51
/**
 * Read a numeric setting from the environment.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset, blank, or
 *   does not parse to a finite number.
 * @returns {number}
 */
function readNumericEnv(name, fallback) {
    const raw = process.env[name]?.trim();
    if (!raw) {
        // Unset or blank: Number("") would otherwise yield 0.
        return fallback;
    }
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : fallback;
}
/**
 * Build the runtime config for a benchmark run from environment variables.
 *
 * The workspace root is the current working directory, the command denylist
 * is empty, and destructive-command confirmation is off. `openAiApiKey` is
 * included only when `OPENAI_API_KEY` is non-empty.
 *
 * @returns {object} Config object passed to the model client, the tool
 *   factories and the benchmark runner.
 */
export function buildConfig() {
    const provider = process.env.MODEL_PROVIDER ?? "openai-compatible";
    const model = process.env.MODEL ?? "test-model";
    return {
        modelProvider: provider,
        model,
        maxSteps: readNumericEnv("MAX_STEPS", 50),
        maxTokens: readNumericEnv("MAX_TOKENS", 4096),
        maxContextTokens: readNumericEnv("MAX_CONTEXT_TOKENS", 32000),
        workspaceRoot: process.cwd(),
        commandTimeoutMs: readNumericEnv("COMMAND_TIMEOUT_MS", 30000),
        maxFileSizeBytes: readNumericEnv("MAX_FILE_SIZE_BYTES", 1000000),
        commandDenylist: [],
        confirmDestructive: false,
        keepRecentMessages: readNumericEnv("KEEP_RECENT_MESSAGES", 12),
        loopDetectionWindow: readNumericEnv("LOOP_DETECTION_WINDOW", 6),
        maxToolOutputChars: readNumericEnv("MAX_TOOL_OUTPUT_CHARS", 8000),
        openAiBaseUrl: process.env.OPENAI_BASE_URL ?? "http://localhost:1234/v1",
        ...(process.env.OPENAI_API_KEY ? { openAiApiKey: process.env.OPENAI_API_KEY } : {}),
    };
}
72
+ /* ------------------------------------------------------------------ */
73
+ /* Task loading */
74
+ /* ------------------------------------------------------------------ */
75
/**
 * Resolve the benchmark tasks selected on the command line.
 *
 * `args.task` takes priority and yields exactly that task. Otherwise every
 * task under `tasksDir` is loaded and, when `args.category` is set, narrowed
 * to that category.
 *
 * @param {string} tasksDir - Directory the task loader reads from.
 * @param {{task?: string, category?: string}} args - Parsed CLI options.
 * @returns {Promise<Array>} The selected tasks.
 * @throws {Error} When the requested task does not exist, or the requested
 *   category matches no task.
 */
export async function loadTasks(tasksDir, args) {
    const { task: taskId, category } = args;
    if (taskId) {
        const found = await loadBenchmarkTask(tasksDir, taskId);
        if (found) {
            return [found];
        }
        throw new Error(`Task not found: ${taskId}`);
    }
    const everyTask = await loadBenchmarkTasks(tasksDir);
    if (!category) {
        return everyTask;
    }
    const inCategory = everyTask.filter((candidate) => candidate.category === category);
    if (inCategory.length === 0) {
        throw new Error(`No tasks found for category: ${category}`);
    }
    return inCategory;
}
92
+ /* ------------------------------------------------------------------ */
93
+ /* Main */
94
+ /* ------------------------------------------------------------------ */
95
/**
 * Run the selected benchmark tasks and report the outcome.
 *
 * Prints a start-up banner, runs the suite with one log line per finished
 * task, prints the formatted report, optionally writes the report as JSON to
 * the `--out` path, and sets a failing exit code when any task failed.
 */
async function main() {
    const cliOptions = parseArgs(process.argv.slice(2));
    const runConfig = buildConfig();
    const tasksDir = path.resolve(process.cwd(), "benchmarks", "tasks");
    const { variant, out: reportFile } = cliOptions;
    console.log(`Benchmark runner starting...`);
    console.log(` Provider: ${runConfig.modelProvider}`);
    console.log(` Model: ${runConfig.model}`);
    console.log(` Variant: ${variant}`);
    const tasks = await loadTasks(tasksDir, cliOptions);
    console.log(` Tasks: ${tasks.length}`);
    console.log("");
    const modelClient = createModelClient(runConfig);
    // Every tool factory takes the same config; build them in a fixed order.
    const toolFactories = [
        createReadFileTool,
        createWriteFileTool,
        createEditFileTool,
        createSearchTool,
        createListFilesTool,
        createRunCommandTool,
    ];
    const tools = toolFactories.map((createTool) => createTool(runConfig));
    const logTaskDone = (taskId, trace) => {
        const dur = (trace.durationMs / 1000).toFixed(1);
        console.log(` [done] ${taskId} (${dur}s, ${trace.toolCalls.length} tool calls)`);
    };
    const traces = await runBenchmarkSuite(tasks, {
        modelClient,
        config: runConfig,
        tools,
        variant,
        onTaskComplete: logTaskDone,
    });
    const report = buildReport(tasks, traces, variant, runConfig.model);
    const formatted = formatReport(report);
    console.log("");
    console.log(formatted);
    if (reportFile) {
        const outPath = path.resolve(reportFile);
        await writeFile(outPath, JSON.stringify(report, null, 2), "utf8");
        console.log(`\nReport written to ${outPath}`);
    }
    // Signal failure to the caller (e.g. CI) without cutting pending output short.
    if (report.summary.failed > 0) {
        process.exitCode = 1;
    }
}
139
// Start the runner only when this file is the process entry script, so that
// importing the module (e.g. from tests) has no side effects.
const isDirectRun = ["run-benchmarks.ts", "run-benchmarks.js"].some((scriptName) => process.argv[1]?.endsWith(scriptName));
if (isDirectRun) {
    const reportFailure = (err) => {
        console.error("Benchmark runner failed:", err);
        process.exitCode = 1;
    };
    main().catch(reportFailure);
}
@@ -1,8 +1,7 @@
1
- import { access, readFile } from "node:fs/promises";
1
+ import { access, mkdir, readFile, writeFile } from "node:fs/promises";
2
2
  import os from "node:os";
3
3
  import path from "node:path";
4
4
  import process from "node:process";
5
- import { fileURLToPath } from "node:url";
6
5
  import dotenv from "dotenv";
7
6
  /** User-level config directory: ~/.minicode */
8
7
  export const MINICODE_HOME = path.join(os.homedir(), ".minicode");
@@ -32,17 +31,52 @@ export function formatConfigForDisplay(config) {
32
31
  "enableToolOutputTruncation: " + (config.enableToolOutputTruncation ?? false),
33
32
  "compactionThreshold: " + (config.compactionThreshold ?? "(disabled)"),
34
33
  "compactionModel: " + (config.compactionModel ?? "(disabled — using mechanical compaction)"),
34
+ "reasoningEffort: " + (config.reasoningEffort ?? "(unset — no reasoning parameters sent)"),
35
+ "enableDynamicPrompt: " + (config.enableDynamicPrompt ?? true),
35
36
  ];
36
37
  return lines.join("\n");
37
38
  }
38
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
39
- const envPath = __dirname.includes(`${path.sep}dist${path.sep}`)
40
- ? path.resolve(__dirname, "../../../.env")
41
- : path.resolve(__dirname, "../../.env");
42
- // Load order: user home (~/.minicode/.env) < project .env < cwd .env
43
- dotenv.config({ path: path.join(MINICODE_HOME, ".env") });
44
- dotenv.config({ path: envPath, override: true });
45
- dotenv.config({ path: path.resolve(process.cwd(), ".env"), override: true });
39
/**
 * List the configuration problems that prevent the agent from running.
 *
 * @param config - Agent configuration; `model` and `modelProvider` are read.
 * @returns {string[]} One message per missing item, in a fixed order. An
 *   empty array means nothing is missing.
 */
export function getConfigMissing(config) {
    const requirements = [
        { unmet: !config.model, message: "MODEL is not set" },
        {
            // The Anthropic key is looked up in the process environment.
            unmet: config.modelProvider === "anthropic" && !process.env.ANTHROPIC_API_KEY,
            message: "ANTHROPIC_API_KEY is not set",
        },
    ];
    return requirements
        .filter((requirement) => requirement.unmet)
        .map((requirement) => requirement.message);
}
57
/**
 * Build the user-facing setup help shown when the configuration is incomplete.
 *
 * @param config - Agent configuration to check.
 * @returns {string|null} `null` when nothing is missing; otherwise a
 *   multi-line message listing the missing items, where to set them, and
 *   example settings for a local model and for Anthropic.
 */
export function getConfigSetupMessage(config) {
    const missing = getConfigMissing(config);
    if (missing.length === 0) {
        return null;
    }
    const bullets = missing.map((item) => ` - ${item}`);
    const whereToSet = [
        `Set these in ~/.minicode/.env or as environment variables.`,
        `Edit ~/.minicode/agent.config.json for non-secret settings.`,
    ];
    const localExample = [
        "Example ~/.minicode/.env for a local model:",
        " MODEL_PROVIDER=openai-compatible",
        " OPENAI_BASE_URL=http://localhost:1234/v1",
        " MODEL=your-model-name",
    ];
    const anthropicExample = [
        "Example for Anthropic:",
        " MODEL_PROVIDER=anthropic",
        " ANTHROPIC_API_KEY=sk-ant-...",
        " MODEL=claude-sonnet-4-20250514",
    ];
    // Sections are separated by a blank line.
    const sections = [
        ["minicode is not configured yet. Missing:", ...bullets],
        whereToSet,
        localExample,
        anthropicExample,
    ];
    return sections.map((section) => section.join("\n")).join("\n\n");
}
46
80
  const DEFAULT_COMMAND_DENYLIST = [
47
81
  /\brm\s+-rf\s+\//i,
48
82
  /\bmkfs\b/i,
@@ -54,6 +88,15 @@ const DEFAULT_COMMAND_DENYLIST = [
54
88
  /\binit\s+0\b/i,
55
89
  /\bchmod\s+-R\s+777\s+\//i,
56
90
  ];
91
/** Reasoning-effort levels accepted in configuration (compared in lower case). */
const VALID_REASONING_EFFORTS = new Set([
    "xhigh", "high", "medium", "low", "minimal", "none",
]);
/**
 * Normalise a configured reasoning-effort value.
 *
 * Surrounding whitespace and letter case are ignored.
 *
 * @param {unknown} value - Raw value from the environment or a config file.
 * @returns {string|undefined} The lower-cased level when it is one of the
 *   accepted levels; `undefined` for anything else, including non-string
 *   values.
 */
function parseReasoningEffort(value) {
    // A hand-edited JSON config can hold any type; only strings can name a
    // level, so reject everything else instead of throwing on `.trim()`.
    if (typeof value !== "string") {
        return undefined;
    }
    const normalized = value.trim().toLowerCase();
    return VALID_REASONING_EFFORTS.has(normalized) ? normalized : undefined;
}
57
100
  function parseNumber(value, fallback) {
58
101
  if (!value) {
59
102
  return fallback;
@@ -74,7 +117,7 @@ function parseBoolean(value, fallback) {
74
117
  }
75
118
  return fallback;
76
119
  }
77
- async function loadConfigFile(configPath) {
120
+ export async function loadConfigFile(configPath) {
78
121
  try {
79
122
  await access(configPath);
80
123
  }
@@ -88,6 +131,48 @@ async function loadConfigFile(configPath) {
88
131
  }
89
132
  return parsed;
90
133
  }
134
/**
 * Read and parse a dotenv file on a best-effort basis.
 *
 * @param {string} envPath - Path of the dotenv file.
 * @returns {Promise<object>} The key/value pairs produced by `dotenv.parse`,
 *   or an empty object when reading or parsing fails for any reason.
 */
async function loadDotenvFile(envPath) {
    let parsedEntries = {};
    try {
        const contents = await readFile(envPath, "utf8");
        parsedEntries = dotenv.parse(contents);
    }
    catch {
        // Deliberately best-effort: an absent or unreadable file means "no entries".
        parsedEntries = {};
    }
    return parsedEntries;
}
143
/**
 * Merge one layer of environment values into the accumulated result.
 *
 * @param {object} target - Accumulated values; mutated in place.
 * @param {object} sources - Per-key record of which layer supplied the value;
 *   mutated in place.
 * @param {object} layer - Key/value pairs to merge in.
 * @param {string} source - Label recorded in `sources` for every key written.
 * @param {boolean} override - When false, keys that already have a defined
 *   value in `target` are left untouched.
 */
function applyEnvLayer(target, sources, layer, source, override) {
    Object.entries(layer).forEach(([name, incoming]) => {
        const mayWrite = override || target[name] === undefined;
        if (mayWrite) {
            target[name] = incoming;
            sources[name] = source;
        }
    });
}
152
/**
 * Overlay the current process environment onto the accumulated values.
 *
 * Every defined variable in `process.env` replaces any earlier value and is
 * attributed to the "process" source.
 *
 * @param {object} target - Accumulated values; mutated in place.
 * @param {object} sources - Per-key record of the supplying layer; mutated in
 *   place.
 */
function applyProcessEnv(target, sources) {
    Object.entries(process.env).forEach(([name, current]) => {
        if (current !== undefined) {
            target[name] = current;
            sources[name] = "process";
        }
    });
}
161
/**
 * Resolve the effective environment used for configuration.
 *
 * Values from `<minicodeHome>/.env` form the base layer; variables from the
 * process environment are applied on top and win on conflict.
 *
 * @param {{minicodeHome?: string}} [options] - `minicodeHome` replaces the
 *   default `MINICODE_HOME` directory.
 * @returns {Promise<{values: object, sources: object, homeEnvPath: string}>}
 *   The merged values, the layer each key came from ("home-dotenv" or
 *   "process"), and the path of the dotenv file that was consulted.
 */
export async function resolveConfigEnv(options = {}) {
    const homeDir = options.minicodeHome ?? MINICODE_HOME;
    const homeEnvPath = path.join(homeDir, ".env");
    const resolved = { values: {}, sources: {}, homeEnvPath };
    // Lowest precedence: entries from the dotenv file in the minicode home.
    const homeEntries = await loadDotenvFile(homeEnvPath);
    applyEnvLayer(resolved.values, resolved.sources, homeEntries, "home-dotenv", true);
    // Highest precedence: whatever the shell environment defines.
    applyProcessEnv(resolved.values, resolved.sources);
    return resolved;
}
91
176
  function parseUserDenylist(patterns) {
92
177
  if (!patterns?.length) {
93
178
  return [];
@@ -113,51 +198,75 @@ function parseModelProvider(value) {
113
198
  }
114
199
  return "anthropic";
115
200
  }
116
- export async function loadAgentConfig(cwd = process.cwd()) {
117
- const homeConfigPath = path.join(MINICODE_HOME, "agent.config.json");
118
- const workspaceConfigPath = path.resolve(cwd, "agent.config.json");
119
- const homeConfig = await loadConfigFile(homeConfigPath);
120
- const workspaceConfig = await loadConfigFile(workspaceConfigPath);
121
- const fileConfig = { ...homeConfig, ...workspaceConfig };
122
- const rawWorkspaceRoot = process.env.WORKSPACE_ROOT ?? fileConfig.workspaceRoot ?? cwd;
201
// Seed text for a brand-new agent.config.json: ensureMinicodeHome writes this
// string verbatim when no config file exists in the minicode home directory.
// The modelProvider, model, openAiBaseUrl, maxSteps, maxTokens and
// maxContextTokens values equal the fallbacks that loadAgentConfig applies
// when a key is missing from both the environment and the config file.
+ const DEFAULT_CONFIG_CONTENT = `{
202
+ "modelProvider": "openai-compatible",
203
+ "model": "",
204
+ "openAiBaseUrl": "http://localhost:1234/v1",
205
+ "maxSteps": 50,
206
+ "maxTokens": 4096,
207
+ "maxContextTokens": 32000
208
+ }
209
+ `;
210
/**
 * Makes sure the minicode home directory exists and contains an
 * `agent.config.json`, seeding the file with `DEFAULT_CONFIG_CONTENT`
 * when it is missing. An existing config file is never modified.
 *
 * @param {string} minicodeHome - Directory that should hold the config file; created recursively if absent.
 * @returns {Promise<void>}
 * @throws Any filesystem error other than "file already exists" (for example a permissions failure).
 */
async function ensureMinicodeHome(minicodeHome) {
    await mkdir(minicodeHome, { recursive: true });
    const configPath = path.join(minicodeHome, "agent.config.json");
    try {
        // "wx" creates the file exclusively, so the existence check and the
        // write are a single step. A separate access()-then-writeFile() pair
        // could overwrite a config that another process created in between.
        await writeFile(configPath, DEFAULT_CONFIG_CONTENT, { encoding: "utf8", flag: "wx" });
    }
    catch (error) {
        // EEXIST is the expected outcome when a config is already present.
        if (error?.code !== "EEXIST") {
            throw error;
        }
    }
}
220
/**
 * Builds the effective agent configuration.
 *
 * Each setting is resolved in this order: the resolved environment
 * (see `resolveConfigEnv`), then `agent.config.json` in the minicode home
 * directory, then a built-in default. The home directory and a default
 * config file are created first if they do not exist.
 *
 * @param {string} [cwd=process.cwd()] - Base directory used to resolve the workspace root.
 * @param {{ minicodeHome?: string }} [options] - `minicodeHome` overrides the default home directory (`MINICODE_HOME`).
 * @returns {Promise<object>} The assembled configuration. `openAiApiKey`, `compactionModel`
 *   and `reasoningEffort` are present only when a value was resolved for them.
 */
export async function loadAgentConfig(cwd = process.cwd(), options = {}) {
    const minicodeHome = options.minicodeHome ?? MINICODE_HOME;
    await ensureMinicodeHome(minicodeHome);
    const fileConfig = await loadConfigFile(path.join(minicodeHome, "agent.config.json"));
    const { values: env } = await resolveConfigEnv({ minicodeHome });

    const workspaceRoot = path.resolve(cwd, env.WORKSPACE_ROOT ?? fileConfig.workspaceRoot ?? cwd);
    const commandDenylist = [
        ...DEFAULT_COMMAND_DENYLIST,
        ...parseUserDenylist(fileConfig.commandDenylist),
    ];

    const openAiBaseUrl = env.OPENAI_BASE_URL ?? fileConfig.openAiBaseUrl ?? "http://localhost:1234/v1";
    // An OpenRouter base URL prefers the OpenRouter key; otherwise only the
    // generic OpenAI key (environment first, then config file) is considered.
    const genericApiKey = env.OPENAI_API_KEY ?? fileConfig.openAiApiKey;
    const openAiApiKey = openAiBaseUrl.includes("openrouter")
        ? (env.OPENROUTER_API_KEY ?? genericApiKey)
        : genericApiKey;

    // Properties are added in a fixed order so the resulting key order is
    // stable, with optional keys slotted in at their established positions.
    const config = {
        modelProvider: parseModelProvider(env.MODEL_PROVIDER ?? fileConfig.modelProvider ?? "openai-compatible"),
        model: env.MODEL ?? fileConfig.model ?? "",
        maxSteps: parseNumber(env.MAX_STEPS, fileConfig.maxSteps ?? 50),
        maxTokens: parseNumber(env.MAX_TOKENS, fileConfig.maxTokens ?? 4096),
        maxContextTokens: parseNumber(env.MAX_CONTEXT_TOKENS, fileConfig.maxContextTokens ?? 32_000),
        workspaceRoot,
        commandTimeoutMs: parseNumber(env.COMMAND_TIMEOUT_MS, fileConfig.commandTimeout ?? 30_000),
        maxFileSizeBytes: parseNumber(env.MAX_FILE_SIZE_BYTES, fileConfig.maxFileSizeBytes ?? 1_000_000),
        commandDenylist,
        confirmDestructive: parseBoolean(env.CONFIRM_DESTRUCTIVE, fileConfig.confirmDestructive ?? true),
        keepRecentMessages: parseNumber(env.KEEP_RECENT_MESSAGES, fileConfig.keepRecentMessages ?? 12),
        loopDetectionWindow: parseNumber(env.LOOP_DETECTION_WINDOW, fileConfig.loopDetectionWindow ?? 6),
        maxToolOutputChars: parseNumber(env.MAX_TOOL_OUTPUT_CHARS, fileConfig.maxToolOutputChars ?? 8_000),
        openAiBaseUrl,
    };
    if (openAiApiKey !== undefined) {
        config.openAiApiKey = openAiApiKey;
    }
    config.enableFileReadDedup = parseBoolean(env.ENABLE_FILE_READ_DEDUP, fileConfig.enableFileReadDedup ?? true);
    config.enableAdaptiveKeepRecent = parseBoolean(env.ENABLE_ADAPTIVE_KEEP_RECENT, fileConfig.enableAdaptiveKeepRecent ?? true);
    config.enableToolOutputTruncation = parseBoolean(env.ENABLE_TOOL_OUTPUT_TRUNCATION, fileConfig.enableToolOutputTruncation ?? true);
    config.compactionThreshold = parseNumber(env.COMPACTION_THRESHOLD, fileConfig.compactionThreshold ?? 0.8);

    const compactionModel = env.COMPACTION_MODEL ?? fileConfig.compactionModel;
    if (compactionModel) {
        config.compactionModel = compactionModel;
    }
    config.enableDynamicPrompt = parseBoolean(env.ENABLE_DYNAMIC_PROMPT, fileConfig.enableDynamicPrompt ?? true);

    const reasoningEffort = parseReasoningEffort(env.REASONING_EFFORT ?? fileConfig.reasoningEffort);
    if (reasoningEffort) {
        config.reasoningEffort = reasoningEffort;
    }
    return config;
}