@sean.holung/minicode 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/README.md +48 -43
  2. package/dist/scripts/run-benchmarks.js +147 -0
  3. package/dist/src/agent/config.js +149 -40
  4. package/dist/src/agent/editable-config.js +314 -0
  5. package/dist/src/analysis/structural-analysis.js +379 -0
  6. package/dist/src/benchmark/evaluator.js +79 -0
  7. package/dist/src/benchmark/index.js +4 -0
  8. package/dist/src/benchmark/reporter.js +177 -0
  9. package/dist/src/benchmark/runner.js +100 -0
  10. package/dist/src/benchmark/task-loader.js +78 -0
  11. package/dist/src/benchmark/types.js +5 -0
  12. package/dist/src/cli/args.js +10 -0
  13. package/dist/src/cli/config-slash-command.js +135 -0
  14. package/dist/src/cli/plugin-install.js +69 -0
  15. package/dist/src/index.js +76 -6
  16. package/dist/src/indexer/cache.js +6 -4
  17. package/dist/src/indexer/code-map.js +41 -13
  18. package/dist/src/indexer/plugins/typescript.js +70 -23
  19. package/dist/src/indexer/project-index.js +175 -36
  20. package/dist/src/indexer/symbol-names.js +92 -0
  21. package/dist/src/model-utils.js +18 -0
  22. package/dist/src/serve/agent-bridge.js +203 -24
  23. package/dist/src/serve/mcp-server.js +405 -0
  24. package/dist/src/serve/server.js +165 -10
  25. package/dist/src/serve/websocket.js +8 -0
  26. package/dist/src/shared/graph-styles.js +119 -0
  27. package/dist/src/tools/find-path.js +75 -0
  28. package/dist/src/tools/find-references.js +7 -2
  29. package/dist/src/tools/get-dependencies.js +3 -2
  30. package/dist/src/tools/read-symbol.js +12 -5
  31. package/dist/src/tools/registry.js +3 -1
  32. package/dist/src/tools/search-code-map.js +4 -2
  33. package/dist/src/ui/app.js +1 -1
  34. package/dist/src/ui/cli-ink.js +79 -4
  35. package/dist/src/ui/components/header-bar.js +6 -2
  36. package/dist/src/ui/state/ui-store.js +5 -0
  37. package/dist/src/web/app.js +1124 -176
  38. package/dist/src/web/index.html +113 -3
  39. package/dist/src/web/style.css +973 -55
  40. package/dist/tests/agent.test.js +31 -0
  41. package/dist/tests/analysis-helpers.test.js +89 -0
  42. package/dist/tests/analysis-ui.test.js +29 -0
  43. package/dist/tests/benchmark-harness.test.js +527 -0
  44. package/dist/tests/config-api.test.js +143 -0
  45. package/dist/tests/config-integration.test.js +751 -0
  46. package/dist/tests/config-slash-command.test.js +106 -0
  47. package/dist/tests/config.test.js +42 -1
  48. package/dist/tests/context-indicator.test.js +220 -0
  49. package/dist/tests/editable-config.test.js +109 -0
  50. package/dist/tests/find-path.test.js +183 -0
  51. package/dist/tests/focus-tracker.test.js +62 -0
  52. package/dist/tests/graph-onboarding.test.js +55 -0
  53. package/dist/tests/graph-styles.test.js +65 -0
  54. package/dist/tests/indexer.test.js +137 -0
  55. package/dist/tests/mcp-and-plugin.test.js +186 -0
  56. package/dist/tests/model-client-openai.test.js +29 -0
  57. package/dist/tests/model-selection.test.js +136 -0
  58. package/dist/tests/model-utils.test.js +22 -0
  59. package/dist/tests/reasoning-effort.test.js +264 -0
  60. package/dist/tests/run-benchmarks.test.js +161 -0
  61. package/dist/tests/search-code-map.test.js +18 -0
  62. package/dist/tests/serve.integration.test.js +218 -2
  63. package/dist/tests/session-ui.test.js +21 -0
  64. package/dist/tests/session.test.js +50 -0
  65. package/dist/tests/settings-ui.test.js +30 -0
  66. package/dist/tests/structural-analysis.test.js +218 -0
  67. package/node_modules/@minicode/agent-sdk/README.md +80 -51
  68. package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.d.ts +16 -5
  69. package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.d.ts.map +1 -1
  70. package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.js +51 -33
  71. package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.js.map +1 -1
  72. package/node_modules/@minicode/agent-sdk/dist/src/agent/types.d.ts +14 -0
  73. package/node_modules/@minicode/agent-sdk/dist/src/agent/types.d.ts.map +1 -1
  74. package/node_modules/@minicode/agent-sdk/dist/src/index.d.ts +3 -2
  75. package/node_modules/@minicode/agent-sdk/dist/src/index.d.ts.map +1 -1
  76. package/node_modules/@minicode/agent-sdk/dist/src/index.js +2 -0
  77. package/node_modules/@minicode/agent-sdk/dist/src/index.js.map +1 -1
  78. package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.d.ts +35 -0
  79. package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.d.ts.map +1 -0
  80. package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.js +64 -0
  81. package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.js.map +1 -0
  82. package/node_modules/@minicode/agent-sdk/dist/src/indexer/types.d.ts +7 -0
  83. package/node_modules/@minicode/agent-sdk/dist/src/indexer/types.d.ts.map +1 -1
  84. package/node_modules/@minicode/agent-sdk/dist/src/model/client.d.ts +5 -1
  85. package/node_modules/@minicode/agent-sdk/dist/src/model/client.d.ts.map +1 -1
  86. package/node_modules/@minicode/agent-sdk/dist/src/model/client.js +83 -11
  87. package/node_modules/@minicode/agent-sdk/dist/src/model/client.js.map +1 -1
  88. package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.d.ts +1 -0
  89. package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.d.ts.map +1 -1
  90. package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.js +8 -1
  91. package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.js.map +1 -1
  92. package/node_modules/@minicode/agent-sdk/dist/src/session/session.d.ts.map +1 -1
  93. package/node_modules/@minicode/agent-sdk/dist/src/session/session.js +4 -1
  94. package/node_modules/@minicode/agent-sdk/dist/src/session/session.js.map +1 -1
  95. package/node_modules/@minicode/agent-sdk/dist/tests/agent.test.js +3 -1
  96. package/node_modules/@minicode/agent-sdk/dist/tests/agent.test.js.map +1 -1
  97. package/node_modules/@minicode/agent-sdk/dist/tests/guardrails.test.js +8 -2
  98. package/node_modules/@minicode/agent-sdk/dist/tests/guardrails.test.js.map +1 -1
  99. package/node_modules/@minicode/agent-sdk/dist/tsconfig.tsbuildinfo +1 -1
  100. package/package.json +9 -5
  101. package/plugin/.claude-plugin/plugin.json +12 -0
  102. package/plugin/.mcp.json +8 -0
  103. package/plugin/CLAUDE.md +26 -0
  104. package/plugin/skills/analyze/SKILL.md +12 -0
  105. package/plugin/skills/focus/SKILL.md +20 -0
  106. package/plugin/skills/graph/SKILL.md +13 -0
  107. package/plugin/skills/symbols/SKILL.md +13 -0
package/README.md CHANGED
@@ -1,13 +1,13 @@
1
1
  # minicode
2
2
 
3
- A lightweight coding agent optimized for **local models** CLI-first with a built-in web UI. Provides AST-based intelligent context for smaller models running on consumer hardware.
3
+ A graph-native coding agent and code exploration environment built around structural context optimization. It started as a way to make local models viable under tighter context budgets, and it now also works well with hosted frontier models through the same runtime, web UI, and OpenAI-compatible serve mode.
4
4
 
5
- _New Web UI interface with code dependency graph visualizer. Updates in real time as agent explores the codebase. Run `minicode serve` to interact over localhost._
5
+ _Run `minicode serve` to get the web UI on localhost: chat, tool activity, session controls, model switching, symbol focus, annotations, and a live dependency graph._
6
6
 
7
7
  <img width="1723" height="920" alt="Screenshot 2026-03-26 at 6 30 23 PM" src="https://github.com/user-attachments/assets/499c8dc7-cc2b-4125-abd5-32b2fc9795ea" />
8
8
 
9
9
 
10
- Read operations dominate token usage in typical agent sessions; minicode addresses this by optimizing for **specific languages** indexing your project at startup with language plugins (TypeScript/JavaScript built-in) and injecting a compact **code map** (signatures only) into the system prompt, plus symbol-level tools (`read_symbol`, `find_references`, `get_dependencies`) so the model reads only what it needs instead of entire files. This keeps prompts lean enough for smaller models in the 20B range, with faster inference and better attention over the relevant code.
10
+ Read operations dominate token usage in typical agent sessions; minicode addresses this by optimizing for **specific languages**. It indexes your project at startup with language plugins, injects a compact **code map** (signatures only) into the system prompt, and exposes symbol-level tools (`read_symbol`, `find_references`, `get_dependencies`) so the model reads only what it needs instead of entire files. TypeScript and JavaScript support come built-in, with custom language plugins leaving room for broader language support over time.
11
11
 
12
12
  ## Quick Start (LM Studio)
13
13
 
@@ -17,22 +17,11 @@ Read operations dominate token usage in typical agent sessions; minicode address
17
17
  # 2. Install
18
18
  npm install -g @sean.holung/minicode
19
19
 
20
- # 3. Configure for local (no API key needed)
21
- mkdir -p ~/.minicode
20
+ # 3. Configure (~/.minicode/agent.config.json is auto-created on first run; if ~/.minicode does not exist yet, run `mkdir -p ~/.minicode` first)
21
+ # Set your model name — minicode will prompt you if this is missing.
22
22
  cat > ~/.minicode/.env << 'EOF'
23
23
  MODEL_PROVIDER=openai-compatible
24
- MODEL=zai-org/glm-4.7-flash
25
- OPENAI_BASE_URL=http://localhost:1234/v1
26
- OPENAI_API_KEY=
27
- MAX_STEPS=50
28
- MAX_TOKENS=4096
29
- MAX_CONTEXT_TOKENS=24000
30
- WORKSPACE_ROOT=.
31
- COMMAND_TIMEOUT_MS=30000
32
- MAX_FILE_SIZE_BYTES=1000000
33
- CONFIRM_DESTRUCTIVE=true
34
- KEEP_RECENT_MESSAGES=12
35
- LOOP_DETECTION_WINDOW=6
24
+ MODEL=your-model-name
36
25
  EOF
37
26
  ```
38
27
 
@@ -97,7 +86,7 @@ npm run install:global
97
86
  - **Web UI** — `minicode serve` starts an HTTP + WebSocket server with a bundled chat client, real-time streaming, session management, and project graph data endpoints
98
87
  - **OpenAI-compatible API** — any client that speaks the OpenAI protocol can use minicode as a backend at `/v1/chat/completions`
99
88
  - **Context optimization:** Code map in system prompt, `read_symbol`, `find_references`, `get_dependencies`
100
- - **Plugin system:** Extensible language support (TypeScript built-in)
89
+ - **Plugin system:** Extensible language support (TypeScript/JavaScript built in today)
101
90
 
102
91
  ## Context Optimization
103
92
 
@@ -110,9 +99,9 @@ For the proposed reusable package architecture and public interfaces for a stand
110
99
  minicode reduces token usage by indexing your project and providing targeted tools:
111
100
 
112
101
  - **Code map** — A compact project skeleton (signatures only) is injected into the system prompt so the model can orient itself without reading full files.
113
- - `**read_symbol`** — Read a specific function or class by name, with referenced types.
114
- - `**find_references**` — Find all symbols that reference a given symbol.
115
- - `**get_dependencies**` — Get the dependency cone of a symbol.
102
+ - `read_symbol` — Read a specific function or class by name, with referenced types.
103
+ - `find_references` — Find all symbols that reference a given symbol.
104
+ - `get_dependencies` — Get the dependency cone of a symbol.
116
105
 
117
106
  The index is cached in `~/.minicode/cache/<workspace-hash>/` for faster startup on subsequent runs. Caches are global and keyed by workspace path, so nothing is stored inside your project directories.
118
107
 
@@ -175,9 +164,9 @@ See [docs/PLUGIN_SPEC.md](docs/PLUGIN_SPEC.md) for the full specification. Quick
175
164
 
176
165
  Configuration can come from (later sources override earlier):
177
166
 
178
- 1. `**~/.minicode/.env`** — User-level defaults (API keys, model, etc.)
179
- 2. `**~/.minicode/agent.config.json**` — User-level JSON config
180
- 3. **Project `.env`** and `**agent.config.json**` in workspace root
167
+ 1. `~/.minicode/.env` — User-level defaults (API keys, model, etc.)
168
+ 2. `~/.minicode/agent.config.json` — User-level JSON config
169
+ 3. Project `.env` and `agent.config.json` in workspace root
181
170
  4. Environment variables (highest precedence)
182
171
 
183
172
  Nothing is written inside your workspace; config and cache live under `~/.minicode/`.
@@ -188,45 +177,41 @@ Nothing is written inside your workspace; config and cache live under `~/.minico
188
177
  | Variable | Required | Default | Notes |
189
178
  | ----------------------- | --------------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
190
179
  | `MODEL_PROVIDER` | No | `openai-compatible` | `anthropic` or `openai-compatible` (aliases: `openai`, `lmstudio`, `lm-studio`) |
191
- | `MODEL` | No | `zai-org/glm-4.7-flash` | Model name for selected provider |
180
+ | `MODEL` | Yes | none | Model name for selected provider |
192
181
  | `ANTHROPIC_API_KEY` | Yes (Anthropic) | none | Required when `MODEL_PROVIDER=anthropic` |
193
182
  | `OPENAI_BASE_URL` | No | `http://localhost:1234/v1` | Base URL for OpenAI-compatible API (LM Studio, etc.) |
194
183
  | `OPENAI_API_KEY` | No | none | Optional for local servers; required if your endpoint enforces auth |
184
+ | `OPENROUTER_API_KEY` | No | none | Preferred key when `OPENAI_BASE_URL` points at OpenRouter; falls back to `OPENAI_API_KEY` if unset |
195
185
  | `MAX_STEPS` | No | `50` | Max agent loop iterations per user turn |
196
186
  | `MAX_TOKENS` | No | `4096` | Max model output tokens per model call |
197
- | `MAX_CONTEXT_TOKENS` | No | `120000` | Approximate session history trimming target. For small models (e.g. 8k context), set lower (e.g. `6000`) to leave room for responses. |
198
- | `MAX_TOOL_OUTPUT_CHARS` | No | `15000` | Max chars per tool result before truncation. Set to `0` to disable. |
199
- | `WORKSPACE_ROOT` | No | current working directory | Root directory tools are allowed to access |
187
+ | `MAX_CONTEXT_TOKENS` | No | `32000` | Approximate session history trimming target. For small models (e.g. 8k context), set lower (e.g. `6000`) to leave room for responses. |
188
+ | `MAX_TOOL_OUTPUT_CHARS` | No | `8000` | Max chars per tool result before truncation. Set to `0` to disable. |
189
+ | `WORKSPACE_ROOT` | No | current working directory | Root directory tools are allowed to access (set at runtime, not typically configured) |
200
190
  | `COMMAND_TIMEOUT_MS` | No | `30000` | Timeout for shell/search commands |
201
191
  | `MAX_FILE_SIZE_BYTES` | No | `1000000` | Read limit for `read_file` |
202
192
  | `CONFIRM_DESTRUCTIVE` | No | `true` | If `true`, blocks destructive shell commands unless confirmed |
203
193
  | `KEEP_RECENT_MESSAGES` | No | `12` | Minimum number of latest messages kept during trimming |
204
194
  | `LOOP_DETECTION_WINDOW` | No | `6` | Window for repeated tool-call loop detection |
195
+ | `ENABLE_FILE_READ_DEDUP` | No | `true` | Reuses earlier `read_file` results within a turn when the same file slice is still in context |
196
+ | `ENABLE_ADAPTIVE_KEEP_RECENT` | No | `true` | Scales `keepRecentMessages` down as context fills so trimming gets more aggressive when needed |
197
+ | `ENABLE_TOOL_OUTPUT_TRUNCATION` | No | `true` | Enables content-aware truncation strategies for tool output instead of simple head-only clipping |
205
198
  | `COMPACTION_THRESHOLD` | No | `0.8` | Context fullness ratio (0–1) at which auto-compaction triggers |
206
199
  | `COMPACTION_MODEL` | No | none | Model for LLM-based compaction summaries. When set, `/compact` and auto-compaction use this model instead of mechanical truncation. Use a small, fast model (e.g. your local model). |
200
+ | `REASONING_EFFORT` | No | unset | Reasoning level for providers that support it. Valid values: `xhigh`, `high`, `medium`, `low`, `minimal`, `none` |
207
201
 
208
202
 
209
203
  ### `agent.config.json`
210
204
 
211
- Create `agent.config.json` in `~/.minicode/` for user-level defaults, or in the project root for workspace-specific overrides:
205
+ A global `~/.minicode/agent.config.json` is auto-created on first run. Only set what you need — everything else has sensible defaults:
212
206
 
213
207
  ```json
214
208
  {
215
209
  "modelProvider": "openai-compatible",
216
- "model": "zai-org/glm-4.7-flash",
210
+ "model": "your-model-name",
211
+ "openAiBaseUrl": "http://localhost:1234/v1",
217
212
  "maxSteps": 50,
218
213
  "maxTokens": 4096,
219
- "maxContextTokens": 120000,
220
- "workspaceRoot": ".",
221
- "commandTimeout": 30000,
222
- "commandDenylist": [],
223
- "confirmDestructive": true,
224
- "maxFileSizeBytes": 1000000,
225
- "keepRecentMessages": 12,
226
- "loopDetectionWindow": 6,
227
- "openAiBaseUrl": "http://localhost:1234/v1",
228
- "openAiApiKey": "",
229
- "compactionModel": ""
214
+ "maxContextTokens": 32000
230
215
  }
231
216
  ```
232
217
 
@@ -245,9 +230,14 @@ Field mapping:
245
230
  - `keepRecentMessages` ↔ `KEEP_RECENT_MESSAGES`
246
231
  - `loopDetectionWindow` ↔ `LOOP_DETECTION_WINDOW`
247
232
  - `openAiBaseUrl` ↔ `OPENAI_BASE_URL`
248
- - `openAiApiKey` ↔ `OPENAI_API_KEY`
233
+ - `openAiApiKey` ↔ `OPENAI_API_KEY` / `OPENROUTER_API_KEY` (when using OpenRouter)
234
+ - `maxToolOutputChars` ↔ `MAX_TOOL_OUTPUT_CHARS`
235
+ - `enableFileReadDedup` ↔ `ENABLE_FILE_READ_DEDUP`
236
+ - `enableAdaptiveKeepRecent` ↔ `ENABLE_ADAPTIVE_KEEP_RECENT`
237
+ - `enableToolOutputTruncation` ↔ `ENABLE_TOOL_OUTPUT_TRUNCATION`
249
238
  - `compactionThreshold` ↔ `COMPACTION_THRESHOLD`
250
239
  - `compactionModel` ↔ `COMPACTION_MODEL`
240
+ - `reasoningEffort` ↔ `REASONING_EFFORT`
251
241
 
252
242
  ## Usage
253
243
 
@@ -278,6 +268,19 @@ npm run dev -- --oneshot --json "Summarize TODOs"
278
268
  npm run dev -- --oneshot --out result.txt "Draft changelog"
279
269
  ```
280
270
 
271
+ Interactive slash commands:
272
+
273
+ - `/help`
274
+ - `/config`
275
+ - `/compact`
276
+ - `/reasoning [level]`
277
+ - `/models`
278
+ - `/model [name]`
279
+ - `/save [label]`
280
+ - `/load [label]`
281
+ - `/sessions`
282
+ - `/exit`
283
+
281
284
  ### Exit codes
282
285
 
283
286
  - `0`: Success
@@ -289,7 +292,9 @@ npm run dev -- --oneshot --out result.txt "Draft changelog"
289
292
  - `npm run dev` - start the CLI in TypeScript mode
290
293
  - `npm run dev:ink` - start with Ink UI (same as `dev` when in a TTY; use to override `CLI_UI_MODE=legacy`)
291
294
  - `npm run build` - compile TypeScript to `dist/`
295
+ - `npm run build:web` - build the bundled web client used by `minicode serve`
292
296
  - `npm start` - run compiled CLI
297
+ - `npm run install:global` - build and `npm link` the CLI locally
293
298
  - `npm run lint` - run ESLint on TypeScript source and tests
294
299
  - `npm test` - run Node test suite
295
-
300
+ - `npm run verify-index` - run the TypeScript index verification harness
@@ -0,0 +1,147 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * CLI entry point for running benchmark tasks.
4
+ *
5
+ * Usage:
6
+ * node --import tsx scripts/run-benchmarks.ts [options]
7
+ *
8
+ * Options:
9
+ * --category <name> Run only tasks in the given category
10
+ * --task <id> Run a single task by id (e.g. "navigation/find-symbol-definition")
11
+ * --variant <label> Variant label for the report (default: "ci")
12
+ * --out <path> Write the JSON report to a file
13
+ *
14
+ * Environment:
15
+ * MODEL_PROVIDER, MODEL, OPENAI_BASE_URL, OPENAI_API_KEY, ANTHROPIC_API_KEY
16
+ * — same as minicode runtime config.
17
+ */
18
+ import path from "node:path";
19
+ import { writeFile } from "node:fs/promises";
20
+ import { createModelClient, createReadFileTool, createWriteFileTool, createEditFileTool, createSearchTool, createListFilesTool, createRunCommandTool, } from "@minicode/agent-sdk";
21
+ import { loadBenchmarkTasks, loadBenchmarkTask } from "../src/benchmark/task-loader.js";
22
+ import { runBenchmarkSuite } from "../src/benchmark/runner.js";
23
+ import { buildReport, formatReport } from "../src/benchmark/reporter.js";
24
+ export function parseArgs(argv) {
25
+ const args = { variant: "ci" };
26
+ for (let i = 0; i < argv.length; i++) {
27
+ const arg = argv[i];
28
+ const next = argv[i + 1];
29
+ if (arg === "--category" && next) {
30
+ args.category = next;
31
+ i++;
32
+ }
33
+ else if (arg === "--task" && next) {
34
+ args.task = next;
35
+ i++;
36
+ }
37
+ else if (arg === "--variant" && next) {
38
+ args.variant = next;
39
+ i++;
40
+ }
41
+ else if (arg === "--out" && next) {
42
+ args.out = next;
43
+ i++;
44
+ }
45
+ }
46
+ return args;
47
+ }
48
+ /* ------------------------------------------------------------------ */
49
+ /* Config builder */
50
+ /* ------------------------------------------------------------------ */
51
+ export function buildConfig() {
52
+ const provider = (process.env.MODEL_PROVIDER ?? "openai-compatible");
53
+ const model = process.env.MODEL ?? "test-model";
54
+ return {
55
+ modelProvider: provider,
56
+ model,
57
+ maxSteps: Number(process.env.MAX_STEPS ?? "50"),
58
+ maxTokens: Number(process.env.MAX_TOKENS ?? "4096"),
59
+ maxContextTokens: Number(process.env.MAX_CONTEXT_TOKENS ?? "32000"),
60
+ workspaceRoot: process.cwd(),
61
+ commandTimeoutMs: Number(process.env.COMMAND_TIMEOUT_MS ?? "30000"),
62
+ maxFileSizeBytes: Number(process.env.MAX_FILE_SIZE_BYTES ?? "1000000"),
63
+ commandDenylist: [],
64
+ confirmDestructive: false,
65
+ keepRecentMessages: Number(process.env.KEEP_RECENT_MESSAGES ?? "12"),
66
+ loopDetectionWindow: Number(process.env.LOOP_DETECTION_WINDOW ?? "6"),
67
+ maxToolOutputChars: Number(process.env.MAX_TOOL_OUTPUT_CHARS ?? "8000"),
68
+ openAiBaseUrl: process.env.OPENAI_BASE_URL ?? "http://localhost:1234/v1",
69
+ ...(process.env.OPENAI_API_KEY ? { openAiApiKey: process.env.OPENAI_API_KEY } : {}),
70
+ };
71
+ }
72
+ /* ------------------------------------------------------------------ */
73
+ /* Task loading */
74
+ /* ------------------------------------------------------------------ */
75
+ export async function loadTasks(tasksDir, args) {
76
+ if (args.task) {
77
+ const single = await loadBenchmarkTask(tasksDir, args.task);
78
+ if (!single) {
79
+ throw new Error(`Task not found: ${args.task}`);
80
+ }
81
+ return [single];
82
+ }
83
+ let tasks = await loadBenchmarkTasks(tasksDir);
84
+ if (args.category) {
85
+ tasks = tasks.filter((t) => t.category === args.category);
86
+ if (tasks.length === 0) {
87
+ throw new Error(`No tasks found for category: ${args.category}`);
88
+ }
89
+ }
90
+ return tasks;
91
+ }
92
+ /* ------------------------------------------------------------------ */
93
+ /* Main */
94
+ /* ------------------------------------------------------------------ */
95
+ async function main() {
96
+ const args = parseArgs(process.argv.slice(2));
97
+ const config = buildConfig();
98
+ const tasksDir = path.resolve(process.cwd(), "benchmarks", "tasks");
99
+ console.log(`Benchmark runner starting...`);
100
+ console.log(` Provider: ${config.modelProvider}`);
101
+ console.log(` Model: ${config.model}`);
102
+ console.log(` Variant: ${args.variant}`);
103
+ const tasks = await loadTasks(tasksDir, args);
104
+ console.log(` Tasks: ${tasks.length}`);
105
+ console.log("");
106
+ const modelClient = createModelClient(config);
107
+ const tools = [
108
+ createReadFileTool(config),
109
+ createWriteFileTool(config),
110
+ createEditFileTool(config),
111
+ createSearchTool(config),
112
+ createListFilesTool(config),
113
+ createRunCommandTool(config),
114
+ ];
115
+ const traces = await runBenchmarkSuite(tasks, {
116
+ modelClient,
117
+ config,
118
+ tools,
119
+ variant: args.variant,
120
+ onTaskComplete: (taskId, trace) => {
121
+ const dur = (trace.durationMs / 1000).toFixed(1);
122
+ console.log(` [done] ${taskId} (${dur}s, ${trace.toolCalls.length} tool calls)`);
123
+ },
124
+ });
125
+ const report = buildReport(tasks, traces, args.variant, config.model);
126
+ const formatted = formatReport(report);
127
+ console.log("");
128
+ console.log(formatted);
129
+ if (args.out) {
130
+ const outPath = path.resolve(args.out);
131
+ await writeFile(outPath, JSON.stringify(report, null, 2), "utf8");
132
+ console.log(`\nReport written to ${outPath}`);
133
+ }
134
+ // Exit with failure if any task failed
135
+ if (report.summary.failed > 0) {
136
+ process.exitCode = 1;
137
+ }
138
+ }
139
+ // Only run main when executed directly (not imported for testing)
140
+ const isDirectRun = process.argv[1]?.endsWith("run-benchmarks.ts") ||
141
+ process.argv[1]?.endsWith("run-benchmarks.js");
142
+ if (isDirectRun) {
143
+ main().catch((err) => {
144
+ console.error("Benchmark runner failed:", err);
145
+ process.exitCode = 1;
146
+ });
147
+ }
@@ -1,8 +1,7 @@
1
- import { access, readFile } from "node:fs/promises";
1
+ import { access, mkdir, readFile, writeFile } from "node:fs/promises";
2
2
  import os from "node:os";
3
3
  import path from "node:path";
4
4
  import process from "node:process";
5
- import { fileURLToPath } from "node:url";
6
5
  import dotenv from "dotenv";
7
6
  /** User-level config directory: ~/.minicode */
8
7
  export const MINICODE_HOME = path.join(os.homedir(), ".minicode");
@@ -32,17 +31,52 @@ export function formatConfigForDisplay(config) {
32
31
  "enableToolOutputTruncation: " + (config.enableToolOutputTruncation ?? false),
33
32
  "compactionThreshold: " + (config.compactionThreshold ?? "(disabled)"),
34
33
  "compactionModel: " + (config.compactionModel ?? "(disabled — using mechanical compaction)"),
34
+ "reasoningEffort: " + (config.reasoningEffort ?? "(unset — no reasoning parameters sent)"),
35
+ "enableDynamicPrompt: " + (config.enableDynamicPrompt ?? true),
35
36
  ];
36
37
  return lines.join("\n");
37
38
  }
38
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
39
- const envPath = __dirname.includes(`${path.sep}dist${path.sep}`)
40
- ? path.resolve(__dirname, "../../../.env")
41
- : path.resolve(__dirname, "../../.env");
42
- // Load order: user home (~/.minicode/.env) < project .env < cwd .env
43
- dotenv.config({ path: path.join(MINICODE_HOME, ".env") });
44
- dotenv.config({ path: envPath, override: true });
45
- dotenv.config({ path: path.resolve(process.cwd(), ".env"), override: true });
39
+ /**
40
+ * getConfigSetupMessage (defined below) checks whether the config has enough information to connect to a model provider.
41
+ * It returns null if the config is valid, or a user-facing setup message if not.
42
+ */
43
+ /**
44
+ * Return a list of missing config items that prevent the agent from running.
45
+ * Empty array means the config is valid.
46
+ */
47
+ export function getConfigMissing(config) {
48
+ const missing = [];
49
+ if (!config.model) {
50
+ missing.push("MODEL is not set");
51
+ }
52
+ if (config.modelProvider === "anthropic" && !process.env.ANTHROPIC_API_KEY) {
53
+ missing.push("ANTHROPIC_API_KEY is not set");
54
+ }
55
+ return missing;
56
+ }
57
+ export function getConfigSetupMessage(config) {
58
+ const missing = getConfigMissing(config);
59
+ if (missing.length === 0) {
60
+ return null;
61
+ }
62
+ return [
63
+ "minicode is not configured yet. Missing:",
64
+ ...missing.map((m) => ` - ${m}`),
65
+ "",
66
+ `Set these in ~/.minicode/.env or as environment variables.`,
67
+ `Edit ~/.minicode/agent.config.json for non-secret settings.`,
68
+ "",
69
+ "Example ~/.minicode/.env for a local model:",
70
+ " MODEL_PROVIDER=openai-compatible",
71
+ " OPENAI_BASE_URL=http://localhost:1234/v1",
72
+ " MODEL=your-model-name",
73
+ "",
74
+ "Example for Anthropic:",
75
+ " MODEL_PROVIDER=anthropic",
76
+ " ANTHROPIC_API_KEY=sk-ant-...",
77
+ " MODEL=claude-sonnet-4-20250514",
78
+ ].join("\n");
79
+ }
46
80
  const DEFAULT_COMMAND_DENYLIST = [
47
81
  /\brm\s+-rf\s+\//i,
48
82
  /\bmkfs\b/i,
@@ -54,6 +88,15 @@ const DEFAULT_COMMAND_DENYLIST = [
54
88
  /\binit\s+0\b/i,
55
89
  /\bchmod\s+-R\s+777\s+\//i,
56
90
  ];
91
+ const VALID_REASONING_EFFORTS = new Set([
92
+ "xhigh", "high", "medium", "low", "minimal", "none",
93
+ ]);
94
+ function parseReasoningEffort(value) {
95
+ if (!value)
96
+ return undefined;
97
+ const normalized = value.trim().toLowerCase();
98
+ return VALID_REASONING_EFFORTS.has(normalized) ? normalized : undefined;
99
+ }
57
100
  function parseNumber(value, fallback) {
58
101
  if (!value) {
59
102
  return fallback;
@@ -74,7 +117,7 @@ function parseBoolean(value, fallback) {
74
117
  }
75
118
  return fallback;
76
119
  }
77
- async function loadConfigFile(configPath) {
120
+ export async function loadConfigFile(configPath) {
78
121
  try {
79
122
  await access(configPath);
80
123
  }
@@ -88,6 +131,48 @@ async function loadConfigFile(configPath) {
88
131
  }
89
132
  return parsed;
90
133
  }
134
+ async function loadDotenvFile(envPath) {
135
+ try {
136
+ const file = await readFile(envPath, "utf8");
137
+ return dotenv.parse(file);
138
+ }
139
+ catch {
140
+ return {};
141
+ }
142
+ }
143
+ function applyEnvLayer(target, sources, layer, source, override) {
144
+ for (const [key, value] of Object.entries(layer)) {
145
+ if (!override && target[key] !== undefined) {
146
+ continue;
147
+ }
148
+ target[key] = value;
149
+ sources[key] = source;
150
+ }
151
+ }
152
+ function applyProcessEnv(target, sources) {
153
+ for (const [key, value] of Object.entries(process.env)) {
154
+ if (value === undefined) {
155
+ continue;
156
+ }
157
+ target[key] = value;
158
+ sources[key] = "process";
159
+ }
160
+ }
161
+ export async function resolveConfigEnv(options = {}) {
162
+ const minicodeHome = options.minicodeHome ?? MINICODE_HOME;
163
+ const homeEnvPath = path.join(minicodeHome, ".env");
164
+ const values = {};
165
+ const sources = {};
166
+ // Base: ~/.minicode/.env
167
+ applyEnvLayer(values, sources, await loadDotenvFile(homeEnvPath), "home-dotenv", true);
168
+ // Override: shell environment variables take precedence
169
+ applyProcessEnv(values, sources);
170
+ return {
171
+ values,
172
+ sources,
173
+ homeEnvPath,
174
+ };
175
+ }
91
176
  function parseUserDenylist(patterns) {
92
177
  if (!patterns?.length) {
93
178
  return [];
@@ -113,51 +198,75 @@ function parseModelProvider(value) {
113
198
  }
114
199
  return "anthropic";
115
200
  }
116
- export async function loadAgentConfig(cwd = process.cwd()) {
117
- const homeConfigPath = path.join(MINICODE_HOME, "agent.config.json");
118
- const workspaceConfigPath = path.resolve(cwd, "agent.config.json");
119
- const homeConfig = await loadConfigFile(homeConfigPath);
120
- const workspaceConfig = await loadConfigFile(workspaceConfigPath);
121
- const fileConfig = { ...homeConfig, ...workspaceConfig };
122
- const rawWorkspaceRoot = process.env.WORKSPACE_ROOT ?? fileConfig.workspaceRoot ?? cwd;
201
/** Contents written to a freshly created `agent.config.json`. */
const DEFAULT_CONFIG_CONTENT = `{
  "modelProvider": "openai-compatible",
  "model": "",
  "openAiBaseUrl": "http://localhost:1234/v1",
  "maxSteps": 50,
  "maxTokens": 4096,
  "maxContextTokens": 32000
}
`;

/**
 * Makes sure the minicode home directory exists and contains an
 * `agent.config.json`, seeding it with defaults when it is absent.
 *
 * An existing config file is never modified.
 *
 * @param {string} minicodeHome - Directory that should hold the config file.
 * @returns {Promise<void>}
 * @throws Propagates any filesystem error other than the config file
 *   already existing.
 */
async function ensureMinicodeHome(minicodeHome) {
  await mkdir(minicodeHome, { recursive: true });
  const configPath = path.join(minicodeHome, "agent.config.json");
  try {
    // "wx" creates the file exclusively, so a config written by another
    // process between a separate existence check and the write can never
    // be clobbered.
    await writeFile(configPath, DEFAULT_CONFIG_CONTENT, {
      encoding: "utf8",
      flag: "wx",
    });
  } catch (error) {
    if (error?.code !== "EEXIST") {
      throw error;
    }
  }
}
220
/**
 * Looks up `name` in the resolved environment, treating an empty or
 * whitespace-only string as "not set" so it cannot mask a configured value.
 */
function readEnvSetting(env, name) {
  const raw = env[name];
  return typeof raw === "string" && raw.trim() === "" ? undefined : raw;
}

/**
 * Loads the agent configuration.
 *
 * Each setting is resolved in this order: the environment returned by
 * `resolveConfigEnv` (process environment over `<minicodeHome>/.env`),
 * then `<minicodeHome>/agent.config.json`, then a built-in default.
 * The home directory and a default config file are created on first use.
 *
 * @param {string} [cwd=process.cwd()] - Base directory used to resolve the
 *   workspace root.
 * @param {{ minicodeHome?: string }} [options] - Optional override of the
 *   minicode home directory; falls back to `MINICODE_HOME`.
 * @returns {Promise<object>} The resolved configuration. `openAiApiKey`,
 *   `compactionModel` and `reasoningEffort` are present only when a value
 *   was resolved for them.
 */
export async function loadAgentConfig(cwd = process.cwd(), options = {}) {
  const minicodeHome = options.minicodeHome ?? MINICODE_HOME;
  await ensureMinicodeHome(minicodeHome);
  const homeConfigPath = path.join(minicodeHome, "agent.config.json");
  // Tolerate a nullish result so the property reads below cannot throw.
  const fileConfig = (await loadConfigFile(homeConfigPath)) ?? {};
  const { values: env } = await resolveConfigEnv({ minicodeHome });
  // String settings must ignore blank env values (e.g. `OPENAI_API_KEY=`
  // in a template .env); the numeric parser already treats them as unset.
  const envSetting = (name) => readEnvSetting(env, name);

  const rawWorkspaceRoot = envSetting("WORKSPACE_ROOT") ?? fileConfig.workspaceRoot ?? cwd;
  const workspaceRoot = path.resolve(cwd, rawWorkspaceRoot);
  const commandDenylist = [
    ...DEFAULT_COMMAND_DENYLIST,
    ...parseUserDenylist(fileConfig.commandDenylist),
  ];
  const rawBaseUrl = envSetting("OPENAI_BASE_URL") ??
    fileConfig.openAiBaseUrl ??
    "http://localhost:1234/v1";
  // OpenRouter endpoints prefer their dedicated key, then the generic one.
  const isOpenRouter = rawBaseUrl.includes("openrouter");
  const openAiApiKey = isOpenRouter
    ? (envSetting("OPENROUTER_API_KEY") ??
      envSetting("OPENAI_API_KEY") ??
      fileConfig.openAiApiKey)
    : (envSetting("OPENAI_API_KEY") ?? fileConfig.openAiApiKey);
  const compactionModel = envSetting("COMPACTION_MODEL") ?? fileConfig.compactionModel;
  const reasoningEffort = parseReasoningEffort(envSetting("REASONING_EFFORT") ?? fileConfig.reasoningEffort);

  return {
    modelProvider: parseModelProvider(envSetting("MODEL_PROVIDER") ?? fileConfig.modelProvider ?? "openai-compatible"),
    model: envSetting("MODEL") ??
      fileConfig.model ??
      "",
    maxSteps: parseNumber(env.MAX_STEPS, fileConfig.maxSteps ?? 50),
    maxTokens: parseNumber(env.MAX_TOKENS, fileConfig.maxTokens ?? 4096),
    maxContextTokens: parseNumber(env.MAX_CONTEXT_TOKENS, fileConfig.maxContextTokens ?? 32_000),
    workspaceRoot,
    commandTimeoutMs: parseNumber(env.COMMAND_TIMEOUT_MS, fileConfig.commandTimeout ?? 30_000),
    maxFileSizeBytes: parseNumber(env.MAX_FILE_SIZE_BYTES, fileConfig.maxFileSizeBytes ?? 1_000_000),
    commandDenylist,
    confirmDestructive: parseBoolean(env.CONFIRM_DESTRUCTIVE, fileConfig.confirmDestructive ?? true),
    keepRecentMessages: parseNumber(env.KEEP_RECENT_MESSAGES, fileConfig.keepRecentMessages ?? 12),
    loopDetectionWindow: parseNumber(env.LOOP_DETECTION_WINDOW, fileConfig.loopDetectionWindow ?? 6),
    maxToolOutputChars: parseNumber(env.MAX_TOOL_OUTPUT_CHARS, fileConfig.maxToolOutputChars ?? 8_000),
    openAiBaseUrl: rawBaseUrl,
    ...(openAiApiKey !== undefined ? { openAiApiKey } : {}),
    enableFileReadDedup: parseBoolean(env.ENABLE_FILE_READ_DEDUP, fileConfig.enableFileReadDedup ?? true),
    enableAdaptiveKeepRecent: parseBoolean(env.ENABLE_ADAPTIVE_KEEP_RECENT, fileConfig.enableAdaptiveKeepRecent ?? true),
    enableToolOutputTruncation: parseBoolean(env.ENABLE_TOOL_OUTPUT_TRUNCATION, fileConfig.enableToolOutputTruncation ?? true),
    compactionThreshold: parseNumber(env.COMPACTION_THRESHOLD, fileConfig.compactionThreshold ?? 0.8),
    ...(compactionModel ? { compactionModel } : {}),
    enableDynamicPrompt: parseBoolean(env.ENABLE_DYNAMIC_PROMPT, fileConfig.enableDynamicPrompt ?? true),
    ...(reasoningEffort ? { reasoningEffort } : {}),
  };
}