@sean.holung/minicode 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -42
- package/dist/scripts/run-benchmarks.js +147 -0
- package/dist/src/agent/config.js +149 -40
- package/dist/src/agent/editable-config.js +314 -0
- package/dist/src/analysis/structural-analysis.js +379 -0
- package/dist/src/benchmark/evaluator.js +79 -0
- package/dist/src/benchmark/index.js +4 -0
- package/dist/src/benchmark/reporter.js +177 -0
- package/dist/src/benchmark/runner.js +100 -0
- package/dist/src/benchmark/task-loader.js +78 -0
- package/dist/src/benchmark/types.js +5 -0
- package/dist/src/cli/args.js +10 -0
- package/dist/src/cli/config-slash-command.js +135 -0
- package/dist/src/cli/plugin-install.js +69 -0
- package/dist/src/index.js +76 -6
- package/dist/src/indexer/cache.js +6 -4
- package/dist/src/indexer/code-map.js +41 -13
- package/dist/src/indexer/plugins/typescript.js +70 -23
- package/dist/src/indexer/project-index.js +175 -36
- package/dist/src/indexer/symbol-names.js +92 -0
- package/dist/src/model-utils.js +18 -0
- package/dist/src/serve/agent-bridge.js +203 -24
- package/dist/src/serve/mcp-server.js +405 -0
- package/dist/src/serve/server.js +165 -10
- package/dist/src/serve/websocket.js +8 -0
- package/dist/src/shared/graph-styles.js +119 -0
- package/dist/src/tools/find-path.js +75 -0
- package/dist/src/tools/find-references.js +7 -2
- package/dist/src/tools/get-dependencies.js +3 -2
- package/dist/src/tools/read-symbol.js +12 -5
- package/dist/src/tools/registry.js +3 -1
- package/dist/src/tools/search-code-map.js +4 -2
- package/dist/src/ui/app.js +1 -1
- package/dist/src/ui/cli-ink.js +79 -4
- package/dist/src/ui/components/header-bar.js +6 -2
- package/dist/src/ui/state/ui-store.js +5 -0
- package/dist/src/web/app.js +1124 -176
- package/dist/src/web/index.html +113 -3
- package/dist/src/web/style.css +973 -55
- package/dist/tests/agent.test.js +31 -0
- package/dist/tests/analysis-helpers.test.js +89 -0
- package/dist/tests/analysis-ui.test.js +29 -0
- package/dist/tests/benchmark-harness.test.js +527 -0
- package/dist/tests/config-api.test.js +143 -0
- package/dist/tests/config-integration.test.js +751 -0
- package/dist/tests/config-slash-command.test.js +106 -0
- package/dist/tests/config.test.js +42 -1
- package/dist/tests/context-indicator.test.js +220 -0
- package/dist/tests/editable-config.test.js +109 -0
- package/dist/tests/find-path.test.js +183 -0
- package/dist/tests/focus-tracker.test.js +62 -0
- package/dist/tests/graph-onboarding.test.js +55 -0
- package/dist/tests/graph-styles.test.js +65 -0
- package/dist/tests/indexer.test.js +137 -0
- package/dist/tests/mcp-and-plugin.test.js +186 -0
- package/dist/tests/model-client-openai.test.js +29 -0
- package/dist/tests/model-selection.test.js +136 -0
- package/dist/tests/model-utils.test.js +22 -0
- package/dist/tests/reasoning-effort.test.js +264 -0
- package/dist/tests/run-benchmarks.test.js +161 -0
- package/dist/tests/search-code-map.test.js +18 -0
- package/dist/tests/serve.integration.test.js +218 -2
- package/dist/tests/session-ui.test.js +21 -0
- package/dist/tests/session.test.js +50 -0
- package/dist/tests/settings-ui.test.js +30 -0
- package/dist/tests/structural-analysis.test.js +218 -0
- package/node_modules/@minicode/agent-sdk/README.md +80 -51
- package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.d.ts +16 -5
- package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.d.ts.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.js +51 -33
- package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.js.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/agent/types.d.ts +14 -0
- package/node_modules/@minicode/agent-sdk/dist/src/agent/types.d.ts.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/index.d.ts +3 -2
- package/node_modules/@minicode/agent-sdk/dist/src/index.d.ts.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/index.js +2 -0
- package/node_modules/@minicode/agent-sdk/dist/src/index.js.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.d.ts +35 -0
- package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.d.ts.map +1 -0
- package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.js +64 -0
- package/node_modules/@minicode/agent-sdk/dist/src/indexer/focus-tracker.js.map +1 -0
- package/node_modules/@minicode/agent-sdk/dist/src/indexer/types.d.ts +7 -0
- package/node_modules/@minicode/agent-sdk/dist/src/indexer/types.d.ts.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/model/client.d.ts +5 -1
- package/node_modules/@minicode/agent-sdk/dist/src/model/client.d.ts.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/model/client.js +83 -11
- package/node_modules/@minicode/agent-sdk/dist/src/model/client.js.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.d.ts +1 -0
- package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.d.ts.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.js +8 -1
- package/node_modules/@minicode/agent-sdk/dist/src/safety/guardrails.js.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/session/session.d.ts.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/session/session.js +4 -1
- package/node_modules/@minicode/agent-sdk/dist/src/session/session.js.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/tests/agent.test.js +3 -1
- package/node_modules/@minicode/agent-sdk/dist/tests/agent.test.js.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/tests/guardrails.test.js +8 -2
- package/node_modules/@minicode/agent-sdk/dist/tests/guardrails.test.js.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +9 -5
- package/plugin/.claude-plugin/plugin.json +12 -0
- package/plugin/.mcp.json +8 -0
- package/plugin/CLAUDE.md +26 -0
- package/plugin/skills/analyze/SKILL.md +12 -0
- package/plugin/skills/focus/SKILL.md +20 -0
- package/plugin/skills/graph/SKILL.md +13 -0
- package/plugin/skills/symbols/SKILL.md +13 -0
package/README.md
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
# minicode
|
|
2
2
|
|
|
3
|
-
A
|
|
3
|
+
A graph-native coding agent and code exploration environment built around structural context optimization. It started as a way to make local models viable under tighter context budgets, and it now also works well with hosted frontier models through the same runtime, web UI, and OpenAI-compatible serve mode.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
_Run `minicode serve` to get the web UI on localhost: chat, tool activity, session controls, model switching, symbol focus, annotations, and a live dependency graph._
|
|
6
|
+
|
|
7
|
+
<img width="1723" height="920" alt="Screenshot 2026-03-26 at 6 30 23 PM" src="https://github.com/user-attachments/assets/499c8dc7-cc2b-4125-abd5-32b2fc9795ea" />
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
Read operations dominate token usage in typical agent sessions; minicode addresses this by optimizing for **specific languages**. It indexes your project at startup with language plugins, injects a compact **code map** (signatures only) into the system prompt, and exposes symbol-level tools (`read_symbol`, `find_references`, `get_dependencies`) so the model reads only what it needs instead of entire files. TypeScript and JavaScript support comes built in, and custom language plugins leave room for broader language support over time.
|
|
6
11
|
|
|
7
12
|
## Quick Start (LM Studio)
|
|
8
13
|
|
|
@@ -12,22 +17,11 @@ Read operations dominate token usage in typical agent sessions; minicode address
|
|
|
12
17
|
# 2. Install
|
|
13
18
|
npm install -g @sean.holung/minicode
|
|
14
19
|
|
|
15
|
-
# 3. Configure
|
|
16
|
-
|
|
20
|
+
# 3. Configure (~/.minicode/agent.config.json is auto-created on first run)
|
|
21
|
+
# Set your model name — minicode will prompt you if this is missing.
|
|
17
22
|
cat > ~/.minicode/.env << 'EOF'
|
|
18
23
|
MODEL_PROVIDER=openai-compatible
|
|
19
|
-
MODEL=
|
|
20
|
-
OPENAI_BASE_URL=http://localhost:1234/v1
|
|
21
|
-
OPENAI_API_KEY=
|
|
22
|
-
MAX_STEPS=50
|
|
23
|
-
MAX_TOKENS=4096
|
|
24
|
-
MAX_CONTEXT_TOKENS=24000
|
|
25
|
-
WORKSPACE_ROOT=.
|
|
26
|
-
COMMAND_TIMEOUT_MS=30000
|
|
27
|
-
MAX_FILE_SIZE_BYTES=1000000
|
|
28
|
-
CONFIRM_DESTRUCTIVE=true
|
|
29
|
-
KEEP_RECENT_MESSAGES=12
|
|
30
|
-
LOOP_DETECTION_WINDOW=6
|
|
24
|
+
MODEL=your-model-name
|
|
31
25
|
EOF
|
|
32
26
|
```
|
|
33
27
|
|
|
@@ -92,7 +86,7 @@ npm run install:global
|
|
|
92
86
|
- **Web UI** — `minicode serve` starts an HTTP + WebSocket server with a bundled chat client, real-time streaming, session management, and project graph data endpoints
|
|
93
87
|
- **OpenAI-compatible API** — any client that speaks the OpenAI protocol can use minicode as a backend at `/v1/chat/completions`
|
|
94
88
|
- **Context optimization:** Code map in system prompt, `read_symbol`, `find_references`, `get_dependencies`
|
|
95
|
-
- **Plugin system:** Extensible language support (TypeScript built
|
|
89
|
+
- **Plugin system:** Extensible language support (TypeScript/JavaScript built in today)
|
|
96
90
|
|
|
97
91
|
## Context Optimization
|
|
98
92
|
|
|
@@ -105,9 +99,9 @@ For the proposed reusable package architecture and public interfaces for a stand
|
|
|
105
99
|
minicode reduces token usage by indexing your project and providing targeted tools:
|
|
106
100
|
|
|
107
101
|
- **Code map** — A compact project skeleton (signatures only) is injected into the system prompt so the model can orient itself without reading full files.
|
|
108
|
-
-
|
|
109
|
-
-
|
|
110
|
-
-
|
|
102
|
+
- `read_symbol` — Read a specific function or class by name, with referenced types.
|
|
103
|
+
- `find_references` — Find all symbols that reference a given symbol.
|
|
104
|
+
- `get_dependencies` — Get the dependency cone of a symbol.
|
|
111
105
|
|
|
112
106
|
The index is cached in `~/.minicode/cache/<workspace-hash>/` for faster startup on subsequent runs. Caches are global and keyed by workspace path, so nothing is stored inside your project directories.
|
|
113
107
|
|
|
@@ -170,9 +164,9 @@ See [docs/PLUGIN_SPEC.md](docs/PLUGIN_SPEC.md) for the full specification. Quick
|
|
|
170
164
|
|
|
171
165
|
Configuration can come from (later sources override earlier):
|
|
172
166
|
|
|
173
|
-
1.
|
|
174
|
-
2.
|
|
175
|
-
3.
|
|
167
|
+
1. `~/.minicode/.env` — User-level defaults (API keys, model, etc.)
|
|
168
|
+
2. `~/.minicode/agent.config.json` — User-level JSON config
|
|
169
|
+
3. Project `.env` and `agent.config.json` in workspace root
|
|
176
170
|
4. Environment variables (highest precedence)
|
|
177
171
|
|
|
178
172
|
Nothing is written inside your workspace; config and cache live under `~/.minicode/`.
|
|
@@ -183,45 +177,41 @@ Nothing is written inside your workspace; config and cache live under `~/.minico
|
|
|
183
177
|
| Variable | Required | Default | Notes |
|
|
184
178
|
| ----------------------- | --------------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
|
|
185
179
|
| `MODEL_PROVIDER` | No | `openai-compatible` | `anthropic` or `openai-compatible` (aliases: `openai`, `lmstudio`, `lm-studio`) |
|
|
186
|
-
| `MODEL` |
|
|
180
|
+
| `MODEL` | Yes | none | Model name for selected provider |
|
|
187
181
|
| `ANTHROPIC_API_KEY` | Yes (Anthropic) | none | Required when `MODEL_PROVIDER=anthropic` |
|
|
188
182
|
| `OPENAI_BASE_URL` | No | `http://localhost:1234/v1` | Base URL for OpenAI-compatible API (LM Studio, etc.) |
|
|
189
183
|
| `OPENAI_API_KEY` | No | none | Optional for local servers; required if your endpoint enforces auth |
|
|
184
|
+
| `OPENROUTER_API_KEY` | No | none | Preferred key when `OPENAI_BASE_URL` points at OpenRouter; falls back to `OPENAI_API_KEY` if unset |
|
|
190
185
|
| `MAX_STEPS` | No | `50` | Max agent loop iterations per user turn |
|
|
191
186
|
| `MAX_TOKENS` | No | `4096` | Max model output tokens per model call |
|
|
192
|
-
| `MAX_CONTEXT_TOKENS` | No | `
|
|
193
|
-
| `MAX_TOOL_OUTPUT_CHARS` | No | `
|
|
194
|
-
| `WORKSPACE_ROOT` | No | current working directory | Root directory tools are allowed to access
|
|
187
|
+
| `MAX_CONTEXT_TOKENS` | No | `32000` | Approximate session history trimming target. For small models (e.g. 8k context), set lower (e.g. `6000`) to leave room for responses. |
|
|
188
|
+
| `MAX_TOOL_OUTPUT_CHARS` | No | `8000` | Max chars per tool result before truncation. Set to `0` to disable. |
|
|
189
|
+
| `WORKSPACE_ROOT` | No | current working directory | Root directory tools are allowed to access (set at runtime, not typically configured) |
|
|
195
190
|
| `COMMAND_TIMEOUT_MS` | No | `30000` | Timeout for shell/search commands |
|
|
196
191
|
| `MAX_FILE_SIZE_BYTES` | No | `1000000` | Read limit for `read_file` |
|
|
197
192
|
| `CONFIRM_DESTRUCTIVE` | No | `true` | If `true`, blocks destructive shell commands unless confirmed |
|
|
198
193
|
| `KEEP_RECENT_MESSAGES` | No | `12` | Minimum number of latest messages kept during trimming |
|
|
199
194
|
| `LOOP_DETECTION_WINDOW` | No | `6` | Window for repeated tool-call loop detection |
|
|
195
|
+
| `ENABLE_FILE_READ_DEDUP` | No | `true` | Reuses earlier `read_file` results within a turn when the same file slice is still in context |
|
|
196
|
+
| `ENABLE_ADAPTIVE_KEEP_RECENT` | No | `true` | Scales `keepRecentMessages` down as context fills so trimming gets more aggressive when needed |
|
|
197
|
+
| `ENABLE_TOOL_OUTPUT_TRUNCATION` | No | `true` | Enables content-aware truncation strategies for tool output instead of simple head-only clipping |
|
|
200
198
|
| `COMPACTION_THRESHOLD` | No | `0.8` | Context fullness ratio (0–1) at which auto-compaction triggers |
|
|
201
199
|
| `COMPACTION_MODEL` | No | none | Model for LLM-based compaction summaries. When set, `/compact` and auto-compaction use this model instead of mechanical truncation. Use a small, fast model (e.g. your local model). |
|
|
200
|
+
| `REASONING_EFFORT` | No | unset | Reasoning level for providers that support it. Valid values: `xhigh`, `high`, `medium`, `low`, `minimal`, `none` |
|
|
202
201
|
|
|
203
202
|
|
|
204
203
|
### `agent.config.json`
|
|
205
204
|
|
|
206
|
-
|
|
205
|
+
A global `~/.minicode/agent.config.json` is auto-created on first run. Only set what you need — everything has sensible defaults:
|
|
207
206
|
|
|
208
207
|
```json
|
|
209
208
|
{
|
|
210
209
|
"modelProvider": "openai-compatible",
|
|
211
|
-
"model": "
|
|
210
|
+
"model": "your-model-name",
|
|
211
|
+
"openAiBaseUrl": "http://localhost:1234/v1",
|
|
212
212
|
"maxSteps": 50,
|
|
213
213
|
"maxTokens": 4096,
|
|
214
|
-
"maxContextTokens":
|
|
215
|
-
"workspaceRoot": ".",
|
|
216
|
-
"commandTimeout": 30000,
|
|
217
|
-
"commandDenylist": [],
|
|
218
|
-
"confirmDestructive": true,
|
|
219
|
-
"maxFileSizeBytes": 1000000,
|
|
220
|
-
"keepRecentMessages": 12,
|
|
221
|
-
"loopDetectionWindow": 6,
|
|
222
|
-
"openAiBaseUrl": "http://localhost:1234/v1",
|
|
223
|
-
"openAiApiKey": "",
|
|
224
|
-
"compactionModel": ""
|
|
214
|
+
"maxContextTokens": 32000
|
|
225
215
|
}
|
|
226
216
|
```
|
|
227
217
|
|
|
@@ -240,9 +230,14 @@ Field mapping:
|
|
|
240
230
|
- `keepRecentMessages` ↔ `KEEP_RECENT_MESSAGES`
|
|
241
231
|
- `loopDetectionWindow` ↔ `LOOP_DETECTION_WINDOW`
|
|
242
232
|
- `openAiBaseUrl` ↔ `OPENAI_BASE_URL`
|
|
243
|
-
- `openAiApiKey` ↔ `OPENAI_API_KEY`
|
|
233
|
+
- `openAiApiKey` ↔ `OPENAI_API_KEY` / `OPENROUTER_API_KEY` (when using OpenRouter)
|
|
234
|
+
- `maxToolOutputChars` ↔ `MAX_TOOL_OUTPUT_CHARS`
|
|
235
|
+
- `enableFileReadDedup` ↔ `ENABLE_FILE_READ_DEDUP`
|
|
236
|
+
- `enableAdaptiveKeepRecent` ↔ `ENABLE_ADAPTIVE_KEEP_RECENT`
|
|
237
|
+
- `enableToolOutputTruncation` ↔ `ENABLE_TOOL_OUTPUT_TRUNCATION`
|
|
244
238
|
- `compactionThreshold` ↔ `COMPACTION_THRESHOLD`
|
|
245
239
|
- `compactionModel` ↔ `COMPACTION_MODEL`
|
|
240
|
+
- `reasoningEffort` ↔ `REASONING_EFFORT`
|
|
246
241
|
|
|
247
242
|
## Usage
|
|
248
243
|
|
|
@@ -273,6 +268,19 @@ npm run dev -- --oneshot --json "Summarize TODOs"
|
|
|
273
268
|
npm run dev -- --oneshot --out result.txt "Draft changelog"
|
|
274
269
|
```
|
|
275
270
|
|
|
271
|
+
Interactive slash commands:
|
|
272
|
+
|
|
273
|
+
- `/help`
|
|
274
|
+
- `/config`
|
|
275
|
+
- `/compact`
|
|
276
|
+
- `/reasoning [level]`
|
|
277
|
+
- `/models`
|
|
278
|
+
- `/model [name]`
|
|
279
|
+
- `/save [label]`
|
|
280
|
+
- `/load [label]`
|
|
281
|
+
- `/sessions`
|
|
282
|
+
- `/exit`
|
|
283
|
+
|
|
276
284
|
### Exit codes
|
|
277
285
|
|
|
278
286
|
- `0`: Success
|
|
@@ -284,7 +292,9 @@ npm run dev -- --oneshot --out result.txt "Draft changelog"
|
|
|
284
292
|
- `npm run dev` - start the CLI in TypeScript mode
|
|
285
293
|
- `npm run dev:ink` - start with Ink UI (same as `dev` when in a TTY; use to override `CLI_UI_MODE=legacy`)
|
|
286
294
|
- `npm run build` - compile TypeScript to `dist/`
|
|
295
|
+
- `npm run build:web` - build the bundled web client used by `minicode serve`
|
|
287
296
|
- `npm start` - run compiled CLI
|
|
297
|
+
- `npm run install:global` - build and `npm link` the CLI locally
|
|
288
298
|
- `npm run lint` - run ESLint on TypeScript source and tests
|
|
289
299
|
- `npm test` - run Node test suite
|
|
290
|
-
|
|
300
|
+
- `npm run verify-index` - run the TypeScript index verification harness
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* CLI entry point for running benchmark tasks.
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* node --import tsx scripts/run-benchmarks.ts [options]
|
|
7
|
+
*
|
|
8
|
+
* Options:
|
|
9
|
+
* --category <name> Run only tasks in the given category
|
|
10
|
+
* --task <id> Run a single task by id (e.g. "navigation/find-symbol-definition")
|
|
11
|
+
* --variant <label> Variant label for the report (default: "ci")
|
|
12
|
+
* --out <path> Write the JSON report to a file
|
|
13
|
+
*
|
|
14
|
+
* Environment:
|
|
15
|
+
* MODEL_PROVIDER, MODEL, OPENAI_BASE_URL, OPENAI_API_KEY, ANTHROPIC_API_KEY
|
|
16
|
+
* — same as minicode runtime config.
|
|
17
|
+
*/
|
|
18
|
+
import path from "node:path";
|
|
19
|
+
import { writeFile } from "node:fs/promises";
|
|
20
|
+
import { createModelClient, createReadFileTool, createWriteFileTool, createEditFileTool, createSearchTool, createListFilesTool, createRunCommandTool, } from "@minicode/agent-sdk";
|
|
21
|
+
import { loadBenchmarkTasks, loadBenchmarkTask } from "../src/benchmark/task-loader.js";
|
|
22
|
+
import { runBenchmarkSuite } from "../src/benchmark/runner.js";
|
|
23
|
+
import { buildReport, formatReport } from "../src/benchmark/reporter.js";
|
|
24
|
+
/**
 * Parse the benchmark runner's command-line flags.
 *
 * Recognized flags, each taking exactly one value: `--category`, `--task`,
 * `--variant`, `--out`. Anything else is ignored. A flag whose value is
 * missing (end of input or an empty string) is skipped, and a flag that is
 * immediately followed by another `--flag` no longer swallows that flag as
 * its value.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{variant: string, category?: string, task?: string, out?: string}}
 *   Parsed options; `variant` defaults to `"ci"`.
 */
export function parseArgs(argv) {
    // Flag name -> property it populates on the result.
    const valueFlags = new Map([
        ["--category", "category"],
        ["--task", "task"],
        ["--variant", "variant"],
        ["--out", "out"],
    ]);
    const args = { variant: "ci" };
    for (let i = 0; i < argv.length; i++) {
        const property = valueFlags.get(argv[i]);
        const next = argv[i + 1];
        if (property === undefined || !next || next.startsWith("--")) {
            continue;
        }
        args[property] = next;
        i++; // the value has been consumed
    }
    return args;
}
|
|
48
|
+
/* ------------------------------------------------------------------ */
|
|
49
|
+
/* Config builder */
|
|
50
|
+
/* ------------------------------------------------------------------ */
|
|
51
|
+
/**
 * Read a numeric setting from the environment.
 * Falls back when the variable is unset, blank, or not a finite number, so a
 * stray `MAX_STEPS=` or `MAX_TOKENS=abc` cannot yield `0` or `NaN`.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unusable.
 * @returns {number}
 */
function readNumberEnv(name, fallback) {
    const raw = process.env[name];
    if (raw === undefined || raw.trim() === "") {
        return fallback;
    }
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : fallback;
}
/**
 * Build the agent config for a benchmark run from environment variables only.
 *
 * The workspace root is the current working directory, the command denylist
 * is empty, and destructive-command confirmation is turned off. When the base
 * URL points at OpenRouter, `OPENROUTER_API_KEY` is preferred over
 * `OPENAI_API_KEY`, matching the runtime config loader. `openAiApiKey` is
 * omitted entirely when no key is set.
 *
 * @returns {object} Config object passed to the model client and tools.
 */
export function buildConfig() {
    const openAiBaseUrl = process.env.OPENAI_BASE_URL ?? "http://localhost:1234/v1";
    const openAiApiKey = (openAiBaseUrl.includes("openrouter") && process.env.OPENROUTER_API_KEY) ||
        process.env.OPENAI_API_KEY;
    return {
        modelProvider: process.env.MODEL_PROVIDER ?? "openai-compatible",
        model: process.env.MODEL ?? "test-model",
        maxSteps: readNumberEnv("MAX_STEPS", 50),
        maxTokens: readNumberEnv("MAX_TOKENS", 4096),
        maxContextTokens: readNumberEnv("MAX_CONTEXT_TOKENS", 32000),
        workspaceRoot: process.cwd(),
        commandTimeoutMs: readNumberEnv("COMMAND_TIMEOUT_MS", 30000),
        maxFileSizeBytes: readNumberEnv("MAX_FILE_SIZE_BYTES", 1000000),
        commandDenylist: [],
        confirmDestructive: false,
        keepRecentMessages: readNumberEnv("KEEP_RECENT_MESSAGES", 12),
        loopDetectionWindow: readNumberEnv("LOOP_DETECTION_WINDOW", 6),
        maxToolOutputChars: readNumberEnv("MAX_TOOL_OUTPUT_CHARS", 8000),
        openAiBaseUrl,
        ...(openAiApiKey ? { openAiApiKey } : {}),
    };
}
|
|
72
|
+
/* ------------------------------------------------------------------ */
|
|
73
|
+
/* Task loading */
|
|
74
|
+
/* ------------------------------------------------------------------ */
|
|
75
|
+
/**
 * Select the benchmark tasks to run.
 *
 * `args.task` takes priority and loads exactly one task by id. Otherwise all
 * tasks under `tasksDir` are loaded and, if `args.category` is set, filtered
 * to that category.
 *
 * @param {string} tasksDir - Directory containing the benchmark task definitions.
 * @param {{task?: string, category?: string}} args - Parsed CLI options.
 * @returns {Promise<object[]>} The tasks to execute.
 * @throws {Error} When the requested task id does not resolve, or when the
 *   category filter matches nothing.
 */
export async function loadTasks(tasksDir, args) {
    if (args.task) {
        const single = await loadBenchmarkTask(tasksDir, args.task);
        if (!single) {
            throw new Error(`Task not found: ${args.task}`);
        }
        return [single];
    }
    const allTasks = await loadBenchmarkTasks(tasksDir);
    if (!args.category) {
        return allTasks;
    }
    const matching = allTasks.filter((t) => t.category === args.category);
    if (matching.length === 0) {
        throw new Error(`No tasks found for category: ${args.category}`);
    }
    return matching;
}
|
|
92
|
+
/* ------------------------------------------------------------------ */
|
|
93
|
+
/* Main */
|
|
94
|
+
/* ------------------------------------------------------------------ */
|
|
95
|
+
/**
 * Run the benchmark suite end to end.
 *
 * Parses CLI flags, builds the config from the environment, loads tasks from
 * `<cwd>/benchmarks/tasks`, runs them with the standard file/search/command
 * tools, prints the formatted report, and optionally writes the JSON report
 * to `--out`. Sets `process.exitCode = 1` when the report counts any failed
 * task; it never calls `process.exit`, so pending output can flush.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const args = parseArgs(process.argv.slice(2));
    const config = buildConfig();
    const tasksDir = path.resolve(process.cwd(), "benchmarks", "tasks");
    console.log(`Benchmark runner starting...`);
    console.log(` Provider: ${config.modelProvider}`);
    console.log(` Model: ${config.model}`);
    console.log(` Variant: ${args.variant}`);
    const tasks = await loadTasks(tasksDir, args);
    console.log(` Tasks: ${tasks.length}`);
    console.log("");
    const modelClient = createModelClient(config);
    const tools = [
        createReadFileTool(config),
        createWriteFileTool(config),
        createEditFileTool(config),
        createSearchTool(config),
        createListFilesTool(config),
        createRunCommandTool(config),
    ];
    const traces = await runBenchmarkSuite(tasks, {
        modelClient,
        config,
        tools,
        variant: args.variant,
        // Progress line per finished task: duration in seconds and tool-call count.
        onTaskComplete: (taskId, trace) => {
            const dur = (trace.durationMs / 1000).toFixed(1);
            console.log(` [done] ${taskId} (${dur}s, ${trace.toolCalls.length} tool calls)`);
        },
    });
    const report = buildReport(tasks, traces, args.variant, config.model);
    const formatted = formatReport(report);
    console.log("");
    console.log(formatted);
    if (args.out) {
        const outPath = path.resolve(args.out);
        await writeFile(outPath, JSON.stringify(report, null, 2), "utf8");
        console.log(`\nReport written to ${outPath}`);
    }
    // Exit with failure if any task failed
    if (report.summary.failed > 0) {
        process.exitCode = 1;
    }
}
|
|
139
|
+
// Only run main when executed directly (not imported for testing).
// The check is by script file name: argv[1] must end with the TypeScript
// source name or the compiled JavaScript name.
const isDirectRun = process.argv[1]?.endsWith("run-benchmarks.ts") ||
    process.argv[1]?.endsWith("run-benchmarks.js");
if (isDirectRun) {
    main().catch((err) => {
        // Report the failure and signal it via the exit code instead of
        // leaving an unhandled rejection.
        console.error("Benchmark runner failed:", err);
        process.exitCode = 1;
    });
}
|
package/dist/src/agent/config.js
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
import { access, readFile } from "node:fs/promises";
|
|
1
|
+
import { access, mkdir, readFile, writeFile } from "node:fs/promises";
|
|
2
2
|
import os from "node:os";
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
import process from "node:process";
|
|
5
|
-
import { fileURLToPath } from "node:url";
|
|
6
5
|
import dotenv from "dotenv";
|
|
7
6
|
/** User-level config directory: ~/.minicode */
|
|
8
7
|
export const MINICODE_HOME = path.join(os.homedir(), ".minicode");
|
|
@@ -32,17 +31,52 @@ export function formatConfigForDisplay(config) {
|
|
|
32
31
|
"enableToolOutputTruncation: " + (config.enableToolOutputTruncation ?? false),
|
|
33
32
|
"compactionThreshold: " + (config.compactionThreshold ?? "(disabled)"),
|
|
34
33
|
"compactionModel: " + (config.compactionModel ?? "(disabled — using mechanical compaction)"),
|
|
34
|
+
"reasoningEffort: " + (config.reasoningEffort ?? "(unset — no reasoning parameters sent)"),
|
|
35
|
+
"enableDynamicPrompt: " + (config.enableDynamicPrompt ?? true),
|
|
35
36
|
];
|
|
36
37
|
return lines.join("\n");
|
|
37
38
|
}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
39
|
+
/**
 * Return a list of missing config items that prevent the agent from running.
 * Empty array means the config is valid.
 *
 * Checks performed:
 *  - `config.model` must be set (a blank or whitespace-only string counts as
 *    unset);
 *  - when `config.modelProvider` is `"anthropic"`, `ANTHROPIC_API_KEY` must be
 *    present in `process.env`.
 *
 * NOTE(review): the Anthropic key is looked up in `process.env` only; a key
 * that exists solely in `~/.minicode/.env` is seen here only if something has
 * loaded that file into `process.env` — confirm against the startup path.
 *
 * @param {{model?: string, modelProvider?: string}} config - Loaded agent config.
 * @returns {string[]} Human-readable descriptions of each missing item.
 */
export function getConfigMissing(config) {
    const missing = [];
    const model = typeof config.model === "string" ? config.model.trim() : config.model;
    if (!model) {
        missing.push("MODEL is not set");
    }
    if (config.modelProvider === "anthropic" && !process.env.ANTHROPIC_API_KEY) {
        missing.push("ANTHROPIC_API_KEY is not set");
    }
    return missing;
}
|
|
57
|
+
/**
 * Check if the config has enough information to connect to a model provider.
 * Returns null if valid, or a user-facing setup message if not.
 *
 * The message lists every item reported by `getConfigMissing`, says where the
 * settings can be placed, and ends with example `.env` contents for a local
 * OpenAI-compatible server and for Anthropic.
 *
 * @param {object} config - Loaded agent config.
 * @returns {string | null} Multi-line setup instructions, or `null`.
 */
export function getConfigSetupMessage(config) {
    const missing = getConfigMissing(config);
    if (missing.length === 0) {
        return null;
    }
    const lines = [
        "minicode is not configured yet. Missing:",
        ...missing.map((m) => ` - ${m}`),
        "",
        "Set these in ~/.minicode/.env or as environment variables.",
        "Edit ~/.minicode/agent.config.json for non-secret settings.",
        "",
        "Example ~/.minicode/.env for a local model:",
        " MODEL_PROVIDER=openai-compatible",
        " OPENAI_BASE_URL=http://localhost:1234/v1",
        " MODEL=your-model-name",
        "",
        "Example for Anthropic:",
        " MODEL_PROVIDER=anthropic",
        " ANTHROPIC_API_KEY=sk-ant-...",
        " MODEL=claude-sonnet-4-20250514",
    ];
    return lines.join("\n");
}
|
|
46
80
|
const DEFAULT_COMMAND_DENYLIST = [
|
|
47
81
|
/\brm\s+-rf\s+\//i,
|
|
48
82
|
/\bmkfs\b/i,
|
|
@@ -54,6 +88,15 @@ const DEFAULT_COMMAND_DENYLIST = [
|
|
|
54
88
|
/\binit\s+0\b/i,
|
|
55
89
|
/\bchmod\s+-R\s+777\s+\//i,
|
|
56
90
|
];
|
|
91
|
+
/** Reasoning-effort levels accepted from the environment or config file. */
const VALID_REASONING_EFFORTS = new Set([
    "xhigh", "high", "medium", "low", "minimal", "none",
]);
/**
 * Normalize a user-supplied reasoning-effort value.
 *
 * The value is trimmed and lower-cased before it is checked against
 * `VALID_REASONING_EFFORTS`. Non-string values (for example a number or
 * boolean written in `agent.config.json`) are treated as unset rather than
 * throwing.
 *
 * @param {unknown} value - Raw value from the environment or config file.
 * @returns {string | undefined} The normalized level, or `undefined` when the
 *   value is missing or not a recognized level.
 */
function parseReasoningEffort(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    const normalized = value.trim().toLowerCase();
    return VALID_REASONING_EFFORTS.has(normalized) ? normalized : undefined;
}
|
|
57
100
|
function parseNumber(value, fallback) {
|
|
58
101
|
if (!value) {
|
|
59
102
|
return fallback;
|
|
@@ -74,7 +117,7 @@ function parseBoolean(value, fallback) {
|
|
|
74
117
|
}
|
|
75
118
|
return fallback;
|
|
76
119
|
}
|
|
77
|
-
async function loadConfigFile(configPath) {
|
|
120
|
+
export async function loadConfigFile(configPath) {
|
|
78
121
|
try {
|
|
79
122
|
await access(configPath);
|
|
80
123
|
}
|
|
@@ -88,6 +131,48 @@ async function loadConfigFile(configPath) {
|
|
|
88
131
|
}
|
|
89
132
|
return parsed;
|
|
90
133
|
}
|
|
134
|
+
/**
 * Read and parse a dotenv file without touching `process.env`.
 *
 * Best-effort by design: any failure while reading or parsing (including the
 * file simply not existing) yields an empty object, so a missing or
 * unreadable `.env` never blocks startup.
 *
 * @param {string} envPath - Absolute path of the dotenv file.
 * @returns {Promise<Record<string, string>>} Parsed key/value pairs, or `{}`.
 */
async function loadDotenvFile(envPath) {
    try {
        const contents = await readFile(envPath, "utf8");
        return dotenv.parse(contents);
    }
    catch {
        // Deliberately swallowed: an absent or unreadable file means "no overrides".
        return {};
    }
}
|
|
143
|
+
/**
 * Merge one layer of key/value settings into `target`, recording for each key
 * written which layer it came from.
 *
 * @param {Record<string, string>} target - Accumulated values (mutated).
 * @param {Record<string, string>} sources - Key -> origin label (mutated).
 * @param {Record<string, string>} layer - Values contributed by this layer.
 * @param {string} source - Origin label stored in `sources` for written keys.
 * @param {boolean} override - When false, keys already defined in `target`
 *   are left untouched.
 * @returns {void}
 */
function applyEnvLayer(target, sources, layer, source, override) {
    for (const [key, value] of Object.entries(layer)) {
        const alreadySet = target[key] !== undefined;
        if (alreadySet && !override) {
            continue;
        }
        target[key] = value;
        sources[key] = source;
    }
}
|
|
152
|
+
/**
 * Overlay every defined variable from `process.env` onto `target`, tagging
 * each key written with the origin `"process"`. Existing entries in `target`
 * are always overwritten.
 *
 * @param {Record<string, string>} target - Accumulated values (mutated).
 * @param {Record<string, string>} sources - Key -> origin label (mutated).
 * @returns {void}
 */
function applyProcessEnv(target, sources) {
    for (const [key, value] of Object.entries(process.env)) {
        if (value !== undefined) {
            target[key] = value;
            sources[key] = "process";
        }
    }
}
|
|
161
|
+
/**
 * Resolve the effective environment used for configuration.
 *
 * Layers, lowest to highest precedence:
 *  1. `<minicodeHome>/.env` (origin label `"home-dotenv"`);
 *  2. the shell environment, `process.env` (origin label `"process"`).
 *
 * `process.env` itself is not modified.
 *
 * @param {{minicodeHome?: string}} [options] - `minicodeHome` overrides the
 *   default `MINICODE_HOME` directory.
 * @returns {Promise<{values: Record<string, string>, sources: Record<string, string>, homeEnvPath: string}>}
 *   The merged values, the origin of each key, and the dotenv path consulted.
 */
export async function resolveConfigEnv(options = {}) {
    const minicodeHome = options.minicodeHome ?? MINICODE_HOME;
    const homeEnvPath = path.join(minicodeHome, ".env");
    const values = {};
    const sources = {};
    // Base: ~/.minicode/.env
    const homeLayer = await loadDotenvFile(homeEnvPath);
    applyEnvLayer(values, sources, homeLayer, "home-dotenv", true);
    // Override: shell environment variables take precedence
    applyProcessEnv(values, sources);
    return { values, sources, homeEnvPath };
}
|
|
91
176
|
function parseUserDenylist(patterns) {
|
|
92
177
|
if (!patterns?.length) {
|
|
93
178
|
return [];
|
|
@@ -113,51 +198,75 @@ function parseModelProvider(value) {
|
|
|
113
198
|
}
|
|
114
199
|
return "anthropic";
|
|
115
200
|
}
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
201
|
+
/** Starter `agent.config.json` written on first run; `model` is left blank for the user to fill in. */
const DEFAULT_CONFIG_CONTENT = `{
  "modelProvider": "openai-compatible",
  "model": "",
  "openAiBaseUrl": "http://localhost:1234/v1",
  "maxSteps": 50,
  "maxTokens": 4096,
  "maxContextTokens": 32000
}
`;
/**
 * Make sure the minicode home directory exists and contains an
 * `agent.config.json`, creating a starter file when none is present.
 *
 * The file is created with the exclusive `wx` flag, so an existing config is
 * never overwritten, even if another process creates it at the same moment.
 *
 * @param {string} minicodeHome - Directory that holds user-level config.
 * @returns {Promise<void>}
 * @throws Propagates any filesystem error other than "file already exists".
 */
async function ensureMinicodeHome(minicodeHome) {
    await mkdir(minicodeHome, { recursive: true });
    const configPath = path.join(minicodeHome, "agent.config.json");
    try {
        await writeFile(configPath, DEFAULT_CONFIG_CONTENT, { encoding: "utf8", flag: "wx" });
    }
    catch (err) {
        // EEXIST means a config is already there, which is the desired end state.
        if (err?.code !== "EEXIST") {
            throw err;
        }
    }
}
|
|
220
|
+
/**
 * Load the effective agent configuration.
 *
 * Ensures the minicode home directory and a starter `agent.config.json`
 * exist, then resolves each setting with this precedence (highest first):
 *  1. the resolved environment (shell variables over `<minicodeHome>/.env`);
 *  2. `<minicodeHome>/agent.config.json`;
 *  3. the built-in default shown inline below.
 *
 * The API key is chosen by base URL: when the URL contains "openrouter",
 * `OPENROUTER_API_KEY` is preferred, then `OPENAI_API_KEY`, then the file's
 * `openAiApiKey`. `openAiApiKey`, `compactionModel` and `reasoningEffort` are
 * omitted from the result when they have no value.
 *
 * NOTE(review): string settings fall back with `??`, which only skips
 * null/undefined. An empty assignment such as `MODEL=` in the environment
 * presumably arrives as an empty string and would then shadow the value from
 * `agent.config.json` — verify whether that is intended.
 *
 * @param {string} [cwd] - Directory used as the default workspace root and as
 *   the base for resolving a relative `WORKSPACE_ROOT`.
 * @param {{minicodeHome?: string}} [options] - `minicodeHome` overrides the
 *   default `MINICODE_HOME` directory.
 * @returns {Promise<object>} The resolved agent config.
 */
export async function loadAgentConfig(cwd = process.cwd(), options = {}) {
    const minicodeHome = options.minicodeHome ?? MINICODE_HOME;
    await ensureMinicodeHome(minicodeHome);
    const homeConfigPath = path.join(minicodeHome, "agent.config.json");
    const fileConfig = await loadConfigFile(homeConfigPath);
    const env = (await resolveConfigEnv({ minicodeHome })).values;
    const rawWorkspaceRoot = env.WORKSPACE_ROOT ?? fileConfig.workspaceRoot ?? cwd;
    const workspaceRoot = path.resolve(cwd, rawWorkspaceRoot);
    // Built-in denylist first, then any user-supplied patterns from the config file.
    const commandDenylist = [
        ...DEFAULT_COMMAND_DENYLIST,
        ...parseUserDenylist(fileConfig.commandDenylist),
    ];
    const rawBaseUrl = env.OPENAI_BASE_URL ??
        fileConfig.openAiBaseUrl ??
        "http://localhost:1234/v1";
    const isOpenRouter = rawBaseUrl.includes("openrouter");
    const openAiApiKey = isOpenRouter
        ? (env.OPENROUTER_API_KEY ??
            env.OPENAI_API_KEY ??
            fileConfig.openAiApiKey)
        : (env.OPENAI_API_KEY ?? fileConfig.openAiApiKey);
    return {
        modelProvider: parseModelProvider(env.MODEL_PROVIDER ?? fileConfig.modelProvider ?? "openai-compatible"),
        model: env.MODEL ??
            fileConfig.model ??
            "",
        maxSteps: parseNumber(env.MAX_STEPS, fileConfig.maxSteps ?? 50),
        maxTokens: parseNumber(env.MAX_TOKENS, fileConfig.maxTokens ?? 4096),
        maxContextTokens: parseNumber(env.MAX_CONTEXT_TOKENS, fileConfig.maxContextTokens ?? 32_000),
        workspaceRoot,
        commandTimeoutMs: parseNumber(env.COMMAND_TIMEOUT_MS, fileConfig.commandTimeout ?? 30_000),
        maxFileSizeBytes: parseNumber(env.MAX_FILE_SIZE_BYTES, fileConfig.maxFileSizeBytes ?? 1_000_000),
        commandDenylist,
        confirmDestructive: parseBoolean(env.CONFIRM_DESTRUCTIVE, fileConfig.confirmDestructive ?? true),
        keepRecentMessages: parseNumber(env.KEEP_RECENT_MESSAGES, fileConfig.keepRecentMessages ?? 12),
        loopDetectionWindow: parseNumber(env.LOOP_DETECTION_WINDOW, fileConfig.loopDetectionWindow ?? 6),
        maxToolOutputChars: parseNumber(env.MAX_TOOL_OUTPUT_CHARS, fileConfig.maxToolOutputChars ?? 8_000),
        openAiBaseUrl: rawBaseUrl,
        ...(openAiApiKey !== undefined ? { openAiApiKey } : {}),
        enableFileReadDedup: parseBoolean(env.ENABLE_FILE_READ_DEDUP, fileConfig.enableFileReadDedup ?? true),
        enableAdaptiveKeepRecent: parseBoolean(env.ENABLE_ADAPTIVE_KEEP_RECENT, fileConfig.enableAdaptiveKeepRecent ?? true),
        enableToolOutputTruncation: parseBoolean(env.ENABLE_TOOL_OUTPUT_TRUNCATION, fileConfig.enableToolOutputTruncation ?? true),
        compactionThreshold: parseNumber(env.COMPACTION_THRESHOLD, fileConfig.compactionThreshold ?? 0.8),
        ...(env.COMPACTION_MODEL ?? fileConfig.compactionModel
            ? { compactionModel: env.COMPACTION_MODEL ?? fileConfig.compactionModel }
            : {}),
        enableDynamicPrompt: parseBoolean(env.ENABLE_DYNAMIC_PROMPT, fileConfig.enableDynamicPrompt ?? true),
        // Only include reasoningEffort when it parses to a recognized level.
        ...(() => {
            const effort = parseReasoningEffort(env.REASONING_EFFORT ?? fileConfig.reasoningEffort);
            return effort ? { reasoningEffort: effort } : {};
        })(),
    };
}
|