@sean.holung/minicode 0.3.4 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -3
- package/dist/scripts/run-benchmarks.js +73 -28
- package/dist/src/benchmark/runner.js +142 -59
- package/dist/src/indexer/project-index.js +49 -13
- package/dist/src/serve/agent-bridge.js +12 -3
- package/dist/src/serve/mcp-server.js +70 -21
- package/dist/src/serve/server.js +37 -4
- package/dist/src/shared/graph-symbols.js +82 -0
- package/dist/src/shared/symbol-resolution.js +33 -0
- package/dist/src/tools/find-path.js +15 -6
- package/dist/src/tools/find-references.js +7 -2
- package/dist/src/tools/get-dependencies.js +8 -3
- package/dist/src/tools/read-symbol.js +9 -3
- package/dist/src/tools/registry.js +4 -1
- package/dist/src/tools/search-code-map.js +18 -3
- package/dist/src/web/app.js +154 -33
- package/dist/tests/benchmark-harness.test.js +100 -0
- package/dist/tests/file-tools.test.js +34 -1
- package/dist/tests/find-path.test.js +43 -2
- package/dist/tests/find-references.test.js +49 -0
- package/dist/tests/get-dependencies.test.js +23 -0
- package/dist/tests/graph-symbols.test.js +45 -0
- package/dist/tests/indexer.test.js +6 -0
- package/dist/tests/read-symbol.test.js +35 -0
- package/dist/tests/request-tracker.test.js +15 -0
- package/dist/tests/run-benchmarks.test.js +117 -33
- package/dist/tests/search-code-map.test.js +2 -0
- package/dist/tests/serve.integration.test.js +109 -3
- package/dist/tests/session-ui.test.js +2 -0
- package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.d.ts.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.js +2 -1
- package/node_modules/@minicode/agent-sdk/dist/src/agent/agent.js.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/index.d.ts +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/index.d.ts.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/index.js.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/indexer/types.d.ts +3 -0
- package/node_modules/@minicode/agent-sdk/dist/src/indexer/types.d.ts.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/tools/registry.d.ts +3 -0
- package/node_modules/@minicode/agent-sdk/dist/src/tools/registry.d.ts.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/tools/registry.js +4 -1
- package/node_modules/@minicode/agent-sdk/dist/src/tools/registry.js.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/tools/run-command.d.ts +11 -1
- package/node_modules/@minicode/agent-sdk/dist/src/tools/run-command.d.ts.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/tools/run-command.js +4 -1
- package/node_modules/@minicode/agent-sdk/dist/src/tools/run-command.js.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/tools/search.d.ts.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/src/tools/search.js +16 -8
- package/node_modules/@minicode/agent-sdk/dist/src/tools/search.js.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/tests/file-tools.test.js +19 -2
- package/node_modules/@minicode/agent-sdk/dist/tests/file-tools.test.js.map +1 -1
- package/node_modules/@minicode/agent-sdk/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
# minicode
|
|
2
2
|
|
|
3
|
-
A graph-native coding agent and code exploration environment built around structural context optimization. It started as a way to make local models viable under tighter context budgets, and it now also works well with hosted frontier models through the same runtime, web UI, and OpenAI-compatible serve mode.
|
|
3
|
+
A graph-native coding agent and code exploration environment built around structural context optimization that leverages symbol-aware retrieval, dependency graphs, and targeted context. It started as a way to make local models viable under tighter context budgets, and it now also works well with hosted frontier models through the same runtime, web UI, and OpenAI-compatible serve mode.
|
|
4
|
+
|
|
5
|
+
minicode is built on a simple bet: models perform better when you give them less but better context. Bloated context directly degrades output quality: attention is diluted, positional biases cause mid-context information loss, and inference latency grows as token count increases.
|
|
6
|
+
|
|
7
|
+
Read operations dominate token usage in typical agent sessions; minicode addresses this by optimizing for **specific languages**. It indexes your project at startup with language plugins, injects a compact **code map** (signatures only) into the system prompt, and exposes symbol-level tools (`read_symbol`, `find_references`, `get_dependencies`) so the model reads only what it needs instead of entire files. This also enables the agent to traverse the code structurally and gain a better understanding of the codebase. TypeScript and JavaScript support comes built in, with custom language plugins leaving room for broader language support over time.
|
|
4
8
|
|
|
5
9
|
_Run `minicode serve` to get the web UI on localhost: chat, tool activity, session controls, model switching, symbol focus, annotations, and a live dependency graph._
|
|
6
10
|
|
|
7
11
|
<img width="1723" height="920" alt="Screenshot 2026-03-26 at 6 30 23 PM" src="https://github.com/user-attachments/assets/499c8dc7-cc2b-4125-abd5-32b2fc9795ea" />
|
|
8
12
|
|
|
9
13
|
|
|
10
|
-
Read operations dominate token usage in typical agent sessions; minicode addresses this by optimizing for **specific languages**. It indexes your project at startup with language plugins, injects a compact **code map** (signatures only) into the system prompt, and exposes symbol-level tools (`read_symbol`, `find_references`, `get_dependencies`) so the model reads only what it needs instead of entire files. TypeScript and JavaScript support come built-in, with custom language plugins leaving room for broader language support over time.
|
|
11
|
-
|
|
12
14
|
## Quick Start (LM Studio)
|
|
13
15
|
|
|
14
16
|
```bash
|
|
@@ -12,15 +12,20 @@
|
|
|
12
12
|
* --out <path> Write the JSON report to a file
|
|
13
13
|
*
|
|
14
14
|
* Environment:
|
|
15
|
-
* MODEL_PROVIDER, MODEL, OPENAI_BASE_URL, OPENAI_API_KEY, ANTHROPIC_API_KEY
|
|
16
|
-
* —
|
|
15
|
+
* MODEL_PROVIDER, MODEL, OPENAI_BASE_URL, OPENAI_API_KEY, OPENROUTER_API_KEY, ANTHROPIC_API_KEY
|
|
16
|
+
* — benchmark-layer overrides for benchmarks/benchmark.config.json.
|
|
17
17
|
*/
|
|
18
|
+
import { readFileSync, existsSync } from "node:fs";
|
|
18
19
|
import path from "node:path";
|
|
20
|
+
import { homedir } from "node:os";
|
|
19
21
|
import { writeFile } from "node:fs/promises";
|
|
20
|
-
import { createModelClient,
|
|
22
|
+
import { createModelClient, } from "@minicode/agent-sdk";
|
|
23
|
+
import { parse as parseDotenv } from "dotenv";
|
|
21
24
|
import { loadBenchmarkTasks, loadBenchmarkTask } from "../src/benchmark/task-loader.js";
|
|
22
25
|
import { runBenchmarkSuite } from "../src/benchmark/runner.js";
|
|
23
26
|
import { buildReport, formatReport } from "../src/benchmark/reporter.js";
|
|
27
|
+
import { buildProjectIndex } from "../src/indexer/project-index.js";
|
|
28
|
+
import { createToolRegistry } from "../src/tools/registry.js";
|
|
24
29
|
export function parseArgs(argv) {
|
|
25
30
|
const args = { variant: "ci" };
|
|
26
31
|
for (let i = 0; i < argv.length; i++) {
|
|
@@ -48,25 +53,63 @@ export function parseArgs(argv) {
|
|
|
48
53
|
/* ------------------------------------------------------------------ */
|
|
49
54
|
/* Config builder */
|
|
50
55
|
/* ------------------------------------------------------------------ */
|
|
51
|
-
export function
|
|
52
|
-
|
|
53
|
-
|
|
56
|
+
/**
 * Locate the benchmark configuration file for a repository checkout.
 *
 * @param {string} [repoRoot=process.cwd()] - Repository root directory.
 * @returns {string} Absolute path of `benchmarks/benchmark.config.json` under `repoRoot`.
 */
export function getBenchmarkConfigPath(repoRoot = process.cwd()) {
    const relativeConfigPath = path.join("benchmarks", "benchmark.config.json");
    return path.resolve(repoRoot, relativeConfigPath);
}
|
|
59
|
+
/**
 * Read an optional JSON configuration file.
 *
 * A missing file is not an error: callers get an empty object and fall back
 * to their defaults.
 *
 * @param {string} configPath - Path of the JSON file to read.
 * @returns {object} The parsed JSON object, or `{}` when the file does not exist.
 * @throws {SyntaxError} When the file exists but does not contain valid JSON.
 * @throws {TypeError} When the JSON document is not an object (e.g. `null`, an array, a number).
 */
function loadJsonConfigFile(configPath) {
    let rawText;
    try {
        // Read directly instead of existsSync() + read, so the file cannot
        // disappear between the check and the read.
        rawText = readFileSync(configPath, "utf8");
    }
    catch (error) {
        if (error?.code === "ENOENT" || error?.code === "ENOTDIR") {
            return {};
        }
        throw error;
    }
    let parsed;
    try {
        parsed = JSON.parse(rawText);
    }
    catch (error) {
        // Keep the SyntaxError type but say which file is broken.
        throw new SyntaxError(`Invalid JSON in config file ${configPath}: ${error.message}`, { cause: error });
    }
    // Callers read properties off the result, so anything but a plain object
    // would only fail later with a less helpful error.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
        throw new TypeError(`Config file ${configPath} must contain a JSON object`);
    }
    return parsed;
}
|
|
65
|
+
/**
 * Load key/value pairs from an optional dotenv file.
 *
 * @param {string} homeEnvPath - Path of the dotenv file to read.
 * @returns {object} Result of parsing the file with dotenv, or `{}` when the file does not exist.
 */
function loadHomeEnvVars(homeEnvPath) {
    const hasHomeEnvFile = existsSync(homeEnvPath);
    return hasHomeEnvFile
        ? parseDotenv(readFileSync(homeEnvPath, "utf8"))
        : {};
}
|
|
71
|
+
/**
 * Pick the first usable value from a list of candidates.
 *
 * A candidate is usable when it is neither `null` nor `undefined` and its
 * `length` is greater than zero (so empty strings are skipped).
 *
 * @param {...*} values - Candidates in priority order.
 * @returns {*} The first usable candidate, or `undefined` when there is none.
 */
function firstDefined(...values) {
    for (const candidate of values) {
        if (candidate == null) {
            continue;
        }
        if (candidate.length > 0) {
            return candidate;
        }
    }
    return undefined;
}
|
|
74
|
+
/**
 * Resolve a numeric setting from an environment override, a config-file
 * value, and a built-in default, in that order of priority.
 *
 * The environment override is used only when it parses to a finite number.
 * Blank, whitespace-only, or non-numeric overrides are ignored, so they no
 * longer become `0` or `NaN`.
 *
 * @param {string|undefined|null} envValue - Raw override text from the environment.
 * @param {number|undefined|null} fileValue - Value read from the config file, if any.
 * @param {number} fallback - Default used when neither source provides a value.
 * @returns {number} The resolved setting.
 */
function getNumberSetting(envValue, fileValue, fallback) {
    if (envValue != null) {
        const trimmed = String(envValue).trim();
        if (trimmed.length > 0) {
            const parsed = Number(trimmed);
            if (Number.isFinite(parsed)) {
                return parsed;
            }
        }
    }
    return fileValue ?? fallback;
}
|
|
80
|
+
/**
 * Tell whether a base URL points at OpenRouter.
 *
 * The hostname is compared rather than searching the whole URL for a
 * substring, so a URL that merely mentions "openrouter.ai" in its path or as
 * a prefix of another domain does not qualify.
 *
 * @param {string} baseUrl - Base URL of the OpenAI-compatible endpoint.
 * @returns {boolean} True when the host is `openrouter.ai` or one of its subdomains.
 */
function isOpenRouterBaseUrl(baseUrl) {
    let hostname;
    try {
        hostname = new URL(baseUrl).hostname.toLowerCase();
    }
    catch {
        // Unparseable URLs are treated as "not OpenRouter" so the OpenRouter
        // key is never selected for an unknown destination.
        return false;
    }
    return hostname === "openrouter.ai" || hostname.endsWith(".openrouter.ai");
}
/**
 * Build the benchmark runner configuration.
 *
 * Sources, in priority order: shell environment overrides, the benchmark
 * config JSON file, built-in defaults. API keys are taken from the shell
 * environment first and then from the home dotenv file.
 *
 * @param {object} [options]
 * @param {string} [options.repoRoot=process.cwd()] - Repository root; also used as `workspaceRoot`.
 * @param {object} [options.env=process.env] - Environment to read overrides and secrets from.
 * @param {string} [options.homeEnvPath] - Dotenv file holding secrets; defaults to `~/.minicode/.env`.
 * @param {string} [options.configPath] - Benchmark config JSON; defaults to `getBenchmarkConfigPath(repoRoot)`.
 * @returns {object} Configuration object; `openAiApiKey` is present only when a key was found
 *   for the "openai-compatible" provider.
 */
export function buildConfig(options = {}) {
    const repoRoot = path.resolve(options.repoRoot ?? process.cwd());
    const env = options.env ?? process.env;
    const homeEnvPath = options.homeEnvPath ?? path.join(homedir(), ".minicode", ".env");
    const configPath = options.configPath ?? getBenchmarkConfigPath(repoRoot);
    const fileConfig = loadJsonConfigFile(configPath);
    const homeEnv = loadHomeEnvVars(homeEnvPath);
    const getShellOverride = (key) => env[key];
    const getSecret = (key) => firstDefined(env[key], homeEnv[key]);
    // The last candidate is always a non-empty literal, so firstDefined()
    // cannot return undefined here and no extra `??` fallback is needed.
    const provider = firstDefined(getShellOverride("MODEL_PROVIDER"), fileConfig.modelProvider, "openai-compatible");
    const model = firstDefined(getShellOverride("MODEL"), fileConfig.model, "test-model");
    const openAiBaseUrl = firstDefined(getShellOverride("OPENAI_BASE_URL"), fileConfig.openAiBaseUrl, "http://localhost:1234/v1");
    let openAiApiKey;
    if (provider === "openai-compatible") {
        // Prefer the OpenRouter key only when the endpoint really is OpenRouter.
        openAiApiKey = isOpenRouterBaseUrl(openAiBaseUrl)
            ? firstDefined(getSecret("OPENROUTER_API_KEY"), getSecret("OPENAI_API_KEY"))
            : getSecret("OPENAI_API_KEY");
    }
    return {
        modelProvider: provider,
        model,
        maxSteps: getNumberSetting(getShellOverride("MAX_STEPS"), fileConfig.maxSteps, 50),
        maxTokens: getNumberSetting(getShellOverride("MAX_TOKENS"), fileConfig.maxTokens, 4096),
        maxContextTokens: getNumberSetting(getShellOverride("MAX_CONTEXT_TOKENS"), fileConfig.maxContextTokens, 32000),
        workspaceRoot: repoRoot,
        commandTimeoutMs: getNumberSetting(getShellOverride("COMMAND_TIMEOUT_MS"), fileConfig.commandTimeoutMs, 30000),
        maxFileSizeBytes: getNumberSetting(getShellOverride("MAX_FILE_SIZE_BYTES"), fileConfig.maxFileSizeBytes, 1000000),
        commandDenylist: [],
        confirmDestructive: false,
        keepRecentMessages: getNumberSetting(getShellOverride("KEEP_RECENT_MESSAGES"), fileConfig.keepRecentMessages, 12),
        loopDetectionWindow: getNumberSetting(getShellOverride("LOOP_DETECTION_WINDOW"), fileConfig.loopDetectionWindow, 6),
        maxToolOutputChars: getNumberSetting(getShellOverride("MAX_TOOL_OUTPUT_CHARS"), fileConfig.maxToolOutputChars, 8000),
        openAiBaseUrl,
        ...(openAiApiKey ? { openAiApiKey } : {}),
    };
}
|
|
72
115
|
/* ------------------------------------------------------------------ */
|
|
@@ -94,8 +137,9 @@ export async function loadTasks(tasksDir, args) {
|
|
|
94
137
|
/* ------------------------------------------------------------------ */
|
|
95
138
|
async function main() {
|
|
96
139
|
const args = parseArgs(process.argv.slice(2));
|
|
97
|
-
const
|
|
98
|
-
const
|
|
140
|
+
const repoRoot = process.cwd();
|
|
141
|
+
const config = buildConfig({ repoRoot });
|
|
142
|
+
const tasksDir = path.resolve(repoRoot, "benchmarks", "tasks");
|
|
99
143
|
console.log(`Benchmark runner starting...`);
|
|
100
144
|
console.log(` Provider: ${config.modelProvider}`);
|
|
101
145
|
console.log(` Model: ${config.model}`);
|
|
@@ -104,19 +148,20 @@ async function main() {
|
|
|
104
148
|
console.log(` Tasks: ${tasks.length}`);
|
|
105
149
|
console.log("");
|
|
106
150
|
const modelClient = createModelClient(config);
|
|
107
|
-
const tools = [
|
|
108
|
-
createReadFileTool(config),
|
|
109
|
-
createWriteFileTool(config),
|
|
110
|
-
createEditFileTool(config),
|
|
111
|
-
createSearchTool(config),
|
|
112
|
-
createListFilesTool(config),
|
|
113
|
-
createRunCommandTool(config),
|
|
114
|
-
];
|
|
115
151
|
const traces = await runBenchmarkSuite(tasks, {
|
|
116
152
|
modelClient,
|
|
117
153
|
config,
|
|
118
|
-
tools,
|
|
119
154
|
variant: args.variant,
|
|
155
|
+
repoRoot,
|
|
156
|
+
isolateWorkspace: true,
|
|
157
|
+
createToolset: async (taskConfig) => {
|
|
158
|
+
const projectIndex = await buildProjectIndex(taskConfig.workspaceRoot);
|
|
159
|
+
const toolRegistry = createToolRegistry(taskConfig, projectIndex);
|
|
160
|
+
return {
|
|
161
|
+
tools: toolRegistry.getDefinitions(),
|
|
162
|
+
projectIndex,
|
|
163
|
+
};
|
|
164
|
+
},
|
|
120
165
|
onTaskComplete: (taskId, trace) => {
|
|
121
166
|
const dur = (trace.durationMs / 1000).toFixed(1);
|
|
122
167
|
console.log(` [done] ${taskId} (${dur}s, ${trace.toolCalls.length} tool calls)`);
|
|
@@ -5,7 +5,17 @@
|
|
|
5
5
|
* over tool-call instrumentation and trace capture.
|
|
6
6
|
*/
|
|
7
7
|
import { execSync } from "node:child_process";
|
|
8
|
+
import { cp, mkdtemp, rm } from "node:fs/promises";
|
|
9
|
+
import { tmpdir } from "node:os";
|
|
10
|
+
import path from "node:path";
|
|
8
11
|
import { CodingAgent, Session, ToolRegistry, } from "@minicode/agent-sdk";
|
|
12
|
+
const COPY_SKIP_NAMES = new Set([
|
|
13
|
+
".git",
|
|
14
|
+
"node_modules",
|
|
15
|
+
"dist",
|
|
16
|
+
"build",
|
|
17
|
+
"coverage",
|
|
18
|
+
]);
|
|
9
19
|
function getGitCommitSha() {
|
|
10
20
|
try {
|
|
11
21
|
return execSync("git rev-parse HEAD", { encoding: "utf8" }).trim();
|
|
@@ -19,73 +29,146 @@ const STRUCTURAL_TOOLS = new Set([
|
|
|
19
29
|
"find_references",
|
|
20
30
|
"get_dependencies",
|
|
21
31
|
"search_code_map",
|
|
32
|
+
"find_path",
|
|
22
33
|
]);
|
|
34
|
+
/**
 * Turn a task id into a string safe to use as a directory name.
 *
 * Every run of characters other than ASCII letters, digits, `-` and `_` is
 * collapsed into a single `-`.
 *
 * @param {string} taskId - Benchmark task identifier.
 * @returns {string} The sanitized identifier.
 */
function sanitizeTaskId(taskId) {
    const unsafeRun = /[^a-z0-9_-]+/gi;
    return taskId.replace(unsafeRun, "-");
}
|
|
37
|
+
/**
 * Work out which directory a task should run against.
 *
 * @param {object} task - Benchmark task; an optional `workspaceRoot` is resolved against the repo root.
 * @param {object} options - Run options; reads `repoRoot` and `config.workspaceRoot`.
 * @returns {string} Absolute path of the task's source workspace.
 */
function resolveSourceWorkspaceRoot(task, options) {
    if (task.workspaceRoot) {
        // A task-specific workspace is resolved against the repository root.
        const baseDir = path.resolve(options.repoRoot ?? process.cwd());
        return path.resolve(baseDir, task.workspaceRoot);
    }
    // Otherwise fall back to the workspace from the shared configuration.
    return path.resolve(options.config.workspaceRoot);
}
|
|
44
|
+
/**
 * Copy filter: decide whether a path should be copied.
 *
 * @param {string} src - Path of the entry being considered.
 * @returns {boolean} False when the entry's base name is in `COPY_SKIP_NAMES`, true otherwise.
 */
function shouldCopyPath(src) {
    const isSkipped = COPY_SKIP_NAMES.has(path.basename(src));
    return !isSkipped;
}
|
|
48
|
+
/**
 * Prepare the directory a benchmark task will run in.
 *
 * Unless `options.isolateWorkspace === false`, the source workspace is copied
 * into a fresh temporary directory so the task cannot modify the original.
 *
 * @param {object} task - Benchmark task; `task.id` names the isolated directory.
 * @param {object} options - Run options (`isolateWorkspace`, plus whatever
 *   `resolveSourceWorkspaceRoot` reads).
 * @returns {Promise<{sourceWorkspaceRoot: string, workspaceRoot: string, cleanup: () => Promise<void>}>}
 *   The original and effective workspace roots, and a function that removes
 *   the temporary directory (a no-op when not isolating).
 * @throws Rethrows any error from creating or populating the temporary
 *   workspace, after removing the temporary directory.
 */
async function prepareTaskWorkspace(task, options) {
    const sourceWorkspaceRoot = resolveSourceWorkspaceRoot(task, options);
    if (options.isolateWorkspace === false) {
        return {
            sourceWorkspaceRoot,
            workspaceRoot: sourceWorkspaceRoot,
            cleanup: async () => { },
        };
    }
    const tempRoot = await mkdtemp(path.join(tmpdir(), "minicode-benchmark-"));
    const removeTempRoot = async () => {
        await rm(tempRoot, { recursive: true, force: true });
    };
    const isolatedWorkspaceRoot = path.join(tempRoot, sanitizeTaskId(task.id));
    try {
        await cp(sourceWorkspaceRoot, isolatedWorkspaceRoot, {
            recursive: true,
            // The filter is also applied to the root itself. Always accept the
            // root so a workspace directory named e.g. "dist" or "build" is
            // still copied; only entries beneath it are subject to skipping.
            filter: (src) => src === sourceWorkspaceRoot || shouldCopyPath(src),
        });
    }
    catch (error) {
        // Do not leave a half-populated temp directory behind on failure.
        await removeTempRoot();
        throw error;
    }
    return {
        sourceWorkspaceRoot,
        workspaceRoot: isolatedWorkspaceRoot,
        cleanup: removeTempRoot,
    };
}
|
|
71
|
+
/**
 * Extract the symbol names a structural tool call refers to.
 *
 * For `find_path` both endpoints (`from`, `to`) are returned, de-duplicated.
 * For any other tool the first present field among `symbol`, `symbolName`,
 * `name` and `query` is used.
 *
 * @param {string} toolName - Name of the tool that was called.
 * @param {object|null|undefined} input - Tool input; a missing input yields no names.
 * @returns {string[]} Non-empty string names found in the input (possibly empty).
 */
function getTrackedSymbolNames(toolName, input) {
    // Tolerate a missing input object instead of throwing inside the
    // instrumentation wrapper.
    const args = input ?? {};
    const isNonEmptyString = (value) => typeof value === "string" && value.length > 0;
    if (toolName === "find_path") {
        return [...new Set([args.from, args.to].filter(isNonEmptyString))];
    }
    const name = args.symbol ?? args.symbolName ?? args.name ?? args.query;
    return isNonEmptyString(name) ? [name] : [];
}
|
|
80
|
+
/**
 * Record which files a structural tool call touched.
 *
 * Each symbol name extracted from the input is looked up in the project
 * index; when found, the symbol's `filePath` is added to `filesRead`.
 * Nothing happens without a project index or for tools outside
 * `STRUCTURAL_TOOLS`.
 *
 * @param {string} toolName - Name of the tool that was called.
 * @param {object|undefined} projectIndex - Index exposing `getSymbol(name)`.
 * @param {object} input - Tool input.
 * @param {Set<string>} filesRead - Set updated in place.
 * @returns {void}
 */
function trackStructuralFileReads(toolName, projectIndex, input, filesRead) {
    if (!projectIndex) {
        return;
    }
    if (!STRUCTURAL_TOOLS.has(toolName)) {
        return;
    }
    const trackedNames = getTrackedSymbolNames(toolName, input);
    for (const trackedName of trackedNames) {
        const match = projectIndex.getSymbol(trackedName);
        if (!match) {
            continue;
        }
        filesRead.add(match.filePath);
    }
}
|
|
23
91
|
/**
 * Run a single benchmark task and return the captured trace.
 *
 * The task runs against the workspace prepared by `prepareTaskWorkspace`.
 * Every tool is wrapped so that each completed call is recorded (name, input,
 * output truncated to 2000 characters, duration) together with the files and
 * symbols it touched. The workspace's `cleanup()` always runs, whether the
 * task succeeds or throws.
 *
 * @param {object} task - Benchmark task; reads `id` and `prompt`.
 * @param {object} options - Run options: `config`, `modelClient`, `variant`,
 *   optional `createToolset(taskConfig, task)`, optional `tools`, and optional
 *   `onTaskComplete(taskId, trace)`.
 * @returns {Promise<object>} The trace for this task.
 */
export async function runBenchmarkTask(task, options) {
    const workspace = await prepareTaskWorkspace(task, options);
    const captured = [];
    const filesRead = new Set();
    const symbolsQueried = new Set();
    try {
        const taskConfig = { ...options.config, workspaceRoot: workspace.workspaceRoot };
        let toolset;
        if (options.createToolset) {
            toolset = await options.createToolset(taskConfig, task);
        }
        else {
            toolset = { tools: options.tools ?? [] };
        }
        // Bookkeeping for one completed tool call.
        const recordToolCall = (tool, input, output, durationMs) => {
            const storedOutput = output.length > 2000
                ? output.slice(0, 2000) + "…[truncated]"
                : output;
            captured.push({ name: tool.name, input, output: storedOutput, durationMs });
            if (tool.name === "read_file") {
                const filePath = input.path ?? input.file_path ?? input.filePath;
                if (typeof filePath === "string") {
                    filesRead.add(filePath);
                }
            }
            trackStructuralFileReads(tool.name, toolset.projectIndex, input, filesRead);
            if (STRUCTURAL_TOOLS.has(tool.name)) {
                for (const symbolName of getTrackedSymbolNames(tool.name, input)) {
                    symbolsQueried.add(symbolName);
                }
            }
        };
        // Wrap each tool so that calls are timed and captured.
        const instrumentedTools = toolset.tools.map((tool) => ({
            ...tool,
            execute: async (input) => {
                const callStart = performance.now();
                const output = await tool.execute(input);
                const callDurationMs = performance.now() - callStart;
                recordToolCall(tool, input, output, callDurationMs);
                return output;
            },
        }));
        const agent = new CodingAgent({
            config: taskConfig,
            modelClient: options.modelClient,
            toolRegistry: new ToolRegistry(instrumentedTools),
            session: new Session(),
        });
        const startedAt = new Date().toISOString();
        const turnStart = performance.now();
        const { text, usage } = await agent.runTurn(task.prompt);
        const durationMs = performance.now() - turnStart;
        const inputTokens = usage?.inputTokens ?? 0;
        const outputTokens = usage?.outputTokens ?? 0;
        const trace = {
            taskId: task.id,
            model: taskConfig.model,
            variant: options.variant,
            commitSha: getGitCommitSha(),
            sourceWorkspaceRoot: workspace.sourceWorkspaceRoot,
            workspaceRoot: workspace.workspaceRoot,
            response: text,
            toolCalls: captured,
            filesRead: [...filesRead],
            symbolsQueried: [...symbolsQueried],
            usage: {
                inputTokens,
                outputTokens,
                totalTokens: inputTokens + outputTokens,
            },
            durationMs,
            startedAt,
        };
        options.onTaskComplete?.(task.id, trace);
        return trace;
    }
    finally {
        await workspace.cleanup();
    }
}
|
|
90
173
|
/**
|
|
91
174
|
* Run all provided benchmark tasks sequentially.
|
|
@@ -48,22 +48,26 @@ function buildAdjacencyTo(edges) {
|
|
|
48
48
|
}
|
|
49
49
|
return map;
|
|
50
50
|
}
|
|
51
|
-
function
|
|
51
|
+
/**
 * Find every symbol a name could refer to.
 *
 * An exact key hit in `symbols` wins and is returned alone. Otherwise all
 * symbols whose lookup names include `name` are returned, ordered by:
 * exported first, then file path, then start line, then qualified name.
 *
 * @param {string} name - Name to resolve.
 * @param {Map<string, object>} symbols - Symbols keyed by the name used for direct lookup.
 * @returns {object[]} Matching symbols, best match first; empty when nothing matches.
 */
function resolveSymbols(name, symbols) {
    const exactHit = symbols.get(name);
    if (exactHit) {
        return [exactHit];
    }
    const candidates = [];
    for (const sym of symbols.values()) {
        if (getSymbolLookupNames(sym).includes(name)) {
            candidates.push(sym);
        }
    }
    const byPreference = (a, b) => Number(b.exported) - Number(a.exported) ||
        a.filePath.localeCompare(b.filePath) ||
        a.startLine - b.startLine ||
        a.qualifiedName.localeCompare(b.qualifiedName);
    return candidates.sort(byPreference);
}
|
|
65
|
+
/**
 * Resolve a name to its single best-matching symbol.
 *
 * @param {string} name - Name to resolve.
 * @param {Map<string, object>} symbols - Symbol table passed through to `resolveSymbols`.
 * @returns {object|undefined} The first result of `resolveSymbols`, or `undefined` when there is none.
 */
function resolveSymbol(name, symbols) {
    const [bestMatch] = resolveSymbols(name, symbols);
    return bestMatch;
}
|
|
65
68
|
export function createProjectIndex(symbols, files, dependencyEdges, plugins, projectFiles, workspaceRoot) {
|
|
66
69
|
let adjacencyFrom = buildAdjacencyFrom(dependencyEdges);
|
|
70
|
+
const root = path.resolve(workspaceRoot);
|
|
67
71
|
function rebuildSymbolsMap() {
|
|
68
72
|
const normalizedSymbols = normalizeIndexedSymbols(files);
|
|
69
73
|
symbols.clear();
|
|
@@ -71,6 +75,42 @@ export function createProjectIndex(symbols, files, dependencyEdges, plugins, pro
|
|
|
71
75
|
symbols.set(qualifiedName, symbol);
|
|
72
76
|
}
|
|
73
77
|
}
|
|
78
|
+
/**
 * Recompute the dependency edges from the current symbol table.
 *
 * Only the first plugin that provides `resolveDependencies` is used.
 * `dependencyEdges` is replaced in place and the `adjacencyFrom` lookup is
 * rebuilt from it. Nothing changes when no plugin can resolve dependencies.
 */
function rebuildDependencyEdges() {
    const resolver = plugins.find((p) => p.resolveDependencies);
    if (!resolver) {
        return;
    }
    const freshEdges = resolver.resolveDependencies([...symbols.values()], projectFiles);
    // Splice rather than reassign: the array itself is exposed to callers.
    dependencyEdges.splice(0, dependencyEdges.length, ...freshEdges);
    adjacencyFrom = buildAdjacencyFrom(dependencyEdges);
}
|
|
89
|
+
/**
 * Re-index the whole workspace from disk.
 *
 * Collects the source files under `root` whose extension some plugin
 * handles, clears the per-file state, then re-reads and re-indexes each
 * file. Files that have no plugin or cannot be read are skipped. Finally the
 * symbol table and the dependency edges are rebuilt.
 *
 * @returns {Promise<void>}
 */
async function refreshFromWorkspace() {
    const validExtensions = new Set(plugins.flatMap((p) => p.extensions));
    const sourceFiles = [];
    await collectSourceFiles(root, root, sourceFiles, validExtensions);
    files.clear();
    projectFiles.clear();
    for (const relPath of sourceFiles) {
        const plugin = getPluginForFile(relPath, plugins);
        if (!plugin) {
            continue;
        }
        let content;
        try {
            content = await readFile(path.join(root, relPath), "utf8");
        }
        catch {
            // Unreadable files are left out of the index (best effort).
            continue;
        }
        projectFiles.set(relPath, content);
        files.set(relPath, plugin.indexFile(relPath, content));
    }
    rebuildSymbolsMap();
    rebuildDependencyEdges();
}
|
|
74
114
|
return {
|
|
75
115
|
symbols,
|
|
76
116
|
files,
|
|
@@ -81,6 +121,9 @@ export function createProjectIndex(symbols, files, dependencyEdges, plugins, pro
|
|
|
81
121
|
getSymbol(name) {
|
|
82
122
|
return resolveSymbol(name, symbols);
|
|
83
123
|
},
|
|
124
|
+
getSymbolMatches(name) {
|
|
125
|
+
return resolveSymbols(name, symbols);
|
|
126
|
+
},
|
|
84
127
|
getSymbolsInFile(filePath) {
|
|
85
128
|
return files.get(filePath) ?? [];
|
|
86
129
|
},
|
|
@@ -226,16 +269,9 @@ export function createProjectIndex(symbols, files, dependencyEdges, plugins, pro
|
|
|
226
269
|
const extracted = plugin.indexFile(relPath, content);
|
|
227
270
|
files.set(relPath, extracted);
|
|
228
271
|
rebuildSymbolsMap();
|
|
229
|
-
|
|
230
|
-
if (p.resolveDependencies) {
|
|
231
|
-
const allSymbols = [...symbols.values()];
|
|
232
|
-
const edges = p.resolveDependencies(allSymbols, projectFiles);
|
|
233
|
-
dependencyEdges.splice(0, dependencyEdges.length, ...edges);
|
|
234
|
-
adjacencyFrom = buildAdjacencyFrom(dependencyEdges);
|
|
235
|
-
break;
|
|
236
|
-
}
|
|
237
|
-
}
|
|
272
|
+
rebuildDependencyEdges();
|
|
238
273
|
},
|
|
274
|
+
refreshFromWorkspace,
|
|
239
275
|
};
|
|
240
276
|
}
|
|
241
277
|
/**
|
|
@@ -276,10 +276,18 @@ export class AgentBridge {
|
|
|
276
276
|
return undefined;
|
|
277
277
|
return this.projectIndex.getSymbol(name);
|
|
278
278
|
}
|
|
279
|
+
getSymbolMatches(name) {
|
|
280
|
+
if (!this.projectIndex)
|
|
281
|
+
return [];
|
|
282
|
+
return this.projectIndex.getSymbolMatches(name);
|
|
283
|
+
}
|
|
279
284
|
getDependencies(symbolName, depth) {
|
|
280
285
|
if (!this.projectIndex)
|
|
281
286
|
return undefined;
|
|
282
|
-
const
|
|
287
|
+
const matches = this.projectIndex.getSymbolMatches(symbolName);
|
|
288
|
+
if (matches.length !== 1)
|
|
289
|
+
return undefined;
|
|
290
|
+
const cone = this.projectIndex.getDependencyCone(matches[0].qualifiedName, depth);
|
|
283
291
|
if (cone.length === 0)
|
|
284
292
|
return undefined;
|
|
285
293
|
return cone.map((sym) => ({
|
|
@@ -293,9 +301,10 @@ export class AgentBridge {
|
|
|
293
301
|
getReferences(symbolName) {
|
|
294
302
|
if (!this.projectIndex)
|
|
295
303
|
return undefined;
|
|
296
|
-
const
|
|
297
|
-
if (
|
|
304
|
+
const matches = this.projectIndex.getSymbolMatches(symbolName);
|
|
305
|
+
if (matches.length !== 1)
|
|
298
306
|
return undefined;
|
|
307
|
+
const sym = matches[0];
|
|
299
308
|
// Find all edges pointing TO this symbol
|
|
300
309
|
const refs = this.projectIndex.dependencyEdges
|
|
301
310
|
.filter((e) => e.to === sym.qualifiedName || e.to === sym.name)
|