@sean.holung/minicode 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/agent/config.js +11 -1
- package/dist/src/benchmark/config.js +15 -1
- package/dist/src/benchmark/index.js +1 -1
- package/dist/src/benchmark/workspace-changes.js +93 -12
- package/dist/src/cli/benchmark-run.js +285 -12
- package/dist/src/cli/contextbench-trajectory.js +258 -0
- package/dist/src/web/app.js +1 -1
- package/dist/src/web/favicon.ico +0 -0
- package/dist/src/web/favicon.svg +9 -0
- package/dist/src/web/index.html +2 -0
- package/dist/tests/agent.test.js +17 -21
- package/dist/tests/benchmark-run.test.js +223 -2
- package/dist/tests/contextbench-trajectory.test.js +228 -0
- package/dist/tests/graph-onboarding.test.js +20 -0
- package/dist/tests/model-client-openai.test.js +3 -2
- package/dist/tests/package-metadata.test.js +9 -0
- package/dist/tests/reasoning-effort.test.js +83 -0
- package/dist/tests/workspace-changes.test.js +50 -1
- package/node_modules/@sean.holung/minicode-sdk/dist/src/agent/agent.d.ts +13 -1
- package/node_modules/@sean.holung/minicode-sdk/dist/src/agent/agent.d.ts.map +1 -1
- package/node_modules/@sean.holung/minicode-sdk/dist/src/agent/agent.js +148 -25
- package/node_modules/@sean.holung/minicode-sdk/dist/src/agent/agent.js.map +1 -1
- package/node_modules/@sean.holung/minicode-sdk/dist/src/agent/types.d.ts +62 -0
- package/node_modules/@sean.holung/minicode-sdk/dist/src/agent/types.d.ts.map +1 -1
- package/node_modules/@sean.holung/minicode-sdk/dist/src/agent/types.js.map +1 -1
- package/node_modules/@sean.holung/minicode-sdk/dist/src/model/client.d.ts +5 -1
- package/node_modules/@sean.holung/minicode-sdk/dist/src/model/client.d.ts.map +1 -1
- package/node_modules/@sean.holung/minicode-sdk/dist/src/model/client.js +88 -8
- package/node_modules/@sean.holung/minicode-sdk/dist/src/model/client.js.map +1 -1
- package/node_modules/@sean.holung/minicode-sdk/dist/src/prompt/system-prompt.d.ts.map +1 -1
- package/node_modules/@sean.holung/minicode-sdk/dist/src/prompt/system-prompt.js +1 -1
- package/node_modules/@sean.holung/minicode-sdk/dist/src/prompt/system-prompt.js.map +1 -1
- package/node_modules/minicode-plugin-python/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +2 -1
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { test } from "node:test";
|
|
3
|
+
import { buildContextBenchTrajectory, parsePatchSpans, } from "../src/cli/contextbench-trajectory.js";
|
|
4
|
+
function trace(step, name, input, result = null, skipped = false) {
|
|
5
|
+
return { step, name, input, result, skipped };
|
|
6
|
+
}
|
|
7
|
+
test("read_file with explicit offset and limit becomes a tight explore_context span", () => {
|
|
8
|
+
const trajectory = buildContextBenchTrajectory({
|
|
9
|
+
systemPrompt: "system",
|
|
10
|
+
userPrompt: "Fix the bug.",
|
|
11
|
+
toolCalls: [trace(1, "read_file", { path: "app/main.py", offset: 10, limit: 30 })],
|
|
12
|
+
finalAssistantText: "Done.",
|
|
13
|
+
workspaceRoot: "/workspace",
|
|
14
|
+
patch: "",
|
|
15
|
+
});
|
|
16
|
+
const explore = trajectory.messages.find((m) => m.role === "assistant" && m.content.includes("<explore_context>"));
|
|
17
|
+
assert.ok(explore, "should emit one explore_context message");
|
|
18
|
+
assert.match(explore.content, /File: app\/main\.py/);
|
|
19
|
+
// offset=10, limit=30 → lines 10..39 (start + limit - 1)
|
|
20
|
+
assert.match(explore.content, /Lines: 10-39/);
|
|
21
|
+
});
|
|
22
|
+
test("read_file without offset/limit prefers the result's last line-number prefix", () => {
|
|
23
|
+
const numberedContent = Array.from({ length: 5 }, (_, i) => `${i + 1}|line ${i + 1}`).join("\n") + "\n";
|
|
24
|
+
const trajectory = buildContextBenchTrajectory({
|
|
25
|
+
systemPrompt: "system",
|
|
26
|
+
userPrompt: "Fix the bug.",
|
|
27
|
+
toolCalls: [trace(1, "read_file", { path: "app/foo.py" }, numberedContent)],
|
|
28
|
+
finalAssistantText: "Done.",
|
|
29
|
+
workspaceRoot: "/workspace",
|
|
30
|
+
patch: "",
|
|
31
|
+
});
|
|
32
|
+
const explore = trajectory.messages.find((m) => m.content.includes("<explore_context>"));
|
|
33
|
+
assert.ok(explore);
|
|
34
|
+
assert.match(explore.content, /File: app\/foo\.py\nLines: 1-5/);
|
|
35
|
+
});
|
|
36
|
+
test("read_file with unparseable result and no offset/limit is omitted", () => {
|
|
37
|
+
const trajectory = buildContextBenchTrajectory({
|
|
38
|
+
systemPrompt: "system",
|
|
39
|
+
userPrompt: "Fix.",
|
|
40
|
+
toolCalls: [trace(1, "read_file", { path: "x.py" }, "no line numbers here")],
|
|
41
|
+
finalAssistantText: "Done.",
|
|
42
|
+
workspaceRoot: "/workspace",
|
|
43
|
+
patch: "",
|
|
44
|
+
});
|
|
45
|
+
// Only the final PATCH_CONTEXT assistant message should be present;
|
|
46
|
+
// no explore_context for this step because we couldn't bound it safely.
|
|
47
|
+
const exploreMessages = trajectory.messages.filter((m) => m.content.includes("<explore_context>"));
|
|
48
|
+
assert.equal(exploreMessages.length, 0);
|
|
49
|
+
});
|
|
50
|
+
test("read_symbol consults the project index to derive file+lines", () => {
|
|
51
|
+
const stubIndex = {
|
|
52
|
+
getSymbol: (name) => name === "Foo.bar"
|
|
53
|
+
? {
|
|
54
|
+
name: "bar",
|
|
55
|
+
qualifiedName: "Foo.bar",
|
|
56
|
+
kind: "method",
|
|
57
|
+
filePath: "src/foo.ts",
|
|
58
|
+
startLine: 42,
|
|
59
|
+
endLine: 71,
|
|
60
|
+
signature: "bar()",
|
|
61
|
+
exported: false,
|
|
62
|
+
dependencies: [],
|
|
63
|
+
}
|
|
64
|
+
: undefined,
|
|
65
|
+
getSymbolMatches: () => [],
|
|
66
|
+
dependencyEdges: [],
|
|
67
|
+
getDependencyCone: () => [],
|
|
68
|
+
};
|
|
69
|
+
const trajectory = buildContextBenchTrajectory({
|
|
70
|
+
systemPrompt: "system",
|
|
71
|
+
userPrompt: "Fix.",
|
|
72
|
+
toolCalls: [trace(1, "read_symbol", { name: "Foo.bar" })],
|
|
73
|
+
finalAssistantText: "Done.",
|
|
74
|
+
workspaceRoot: "/workspace",
|
|
75
|
+
patch: "",
|
|
76
|
+
...(stubIndex !== undefined ? { projectIndex: stubIndex } : {}),
|
|
77
|
+
});
|
|
78
|
+
const explore = trajectory.messages.find((m) => m.content.includes("<explore_context>"));
|
|
79
|
+
assert.ok(explore);
|
|
80
|
+
assert.match(explore.content, /File: src\/foo\.ts\nLines: 42-71/);
|
|
81
|
+
});
|
|
82
|
+
test("find_references emits a span per incoming-edge source symbol", () => {
|
|
83
|
+
const symbols = {
|
|
84
|
+
"Foo.bar": {
|
|
85
|
+
name: "bar",
|
|
86
|
+
qualifiedName: "Foo.bar",
|
|
87
|
+
kind: "method",
|
|
88
|
+
filePath: "src/foo.ts",
|
|
89
|
+
startLine: 10,
|
|
90
|
+
endLine: 20,
|
|
91
|
+
signature: "",
|
|
92
|
+
exported: false,
|
|
93
|
+
dependencies: [],
|
|
94
|
+
},
|
|
95
|
+
callerOne: {
|
|
96
|
+
name: "callerOne",
|
|
97
|
+
qualifiedName: "callerOne",
|
|
98
|
+
kind: "function",
|
|
99
|
+
filePath: "src/caller-one.ts",
|
|
100
|
+
startLine: 100,
|
|
101
|
+
endLine: 110,
|
|
102
|
+
signature: "",
|
|
103
|
+
exported: false,
|
|
104
|
+
dependencies: [],
|
|
105
|
+
},
|
|
106
|
+
callerTwo: {
|
|
107
|
+
name: "callerTwo",
|
|
108
|
+
qualifiedName: "callerTwo",
|
|
109
|
+
kind: "function",
|
|
110
|
+
filePath: "src/caller-two.ts",
|
|
111
|
+
startLine: 200,
|
|
112
|
+
endLine: 220,
|
|
113
|
+
signature: "",
|
|
114
|
+
exported: false,
|
|
115
|
+
dependencies: [],
|
|
116
|
+
},
|
|
117
|
+
};
|
|
118
|
+
const stubIndex = {
|
|
119
|
+
getSymbol: (name) => symbols[name],
|
|
120
|
+
getSymbolMatches: () => [],
|
|
121
|
+
dependencyEdges: [
|
|
122
|
+
{ from: "callerOne", to: "Foo.bar", kind: "calls" },
|
|
123
|
+
{ from: "callerTwo", to: "Foo.bar", kind: "calls" },
|
|
124
|
+
],
|
|
125
|
+
getDependencyCone: () => [],
|
|
126
|
+
};
|
|
127
|
+
const trajectory = buildContextBenchTrajectory({
|
|
128
|
+
systemPrompt: "system",
|
|
129
|
+
userPrompt: "Fix.",
|
|
130
|
+
toolCalls: [trace(1, "find_references", { name: "Foo.bar" })],
|
|
131
|
+
finalAssistantText: "Done.",
|
|
132
|
+
workspaceRoot: "/workspace",
|
|
133
|
+
patch: "",
|
|
134
|
+
...(stubIndex !== undefined ? { projectIndex: stubIndex } : {}),
|
|
135
|
+
});
|
|
136
|
+
const explore = trajectory.messages.find((m) => m.content.includes("<explore_context>"));
|
|
137
|
+
assert.ok(explore);
|
|
138
|
+
assert.match(explore.content, /File: src\/caller-one\.ts\nLines: 100-110/);
|
|
139
|
+
assert.match(explore.content, /File: src\/caller-two\.ts\nLines: 200-220/);
|
|
140
|
+
});
|
|
141
|
+
test("PATCH_CONTEXT is computed from the unified diff's new-file hunk ranges", () => {
|
|
142
|
+
const patch = [
|
|
143
|
+
"diff --git a/app/main.py b/app/main.py",
|
|
144
|
+
"--- a/app/main.py",
|
|
145
|
+
"+++ b/app/main.py",
|
|
146
|
+
"@@ -10,5 +12,7 @@",
|
|
147
|
+
" unchanged",
|
|
148
|
+
"-removed",
|
|
149
|
+
"+added",
|
|
150
|
+
"+added 2",
|
|
151
|
+
"diff --git a/app/util.py b/app/util.py",
|
|
152
|
+
"--- a/app/util.py",
|
|
153
|
+
"+++ b/app/util.py",
|
|
154
|
+
"@@ -1,3 +1,4 @@",
|
|
155
|
+
"+new helper",
|
|
156
|
+
" a",
|
|
157
|
+
" b",
|
|
158
|
+
" c",
|
|
159
|
+
].join("\n");
|
|
160
|
+
const trajectory = buildContextBenchTrajectory({
|
|
161
|
+
systemPrompt: "system",
|
|
162
|
+
userPrompt: "Fix.",
|
|
163
|
+
toolCalls: [],
|
|
164
|
+
finalAssistantText: "Done.",
|
|
165
|
+
workspaceRoot: "/workspace",
|
|
166
|
+
patch,
|
|
167
|
+
});
|
|
168
|
+
const final = trajectory.messages[trajectory.messages.length - 1];
|
|
169
|
+
assert.match(final.content, /<PATCH_CONTEXT>/);
|
|
170
|
+
// 12,7 → new lines 12..18
|
|
171
|
+
assert.match(final.content, /File: app\/main\.py\nLines: 12-18/);
|
|
172
|
+
// 1,4 → new lines 1..4
|
|
173
|
+
assert.match(final.content, /File: app\/util\.py\nLines: 1-4/);
|
|
174
|
+
assert.equal(trajectory.info.submission, patch);
|
|
175
|
+
});
|
|
176
|
+
test("skipped tool calls do not contribute spans (e.g. loop-guard nudges)", () => {
|
|
177
|
+
const trajectory = buildContextBenchTrajectory({
|
|
178
|
+
systemPrompt: "system",
|
|
179
|
+
userPrompt: "Fix.",
|
|
180
|
+
toolCalls: [
|
|
181
|
+
trace(1, "read_file", { path: "x.py", offset: 1, limit: 10 }, "ok", false),
|
|
182
|
+
trace(2, "read_file", { path: "x.py", offset: 1, limit: 10 }, "[loop guard: ...]", true),
|
|
183
|
+
],
|
|
184
|
+
finalAssistantText: "Done.",
|
|
185
|
+
workspaceRoot: "/workspace",
|
|
186
|
+
patch: "",
|
|
187
|
+
});
|
|
188
|
+
const exploreMessages = trajectory.messages.filter((m) => m.content.includes("<explore_context>"));
|
|
189
|
+
// Only the first (non-skipped) call should produce an explore_context.
|
|
190
|
+
assert.equal(exploreMessages.length, 1);
|
|
191
|
+
});
|
|
192
|
+
test("messages always lead with system + user roles", () => {
|
|
193
|
+
const trajectory = buildContextBenchTrajectory({
|
|
194
|
+
systemPrompt: "you are an agent",
|
|
195
|
+
userPrompt: "fix the issue",
|
|
196
|
+
toolCalls: [],
|
|
197
|
+
finalAssistantText: "Done.",
|
|
198
|
+
workspaceRoot: "/workspace",
|
|
199
|
+
patch: "",
|
|
200
|
+
});
|
|
201
|
+
assert.equal(trajectory.messages[0]?.role, "system");
|
|
202
|
+
assert.equal(trajectory.messages[0]?.content, "you are an agent");
|
|
203
|
+
assert.equal(trajectory.messages[1]?.role, "user");
|
|
204
|
+
assert.equal(trajectory.messages[1]?.content, "fix the issue");
|
|
205
|
+
});
|
|
206
|
+
test("parsePatchSpans handles +N,0 hunks (deletion-only at line N)", () => {
|
|
207
|
+
const patch = [
|
|
208
|
+
"diff --git a/a.py b/a.py",
|
|
209
|
+
"--- a/a.py",
|
|
210
|
+
"+++ b/a.py",
|
|
211
|
+
"@@ -5,3 +5,0 @@",
|
|
212
|
+
"-removed",
|
|
213
|
+
"-removed",
|
|
214
|
+
"-removed",
|
|
215
|
+
].join("\n");
|
|
216
|
+
const spans = parsePatchSpans(patch);
|
|
217
|
+
// Count=0 means the new file has no lines at this hunk position; degenerate
|
|
218
|
+
// case — we still emit a 1-line span at the starting line so the file is
|
|
219
|
+
// surfaced rather than dropped.
|
|
220
|
+
assert.equal(spans.length, 1);
|
|
221
|
+
assert.equal(spans[0]?.file, "a.py");
|
|
222
|
+
assert.equal(spans[0]?.startLine, 5);
|
|
223
|
+
assert.equal(spans[0]?.endLine, 5);
|
|
224
|
+
});
|
|
225
|
+
test("parsePatchSpans returns empty list for an empty diff", () => {
|
|
226
|
+
assert.deepEqual(parsePatchSpans(""), []);
|
|
227
|
+
assert.deepEqual(parsePatchSpans("\n\n \n"), []);
|
|
228
|
+
});
|
|
@@ -3,6 +3,7 @@ import { test } from 'node:test';
|
|
|
3
3
|
import { readFileSync } from 'node:fs';
|
|
4
4
|
import { join } from 'node:path';
|
|
5
5
|
const distWeb = join(import.meta.dirname, '..', 'dist', 'src', 'web');
|
|
6
|
+
const graphSource = join(import.meta.dirname, '..', 'src', 'web', 'graph.ts');
|
|
6
7
|
test('built CSS contains graph-onboarding styles', () => {
|
|
7
8
|
const css = readFileSync(join(distWeb, 'style.css'), 'utf-8');
|
|
8
9
|
assert.ok(css.includes('.graph-onboarding'), 'CSS should contain .graph-onboarding class');
|
|
@@ -23,11 +24,20 @@ test('built HTML contains #cy graph container', () => {
|
|
|
23
24
|
const html = readFileSync(join(distWeb, 'index.html'), 'utf-8');
|
|
24
25
|
assert.ok(html.includes('id="cy"'), 'HTML should contain the #cy graph container');
|
|
25
26
|
assert.ok(html.includes('id="graph-pane"'), 'HTML should contain the #graph-pane wrapper');
|
|
27
|
+
assert.ok(html.includes('href="/favicon.svg"'), 'HTML should link the app favicon');
|
|
28
|
+
assert.ok(html.includes('href="/favicon.ico"'), 'HTML should link the fallback ICO favicon');
|
|
26
29
|
assert.ok(html.includes('Search symbols or files...'), 'HTML should expose mixed symbol/file search');
|
|
27
30
|
assert.ok(html.includes('id="graph-refresh"'), 'HTML should expose a graph refresh button');
|
|
28
31
|
assert.ok(html.includes('id="file-preview-modal"'), 'HTML should contain the file preview modal shell');
|
|
29
32
|
assert.ok(html.includes('id="file-preview-code"'), 'HTML should contain the file preview code surface');
|
|
30
33
|
});
|
|
34
|
+
test('web build copies the favicon asset', () => {
|
|
35
|
+
const favicon = readFileSync(join(distWeb, 'favicon.svg'), 'utf-8');
|
|
36
|
+
const fallbackFavicon = readFileSync(join(distWeb, 'favicon.ico'));
|
|
37
|
+
assert.ok(favicon.includes('<svg'), 'favicon should be an SVG asset');
|
|
38
|
+
assert.ok(favicon.includes('#38bdf8'), 'favicon should use the minicode graph accent');
|
|
39
|
+
assert.ok(fallbackFavicon.length > 0, 'fallback favicon should be copied');
|
|
40
|
+
});
|
|
31
41
|
test('onboarding hint includes user-facing guidance text in built JS', () => {
|
|
32
42
|
const js = readFileSync(join(distWeb, 'app.js'), 'utf-8');
|
|
33
43
|
assert.ok(js.includes('Code dependency graph'), 'onboarding title should mention the code dependency graph');
|
|
@@ -58,6 +68,16 @@ test('built JS auto-opens symbol details for agent activity and graph search sel
|
|
|
58
68
|
assert.ok(js.includes('openDetail: true'), 'JS should request the detail panel when focusing symbols from agent activity or search');
|
|
59
69
|
assert.ok(js.includes('await showDetail(node, detailEl)'), 'JS should populate the symbol detail panel when focus requests it');
|
|
60
70
|
});
|
|
71
|
+
test('symbol selection keeps the current graph viewport instead of fitting the canvas', () => {
|
|
72
|
+
const source = readFileSync(graphSource, 'utf-8');
|
|
73
|
+
const showDetailStart = source.indexOf('async function showDetail');
|
|
74
|
+
const loadSourceStart = source.indexOf('async function loadSource');
|
|
75
|
+
assert.ok(showDetailStart >= 0, 'graph source should define showDetail');
|
|
76
|
+
assert.ok(loadSourceStart > showDetailStart, 'graph source should define loadSource after showDetail');
|
|
77
|
+
const showDetailBody = source.slice(showDetailStart, loadSourceStart);
|
|
78
|
+
assert.ok(showDetailBody.includes('syncDetailPanelLayout();'), 'showDetail should resize Cytoscape without changing pan/zoom');
|
|
79
|
+
assert.ok(!showDetailBody.includes('syncDetailPanelLayout({ fit: true })'), 'showDetail should not fit the graph when a symbol is selected');
|
|
80
|
+
});
|
|
61
81
|
test('built JS supports file search results and file-centered neighborhood rendering', () => {
|
|
62
82
|
const js = readFileSync(join(distWeb, 'app.js'), 'utf-8');
|
|
63
83
|
assert.ok(js.includes('focusFileInGraph'), 'JS should define a file-focused graph seeding helper');
|
|
@@ -73,7 +73,7 @@ test("openai-compatible client sends tool schemas and parses tool calls", async
|
|
|
73
73
|
const tools = parsedBody.tools;
|
|
74
74
|
assert.equal(tools[0]?.type, "function");
|
|
75
75
|
});
|
|
76
|
-
test("openai-compatible client sends
|
|
76
|
+
test("openai-compatible client sends OpenRouter app attribution headers", async () => {
|
|
77
77
|
let capturedHeaders = {};
|
|
78
78
|
const fetchImpl = async (_input, init) => {
|
|
79
79
|
const rawHeaders = init?.headers;
|
|
@@ -100,7 +100,8 @@ test("openai-compatible client sends correct app URL in HTTP-Referer header", as
|
|
|
100
100
|
maxTokens: 64,
|
|
101
101
|
});
|
|
102
102
|
assert.equal(capturedHeaders["HTTP-Referer"], "https://minicode.seanholung.com", "HTTP-Referer should point to minicode.seanholung.com");
|
|
103
|
-
assert.equal(capturedHeaders["X-Title"], "minicode");
|
|
103
|
+
assert.equal(capturedHeaders["X-OpenRouter-Title"], "minicode");
|
|
104
|
+
assert.equal(capturedHeaders["X-OpenRouter-Categories"], "cli-agent,programming-app");
|
|
104
105
|
});
|
|
105
106
|
test("openai-compatible client repairs missing tool results before sending", async () => {
|
|
106
107
|
let capturedBody = "";
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import assert from 'node:assert/strict';
|
|
2
|
+
import { test } from 'node:test';
|
|
3
|
+
import { readFileSync } from 'node:fs';
|
|
4
|
+
import { join } from 'node:path';
|
|
5
|
+
const packageJsonPath = join(import.meta.dirname, '..', 'package.json');
|
|
6
|
+
test('package metadata includes the minicode website for app attribution', () => {
|
|
7
|
+
const pkg = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
|
|
8
|
+
assert.equal(pkg.homepage, 'https://minicode.seanholung.com');
|
|
9
|
+
});
|
|
@@ -54,6 +54,40 @@ test("loadAgentConfig leaves reasoningEffort undefined when env var is unset", a
|
|
|
54
54
|
}
|
|
55
55
|
}
|
|
56
56
|
});
|
|
57
|
+
test("loadAgentConfig parses REASONING_MAX_TOKENS env var", async () => {
|
|
58
|
+
const prev = process.env.REASONING_MAX_TOKENS;
|
|
59
|
+
try {
|
|
60
|
+
process.env.REASONING_MAX_TOKENS = "4000";
|
|
61
|
+
const config = await loadAgentConfig("/tmp");
|
|
62
|
+
assert.equal(config.reasoningMaxTokens, 4000);
|
|
63
|
+
}
|
|
64
|
+
finally {
|
|
65
|
+
if (prev === undefined) {
|
|
66
|
+
delete process.env.REASONING_MAX_TOKENS;
|
|
67
|
+
}
|
|
68
|
+
else {
|
|
69
|
+
process.env.REASONING_MAX_TOKENS = prev;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
});
|
|
73
|
+
test("loadAgentConfig ignores non-positive or invalid REASONING_MAX_TOKENS values", async () => {
|
|
74
|
+
const prev = process.env.REASONING_MAX_TOKENS;
|
|
75
|
+
try {
|
|
76
|
+
for (const bad of ["0", "-100", "abc", ""]) {
|
|
77
|
+
process.env.REASONING_MAX_TOKENS = bad;
|
|
78
|
+
const config = await loadAgentConfig("/tmp");
|
|
79
|
+
assert.equal(config.reasoningMaxTokens, undefined, `expected undefined for input ${JSON.stringify(bad)}`);
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
finally {
|
|
83
|
+
if (prev === undefined) {
|
|
84
|
+
delete process.env.REASONING_MAX_TOKENS;
|
|
85
|
+
}
|
|
86
|
+
else {
|
|
87
|
+
process.env.REASONING_MAX_TOKENS = prev;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
});
|
|
57
91
|
test("loadAgentConfig normalizes REASONING_EFFORT case", async () => {
|
|
58
92
|
const prev = process.env.REASONING_EFFORT;
|
|
59
93
|
try {
|
|
@@ -262,3 +296,52 @@ test("agent loop omits reasoningEffort when not configured", async () => {
|
|
|
262
296
|
await agent.runTurn("Hello");
|
|
263
297
|
assert.equal(capturedParams.reasoningEffort, undefined);
|
|
264
298
|
});
|
|
299
|
+
// ---------------------------------------------------------------------------
|
|
300
|
+
// Agent loop passes toolChoice to model client
|
|
301
|
+
// ---------------------------------------------------------------------------
|
|
302
|
+
test("agent loop passes toolChoice to model client chat call when set", async () => {
|
|
303
|
+
let capturedToolChoice = "<unset>";
|
|
304
|
+
const config = {
|
|
305
|
+
...createTestAgentConfig("/tmp"),
|
|
306
|
+
toolChoice: "required",
|
|
307
|
+
};
|
|
308
|
+
const mockClient = {
|
|
309
|
+
async chat(params) {
|
|
310
|
+
capturedToolChoice = params.toolChoice;
|
|
311
|
+
return {
|
|
312
|
+
text: "Response",
|
|
313
|
+
toolCalls: [],
|
|
314
|
+
stopReason: "end_turn",
|
|
315
|
+
usage: { inputTokens: 10, outputTokens: 5 },
|
|
316
|
+
};
|
|
317
|
+
},
|
|
318
|
+
};
|
|
319
|
+
const agent = new CodingAgent({
|
|
320
|
+
config,
|
|
321
|
+
modelClient: mockClient,
|
|
322
|
+
toolRegistry: new ToolRegistry([]),
|
|
323
|
+
});
|
|
324
|
+
await agent.runTurn("Hello");
|
|
325
|
+
assert.equal(capturedToolChoice, "required");
|
|
326
|
+
});
|
|
327
|
+
test("agent loop omits toolChoice when not configured", async () => {
|
|
328
|
+
let capturedParams = {};
|
|
329
|
+
const mockClient = {
|
|
330
|
+
async chat(params) {
|
|
331
|
+
capturedParams = params;
|
|
332
|
+
return {
|
|
333
|
+
text: "Response",
|
|
334
|
+
toolCalls: [],
|
|
335
|
+
stopReason: "end_turn",
|
|
336
|
+
usage: { inputTokens: 10, outputTokens: 5 },
|
|
337
|
+
};
|
|
338
|
+
},
|
|
339
|
+
};
|
|
340
|
+
const agent = new CodingAgent({
|
|
341
|
+
config: createTestAgentConfig("/tmp"),
|
|
342
|
+
modelClient: mockClient,
|
|
343
|
+
toolRegistry: new ToolRegistry([]),
|
|
344
|
+
});
|
|
345
|
+
await agent.runTurn("Hello");
|
|
346
|
+
assert.equal(capturedParams.toolChoice, undefined);
|
|
347
|
+
});
|
|
@@ -4,7 +4,7 @@ import { mkdtemp, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
|
4
4
|
import os from "node:os";
|
|
5
5
|
import path from "node:path";
|
|
6
6
|
import { afterEach, test } from "node:test";
|
|
7
|
-
import { collectWorkspaceChanges, writeWorkspaceDiff, } from "../src/benchmark/workspace-changes.js";
|
|
7
|
+
import { captureBaselineRef, collectWorkspaceChanges, writeWorkspaceDiff, } from "../src/benchmark/workspace-changes.js";
|
|
8
8
|
const tempDirs = [];
|
|
9
9
|
afterEach(async () => {
|
|
10
10
|
await Promise.all(tempDirs.splice(0).map((dir) => rm(dir, { recursive: true, force: true })));
|
|
@@ -80,3 +80,52 @@ test("workspace diff only includes files inside the selected workspace subtree",
|
|
|
80
80
|
assert.doesNotMatch(diff, /sibling\.ts/);
|
|
81
81
|
assert.doesNotMatch(diff, /ROOT\.md/);
|
|
82
82
|
});
|
|
83
|
+
test("baseline ref captures committed changes that would otherwise be invisible", async () => {
|
|
84
|
+
// Regression: Gemini-3-Pro ran `git add` + `git commit` mid-task on a
|
|
85
|
+
// benchmark run. The old `git diff` (working-tree vs index) saw nothing
|
|
86
|
+
// and the harness threw away a working fix.
|
|
87
|
+
const workspaceRoot = await createGitWorkspace();
|
|
88
|
+
const baseline = await captureBaselineRef(workspaceRoot);
|
|
89
|
+
assert.ok(baseline && baseline.length >= 7, "baseline ref should be a SHA");
|
|
90
|
+
// Model edits a tracked file and commits, then leaves an untracked helper.
|
|
91
|
+
await writeFile(path.join(workspaceRoot, "src", "app.ts"), "export const value = 2;\n", "utf8");
|
|
92
|
+
execFileSync("git", ["add", "src/app.ts"], { cwd: workspaceRoot, stdio: "ignore" });
|
|
93
|
+
execFileSync("git", ["commit", "-m", "fix value"], { cwd: workspaceRoot, stdio: "ignore" });
|
|
94
|
+
await writeFile(path.join(workspaceRoot, "reproduce.py"), "print('hi')\n", "utf8");
|
|
95
|
+
const withoutBaseline = await collectWorkspaceChanges(workspaceRoot);
|
|
96
|
+
// Without the baseline we miss the committed file entirely.
|
|
97
|
+
assert.deepEqual(withoutBaseline.changedFiles.sort(), ["reproduce.py"]);
|
|
98
|
+
const withBaseline = await collectWorkspaceChanges(workspaceRoot, baseline ?? undefined);
|
|
99
|
+
assert.deepEqual(withBaseline.changedFiles.sort(), ["reproduce.py", "src/app.ts"]);
|
|
100
|
+
const diffPath = path.join(workspaceRoot, "artifacts", "with-baseline.patch");
|
|
101
|
+
const wrote = await writeWorkspaceDiff(workspaceRoot, diffPath, baseline ?? undefined);
|
|
102
|
+
assert.equal(wrote, true);
|
|
103
|
+
const diff = await readFile(diffPath, "utf8");
|
|
104
|
+
assert.match(diff, /diff --git a\/src\/app\.ts b\/src\/app\.ts/);
|
|
105
|
+
assert.match(diff, /-export const value = 1;/);
|
|
106
|
+
assert.match(diff, /\+export const value = 2;/);
|
|
107
|
+
assert.match(diff, /diff --git a\/reproduce\.py b\/reproduce\.py/);
|
|
108
|
+
});
|
|
109
|
+
test("baseline ref also captures staged and unstaged changes (no false negatives)", async () => {
|
|
110
|
+
const workspaceRoot = await createGitWorkspace();
|
|
111
|
+
const baseline = await captureBaselineRef(workspaceRoot);
|
|
112
|
+
// One staged tracked change, one unstaged tracked change, one untracked.
|
|
113
|
+
await mkdir(path.join(workspaceRoot, "src"), { recursive: true });
|
|
114
|
+
await writeFile(path.join(workspaceRoot, "src", "app.ts"), "export const value = 99;\n", "utf8");
|
|
115
|
+
execFileSync("git", ["add", "src/app.ts"], { cwd: workspaceRoot, stdio: "ignore" });
|
|
116
|
+
await writeFile(path.join(workspaceRoot, "src", "app.ts"), "export const value = 100;\n", "utf8");
|
|
117
|
+
await writeFile(path.join(workspaceRoot, "notes.md"), "# notes\n", "utf8");
|
|
118
|
+
const changes = await collectWorkspaceChanges(workspaceRoot, baseline ?? undefined);
|
|
119
|
+
assert.deepEqual(changes.changedFiles.sort(), ["notes.md", "src/app.ts"]);
|
|
120
|
+
const diffPath = path.join(workspaceRoot, "artifacts", "mixed.patch");
|
|
121
|
+
await writeWorkspaceDiff(workspaceRoot, diffPath, baseline ?? undefined);
|
|
122
|
+
const diff = await readFile(diffPath, "utf8");
|
|
123
|
+
assert.match(diff, /\+export const value = 100;/);
|
|
124
|
+
assert.match(diff, /diff --git a\/notes\.md b\/notes\.md/);
|
|
125
|
+
});
|
|
126
|
+
test("captureBaselineRef returns null for a non-git workspace", async () => {
|
|
127
|
+
const workspaceRoot = await mkdtemp(path.join(os.tmpdir(), "minicode-workspace-changes-nongit-"));
|
|
128
|
+
tempDirs.push(workspaceRoot);
|
|
129
|
+
const baseline = await captureBaselineRef(workspaceRoot);
|
|
130
|
+
assert.equal(baseline, null);
|
|
131
|
+
});
|
|
@@ -8,7 +8,10 @@ import type { AgentConfig, BeforeToolCallHook, ModelClient, OutputSchema } from
|
|
|
8
8
|
* Content-aware truncation for tool outputs.
|
|
9
9
|
* Different tools benefit from different truncation strategies:
|
|
10
10
|
* - read_file: No truncation — the model needs exact text for edits
|
|
11
|
-
* - run_command: Keep tail (
|
|
11
|
+
* - run_command: Keep head + tail (50/50). Errors live at the end of
|
|
12
|
+
* normal failures, but pathological cases (infinite loops, runaway
|
|
13
|
+
* recursion, excessive logging) reveal themselves at the start. A
|
|
14
|
+
* tail-heavy split hides the diagnostic bytes for the runaway case.
|
|
12
15
|
* - search: Keep head with a match count footer
|
|
13
16
|
* - default: Keep head (existing behavior)
|
|
14
17
|
*/
|
|
@@ -144,7 +147,16 @@ export declare class CodingAgent {
|
|
|
144
147
|
inputTokens: number;
|
|
145
148
|
outputTokens: number;
|
|
146
149
|
cachedInputTokens?: number;
|
|
150
|
+
reasoningTokens?: number;
|
|
147
151
|
};
|
|
152
|
+
/**
|
|
153
|
+
* Most recent step's reasoning content, when the model provided any
|
|
154
|
+
* (Anthropic extended thinking blocks or OpenRouter `message.reasoning`).
|
|
155
|
+
* Surfaced so callers can include it when re-prompting after a failed
|
|
156
|
+
* attempt — e.g. benchmark mode's retry path can feed the model's own
|
|
157
|
+
* prior thinking back so it sees what it was about to do.
|
|
158
|
+
*/
|
|
159
|
+
reasoningContent?: string;
|
|
148
160
|
streamed?: boolean;
|
|
149
161
|
}>;
|
|
150
162
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent.d.ts","sourceRoot":"","sources":["../../../src/agent/agent.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,mBAAmB,EACzB,MAAM,4BAA4B,CAAC;AACpC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD,OAAO,EAAE,OAAO,EAAE,MAAM,uBAAuB,CAAC;AAChD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EACV,WAAW,EACX,kBAAkB,EAClB,WAAW,EACX,YAAY,EAEb,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"agent.d.ts","sourceRoot":"","sources":["../../../src/agent/agent.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,mBAAmB,EACzB,MAAM,4BAA4B,CAAC;AACpC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD,OAAO,EAAE,OAAO,EAAE,MAAM,uBAAuB,CAAC;AAChD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EACV,WAAW,EACX,kBAAkB,EAClB,WAAW,EACX,YAAY,EAEb,MAAM,YAAY,CAAC;AA2HpB;;;;;;;;;;GAUG;AACH,wBAAgB,kBAAkB,CAChC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,MAAM,GACf,MAAM,CAyCR;AAED,MAAM,MAAM,gBAAgB,GAAG;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC;AACrE,MAAM,MAAM,sBAAsB,GAAG;IAAE,IAAI,EAAE,iBAAiB,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC;AAClF,MAAM,MAAM,YAAY,GAAG;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC;AAC1D,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,iBAAiB,CAAC;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAChC,CAAC;AACF,MAAM,MAAM,mBAAmB,GAAG;IAChC,IAAI,EAAE,eAAe,CAAC;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AACF,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,gBAAgB,CAAC;IACvB,aAAa,EAAE,MAAM,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;CAC1B,CAAC;AACF,MAAM,MAAM,QAAQ,GAChB,gBAAgB,GAChB,sBAAsB,GACtB,YAAY,GACZ,qBAAqB,GACrB,mBAAmB,GACnB,qBAAqB,CAAC;AA+B1B,qBAAa,WAAW;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO,CAAC,MAAM,CAAc;IAC5B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAc;IAC1C,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAe;IAC5C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAA0E;IACrG,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAA0C;IACrE,OAAO,CAAC,QAAQ,CAAC,UAAU,CAA0C;IACrE,OAAO,CAAC,QAAQ,CAAC,SAAS,CAA0C;IACpE,OAAO,CAAC,QAAQ,CAAC,qBAAqB,CAAyC;IAC/E,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAsB;IACxD,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAiC;IAEhE;;;;OAIG;IACH,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAsB;IAEnD;;;;OAIG;IACH,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAkC;IAEhE,kEAAkE;IAClE,OAAO,CAAC,kBAAkB,CAAqB;gBAEnC,MAAM,EAAE;QAClB,MAAM,EAAE,WAAW,CAAC;QACpB,WAAW,EAAE,WAAW,CAAC;QACzB,YAAY,EAAE,YAAY,CAAC;QAC3B,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,UAAU,CAAC,EAAE,CAAC,YAAY,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,KAAK,aAAa,GAAG,SAAS,CAAC;QACvE,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;QACvC,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;QACvC,SAAS,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;QACtC,qBAAqB,CAAC,EAAE,MAAM,MAAM,GAAG,SAAS,CAAC;QACjD;;;;;;;;;;;WAWG;QACH,iBAAiB,CAAC,EAAE,mBAAmB,CAAC;QACxC;;;;;WAKG;QACH,cAAc,CAAC,EAAE,kBAAkB,CAAC;KACrC;IAeD,OAAO,CAAC,UAAU;IASlB,UAAU,IAAI,OAAO;IAIrB,kBAAkB,IAAI,WAAW,CAAC,iBAAiB,CAAC;IAIpD,gBAAgB,IAAI;QAAE,aAAa,EAAE,MAAM,CAAC;QAAC,gBAAgB,EAAE,MAAM,CAAA;KAAE;IAOvE,kBAAkB,CAAC,MAAM,EAAE,WAAW,CAAC,iBAAiB,CAAC,GAAG,IAAI;IAMhE;;;;;;;OAOG;IACG,cAAc,IAAI,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC;IAMxD,OAAO,CAAC,WAAW;IAKnB;;;;;OAKG;IACH,OAAO,CAAC,wBAAwB;IAehC;;;;;OAKG;YACW,eAAe;IAavB,OAAO,CACX,WAAW,EAAE,MAAM,EACnB,OAAO,CAAC,EAAE;QAAE,MAAM,CAAC,EAAE,WAAW,CAAC;QAAC,YAAY,CAAC,EAAE,YAAY,CAAA;KAAE,GAC9D,OAAO,CAAC;QACT,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,CAAC,EAAE,OAAO,CAAC;QACjB,KAAK,CAAC,EAAE;YACN,WAAW,EAAE,MAAM,CAAC;YACpB,YAAY,EAAE,MAAM,CAAC;YACrB,iBAAiB,CAAC,EAAE,MAAM,CAAC;YAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;SAC1B,CAAC;QACF;;;;;;WAMG;QACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,QAAQ,CAAC,EAAE,OAAO,CAAC;KACpB,CAAC;CAqcH"}
|