@sean.holung/minicode 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/dist/src/agent/config.js +11 -1
  2. package/dist/src/benchmark/config.js +15 -1
  3. package/dist/src/benchmark/index.js +1 -1
  4. package/dist/src/benchmark/workspace-changes.js +93 -12
  5. package/dist/src/cli/benchmark-run.js +285 -12
  6. package/dist/src/cli/contextbench-trajectory.js +258 -0
  7. package/dist/src/web/app.js +1 -1
  8. package/dist/src/web/favicon.ico +0 -0
  9. package/dist/src/web/favicon.svg +9 -0
  10. package/dist/src/web/index.html +2 -0
  11. package/dist/tests/agent.test.js +17 -21
  12. package/dist/tests/benchmark-run.test.js +223 -2
  13. package/dist/tests/contextbench-trajectory.test.js +228 -0
  14. package/dist/tests/graph-onboarding.test.js +20 -0
  15. package/dist/tests/model-client-openai.test.js +3 -2
  16. package/dist/tests/package-metadata.test.js +9 -0
  17. package/dist/tests/reasoning-effort.test.js +83 -0
  18. package/dist/tests/workspace-changes.test.js +50 -1
  19. package/node_modules/@sean.holung/minicode-sdk/dist/src/agent/agent.d.ts +13 -1
  20. package/node_modules/@sean.holung/minicode-sdk/dist/src/agent/agent.d.ts.map +1 -1
  21. package/node_modules/@sean.holung/minicode-sdk/dist/src/agent/agent.js +148 -25
  22. package/node_modules/@sean.holung/minicode-sdk/dist/src/agent/agent.js.map +1 -1
  23. package/node_modules/@sean.holung/minicode-sdk/dist/src/agent/types.d.ts +62 -0
  24. package/node_modules/@sean.holung/minicode-sdk/dist/src/agent/types.d.ts.map +1 -1
  25. package/node_modules/@sean.holung/minicode-sdk/dist/src/agent/types.js.map +1 -1
  26. package/node_modules/@sean.holung/minicode-sdk/dist/src/model/client.d.ts +5 -1
  27. package/node_modules/@sean.holung/minicode-sdk/dist/src/model/client.d.ts.map +1 -1
  28. package/node_modules/@sean.holung/minicode-sdk/dist/src/model/client.js +88 -8
  29. package/node_modules/@sean.holung/minicode-sdk/dist/src/model/client.js.map +1 -1
  30. package/node_modules/@sean.holung/minicode-sdk/dist/src/prompt/system-prompt.d.ts.map +1 -1
  31. package/node_modules/@sean.holung/minicode-sdk/dist/src/prompt/system-prompt.js +1 -1
  32. package/node_modules/@sean.holung/minicode-sdk/dist/src/prompt/system-prompt.js.map +1 -1
  33. package/node_modules/minicode-plugin-python/dist/tsconfig.tsbuildinfo +1 -1
  34. package/package.json +2 -1
@@ -0,0 +1,228 @@
1
+ import assert from "node:assert/strict";
2
+ import { test } from "node:test";
3
+ import { buildContextBenchTrajectory, parsePatchSpans, } from "../src/cli/contextbench-trajectory.js";
4
+ function trace(step, name, input, result = null, skipped = false) {
5
+ return { step, name, input, result, skipped };
6
+ }
7
+ test("read_file with explicit offset and limit becomes a tight explore_context span", () => {
8
+ const trajectory = buildContextBenchTrajectory({
9
+ systemPrompt: "system",
10
+ userPrompt: "Fix the bug.",
11
+ toolCalls: [trace(1, "read_file", { path: "app/main.py", offset: 10, limit: 30 })],
12
+ finalAssistantText: "Done.",
13
+ workspaceRoot: "/workspace",
14
+ patch: "",
15
+ });
16
+ const explore = trajectory.messages.find((m) => m.role === "assistant" && m.content.includes("<explore_context>"));
17
+ assert.ok(explore, "should emit one explore_context message");
18
+ assert.match(explore.content, /File: app\/main\.py/);
19
+ // offset=10, limit=30 → lines 10..39 (start + limit - 1)
20
+ assert.match(explore.content, /Lines: 10-39/);
21
+ });
22
+ test("read_file without offset/limit prefers the result's last line-number prefix", () => {
23
+ const numberedContent = Array.from({ length: 5 }, (_, i) => `${i + 1}|line ${i + 1}`).join("\n") + "\n";
24
+ const trajectory = buildContextBenchTrajectory({
25
+ systemPrompt: "system",
26
+ userPrompt: "Fix the bug.",
27
+ toolCalls: [trace(1, "read_file", { path: "app/foo.py" }, numberedContent)],
28
+ finalAssistantText: "Done.",
29
+ workspaceRoot: "/workspace",
30
+ patch: "",
31
+ });
32
+ const explore = trajectory.messages.find((m) => m.content.includes("<explore_context>"));
33
+ assert.ok(explore);
34
+ assert.match(explore.content, /File: app\/foo\.py\nLines: 1-5/);
35
+ });
36
+ test("read_file with unparseable result and no offset/limit is omitted", () => {
37
+ const trajectory = buildContextBenchTrajectory({
38
+ systemPrompt: "system",
39
+ userPrompt: "Fix.",
40
+ toolCalls: [trace(1, "read_file", { path: "x.py" }, "no line numbers here")],
41
+ finalAssistantText: "Done.",
42
+ workspaceRoot: "/workspace",
43
+ patch: "",
44
+ });
45
+ // Only the final PATCH_CONTEXT assistant message should be present;
46
+ // no explore_context for this step because we couldn't bound it safely.
47
+ const exploreMessages = trajectory.messages.filter((m) => m.content.includes("<explore_context>"));
48
+ assert.equal(exploreMessages.length, 0);
49
+ });
50
+ test("read_symbol consults the project index to derive file+lines", () => {
51
+ const stubIndex = {
52
+ getSymbol: (name) => name === "Foo.bar"
53
+ ? {
54
+ name: "bar",
55
+ qualifiedName: "Foo.bar",
56
+ kind: "method",
57
+ filePath: "src/foo.ts",
58
+ startLine: 42,
59
+ endLine: 71,
60
+ signature: "bar()",
61
+ exported: false,
62
+ dependencies: [],
63
+ }
64
+ : undefined,
65
+ getSymbolMatches: () => [],
66
+ dependencyEdges: [],
67
+ getDependencyCone: () => [],
68
+ };
69
+ const trajectory = buildContextBenchTrajectory({
70
+ systemPrompt: "system",
71
+ userPrompt: "Fix.",
72
+ toolCalls: [trace(1, "read_symbol", { name: "Foo.bar" })],
73
+ finalAssistantText: "Done.",
74
+ workspaceRoot: "/workspace",
75
+ patch: "",
76
+ ...(stubIndex !== undefined ? { projectIndex: stubIndex } : {}),
77
+ });
78
+ const explore = trajectory.messages.find((m) => m.content.includes("<explore_context>"));
79
+ assert.ok(explore);
80
+ assert.match(explore.content, /File: src\/foo\.ts\nLines: 42-71/);
81
+ });
82
+ test("find_references emits a span per incoming-edge source symbol", () => {
83
+ const symbols = {
84
+ "Foo.bar": {
85
+ name: "bar",
86
+ qualifiedName: "Foo.bar",
87
+ kind: "method",
88
+ filePath: "src/foo.ts",
89
+ startLine: 10,
90
+ endLine: 20,
91
+ signature: "",
92
+ exported: false,
93
+ dependencies: [],
94
+ },
95
+ callerOne: {
96
+ name: "callerOne",
97
+ qualifiedName: "callerOne",
98
+ kind: "function",
99
+ filePath: "src/caller-one.ts",
100
+ startLine: 100,
101
+ endLine: 110,
102
+ signature: "",
103
+ exported: false,
104
+ dependencies: [],
105
+ },
106
+ callerTwo: {
107
+ name: "callerTwo",
108
+ qualifiedName: "callerTwo",
109
+ kind: "function",
110
+ filePath: "src/caller-two.ts",
111
+ startLine: 200,
112
+ endLine: 220,
113
+ signature: "",
114
+ exported: false,
115
+ dependencies: [],
116
+ },
117
+ };
118
+ const stubIndex = {
119
+ getSymbol: (name) => symbols[name],
120
+ getSymbolMatches: () => [],
121
+ dependencyEdges: [
122
+ { from: "callerOne", to: "Foo.bar", kind: "calls" },
123
+ { from: "callerTwo", to: "Foo.bar", kind: "calls" },
124
+ ],
125
+ getDependencyCone: () => [],
126
+ };
127
+ const trajectory = buildContextBenchTrajectory({
128
+ systemPrompt: "system",
129
+ userPrompt: "Fix.",
130
+ toolCalls: [trace(1, "find_references", { name: "Foo.bar" })],
131
+ finalAssistantText: "Done.",
132
+ workspaceRoot: "/workspace",
133
+ patch: "",
134
+ ...(stubIndex !== undefined ? { projectIndex: stubIndex } : {}),
135
+ });
136
+ const explore = trajectory.messages.find((m) => m.content.includes("<explore_context>"));
137
+ assert.ok(explore);
138
+ assert.match(explore.content, /File: src\/caller-one\.ts\nLines: 100-110/);
139
+ assert.match(explore.content, /File: src\/caller-two\.ts\nLines: 200-220/);
140
+ });
141
+ test("PATCH_CONTEXT is computed from the unified diff's new-file hunk ranges", () => {
142
+ const patch = [
143
+ "diff --git a/app/main.py b/app/main.py",
144
+ "--- a/app/main.py",
145
+ "+++ b/app/main.py",
146
+ "@@ -10,5 +12,7 @@",
147
+ " unchanged",
148
+ "-removed",
149
+ "+added",
150
+ "+added 2",
151
+ "diff --git a/app/util.py b/app/util.py",
152
+ "--- a/app/util.py",
153
+ "+++ b/app/util.py",
154
+ "@@ -1,3 +1,4 @@",
155
+ "+new helper",
156
+ " a",
157
+ " b",
158
+ " c",
159
+ ].join("\n");
160
+ const trajectory = buildContextBenchTrajectory({
161
+ systemPrompt: "system",
162
+ userPrompt: "Fix.",
163
+ toolCalls: [],
164
+ finalAssistantText: "Done.",
165
+ workspaceRoot: "/workspace",
166
+ patch,
167
+ });
168
+ const final = trajectory.messages[trajectory.messages.length - 1];
169
+ assert.match(final.content, /<PATCH_CONTEXT>/);
170
+ // 12,7 → new lines 12..18
171
+ assert.match(final.content, /File: app\/main\.py\nLines: 12-18/);
172
+ // 1,4 → new lines 1..4
173
+ assert.match(final.content, /File: app\/util\.py\nLines: 1-4/);
174
+ assert.equal(trajectory.info.submission, patch);
175
+ });
176
+ test("skipped tool calls do not contribute spans (e.g. loop-guard nudges)", () => {
177
+ const trajectory = buildContextBenchTrajectory({
178
+ systemPrompt: "system",
179
+ userPrompt: "Fix.",
180
+ toolCalls: [
181
+ trace(1, "read_file", { path: "x.py", offset: 1, limit: 10 }, "ok", false),
182
+ trace(2, "read_file", { path: "x.py", offset: 1, limit: 10 }, "[loop guard: ...]", true),
183
+ ],
184
+ finalAssistantText: "Done.",
185
+ workspaceRoot: "/workspace",
186
+ patch: "",
187
+ });
188
+ const exploreMessages = trajectory.messages.filter((m) => m.content.includes("<explore_context>"));
189
+ // Only the first (non-skipped) call should produce an explore_context.
190
+ assert.equal(exploreMessages.length, 1);
191
+ });
192
+ test("messages always lead with system + user roles", () => {
193
+ const trajectory = buildContextBenchTrajectory({
194
+ systemPrompt: "you are an agent",
195
+ userPrompt: "fix the issue",
196
+ toolCalls: [],
197
+ finalAssistantText: "Done.",
198
+ workspaceRoot: "/workspace",
199
+ patch: "",
200
+ });
201
+ assert.equal(trajectory.messages[0]?.role, "system");
202
+ assert.equal(trajectory.messages[0]?.content, "you are an agent");
203
+ assert.equal(trajectory.messages[1]?.role, "user");
204
+ assert.equal(trajectory.messages[1]?.content, "fix the issue");
205
+ });
206
+ test("parsePatchSpans handles +N,0 hunks (deletion-only at line N)", () => {
207
+ const patch = [
208
+ "diff --git a/a.py b/a.py",
209
+ "--- a/a.py",
210
+ "+++ b/a.py",
211
+ "@@ -5,3 +5,0 @@",
212
+ "-removed",
213
+ "-removed",
214
+ "-removed",
215
+ ].join("\n");
216
+ const spans = parsePatchSpans(patch);
217
+ // Count=0 means the new file has no lines at this hunk position; degenerate
218
+ // case — we still emit a 1-line span at the starting line so the file is
219
+ // surfaced rather than dropped.
220
+ assert.equal(spans.length, 1);
221
+ assert.equal(spans[0]?.file, "a.py");
222
+ assert.equal(spans[0]?.startLine, 5);
223
+ assert.equal(spans[0]?.endLine, 5);
224
+ });
225
+ test("parsePatchSpans returns empty list for an empty diff", () => {
226
+ assert.deepEqual(parsePatchSpans(""), []);
227
+ assert.deepEqual(parsePatchSpans("\n\n \n"), []);
228
+ });
@@ -3,6 +3,7 @@ import { test } from 'node:test';
3
3
  import { readFileSync } from 'node:fs';
4
4
  import { join } from 'node:path';
5
5
  const distWeb = join(import.meta.dirname, '..', 'dist', 'src', 'web');
6
+ const graphSource = join(import.meta.dirname, '..', 'src', 'web', 'graph.ts');
6
7
  test('built CSS contains graph-onboarding styles', () => {
7
8
  const css = readFileSync(join(distWeb, 'style.css'), 'utf-8');
8
9
  assert.ok(css.includes('.graph-onboarding'), 'CSS should contain .graph-onboarding class');
@@ -23,11 +24,20 @@ test('built HTML contains #cy graph container', () => {
23
24
  const html = readFileSync(join(distWeb, 'index.html'), 'utf-8');
24
25
  assert.ok(html.includes('id="cy"'), 'HTML should contain the #cy graph container');
25
26
  assert.ok(html.includes('id="graph-pane"'), 'HTML should contain the #graph-pane wrapper');
27
+ assert.ok(html.includes('href="/favicon.svg"'), 'HTML should link the app favicon');
28
+ assert.ok(html.includes('href="/favicon.ico"'), 'HTML should link the fallback ICO favicon');
26
29
  assert.ok(html.includes('Search symbols or files...'), 'HTML should expose mixed symbol/file search');
27
30
  assert.ok(html.includes('id="graph-refresh"'), 'HTML should expose a graph refresh button');
28
31
  assert.ok(html.includes('id="file-preview-modal"'), 'HTML should contain the file preview modal shell');
29
32
  assert.ok(html.includes('id="file-preview-code"'), 'HTML should contain the file preview code surface');
30
33
  });
34
+ test('web build copies the favicon asset', () => {
35
+ const favicon = readFileSync(join(distWeb, 'favicon.svg'), 'utf-8');
36
+ const fallbackFavicon = readFileSync(join(distWeb, 'favicon.ico'));
37
+ assert.ok(favicon.includes('<svg'), 'favicon should be an SVG asset');
38
+ assert.ok(favicon.includes('#38bdf8'), 'favicon should use the minicode graph accent');
39
+ assert.ok(fallbackFavicon.length > 0, 'fallback favicon should be copied');
40
+ });
31
41
  test('onboarding hint includes user-facing guidance text in built JS', () => {
32
42
  const js = readFileSync(join(distWeb, 'app.js'), 'utf-8');
33
43
  assert.ok(js.includes('Code dependency graph'), 'onboarding title should mention the code dependency graph');
@@ -58,6 +68,16 @@ test('built JS auto-opens symbol details for agent activity and graph search sel
58
68
  assert.ok(js.includes('openDetail: true'), 'JS should request the detail panel when focusing symbols from agent activity or search');
59
69
  assert.ok(js.includes('await showDetail(node, detailEl)'), 'JS should populate the symbol detail panel when focus requests it');
60
70
  });
71
+ test('symbol selection keeps the current graph viewport instead of fitting the canvas', () => {
72
+ const source = readFileSync(graphSource, 'utf-8');
73
+ const showDetailStart = source.indexOf('async function showDetail');
74
+ const loadSourceStart = source.indexOf('async function loadSource');
75
+ assert.ok(showDetailStart >= 0, 'graph source should define showDetail');
76
+ assert.ok(loadSourceStart > showDetailStart, 'graph source should define loadSource after showDetail');
77
+ const showDetailBody = source.slice(showDetailStart, loadSourceStart);
78
+ assert.ok(showDetailBody.includes('syncDetailPanelLayout();'), 'showDetail should resize Cytoscape without changing pan/zoom');
79
+ assert.ok(!showDetailBody.includes('syncDetailPanelLayout({ fit: true })'), 'showDetail should not fit the graph when a symbol is selected');
80
+ });
61
81
  test('built JS supports file search results and file-centered neighborhood rendering', () => {
62
82
  const js = readFileSync(join(distWeb, 'app.js'), 'utf-8');
63
83
  assert.ok(js.includes('focusFileInGraph'), 'JS should define a file-focused graph seeding helper');
@@ -73,7 +73,7 @@ test("openai-compatible client sends tool schemas and parses tool calls", async
73
73
  const tools = parsedBody.tools;
74
74
  assert.equal(tools[0]?.type, "function");
75
75
  });
76
- test("openai-compatible client sends correct app URL in HTTP-Referer header", async () => {
76
+ test("openai-compatible client sends OpenRouter app attribution headers", async () => {
77
77
  let capturedHeaders = {};
78
78
  const fetchImpl = async (_input, init) => {
79
79
  const rawHeaders = init?.headers;
@@ -100,7 +100,8 @@ test("openai-compatible client sends correct app URL in HTTP-Referer header", as
100
100
  maxTokens: 64,
101
101
  });
102
102
  assert.equal(capturedHeaders["HTTP-Referer"], "https://minicode.seanholung.com", "HTTP-Referer should point to minicode.seanholung.com");
103
- assert.equal(capturedHeaders["X-Title"], "minicode");
103
+ assert.equal(capturedHeaders["X-OpenRouter-Title"], "minicode");
104
+ assert.equal(capturedHeaders["X-OpenRouter-Categories"], "cli-agent,programming-app");
104
105
  });
105
106
  test("openai-compatible client repairs missing tool results before sending", async () => {
106
107
  let capturedBody = "";
@@ -0,0 +1,9 @@
1
+ import assert from 'node:assert/strict';
2
+ import { test } from 'node:test';
3
+ import { readFileSync } from 'node:fs';
4
+ import { join } from 'node:path';
5
+ const packageJsonPath = join(import.meta.dirname, '..', 'package.json');
6
+ test('package metadata includes the minicode website for app attribution', () => {
7
+ const pkg = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
8
+ assert.equal(pkg.homepage, 'https://minicode.seanholung.com');
9
+ });
@@ -54,6 +54,40 @@ test("loadAgentConfig leaves reasoningEffort undefined when env var is unset", a
54
54
  }
55
55
  }
56
56
  });
57
+ test("loadAgentConfig parses REASONING_MAX_TOKENS env var", async () => {
58
+ const prev = process.env.REASONING_MAX_TOKENS;
59
+ try {
60
+ process.env.REASONING_MAX_TOKENS = "4000";
61
+ const config = await loadAgentConfig("/tmp");
62
+ assert.equal(config.reasoningMaxTokens, 4000);
63
+ }
64
+ finally {
65
+ if (prev === undefined) {
66
+ delete process.env.REASONING_MAX_TOKENS;
67
+ }
68
+ else {
69
+ process.env.REASONING_MAX_TOKENS = prev;
70
+ }
71
+ }
72
+ });
73
+ test("loadAgentConfig ignores non-positive or invalid REASONING_MAX_TOKENS values", async () => {
74
+ const prev = process.env.REASONING_MAX_TOKENS;
75
+ try {
76
+ for (const bad of ["0", "-100", "abc", ""]) {
77
+ process.env.REASONING_MAX_TOKENS = bad;
78
+ const config = await loadAgentConfig("/tmp");
79
+ assert.equal(config.reasoningMaxTokens, undefined, `expected undefined for input ${JSON.stringify(bad)}`);
80
+ }
81
+ }
82
+ finally {
83
+ if (prev === undefined) {
84
+ delete process.env.REASONING_MAX_TOKENS;
85
+ }
86
+ else {
87
+ process.env.REASONING_MAX_TOKENS = prev;
88
+ }
89
+ }
90
+ });
57
91
  test("loadAgentConfig normalizes REASONING_EFFORT case", async () => {
58
92
  const prev = process.env.REASONING_EFFORT;
59
93
  try {
@@ -262,3 +296,52 @@ test("agent loop omits reasoningEffort when not configured", async () => {
262
296
  await agent.runTurn("Hello");
263
297
  assert.equal(capturedParams.reasoningEffort, undefined);
264
298
  });
299
+ // ---------------------------------------------------------------------------
300
+ // Agent loop passes toolChoice to model client
301
+ // ---------------------------------------------------------------------------
302
+ test("agent loop passes toolChoice to model client chat call when set", async () => {
303
+ let capturedToolChoice = "<unset>";
304
+ const config = {
305
+ ...createTestAgentConfig("/tmp"),
306
+ toolChoice: "required",
307
+ };
308
+ const mockClient = {
309
+ async chat(params) {
310
+ capturedToolChoice = params.toolChoice;
311
+ return {
312
+ text: "Response",
313
+ toolCalls: [],
314
+ stopReason: "end_turn",
315
+ usage: { inputTokens: 10, outputTokens: 5 },
316
+ };
317
+ },
318
+ };
319
+ const agent = new CodingAgent({
320
+ config,
321
+ modelClient: mockClient,
322
+ toolRegistry: new ToolRegistry([]),
323
+ });
324
+ await agent.runTurn("Hello");
325
+ assert.equal(capturedToolChoice, "required");
326
+ });
327
+ test("agent loop omits toolChoice when not configured", async () => {
328
+ let capturedParams = {};
329
+ const mockClient = {
330
+ async chat(params) {
331
+ capturedParams = params;
332
+ return {
333
+ text: "Response",
334
+ toolCalls: [],
335
+ stopReason: "end_turn",
336
+ usage: { inputTokens: 10, outputTokens: 5 },
337
+ };
338
+ },
339
+ };
340
+ const agent = new CodingAgent({
341
+ config: createTestAgentConfig("/tmp"),
342
+ modelClient: mockClient,
343
+ toolRegistry: new ToolRegistry([]),
344
+ });
345
+ await agent.runTurn("Hello");
346
+ assert.equal(capturedParams.toolChoice, undefined);
347
+ });
@@ -4,7 +4,7 @@ import { mkdtemp, mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import os from "node:os";
5
5
  import path from "node:path";
6
6
  import { afterEach, test } from "node:test";
7
- import { collectWorkspaceChanges, writeWorkspaceDiff, } from "../src/benchmark/workspace-changes.js";
7
+ import { captureBaselineRef, collectWorkspaceChanges, writeWorkspaceDiff, } from "../src/benchmark/workspace-changes.js";
8
8
  const tempDirs = [];
9
9
  afterEach(async () => {
10
10
  await Promise.all(tempDirs.splice(0).map((dir) => rm(dir, { recursive: true, force: true })));
@@ -80,3 +80,52 @@ test("workspace diff only includes files inside the selected workspace subtree",
80
80
  assert.doesNotMatch(diff, /sibling\.ts/);
81
81
  assert.doesNotMatch(diff, /ROOT\.md/);
82
82
  });
83
+ test("baseline ref captures committed changes that would otherwise be invisible", async () => {
84
+ // Regression: Gemini-3-Pro ran `git add` + `git commit` mid-task on a
85
+ // benchmark run. The old `git diff` (working-tree vs index) saw nothing
86
+ // and the harness threw away a working fix.
87
+ const workspaceRoot = await createGitWorkspace();
88
+ const baseline = await captureBaselineRef(workspaceRoot);
89
+ assert.ok(baseline && baseline.length >= 7, "baseline ref should be a SHA");
90
+ // Model edits a tracked file and commits, then leaves an untracked helper.
91
+ await writeFile(path.join(workspaceRoot, "src", "app.ts"), "export const value = 2;\n", "utf8");
92
+ execFileSync("git", ["add", "src/app.ts"], { cwd: workspaceRoot, stdio: "ignore" });
93
+ execFileSync("git", ["commit", "-m", "fix value"], { cwd: workspaceRoot, stdio: "ignore" });
94
+ await writeFile(path.join(workspaceRoot, "reproduce.py"), "print('hi')\n", "utf8");
95
+ const withoutBaseline = await collectWorkspaceChanges(workspaceRoot);
96
+ // Without the baseline we miss the committed file entirely.
97
+ assert.deepEqual(withoutBaseline.changedFiles.sort(), ["reproduce.py"]);
98
+ const withBaseline = await collectWorkspaceChanges(workspaceRoot, baseline ?? undefined);
99
+ assert.deepEqual(withBaseline.changedFiles.sort(), ["reproduce.py", "src/app.ts"]);
100
+ const diffPath = path.join(workspaceRoot, "artifacts", "with-baseline.patch");
101
+ const wrote = await writeWorkspaceDiff(workspaceRoot, diffPath, baseline ?? undefined);
102
+ assert.equal(wrote, true);
103
+ const diff = await readFile(diffPath, "utf8");
104
+ assert.match(diff, /diff --git a\/src\/app\.ts b\/src\/app\.ts/);
105
+ assert.match(diff, /-export const value = 1;/);
106
+ assert.match(diff, /\+export const value = 2;/);
107
+ assert.match(diff, /diff --git a\/reproduce\.py b\/reproduce\.py/);
108
+ });
109
+ test("baseline ref also captures staged and unstaged changes (no false negatives)", async () => {
110
+ const workspaceRoot = await createGitWorkspace();
111
+ const baseline = await captureBaselineRef(workspaceRoot);
112
+ // One staged tracked change, one unstaged tracked change, one untracked.
113
+ await mkdir(path.join(workspaceRoot, "src"), { recursive: true });
114
+ await writeFile(path.join(workspaceRoot, "src", "app.ts"), "export const value = 99;\n", "utf8");
115
+ execFileSync("git", ["add", "src/app.ts"], { cwd: workspaceRoot, stdio: "ignore" });
116
+ await writeFile(path.join(workspaceRoot, "src", "app.ts"), "export const value = 100;\n", "utf8");
117
+ await writeFile(path.join(workspaceRoot, "notes.md"), "# notes\n", "utf8");
118
+ const changes = await collectWorkspaceChanges(workspaceRoot, baseline ?? undefined);
119
+ assert.deepEqual(changes.changedFiles.sort(), ["notes.md", "src/app.ts"]);
120
+ const diffPath = path.join(workspaceRoot, "artifacts", "mixed.patch");
121
+ await writeWorkspaceDiff(workspaceRoot, diffPath, baseline ?? undefined);
122
+ const diff = await readFile(diffPath, "utf8");
123
+ assert.match(diff, /\+export const value = 100;/);
124
+ assert.match(diff, /diff --git a\/notes\.md b\/notes\.md/);
125
+ });
126
+ test("captureBaselineRef returns null for a non-git workspace", async () => {
127
+ const workspaceRoot = await mkdtemp(path.join(os.tmpdir(), "minicode-workspace-changes-nongit-"));
128
+ tempDirs.push(workspaceRoot);
129
+ const baseline = await captureBaselineRef(workspaceRoot);
130
+ assert.equal(baseline, null);
131
+ });
@@ -8,7 +8,10 @@ import type { AgentConfig, BeforeToolCallHook, ModelClient, OutputSchema } from
8
8
  * Content-aware truncation for tool outputs.
9
9
  * Different tools benefit from different truncation strategies:
10
10
  * - read_file: No truncation — the model needs exact text for edits
11
- * - run_command: Keep tail (errors/results are at the end)
11
+ * - run_command: Keep head + tail (50/50). Errors live at the end of
12
+ * normal failures, but pathological cases (infinite loops, runaway
13
+ * recursion, excessive logging) reveal themselves at the start. A
14
+ * tail-heavy split hides the diagnostic bytes for the runaway case.
12
15
  * - search: Keep head with a match count footer
13
16
  * - default: Keep head (existing behavior)
14
17
  */
@@ -144,7 +147,16 @@ export declare class CodingAgent {
144
147
  inputTokens: number;
145
148
  outputTokens: number;
146
149
  cachedInputTokens?: number;
150
+ reasoningTokens?: number;
147
151
  };
152
+ /**
153
+ * Most recent step's reasoning content, when the model provided any
154
+ * (Anthropic extended thinking blocks or OpenRouter `message.reasoning`).
155
+ * Surfaced so callers can include it when re-prompting after a failed
156
+ * attempt — e.g. benchmark mode's retry path can feed the model's own
157
+ * prior thinking back so it sees what it was about to do.
158
+ */
159
+ reasoningContent?: string;
148
160
  streamed?: boolean;
149
161
  }>;
150
162
  }
@@ -1 +1 @@
1
- {"version":3,"file":"agent.d.ts","sourceRoot":"","sources":["../../../src/agent/agent.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,mBAAmB,EACzB,MAAM,4BAA4B,CAAC;AACpC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD,OAAO,EAAE,OAAO,EAAE,MAAM,uBAAuB,CAAC;AAChD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EACV,WAAW,EACX,kBAAkB,EAClB,WAAW,EACX,YAAY,EAEb,MAAM,YAAY,CAAC;AAgFpB;;;;;;;GAOG;AACH,wBAAgB,kBAAkB,CAChC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,MAAM,GACf,MAAM,CA+BR;AAED,MAAM,MAAM,gBAAgB,GAAG;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC;AACrE,MAAM,MAAM,sBAAsB,GAAG;IAAE,IAAI,EAAE,iBAAiB,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC;AAClF,MAAM,MAAM,YAAY,GAAG;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC;AAC1D,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,iBAAiB,CAAC;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAChC,CAAC;AACF,MAAM,MAAM,mBAAmB,GAAG;IAChC,IAAI,EAAE,eAAe,CAAC;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AACF,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,gBAAgB,CAAC;IACvB,aAAa,EAAE,MAAM,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;CAC1B,CAAC;AACF,MAAM,MAAM,QAAQ,GAChB,gBAAgB,GAChB,sBAAsB,GACtB,YAAY,GACZ,qBAAqB,GACrB,mBAAmB,GACnB,qBAAqB,CAAC;AA+B1B,qBAAa,WAAW;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO,CAAC,MAAM,CAAc;IAC5B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAc;IAC1C,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAe;IAC5C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAA0E;IACrG,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAA0C;IACrE,OAAO,CAAC,QAAQ,CAAC,UAAU,CAA0C;IACrE,OAAO,CAAC,QAAQ,CAAC,SAAS,CAA0C;IACpE,OAAO,CAAC,QAAQ,CAAC,qBAAqB,CAAyC;IAC/E,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAsB;IACxD,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAiC;IAEhE;;;;OAIG;IACH,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAsB;IAEnD;;;;OAIG;IACH,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAkC;IAEhE,kEAAkE;IAClE,OAAO,CAAC,kBAAkB,CAAqB;gBAEnC,MAAM,EAAE;QAClB,MAAM,EAAE,WAAW,CAAC;QACpB,WAAW,EAAE,WAAW,CAAC;QACzB,YAAY,EAAE,YAAY,CAAC;QAC3B,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,UAAU,CAAC,EAAE,CAAC,YAAY,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,KAAK,aAAa,GAAG,SAAS,CAAC;QACvE,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;QACvC,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;QACvC,SAAS,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;QACtC,qBAAqB,CAAC,EAAE,MAAM,MAAM,GAAG,SAAS,CAAC;QACjD;;;;;;;;;;;WAWG;QACH,iBAAiB,CAAC,EAAE,mBAAmB,CAAC;QACxC;;;;;WAKG;QACH,cAAc,CAAC,EAAE,kBAAkB,CAAC;KACrC;IAeD,OAAO,CAAC,UAAU;IASlB,UAAU,IAAI,OAAO;IAIrB,kBAAkB,IAAI,WAAW,CAAC,iBAAiB,CAAC;IAIpD,gBAAgB,IAAI;QAAE,aAAa,EAAE,MAAM,CAAC;QAAC,gBAAgB,EAAE,MAAM,CAAA;KAAE;IAOvE,kBAAkB,CAAC,MAAM,EAAE,WAAW,CAAC,iBAAiB,CAAC,GAAG,IAAI;IAMhE;;;;;;;OAOG;IACG,cAAc,IAAI,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC;IAMxD,OAAO,CAAC,WAAW;IAKnB;;;;;OAKG;IACH,OAAO,CAAC,wBAAwB;IAehC;;;;;OAKG;YACW,eAAe;IAavB,OAAO,CACX,WAAW,EAAE,MAAM,EACnB,OAAO,CAAC,EAAE;QAAE,MAAM,CAAC,EAAE,WAAW,CAAC;QAAC,YAAY,CAAC,EAAE,YAAY,CAAA;KAAE,GAC9D,OAAO,CAAC;QACT,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,CAAC,EAAE,OAAO,CAAC;QACjB,KAAK,CAAC,EAAE;YACN,WAAW,EAAE,MAAM,CAAC;YACpB,YAAY,EAAE,MAAM,CAAC;YACrB,iBAAiB,CAAC,EAAE,MAAM,CAAC;SAC5B,CAAC;QACF,QAAQ,CAAC,EAAE,OAAO,CAAC;KACpB,CAAC;CAuXH"}
1
+ {"version":3,"file":"agent.d.ts","sourceRoot":"","sources":["../../../src/agent/agent.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,mBAAmB,EACzB,MAAM,4BAA4B,CAAC;AACpC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD,OAAO,EAAE,OAAO,EAAE,MAAM,uBAAuB,CAAC;AAChD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EACV,WAAW,EACX,kBAAkB,EAClB,WAAW,EACX,YAAY,EAEb,MAAM,YAAY,CAAC;AA2HpB;;;;;;;;;;GAUG;AACH,wBAAgB,kBAAkB,CAChC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,MAAM,GACf,MAAM,CAyCR;AAED,MAAM,MAAM,gBAAgB,GAAG;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC;AACrE,MAAM,MAAM,sBAAsB,GAAG;IAAE,IAAI,EAAE,iBAAiB,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC;AAClF,MAAM,MAAM,YAAY,GAAG;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC;AAC1D,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,iBAAiB,CAAC;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAChC,CAAC;AACF,MAAM,MAAM,mBAAmB,GAAG;IAChC,IAAI,EAAE,eAAe,CAAC;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AACF,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,gBAAgB,CAAC;IACvB,aAAa,EAAE,MAAM,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;CAC1B,CAAC;AACF,MAAM,MAAM,QAAQ,GAChB,gBAAgB,GAChB,sBAAsB,GACtB,YAAY,GACZ,qBAAqB,GACrB,mBAAmB,GACnB,qBAAqB,CAAC;AA+B1B,qBAAa,WAAW;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO,CAAC,MAAM,CAAc;IAC5B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAc;IAC1C,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAe;IAC5C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAA0E;IACrG,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAA0C;IACrE,OAAO,CAAC,QAAQ,CAAC,UAAU,CAA0C;IACrE,OAAO,CAAC,QAAQ,CAAC,SAAS,CAA0C;IACpE,OAAO,CAAC,QAAQ,CAAC,qBAAqB,CAAyC;IAC/E,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAsB;IACxD,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAiC;IAEhE;;;;OAIG;IACH,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAsB;IAEnD;;;;OAIG;IACH,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAkC;IAEhE,kEAAkE;IAClE,OAAO,CAAC,kBAAkB,CAAqB;gBAEnC,MAAM,EAAE;QAClB,MAAM,EAAE,WAAW,CAAC;QACpB,WAAW,EAAE,WAAW,CAAC;QACzB,YAAY,EAAE,YAAY,CAAC;QAC3B,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,UAAU,CAAC,EAAE,CAAC,YAAY,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,KAAK,aAAa,GAAG,SAAS,CAAC;QACvE,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;QACvC,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;QACvC,SAAS,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;QACtC,qBAAqB,CAAC,EAAE,MAAM,MAAM,GAAG,SAAS,CAAC;QACjD;;;;;;;;;;;WAWG;QACH,iBAAiB,CAAC,EAAE,mBAAmB,CAAC;QACxC;;;;;WAKG;QACH,cAAc,CAAC,EAAE,kBAAkB,CAAC;KACrC;IAeD,OAAO,CAAC,UAAU;IASlB,UAAU,IAAI,OAAO;IAIrB,kBAAkB,IAAI,WAAW,CAAC,iBAAiB,CAAC;IAIpD,gBAAgB,IAAI;QAAE,aAAa,EAAE,MAAM,CAAC;QAAC,gBAAgB,EAAE,MAAM,CAAA;KAAE;IAOvE,kBAAkB,CAAC,MAAM,EAAE,WAAW,CAAC,iBAAiB,CAAC,GAAG,IAAI;IAMhE;;;;;;;OAOG;IACG,cAAc,IAAI,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC;IAMxD,OAAO,CAAC,WAAW;IAKnB;;;;;OAKG;IACH,OAAO,CAAC,wBAAwB;IAehC;;;;;OAKG;YACW,eAAe;IAavB,OAAO,CACX,WAAW,EAAE,MAAM,EACnB,OAAO,CAAC,EAAE;QAAE,MAAM,CAAC,EAAE,WAAW,CAAC;QAAC,YAAY,CAAC,EAAE,YAAY,CAAA;KAAE,GAC9D,OAAO,CAAC;QACT,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,CAAC,EAAE,OAAO,CAAC;QACjB,KAAK,CAAC,EAAE;YACN,WAAW,EAAE,MAAM,CAAC;YACpB,YAAY,EAAE,MAAM,CAAC;YACrB,iBAAiB,CAAC,EAAE,MAAM,CAAC;YAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;SAC1B,CAAC;QACF;;;;;;WAMG;QACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,QAAQ,CAAC,EAAE,OAAO,CAAC;KACpB,CAAC;CAqcH"}