@oh-my-pi/pi-coding-agent 13.14.0 → 13.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/CHANGELOG.md +140 -0
  2. package/package.json +10 -8
  3. package/src/autoresearch/command-initialize.md +34 -0
  4. package/src/autoresearch/command-resume.md +17 -0
  5. package/src/autoresearch/contract.ts +332 -0
  6. package/src/autoresearch/dashboard.ts +447 -0
  7. package/src/autoresearch/git.ts +243 -0
  8. package/src/autoresearch/helpers.ts +458 -0
  9. package/src/autoresearch/index.ts +693 -0
  10. package/src/autoresearch/prompt.md +227 -0
  11. package/src/autoresearch/resume-message.md +16 -0
  12. package/src/autoresearch/state.ts +386 -0
  13. package/src/autoresearch/tools/init-experiment.ts +310 -0
  14. package/src/autoresearch/tools/log-experiment.ts +833 -0
  15. package/src/autoresearch/tools/run-experiment.ts +640 -0
  16. package/src/autoresearch/types.ts +218 -0
  17. package/src/cli/args.ts +8 -2
  18. package/src/cli/initial-message.ts +58 -0
  19. package/src/config/keybindings.ts +417 -212
  20. package/src/config/model-registry.ts +1 -0
  21. package/src/config/model-resolver.ts +57 -9
  22. package/src/config/settings-schema.ts +38 -10
  23. package/src/config/settings.ts +1 -4
  24. package/src/exec/bash-executor.ts +7 -5
  25. package/src/export/html/template.css +43 -13
  26. package/src/export/html/template.generated.ts +1 -1
  27. package/src/export/html/template.html +1 -0
  28. package/src/export/html/template.js +107 -0
  29. package/src/extensibility/extensions/types.ts +31 -8
  30. package/src/internal-urls/docs-index.generated.ts +1 -1
  31. package/src/lsp/index.ts +1 -1
  32. package/src/main.ts +44 -44
  33. package/src/mcp/oauth-discovery.ts +1 -1
  34. package/src/modes/acp/acp-agent.ts +957 -0
  35. package/src/modes/acp/acp-event-mapper.ts +531 -0
  36. package/src/modes/acp/acp-mode.ts +13 -0
  37. package/src/modes/acp/index.ts +2 -0
  38. package/src/modes/components/agent-dashboard.ts +5 -4
  39. package/src/modes/components/bash-execution.ts +40 -11
  40. package/src/modes/components/custom-editor.ts +47 -47
  41. package/src/modes/components/extensions/extension-dashboard.ts +2 -1
  42. package/src/modes/components/history-search.ts +2 -1
  43. package/src/modes/components/hook-editor.ts +2 -1
  44. package/src/modes/components/hook-input.ts +8 -7
  45. package/src/modes/components/hook-selector.ts +15 -10
  46. package/src/modes/components/keybinding-hints.ts +9 -9
  47. package/src/modes/components/login-dialog.ts +3 -3
  48. package/src/modes/components/mcp-add-wizard.ts +2 -1
  49. package/src/modes/components/model-selector.ts +14 -3
  50. package/src/modes/components/oauth-selector.ts +2 -1
  51. package/src/modes/components/python-execution.ts +2 -3
  52. package/src/modes/components/session-selector.ts +2 -1
  53. package/src/modes/components/settings-selector.ts +2 -1
  54. package/src/modes/components/status-line-segment-editor.ts +2 -1
  55. package/src/modes/components/tool-execution.ts +4 -5
  56. package/src/modes/components/tree-selector.ts +3 -2
  57. package/src/modes/components/user-message-selector.ts +3 -8
  58. package/src/modes/components/user-message.ts +16 -0
  59. package/src/modes/controllers/command-controller.ts +0 -2
  60. package/src/modes/controllers/extension-ui-controller.ts +89 -4
  61. package/src/modes/controllers/input-controller.ts +29 -23
  62. package/src/modes/controllers/mcp-command-controller.ts +1 -1
  63. package/src/modes/index.ts +1 -0
  64. package/src/modes/interactive-mode.ts +17 -5
  65. package/src/modes/print-mode.ts +1 -1
  66. package/src/modes/prompt-action-autocomplete.ts +7 -7
  67. package/src/modes/rpc/rpc-mode.ts +7 -2
  68. package/src/modes/rpc/rpc-types.ts +1 -0
  69. package/src/modes/theme/theme.ts +53 -44
  70. package/src/modes/types.ts +9 -2
  71. package/src/modes/utils/hotkeys-markdown.ts +19 -19
  72. package/src/modes/utils/keybinding-matchers.ts +21 -0
  73. package/src/modes/utils/ui-helpers.ts +1 -1
  74. package/src/patch/hashline.ts +139 -127
  75. package/src/patch/index.ts +77 -59
  76. package/src/patch/shared.ts +19 -11
  77. package/src/prompts/tools/hashline.md +43 -116
  78. package/src/sdk.ts +34 -17
  79. package/src/session/agent-session.ts +123 -30
  80. package/src/session/session-manager.ts +32 -31
  81. package/src/session/streaming-output.ts +87 -37
  82. package/src/tools/ask.ts +56 -30
  83. package/src/tools/bash-interactive.ts +2 -6
  84. package/src/tools/bash-interceptor.ts +1 -39
  85. package/src/tools/bash-skill-urls.ts +1 -1
  86. package/src/tools/browser.ts +1 -1
  87. package/src/tools/gemini-image.ts +1 -1
  88. package/src/tools/python.ts +2 -2
  89. package/src/tools/resolve.ts +1 -1
  90. package/src/utils/child-process.ts +88 -0
@@ -0,0 +1,693 @@
1
+ import * as fs from "node:fs";
2
+ import * as path from "node:path";
3
+ import type { AutocompleteItem } from "@oh-my-pi/pi-tui";
4
+ import { renderPromptTemplate } from "../config/prompt-templates";
5
+ import type { ExtensionContext, ExtensionFactory } from "../extensibility/extensions";
6
+ import commandInitializeTemplate from "./command-initialize.md" with { type: "text" };
7
+ import commandResumeTemplate from "./command-resume.md" with { type: "text" };
8
+ import { pathMatchesContractPath } from "./contract";
9
+ import { createDashboardController } from "./dashboard";
10
+ import { ensureAutoresearchBranch } from "./git";
11
+ import {
12
+ formatNum,
13
+ isAutoresearchCommittableFile,
14
+ isAutoresearchLocalStatePath,
15
+ isAutoresearchShCommand,
16
+ normalizeAutoresearchPath,
17
+ readMaxExperiments,
18
+ readPendingRunSummary,
19
+ resolveWorkDir,
20
+ validateWorkDir,
21
+ } from "./helpers";
22
+ import promptTemplate from "./prompt.md" with { type: "text" };
23
+ import resumeMessageTemplate from "./resume-message.md" with { type: "text" };
24
+ import {
25
+ cloneExperimentState,
26
+ createExperimentState,
27
+ createRuntimeStore,
28
+ currentResults,
29
+ findBaselineMetric,
30
+ reconstructControlState,
31
+ reconstructStateFromJsonl,
32
+ } from "./state";
33
+ import { createInitExperimentTool } from "./tools/init-experiment";
34
+ import { createLogExperimentTool } from "./tools/log-experiment";
35
+ import { createRunExperimentTool } from "./tools/run-experiment";
36
+ import type { AutoresearchRuntime, ChecksResult, ExperimentResult, PendingRunSummary } from "./types";
37
+
38
/** Names of the experiment tools that are added to the active tool set while autoresearch mode is on and removed when it is off. */
const EXPERIMENT_TOOL_NAMES: string[] = ["init_experiment", "run_experiment", "log_experiment"];
39
+
40
/**
 * Answers collected by the interactive setup prompts before a fresh
 * autoresearch session is started.
 */
interface AutoresearchSetupInput {
	/** Free-text goal entered at the "Autoresearch Intent" prompt (trimmed, non-empty). */
	intent: string;
	/** Benchmark command line; setup only accepts commands that pass `isAutoresearchShCommand`. */
	benchmarkCommand: string;
	/** Name of the primary metric (trimmed, non-empty). */
	metricName: string;
	/** Unit label for the primary metric; may be empty. */
	metricUnit: string;
	/** Which direction of the primary metric counts as an improvement. */
	direction: "lower" | "higher";
	/** Entries from the "Tradeoff Metrics" prompt, split and de-duplicated. */
	secondaryMetrics: string[];
	/** Entries from the "Files in Scope" prompt; setup requires at least one. */
	scopePaths: string[];
	/** Entries from the "Off Limits" prompt. */
	offLimits: string[];
	/** Entries from the "Constraints" prompt. */
	constraints: string[];
}
51
+
52
+ export const createAutoresearchExtension: ExtensionFactory = api => {
53
// State shared by every handler registered below.
const runtimeStore = createRuntimeStore();
const dashboard = createDashboardController();

/** Returns the session id behind `ctx`; it is the key used for the runtime store. */
const getSessionKey = (ctx: ExtensionContext): string => {
	return ctx.sessionManager.getSessionId();
};

/** Fetches the runtime for the session behind `ctx` through `runtimeStore.ensure`. */
const getRuntime = (ctx: ExtensionContext): AutoresearchRuntime => {
	const sessionKey = getSessionKey(ctx);
	return runtimeStore.ensure(sessionKey);
};
58
+
59
/**
 * Rebuilds the session runtime from what is stored outside of memory: the
 * experiment state reconstructed for the working directory, the control
 * state reconstructed from the session branch, and the pending run summary
 * (if any). Afterwards the widget is refreshed and the experiment tools are
 * added to or removed from the active tool set to match the restored mode.
 */
const rehydrate = async (ctx: ExtensionContext): Promise<void> => {
	const runtime = getRuntime(ctx);
	const workDir = resolveWorkDir(ctx.cwd);
	const { state: persistedState } = reconstructStateFromJsonl(workDir);
	const control = reconstructControlState(ctx.sessionManager.getBranch());
	const loggedRunNumbers = collectLoggedRunNumbers(persistedState.results);

	// Experiment and control state.
	runtime.state = cloneExperimentState(persistedState);
	runtime.state.maxExperiments = readMaxExperiments(ctx.cwd);
	runtime.goal = control.goal;
	runtime.autoresearchMode = control.autoresearchMode;
	runtime.autoResumeArmed = false;
	runtime.lastAutoResumePendingRunNumber = null;

	// Details of the last run that has a summary but no logged result yet.
	const pendingSummary = await readPendingRunSummary(workDir, loggedRunNumbers);
	runtime.lastRunSummary = pendingSummary;
	runtime.lastRunChecks = summaryToChecks(pendingSummary);
	runtime.lastRunDuration = pendingSummary?.durationSeconds ?? null;
	runtime.lastRunAsi = pendingSummary?.parsedAsi ?? null;
	runtime.lastRunArtifactDir = pendingSummary?.runDirectory ?? null;
	runtime.lastRunNumber = pendingSummary?.runNumber ?? null;
	runtime.runningExperiment = null;
	dashboard.updateWidget(ctx, runtime);

	// Sync the active tool list with the restored mode, touching it only when it differs.
	const activeTools = api.getActiveTools();
	let nextActiveTools: string[];
	if (runtime.autoresearchMode) {
		nextActiveTools = Array.from(new Set([...activeTools, ...EXPERIMENT_TOOL_NAMES]));
	} else {
		const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
		nextActiveTools = activeTools.filter(name => !experimentTools.has(name));
	}
	const unchanged =
		nextActiveTools.length === activeTools.length &&
		nextActiveTools.every((name, index) => name === activeTools[index]);
	if (!unchanged) {
		await api.setActiveTools(nextActiveTools);
	}
};
91
+
92
/**
 * Switches autoresearch mode for the current session and records the change
 * as an `autoresearch-control` session entry.
 *
 * @param ctx - Extension context identifying the session.
 * @param enabled - New value for `runtime.autoresearchMode`.
 * @param goal - Goal to store on the runtime; included in the entry only when truthy.
 * @param mode - Control mode written to the session entry.
 */
const setMode = (
	ctx: ExtensionContext,
	enabled: boolean,
	goal: string | null,
	mode: "on" | "off" | "clear",
): void => {
	const runtime = getRuntime(ctx);
	runtime.autoresearchMode = enabled;
	runtime.goal = goal;
	// Any mode change disarms auto-resume and forgets the last auto-resumed run.
	runtime.autoResumeArmed = false;
	runtime.lastAutoResumePendingRunNumber = null;

	const controlEntry = goal ? { mode, goal } : { mode };
	api.appendEntry("autoresearch-control", controlEntry);
};
105
+
106
// Register the three experiment tools; each receives the dashboard, the runtime accessor and the extension API.
const initExperimentTool = createInitExperimentTool({ dashboard, getRuntime, pi: api });
api.registerTool(initExperimentTool);
const runExperimentTool = createRunExperimentTool({ dashboard, getRuntime, pi: api });
api.registerTool(runExperimentTool);
const logExperimentTool = createLogExperimentTool({ dashboard, getRuntime, pi: api });
api.registerTool(logExperimentTool);
109
// Guard tool calls while autoresearch mode is on: bash commands are checked by
// validateAutoresearchBashCommand, and write/edit/ast_edit targets must stay inside
// the working tree and pass the Files in Scope / Off Limits checks.
api.on("tool_call", (event, ctx) => {
	const runtime = getRuntime(ctx);
	if (!runtime.autoresearchMode) return;

	const deny = (reason: string) => ({ block: true as const, reason });

	if (event.toolName === "bash") {
		const command = typeof event.input.command === "string" ? event.input.command : "";
		const bashError = validateAutoresearchBashCommand(command);
		return bashError ? deny(bashError) : undefined;
	}
	// Every other tool except the file-editing ones passes through untouched.
	if (event.toolName !== "write" && event.toolName !== "edit" && event.toolName !== "ast_edit") return;

	const rawPaths = getGuardedToolPaths(event.toolName, event.input);
	if (rawPaths === null) {
		return deny(
			"Autoresearch requires an explicit target path for this editing tool so it can enforce Files in Scope and Off Limits before changes are made.",
		);
	}

	const workDir = resolveWorkDir(ctx.cwd);
	for (const rawPath of rawPaths) {
		const resolved = resolveAutoresearchRelativePath(workDir, rawPath);
		if (!resolved.ok) {
			return deny(resolved.reason);
		}
		const scopeError = validateEditableAutoresearchPath(resolved.relativePath, runtime);
		if (scopeError) {
			return deny(`Autoresearch blocked edits to ${resolved.relativePath}: ${scopeError}`);
		}
	}
});
152
+
153
// The `autoresearch` command: `off` leaves the mode, `clear` additionally deletes local
// state, and any other argument either resumes from existing notes or runs fresh setup.
api.registerCommand("autoresearch", {
	description: "Start, stop, or clear builtin autoresearch mode.",
	getArgumentCompletions(argumentPrefix: string): AutocompleteItem[] | null {
		// Nothing is offered once the prefix contains a space.
		if (argumentPrefix.includes(" ")) return null;
		const prefix = argumentPrefix.trim().toLowerCase();
		const subcommands: AutocompleteItem[] = [
			{ label: "off", value: "off", description: "Leave autoresearch mode" },
			{ label: "clear", value: "clear", description: "Delete autoresearch.jsonl and leave autoresearch mode" },
		];
		const matches = subcommands.filter(item => item.label.startsWith(prefix));
		return matches.length === 0 ? null : matches;
	},
	async handler(args, ctx): Promise<void> {
		const trimmed = args.trim();
		const runtime = getRuntime(ctx);

		const workDirError = validateWorkDir(ctx.cwd);
		if (workDirError) {
			ctx.ui.notify(workDirError, "error");
			return;
		}

		// Local helpers for the tool-set changes every branch below performs.
		const deactivateExperimentTools = async (): Promise<void> => {
			const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
			await api.setActiveTools(api.getActiveTools().filter(name => !experimentTools.has(name)));
		};
		const activateExperimentTools = async (): Promise<void> => {
			await api.setActiveTools([...new Set([...api.getActiveTools(), ...EXPERIMENT_TOOL_NAMES])]);
		};

		if (trimmed === "off") {
			setMode(ctx, false, runtime.goal, "off");
			dashboard.updateWidget(ctx, runtime);
			await deactivateExperimentTools();
			ctx.ui.notify("Autoresearch mode disabled", "info");
			return;
		}

		if (trimmed === "clear") {
			const stateDir = resolveWorkDir(ctx.cwd);
			const jsonlPath = path.join(stateDir, "autoresearch.jsonl");
			const localStatePath = path.join(stateDir, ".autoresearch");
			if (fs.existsSync(jsonlPath)) {
				fs.rmSync(jsonlPath);
			}
			if (fs.existsSync(localStatePath)) {
				fs.rmSync(localStatePath, { force: true, recursive: true });
			}

			// Reset the in-memory runtime to a blank experiment.
			runtime.state = createExperimentState();
			runtime.state.maxExperiments = readMaxExperiments(ctx.cwd);
			runtime.goal = null;
			runtime.lastRunChecks = null;
			runtime.lastRunDuration = null;
			runtime.lastRunAsi = null;
			runtime.lastRunArtifactDir = null;
			runtime.lastRunNumber = null;
			runtime.lastRunSummary = null;

			setMode(ctx, false, null, "clear");
			dashboard.updateWidget(ctx, runtime);
			await deactivateExperimentTools();
			ctx.ui.notify("Autoresearch local state cleared", "info");
			return;
		}

		const workDir = resolveWorkDir(ctx.cwd);
		const autoresearchMdPath = path.join(workDir, "autoresearch.md");
		const hasAutoresearchMd = fs.existsSync(autoresearchMdPath);
		const controlState = reconstructControlState(ctx.sessionManager.getBranch());
		const lastModeWasNotClear = controlState.lastMode !== "clear";
		// Resume when notes exist and either local state is present, or the command was
		// given no argument and the last recorded control mode was not "clear".
		const shouldResumeExistingNotes =
			hasAutoresearchMd &&
			(hasLocalAutoresearchState(workDir) || (lastModeWasNotClear && trimmed.length === 0));

		if (shouldResumeExistingNotes) {
			const resumeContext = trimmed;
			const resumeGoal = runtime.goal ?? runtime.state.name ?? null;
			const resumeBranch = await ensureAutoresearchBranch(api, workDir, resumeGoal);
			if (!resumeBranch.ok) {
				ctx.ui.notify(resumeBranch.error, "error");
				return;
			}

			setMode(ctx, true, resumeGoal, "on");
			dashboard.updateWidget(ctx, runtime);
			await activateExperimentTools();

			let resumeBranchLine: string;
			if (resumeBranch.created) {
				resumeBranchLine = `Created and checked out dedicated git branch \`${resumeBranch.branchName}\` before resuming.`;
			} else {
				resumeBranchLine = `Using dedicated git branch \`${resumeBranch.branchName}\`.`;
			}
			api.sendUserMessage(
				renderPromptTemplate(commandResumeTemplate, {
					autoresearch_md_path: autoresearchMdPath,
					branch_status_line: resumeBranchLine,
					has_resume_context: resumeContext.length > 0,
					resume_context: resumeContext,
				}),
			);
			return;
		}

		// Fresh start: collect the setup answers interactively.
		const defaultIntent = trimmed || runtime.goal || "what should autoresearch improve?";
		const setup = await promptForAutoresearchSetup(ctx, defaultIntent);
		if (!setup) return;

		const branchResult = await ensureAutoresearchBranch(api, workDir, setup.intent);
		if (!branchResult.ok) {
			ctx.ui.notify(branchResult.error, "error");
			return;
		}

		setMode(ctx, true, setup.intent, "on");
		runtime.state.name = setup.intent;
		runtime.state.metricName = setup.metricName;
		runtime.state.metricUnit = setup.metricUnit;
		runtime.state.bestDirection = setup.direction;
		runtime.state.secondaryMetrics = setup.secondaryMetrics.map(name => ({ name, unit: "" }));
		runtime.state.benchmarkCommand = setup.benchmarkCommand;
		runtime.state.scopePaths = [...setup.scopePaths];
		runtime.state.offLimits = [...setup.offLimits];
		runtime.state.constraints = [...setup.constraints];
		dashboard.updateWidget(ctx, runtime);
		await activateExperimentTools();

		const inlineCodeBullet = (value: string): string => ` - \`${value}\``;
		const plainBullet = (value: string): string => ` - ${value}`;
		const noneBullet = " - `(none)`";
		const branchStatusLine = branchResult.created
			? `Created and checked out dedicated git branch \`${branchResult.branchName}\`.`
			: `Using dedicated git branch \`${branchResult.branchName}\`.`;

		api.sendUserMessage(
			renderPromptTemplate(commandInitializeTemplate, {
				branch_status_line: branchStatusLine,
				intent: setup.intent,
				benchmark_command: setup.benchmarkCommand,
				metric_name: setup.metricName,
				metric_unit: setup.metricUnit,
				direction: setup.direction,
				has_secondary_metrics: setup.secondaryMetrics.length > 0,
				secondary_metrics: setup.secondaryMetrics,
				secondary_metrics_block: formatBulletBlock(setup.secondaryMetrics, inlineCodeBullet, noneBullet),
				scope_paths: setup.scopePaths,
				scope_paths_block: formatBulletBlock(setup.scopePaths, inlineCodeBullet),
				has_off_limits: setup.offLimits.length > 0,
				off_limits: setup.offLimits,
				off_limits_block: formatBulletBlock(setup.offLimits, inlineCodeBullet, noneBullet),
				has_constraints: setup.constraints.length > 0,
				constraints: setup.constraints,
				constraints_block: formatBulletBlock(setup.constraints, plainBullet, noneBullet),
			}),
		);
	},
});
295
+
296
// ctrl+x flips the expanded state of the dashboard widget, but only once there is
// at least one result or an experiment currently running.
api.registerShortcut("ctrl+x", {
	description: "Toggle autoresearch dashboard",
	handler(ctx): void {
		const runtime = getRuntime(ctx);
		const hasContent = runtime.state.results.length !== 0 || Boolean(runtime.runningExperiment);
		if (!hasContent) {
			ctx.ui.notify("No autoresearch results yet", "info");
			return;
		}
		runtime.dashboardExpanded = !runtime.dashboardExpanded;
		dashboard.updateWidget(ctx, runtime);
	},
});

// ctrl+shift+x opens the dashboard overlay for the current session's runtime.
api.registerShortcut("ctrl+shift+x", {
	description: "Show autoresearch dashboard overlay",
	handler(ctx): Promise<void> {
		const runtime = getRuntime(ctx);
		return dashboard.showOverlay(ctx, runtime);
	},
});
315
+
316
// Every session transition rebuilds the runtime from stored state.
const rehydrateOnSessionEvent = (_event: unknown, ctx: ExtensionContext): Promise<void> => rehydrate(ctx);
api.on("session_start", rehydrateOnSessionEvent);
api.on("session_switch", rehydrateOnSessionEvent);
api.on("session_branch", rehydrateOnSessionEvent);
api.on("session_tree", rehydrateOnSessionEvent);

// On shutdown, clear the dashboard and then the session's runtime-store entry.
api.on("session_shutdown", (_event, ctx) => {
	dashboard.clear(ctx);
	const sessionKey = getSessionKey(ctx);
	runtimeStore.clear(sessionKey);
});
324
+
325
// After each agent turn: mark no experiment as running, refresh the dashboard and, when
// autoresearch mode is on and nothing else is queued, send a hidden resume message if
// there is a pending run that has not been auto-resumed yet or auto-resume was armed.
api.on("agent_end", async (_event, ctx) => {
	const runtime = getRuntime(ctx);
	runtime.runningExperiment = null;
	dashboard.updateWidget(ctx, runtime);
	dashboard.requestRender();

	if (!runtime.autoresearchMode) return;
	if (ctx.hasPendingMessages()) {
		// Messages are already queued, so do not inject a resume on top of them.
		runtime.autoResumeArmed = false;
		return;
	}

	const workDir = resolveWorkDir(ctx.cwd);
	const pendingRun =
		runtime.lastRunSummary ??
		(await readPendingRunSummary(workDir, collectLoggedRunNumbers(runtime.state.results)));
	runtime.lastRunSummary = pendingRun;
	runtime.lastRunChecks = summaryToChecks(pendingRun);
	runtime.lastRunDuration = pendingRun?.durationSeconds ?? runtime.lastRunDuration;
	runtime.lastRunAsi = pendingRun?.parsedAsi ?? runtime.lastRunAsi;

	// Each pending run triggers at most one automatic resume.
	const isUnresumedPendingRun =
		pendingRun !== null && runtime.lastAutoResumePendingRunNumber !== pendingRun.runNumber;
	if (!(isUnresumedPendingRun || runtime.autoResumeArmed)) {
		return;
	}
	runtime.autoResumeArmed = false;
	runtime.lastAutoResumePendingRunNumber = pendingRun?.runNumber ?? null;

	const autoresearchMdPath = path.join(workDir, "autoresearch.md");
	const ideasPath = path.join(workDir, "autoresearch.ideas.md");
	const resumeContent = renderPromptTemplate(resumeMessageTemplate, {
		autoresearch_md_path: autoresearchMdPath,
		has_ideas: fs.existsSync(ideasPath),
		has_pending_run: Boolean(pendingRun),
	});
	api.sendMessage(
		{
			customType: "autoresearch-resume",
			content: resumeContent,
			display: false,
			attribution: "agent",
		},
		{ deliverAs: "nextTurn", triggerTurn: true },
	);
});
366
+
367
// Before each agent turn in autoresearch mode, replace the system prompt with the
// autoresearch prompt template rendered from the current runtime state.
api.on("before_agent_start", async (event, ctx) => {
	const runtime = getRuntime(ctx);
	if (!runtime.autoresearchMode) return;

	const workDir = resolveWorkDir(ctx.cwd);
	const autoresearchMdPath = path.join(workDir, "autoresearch.md");
	const checksPath = path.join(workDir, "autoresearch.checks.sh");
	const ideasPath = path.join(workDir, "autoresearch.ideas.md");
	const programPath = path.join(workDir, "autoresearch.program.md");

	// Reuse the cached pending-run summary, otherwise read it for the working directory.
	const pendingRun =
		runtime.lastRunSummary ??
		(await readPendingRunSummary(workDir, collectLoggedRunNumbers(runtime.state.results)));
	runtime.lastRunSummary = pendingRun;
	runtime.lastRunChecks = summaryToChecks(pendingRun);
	runtime.lastRunDuration = pendingRun?.durationSeconds ?? runtime.lastRunDuration;
	runtime.lastRunAsi = pendingRun?.parsedAsi ?? runtime.lastRunAsi;

	const { state } = runtime;
	const metricUnit = state.metricUnit;
	// Results without a stored run number fall back to their 1-based position in the full list.
	const runNumberOf = (result: ExperimentResult): number =>
		result.runNumber ?? state.results.indexOf(result) + 1;

	const currentSegmentResults = currentResults(state.results, state.currentSegment);
	const baselineMetric = findBaselineMetric(state.results, state.currentSegment);
	const bestResult = findBestResult(runtime);
	const goal = runtime.goal ?? state.name ?? "";

	// Only the last three results of the current segment are listed in the prompt.
	const recentResults = currentSegmentResults.slice(-3).map(result => {
		const asiSummary = summarizeExperimentAsi(result);
		return {
			asi_summary: asiSummary,
			description: result.description,
			has_asi_summary: Boolean(asiSummary),
			metric_display: formatNum(result.metric, metricUnit),
			run_number: runNumberOf(result),
			status: result.status,
		};
	});

	const baselineDisplay = formatNum(baselineMetric, metricUnit);
	const pendingMetric = pendingRun?.parsedPrimary;

	const systemPrompt = renderPromptTemplate(promptTemplate, {
		base_system_prompt: event.systemPrompt,
		has_goal: goal.trim().length > 0,
		goal,
		working_dir: workDir,
		default_metric_name: state.metricName,
		metric_name: state.metricName,
		has_autoresearch_md: fs.existsSync(autoresearchMdPath),
		autoresearch_md_path: autoresearchMdPath,
		has_checks: fs.existsSync(checksPath),
		checks_path: checksPath,
		has_ideas: fs.existsSync(ideasPath),
		ideas_path: ideasPath,
		has_program: fs.existsSync(programPath),
		program_path: programPath,
		current_segment: state.currentSegment + 1,
		current_segment_run_count: currentSegmentResults.length,
		has_baseline_metric: baselineMetric !== null,
		baseline_metric_display: baselineDisplay,
		has_best_result: Boolean(bestResult),
		// Without a kept result the baseline doubles as the best value.
		best_metric_display: bestResult ? formatNum(bestResult.metric, metricUnit) : baselineDisplay,
		best_run_number: bestResult ? runNumberOf(bestResult) : null,
		has_recent_results: recentResults.length > 0,
		recent_results: recentResults,
		has_pending_run: Boolean(pendingRun),
		pending_run_number: pendingRun?.runNumber,
		pending_run_command: pendingRun?.command,
		pending_run_directory: pendingRun?.runDirectory,
		pending_run_passed: pendingRun?.passed ?? false,
		has_pending_run_metric: pendingMetric !== null && pendingMetric !== undefined,
		pending_run_metric_display:
			pendingMetric !== null && pendingMetric !== undefined ? formatNum(pendingMetric, metricUnit) : null,
	});
	return { systemPrompt };
});
439
+ };
440
+
441
/**
 * Walks the user through the setup prompts for a new autoresearch session.
 *
 * Prompts are asked one at a time in a fixed order. Dismissing any prompt
 * (the input resolves to `undefined`) aborts silently; an invalid answer
 * shows a notification and aborts as well.
 *
 * @param ctx - Extension context providing the `ui.input` / `ui.notify` calls.
 * @param defaultIntent - Second argument passed to the intent prompt.
 * @returns The collected setup, or `undefined` when setup was aborted.
 */
async function promptForAutoresearchSetup(
	ctx: ExtensionContext,
	defaultIntent: string,
): Promise<AutoresearchSetupInput | undefined> {
	// Notify about a rejected answer and abort the setup.
	const reject = (message: string): undefined => {
		ctx.ui.notify(message, "info");
		return undefined;
	};

	const intentInput = await ctx.ui.input("Autoresearch Intent", defaultIntent);
	if (intentInput === undefined) return undefined;
	const intent = intentInput.trim();
	if (intent === "") return reject("Autoresearch intent is required");

	const benchmarkCommandInput = await ctx.ui.input("Benchmark Command", "bash autoresearch.sh");
	if (benchmarkCommandInput === undefined) return undefined;
	const benchmarkCommand = benchmarkCommandInput.trim();
	if (benchmarkCommand === "") return reject("Benchmark command is required");
	if (!isAutoresearchShCommand(benchmarkCommand)) {
		return reject("Benchmark command must invoke `autoresearch.sh` directly");
	}

	const metricNameInput = await ctx.ui.input("Primary Metric Name", "runtime_ms");
	if (metricNameInput === undefined) return undefined;
	const metricName = metricNameInput.trim();
	if (metricName === "") return reject("Primary metric name is required");

	// The unit is optional, so an empty answer is accepted.
	const metricUnitInput = await ctx.ui.input("Metric Unit", "ms");
	if (metricUnitInput === undefined) return undefined;
	const metricUnit = metricUnitInput.trim();

	const directionInput = await ctx.ui.input("Metric Direction", "lower");
	if (directionInput === undefined) return undefined;
	const direction = directionInput.trim().toLowerCase();
	if (direction !== "lower" && direction !== "higher") {
		return reject("Metric direction must be `lower` or `higher`");
	}

	const secondaryMetricsInput = await ctx.ui.input("Tradeoff Metrics", "");
	if (secondaryMetricsInput === undefined) return undefined;

	const scopePathsInput = await ctx.ui.input("Files in Scope", "packages/coding-agent/src/autoresearch");
	if (scopePathsInput === undefined) return undefined;
	const scopePaths = splitSetupList(scopePathsInput);
	if (scopePaths.length === 0) return reject("Files in Scope must include at least one path");

	const offLimitsInput = await ctx.ui.input("Off Limits", "");
	if (offLimitsInput === undefined) return undefined;

	const constraintsInput = await ctx.ui.input("Constraints", "");
	if (constraintsInput === undefined) return undefined;

	return {
		intent,
		benchmarkCommand,
		metricName,
		metricUnit,
		direction,
		secondaryMetrics: splitSetupList(secondaryMetricsInput),
		scopePaths,
		offLimits: splitSetupList(offLimitsInput),
		constraints: splitSetupList(constraintsInput),
	};
}
513
+
514
/**
 * Splits a free-form answer into list entries.
 *
 * Entries are separated by commas or line breaks, trimmed, and returned in
 * first-seen order without blanks or duplicates.
 */
function splitSetupList(value: string): string[] {
	const uniqueEntries = new Set<string>();
	for (const rawEntry of value.split(/\r?\n|,/)) {
		const entry = rawEntry.trim();
		if (entry.length > 0) {
			uniqueEntries.add(entry);
		}
	}
	return Array.from(uniqueEntries);
}
520
+
521
/**
 * Renders each value with `renderValue` and joins the results with newlines.
 *
 * @param values - Items to render.
 * @param renderValue - Produces the line for one item.
 * @param emptyValue - Returned as-is when `values` is empty (defaults to "").
 */
function formatBulletBlock(values: string[], renderValue: (value: string) => string, emptyValue = ""): string {
	return values.length > 0 ? values.map(renderValue).join("\n") : emptyValue;
}
527
+
528
/** Reports whether `workDir` contains an `autoresearch.jsonl` entry or a `.autoresearch` entry. */
function hasLocalAutoresearchState(workDir: string): boolean {
	const stateEntries = ["autoresearch.jsonl", ".autoresearch"];
	return stateEntries.some(entryName => fs.existsSync(path.join(workDir, entryName)));
}
531
+
532
/**
 * Builds a one-line summary from a result's `asi` fields.
 *
 * The string-valued `hypothesis`, `rollback_reason` and `next_action_hint`
 * fields are trimmed, empty ones are dropped, and the rest are joined with
 * " | " and cut to at most 220 characters.
 *
 * @returns The summary, or `null` when none of the fields has text.
 */
function summarizeExperimentAsi(result: ExperimentResult): string | null {
	const asText = (field: unknown): string => (typeof field === "string" ? field.trim() : "");
	const parts = [
		asText(result.asi?.hypothesis),
		asText(result.asi?.rollback_reason),
		asText(result.asi?.next_action_hint),
	].filter(part => part.length > 0);
	if (parts.length === 0) {
		return null;
	}
	return parts.join(" | ").slice(0, 220);
}
539
+
540
/**
 * Extracts the file paths an editing tool call is about to touch so they can
 * be validated before the call runs.
 *
 * @param toolName - Name of the tool being called.
 * @param input - Raw tool input.
 * @returns The paths to validate; `null` when the tool is a guarded editing
 *   tool but its input carries no string `path` (the caller blocks such
 *   calls); or an empty list for tools that are not guarded here.
 */
function getGuardedToolPaths(toolName: string, input: Record<string, unknown>): string[] | null {
	if (toolName !== "write" && toolName !== "ast_edit" && toolName !== "edit") {
		return [];
	}
	// Without an explicit target there is nothing to validate, so fail closed. Previously
	// an `edit` call without `path` produced an empty list and slipped past the guard.
	if (typeof input.path !== "string") {
		return null;
	}

	const paths = [input.path];
	if (toolName === "edit") {
		// An edit can also relocate the file; the destination has to be validated as well.
		for (const destination of [input.rename, input.move]) {
			if (typeof destination === "string") {
				paths.push(destination);
			}
		}
	}
	return paths;
}
563
+
564
/**
 * Maps a tool-supplied path to a path relative to the working directory.
 *
 * Both the working directory and the target are canonicalized first, so the
 * comparison is made on resolved filesystem locations.
 *
 * @param workDir - Autoresearch working directory.
 * @param rawPath - Path exactly as given in the tool input.
 * @returns `{ ok: true, relativePath }` for targets inside the working tree
 *   ("." for the working directory itself), otherwise `{ ok: false, reason }`.
 */
function resolveAutoresearchRelativePath(
	workDir: string,
	rawPath: string,
): { ok: false; reason: string } | { ok: true; relativePath: string } {
	if (looksLikeInternalUrl(rawPath)) {
		const reason = `Autoresearch cannot validate internal URL paths during scoped editing: ${rawPath}`;
		return { ok: false, reason };
	}

	let absoluteTarget: string;
	if (path.isAbsolute(rawPath)) {
		absoluteTarget = path.resolve(rawPath);
	} else {
		absoluteTarget = path.resolve(workDir, rawPath);
	}

	const relativePath = path.relative(canonicalizeExistingPath(workDir), canonicalizeTargetPath(absoluteTarget));
	// A relative path that climbs out, or is still absolute, lies outside the working tree.
	const escapesWorkDir =
		relativePath === ".." || relativePath.startsWith(`..${path.sep}`) || path.isAbsolute(relativePath);
	if (escapesWorkDir) {
		return { ok: false, reason: `Autoresearch blocked edits outside the working tree: ${rawPath}` };
	}

	if (relativePath.length === 0) {
		return { ok: true, relativePath: "." };
	}
	return { ok: true, relativePath: normalizeAutoresearchPath(relativePath) };
}
589
+
590
/**
 * Decides whether a working-tree-relative path may be edited in autoresearch mode.
 *
 * Checks run in this order: local state paths are never editable; Off Limits
 * entries are rejected; committable autoresearch files are then always
 * allowed; everything else must match an entry in Files in Scope.
 *
 * @returns `null` when the edit is allowed, otherwise the reason it is not.
 */
function validateEditableAutoresearchPath(relativePath: string, runtime: AutoresearchRuntime): string | null {
	const matchesAny = (specs: string[]): boolean => specs.some(spec => pathMatchesContractPath(relativePath, spec));
	const { offLimits, scopePaths } = runtime.state;

	if (isAutoresearchLocalStatePath(relativePath)) {
		return "autoresearch local state files are managed by the experiment tools and cannot be edited directly";
	}
	if (matchesAny(offLimits)) {
		return "this path is listed under Off Limits in autoresearch.md";
	}
	if (isAutoresearchCommittableFile(relativePath)) {
		return null;
	}
	if (scopePaths.length === 0) {
		return "Files in Scope is not initialized yet; only autoresearch control files may be edited before init_experiment runs";
	}
	return matchesAny(scopePaths) ? null : "this path is outside Files in Scope in autoresearch.md";
}
608
+
609
/**
 * Finds the best kept result of the current segment.
 *
 * Only results whose status is "keep" and whose segment equals the current
 * one are considered. "Best" follows `bestDirection`; on ties the earliest
 * result wins.
 *
 * @returns The best result, or `null` when the segment has no kept result.
 */
function findBestResult(runtime: AutoresearchRuntime): ExperimentResult | null {
	const { results, currentSegment, bestDirection } = runtime.state;
	const lowerIsBetter = bestDirection === "lower";
	return results
		.filter(result => result.segment === currentSegment && result.status === "keep")
		.reduce<ExperimentResult | null>((best, candidate) => {
			if (!best) return candidate;
			const improves = lowerIsBetter ? candidate.metric < best.metric : candidate.metric > best.metric;
			return improves ? candidate : best;
		}, null);
}
623
+
624
/** Collects the run numbers of all results, skipping results whose `runNumber` is `null`. */
function collectLoggedRunNumbers(results: ExperimentResult[]): Set<number> {
	const loggedNumbers = results
		.map(result => result.runNumber)
		.filter((runNumber): runNumber is number => runNumber !== null);
	return new Set<number>(loggedNumbers);
}
633
+
634
/**
 * Converts the checks fields of a pending run summary into a `ChecksResult`.
 *
 * The output text is not part of the summary, so `output` is always empty,
 * and a missing duration becomes 0.
 *
 * @returns `null` when there is no summary or its `checksPass` is `null`.
 */
function summaryToChecks(summary: PendingRunSummary | null): ChecksResult | null {
	if (!summary) {
		return null;
	}
	const { checksPass, checksDurationSeconds } = summary;
	if (checksPass === null) {
		return null;
	}
	return { pass: checksPass, output: "", duration: checksDurationSeconds ?? 0 };
}
644
+
645
/** Reports whether `value` starts with a URL scheme followed by `://` (for example `skill://`). */
function looksLikeInternalUrl(value: string): boolean {
	// A letter, then letters, digits, "+", "." or "-", then "://"; case-insensitive.
	const schemePrefix = /^[a-z][a-z0-9+.-]*:\/\//i;
	return schemePrefix.test(value);
}
648
+
649
/**
 * Returns the canonical form of `targetPath` via `fs.realpathSync.native`.
 * When that throws, the path is resolved with `path.resolve` instead.
 */
function canonicalizeExistingPath(targetPath: string): string {
	let canonicalPath: string;
	try {
		canonicalPath = fs.realpathSync.native(targetPath);
	} catch {
		canonicalPath = path.resolve(targetPath);
	}
	return canonicalPath;
}
656
+
657
/**
 * Canonicalizes a path that may not exist yet (for example a file about to
 * be created).
 *
 * The nearest existing ancestor is canonicalized with
 * `canonicalizeExistingPath` and the missing trailing segments are appended
 * to it unchanged.
 *
 * @param targetPath - Path to canonicalize; resolved to an absolute path first.
 * @returns The canonical absolute path.
 */
function canonicalizeTargetPath(targetPath: string): string {
	const pendingSegments: string[] = [];
	let currentPath = path.resolve(targetPath);
	while (!fs.existsSync(currentPath)) {
		const parentPath = path.dirname(currentPath);
		if (parentPath === currentPath) {
			// Reached a filesystem root that does not exist (e.g. an unavailable drive).
			// Stop walking and keep the collected segments: returning only the root here
			// would silently drop the rest of the path.
			break;
		}
		pendingSegments.unshift(path.basename(currentPath));
		currentPath = parentPath;
	}
	return path.resolve(canonicalizeExistingPath(currentPath), ...pendingSegments);
}
670
+
671
/**
 * Checks a bash command against the read-only policy of autoresearch mode.
 *
 * This is a best-effort denylist, not a shell parser. It rejects nested
 * shells and script interpreters, common file-mutating commands, in-place
 * `sed`, state-changing git subcommands, output redirection, `tee`, and
 * here-documents/here-strings.
 *
 * A command counts as being "in command position" at the start of the
 * string, after `;`, `&`, `|`, `(` or `)`, and also after a newline or a
 * backtick, so a mutating command on a later line of a multi-line script or
 * inside a backtick substitution is caught as well.
 *
 * @param command - Raw command string from the bash tool input.
 * @returns `null` when the command is allowed (including empty commands),
 *   otherwise the message explaining why it was blocked.
 */
function validateAutoresearchBashCommand(command: string): string | null {
	const trimmed = command.trim();
	if (trimmed.length === 0) {
		return null;
	}
	const mutationPatterns = [
		// Nested shells and script interpreters.
		/(^|[;&|()\n`]\s*)(?:bash|sh)\b/,
		/(^|[;&|()\n`]\s*)(?:python|python3|node|perl|ruby|php)\b/,
		// Commands that create, move, delete or re-permission files.
		/(^|[;&|()\n`]\s*)(?:mv|cp|rm|mkdir|touch|chmod|chown|ln|install|patch)\b/,
		/(^|[;&|()\n`]\s*)sed\s+-i\b/,
		// Git subcommands that change the index, the work tree or history.
		/(^|[;&|()\n`]\s*)git\s+(?:add|apply|checkout|clean|commit|merge|rebase|reset|restore|revert|stash|switch|worktree)\b/,
		// Output redirection (`>` / `>>`), `tee`, and here-documents / here-strings.
		/(^|[^<])>>?/,
		/\|\s*tee\b/,
		/<<<?/,
	];
	if (mutationPatterns.some(pattern => pattern.test(trimmed))) {
		return (
			"Autoresearch only allows read-only shell inspection. " +
			"Use write/edit/ast_edit for file changes and run_experiment for benchmark execution."
		);
	}
	return null;
}