@oh-my-pi/pi-coding-agent 14.5.14 → 14.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/CHANGELOG.md +39 -0
  2. package/package.json +7 -7
  3. package/src/autoresearch/command-resume.md +5 -8
  4. package/src/autoresearch/git.ts +41 -51
  5. package/src/autoresearch/helpers.ts +43 -359
  6. package/src/autoresearch/index.ts +281 -273
  7. package/src/autoresearch/prompt-setup.md +43 -0
  8. package/src/autoresearch/prompt.md +52 -193
  9. package/src/autoresearch/resume-message.md +2 -8
  10. package/src/autoresearch/state.ts +59 -166
  11. package/src/autoresearch/storage.ts +687 -0
  12. package/src/autoresearch/tools/init-experiment.ts +201 -290
  13. package/src/autoresearch/tools/log-experiment.ts +304 -517
  14. package/src/autoresearch/tools/run-experiment.ts +117 -296
  15. package/src/autoresearch/tools/update-notes.ts +116 -0
  16. package/src/autoresearch/types.ts +16 -66
  17. package/src/config/settings-schema.ts +1 -1
  18. package/src/config/settings.ts +20 -1
  19. package/src/cursor.ts +1 -1
  20. package/src/edit/index.ts +9 -31
  21. package/src/edit/line-hash.ts +70 -43
  22. package/src/edit/modes/hashline.lark +26 -0
  23. package/src/edit/modes/hashline.ts +898 -1099
  24. package/src/edit/modes/patch.ts +0 -7
  25. package/src/edit/modes/replace.ts +0 -4
  26. package/src/edit/renderer.ts +22 -20
  27. package/src/edit/streaming.ts +8 -28
  28. package/src/eval/eval.lark +24 -30
  29. package/src/eval/js/context-manager.ts +5 -162
  30. package/src/eval/js/prelude.txt +0 -12
  31. package/src/eval/parse.ts +129 -129
  32. package/src/eval/py/prelude.py +1 -219
  33. package/src/export/html/template.generated.ts +1 -1
  34. package/src/export/html/template.js +2 -2
  35. package/src/internal-urls/docs-index.generated.ts +1 -1
  36. package/src/modes/components/session-observer-overlay.ts +5 -2
  37. package/src/modes/components/status-line/segments.ts +1 -1
  38. package/src/modes/components/status-line.ts +3 -5
  39. package/src/modes/components/tree-selector.ts +4 -5
  40. package/src/modes/components/welcome.ts +11 -1
  41. package/src/modes/controllers/command-controller.ts +2 -6
  42. package/src/modes/controllers/event-controller.ts +1 -2
  43. package/src/modes/controllers/extension-ui-controller.ts +3 -15
  44. package/src/modes/controllers/input-controller.ts +0 -1
  45. package/src/modes/controllers/selector-controller.ts +1 -1
  46. package/src/modes/interactive-mode.ts +5 -7
  47. package/src/prompts/system/system-prompt.md +14 -38
  48. package/src/prompts/tools/ast-edit.md +8 -8
  49. package/src/prompts/tools/ast-grep.md +10 -10
  50. package/src/prompts/tools/eval.md +13 -31
  51. package/src/prompts/tools/find.md +2 -1
  52. package/src/prompts/tools/hashline.md +66 -57
  53. package/src/prompts/tools/search.md +2 -2
  54. package/src/session/session-manager.ts +17 -13
  55. package/src/tools/ast-edit.ts +141 -44
  56. package/src/tools/ast-grep.ts +112 -36
  57. package/src/tools/eval.ts +2 -53
  58. package/src/tools/find.ts +16 -15
  59. package/src/tools/path-utils.ts +36 -196
  60. package/src/tools/search.ts +56 -35
  61. package/src/utils/edit-mode.ts +2 -11
  62. package/src/utils/file-display-mode.ts +1 -1
  63. package/src/utils/git.ts +17 -0
  64. package/src/utils/session-color.ts +0 -12
  65. package/src/utils/title-generator.ts +22 -38
  66. package/src/autoresearch/apply-contract-to-state.ts +0 -24
  67. package/src/autoresearch/contract.ts +0 -288
  68. package/src/edit/modes/atom.lark +0 -29
  69. package/src/edit/modes/atom.ts +0 -1773
  70. package/src/prompts/tools/atom.md +0 -150
@@ -1,40 +1,34 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
3
  import type { AutocompleteItem } from "@oh-my-pi/pi-tui";
4
- import { prompt } from "@oh-my-pi/pi-utils";
4
+ import { logger, prompt } from "@oh-my-pi/pi-utils";
5
5
  import type { ExtensionContext, ExtensionFactory } from "../extensibility/extensions";
6
+ import * as git from "../utils/git";
6
7
  import commandResumeTemplate from "./command-resume.md" with { type: "text" };
7
- import { pathMatchesContractPath } from "./contract";
8
8
  import { createDashboardController } from "./dashboard";
9
9
  import { ensureAutoresearchBranch } from "./git";
10
- import {
11
- collectLoggedRunNumbers,
12
- formatNum,
13
- isAutoresearchCommittableFile,
14
- isAutoresearchLocalStatePath,
15
- normalizeAutoresearchPath,
16
- readMaxExperiments,
17
- readPendingRunSummary,
18
- resolveWorkDir,
19
- validateWorkDir,
20
- } from "./helpers";
10
+ import { formatNum } from "./helpers";
21
11
  import promptTemplate from "./prompt.md" with { type: "text" };
12
+ import setupPromptTemplate from "./prompt-setup.md" with { type: "text" };
22
13
  import resumeMessageTemplate from "./resume-message.md" with { type: "text" };
23
14
  import {
24
- cloneExperimentState,
15
+ buildExperimentState,
25
16
  createExperimentState,
26
17
  createRuntimeStore,
27
18
  currentResults,
28
19
  findBaselineMetric,
20
+ findBaselineRunNumber,
21
+ findBestKeptMetric,
29
22
  reconstructControlState,
30
- reconstructStateFromJsonl,
31
23
  } from "./state";
24
+ import { openAutoresearchStorage, openAutoresearchStorageIfExists, type RunRow, type SessionRow } from "./storage";
32
25
  import { createInitExperimentTool } from "./tools/init-experiment";
33
26
  import { createLogExperimentTool } from "./tools/log-experiment";
34
27
  import { createRunExperimentTool } from "./tools/run-experiment";
35
- import type { AutoresearchRuntime, ChecksResult, ExperimentResult, PendingRunSummary } from "./types";
28
+ import { createUpdateNotesTool } from "./tools/update-notes";
29
+ import type { AutoresearchRuntime, ExperimentResult, PendingRunSummary } from "./types";
36
30
 
37
- const EXPERIMENT_TOOL_NAMES = ["init_experiment", "run_experiment", "log_experiment"];
31
+ const EXPERIMENT_TOOL_NAMES = ["init_experiment", "run_experiment", "log_experiment", "update_notes"];
38
32
 
39
33
  export const createAutoresearchExtension: ExtensionFactory = api => {
40
34
  const runtimeStore = createRuntimeStore();
@@ -43,26 +37,60 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
43
37
  const getSessionKey = (ctx: ExtensionContext): string => ctx.sessionManager.getSessionId();
44
38
  const getRuntime = (ctx: ExtensionContext): AutoresearchRuntime => runtimeStore.ensure(getSessionKey(ctx));
45
39
 
40
+ const loadActiveSession = async (
41
+ ctx: ExtensionContext,
42
+ ): Promise<{ session: SessionRow | null; currentBranch: string | null }> => {
43
+ const currentBranch = await tryReadBranch(ctx.cwd);
44
+ const storage = await openAutoresearchStorageIfExists(ctx.cwd);
45
+ if (!storage) return { session: null, currentBranch };
46
+ const session = storage.getActiveSessionForBranch(currentBranch);
47
+ return { session, currentBranch };
48
+ };
49
+
46
50
  const rehydrate = async (ctx: ExtensionContext): Promise<void> => {
47
51
  const runtime = getRuntime(ctx);
48
- const workDir = resolveWorkDir(ctx.cwd);
49
- const reconstructed = reconstructStateFromJsonl(workDir);
50
52
  const control = reconstructControlState(ctx.sessionManager.getBranch());
51
- const loggedRunNumbers = collectLoggedRunNumbers(reconstructed.state.results);
52
- runtime.state = cloneExperimentState(reconstructed.state);
53
- runtime.state.maxExperiments = readMaxExperiments(ctx.cwd);
54
53
  runtime.goal = control.goal;
55
- runtime.autoresearchMode = control.autoresearchMode;
56
54
  runtime.autoResumeArmed = false;
57
55
  runtime.lastAutoResumePendingRunNumber = null;
58
- runtime.lastRunSummary = await readPendingRunSummary(workDir, loggedRunNumbers);
59
- runtime.lastRunChecks = summaryToChecks(runtime.lastRunSummary);
56
+
57
+ // Skip storage entirely if autoresearch was never activated in this conversation.
58
+ // This is the common case: every project gets a session_start event but most
59
+ // never touch autoresearch, so we must not create a SQLite file just to look.
60
+ const everActivated = control.lastMode !== null;
61
+ const { session, currentBranch } = everActivated
62
+ ? await loadActiveSession(ctx)
63
+ : { session: null, currentBranch: null };
64
+
65
+ // Mode is effective only when the recorded session matches the current git
66
+ // branch. When the user switches off the autoresearch branch the widget hides
67
+ // and the experiment tools detach, but the session entries are preserved so
68
+ // switching back resumes seamlessly.
69
+ const onActiveBranch = session === null || session.branch === null || session.branch === currentBranch;
70
+ runtime.autoresearchMode = control.autoresearchMode && onActiveBranch;
71
+
72
+ if (session && onActiveBranch) {
73
+ const storage = await openAutoresearchStorageIfExists(ctx.cwd);
74
+ if (storage) {
75
+ const loggedRuns = storage.listLoggedRuns(session.id);
76
+ runtime.state = buildExperimentState(session, loggedRuns);
77
+ runtime.goal = runtime.goal ?? session.goal;
78
+ runtime.lastRunSummary = pendingRunSummaryFromRow(storage.getPendingRun(session.id));
79
+ } else {
80
+ runtime.state = createExperimentState();
81
+ runtime.lastRunSummary = null;
82
+ }
83
+ } else {
84
+ runtime.state = createExperimentState();
85
+ runtime.lastRunSummary = null;
86
+ }
60
87
  runtime.lastRunDuration = runtime.lastRunSummary?.durationSeconds ?? null;
61
88
  runtime.lastRunAsi = runtime.lastRunSummary?.parsedAsi ?? null;
62
89
  runtime.lastRunArtifactDir = runtime.lastRunSummary?.runDirectory ?? null;
63
90
  runtime.lastRunNumber = runtime.lastRunSummary?.runNumber ?? null;
64
91
  runtime.runningExperiment = null;
65
92
  dashboard.updateWidget(ctx, runtime);
93
+
66
94
  const activeTools = api.getActiveTools();
67
95
  const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
68
96
  const nextActiveTools = runtime.autoresearchMode
@@ -93,50 +121,21 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
93
121
  api.registerTool(createInitExperimentTool({ dashboard, getRuntime, pi: api }));
94
122
  api.registerTool(createRunExperimentTool({ dashboard, getRuntime, pi: api }));
95
123
  api.registerTool(createLogExperimentTool({ dashboard, getRuntime, pi: api }));
96
- api.on("tool_call", (event, ctx) => {
97
- const runtime = getRuntime(ctx);
98
- if (!runtime.autoresearchMode) return;
99
- if (event.toolName !== "write" && event.toolName !== "edit" && event.toolName !== "ast_edit") return;
100
-
101
- const rawPaths = getGuardedToolPaths(event.toolName, event.input);
102
- if (rawPaths === null) {
103
- return {
104
- block: true,
105
- reason:
106
- "Autoresearch requires an explicit target path for this editing tool so it can enforce Files in Scope and Off Limits before changes are made.",
107
- };
108
- }
109
-
110
- const workDir = resolveWorkDir(ctx.cwd);
111
- for (const rawPath of rawPaths) {
112
- const relativePath = resolveAutoresearchRelativePath(workDir, rawPath);
113
- if (!relativePath.ok) {
114
- return {
115
- block: true,
116
- reason: relativePath.reason,
117
- };
118
- }
119
- const validationError = validateEditableAutoresearchPath(relativePath.relativePath, runtime);
120
- if (validationError) {
121
- return {
122
- block: true,
123
- reason: `Autoresearch blocked edits to ${relativePath.relativePath}: ${validationError}`,
124
- };
125
- }
126
- }
127
- });
124
+ api.registerTool(createUpdateNotesTool({ dashboard, getRuntime, pi: api }));
128
125
 
129
126
  api.registerCommand("autoresearch", {
130
127
  description: "Toggle builtin autoresearch mode, or pass off / clear, or a goal message.",
131
128
  getArgumentCompletions(argumentPrefix: string): AutocompleteItem[] | null {
132
129
  if (argumentPrefix.includes(" ")) return null;
133
130
  const normalized = argumentPrefix.trim().toLowerCase();
134
- // No suggestions for an empty argument prefix so Tab after "/autoresearch " does not
135
- // force-complete into off/clear; bare command submit toggles like /plan.
136
131
  if (normalized.length === 0) return null;
137
132
  const completions: AutocompleteItem[] = [
138
133
  { label: "off", value: "off", description: "Leave autoresearch mode" },
139
- { label: "clear", value: "clear", description: "Delete autoresearch.jsonl and leave autoresearch mode" },
134
+ {
135
+ label: "clear",
136
+ value: "clear",
137
+ description: "Reset worktree to baseline and close the active session",
138
+ },
140
139
  ];
141
140
  const filtered = completions.filter(item => item.label.startsWith(normalized));
142
141
  return filtered.length > 0 ? filtered : null;
@@ -144,11 +143,6 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
144
143
  async handler(args, ctx): Promise<void> {
145
144
  const trimmed = args.trim();
146
145
  const runtime = getRuntime(ctx);
147
- const workDirError = validateWorkDir(ctx.cwd);
148
- if (workDirError) {
149
- ctx.ui.notify(workDirError, "error");
150
- return;
151
- }
152
146
 
153
147
  if (trimmed === "" && runtime.autoresearchMode) {
154
148
  setMode(ctx, false, runtime.goal, "off");
@@ -167,59 +161,52 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
167
161
  ctx.ui.notify("Autoresearch mode disabled", "info");
168
162
  return;
169
163
  }
170
- if (trimmed === "clear") {
171
- const workDir = resolveWorkDir(ctx.cwd);
172
- const jsonlPath = path.join(workDir, "autoresearch.jsonl");
173
- const localStatePath = path.join(workDir, ".autoresearch");
174
- if (fs.existsSync(jsonlPath)) {
175
- fs.rmSync(jsonlPath);
176
- }
177
- if (fs.existsSync(localStatePath)) {
178
- fs.rmSync(localStatePath, { force: true, recursive: true });
179
- }
180
- runtime.state = createExperimentState();
181
- runtime.state.maxExperiments = readMaxExperiments(ctx.cwd);
182
- runtime.goal = null;
183
- runtime.lastRunChecks = null;
184
- runtime.lastRunDuration = null;
185
- runtime.lastRunAsi = null;
186
- runtime.lastRunArtifactDir = null;
187
- runtime.lastRunNumber = null;
188
- runtime.lastRunSummary = null;
189
- setMode(ctx, false, null, "clear");
190
- dashboard.updateWidget(ctx, runtime);
191
- const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
192
- await api.setActiveTools(api.getActiveTools().filter(name => !experimentTools.has(name)));
193
- ctx.ui.notify("Autoresearch local state cleared", "info");
164
+
165
+ if (trimmed === "clear" || trimmed.startsWith("clear ")) {
166
+ const flagPart = trimmed === "clear" ? "" : trimmed.slice("clear ".length).trim();
167
+ const keepTree = flagPart.includes("--keep-tree");
168
+ const resetTreeForce = flagPart.includes("--reset-tree");
169
+ await handleClear(ctx, runtime, { keepTree, resetTreeForce });
194
170
  return;
195
171
  }
196
172
 
197
- const workDir = resolveWorkDir(ctx.cwd);
198
- const autoresearchMdPath = path.join(workDir, "autoresearch.md");
199
- const hasAutoresearchMd = fs.existsSync(autoresearchMdPath);
200
- const controlState = reconstructControlState(ctx.sessionManager.getBranch());
201
- const shouldResumeExistingNotes =
202
- hasAutoresearchMd &&
203
- (hasLocalAutoresearchState(workDir) || (controlState.lastMode !== "clear" && trimmed.length === 0));
204
-
205
- if (shouldResumeExistingNotes) {
206
- const resumeContext = trimmed;
207
- const resumeGoal = runtime.goal ?? runtime.state.name ?? null;
208
- const branchResult = await ensureAutoresearchBranch(api, workDir, resumeGoal);
209
- if (!branchResult.ok) {
210
- ctx.ui.notify(branchResult.error, "error");
211
- return;
212
- }
173
+ const goalArg = trimmed.length > 0 ? trimmed : null;
174
+ const branchResult = await ensureAutoresearchBranch(api, ctx.cwd, goalArg ?? runtime.goal);
175
+ if (!branchResult.ok) {
176
+ ctx.ui.notify(branchResult.error, "error");
177
+ return;
178
+ }
179
+ if (branchResult.warning) {
180
+ ctx.ui.notify(branchResult.warning, "warning");
181
+ }
182
+
183
+ // Look up an existing session for the branch we just landed on. A session
184
+ // recorded under a different autoresearch/* branch is intentionally ignored
185
+ // — `/autoresearch` on a fresh branch starts a fresh session. Only open the
186
+ // DB if it already exists; the empty-state path must not create one.
187
+ const existingStorage = await openAutoresearchStorageIfExists(ctx.cwd);
188
+ const existingSession = existingStorage?.getActiveSessionForBranch(branchResult.branchName) ?? null;
189
+ const resumeContext = trimmed;
190
+ const branchStatusLine = branchResult.branchName
191
+ ? branchResult.created
192
+ ? `Created and checked out dedicated git branch \`${branchResult.branchName}\` before resuming.`
193
+ : `Using dedicated git branch \`${branchResult.branchName}\`.`
194
+ : "Continuing on the current branch — no autoresearch branch was created.";
213
195
 
214
- setMode(ctx, true, resumeGoal, "on");
196
+ if (existingSession && existingStorage) {
197
+ if (goalArg) existingStorage.updateSession(existingSession.id, { goal: goalArg });
198
+ if (branchResult.branchName) {
199
+ existingStorage.updateSession(existingSession.id, { branch: branchResult.branchName });
200
+ }
201
+ const refreshed = existingStorage.getSessionById(existingSession.id) ?? existingSession;
202
+ runtime.state = buildExperimentState(refreshed, existingStorage.listLoggedRuns(refreshed.id));
203
+ runtime.goal = refreshed.goal ?? goalArg;
204
+ setMode(ctx, true, runtime.goal, "on");
215
205
  dashboard.updateWidget(ctx, runtime);
216
206
  await api.setActiveTools([...new Set([...api.getActiveTools(), ...EXPERIMENT_TOOL_NAMES])]);
217
207
  api.sendUserMessage(
218
208
  prompt.render(commandResumeTemplate, {
219
- autoresearch_md_path: autoresearchMdPath,
220
- branch_status_line: branchResult.created
221
- ? `Created and checked out dedicated git branch \`${branchResult.branchName}\` before resuming.`
222
- : `Using dedicated git branch \`${branchResult.branchName}\`.`,
209
+ branch_status_line: branchStatusLine,
223
210
  has_resume_context: resumeContext.length > 0,
224
211
  resume_context: resumeContext,
225
212
  }),
@@ -227,18 +214,11 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
227
214
  return;
228
215
  }
229
216
 
230
- const branchGoal = trimmed.length > 0 ? trimmed : null;
231
- const branchResult = await ensureAutoresearchBranch(api, workDir, branchGoal);
232
- if (!branchResult.ok) {
233
- ctx.ui.notify(branchResult.error, "error");
234
- return;
235
- }
236
-
237
- setMode(ctx, true, branchGoal, "on");
217
+ setMode(ctx, true, goalArg, "on");
238
218
  dashboard.updateWidget(ctx, runtime);
239
219
  await api.setActiveTools([...new Set([...api.getActiveTools(), ...EXPERIMENT_TOOL_NAMES])]);
240
- if (trimmed.length > 0) {
241
- api.sendUserMessage(trimmed);
220
+ if (goalArg !== null) {
221
+ api.sendUserMessage(goalArg);
242
222
  } else {
243
223
  ctx.ui.notify("Autoresearch enabled—describe what to optimize in your next message.", "info");
244
224
  }
@@ -284,12 +264,11 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
284
264
  runtime.autoResumeArmed = false;
285
265
  return;
286
266
  }
287
- const workDir = resolveWorkDir(ctx.cwd);
288
- const pendingRun =
289
- runtime.lastRunSummary ??
290
- (await readPendingRunSummary(workDir, collectLoggedRunNumbers(runtime.state.results)));
267
+ const { session } = await loadActiveSession(ctx);
268
+ const storage = session ? await openAutoresearchStorageIfExists(ctx.cwd) : null;
269
+ const pendingRow = session && storage ? storage.getPendingRun(session.id) : null;
270
+ const pendingRun = pendingRunSummaryFromRow(pendingRow);
291
271
  runtime.lastRunSummary = pendingRun;
292
- runtime.lastRunChecks = summaryToChecks(pendingRun);
293
272
  runtime.lastRunDuration = pendingRun?.durationSeconds ?? runtime.lastRunDuration;
294
273
  runtime.lastRunAsi = pendingRun?.parsedAsi ?? runtime.lastRunAsi;
295
274
  const shouldResumePendingRun =
@@ -299,14 +278,10 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
299
278
  }
300
279
  runtime.autoResumeArmed = false;
301
280
  runtime.lastAutoResumePendingRunNumber = pendingRun?.runNumber ?? null;
302
- const autoresearchMdPath = path.join(workDir, "autoresearch.md");
303
- const ideasPath = path.join(workDir, "autoresearch.ideas.md");
304
281
  api.sendMessage(
305
282
  {
306
283
  customType: "autoresearch-resume",
307
284
  content: prompt.render(resumeMessageTemplate, {
308
- autoresearch_md_path: autoresearchMdPath,
309
- has_ideas: fs.existsSync(ideasPath),
310
285
  has_pending_run: Boolean(pendingRun),
311
286
  }),
312
287
  display: false,
@@ -319,206 +294,239 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
319
294
  api.on("before_agent_start", async (event, ctx) => {
320
295
  const runtime = getRuntime(ctx);
321
296
  if (!runtime.autoresearchMode) return;
322
- const workDir = resolveWorkDir(ctx.cwd);
323
- const autoresearchMdPath = path.join(workDir, "autoresearch.md");
324
- const checksPath = path.join(workDir, "autoresearch.checks.sh");
325
- const ideasPath = path.join(workDir, "autoresearch.ideas.md");
326
- const programPath = path.join(workDir, "autoresearch.program.md");
327
- const pendingRun =
328
- runtime.lastRunSummary ??
329
- (await readPendingRunSummary(workDir, collectLoggedRunNumbers(runtime.state.results)));
297
+ // Re-check git branch on every agent start. If the user manually switched
298
+ // off the autoresearch/* branch between turns, we silently drop autoresearch
299
+ // from this turn — the widget hides, the experiment tools detach, and we do
300
+ // not inject the autoresearch system prompt.
301
+ const { session, currentBranch } = await loadActiveSession(ctx);
302
+ const onActiveBranch = session === null || session.branch === null || session.branch === currentBranch;
303
+ if (!onActiveBranch) {
304
+ runtime.autoresearchMode = false;
305
+ runtime.state = createExperimentState();
306
+ runtime.lastRunSummary = null;
307
+ runtime.runningExperiment = null;
308
+ dashboard.updateWidget(ctx, runtime);
309
+ const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
310
+ await api.setActiveTools(api.getActiveTools().filter(name => !experimentTools.has(name)));
311
+ return;
312
+ }
313
+ const storage = await openAutoresearchStorageIfExists(ctx.cwd);
314
+ if (session && storage) {
315
+ runtime.state = buildExperimentState(session, storage.listLoggedRuns(session.id));
316
+ }
317
+ const pendingRow = session && storage ? storage.getPendingRun(session.id) : null;
318
+ const pendingRun = pendingRunSummaryFromRow(pendingRow);
330
319
  runtime.lastRunSummary = pendingRun;
331
- runtime.lastRunChecks = summaryToChecks(pendingRun);
332
320
  runtime.lastRunDuration = pendingRun?.durationSeconds ?? runtime.lastRunDuration;
333
321
  runtime.lastRunAsi = pendingRun?.parsedAsi ?? runtime.lastRunAsi;
334
- const currentSegmentResults = currentResults(runtime.state.results, runtime.state.currentSegment);
335
- const baselineMetric = findBaselineMetric(runtime.state.results, runtime.state.currentSegment);
336
- const bestResult = findBestResult(runtime);
337
- const goal = runtime.goal ?? runtime.state.name ?? "";
322
+ const state = runtime.state;
323
+ const currentSegmentResults = currentResults(state.results, state.currentSegment);
324
+ const baselineMetric = findBaselineMetric(state.results, state.currentSegment);
325
+ const baselineRunNumber = findBaselineRunNumber(state.results, state.currentSegment);
326
+ const bestMetric = findBestKeptMetric(state.results, state.currentSegment, state.bestDirection);
327
+ const bestResult = bestKeptResult(state.results, state.currentSegment, state.bestDirection);
328
+ const goal = runtime.goal ?? state.goal ?? state.name ?? "";
338
329
  const recentResults = currentSegmentResults.slice(-3).map(result => {
339
330
  const asiSummary = summarizeExperimentAsi(result);
340
331
  return {
341
332
  asi_summary: asiSummary,
342
333
  description: result.description,
343
334
  has_asi_summary: Boolean(asiSummary),
344
- metric_display: formatNum(result.metric, runtime.state.metricUnit),
345
- run_number: result.runNumber ?? runtime.state.results.indexOf(result) + 1,
335
+ metric_display: formatNum(result.metric, state.metricUnit),
336
+ run_number: result.runNumber ?? state.results.indexOf(result) + 1,
346
337
  status: result.status,
338
+ has_deviations: result.scopeDeviations.length > 0,
339
+ deviations: result.scopeDeviations.join(", "),
340
+ justified: Boolean(result.justification),
341
+ flagged: result.flagged,
342
+ flagged_reason: result.flaggedReason ?? "",
347
343
  };
348
344
  });
349
- const hasAutoresearchMd = fs.existsSync(autoresearchMdPath);
345
+ const unjustifiedRuns = currentSegmentResults
346
+ .filter(r => r.status === "keep" && !r.flagged && r.scopeDeviations.length > 0 && !r.justification)
347
+ .slice(-3)
348
+ .map(r => ({
349
+ run_number: r.runNumber,
350
+ paths: r.scopeDeviations.join(", "),
351
+ }));
352
+ if (!session) {
353
+ const currentBranch = await tryReadBranch(ctx.cwd);
354
+ const onAutoresearchBranch = currentBranch?.startsWith("autoresearch/") ?? false;
355
+ const baselineWarning = onAutoresearchBranch
356
+ ? null
357
+ : "Heads up: you are not on a dedicated `autoresearch/*` branch. `log_experiment discard` will only revert run-modified files, not reset to baseline — so harness files written before `init_experiment` may not survive a discard. Clean the worktree and re-run `/autoresearch` if you want full revert safety.";
358
+ return {
359
+ systemPrompt: prompt.render(setupPromptTemplate, {
360
+ base_system_prompt: event.systemPrompt,
361
+ has_goal: goal.trim().length > 0,
362
+ goal,
363
+ working_dir: ctx.cwd,
364
+ has_branch: Boolean(currentBranch),
365
+ branch: currentBranch ?? "",
366
+ has_baseline_warning: baselineWarning !== null,
367
+ baseline_warning: baselineWarning ?? "",
368
+ }),
369
+ };
370
+ }
350
371
  return {
351
372
  systemPrompt: prompt.render(promptTemplate, {
352
373
  base_system_prompt: event.systemPrompt,
353
374
  has_goal: goal.trim().length > 0,
354
375
  goal,
355
- has_autoresearch_md: hasAutoresearchMd,
356
- working_dir: workDir,
357
- default_metric_name: runtime.state.metricName,
358
- metric_name: runtime.state.metricName,
359
- autoresearch_md_path: autoresearchMdPath,
360
- has_checks: fs.existsSync(checksPath),
361
- checks_path: checksPath,
362
- has_ideas: fs.existsSync(ideasPath),
363
- ideas_path: ideasPath,
364
- has_program: fs.existsSync(programPath),
365
- program_path: programPath,
366
- current_segment: runtime.state.currentSegment + 1,
376
+ working_dir: ctx.cwd,
377
+ default_metric_name: state.metricName,
378
+ metric_name: state.metricName,
379
+ has_branch: Boolean(state.branch),
380
+ branch: state.branch,
381
+ has_baseline_commit: Boolean(state.baselineCommit),
382
+ baseline_commit: state.baselineCommit ? state.baselineCommit.slice(0, 12) : "",
383
+ has_notes: state.notes.trim().length > 0,
384
+ notes: state.notes,
385
+ current_segment: state.currentSegment + 1,
367
386
  current_segment_run_count: currentSegmentResults.length,
368
387
  has_baseline_metric: baselineMetric !== null,
369
- baseline_metric_display: formatNum(baselineMetric, runtime.state.metricUnit),
370
- has_best_result: Boolean(bestResult),
371
- best_metric_display: bestResult
372
- ? formatNum(bestResult.metric, runtime.state.metricUnit)
373
- : formatNum(baselineMetric, runtime.state.metricUnit),
374
- best_run_number: bestResult
375
- ? (bestResult.runNumber ?? runtime.state.results.indexOf(bestResult) + 1)
376
- : null,
388
+ baseline_metric_display: formatNum(baselineMetric, state.metricUnit),
389
+ baseline_run_number: baselineRunNumber,
390
+ has_best_result: bestResult !== null && bestMetric !== null,
391
+ best_metric_display: bestMetric !== null ? formatNum(bestMetric, state.metricUnit) : "-",
392
+ best_run_number: bestResult ? (bestResult.runNumber ?? state.results.indexOf(bestResult) + 1) : null,
377
393
  has_recent_results: recentResults.length > 0,
378
394
  recent_results: recentResults,
395
+ has_unjustified_runs: unjustifiedRuns.length > 0,
396
+ unjustified_runs: unjustifiedRuns,
379
397
  has_pending_run: Boolean(pendingRun),
380
398
  pending_run_number: pendingRun?.runNumber,
381
399
  pending_run_command: pendingRun?.command,
382
- pending_run_directory: pendingRun?.runDirectory,
383
400
  pending_run_passed: pendingRun?.passed ?? false,
384
401
  has_pending_run_metric: pendingRun?.parsedPrimary !== null && pendingRun?.parsedPrimary !== undefined,
385
402
  pending_run_metric_display:
386
403
  pendingRun?.parsedPrimary !== null && pendingRun?.parsedPrimary !== undefined
387
- ? formatNum(pendingRun.parsedPrimary, runtime.state.metricUnit)
404
+ ? formatNum(pendingRun.parsedPrimary, state.metricUnit)
388
405
  : null,
389
406
  }),
390
407
  };
391
408
  });
392
- };
393
409
 
394
- function hasLocalAutoresearchState(workDir: string): boolean {
395
- return fs.existsSync(path.join(workDir, "autoresearch.jsonl")) || fs.existsSync(path.join(workDir, ".autoresearch"));
396
- }
410
+ async function handleClear(
411
+ ctx: ExtensionContext,
412
+ runtime: AutoresearchRuntime,
413
+ opts: { keepTree: boolean; resetTreeForce: boolean },
414
+ ): Promise<void> {
415
+ const storage = await openAutoresearchStorage(ctx.cwd);
416
+ const session = storage.getActiveSession();
417
+ const branchName = await tryReadBranch(ctx.cwd);
418
+ const onAutoresearchBranch = branchName?.startsWith("autoresearch/") ?? false;
419
+ const shouldResetTree = !opts.keepTree && (onAutoresearchBranch || opts.resetTreeForce);
420
+ if (shouldResetTree && session?.baselineCommit) {
421
+ try {
422
+ await git.reset(ctx.cwd, { hard: true, target: session.baselineCommit });
423
+ await git.clean(ctx.cwd);
424
+ ctx.ui.notify(`Reset worktree to baseline ${session.baselineCommit.slice(0, 12)}.`, "info");
425
+ } catch (err) {
426
+ ctx.ui.notify(
427
+ `Failed to reset worktree to baseline: ${err instanceof Error ? err.message : String(err)}`,
428
+ "error",
429
+ );
430
+ }
431
+ } else if (shouldResetTree) {
432
+ ctx.ui.notify("No baseline commit recorded — skipped worktree reset.", "warning");
433
+ }
397
434
 
398
- function summarizeExperimentAsi(result: ExperimentResult): string | null {
399
- const hypothesis = typeof result.asi?.hypothesis === "string" ? result.asi.hypothesis.trim() : "";
400
- const rollbackReason = typeof result.asi?.rollback_reason === "string" ? result.asi.rollback_reason.trim() : "";
401
- const nextActionHint = typeof result.asi?.next_action_hint === "string" ? result.asi.next_action_hint.trim() : "";
402
- const summary = [hypothesis, rollbackReason, nextActionHint].filter(part => part.length > 0).join(" | ");
403
- return summary.length > 0 ? summary.slice(0, 220) : null;
404
- }
435
+ removeLegacyArtifacts(ctx.cwd);
405
436
 
406
- function getGuardedToolPaths(toolName: string, input: Record<string, unknown>): string[] | null {
407
- if (toolName === "write") {
408
- return typeof input.path === "string" ? [input.path] : null;
409
- }
410
- if (toolName === "ast_edit") {
411
- return typeof input.path === "string" ? [input.path] : null;
412
- }
413
- if (toolName !== "edit") {
414
- return [];
437
+ if (session) {
438
+ storage.closeSession(session.id);
439
+ }
440
+ runtime.state = createExperimentState();
441
+ runtime.goal = null;
442
+ runtime.lastRunDuration = null;
443
+ runtime.lastRunAsi = null;
444
+ runtime.lastRunArtifactDir = null;
445
+ runtime.lastRunNumber = null;
446
+ runtime.lastRunSummary = null;
447
+ setMode(ctx, false, null, "clear");
448
+ dashboard.updateWidget(ctx, runtime);
449
+ const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
450
+ await api.setActiveTools(api.getActiveTools().filter(name => !experimentTools.has(name)));
451
+ ctx.ui.notify("Autoresearch session cleared.", "info");
415
452
  }
453
+ };
416
454
 
417
- const paths: string[] = [];
418
- if (typeof input.path === "string") {
419
- paths.push(input.path);
420
- }
421
- if (typeof input.rename === "string") {
422
- paths.push(input.rename);
423
- }
424
- if (typeof input.move === "string") {
425
- paths.push(input.move);
455
/**
 * File and directory names, relative to a working directory, that
 * `removeLegacyArtifacts` deletes.
 * NOTE(review): the "legacy" naming suggests these are leftovers from an
 * earlier on-disk autoresearch layout — confirm before adding or removing entries.
 */
+ const LEGACY_ARTIFACTS = [
456
+ "autoresearch.md",
457
+ "autoresearch.sh",
458
+ "autoresearch.checks.sh",
459
+ "autoresearch.program.md",
460
+ "autoresearch.ideas.md",
461
+ "autoresearch.jsonl",
462
+ "autoresearch.config.json",
463
+ ".autoresearch",
464
+ ];
465
+
466
/**
 * Deletes every entry of `LEGACY_ARTIFACTS` found under `workDir`.
 *
 * Each name is joined onto `workDir` and removed with
 * `fs.rmSync(target, { recursive: true, force: true })`, so directories are
 * removed together with their contents and a path that does not exist is not
 * treated as an error. An exception raised while removing one entry is caught
 * and reported through `logger.warn`; the loop then continues with the
 * remaining entries.
 *
 * @param workDir Directory that the artifact names are resolved against.
 */
+ function removeLegacyArtifacts(workDir: string): void {
467
+ for (const name of LEGACY_ARTIFACTS) {
468
+ const target = path.join(workDir, name);
469
+ try {
470
+ fs.rmSync(target, { recursive: true, force: true });
471
+ } catch (err) {
472
+ logger.warn("Failed to remove legacy autoresearch artifact", {
473
+ path: target,
474
+ error: err instanceof Error ? err.message : String(err),
475
+ });
476
+ }
426
477
  }
427
- return paths;
428
478
  }
429
479
 
430
- function resolveAutoresearchRelativePath(
431
- workDir: string,
432
- rawPath: string,
433
- ): { ok: false; reason: string } | { ok: true; relativePath: string } {
434
- if (looksLikeInternalUrl(rawPath)) {
435
- return {
436
- ok: false,
437
- reason: `Autoresearch cannot validate internal URL paths during scoped editing: ${rawPath}`,
438
- };
439
- }
440
- const resolvedPath = path.isAbsolute(rawPath) ? path.resolve(rawPath) : path.resolve(workDir, rawPath);
441
- const canonicalWorkDir = canonicalizeExistingPath(workDir);
442
- const canonicalTargetPath = canonicalizeTargetPath(resolvedPath);
443
- const relativePath = path.relative(canonicalWorkDir, canonicalTargetPath);
444
- if (relativePath === ".." || relativePath.startsWith(`..${path.sep}`) || path.isAbsolute(relativePath)) {
445
- return {
446
- ok: false,
447
- reason: `Autoresearch blocked edits outside the working tree: ${rawPath}`,
448
- };
449
- }
480
/**
 * Converts a stored run row into a `PendingRunSummary`.
 *
 * Returns `null` when `row` is null, when `row.status` is not null, or when
 * `row.completedAt` is null.
 * NOTE(review): presumably a non-null `status` means the run was already
 * logged and a null `completedAt` means it has not finished — confirm against
 * the `RunRow` definition.
 *
 * In the returned summary:
 * - `passed` is true only when `exitCode` is 0 and the run did not time out.
 * - `durationSeconds` is `durationMs / 1000`, or null when `durationMs` is null.
 * - `runDirectory` is the directory that contains `row.logPath`.
 * - `runNumber` is `row.id`.
 *
 * @param row Run row to convert, or null.
 * @returns The summary, or null under the conditions listed above.
 */
+ function pendingRunSummaryFromRow(row: RunRow | null): PendingRunSummary | null {
481
+ if (!row) return null;
482
+ if (row.status !== null) return null;
483
+ if (row.completedAt === null) return null;
484
+ const passed = row.exitCode === 0 && !row.timedOut;
450
485
  return {
451
- ok: true,
452
- relativePath: relativePath.length === 0 ? "." : normalizeAutoresearchPath(relativePath),
486
+ command: row.command,
487
+ durationSeconds: row.durationMs !== null ? row.durationMs / 1000 : null,
488
+ parsedAsi: row.parsedAsi,
489
+ parsedMetrics: row.parsedMetrics,
490
+ parsedPrimary: row.parsedPrimary,
491
+ passed,
492
+ preRunDirtyPaths: row.preRunDirtyPaths,
493
+ runDirectory: path.dirname(row.logPath),
494
+ runNumber: row.id,
495
+ exitCode: row.exitCode,
496
+ timedOut: row.timedOut,
453
497
  };
454
498
  }
455
499
 
456
- function validateEditableAutoresearchPath(relativePath: string, runtime: AutoresearchRuntime): string | null {
457
- if (isAutoresearchLocalStatePath(relativePath)) {
458
- return "autoresearch local state files are managed by the experiment tools and cannot be edited directly";
459
- }
460
- if (runtime.state.offLimits.some(spec => pathMatchesContractPath(relativePath, spec))) {
461
- return "this path is listed under Off Limits in autoresearch.md";
462
- }
463
- if (isAutoresearchCommittableFile(relativePath)) {
464
- return null;
465
- }
466
- if (runtime.state.scopePaths.length === 0) {
467
- return "Files in Scope is not initialized yet; only autoresearch control files may be edited before init_experiment runs";
468
- }
469
- if (!runtime.state.scopePaths.some(spec => pathMatchesContractPath(relativePath, spec))) {
470
- return "this path is outside Files in Scope in autoresearch.md";
471
- }
472
- return null;
500
/**
 * Builds a one-line summary from the `asi` payload of an experiment result.
 *
 * Reads `hypothesis`, `rollback_reason` and `next_action_hint` from
 * `result.asi`; a field that is missing or not a string counts as empty.
 * Each value is trimmed, the non-empty ones are joined with " | ", and the
 * joined text is cut to at most 220 characters.
 *
 * @param result Experiment result whose `asi` fields are summarized.
 * @returns The summary text, or null when all three fields are empty.
 */
+ function summarizeExperimentAsi(result: ExperimentResult): string | null {
501
+ const hypothesis = typeof result.asi?.hypothesis === "string" ? result.asi.hypothesis.trim() : "";
502
+ const rollback = typeof result.asi?.rollback_reason === "string" ? result.asi.rollback_reason.trim() : "";
503
+ const next = typeof result.asi?.next_action_hint === "string" ? result.asi.next_action_hint.trim() : "";
504
+ const summary = [hypothesis, rollback, next].filter(part => part.length > 0).join(" | ");
505
+ return summary.length > 0 ? summary.slice(0, 220) : null;
473
506
  }
474
507
 
475
- function findBestResult(runtime: AutoresearchRuntime): ExperimentResult | null {
508
/**
 * Picks the result with the best metric among the eligible entries of `results`.
 *
 * A result is eligible when its `segment` equals `segment`, its `status` is
 * "keep", and it is not `flagged`. With `direction` "lower" the smallest
 * `metric` wins; otherwise the largest wins. The comparison is strict, so on a
 * tie the earliest eligible result in `results` is kept.
 *
 * @param results Results to scan, in order.
 * @param segment Segment number a result must belong to.
 * @param direction Whether a lower or a higher metric is better.
 * @returns The best eligible result, or null when none is eligible.
 */
+ function bestKeptResult(
509
+ results: ExperimentResult[],
510
+ segment: number,
511
+ direction: "lower" | "higher",
512
+ ): ExperimentResult | null {
476
513
  let best: ExperimentResult | null = null;
477
- for (const result of runtime.state.results) {
478
- if (result.segment !== runtime.state.currentSegment || result.status !== "keep") continue;
514
+ for (const result of results) {
515
+ if (result.segment !== segment || result.status !== "keep" || result.flagged) continue;
479
516
  if (!best) {
480
517
  best = result;
481
518
  continue;
482
519
  }
483
- if (runtime.state.bestDirection === "lower" ? result.metric < best.metric : result.metric > best.metric) {
484
- best = result;
485
- }
520
+ const better = direction === "lower" ? result.metric < best.metric : result.metric > best.metric;
521
+ if (better) best = result;
486
522
  }
487
523
  return best;
488
524
  }
489
- function summaryToChecks(summary: PendingRunSummary | null): ChecksResult | null {
490
- if (!summary || summary.checksPass === null) {
491
- return null;
492
- }
493
- return {
494
- pass: summary.checksPass,
495
- output: "",
496
- duration: summary.checksDurationSeconds ?? 0,
497
- };
498
- }
499
525
 
500
- function looksLikeInternalUrl(value: string): boolean {
501
- return /^[a-z][a-z0-9+.-]*:\/\//i.test(value);
502
- }
503
-
504
- function canonicalizeExistingPath(targetPath: string): string {
526
/**
 * Returns the value resolved by `git.branch.current(cwd)`.
 *
 * Resolves to null instead when that call yields null or undefined, or when
 * it throws or rejects; the error itself is discarded.
 *
 * @param cwd Directory passed to `git.branch.current`.
 */
+ async function tryReadBranch(cwd: string): Promise<string | null> {
505
527
  try {
506
- return fs.realpathSync.native(targetPath);
528
+ return (await git.branch.current(cwd)) ?? null;
507
529
  } catch {
508
- return path.resolve(targetPath);
509
- }
510
- }
511
-
512
- function canonicalizeTargetPath(targetPath: string): string {
513
- const pendingSegments: string[] = [];
514
- let currentPath = path.resolve(targetPath);
515
- while (!fs.existsSync(currentPath)) {
516
- const parentPath = path.dirname(currentPath);
517
- if (parentPath === currentPath) {
518
- return currentPath;
519
- }
520
- pendingSegments.unshift(path.basename(currentPath));
521
- currentPath = parentPath;
530
+ return null;
522
531
  }
523
- return path.resolve(canonicalizeExistingPath(currentPath), ...pendingSegments);
524
532
  }