@oh-my-pi/pi-coding-agent 14.5.14 → 14.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +39 -0
- package/package.json +7 -7
- package/src/autoresearch/command-resume.md +5 -8
- package/src/autoresearch/git.ts +41 -51
- package/src/autoresearch/helpers.ts +43 -359
- package/src/autoresearch/index.ts +281 -273
- package/src/autoresearch/prompt-setup.md +43 -0
- package/src/autoresearch/prompt.md +52 -193
- package/src/autoresearch/resume-message.md +2 -8
- package/src/autoresearch/state.ts +59 -166
- package/src/autoresearch/storage.ts +687 -0
- package/src/autoresearch/tools/init-experiment.ts +201 -290
- package/src/autoresearch/tools/log-experiment.ts +304 -517
- package/src/autoresearch/tools/run-experiment.ts +117 -296
- package/src/autoresearch/tools/update-notes.ts +116 -0
- package/src/autoresearch/types.ts +16 -66
- package/src/config/settings-schema.ts +1 -1
- package/src/config/settings.ts +20 -1
- package/src/cursor.ts +1 -1
- package/src/edit/index.ts +9 -31
- package/src/edit/line-hash.ts +70 -43
- package/src/edit/modes/hashline.lark +26 -0
- package/src/edit/modes/hashline.ts +898 -1099
- package/src/edit/modes/patch.ts +0 -7
- package/src/edit/modes/replace.ts +0 -4
- package/src/edit/renderer.ts +22 -20
- package/src/edit/streaming.ts +8 -28
- package/src/eval/eval.lark +24 -30
- package/src/eval/js/context-manager.ts +5 -162
- package/src/eval/js/prelude.txt +0 -12
- package/src/eval/parse.ts +129 -129
- package/src/eval/py/prelude.py +1 -219
- package/src/export/html/template.generated.ts +1 -1
- package/src/export/html/template.js +2 -2
- package/src/internal-urls/docs-index.generated.ts +1 -1
- package/src/modes/components/session-observer-overlay.ts +5 -2
- package/src/modes/components/status-line/segments.ts +1 -1
- package/src/modes/components/status-line.ts +3 -5
- package/src/modes/components/tree-selector.ts +4 -5
- package/src/modes/components/welcome.ts +11 -1
- package/src/modes/controllers/command-controller.ts +2 -6
- package/src/modes/controllers/event-controller.ts +1 -2
- package/src/modes/controllers/extension-ui-controller.ts +3 -15
- package/src/modes/controllers/input-controller.ts +0 -1
- package/src/modes/controllers/selector-controller.ts +1 -1
- package/src/modes/interactive-mode.ts +5 -7
- package/src/prompts/system/system-prompt.md +14 -38
- package/src/prompts/tools/ast-edit.md +8 -8
- package/src/prompts/tools/ast-grep.md +10 -10
- package/src/prompts/tools/eval.md +13 -31
- package/src/prompts/tools/find.md +2 -1
- package/src/prompts/tools/hashline.md +66 -57
- package/src/prompts/tools/search.md +2 -2
- package/src/session/session-manager.ts +17 -13
- package/src/tools/ast-edit.ts +141 -44
- package/src/tools/ast-grep.ts +112 -36
- package/src/tools/eval.ts +2 -53
- package/src/tools/find.ts +16 -15
- package/src/tools/path-utils.ts +36 -196
- package/src/tools/search.ts +56 -35
- package/src/utils/edit-mode.ts +2 -11
- package/src/utils/file-display-mode.ts +1 -1
- package/src/utils/git.ts +17 -0
- package/src/utils/session-color.ts +0 -12
- package/src/utils/title-generator.ts +22 -38
- package/src/autoresearch/apply-contract-to-state.ts +0 -24
- package/src/autoresearch/contract.ts +0 -288
- package/src/edit/modes/atom.lark +0 -29
- package/src/edit/modes/atom.ts +0 -1773
- package/src/prompts/tools/atom.md +0 -150
|
@@ -1,40 +1,34 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as path from "node:path";
|
|
3
3
|
import type { AutocompleteItem } from "@oh-my-pi/pi-tui";
|
|
4
|
-
import { prompt } from "@oh-my-pi/pi-utils";
|
|
4
|
+
import { logger, prompt } from "@oh-my-pi/pi-utils";
|
|
5
5
|
import type { ExtensionContext, ExtensionFactory } from "../extensibility/extensions";
|
|
6
|
+
import * as git from "../utils/git";
|
|
6
7
|
import commandResumeTemplate from "./command-resume.md" with { type: "text" };
|
|
7
|
-
import { pathMatchesContractPath } from "./contract";
|
|
8
8
|
import { createDashboardController } from "./dashboard";
|
|
9
9
|
import { ensureAutoresearchBranch } from "./git";
|
|
10
|
-
import {
|
|
11
|
-
collectLoggedRunNumbers,
|
|
12
|
-
formatNum,
|
|
13
|
-
isAutoresearchCommittableFile,
|
|
14
|
-
isAutoresearchLocalStatePath,
|
|
15
|
-
normalizeAutoresearchPath,
|
|
16
|
-
readMaxExperiments,
|
|
17
|
-
readPendingRunSummary,
|
|
18
|
-
resolveWorkDir,
|
|
19
|
-
validateWorkDir,
|
|
20
|
-
} from "./helpers";
|
|
10
|
+
import { formatNum } from "./helpers";
|
|
21
11
|
import promptTemplate from "./prompt.md" with { type: "text" };
|
|
12
|
+
import setupPromptTemplate from "./prompt-setup.md" with { type: "text" };
|
|
22
13
|
import resumeMessageTemplate from "./resume-message.md" with { type: "text" };
|
|
23
14
|
import {
|
|
24
|
-
|
|
15
|
+
buildExperimentState,
|
|
25
16
|
createExperimentState,
|
|
26
17
|
createRuntimeStore,
|
|
27
18
|
currentResults,
|
|
28
19
|
findBaselineMetric,
|
|
20
|
+
findBaselineRunNumber,
|
|
21
|
+
findBestKeptMetric,
|
|
29
22
|
reconstructControlState,
|
|
30
|
-
reconstructStateFromJsonl,
|
|
31
23
|
} from "./state";
|
|
24
|
+
import { openAutoresearchStorage, openAutoresearchStorageIfExists, type RunRow, type SessionRow } from "./storage";
|
|
32
25
|
import { createInitExperimentTool } from "./tools/init-experiment";
|
|
33
26
|
import { createLogExperimentTool } from "./tools/log-experiment";
|
|
34
27
|
import { createRunExperimentTool } from "./tools/run-experiment";
|
|
35
|
-
import
|
|
28
|
+
import { createUpdateNotesTool } from "./tools/update-notes";
|
|
29
|
+
import type { AutoresearchRuntime, ExperimentResult, PendingRunSummary } from "./types";
|
|
36
30
|
|
|
37
|
-
const EXPERIMENT_TOOL_NAMES = ["init_experiment", "run_experiment", "log_experiment"];
|
|
31
|
+
const EXPERIMENT_TOOL_NAMES = ["init_experiment", "run_experiment", "log_experiment", "update_notes"];
|
|
38
32
|
|
|
39
33
|
export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
40
34
|
const runtimeStore = createRuntimeStore();
|
|
@@ -43,26 +37,60 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
43
37
|
const getSessionKey = (ctx: ExtensionContext): string => ctx.sessionManager.getSessionId();
|
|
44
38
|
const getRuntime = (ctx: ExtensionContext): AutoresearchRuntime => runtimeStore.ensure(getSessionKey(ctx));
|
|
45
39
|
|
|
40
|
+
const loadActiveSession = async (
|
|
41
|
+
ctx: ExtensionContext,
|
|
42
|
+
): Promise<{ session: SessionRow | null; currentBranch: string | null }> => {
|
|
43
|
+
const currentBranch = await tryReadBranch(ctx.cwd);
|
|
44
|
+
const storage = await openAutoresearchStorageIfExists(ctx.cwd);
|
|
45
|
+
if (!storage) return { session: null, currentBranch };
|
|
46
|
+
const session = storage.getActiveSessionForBranch(currentBranch);
|
|
47
|
+
return { session, currentBranch };
|
|
48
|
+
};
|
|
49
|
+
|
|
46
50
|
const rehydrate = async (ctx: ExtensionContext): Promise<void> => {
|
|
47
51
|
const runtime = getRuntime(ctx);
|
|
48
|
-
const workDir = resolveWorkDir(ctx.cwd);
|
|
49
|
-
const reconstructed = reconstructStateFromJsonl(workDir);
|
|
50
52
|
const control = reconstructControlState(ctx.sessionManager.getBranch());
|
|
51
|
-
const loggedRunNumbers = collectLoggedRunNumbers(reconstructed.state.results);
|
|
52
|
-
runtime.state = cloneExperimentState(reconstructed.state);
|
|
53
|
-
runtime.state.maxExperiments = readMaxExperiments(ctx.cwd);
|
|
54
53
|
runtime.goal = control.goal;
|
|
55
|
-
runtime.autoresearchMode = control.autoresearchMode;
|
|
56
54
|
runtime.autoResumeArmed = false;
|
|
57
55
|
runtime.lastAutoResumePendingRunNumber = null;
|
|
58
|
-
|
|
59
|
-
|
|
56
|
+
|
|
57
|
+
// Skip storage entirely if autoresearch was never activated in this conversation.
|
|
58
|
+
// This is the common case: every project gets a session_start event but most
|
|
59
|
+
// never touch autoresearch, so we must not create a SQLite file just to look.
|
|
60
|
+
const everActivated = control.lastMode !== null;
|
|
61
|
+
const { session, currentBranch } = everActivated
|
|
62
|
+
? await loadActiveSession(ctx)
|
|
63
|
+
: { session: null, currentBranch: null };
|
|
64
|
+
|
|
65
|
+
// Mode is effective only when the recorded session matches the current git
|
|
66
|
+
// branch. When the user switches off the autoresearch branch the widget hides
|
|
67
|
+
// and the experiment tools detach, but the session entries are preserved so
|
|
68
|
+
// switching back resumes seamlessly.
|
|
69
|
+
const onActiveBranch = session === null || session.branch === null || session.branch === currentBranch;
|
|
70
|
+
runtime.autoresearchMode = control.autoresearchMode && onActiveBranch;
|
|
71
|
+
|
|
72
|
+
if (session && onActiveBranch) {
|
|
73
|
+
const storage = await openAutoresearchStorageIfExists(ctx.cwd);
|
|
74
|
+
if (storage) {
|
|
75
|
+
const loggedRuns = storage.listLoggedRuns(session.id);
|
|
76
|
+
runtime.state = buildExperimentState(session, loggedRuns);
|
|
77
|
+
runtime.goal = runtime.goal ?? session.goal;
|
|
78
|
+
runtime.lastRunSummary = pendingRunSummaryFromRow(storage.getPendingRun(session.id));
|
|
79
|
+
} else {
|
|
80
|
+
runtime.state = createExperimentState();
|
|
81
|
+
runtime.lastRunSummary = null;
|
|
82
|
+
}
|
|
83
|
+
} else {
|
|
84
|
+
runtime.state = createExperimentState();
|
|
85
|
+
runtime.lastRunSummary = null;
|
|
86
|
+
}
|
|
60
87
|
runtime.lastRunDuration = runtime.lastRunSummary?.durationSeconds ?? null;
|
|
61
88
|
runtime.lastRunAsi = runtime.lastRunSummary?.parsedAsi ?? null;
|
|
62
89
|
runtime.lastRunArtifactDir = runtime.lastRunSummary?.runDirectory ?? null;
|
|
63
90
|
runtime.lastRunNumber = runtime.lastRunSummary?.runNumber ?? null;
|
|
64
91
|
runtime.runningExperiment = null;
|
|
65
92
|
dashboard.updateWidget(ctx, runtime);
|
|
93
|
+
|
|
66
94
|
const activeTools = api.getActiveTools();
|
|
67
95
|
const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
|
|
68
96
|
const nextActiveTools = runtime.autoresearchMode
|
|
@@ -93,50 +121,21 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
93
121
|
api.registerTool(createInitExperimentTool({ dashboard, getRuntime, pi: api }));
|
|
94
122
|
api.registerTool(createRunExperimentTool({ dashboard, getRuntime, pi: api }));
|
|
95
123
|
api.registerTool(createLogExperimentTool({ dashboard, getRuntime, pi: api }));
|
|
96
|
-
api.
|
|
97
|
-
const runtime = getRuntime(ctx);
|
|
98
|
-
if (!runtime.autoresearchMode) return;
|
|
99
|
-
if (event.toolName !== "write" && event.toolName !== "edit" && event.toolName !== "ast_edit") return;
|
|
100
|
-
|
|
101
|
-
const rawPaths = getGuardedToolPaths(event.toolName, event.input);
|
|
102
|
-
if (rawPaths === null) {
|
|
103
|
-
return {
|
|
104
|
-
block: true,
|
|
105
|
-
reason:
|
|
106
|
-
"Autoresearch requires an explicit target path for this editing tool so it can enforce Files in Scope and Off Limits before changes are made.",
|
|
107
|
-
};
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
const workDir = resolveWorkDir(ctx.cwd);
|
|
111
|
-
for (const rawPath of rawPaths) {
|
|
112
|
-
const relativePath = resolveAutoresearchRelativePath(workDir, rawPath);
|
|
113
|
-
if (!relativePath.ok) {
|
|
114
|
-
return {
|
|
115
|
-
block: true,
|
|
116
|
-
reason: relativePath.reason,
|
|
117
|
-
};
|
|
118
|
-
}
|
|
119
|
-
const validationError = validateEditableAutoresearchPath(relativePath.relativePath, runtime);
|
|
120
|
-
if (validationError) {
|
|
121
|
-
return {
|
|
122
|
-
block: true,
|
|
123
|
-
reason: `Autoresearch blocked edits to ${relativePath.relativePath}: ${validationError}`,
|
|
124
|
-
};
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
});
|
|
124
|
+
api.registerTool(createUpdateNotesTool({ dashboard, getRuntime, pi: api }));
|
|
128
125
|
|
|
129
126
|
api.registerCommand("autoresearch", {
|
|
130
127
|
description: "Toggle builtin autoresearch mode, or pass off / clear, or a goal message.",
|
|
131
128
|
getArgumentCompletions(argumentPrefix: string): AutocompleteItem[] | null {
|
|
132
129
|
if (argumentPrefix.includes(" ")) return null;
|
|
133
130
|
const normalized = argumentPrefix.trim().toLowerCase();
|
|
134
|
-
// No suggestions for an empty argument prefix so Tab after "/autoresearch " does not
|
|
135
|
-
// force-complete into off/clear; bare command submit toggles like /plan.
|
|
136
131
|
if (normalized.length === 0) return null;
|
|
137
132
|
const completions: AutocompleteItem[] = [
|
|
138
133
|
{ label: "off", value: "off", description: "Leave autoresearch mode" },
|
|
139
|
-
{
|
|
134
|
+
{
|
|
135
|
+
label: "clear",
|
|
136
|
+
value: "clear",
|
|
137
|
+
description: "Reset worktree to baseline and close the active session",
|
|
138
|
+
},
|
|
140
139
|
];
|
|
141
140
|
const filtered = completions.filter(item => item.label.startsWith(normalized));
|
|
142
141
|
return filtered.length > 0 ? filtered : null;
|
|
@@ -144,11 +143,6 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
144
143
|
async handler(args, ctx): Promise<void> {
|
|
145
144
|
const trimmed = args.trim();
|
|
146
145
|
const runtime = getRuntime(ctx);
|
|
147
|
-
const workDirError = validateWorkDir(ctx.cwd);
|
|
148
|
-
if (workDirError) {
|
|
149
|
-
ctx.ui.notify(workDirError, "error");
|
|
150
|
-
return;
|
|
151
|
-
}
|
|
152
146
|
|
|
153
147
|
if (trimmed === "" && runtime.autoresearchMode) {
|
|
154
148
|
setMode(ctx, false, runtime.goal, "off");
|
|
@@ -167,59 +161,52 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
167
161
|
ctx.ui.notify("Autoresearch mode disabled", "info");
|
|
168
162
|
return;
|
|
169
163
|
}
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
const
|
|
173
|
-
const
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
}
|
|
177
|
-
if (fs.existsSync(localStatePath)) {
|
|
178
|
-
fs.rmSync(localStatePath, { force: true, recursive: true });
|
|
179
|
-
}
|
|
180
|
-
runtime.state = createExperimentState();
|
|
181
|
-
runtime.state.maxExperiments = readMaxExperiments(ctx.cwd);
|
|
182
|
-
runtime.goal = null;
|
|
183
|
-
runtime.lastRunChecks = null;
|
|
184
|
-
runtime.lastRunDuration = null;
|
|
185
|
-
runtime.lastRunAsi = null;
|
|
186
|
-
runtime.lastRunArtifactDir = null;
|
|
187
|
-
runtime.lastRunNumber = null;
|
|
188
|
-
runtime.lastRunSummary = null;
|
|
189
|
-
setMode(ctx, false, null, "clear");
|
|
190
|
-
dashboard.updateWidget(ctx, runtime);
|
|
191
|
-
const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
|
|
192
|
-
await api.setActiveTools(api.getActiveTools().filter(name => !experimentTools.has(name)));
|
|
193
|
-
ctx.ui.notify("Autoresearch local state cleared", "info");
|
|
164
|
+
|
|
165
|
+
if (trimmed === "clear" || trimmed.startsWith("clear ")) {
|
|
166
|
+
const flagPart = trimmed === "clear" ? "" : trimmed.slice("clear ".length).trim();
|
|
167
|
+
const keepTree = flagPart.includes("--keep-tree");
|
|
168
|
+
const resetTreeForce = flagPart.includes("--reset-tree");
|
|
169
|
+
await handleClear(ctx, runtime, { keepTree, resetTreeForce });
|
|
194
170
|
return;
|
|
195
171
|
}
|
|
196
172
|
|
|
197
|
-
const
|
|
198
|
-
const
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
173
|
+
const goalArg = trimmed.length > 0 ? trimmed : null;
|
|
174
|
+
const branchResult = await ensureAutoresearchBranch(api, ctx.cwd, goalArg ?? runtime.goal);
|
|
175
|
+
if (!branchResult.ok) {
|
|
176
|
+
ctx.ui.notify(branchResult.error, "error");
|
|
177
|
+
return;
|
|
178
|
+
}
|
|
179
|
+
if (branchResult.warning) {
|
|
180
|
+
ctx.ui.notify(branchResult.warning, "warning");
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
// Look up an existing session for the branch we just landed on. A session
|
|
184
|
+
// recorded under a different autoresearch/* branch is intentionally ignored
|
|
185
|
+
// — `/autoresearch` on a fresh branch starts a fresh session. Only open the
|
|
186
|
+
// DB if it already exists; the empty-state path must not create one.
|
|
187
|
+
const existingStorage = await openAutoresearchStorageIfExists(ctx.cwd);
|
|
188
|
+
const existingSession = existingStorage?.getActiveSessionForBranch(branchResult.branchName) ?? null;
|
|
189
|
+
const resumeContext = trimmed;
|
|
190
|
+
const branchStatusLine = branchResult.branchName
|
|
191
|
+
? branchResult.created
|
|
192
|
+
? `Created and checked out dedicated git branch \`${branchResult.branchName}\` before resuming.`
|
|
193
|
+
: `Using dedicated git branch \`${branchResult.branchName}\`.`
|
|
194
|
+
: "Continuing on the current branch — no autoresearch branch was created.";
|
|
213
195
|
|
|
214
|
-
|
|
196
|
+
if (existingSession && existingStorage) {
|
|
197
|
+
if (goalArg) existingStorage.updateSession(existingSession.id, { goal: goalArg });
|
|
198
|
+
if (branchResult.branchName) {
|
|
199
|
+
existingStorage.updateSession(existingSession.id, { branch: branchResult.branchName });
|
|
200
|
+
}
|
|
201
|
+
const refreshed = existingStorage.getSessionById(existingSession.id) ?? existingSession;
|
|
202
|
+
runtime.state = buildExperimentState(refreshed, existingStorage.listLoggedRuns(refreshed.id));
|
|
203
|
+
runtime.goal = refreshed.goal ?? goalArg;
|
|
204
|
+
setMode(ctx, true, runtime.goal, "on");
|
|
215
205
|
dashboard.updateWidget(ctx, runtime);
|
|
216
206
|
await api.setActiveTools([...new Set([...api.getActiveTools(), ...EXPERIMENT_TOOL_NAMES])]);
|
|
217
207
|
api.sendUserMessage(
|
|
218
208
|
prompt.render(commandResumeTemplate, {
|
|
219
|
-
|
|
220
|
-
branch_status_line: branchResult.created
|
|
221
|
-
? `Created and checked out dedicated git branch \`${branchResult.branchName}\` before resuming.`
|
|
222
|
-
: `Using dedicated git branch \`${branchResult.branchName}\`.`,
|
|
209
|
+
branch_status_line: branchStatusLine,
|
|
223
210
|
has_resume_context: resumeContext.length > 0,
|
|
224
211
|
resume_context: resumeContext,
|
|
225
212
|
}),
|
|
@@ -227,18 +214,11 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
227
214
|
return;
|
|
228
215
|
}
|
|
229
216
|
|
|
230
|
-
|
|
231
|
-
const branchResult = await ensureAutoresearchBranch(api, workDir, branchGoal);
|
|
232
|
-
if (!branchResult.ok) {
|
|
233
|
-
ctx.ui.notify(branchResult.error, "error");
|
|
234
|
-
return;
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
setMode(ctx, true, branchGoal, "on");
|
|
217
|
+
setMode(ctx, true, goalArg, "on");
|
|
238
218
|
dashboard.updateWidget(ctx, runtime);
|
|
239
219
|
await api.setActiveTools([...new Set([...api.getActiveTools(), ...EXPERIMENT_TOOL_NAMES])]);
|
|
240
|
-
if (
|
|
241
|
-
api.sendUserMessage(
|
|
220
|
+
if (goalArg !== null) {
|
|
221
|
+
api.sendUserMessage(goalArg);
|
|
242
222
|
} else {
|
|
243
223
|
ctx.ui.notify("Autoresearch enabled—describe what to optimize in your next message.", "info");
|
|
244
224
|
}
|
|
@@ -284,12 +264,11 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
284
264
|
runtime.autoResumeArmed = false;
|
|
285
265
|
return;
|
|
286
266
|
}
|
|
287
|
-
const
|
|
288
|
-
const
|
|
289
|
-
|
|
290
|
-
|
|
267
|
+
const { session } = await loadActiveSession(ctx);
|
|
268
|
+
const storage = session ? await openAutoresearchStorageIfExists(ctx.cwd) : null;
|
|
269
|
+
const pendingRow = session && storage ? storage.getPendingRun(session.id) : null;
|
|
270
|
+
const pendingRun = pendingRunSummaryFromRow(pendingRow);
|
|
291
271
|
runtime.lastRunSummary = pendingRun;
|
|
292
|
-
runtime.lastRunChecks = summaryToChecks(pendingRun);
|
|
293
272
|
runtime.lastRunDuration = pendingRun?.durationSeconds ?? runtime.lastRunDuration;
|
|
294
273
|
runtime.lastRunAsi = pendingRun?.parsedAsi ?? runtime.lastRunAsi;
|
|
295
274
|
const shouldResumePendingRun =
|
|
@@ -299,14 +278,10 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
299
278
|
}
|
|
300
279
|
runtime.autoResumeArmed = false;
|
|
301
280
|
runtime.lastAutoResumePendingRunNumber = pendingRun?.runNumber ?? null;
|
|
302
|
-
const autoresearchMdPath = path.join(workDir, "autoresearch.md");
|
|
303
|
-
const ideasPath = path.join(workDir, "autoresearch.ideas.md");
|
|
304
281
|
api.sendMessage(
|
|
305
282
|
{
|
|
306
283
|
customType: "autoresearch-resume",
|
|
307
284
|
content: prompt.render(resumeMessageTemplate, {
|
|
308
|
-
autoresearch_md_path: autoresearchMdPath,
|
|
309
|
-
has_ideas: fs.existsSync(ideasPath),
|
|
310
285
|
has_pending_run: Boolean(pendingRun),
|
|
311
286
|
}),
|
|
312
287
|
display: false,
|
|
@@ -319,206 +294,239 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
319
294
|
api.on("before_agent_start", async (event, ctx) => {
|
|
320
295
|
const runtime = getRuntime(ctx);
|
|
321
296
|
if (!runtime.autoresearchMode) return;
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
const
|
|
327
|
-
const
|
|
328
|
-
|
|
329
|
-
|
|
297
|
+
// Re-check git branch on every agent start. If the user manually switched
|
|
298
|
+
// off the autoresearch/* branch between turns, we silently drop autoresearch
|
|
299
|
+
// from this turn — the widget hides, the experiment tools detach, and we do
|
|
300
|
+
// not inject the autoresearch system prompt.
|
|
301
|
+
const { session, currentBranch } = await loadActiveSession(ctx);
|
|
302
|
+
const onActiveBranch = session === null || session.branch === null || session.branch === currentBranch;
|
|
303
|
+
if (!onActiveBranch) {
|
|
304
|
+
runtime.autoresearchMode = false;
|
|
305
|
+
runtime.state = createExperimentState();
|
|
306
|
+
runtime.lastRunSummary = null;
|
|
307
|
+
runtime.runningExperiment = null;
|
|
308
|
+
dashboard.updateWidget(ctx, runtime);
|
|
309
|
+
const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
|
|
310
|
+
await api.setActiveTools(api.getActiveTools().filter(name => !experimentTools.has(name)));
|
|
311
|
+
return;
|
|
312
|
+
}
|
|
313
|
+
const storage = await openAutoresearchStorageIfExists(ctx.cwd);
|
|
314
|
+
if (session && storage) {
|
|
315
|
+
runtime.state = buildExperimentState(session, storage.listLoggedRuns(session.id));
|
|
316
|
+
}
|
|
317
|
+
const pendingRow = session && storage ? storage.getPendingRun(session.id) : null;
|
|
318
|
+
const pendingRun = pendingRunSummaryFromRow(pendingRow);
|
|
330
319
|
runtime.lastRunSummary = pendingRun;
|
|
331
|
-
runtime.lastRunChecks = summaryToChecks(pendingRun);
|
|
332
320
|
runtime.lastRunDuration = pendingRun?.durationSeconds ?? runtime.lastRunDuration;
|
|
333
321
|
runtime.lastRunAsi = pendingRun?.parsedAsi ?? runtime.lastRunAsi;
|
|
334
|
-
const
|
|
335
|
-
const
|
|
336
|
-
const
|
|
337
|
-
const
|
|
322
|
+
const state = runtime.state;
|
|
323
|
+
const currentSegmentResults = currentResults(state.results, state.currentSegment);
|
|
324
|
+
const baselineMetric = findBaselineMetric(state.results, state.currentSegment);
|
|
325
|
+
const baselineRunNumber = findBaselineRunNumber(state.results, state.currentSegment);
|
|
326
|
+
const bestMetric = findBestKeptMetric(state.results, state.currentSegment, state.bestDirection);
|
|
327
|
+
const bestResult = bestKeptResult(state.results, state.currentSegment, state.bestDirection);
|
|
328
|
+
const goal = runtime.goal ?? state.goal ?? state.name ?? "";
|
|
338
329
|
const recentResults = currentSegmentResults.slice(-3).map(result => {
|
|
339
330
|
const asiSummary = summarizeExperimentAsi(result);
|
|
340
331
|
return {
|
|
341
332
|
asi_summary: asiSummary,
|
|
342
333
|
description: result.description,
|
|
343
334
|
has_asi_summary: Boolean(asiSummary),
|
|
344
|
-
metric_display: formatNum(result.metric,
|
|
345
|
-
run_number: result.runNumber ??
|
|
335
|
+
metric_display: formatNum(result.metric, state.metricUnit),
|
|
336
|
+
run_number: result.runNumber ?? state.results.indexOf(result) + 1,
|
|
346
337
|
status: result.status,
|
|
338
|
+
has_deviations: result.scopeDeviations.length > 0,
|
|
339
|
+
deviations: result.scopeDeviations.join(", "),
|
|
340
|
+
justified: Boolean(result.justification),
|
|
341
|
+
flagged: result.flagged,
|
|
342
|
+
flagged_reason: result.flaggedReason ?? "",
|
|
347
343
|
};
|
|
348
344
|
});
|
|
349
|
-
const
|
|
345
|
+
const unjustifiedRuns = currentSegmentResults
|
|
346
|
+
.filter(r => r.status === "keep" && !r.flagged && r.scopeDeviations.length > 0 && !r.justification)
|
|
347
|
+
.slice(-3)
|
|
348
|
+
.map(r => ({
|
|
349
|
+
run_number: r.runNumber,
|
|
350
|
+
paths: r.scopeDeviations.join(", "),
|
|
351
|
+
}));
|
|
352
|
+
if (!session) {
|
|
353
|
+
const currentBranch = await tryReadBranch(ctx.cwd);
|
|
354
|
+
const onAutoresearchBranch = currentBranch?.startsWith("autoresearch/") ?? false;
|
|
355
|
+
const baselineWarning = onAutoresearchBranch
|
|
356
|
+
? null
|
|
357
|
+
: "Heads up: you are not on a dedicated `autoresearch/*` branch. `log_experiment discard` will only revert run-modified files, not reset to baseline — so harness files written before `init_experiment` may not survive a discard. Clean the worktree and re-run `/autoresearch` if you want full revert safety.";
|
|
358
|
+
return {
|
|
359
|
+
systemPrompt: prompt.render(setupPromptTemplate, {
|
|
360
|
+
base_system_prompt: event.systemPrompt,
|
|
361
|
+
has_goal: goal.trim().length > 0,
|
|
362
|
+
goal,
|
|
363
|
+
working_dir: ctx.cwd,
|
|
364
|
+
has_branch: Boolean(currentBranch),
|
|
365
|
+
branch: currentBranch ?? "",
|
|
366
|
+
has_baseline_warning: baselineWarning !== null,
|
|
367
|
+
baseline_warning: baselineWarning ?? "",
|
|
368
|
+
}),
|
|
369
|
+
};
|
|
370
|
+
}
|
|
350
371
|
return {
|
|
351
372
|
systemPrompt: prompt.render(promptTemplate, {
|
|
352
373
|
base_system_prompt: event.systemPrompt,
|
|
353
374
|
has_goal: goal.trim().length > 0,
|
|
354
375
|
goal,
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
program_path: programPath,
|
|
366
|
-
current_segment: runtime.state.currentSegment + 1,
|
|
376
|
+
working_dir: ctx.cwd,
|
|
377
|
+
default_metric_name: state.metricName,
|
|
378
|
+
metric_name: state.metricName,
|
|
379
|
+
has_branch: Boolean(state.branch),
|
|
380
|
+
branch: state.branch,
|
|
381
|
+
has_baseline_commit: Boolean(state.baselineCommit),
|
|
382
|
+
baseline_commit: state.baselineCommit ? state.baselineCommit.slice(0, 12) : "",
|
|
383
|
+
has_notes: state.notes.trim().length > 0,
|
|
384
|
+
notes: state.notes,
|
|
385
|
+
current_segment: state.currentSegment + 1,
|
|
367
386
|
current_segment_run_count: currentSegmentResults.length,
|
|
368
387
|
has_baseline_metric: baselineMetric !== null,
|
|
369
|
-
baseline_metric_display: formatNum(baselineMetric,
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
best_run_number: bestResult
|
|
375
|
-
? (bestResult.runNumber ?? runtime.state.results.indexOf(bestResult) + 1)
|
|
376
|
-
: null,
|
|
388
|
+
baseline_metric_display: formatNum(baselineMetric, state.metricUnit),
|
|
389
|
+
baseline_run_number: baselineRunNumber,
|
|
390
|
+
has_best_result: bestResult !== null && bestMetric !== null,
|
|
391
|
+
best_metric_display: bestMetric !== null ? formatNum(bestMetric, state.metricUnit) : "-",
|
|
392
|
+
best_run_number: bestResult ? (bestResult.runNumber ?? state.results.indexOf(bestResult) + 1) : null,
|
|
377
393
|
has_recent_results: recentResults.length > 0,
|
|
378
394
|
recent_results: recentResults,
|
|
395
|
+
has_unjustified_runs: unjustifiedRuns.length > 0,
|
|
396
|
+
unjustified_runs: unjustifiedRuns,
|
|
379
397
|
has_pending_run: Boolean(pendingRun),
|
|
380
398
|
pending_run_number: pendingRun?.runNumber,
|
|
381
399
|
pending_run_command: pendingRun?.command,
|
|
382
|
-
pending_run_directory: pendingRun?.runDirectory,
|
|
383
400
|
pending_run_passed: pendingRun?.passed ?? false,
|
|
384
401
|
has_pending_run_metric: pendingRun?.parsedPrimary !== null && pendingRun?.parsedPrimary !== undefined,
|
|
385
402
|
pending_run_metric_display:
|
|
386
403
|
pendingRun?.parsedPrimary !== null && pendingRun?.parsedPrimary !== undefined
|
|
387
|
-
? formatNum(pendingRun.parsedPrimary,
|
|
404
|
+
? formatNum(pendingRun.parsedPrimary, state.metricUnit)
|
|
388
405
|
: null,
|
|
389
406
|
}),
|
|
390
407
|
};
|
|
391
408
|
});
|
|
392
|
-
};
|
|
393
409
|
|
|
394
|
-
function
|
|
395
|
-
|
|
396
|
-
|
|
410
|
+
async function handleClear(
|
|
411
|
+
ctx: ExtensionContext,
|
|
412
|
+
runtime: AutoresearchRuntime,
|
|
413
|
+
opts: { keepTree: boolean; resetTreeForce: boolean },
|
|
414
|
+
): Promise<void> {
|
|
415
|
+
const storage = await openAutoresearchStorage(ctx.cwd);
|
|
416
|
+
const session = storage.getActiveSession();
|
|
417
|
+
const branchName = await tryReadBranch(ctx.cwd);
|
|
418
|
+
const onAutoresearchBranch = branchName?.startsWith("autoresearch/") ?? false;
|
|
419
|
+
const shouldResetTree = !opts.keepTree && (onAutoresearchBranch || opts.resetTreeForce);
|
|
420
|
+
if (shouldResetTree && session?.baselineCommit) {
|
|
421
|
+
try {
|
|
422
|
+
await git.reset(ctx.cwd, { hard: true, target: session.baselineCommit });
|
|
423
|
+
await git.clean(ctx.cwd);
|
|
424
|
+
ctx.ui.notify(`Reset worktree to baseline ${session.baselineCommit.slice(0, 12)}.`, "info");
|
|
425
|
+
} catch (err) {
|
|
426
|
+
ctx.ui.notify(
|
|
427
|
+
`Failed to reset worktree to baseline: ${err instanceof Error ? err.message : String(err)}`,
|
|
428
|
+
"error",
|
|
429
|
+
);
|
|
430
|
+
}
|
|
431
|
+
} else if (shouldResetTree) {
|
|
432
|
+
ctx.ui.notify("No baseline commit recorded — skipped worktree reset.", "warning");
|
|
433
|
+
}
|
|
397
434
|
|
|
398
|
-
|
|
399
|
-
const hypothesis = typeof result.asi?.hypothesis === "string" ? result.asi.hypothesis.trim() : "";
|
|
400
|
-
const rollbackReason = typeof result.asi?.rollback_reason === "string" ? result.asi.rollback_reason.trim() : "";
|
|
401
|
-
const nextActionHint = typeof result.asi?.next_action_hint === "string" ? result.asi.next_action_hint.trim() : "";
|
|
402
|
-
const summary = [hypothesis, rollbackReason, nextActionHint].filter(part => part.length > 0).join(" | ");
|
|
403
|
-
return summary.length > 0 ? summary.slice(0, 220) : null;
|
|
404
|
-
}
|
|
435
|
+
removeLegacyArtifacts(ctx.cwd);
|
|
405
436
|
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
437
|
+
if (session) {
|
|
438
|
+
storage.closeSession(session.id);
|
|
439
|
+
}
|
|
440
|
+
runtime.state = createExperimentState();
|
|
441
|
+
runtime.goal = null;
|
|
442
|
+
runtime.lastRunDuration = null;
|
|
443
|
+
runtime.lastRunAsi = null;
|
|
444
|
+
runtime.lastRunArtifactDir = null;
|
|
445
|
+
runtime.lastRunNumber = null;
|
|
446
|
+
runtime.lastRunSummary = null;
|
|
447
|
+
setMode(ctx, false, null, "clear");
|
|
448
|
+
dashboard.updateWidget(ctx, runtime);
|
|
449
|
+
const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
|
|
450
|
+
await api.setActiveTools(api.getActiveTools().filter(name => !experimentTools.has(name)));
|
|
451
|
+
ctx.ui.notify("Autoresearch session cleared.", "info");
|
|
415
452
|
}
|
|
453
|
+
};
|
|
416
454
|
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
455
|
+
const LEGACY_ARTIFACTS = [
|
|
456
|
+
"autoresearch.md",
|
|
457
|
+
"autoresearch.sh",
|
|
458
|
+
"autoresearch.checks.sh",
|
|
459
|
+
"autoresearch.program.md",
|
|
460
|
+
"autoresearch.ideas.md",
|
|
461
|
+
"autoresearch.jsonl",
|
|
462
|
+
"autoresearch.config.json",
|
|
463
|
+
".autoresearch",
|
|
464
|
+
];
|
|
465
|
+
|
|
466
|
+
/**
 * Deletes every entry of `LEGACY_ARTIFACTS` found under `workDir`.
 *
 * Removal is best-effort: `force: true` makes a missing path a no-op, and any
 * error raised while deleting one entry is logged as a warning so the
 * remaining entries are still processed.
 *
 * @param workDir Directory the artifact names are resolved against.
 */
function removeLegacyArtifacts(workDir: string): void {
	const targets = LEGACY_ARTIFACTS.map(name => path.join(workDir, name));
	targets.forEach(target => {
		try {
			// `recursive` is required because some entries are directories.
			fs.rmSync(target, { recursive: true, force: true });
		} catch (err) {
			const message = err instanceof Error ? err.message : String(err);
			logger.warn("Failed to remove legacy autoresearch artifact", {
				path: target,
				error: message,
			});
		}
	});
}
|
|
429
479
|
|
|
430
|
-
function
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
return {
|
|
436
|
-
ok: false,
|
|
437
|
-
reason: `Autoresearch cannot validate internal URL paths during scoped editing: ${rawPath}`,
|
|
438
|
-
};
|
|
439
|
-
}
|
|
440
|
-
const resolvedPath = path.isAbsolute(rawPath) ? path.resolve(rawPath) : path.resolve(workDir, rawPath);
|
|
441
|
-
const canonicalWorkDir = canonicalizeExistingPath(workDir);
|
|
442
|
-
const canonicalTargetPath = canonicalizeTargetPath(resolvedPath);
|
|
443
|
-
const relativePath = path.relative(canonicalWorkDir, canonicalTargetPath);
|
|
444
|
-
if (relativePath === ".." || relativePath.startsWith(`..${path.sep}`) || path.isAbsolute(relativePath)) {
|
|
445
|
-
return {
|
|
446
|
-
ok: false,
|
|
447
|
-
reason: `Autoresearch blocked edits outside the working tree: ${rawPath}`,
|
|
448
|
-
};
|
|
449
|
-
}
|
|
480
|
+
/**
 * Converts a stored run row into a pending-run summary.
 *
 * A row only counts as pending when it exists, has no status assigned yet
 * (`status === null`) and has finished (`completedAt !== null`); every other
 * row yields `null`.
 *
 * @param row Run row to convert, or `null` when there is none.
 * @returns The summary for a finished, not-yet-logged run, otherwise `null`.
 */
function pendingRunSummaryFromRow(row: RunRow | null): PendingRunSummary | null {
	if (!row || row.status !== null || row.completedAt === null) {
		return null;
	}

	const { exitCode, timedOut, durationMs } = row;
	// Duration is stored in milliseconds; the summary reports seconds.
	const durationSeconds = durationMs === null ? null : durationMs / 1000;

	return {
		command: row.command,
		durationSeconds,
		parsedAsi: row.parsedAsi,
		parsedMetrics: row.parsedMetrics,
		parsedPrimary: row.parsedPrimary,
		// A run passes only with a zero exit code and no timeout.
		passed: exitCode === 0 && !timedOut,
		preRunDirtyPaths: row.preRunDirtyPaths,
		// The run directory is the folder that holds the run's log file.
		runDirectory: path.dirname(row.logPath),
		runNumber: row.id,
		exitCode,
		timedOut,
	};
}
|
|
455
499
|
|
|
456
|
-
function
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
}
|
|
463
|
-
if (isAutoresearchCommittableFile(relativePath)) {
|
|
464
|
-
return null;
|
|
465
|
-
}
|
|
466
|
-
if (runtime.state.scopePaths.length === 0) {
|
|
467
|
-
return "Files in Scope is not initialized yet; only autoresearch control files may be edited before init_experiment runs";
|
|
468
|
-
}
|
|
469
|
-
if (!runtime.state.scopePaths.some(spec => pathMatchesContractPath(relativePath, spec))) {
|
|
470
|
-
return "this path is outside Files in Scope in autoresearch.md";
|
|
471
|
-
}
|
|
472
|
-
return null;
|
|
500
|
+
/**
 * Builds a one-line summary from an experiment result's ASI fields.
 *
 * The `hypothesis`, `rollback_reason` and `next_action_hint` values are read
 * in that order; non-string and blank values are skipped, the rest are
 * trimmed and joined with " | ". The joined text is capped at 220 characters.
 *
 * @param result Experiment result whose `asi` fields are summarized.
 * @returns The summary text, or `null` when no field has usable text.
 */
function summarizeExperimentAsi(result: ExperimentResult): string | null {
	const fragments: string[] = [];
	for (const key of ["hypothesis", "rollback_reason", "next_action_hint"] as const) {
		const raw = result.asi?.[key];
		if (typeof raw !== "string") continue;
		const text = raw.trim();
		if (text.length > 0) fragments.push(text);
	}
	if (fragments.length === 0) {
		return null;
	}
	return fragments.join(" | ").slice(0, 220);
}
|
|
474
507
|
|
|
475
|
-
function
|
|
508
|
+
/**
 * Picks the best kept result within one segment.
 *
 * Only results that belong to `segment`, have status "keep" and are not
 * flagged are considered. "Best" means the smallest metric when `direction`
 * is "lower" and the largest when it is "higher"; on a tie the earliest
 * result in `results` wins because only a strictly better metric replaces it.
 *
 * @param results All experiment results to search.
 * @param segment Segment number the result must belong to.
 * @param direction Whether a lower or a higher metric is better.
 * @returns The best eligible result, or `null` when none is eligible.
 */
function bestKeptResult(
	results: ExperimentResult[],
	segment: number,
	direction: "lower" | "higher",
): ExperimentResult | null {
	const prefersLower = direction === "lower";
	const eligible = results.filter(
		candidate => candidate.segment === segment && candidate.status === "keep" && !candidate.flagged,
	);
	return eligible.reduce<ExperimentResult | null>((champion, challenger) => {
		if (!champion) {
			return challenger;
		}
		const wins = prefersLower ? challenger.metric < champion.metric : challenger.metric > champion.metric;
		return wins ? challenger : champion;
	}, null);
}
|
|
489
|
-
/**
 * Derives a checks result from a pending run summary.
 *
 * @param summary Pending run summary, or `null` when there is none.
 * @returns `null` when there is no summary or its `checksPass` is `null`;
 * otherwise a result carrying the pass flag, an empty output string and the
 * checks duration (0 when the summary has none recorded).
 */
function summaryToChecks(summary: PendingRunSummary | null): ChecksResult | null {
	if (summary && summary.checksPass !== null) {
		const duration = summary.checksDurationSeconds ?? 0;
		return { pass: summary.checksPass, output: "", duration };
	}
	return null;
}
|
|
499
525
|
|
|
500
|
-
function
|
|
501
|
-
return /^[a-z][a-z0-9+.-]*:\/\//i.test(value);
|
|
502
|
-
}
|
|
503
|
-
|
|
504
|
-
function canonicalizeExistingPath(targetPath: string): string {
|
|
526
|
+
/**
 * Reads the current git branch for `cwd` without ever throwing.
 *
 * @param cwd Directory passed to `git.branch.current`.
 * @returns The branch reported by `git.branch.current`, or `null` when that
 * call yields a nullish value or throws.
 */
async function tryReadBranch(cwd: string): Promise<string | null> {
	let branch: string | null | undefined;
	try {
		branch = await git.branch.current(cwd);
	} catch {
		// Any failure while reading the branch is treated as "no branch".
		return null;
	}
	return branch ?? null;
}
|