npm - @oh-my-pi/pi-coding-agent - Versions diffs - 14.5.14 → 14.6.0 - Mend

@oh-my-pi/pi-coding-agent 14.5.14 → 14.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70)

package/CHANGELOG.md +39 -0
package/package.json +7 -7
package/src/autoresearch/command-resume.md +5 -8
package/src/autoresearch/git.ts +41 -51
package/src/autoresearch/helpers.ts +43 -359
package/src/autoresearch/index.ts +281 -273
package/src/autoresearch/prompt-setup.md +43 -0
package/src/autoresearch/prompt.md +52 -193
package/src/autoresearch/resume-message.md +2 -8
package/src/autoresearch/state.ts +59 -166
package/src/autoresearch/storage.ts +687 -0
package/src/autoresearch/tools/init-experiment.ts +201 -290
package/src/autoresearch/tools/log-experiment.ts +304 -517
package/src/autoresearch/tools/run-experiment.ts +117 -296
package/src/autoresearch/tools/update-notes.ts +116 -0
package/src/autoresearch/types.ts +16 -66
package/src/config/settings-schema.ts +1 -1
package/src/config/settings.ts +20 -1
package/src/cursor.ts +1 -1
package/src/edit/index.ts +9 -31
package/src/edit/line-hash.ts +70 -43
package/src/edit/modes/hashline.lark +26 -0
package/src/edit/modes/hashline.ts +898 -1099
package/src/edit/modes/patch.ts +0 -7
package/src/edit/modes/replace.ts +0 -4
package/src/edit/renderer.ts +22 -20
package/src/edit/streaming.ts +8 -28
package/src/eval/eval.lark +24 -30
package/src/eval/js/context-manager.ts +5 -162
package/src/eval/js/prelude.txt +0 -12
package/src/eval/parse.ts +129 -129
package/src/eval/py/prelude.py +1 -219
package/src/export/html/template.generated.ts +1 -1
package/src/export/html/template.js +2 -2
package/src/internal-urls/docs-index.generated.ts +1 -1
package/src/modes/components/session-observer-overlay.ts +5 -2
package/src/modes/components/status-line/segments.ts +1 -1
package/src/modes/components/status-line.ts +3 -5
package/src/modes/components/tree-selector.ts +4 -5
package/src/modes/components/welcome.ts +11 -1
package/src/modes/controllers/command-controller.ts +2 -6
package/src/modes/controllers/event-controller.ts +1 -2
package/src/modes/controllers/extension-ui-controller.ts +3 -15
package/src/modes/controllers/input-controller.ts +0 -1
package/src/modes/controllers/selector-controller.ts +1 -1
package/src/modes/interactive-mode.ts +5 -7
package/src/prompts/system/system-prompt.md +14 -38
package/src/prompts/tools/ast-edit.md +8 -8
package/src/prompts/tools/ast-grep.md +10 -10
package/src/prompts/tools/eval.md +13 -31
package/src/prompts/tools/find.md +2 -1
package/src/prompts/tools/hashline.md +66 -57
package/src/prompts/tools/search.md +2 -2
package/src/session/session-manager.ts +17 -13
package/src/tools/ast-edit.ts +141 -44
package/src/tools/ast-grep.ts +112 -36
package/src/tools/eval.ts +2 -53
package/src/tools/find.ts +16 -15
package/src/tools/path-utils.ts +36 -196
package/src/tools/search.ts +56 -35
package/src/utils/edit-mode.ts +2 -11
package/src/utils/file-display-mode.ts +1 -1
package/src/utils/git.ts +17 -0
package/src/utils/session-color.ts +0 -12
package/src/utils/title-generator.ts +22 -38
package/src/autoresearch/apply-contract-to-state.ts +0 -24
package/src/autoresearch/contract.ts +0 -288
package/src/edit/modes/atom.lark +0 -29
package/src/edit/modes/atom.ts +0 -1773
package/src/prompts/tools/atom.md +0 -150

package/src/autoresearch/index.ts CHANGED

@@ -1,40 +1,34 @@
 import * as fs from "node:fs";
 import * as path from "node:path";
 import type { AutocompleteItem } from "@oh-my-pi/pi-tui";
-import { prompt } from "@oh-my-pi/pi-utils";
+import { logger, prompt } from "@oh-my-pi/pi-utils";
 import type { ExtensionContext, ExtensionFactory } from "../extensibility/extensions";
+import * as git from "../utils/git";
 import commandResumeTemplate from "./command-resume.md" with { type: "text" };
-import { pathMatchesContractPath } from "./contract";
 import { createDashboardController } from "./dashboard";
 import { ensureAutoresearchBranch } from "./git";
-import {
-	collectLoggedRunNumbers,
-	formatNum,
-	isAutoresearchCommittableFile,
-	isAutoresearchLocalStatePath,
-	normalizeAutoresearchPath,
-	readMaxExperiments,
-	readPendingRunSummary,
-	resolveWorkDir,
-	validateWorkDir,
-} from "./helpers";
+import { formatNum } from "./helpers";
 import promptTemplate from "./prompt.md" with { type: "text" };
+import setupPromptTemplate from "./prompt-setup.md" with { type: "text" };
 import resumeMessageTemplate from "./resume-message.md" with { type: "text" };
 import {
-	cloneExperimentState,
+	buildExperimentState,
 	createExperimentState,
 	createRuntimeStore,
 	currentResults,
 	findBaselineMetric,
+	findBaselineRunNumber,
+	findBestKeptMetric,
 	reconstructControlState,
-	reconstructStateFromJsonl,
 } from "./state";
+import { openAutoresearchStorage, openAutoresearchStorageIfExists, type RunRow, type SessionRow } from "./storage";
 import { createInitExperimentTool } from "./tools/init-experiment";
 import { createLogExperimentTool } from "./tools/log-experiment";
 import { createRunExperimentTool } from "./tools/run-experiment";
-import type { AutoresearchRuntime, ChecksResult, ExperimentResult, PendingRunSummary } from "./types";
+import { createUpdateNotesTool } from "./tools/update-notes";
+import type { AutoresearchRuntime, ExperimentResult, PendingRunSummary } from "./types";
-const EXPERIMENT_TOOL_NAMES = ["init_experiment", "run_experiment", "log_experiment"];
+const EXPERIMENT_TOOL_NAMES = ["init_experiment", "run_experiment", "log_experiment", "update_notes"];
 export const createAutoresearchExtension: ExtensionFactory = api => {
 	const runtimeStore = createRuntimeStore();
@@ -43,26 +37,60 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
 	const getSessionKey = (ctx: ExtensionContext): string => ctx.sessionManager.getSessionId();
 	const getRuntime = (ctx: ExtensionContext): AutoresearchRuntime => runtimeStore.ensure(getSessionKey(ctx));
+	const loadActiveSession = async (
+		ctx: ExtensionContext,
+	): Promise<{ session: SessionRow | null; currentBranch: string | null }> => {
+		const currentBranch = await tryReadBranch(ctx.cwd);
+		const storage = await openAutoresearchStorageIfExists(ctx.cwd);
+		if (!storage) return { session: null, currentBranch };
+		const session = storage.getActiveSessionForBranch(currentBranch);
+		return { session, currentBranch };
+	};
 	const rehydrate = async (ctx: ExtensionContext): Promise<void> => {
 		const runtime = getRuntime(ctx);
-		const workDir = resolveWorkDir(ctx.cwd);
-		const reconstructed = reconstructStateFromJsonl(workDir);
 		const control = reconstructControlState(ctx.sessionManager.getBranch());
-		const loggedRunNumbers = collectLoggedRunNumbers(reconstructed.state.results);
-		runtime.state = cloneExperimentState(reconstructed.state);
-		runtime.state.maxExperiments = readMaxExperiments(ctx.cwd);
 		runtime.goal = control.goal;
-		runtime.autoresearchMode = control.autoresearchMode;
 		runtime.autoResumeArmed = false;
 		runtime.lastAutoResumePendingRunNumber = null;
-		runtime.lastRunSummary = await readPendingRunSummary(workDir, loggedRunNumbers);
-		runtime.lastRunChecks = summaryToChecks(runtime.lastRunSummary);
+		// Skip storage entirely if autoresearch was never activated in this conversation.
+		// This is the common case: every project gets a session_start event but most
+		// never touch autoresearch, so we must not create a SQLite file just to look.
+		const everActivated = control.lastMode !== null;
+		const { session, currentBranch } = everActivated
+			? await loadActiveSession(ctx)
+			: { session: null, currentBranch: null };
+		// Mode is effective only when the recorded session matches the current git
+		// branch. When the user switches off the autoresearch branch the widget hides
+		// and the experiment tools detach, but the session entries are preserved so
+		// switching back resumes seamlessly.
+		const onActiveBranch = session === null || session.branch === null || session.branch === currentBranch;
+		runtime.autoresearchMode = control.autoresearchMode && onActiveBranch;
+		if (session && onActiveBranch) {
+			const storage = await openAutoresearchStorageIfExists(ctx.cwd);
+			if (storage) {
+				const loggedRuns = storage.listLoggedRuns(session.id);
+				runtime.state = buildExperimentState(session, loggedRuns);
+				runtime.goal = runtime.goal ?? session.goal;
+				runtime.lastRunSummary = pendingRunSummaryFromRow(storage.getPendingRun(session.id));
+			} else {
+				runtime.state = createExperimentState();
+				runtime.lastRunSummary = null;
+			}
+		} else {
+			runtime.state = createExperimentState();
+			runtime.lastRunSummary = null;
+		}
 		runtime.lastRunDuration = runtime.lastRunSummary?.durationSeconds ?? null;
 		runtime.lastRunAsi = runtime.lastRunSummary?.parsedAsi ?? null;
 		runtime.lastRunArtifactDir = runtime.lastRunSummary?.runDirectory ?? null;
 		runtime.lastRunNumber = runtime.lastRunSummary?.runNumber ?? null;
 		runtime.runningExperiment = null;
 		dashboard.updateWidget(ctx, runtime);
 		const activeTools = api.getActiveTools();
 		const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
 		const nextActiveTools = runtime.autoresearchMode
@@ -93,50 +121,21 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
 	api.registerTool(createInitExperimentTool({ dashboard, getRuntime, pi: api }));
 	api.registerTool(createRunExperimentTool({ dashboard, getRuntime, pi: api }));
 	api.registerTool(createLogExperimentTool({ dashboard, getRuntime, pi: api }));
-	api.on("tool_call", (event, ctx) => {
-		const runtime = getRuntime(ctx);
-		if (!runtime.autoresearchMode) return;
-		if (event.toolName !== "write" && event.toolName !== "edit" && event.toolName !== "ast_edit") return;
-		const rawPaths = getGuardedToolPaths(event.toolName, event.input);
-		if (rawPaths === null) {
-			return {
-				block: true,
-				reason:
-					"Autoresearch requires an explicit target path for this editing tool so it can enforce Files in Scope and Off Limits before changes are made.",
-			};
-		}
-		const workDir = resolveWorkDir(ctx.cwd);
-		for (const rawPath of rawPaths) {
-			const relativePath = resolveAutoresearchRelativePath(workDir, rawPath);
-			if (!relativePath.ok) {
-				return {
-					block: true,
-					reason: relativePath.reason,
-				};
-			}
-			const validationError = validateEditableAutoresearchPath(relativePath.relativePath, runtime);
-			if (validationError) {
-				return {
-					block: true,
-					reason: `Autoresearch blocked edits to ${relativePath.relativePath}: ${validationError}`,
-				};
-			}
-		}
-	});
+	api.registerTool(createUpdateNotesTool({ dashboard, getRuntime, pi: api }));
 	api.registerCommand("autoresearch", {
 		description: "Toggle builtin autoresearch mode, or pass off / clear, or a goal message.",
 		getArgumentCompletions(argumentPrefix: string): AutocompleteItem[] | null {
 			if (argumentPrefix.includes(" ")) return null;
 			const normalized = argumentPrefix.trim().toLowerCase();
-			// No suggestions for an empty argument prefix so Tab after "/autoresearch " does not
-			// force-complete into off/clear; bare command submit toggles like /plan.
 			if (normalized.length === 0) return null;
 			const completions: AutocompleteItem[] = [
 				{ label: "off", value: "off", description: "Leave autoresearch mode" },
-				{ label: "clear", value: "clear", description: "Delete autoresearch.jsonl and leave autoresearch mode" },
+				{
+					label: "clear",
+					value: "clear",
+					description: "Reset worktree to baseline and close the active session",
+				},
 			];
 			const filtered = completions.filter(item => item.label.startsWith(normalized));
 			return filtered.length > 0 ? filtered : null;
@@ -144,11 +143,6 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
 		async handler(args, ctx): Promise<void> {
 			const trimmed = args.trim();
 			const runtime = getRuntime(ctx);
-			const workDirError = validateWorkDir(ctx.cwd);
-			if (workDirError) {
-				ctx.ui.notify(workDirError, "error");
-				return;
-			}
 			if (trimmed === "" && runtime.autoresearchMode) {
 				setMode(ctx, false, runtime.goal, "off");
@@ -167,59 +161,52 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
 				ctx.ui.notify("Autoresearch mode disabled", "info");
 				return;
 			}
-			if (trimmed === "clear") {
-				const workDir = resolveWorkDir(ctx.cwd);
-				const jsonlPath = path.join(workDir, "autoresearch.jsonl");
-				const localStatePath = path.join(workDir, ".autoresearch");
-				if (fs.existsSync(jsonlPath)) {
-					fs.rmSync(jsonlPath);
-				}
-				if (fs.existsSync(localStatePath)) {
-					fs.rmSync(localStatePath, { force: true, recursive: true });
-				}
-				runtime.state = createExperimentState();
-				runtime.state.maxExperiments = readMaxExperiments(ctx.cwd);
-				runtime.goal = null;
-				runtime.lastRunChecks = null;
-				runtime.lastRunDuration = null;
-				runtime.lastRunAsi = null;
-				runtime.lastRunArtifactDir = null;
-				runtime.lastRunNumber = null;
-				runtime.lastRunSummary = null;
-				setMode(ctx, false, null, "clear");
-				dashboard.updateWidget(ctx, runtime);
-				const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
-				await api.setActiveTools(api.getActiveTools().filter(name => !experimentTools.has(name)));
-				ctx.ui.notify("Autoresearch local state cleared", "info");
+			if (trimmed === "clear" || trimmed.startsWith("clear ")) {
+				const flagPart = trimmed === "clear" ? "" : trimmed.slice("clear ".length).trim();
+				const keepTree = flagPart.includes("--keep-tree");
+				const resetTreeForce = flagPart.includes("--reset-tree");
+				await handleClear(ctx, runtime, { keepTree, resetTreeForce });
 				return;
 			}
-			const workDir = resolveWorkDir(ctx.cwd);
-			const autoresearchMdPath = path.join(workDir, "autoresearch.md");
-			const hasAutoresearchMd = fs.existsSync(autoresearchMdPath);
-			const controlState = reconstructControlState(ctx.sessionManager.getBranch());
-			const shouldResumeExistingNotes =
-				hasAutoresearchMd &&
-				(hasLocalAutoresearchState(workDir) || (controlState.lastMode !== "clear" && trimmed.length === 0));
-			if (shouldResumeExistingNotes) {
-				const resumeContext = trimmed;
-				const resumeGoal = runtime.goal ?? runtime.state.name ?? null;
-				const branchResult = await ensureAutoresearchBranch(api, workDir, resumeGoal);
-				if (!branchResult.ok) {
-					ctx.ui.notify(branchResult.error, "error");
-					return;
-				}
+			const goalArg = trimmed.length > 0 ? trimmed : null;
+			const branchResult = await ensureAutoresearchBranch(api, ctx.cwd, goalArg ?? runtime.goal);
+			if (!branchResult.ok) {
+				ctx.ui.notify(branchResult.error, "error");
+				return;
+			}
+			if (branchResult.warning) {
+				ctx.ui.notify(branchResult.warning, "warning");
+			}
+			// Look up an existing session for the branch we just landed on. A session
+			// recorded under a different autoresearch/* branch is intentionally ignored
+			// — `/autoresearch` on a fresh branch starts a fresh session. Only open the
+			// DB if it already exists; the empty-state path must not create one.
+			const existingStorage = await openAutoresearchStorageIfExists(ctx.cwd);
+			const existingSession = existingStorage?.getActiveSessionForBranch(branchResult.branchName) ?? null;
+			const resumeContext = trimmed;
+			const branchStatusLine = branchResult.branchName
+				? branchResult.created
+					? `Created and checked out dedicated git branch \`${branchResult.branchName}\` before resuming.`
+					: `Using dedicated git branch \`${branchResult.branchName}\`.`
+				: "Continuing on the current branch — no autoresearch branch was created.";
-				setMode(ctx, true, resumeGoal, "on");
+			if (existingSession && existingStorage) {
+				if (goalArg) existingStorage.updateSession(existingSession.id, { goal: goalArg });
+				if (branchResult.branchName) {
+					existingStorage.updateSession(existingSession.id, { branch: branchResult.branchName });
+				}
+				const refreshed = existingStorage.getSessionById(existingSession.id) ?? existingSession;
+				runtime.state = buildExperimentState(refreshed, existingStorage.listLoggedRuns(refreshed.id));
+				runtime.goal = refreshed.goal ?? goalArg;
+				setMode(ctx, true, runtime.goal, "on");
 				dashboard.updateWidget(ctx, runtime);
 				await api.setActiveTools([...new Set([...api.getActiveTools(), ...EXPERIMENT_TOOL_NAMES])]);
 				api.sendUserMessage(
 					prompt.render(commandResumeTemplate, {
-						autoresearch_md_path: autoresearchMdPath,
-						branch_status_line: branchResult.created
-							? `Created and checked out dedicated git branch \`${branchResult.branchName}\` before resuming.`
-							: `Using dedicated git branch \`${branchResult.branchName}\`.`,
+						branch_status_line: branchStatusLine,
 						has_resume_context: resumeContext.length > 0,
 						resume_context: resumeContext,
 					}),
@@ -227,18 +214,11 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
 				return;
 			}
-			const branchGoal = trimmed.length > 0 ? trimmed : null;
-			const branchResult = await ensureAutoresearchBranch(api, workDir, branchGoal);
-			if (!branchResult.ok) {
-				ctx.ui.notify(branchResult.error, "error");
-				return;
-			}
-			setMode(ctx, true, branchGoal, "on");
+			setMode(ctx, true, goalArg, "on");
 			dashboard.updateWidget(ctx, runtime);
 			await api.setActiveTools([...new Set([...api.getActiveTools(), ...EXPERIMENT_TOOL_NAMES])]);
-			if (trimmed.length > 0) {
-				api.sendUserMessage(trimmed);
+			if (goalArg !== null) {
+				api.sendUserMessage(goalArg);
 			} else {
 				ctx.ui.notify("Autoresearch enabled—describe what to optimize in your next message.", "info");
 			}
@@ -284,12 +264,11 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
 			runtime.autoResumeArmed = false;
 			return;
 		}
-		const workDir = resolveWorkDir(ctx.cwd);
-		const pendingRun =
-			runtime.lastRunSummary ??
-			(await readPendingRunSummary(workDir, collectLoggedRunNumbers(runtime.state.results)));
+		const { session } = await loadActiveSession(ctx);
+		const storage = session ? await openAutoresearchStorageIfExists(ctx.cwd) : null;
+		const pendingRow = session && storage ? storage.getPendingRun(session.id) : null;
+		const pendingRun = pendingRunSummaryFromRow(pendingRow);
 		runtime.lastRunSummary = pendingRun;
-		runtime.lastRunChecks = summaryToChecks(pendingRun);
 		runtime.lastRunDuration = pendingRun?.durationSeconds ?? runtime.lastRunDuration;
 		runtime.lastRunAsi = pendingRun?.parsedAsi ?? runtime.lastRunAsi;
 		const shouldResumePendingRun =
@@ -299,14 +278,10 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
 		}
 		runtime.autoResumeArmed = false;
 		runtime.lastAutoResumePendingRunNumber = pendingRun?.runNumber ?? null;
-		const autoresearchMdPath = path.join(workDir, "autoresearch.md");
-		const ideasPath = path.join(workDir, "autoresearch.ideas.md");
 		api.sendMessage(
 			{
 				customType: "autoresearch-resume",
 				content: prompt.render(resumeMessageTemplate, {
-					autoresearch_md_path: autoresearchMdPath,
-					has_ideas: fs.existsSync(ideasPath),
 					has_pending_run: Boolean(pendingRun),
 				}),
 				display: false,
@@ -319,206 +294,239 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
 	api.on("before_agent_start", async (event, ctx) => {
 		const runtime = getRuntime(ctx);
 		if (!runtime.autoresearchMode) return;
-		const workDir = resolveWorkDir(ctx.cwd);
-		const autoresearchMdPath = path.join(workDir, "autoresearch.md");
-		const checksPath = path.join(workDir, "autoresearch.checks.sh");
-		const ideasPath = path.join(workDir, "autoresearch.ideas.md");
-		const programPath = path.join(workDir, "autoresearch.program.md");
-		const pendingRun =
-			runtime.lastRunSummary ??
-			(await readPendingRunSummary(workDir, collectLoggedRunNumbers(runtime.state.results)));
+		// Re-check git branch on every agent start. If the user manually switched
+		// off the autoresearch/* branch between turns, we silently drop autoresearch
+		// from this turn — the widget hides, the experiment tools detach, and we do
+		// not inject the autoresearch system prompt.
+		const { session, currentBranch } = await loadActiveSession(ctx);
+		const onActiveBranch = session === null || session.branch === null || session.branch === currentBranch;
+		if (!onActiveBranch) {
+			runtime.autoresearchMode = false;
+			runtime.state = createExperimentState();
+			runtime.lastRunSummary = null;
+			runtime.runningExperiment = null;
+			dashboard.updateWidget(ctx, runtime);
+			const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
+			await api.setActiveTools(api.getActiveTools().filter(name => !experimentTools.has(name)));
+			return;
+		}
+		const storage = await openAutoresearchStorageIfExists(ctx.cwd);
+		if (session && storage) {
+			runtime.state = buildExperimentState(session, storage.listLoggedRuns(session.id));
+		}
+		const pendingRow = session && storage ? storage.getPendingRun(session.id) : null;
+		const pendingRun = pendingRunSummaryFromRow(pendingRow);
 		runtime.lastRunSummary = pendingRun;
-		runtime.lastRunChecks = summaryToChecks(pendingRun);
 		runtime.lastRunDuration = pendingRun?.durationSeconds ?? runtime.lastRunDuration;
 		runtime.lastRunAsi = pendingRun?.parsedAsi ?? runtime.lastRunAsi;
-		const currentSegmentResults = currentResults(runtime.state.results, runtime.state.currentSegment);
-		const baselineMetric = findBaselineMetric(runtime.state.results, runtime.state.currentSegment);
-		const bestResult = findBestResult(runtime);
-		const goal = runtime.goal ?? runtime.state.name ?? "";
+		const state = runtime.state;
+		const currentSegmentResults = currentResults(state.results, state.currentSegment);
+		const baselineMetric = findBaselineMetric(state.results, state.currentSegment);
+		const baselineRunNumber = findBaselineRunNumber(state.results, state.currentSegment);
+		const bestMetric = findBestKeptMetric(state.results, state.currentSegment, state.bestDirection);
+		const bestResult = bestKeptResult(state.results, state.currentSegment, state.bestDirection);
+		const goal = runtime.goal ?? state.goal ?? state.name ?? "";
 		const recentResults = currentSegmentResults.slice(-3).map(result => {
 			const asiSummary = summarizeExperimentAsi(result);
 			return {
 				asi_summary: asiSummary,
 				description: result.description,
 				has_asi_summary: Boolean(asiSummary),
-				metric_display: formatNum(result.metric, runtime.state.metricUnit),
-				run_number: result.runNumber ?? runtime.state.results.indexOf(result) + 1,
+				metric_display: formatNum(result.metric, state.metricUnit),
+				run_number: result.runNumber ?? state.results.indexOf(result) + 1,
 				status: result.status,
+				has_deviations: result.scopeDeviations.length > 0,
+				deviations: result.scopeDeviations.join(", "),
+				justified: Boolean(result.justification),
+				flagged: result.flagged,
+				flagged_reason: result.flaggedReason ?? "",
 			};
 		});
-		const hasAutoresearchMd = fs.existsSync(autoresearchMdPath);
+		const unjustifiedRuns = currentSegmentResults
+			.filter(r => r.status === "keep" && !r.flagged && r.scopeDeviations.length > 0 && !r.justification)
+			.slice(-3)
+			.map(r => ({
+				run_number: r.runNumber,
+				paths: r.scopeDeviations.join(", "),
+			}));
+		if (!session) {
+			const currentBranch = await tryReadBranch(ctx.cwd);
+			const onAutoresearchBranch = currentBranch?.startsWith("autoresearch/") ?? false;
+			const baselineWarning = onAutoresearchBranch
+				? null
+				: "Heads up: you are not on a dedicated `autoresearch/*` branch. `log_experiment discard` will only revert run-modified files, not reset to baseline — so harness files written before `init_experiment` may not survive a discard. Clean the worktree and re-run `/autoresearch` if you want full revert safety.";
+			return {
+				systemPrompt: prompt.render(setupPromptTemplate, {
+					base_system_prompt: event.systemPrompt,
+					has_goal: goal.trim().length > 0,
+					goal,
+					working_dir: ctx.cwd,
+					has_branch: Boolean(currentBranch),
+					branch: currentBranch ?? "",
+					has_baseline_warning: baselineWarning !== null,
+					baseline_warning: baselineWarning ?? "",
+				}),
+			};
+		}
 		return {
 			systemPrompt: prompt.render(promptTemplate, {
 				base_system_prompt: event.systemPrompt,
 				has_goal: goal.trim().length > 0,
 				goal,
-				has_autoresearch_md: hasAutoresearchMd,
-				working_dir: workDir,
-				default_metric_name: runtime.state.metricName,
-				metric_name: runtime.state.metricName,
-				autoresearch_md_path: autoresearchMdPath,
-				has_checks: fs.existsSync(checksPath),
-				checks_path: checksPath,
-				has_ideas: fs.existsSync(ideasPath),
-				ideas_path: ideasPath,
-				has_program: fs.existsSync(programPath),
-				program_path: programPath,
-				current_segment: runtime.state.currentSegment + 1,
+				working_dir: ctx.cwd,
+				default_metric_name: state.metricName,
+				metric_name: state.metricName,
+				has_branch: Boolean(state.branch),
+				branch: state.branch,
+				has_baseline_commit: Boolean(state.baselineCommit),
+				baseline_commit: state.baselineCommit ? state.baselineCommit.slice(0, 12) : "",
+				has_notes: state.notes.trim().length > 0,
+				notes: state.notes,
+				current_segment: state.currentSegment + 1,
 				current_segment_run_count: currentSegmentResults.length,
 				has_baseline_metric: baselineMetric !== null,
-				baseline_metric_display: formatNum(baselineMetric, runtime.state.metricUnit),
-				has_best_result: Boolean(bestResult),
-				best_metric_display: bestResult
-					? formatNum(bestResult.metric, runtime.state.metricUnit)
-					: formatNum(baselineMetric, runtime.state.metricUnit),
-				best_run_number: bestResult
-					? (bestResult.runNumber ?? runtime.state.results.indexOf(bestResult) + 1)
-					: null,
+				baseline_metric_display: formatNum(baselineMetric, state.metricUnit),
+				baseline_run_number: baselineRunNumber,
+				has_best_result: bestResult !== null && bestMetric !== null,
+				best_metric_display: bestMetric !== null ? formatNum(bestMetric, state.metricUnit) : "-",
+				best_run_number: bestResult ? (bestResult.runNumber ?? state.results.indexOf(bestResult) + 1) : null,
 				has_recent_results: recentResults.length > 0,
 				recent_results: recentResults,
+				has_unjustified_runs: unjustifiedRuns.length > 0,
+				unjustified_runs: unjustifiedRuns,
 				has_pending_run: Boolean(pendingRun),
 				pending_run_number: pendingRun?.runNumber,
 				pending_run_command: pendingRun?.command,
-				pending_run_directory: pendingRun?.runDirectory,
 				pending_run_passed: pendingRun?.passed ?? false,
 				has_pending_run_metric: pendingRun?.parsedPrimary !== null && pendingRun?.parsedPrimary !== undefined,
 				pending_run_metric_display:
 					pendingRun?.parsedPrimary !== null && pendingRun?.parsedPrimary !== undefined
-						? formatNum(pendingRun.parsedPrimary, runtime.state.metricUnit)
+						? formatNum(pendingRun.parsedPrimary, state.metricUnit)
 						: null,
 			}),
 		};
 	});
-};
-function hasLocalAutoresearchState(workDir: string): boolean {
-	return fs.existsSync(path.join(workDir, "autoresearch.jsonl")) || fs.existsSync(path.join(workDir, ".autoresearch"));
-}
+	async function handleClear(
+		ctx: ExtensionContext,
+		runtime: AutoresearchRuntime,
+		opts: { keepTree: boolean; resetTreeForce: boolean },
+	): Promise<void> {
+		const storage = await openAutoresearchStorage(ctx.cwd);
+		const session = storage.getActiveSession();
+		const branchName = await tryReadBranch(ctx.cwd);
+		const onAutoresearchBranch = branchName?.startsWith("autoresearch/") ?? false;
+		const shouldResetTree = !opts.keepTree && (onAutoresearchBranch || opts.resetTreeForce);
+		if (shouldResetTree && session?.baselineCommit) {
+			try {
+				await git.reset(ctx.cwd, { hard: true, target: session.baselineCommit });
+				await git.clean(ctx.cwd);
+				ctx.ui.notify(`Reset worktree to baseline ${session.baselineCommit.slice(0, 12)}.`, "info");
+			} catch (err) {
+				ctx.ui.notify(
+					`Failed to reset worktree to baseline: ${err instanceof Error ? err.message : String(err)}`,
+					"error",
+				);
+			}
+		} else if (shouldResetTree) {
+			ctx.ui.notify("No baseline commit recorded — skipped worktree reset.", "warning");
+		}
-function summarizeExperimentAsi(result: ExperimentResult): string | null {
-	const hypothesis = typeof result.asi?.hypothesis === "string" ? result.asi.hypothesis.trim() : "";
-	const rollbackReason = typeof result.asi?.rollback_reason === "string" ? result.asi.rollback_reason.trim() : "";
-	const nextActionHint = typeof result.asi?.next_action_hint === "string" ? result.asi.next_action_hint.trim() : "";
-	const summary = [hypothesis, rollbackReason, nextActionHint].filter(part => part.length > 0).join(" | ");
-	return summary.length > 0 ? summary.slice(0, 220) : null;
-}
+		removeLegacyArtifacts(ctx.cwd);
-function getGuardedToolPaths(toolName: string, input: Record<string, unknown>): string[] | null {
-	if (toolName === "write") {
-		return typeof input.path === "string" ? [input.path] : null;
-	}
-	if (toolName === "ast_edit") {
-		return typeof input.path === "string" ? [input.path] : null;
-	}
-	if (toolName !== "edit") {
-		return [];
+		if (session) {
+			storage.closeSession(session.id);
+		}
+		runtime.state = createExperimentState();
+		runtime.goal = null;
+		runtime.lastRunDuration = null;
+		runtime.lastRunAsi = null;
+		runtime.lastRunArtifactDir = null;
+		runtime.lastRunNumber = null;
+		runtime.lastRunSummary = null;
+		setMode(ctx, false, null, "clear");
+		dashboard.updateWidget(ctx, runtime);
+		const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
+		await api.setActiveTools(api.getActiveTools().filter(name => !experimentTools.has(name)));
+		ctx.ui.notify("Autoresearch session cleared.", "info");
 	}
+};
-	const paths: string[] = [];
-	if (typeof input.path === "string") {
-		paths.push(input.path);
-	}
-	if (typeof input.rename === "string") {
-		paths.push(input.rename);
-	}
-	if (typeof input.move === "string") {
-		paths.push(input.move);
+const LEGACY_ARTIFACTS = [
+	"autoresearch.md",
+	"autoresearch.sh",
+	"autoresearch.checks.sh",
+	"autoresearch.program.md",
+	"autoresearch.ideas.md",
+	"autoresearch.jsonl",
+	"autoresearch.config.json",
+	".autoresearch",
+];
+function removeLegacyArtifacts(workDir: string): void {
+	for (const name of LEGACY_ARTIFACTS) {
+		const target = path.join(workDir, name);
+		try {
+			fs.rmSync(target, { recursive: true, force: true });
+		} catch (err) {
+			logger.warn("Failed to remove legacy autoresearch artifact", {
+				path: target,
+				error: err instanceof Error ? err.message : String(err),
+			});
+		}
 	}
-	return paths;
 }
-function resolveAutoresearchRelativePath(
-	workDir: string,
-	rawPath: string,
-): { ok: false; reason: string } | { ok: true; relativePath: string } {
-	if (looksLikeInternalUrl(rawPath)) {
-		return {
-			ok: false,
-			reason: `Autoresearch cannot validate internal URL paths during scoped editing: ${rawPath}`,
-		};
-	}
-	const resolvedPath = path.isAbsolute(rawPath) ? path.resolve(rawPath) : path.resolve(workDir, rawPath);
-	const canonicalWorkDir = canonicalizeExistingPath(workDir);
-	const canonicalTargetPath = canonicalizeTargetPath(resolvedPath);
-	const relativePath = path.relative(canonicalWorkDir, canonicalTargetPath);
-	if (relativePath === ".." || relativePath.startsWith(`..${path.sep}`) || path.isAbsolute(relativePath)) {
-		return {
-			ok: false,
-			reason: `Autoresearch blocked edits outside the working tree: ${rawPath}`,
-		};
-	}
+function pendingRunSummaryFromRow(row: RunRow | null): PendingRunSummary | null {
+	if (!row) return null;
+	if (row.status !== null) return null;
+	if (row.completedAt === null) return null;
+	const passed = row.exitCode === 0 && !row.timedOut;
 	return {
-		ok: true,
-		relativePath: relativePath.length === 0 ? "." : normalizeAutoresearchPath(relativePath),
+		command: row.command,
+		durationSeconds: row.durationMs !== null ? row.durationMs / 1000 : null,
+		parsedAsi: row.parsedAsi,
+		parsedMetrics: row.parsedMetrics,
+		parsedPrimary: row.parsedPrimary,
+		passed,
+		preRunDirtyPaths: row.preRunDirtyPaths,
+		runDirectory: path.dirname(row.logPath),
+		runNumber: row.id,
+		exitCode: row.exitCode,
+		timedOut: row.timedOut,
 	};
 }
-function validateEditableAutoresearchPath(relativePath: string, runtime: AutoresearchRuntime): string | null {
-	if (isAutoresearchLocalStatePath(relativePath)) {
-		return "autoresearch local state files are managed by the experiment tools and cannot be edited directly";
-	}
-	if (runtime.state.offLimits.some(spec => pathMatchesContractPath(relativePath, spec))) {
-		return "this path is listed under Off Limits in autoresearch.md";
-	}
-	if (isAutoresearchCommittableFile(relativePath)) {
-		return null;
-	}
-	if (runtime.state.scopePaths.length === 0) {
-		return "Files in Scope is not initialized yet; only autoresearch control files may be edited before init_experiment runs";
-	}
-	if (!runtime.state.scopePaths.some(spec => pathMatchesContractPath(relativePath, spec))) {
-		return "this path is outside Files in Scope in autoresearch.md";
-	}
-	return null;
+function summarizeExperimentAsi(result: ExperimentResult): string | null {
+	const hypothesis = typeof result.asi?.hypothesis === "string" ? result.asi.hypothesis.trim() : "";
+	const rollback = typeof result.asi?.rollback_reason === "string" ? result.asi.rollback_reason.trim() : "";
+	const next = typeof result.asi?.next_action_hint === "string" ? result.asi.next_action_hint.trim() : "";
+	const summary = [hypothesis, rollback, next].filter(part => part.length > 0).join(" | ");
+	return summary.length > 0 ? summary.slice(0, 220) : null;
 }
-function findBestResult(runtime: AutoresearchRuntime): ExperimentResult | null {
+function bestKeptResult(
+	results: ExperimentResult[],
+	segment: number,
+	direction: "lower" | "higher",
+): ExperimentResult | null {
 	let best: ExperimentResult | null = null;
-	for (const result of runtime.state.results) {
-		if (result.segment !== runtime.state.currentSegment || result.status !== "keep") continue;
+	for (const result of results) {
+		if (result.segment !== segment || result.status !== "keep" || result.flagged) continue;
 		if (!best) {
 			best = result;
 			continue;
 		}
-		if (runtime.state.bestDirection === "lower" ? result.metric < best.metric : result.metric > best.metric) {
-			best = result;
-		}
+		const better = direction === "lower" ? result.metric < best.metric : result.metric > best.metric;
+		if (better) best = result;
 	}
 	return best;
 }
-function summaryToChecks(summary: PendingRunSummary | null): ChecksResult | null {
-	if (!summary || summary.checksPass === null) {
-		return null;
-	}
-	return {
-		pass: summary.checksPass,
-		output: "",
-		duration: summary.checksDurationSeconds ?? 0,
-	};
-}
-function looksLikeInternalUrl(value: string): boolean {
-	return /^[a-z][a-z0-9+.-]*:\/\//i.test(value);
-}
-function canonicalizeExistingPath(targetPath: string): string {
+async function tryReadBranch(cwd: string): Promise<string | null> {
 	try {
-		return fs.realpathSync.native(targetPath);
+		return (await git.branch.current(cwd)) ?? null;
 	} catch {
-		return path.resolve(targetPath);
-	}
-}
-function canonicalizeTargetPath(targetPath: string): string {
-	const pendingSegments: string[] = [];
-	let currentPath = path.resolve(targetPath);
-	while (!fs.existsSync(currentPath)) {
-		const parentPath = path.dirname(currentPath);
-		if (parentPath === currentPath) {
-			return currentPath;
-		}
-		pendingSegments.unshift(path.basename(currentPath));
-		currentPath = parentPath;
+		return null;
 	}
-	return path.resolve(canonicalizeExistingPath(currentPath), ...pendingSegments);
 }