npm - @oh-my-pi/pi-coding-agent - Versions diffs - 14.5.14 → 14.6.0 - Mend

@oh-my-pi/pi-coding-agent 14.5.14 → 14.6.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (70)

package/CHANGELOG.md +39 -0
package/package.json +7 -7
package/src/autoresearch/command-resume.md +5 -8
package/src/autoresearch/git.ts +41 -51
package/src/autoresearch/helpers.ts +43 -359
package/src/autoresearch/index.ts +281 -273
package/src/autoresearch/prompt-setup.md +43 -0
package/src/autoresearch/prompt.md +52 -193
package/src/autoresearch/resume-message.md +2 -8
package/src/autoresearch/state.ts +59 -166
package/src/autoresearch/storage.ts +687 -0
package/src/autoresearch/tools/init-experiment.ts +201 -290
package/src/autoresearch/tools/log-experiment.ts +304 -517
package/src/autoresearch/tools/run-experiment.ts +117 -296
package/src/autoresearch/tools/update-notes.ts +116 -0
package/src/autoresearch/types.ts +16 -66
package/src/config/settings-schema.ts +1 -1
package/src/config/settings.ts +20 -1
package/src/cursor.ts +1 -1
package/src/edit/index.ts +9 -31
package/src/edit/line-hash.ts +70 -43
package/src/edit/modes/hashline.lark +26 -0
package/src/edit/modes/hashline.ts +898 -1099
package/src/edit/modes/patch.ts +0 -7
package/src/edit/modes/replace.ts +0 -4
package/src/edit/renderer.ts +22 -20
package/src/edit/streaming.ts +8 -28
package/src/eval/eval.lark +24 -30
package/src/eval/js/context-manager.ts +5 -162
package/src/eval/js/prelude.txt +0 -12
package/src/eval/parse.ts +129 -129
package/src/eval/py/prelude.py +1 -219
package/src/export/html/template.generated.ts +1 -1
package/src/export/html/template.js +2 -2
package/src/internal-urls/docs-index.generated.ts +1 -1
package/src/modes/components/session-observer-overlay.ts +5 -2
package/src/modes/components/status-line/segments.ts +1 -1
package/src/modes/components/status-line.ts +3 -5
package/src/modes/components/tree-selector.ts +4 -5
package/src/modes/components/welcome.ts +11 -1
package/src/modes/controllers/command-controller.ts +2 -6
package/src/modes/controllers/event-controller.ts +1 -2
package/src/modes/controllers/extension-ui-controller.ts +3 -15
package/src/modes/controllers/input-controller.ts +0 -1
package/src/modes/controllers/selector-controller.ts +1 -1
package/src/modes/interactive-mode.ts +5 -7
package/src/prompts/system/system-prompt.md +14 -38
package/src/prompts/tools/ast-edit.md +8 -8
package/src/prompts/tools/ast-grep.md +10 -10
package/src/prompts/tools/eval.md +13 -31
package/src/prompts/tools/find.md +2 -1
package/src/prompts/tools/hashline.md +66 -57
package/src/prompts/tools/search.md +2 -2
package/src/session/session-manager.ts +17 -13
package/src/tools/ast-edit.ts +141 -44
package/src/tools/ast-grep.ts +112 -36
package/src/tools/eval.ts +2 -53
package/src/tools/find.ts +16 -15
package/src/tools/path-utils.ts +36 -196
package/src/tools/search.ts +56 -35
package/src/utils/edit-mode.ts +2 -11
package/src/utils/file-display-mode.ts +1 -1
package/src/utils/git.ts +17 -0
package/src/utils/session-color.ts +0 -12
package/src/utils/title-generator.ts +22 -38
package/src/autoresearch/apply-contract-to-state.ts +0 -24
package/src/autoresearch/contract.ts +0 -288
package/src/edit/modes/atom.lark +0 -29
package/src/edit/modes/atom.ts +0 -1773
package/src/prompts/tools/atom.md +0 -150

package/src/autoresearch/tools/init-experiment.ts CHANGED

@@ -1,4 +1,3 @@
-import * as fs from "node:fs";
 import * as path from "node:path";
 import { StringEnum } from "@oh-my-pi/pi-ai";
 import { Text } from "@oh-my-pi/pi-tui";
@@ -6,87 +5,66 @@ import { Type } from "@sinclair/typebox";
 import type { ToolDefinition } from "../../extensibility/extensions";
 import type { Theme } from "../../modes/theme/theme";
 import { replaceTabs, truncateToWidth } from "../../tools/render-utils";
-import { applyAutoresearchContractToExperimentState } from "../apply-contract-to-state";
-import {
-	contractListsEqual,
-	contractPathListsEqual,
-	loadAutoresearchScriptSnapshot,
-	readAutoresearchContract,
-} from "../contract";
-import {
-	abandonUnloggedAutoresearchRuns,
-	collectLoggedRunNumbers,
-	isAutoresearchShCommand,
-	readMaxExperiments,
-	readPendingRunSummary,
-	resolveWorkDir,
-	validateWorkDir,
-} from "../helpers";
-import { cloneExperimentState } from "../state";
+import * as git from "../../utils/git";
+import { parseWorkDirDirtyPaths } from "../git";
+import { dedupeStrings, normalizePathSpec } from "../helpers";
+import { buildExperimentState } from "../state";
+import { openAutoresearchStorage, type SessionRow } from "../storage";
 import type { AutoresearchToolFactoryOptions, ExperimentState } from "../types";
+export const HARNESS_FILENAME = "autoresearch.sh";
+export const DEFAULT_HARNESS_COMMAND = `bash ${HARNESS_FILENAME}`;
+const HARNESS_COMMIT_TITLE = "autoresearch: harness setup";
 const initExperimentSchema = Type.Object({
-	name: Type.String({
-		description: "Human-readable experiment name.",
+	name: Type.String({ description: "Human-readable experiment name." }),
+	goal: Type.Optional(Type.String({ description: "Free-form description of what this session optimizes." })),
+	primary_metric: Type.String({
+		description:
+			"Primary metric name shown in the dashboard. Match the `METRIC <name>=<value>` lines printed by the benchmark.",
 	}),
-	from_autoresearch_md: Type.Optional(
-		Type.Boolean({
-			description:
-				"When true, load benchmark command, metrics, scope, off-limits, and constraints from autoresearch.md instead of passing mirrored fields below.",
-		}),
-	),
-	abandon_unlogged_runs: Type.Optional(
-		Type.Boolean({
-			description:
-				"When true, mark all completed but unlogged run artifacts as abandoned so initialization can proceed without logging them first.",
-		}),
-	),
-	new_segment: Type.Optional(
-		Type.Boolean({
-			description:
-				"When true, force a new segment even when the contract fields have not changed. Without this, re-initialization with matching contract is a no-op.",
-		}),
-	),
-	metric_name: Type.Optional(
-		Type.String({
-			description: "Primary metric name shown in the dashboard. Required when from_autoresearch_md is false.",
-		}),
-	),
 	metric_unit: Type.Optional(
-		Type.String({
-			description: "Unit for the primary metric, for example µs, ms, s, kb, or empty.",
-		}),
+		Type.String({ description: "Unit for the primary metric (e.g. ms, µs, mb). Empty when unitless." }),
 	),
 	direction: Type.Optional(
-		StringEnum(["lower", "higher"], {
-			description: "Whether lower or higher values are better. Defaults to lower.",
-		}),
+		StringEnum(["lower", "higher"], { description: "Whether lower or higher values are better. Defaults to lower." }),
 	),
-	benchmark_command: Type.Optional(
-		Type.String({
-			description: "Benchmark command recorded in autoresearch.md. Required when from_autoresearch_md is false.",
+	secondary_metrics: Type.Optional(
+		Type.Array(Type.String(), {
+			description: "Names of secondary metrics tracked alongside the primary metric.",
 		}),
 	),
 	scope_paths: Type.Optional(
 		Type.Array(Type.String(), {
-			description: "Files in Scope from autoresearch.md. Required when from_autoresearch_md is false.",
-			minItems: 1,
+			description:
+				"Files or directories the agent expects to modify. Used post-hoc to flag scope deviations on log_experiment; never used to block edits.",
 		}),
 	),
 	off_limits: Type.Optional(
 		Type.Array(Type.String(), {
-			description: "Off Limits paths from autoresearch.md.",
+			description:
+				"Paths the agent SHOULD NOT modify. Used post-hoc to flag scope deviations on log_experiment; never used to block edits.",
 		}),
 	),
 	constraints: Type.Optional(
-		Type.Array(Type.String(), {
-			description: "Constraints from autoresearch.md.",
+		Type.Array(Type.String(), { description: "Free-form constraints (e.g. 'no api break')." }),
+	),
+	max_iterations: Type.Optional(Type.Number({ description: "Soft cap on iterations per segment. Optional." })),
+	new_segment: Type.Optional(
+		Type.Boolean({
+			description:
+				"When true, bump to a new segment even when an active session exists. New baselines and best-metric reset.",
 		}),
 	),
 });
 interface InitExperimentDetails {
 	state: ExperimentState;
+	createdSession: boolean;
+	bumpedSegment: boolean;
+	abandonedRuns: number;
+	harnessCommitted: boolean;
+	baselineCommit: string | null;
 }
 export function createInitExperimentTool(
@@ -96,253 +74,117 @@ export function createInitExperimentTool(
 		name: "init_experiment",
 		label: "Init Experiment",
 		description:
-			"Initialize or reset the autoresearch session for the current optimization target before the first logged run of a segment.",
+			"Initialize or reconfigure the autoresearch session. On first call (Phase 1 → Phase 2 transition), requires `./autoresearch.sh` to exist and pending harness changes are auto-committed on an autoresearch branch. Pass `new_segment: true` to start a fresh baseline within an existing session.",
 		parameters: initExperimentSchema,
 		defaultInactive: true,
 		async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
-			const workDirError = validateWorkDir(ctx.cwd);
-			if (workDirError) {
-				return {
-					content: [{ type: "text", text: `Error: ${workDirError}` }],
-				};
-			}
+			const storage = await openAutoresearchStorage(ctx.cwd);
 			const runtime = options.getRuntime(ctx);
-			const state = runtime.state;
-			const isReinitializing = state.results.length > 0;
-			const workDir = resolveWorkDir(ctx.cwd);
-			const loggedRunNumbers = collectLoggedRunNumbers(state.results);
-			let abandonSummary = "";
-			if (params.abandon_unlogged_runs === true) {
-				const abandoned = await abandonUnloggedAutoresearchRuns(workDir, loggedRunNumbers);
-				if (abandoned > 0) {
-					abandonSummary =
-						abandoned === 1
-							? "Abandoned 1 unlogged run artifact.\n"
-							: `Abandoned ${abandoned} unlogged run artifacts.\n`;
-				}
-			}
-			const pendingRun = await readPendingRunSummary(workDir, loggedRunNumbers);
-			if (pendingRun) {
-				const metricInfo = pendingRun.parsedPrimary !== null ? `, metric=${pendingRun.parsedPrimary}` : "";
-				const passedInfo = pendingRun.passed ? "passed" : "failed";
-				return {
-					content: [
-						{
-							type: "text",
-							text:
-								abandonSummary +
-								`Error: run #${pendingRun.runNumber} has not been logged yet.\n` +
-								`Pending: command="${pendingRun.command}"${metricInfo}, ${passedInfo}\n` +
-								"Call log_experiment before re-initializing, or pass abandon_unlogged_runs=true.",
-						},
-					],
-				};
-			}
-			const contractResult = readAutoresearchContract(workDir);
-			const scriptSnapshot = loadAutoresearchScriptSnapshot(workDir);
-			const errors = [...contractResult.errors, ...scriptSnapshot.errors];
-			if (errors.length > 0) {
-				return {
-					content: [{ type: "text", text: `${abandonSummary}Error: ${errors.join(" ")}` }],
-				};
-			}
+			const direction = params.direction ?? "lower";
+			const metricUnit = params.metric_unit ?? "";
+			const scopePaths = dedupeStrings((params.scope_paths ?? []).map(normalizePathSpec));
+			const offLimits = dedupeStrings((params.off_limits ?? []).map(normalizePathSpec));
+			const constraints = dedupeStrings(params.constraints ?? []);
+			const secondaryMetrics = dedupeStrings(params.secondary_metrics ?? []);
+			const goal = params.goal?.trim() || null;
+			const maxIterations =
+				params.max_iterations !== undefined && Number.isFinite(params.max_iterations) && params.max_iterations > 0
+					? Math.floor(params.max_iterations)
+					: null;
+			const branch = (await git.branch.current(ctx.cwd)) ?? null;
+			const onAutoresearchBranch = branch?.startsWith("autoresearch/") ?? false;
-			const benchmarkContract = contractResult.contract.benchmark;
-			const expectedDirection = benchmarkContract.direction ?? "lower";
-			const expectedMetricUnit = benchmarkContract.metricUnit;
-			if (benchmarkContract.command && !isAutoresearchShCommand(benchmarkContract.command)) {
-				return {
-					content: [
-						{
-							type: "text",
-							text:
-								abandonSummary +
-								"Error: Benchmark.command in autoresearch.md must invoke `autoresearch.sh` directly. " +
-								"Move the real workload into `autoresearch.sh` and re-run init_experiment.",
-						},
-					],
-				};
-			}
+			const existing = storage.getActiveSessionForBranch(branch);
+			const isNewSegmentInit = existing !== null && params.new_segment === true;
+			const requiresHarness = !existing || isNewSegmentInit;
-			const fromMd = params.from_autoresearch_md === true;
-			if (!fromMd) {
-				const metricName = params.metric_name?.trim();
-				const benchmarkCommand = params.benchmark_command?.trim();
-				const scopePaths = params.scope_paths;
-				if (!metricName || !benchmarkCommand || !scopePaths || scopePaths.length === 0) {
-					return {
-						content: [
-							{
-								type: "text",
-								text:
-									abandonSummary +
-									"Error: when from_autoresearch_md is false or omitted, metric_name, benchmark_command, and scope_paths are required and must match autoresearch.md. " +
-									"Alternatively pass from_autoresearch_md=true with only name (plus optional flags).",
-							},
-						],
-					};
-				}
-				if (benchmarkContract.command !== benchmarkCommand) {
-					return {
-						content: [
-							{
-								type: "text",
-								text:
-									abandonSummary +
-									"Error: benchmark_command does not match autoresearch.md. " +
-									`Expected: ${benchmarkContract.command ?? "(missing)"}\nReceived: ${params.benchmark_command}`,
-							},
-						],
-					};
-				}
-				if (benchmarkContract.primaryMetric !== metricName) {
-					return {
-						content: [
-							{
-								type: "text",
-								text:
-									abandonSummary +
-									"Error: metric_name does not match autoresearch.md. " +
-									`Expected: ${benchmarkContract.primaryMetric ?? "(missing)"}\nReceived: ${params.metric_name}`,
-							},
-						],
-					};
-				}
-				if ((params.metric_unit ?? "") !== expectedMetricUnit) {
-					return {
-						content: [
-							{
-								type: "text",
-								text:
-									abandonSummary +
-									"Error: metric_unit does not match autoresearch.md. " +
-									`Expected: ${expectedMetricUnit || "(empty)"}\nReceived: ${params.metric_unit ?? "(empty)"}`,
-							},
-						],
-					};
-				}
-				if ((params.direction ?? "lower") !== expectedDirection) {
-					return {
-						content: [
-							{
-								type: "text",
-								text:
-									abandonSummary +
-									"Error: direction does not match autoresearch.md. " +
-									`Expected: ${expectedDirection}\nReceived: ${params.direction ?? "lower"}`,
-							},
-						],
-					};
-				}
-				if (!contractPathListsEqual(scopePaths, contractResult.contract.scopePaths)) {
-					return {
-						content: [
-							{
-								type: "text",
-								text:
-									abandonSummary +
-									"Error: scope_paths do not match autoresearch.md. " +
-									`Expected: ${contractResult.contract.scopePaths.join(", ")}`,
-							},
-						],
-					};
-				}
-				if (!contractPathListsEqual(params.off_limits ?? [], contractResult.contract.offLimits)) {
-					return {
-						content: [
-							{
-								type: "text",
-								text:
-									abandonSummary +
-									"Error: off_limits do not match autoresearch.md. " +
-									`Expected: ${contractResult.contract.offLimits.join(", ") || "(empty)"}`,
-							},
-						],
-					};
-				}
-				if (!contractListsEqual(params.constraints ?? [], contractResult.contract.constraints)) {
+			if (requiresHarness) {
+				const harnessExists = await Bun.file(path.join(ctx.cwd, HARNESS_FILENAME)).exists();
+				if (!harnessExists) {
 					return {
 						content: [
 							{
 								type: "text",
-								text:
-									abandonSummary +
-									"Error: constraints do not match autoresearch.md. " +
-									`Expected: ${contractResult.contract.constraints.join(", ") || "(empty)"}`,
+								text: `Error: ./${HARNESS_FILENAME} does not exist. Phase 1 of autoresearch is harness setup — write \`./${HARNESS_FILENAME}\` so it exits 0 and prints \`METRIC <name>=<value>\`, validate it via \`bash ${HARNESS_FILENAME}\`, then call init_experiment again.`,
 							},
 						],
 					};
 				}
 			}
-			// Check if contract matches current state — if so, re-init is a no-op
-			if (isReinitializing && params.new_segment !== true) {
-				const contract = contractResult.contract;
-				const bm = contract.benchmark;
-				const contractMatches =
-					(bm.primaryMetric ?? "metric") === state.metricName &&
-					bm.metricUnit === state.metricUnit &&
-					(bm.direction ?? "lower") === state.bestDirection &&
-					(bm.command ?? null) === state.benchmarkCommand &&
-					contractPathListsEqual(contract.scopePaths, state.scopePaths) &&
-					contractPathListsEqual(contract.offLimits, state.offLimits) &&
-					contractListsEqual(contract.constraints, state.constraints);
-				if (contractMatches) {
-					runtime.autoresearchMode = true;
-					runtime.autoResumeArmed = true;
-					options.dashboard.updateWidget(ctx, runtime);
-					options.dashboard.requestRender();
-					return {
-						content: [
-							{
-								type: "text",
-								text:
-									abandonSummary +
-									`Experiment session already initialized with matching contract. Continuing segment ${state.currentSegment}.`,
-							},
-						],
-						details: { state: cloneExperimentState(state) },
-					};
+			let harnessCommitted = false;
+			let commitWarning: string | null = null;
+			if (requiresHarness && onAutoresearchBranch) {
+				const dirty = await detectPendingChanges(ctx.cwd);
+				if (dirty) {
+					try {
+						await git.stage.files(ctx.cwd, []);
+						const message = buildHarnessCommitMessage(goal, params.name);
+						await git.commit(ctx.cwd, message);
+						harnessCommitted = true;
+					} catch (err) {
+						commitWarning = `Failed to auto-commit harness changes: ${err instanceof Error ? err.message : String(err)}. Recording baseline at current HEAD; discard may not preserve uncommitted harness files.`;
+					}
 				}
 			}
-			applyAutoresearchContractToExperimentState(contractResult.contract, state);
-			state.name = params.name;
-			state.maxExperiments = readMaxExperiments(ctx.cwd);
-			state.bestMetric = null;
-			state.confidence = null;
-			if (isReinitializing) {
-				state.currentSegment += 1;
-			}
+			const baselineCommit = await tryReadHeadSha(ctx.cwd);
-			const jsonlPath = path.join(workDir, "autoresearch.jsonl");
-			const configLine = JSON.stringify({
-				type: "config",
-				name: state.name,
-				metricName: state.metricName,
-				metricUnit: state.metricUnit,
-				bestDirection: state.bestDirection,
-				benchmarkCommand: state.benchmarkCommand,
-				secondaryMetrics: state.secondaryMetrics.map(metric => metric.name),
-				scopePaths: state.scopePaths,
-				offLimits: state.offLimits,
-				constraints: state.constraints,
-			});
+			let session: SessionRow;
+			let createdSession = false;
+			let bumpedSegment = false;
+			let abandonedRuns = 0;
-			if (isReinitializing) {
-				fs.appendFileSync(jsonlPath, `${configLine}\n`);
+			if (!existing) {
+				session = storage.openSession({
+					name: params.name,
+					goal,
+					primaryMetric: params.primary_metric,
+					metricUnit,
+					direction,
+					preferredCommand: DEFAULT_HARNESS_COMMAND,
+					branch,
+					baselineCommit,
+					maxIterations,
+					scopePaths,
+					offLimits,
+					constraints,
+					secondaryMetrics,
+				});
+				createdSession = true;
 			} else {
-				fs.writeFileSync(jsonlPath, `${configLine}\n`);
+				abandonedRuns = storage.abandonPendingRuns(existing.id);
+				const updates: Parameters<typeof storage.updateSession>[1] = {
+					goal,
+					maxIterations,
+					scopePaths,
+					offLimits,
+					constraints,
+					secondaryMetrics,
+					primaryMetric: params.primary_metric,
+					metricUnit,
+					direction,
+					branch,
+				};
+				if (isNewSegmentInit) {
+					updates.baselineCommit = baselineCommit;
+				}
+				let updated = storage.updateSession(existing.id, updates);
+				if (isNewSegmentInit) {
+					updated = storage.bumpSegment(existing.id);
+					bumpedSegment = true;
+				}
+				session = updated;
 			}
+			const loggedRuns = storage.listLoggedRuns(session.id);
+			const state = buildExperimentState(session, loggedRuns);
+			runtime.state = state;
+			runtime.goal = session.goal;
 			runtime.autoresearchMode = true;
 			runtime.autoResumeArmed = true;
 			runtime.lastAutoResumePendingRunNumber = null;
-			runtime.lastRunChecks = null;
 			runtime.lastRunDuration = null;
 			runtime.lastRunAsi = null;
 			runtime.lastRunArtifactDir = null;
@@ -351,24 +193,65 @@ export function createInitExperimentTool(
 			options.dashboard.updateWidget(ctx, runtime);
 			options.dashboard.requestRender();
-			const lines = [
-				abandonSummary.trimEnd(),
-				`Experiment initialized: ${state.name}`,
-				`Metric: ${state.metricName} (${state.metricUnit || "unitless"}, ${state.bestDirection} is better)`,
-				`Benchmark command: ${state.benchmarkCommand}`,
-				`Working directory: ${workDir}`,
-				`Files in Scope: ${state.scopePaths.join(", ")}`,
-				isReinitializing
-					? "Previous results remain in history. This starts a new segment and requires a fresh baseline."
-					: "Now run the baseline experiment and log it.",
-			].filter(line => line.length > 0);
-			if (state.maxExperiments !== null) {
-				lines.push(`Max iterations: ${state.maxExperiments}`);
+			const lines: string[] = [];
+			if (abandonedRuns > 0) {
+				lines.push(`Abandoned ${abandonedRuns} pending run${abandonedRuns === 1 ? "" : "s"} before reconfiguring.`);
+			}
+			if (harnessCommitted && session.baselineCommit) {
+				lines.push(`Committed harness setup at ${session.baselineCommit.slice(0, 12)}.`);
+			}
+			if (commitWarning) {
+				lines.push(commitWarning);
+			}
+			if (createdSession) {
+				lines.push(`Started session #${session.id}: ${session.name}`);
+			} else if (bumpedSegment) {
+				lines.push(`Bumped segment to ${session.currentSegment} for session #${session.id}: ${session.name}`);
+			} else {
+				lines.push(`Updated session #${session.id} (segment ${session.currentSegment}): ${session.name}`);
+			}
+			lines.push(
+				`Metric: ${session.primaryMetric} (${session.metricUnit || "unitless"}, ${session.direction} is better)`,
+			);
+			lines.push(`Benchmark entrypoint: ${DEFAULT_HARNESS_COMMAND}`);
+			if (session.scopePaths.length > 0) {
+				lines.push(`Files in scope: ${session.scopePaths.join(", ")}`);
+			}
+			if (session.offLimits.length > 0) {
+				lines.push(`Off limits: ${session.offLimits.join(", ")}`);
+			}
+			if (session.maxIterations !== null) {
+				lines.push(`Max iterations per segment: ${session.maxIterations}`);
+			}
+			if (session.branch) {
+				lines.push(`Active branch: ${session.branch}`);
+			}
+			if (session.baselineCommit) {
+				lines.push(`Baseline commit: ${session.baselineCommit.slice(0, 12)}`);
+			}
+			if (createdSession) {
+				lines.push(
+					"Phase 2: iteration loop is active. Run the baseline experiment with `run_experiment` and log it.",
+				);
+			} else if (bumpedSegment) {
+				lines.push("Run a fresh baseline for the new segment.");
+			}
+			if (requiresHarness && !onAutoresearchBranch) {
+				lines.push(
+					"Note: not on a dedicated `autoresearch/*` branch — `log_experiment discard` will only revert run-modified files, not reset to baseline.",
+				);
 			}
 			return {
 				content: [{ type: "text", text: lines.join("\n") }],
-				details: { state: cloneExperimentState(state) },
+				details: {
+					state,
+					createdSession,
+					bumpedSegment,
+					abandonedRuns,
+					harnessCommitted,
+					baselineCommit: session.baselineCommit,
+				},
 			};
 		},
 		renderCall(args, _options, theme): Text {
@@ -384,3 +267,31 @@ export function createInitExperimentTool(
 function renderInitCall(name: string, theme: Theme): string {
 	return `${theme.fg("toolTitle", theme.bold("init_experiment"))} ${theme.fg("accent", truncateToWidth(replaceTabs(name), 100))}`;
 }
+async function tryReadHeadSha(cwd: string): Promise<string | null> {
+	try {
+		return (await git.head.sha(cwd)) ?? null;
+	} catch {
+		return null;
+	}
+}
+async function detectPendingChanges(cwd: string): Promise<boolean> {
+	try {
+		const statusText = await git.status(cwd, { porcelainV1: true, untrackedFiles: "all", z: true });
+		const workDirPrefix = await git.show.prefix(cwd).catch(() => "");
+		return parseWorkDirDirtyPaths(statusText, workDirPrefix).length > 0;
+	} catch {
+		return false;
+	}
+}
+function buildHarnessCommitMessage(goal: string | null, name: string): string {
+	const lines = [HARNESS_COMMIT_TITLE, "", `Benchmark entrypoint: ${DEFAULT_HARNESS_COMMAND}`];
+	if (goal) {
+		lines.push(`Goal: ${goal}`);
+	} else {
+		lines.push(`Session: ${name}`);
+	}
+	return lines.join("\n");
+}