@oh-my-pi/pi-coding-agent 14.5.13 → 14.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/CHANGELOG.md +52 -0
  2. package/package.json +7 -7
  3. package/src/autoresearch/command-resume.md +5 -8
  4. package/src/autoresearch/git.ts +41 -51
  5. package/src/autoresearch/helpers.ts +43 -359
  6. package/src/autoresearch/index.ts +281 -273
  7. package/src/autoresearch/prompt-setup.md +43 -0
  8. package/src/autoresearch/prompt.md +52 -193
  9. package/src/autoresearch/resume-message.md +2 -8
  10. package/src/autoresearch/state.ts +59 -166
  11. package/src/autoresearch/storage.ts +687 -0
  12. package/src/autoresearch/tools/init-experiment.ts +201 -290
  13. package/src/autoresearch/tools/log-experiment.ts +304 -517
  14. package/src/autoresearch/tools/run-experiment.ts +117 -296
  15. package/src/autoresearch/tools/update-notes.ts +116 -0
  16. package/src/autoresearch/types.ts +16 -66
  17. package/src/commit/pipeline.ts +4 -3
  18. package/src/config/settings-schema.ts +1 -1
  19. package/src/config/settings.ts +20 -1
  20. package/src/config.ts +9 -6
  21. package/src/cursor.ts +1 -1
  22. package/src/edit/index.ts +9 -31
  23. package/src/edit/line-hash.ts +70 -43
  24. package/src/edit/modes/hashline.lark +26 -0
  25. package/src/edit/modes/hashline.ts +898 -1099
  26. package/src/edit/modes/patch.ts +0 -7
  27. package/src/edit/modes/replace.ts +0 -4
  28. package/src/edit/renderer.ts +22 -20
  29. package/src/edit/streaming.ts +8 -28
  30. package/src/eval/eval.lark +24 -30
  31. package/src/eval/js/context-manager.ts +5 -162
  32. package/src/eval/js/prelude.txt +0 -12
  33. package/src/eval/parse.ts +129 -129
  34. package/src/eval/py/kernel.ts +4 -4
  35. package/src/eval/py/prelude.py +1 -219
  36. package/src/export/html/template.generated.ts +1 -1
  37. package/src/export/html/template.js +2 -2
  38. package/src/internal-urls/docs-index.generated.ts +1 -1
  39. package/src/main.ts +10 -0
  40. package/src/mcp/manager.ts +22 -0
  41. package/src/modes/components/session-observer-overlay.ts +5 -2
  42. package/src/modes/components/status-line/segments.ts +1 -1
  43. package/src/modes/components/status-line.ts +3 -5
  44. package/src/modes/components/tree-selector.ts +4 -5
  45. package/src/modes/components/welcome.ts +11 -1
  46. package/src/modes/controllers/command-controller.ts +2 -6
  47. package/src/modes/controllers/event-controller.ts +1 -2
  48. package/src/modes/controllers/extension-ui-controller.ts +3 -15
  49. package/src/modes/controllers/input-controller.ts +0 -1
  50. package/src/modes/controllers/selector-controller.ts +1 -1
  51. package/src/modes/interactive-mode.ts +5 -7
  52. package/src/modes/rpc/rpc-client.ts +9 -0
  53. package/src/modes/rpc/rpc-mode.ts +6 -0
  54. package/src/modes/rpc/rpc-types.ts +9 -0
  55. package/src/prompts/system/system-prompt.md +14 -38
  56. package/src/prompts/tools/ast-edit.md +8 -8
  57. package/src/prompts/tools/ast-grep.md +10 -10
  58. package/src/prompts/tools/eval.md +13 -31
  59. package/src/prompts/tools/find.md +2 -1
  60. package/src/prompts/tools/hashline.md +66 -57
  61. package/src/prompts/tools/search.md +2 -2
  62. package/src/sdk.ts +19 -4
  63. package/src/session/agent-session.ts +110 -4
  64. package/src/session/session-manager.ts +17 -13
  65. package/src/task/agents.ts +4 -5
  66. package/src/tools/archive-reader.ts +9 -3
  67. package/src/tools/ast-edit.ts +141 -44
  68. package/src/tools/ast-grep.ts +112 -36
  69. package/src/tools/browser/readable.ts +11 -6
  70. package/src/tools/browser/tab-supervisor.ts +2 -2
  71. package/src/tools/browser.ts +5 -3
  72. package/src/tools/eval.ts +2 -53
  73. package/src/tools/find.ts +16 -15
  74. package/src/tools/image-gen.ts +2 -2
  75. package/src/tools/path-utils.ts +36 -196
  76. package/src/tools/search.ts +56 -35
  77. package/src/tools/write.ts +8 -1
  78. package/src/utils/edit-mode.ts +2 -11
  79. package/src/utils/file-display-mode.ts +1 -1
  80. package/src/utils/git.ts +17 -0
  81. package/src/utils/session-color.ts +0 -12
  82. package/src/utils/title-generator.ts +22 -38
  83. package/src/web/scrapers/crossref.ts +3 -3
  84. package/src/web/scrapers/devto.ts +1 -1
  85. package/src/web/scrapers/discourse.ts +5 -5
  86. package/src/web/scrapers/firefox-addons.ts +1 -1
  87. package/src/web/scrapers/flathub.ts +2 -2
  88. package/src/web/scrapers/gitlab.ts +1 -1
  89. package/src/web/scrapers/go-pkg.ts +2 -2
  90. package/src/web/scrapers/jetbrains-marketplace.ts +1 -1
  91. package/src/web/scrapers/mastodon.ts +9 -9
  92. package/src/web/scrapers/mdn.ts +11 -7
  93. package/src/web/scrapers/pub-dev.ts +1 -1
  94. package/src/web/scrapers/rawg.ts +3 -3
  95. package/src/web/scrapers/readthedocs.ts +1 -1
  96. package/src/web/scrapers/spdx.ts +1 -1
  97. package/src/web/scrapers/stackoverflow.ts +2 -2
  98. package/src/web/scrapers/types.ts +53 -39
  99. package/src/web/scrapers/w3c.ts +1 -1
  100. package/src/web/search/providers/gemini.ts +2 -2
  101. package/src/autoresearch/apply-contract-to-state.ts +0 -24
  102. package/src/autoresearch/contract.ts +0 -288
  103. package/src/edit/modes/atom.lark +0 -29
  104. package/src/edit/modes/atom.ts +0 -1773
  105. package/src/prompts/tools/atom.md +0 -150
@@ -2,36 +2,21 @@ import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
3
  import { StringEnum } from "@oh-my-pi/pi-ai";
4
4
  import { Text } from "@oh-my-pi/pi-tui";
5
- import { logger } from "@oh-my-pi/pi-utils";
6
5
  import { Type } from "@sinclair/typebox";
7
6
  import type { ToolDefinition } from "../../extensibility/extensions";
8
7
  import type { Theme } from "../../modes/theme/theme";
9
8
  import { replaceTabs, truncateToWidth } from "../../tools/render-utils";
10
9
  import * as git from "../../utils/git";
11
- import { applyAutoresearchContractToExperimentState } from "../apply-contract-to-state";
12
- import { loadAutoresearchScriptSnapshot, pathMatchesContractPath, readAutoresearchContract } from "../contract";
13
- import { computeRunModifiedPaths, getCurrentAutoresearchBranch, parseWorkDirDirtyPathsWithStatus } from "../git";
10
+ import { computeRunModifiedPaths, getCurrentAutoresearchBranch, parseWorkDirDirtyPaths } from "../git";
11
+ import { ensureNumericMetricMap, formatNum, mergeAsi, pathMatchesSpec, sanitizeAsi } from "../helpers";
14
12
  import {
15
- collectLoggedRunNumbers,
16
- formatNum,
17
- inferMetricUnitFromName,
18
- isAutoresearchCommittableFile,
19
- isAutoresearchLocalStatePath,
20
- isAutoresearchShCommand,
21
- isBetter,
22
- mergeAsi,
23
- readPendingRunSummary,
24
- resolveWorkDir,
25
- validateWorkDir,
26
- } from "../helpers";
27
- import {
28
- cloneExperimentState,
13
+ buildExperimentState,
29
14
  computeConfidence,
30
15
  currentResults,
31
- findBaselineMetric,
32
16
  findBaselineSecondary,
33
17
  findBestKeptMetric,
34
18
  } from "../state";
19
+ import { openAutoresearchStorageIfExists, type SessionRow } from "../storage";
35
20
  import type {
36
21
  ASIData,
37
22
  AutoresearchToolFactoryOptions,
@@ -41,50 +26,50 @@ import type {
41
26
  NumericMetricMap,
42
27
  } from "../types";
43
28
 
44
- const EXPERIMENT_TOOL_NAMES = ["init_experiment", "run_experiment", "log_experiment"];
29
+ const EXPERIMENT_TOOL_NAMES = ["init_experiment", "run_experiment", "log_experiment", "update_notes"];
45
30
 
46
31
  const logExperimentSchema = Type.Object({
47
- commit: Type.String({
48
- description: "Current git commit hash or placeholder.",
49
- }),
50
32
  metric: Type.Number({
51
- description: "Primary metric value for this run.",
33
+ description: "Primary metric value for this run. May differ from the parsed value; deviation is recorded.",
52
34
  }),
53
35
  status: StringEnum(["keep", "discard", "crash", "checks_failed"], {
54
36
  description: "Outcome for this run.",
55
37
  }),
56
- description: Type.String({
57
- description: "Short description of the experiment.",
58
- }),
38
+ description: Type.String({ description: "Short description of the experiment." }),
59
39
  metrics: Type.Optional(
60
- Type.Record(Type.String(), Type.Number(), {
61
- description: "Secondary metrics for this run.",
62
- }),
40
+ Type.Record(Type.String(), Type.Number(), { description: "Secondary metrics for this run." }),
63
41
  ),
64
- force: Type.Optional(
65
- Type.Boolean({
66
- description:
67
- "When true: skip ASI field requirements and allow keeping a run whose primary metric regressed versus the best kept run.",
68
- }),
42
+ asi: Type.Optional(
43
+ Type.Object(
44
+ {},
45
+ {
46
+ additionalProperties: Type.Unknown(),
47
+ description: "Free-form structured metadata captured for this run (hypothesis, learnings, etc.).",
48
+ },
49
+ ),
69
50
  ),
70
- skip_restore: Type.Optional(
71
- Type.Boolean({
51
+ commit: Type.Optional(
52
+ Type.String({ description: "Override the commit hash recorded for this run. Defaults to the current HEAD." }),
53
+ ),
54
+ justification: Type.Optional(
55
+ Type.String({
72
56
  description:
73
- "When true and status is discard/crash/checks_failed: skip reverting the working tree to HEAD. Useful when the experiment did not modify tracked files or you want to preserve the current state.",
57
+ "Required when the run modifies paths outside scope or inside off-limits and you still want it kept. Free-form explanation.",
74
58
  }),
75
59
  ),
76
- asi: Type.Optional(
77
- Type.Record(Type.String(), Type.Unknown(), {
78
- description: "Actionable side information captured for this run.",
79
- }),
60
+ flag_runs: Type.Optional(
61
+ Type.Array(
62
+ Type.Object({
63
+ run_id: Type.Number({ description: "Run id (#) of a previously logged run to flag as suspect." }),
64
+ reason: Type.String({
65
+ description: "Why this earlier run is suspect (e.g. reward-hacked, broken metric).",
66
+ }),
67
+ }),
68
+ { description: "Mark earlier runs as flagged. Flagged runs are excluded from baseline and best-metric math." },
69
+ ),
80
70
  ),
81
71
  });
82
72
 
83
- interface KeepCommitResult {
84
- error?: string;
85
- note?: string;
86
- }
87
-
88
73
  export function createLogExperimentTool(
89
74
  options: AutoresearchToolFactoryOptions,
90
75
  ): ToolDefinition<typeof logExperimentSchema, LogDetails> {
@@ -92,189 +77,111 @@ export function createLogExperimentTool(
92
77
  name: "log_experiment",
93
78
  label: "Log Experiment",
94
79
  description:
95
- "Log the experiment result, update dashboard state, persist JSONL history, and apply git keep or revert behavior.",
80
+ "Log the result of the latest run_experiment. Records the metric, optional ASI metadata, modified paths, and scope deviations. On `keep`, modified files are committed; on `discard`/`crash`/`checks_failed`, the worktree is reverted. Pass `flag_runs` to mark earlier runs as suspect; flagged runs are excluded from baseline and best-metric math.",
96
81
  parameters: logExperimentSchema,
97
82
  defaultInactive: true,
98
83
  async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
99
- const workDirError = validateWorkDir(ctx.cwd);
100
- if (workDirError) {
101
- return {
102
- content: [{ type: "text", text: `Error: ${workDirError}` }],
103
- };
104
- }
105
-
106
- const runtime = options.getRuntime(ctx);
107
- const state = runtime.state;
108
- const workDir = resolveWorkDir(ctx.cwd);
109
-
110
- const contractResult = readAutoresearchContract(workDir);
111
- const scriptSnapshot = loadAutoresearchScriptSnapshot(workDir);
112
- const contractErrors = [...contractResult.errors, ...scriptSnapshot.errors];
113
- if (contractErrors.length > 0) {
114
- return {
115
- content: [{ type: "text", text: `Error: ${contractErrors.join(" ")}` }],
116
- };
117
- }
118
- const benchmarkForSync = contractResult.contract.benchmark;
119
- if (benchmarkForSync.command && !isAutoresearchShCommand(benchmarkForSync.command)) {
84
+ const storage = await openAutoresearchStorageIfExists(ctx.cwd);
85
+ const currentBranch = (await git.branch.current(ctx.cwd)) ?? null;
86
+ const session = storage?.getActiveSessionForBranch(currentBranch) ?? null;
87
+ if (!storage || !session) {
120
88
  return {
121
89
  content: [
122
90
  {
123
91
  type: "text",
124
- text:
125
- "Error: Benchmark.command in autoresearch.md must invoke `autoresearch.sh` directly before logging. " +
126
- "Fix autoresearch.md or move the workload into autoresearch.sh.",
92
+ text: "Error: no active autoresearch session for the current branch. Call init_experiment first.",
127
93
  },
128
94
  ],
129
95
  };
130
96
  }
131
-
132
- const pendingRun =
133
- runtime.lastRunSummary ?? (await readPendingRunSummary(workDir, collectLoggedRunNumbers(state.results)));
97
+ const pendingRun = storage.getPendingRun(session.id);
134
98
  if (!pendingRun) {
135
99
  return {
136
- content: [{ type: "text", text: "Error: no unlogged run is available. Run run_experiment first." }],
137
- };
138
- }
139
-
140
- applyAutoresearchContractToExperimentState(contractResult.contract, state);
141
- const logPreamble =
142
- "Refreshed session fields from autoresearch.md before logging (benchmark, scope, constraints).\n\n";
143
- runtime.lastRunSummary = pendingRun;
144
- runtime.lastRunAsi = pendingRun.parsedAsi;
145
- runtime.lastRunChecks =
146
- pendingRun.checksPass === null
147
- ? null
148
- : {
149
- pass: pendingRun.checksPass,
150
- output: "",
151
- duration: pendingRun.checksDurationSeconds ?? 0,
152
- };
153
- runtime.lastRunDuration = pendingRun.durationSeconds;
154
-
155
- if (pendingRun.parsedPrimary !== null && params.metric !== pendingRun.parsedPrimary) {
156
- return {
157
- content: [
158
- {
159
- type: "text",
160
- text:
161
- "Error: metric does not match the parsed primary metric from the pending run.\n" +
162
- `Expected: ${pendingRun.parsedPrimary}\nReceived: ${params.metric}`,
163
- },
164
- ],
165
- };
166
- }
167
-
168
- if (params.status === "keep" && !pendingRun.passed) {
169
- return {
170
- content: [
171
- {
172
- type: "text",
173
- text: "Error: cannot keep this run because the pending benchmark did not pass. Log it as crash or checks_failed instead.",
174
- },
175
- ],
176
- };
177
- }
178
-
179
- if (params.status === "keep" && runtime.lastRunChecks && !runtime.lastRunChecks.pass) {
180
- return {
181
- content: [
182
- {
183
- type: "text",
184
- text: "Error: cannot keep this run because autoresearch.checks.sh failed. Log it as checks_failed instead.",
185
- },
186
- ],
187
- };
188
- }
189
-
190
- const observedStatusError = validateObservedStatus(params.status, pendingRun);
191
- if (observedStatusError) {
192
- return {
193
- content: [{ type: "text", text: `Error: ${observedStatusError}` }],
100
+ content: [{ type: "text", text: "Error: no pending run available. Run run_experiment first." }],
194
101
  };
195
102
  }
196
103
 
197
- const forceLoose = params.force === true;
198
- const secondaryMetrics = buildSecondaryMetrics(params.metrics, pendingRun.parsedMetrics, state.metricName);
104
+ const runtime = options.getRuntime(ctx);
199
105
 
200
- const mergedAsi = mergeAsi(runtime.lastRunAsi, sanitizeAsi(params.asi));
201
- if (!forceLoose) {
202
- const asiValidationError = validateAsiRequirements(mergedAsi, params.status);
203
- if (asiValidationError) {
204
- return {
205
- content: [{ type: "text", text: `Error: ${asiValidationError}` }],
206
- };
207
- }
106
+ const flaggedRuns: LogDetails["flaggedRuns"] = [];
107
+ for (const flag of params.flag_runs ?? []) {
108
+ const target = storage.getRunById(flag.run_id);
109
+ if (!target || target.sessionId !== session.id) continue;
110
+ storage.flagRun(flag.run_id, flag.reason);
111
+ flaggedRuns.push({ runId: flag.run_id, reason: flag.reason });
208
112
  }
209
113
 
210
- const preRunDirtyPaths = pendingRun.preRunDirtyPaths;
211
- let keepScopeValidation: { committablePaths: string[] } | undefined;
212
- if (params.status === "keep") {
213
- const scopeValidation = await validateKeepPaths(options, workDir, state);
214
- if (typeof scopeValidation === "string") {
215
- return {
216
- content: [{ type: "text", text: `Error: ${scopeValidation}` }],
217
- };
218
- }
219
- const currentBestMetric = findBestKeptMetric(state.results, state.currentSegment, state.bestDirection);
220
- if (
221
- !forceLoose &&
222
- currentBestMetric !== null &&
223
- params.metric !== currentBestMetric &&
224
- !isBetter(params.metric, currentBestMetric, state.bestDirection)
225
- ) {
226
- return {
227
- content: [
228
- {
229
- type: "text",
230
- text:
231
- "Error: cannot keep this run because the primary metric regressed.\n" +
232
- `Current best: ${currentBestMetric}\nReceived: ${params.metric}`,
233
- },
234
- ],
235
- };
236
- }
237
- keepScopeValidation = scopeValidation;
114
+ const branchName = await getCurrentAutoresearchBranch(options.pi, ctx.cwd);
115
+ const onAutoresearchBranch = branchName !== null;
116
+
117
+ let allModified: string[];
118
+ if (onAutoresearchBranch) {
119
+ // On a dedicated autoresearch branch every iteration starts from a clean
120
+ // worktree (init_experiment baseline + previous keep commit / discard reset),
121
+ // so any currently-dirty path is the agent's iteration change. Off-branch we
122
+ // can't tell user dirt apart from agent edits, so we keep the (lossy)
123
+ // preRunDirtyPaths filter.
124
+ const statusText = await tryGitStatus(ctx.cwd);
125
+ const workDirPrefix = await tryGitPrefix(ctx.cwd);
126
+ allModified = parseWorkDirDirtyPaths(statusText, workDirPrefix);
127
+ } else {
128
+ const { modifiedTracked, modifiedUntracked } = await detectModifiedPaths(
129
+ ctx.cwd,
130
+ pendingRun.preRunDirtyPaths,
131
+ );
132
+ allModified = [...modifiedTracked, ...modifiedUntracked];
238
133
  }
134
+ const scopeDeviations = computeScopeDeviations(allModified, session);
239
135
 
240
- const experiment: ExperimentResult = {
241
- runNumber: runtime.lastRunNumber ?? pendingRun.runNumber,
242
- commit: params.commit.slice(0, 7),
243
- metric: params.metric,
244
- metrics: secondaryMetrics,
245
- status: params.status,
246
- description: params.description,
247
- timestamp: Date.now(),
248
- segment: state.currentSegment,
249
- confidence: null,
250
- asi: mergedAsi,
251
- };
136
+ const justification = params.justification?.trim() || null;
137
+ const warnings: string[] = [];
252
138
 
253
- const activeBranch = await getCurrentAutoresearchBranch(options.pi, workDir);
254
- if (!activeBranch) {
255
- return {
256
- content: [
257
- {
258
- type: "text",
259
- text:
260
- "Error: autoresearch keep/discard actions require an active `autoresearch/...` branch. " +
261
- "Run `/autoresearch` again to restore the protected branch before logging this run.",
262
- },
263
- ],
264
- };
265
- }
139
+ const headSha = await tryReadHeadSha(ctx.cwd);
140
+ const explicitCommit = params.commit?.trim();
141
+ let commitHash = explicitCommit && explicitCommit.length > 0 ? explicitCommit : headSha;
266
142
 
267
143
  let gitNote: string | null = null;
268
144
  if (params.status === "keep") {
269
- const commitResult = await commitKeptExperiment(options, workDir, state, experiment, keepScopeValidation);
270
- if (commitResult.error) {
271
- return {
272
- content: [{ type: "text", text: `Error: ${commitResult.error}` }],
273
- };
145
+ if (onAutoresearchBranch && allModified.length > 0) {
146
+ const commitResult = await commitKeptExperiment(
147
+ ctx.cwd,
148
+ params.description,
149
+ params.status,
150
+ params.metric,
151
+ params.metrics ?? {},
152
+ allModified,
153
+ session.primaryMetric,
154
+ );
155
+ if (commitResult.error) {
156
+ return {
157
+ content: [{ type: "text", text: `Error: ${commitResult.error}` }],
158
+ };
159
+ }
160
+ gitNote = commitResult.note ?? null;
161
+ const newSha = await tryReadHeadSha(ctx.cwd);
162
+ if (newSha) commitHash = newSha;
163
+ } else if (!onAutoresearchBranch) {
164
+ warnings.push(
165
+ "Auto-commit skipped: not on a dedicated autoresearch branch. Modified files remain in the worktree.",
166
+ );
167
+ } else if (allModified.length === 0) {
168
+ gitNote = "nothing to commit";
169
+ }
170
+ if (scopeDeviations.length > 0) {
171
+ if (justification === null) {
172
+ warnings.push(
173
+ `Kept with unjustified scope deviations: ${scopeDeviations.join(", ")}. Pass \`justification\` next time or \`flag_runs\` this entry on a future log_experiment if it was a mistake.`,
174
+ );
175
+ } else {
176
+ warnings.push(`Kept with scope deviations (justified): ${scopeDeviations.join(", ")}`);
177
+ }
274
178
  }
275
- gitNote = commitResult.note ?? null;
276
- } else if (!params.skip_restore) {
277
- const revertResult = await revertFailedExperiment(options, workDir, preRunDirtyPaths);
179
+ } else {
180
+ const revertResult = await revertFailedExperiment(
181
+ ctx.cwd,
182
+ pendingRun.preRunDirtyPaths,
183
+ onAutoresearchBranch,
184
+ );
278
185
  if (revertResult.error) {
279
186
  return {
280
187
  content: [{ type: "text", text: `Error: ${revertResult.error}` }],
@@ -283,57 +190,78 @@ export function createLogExperimentTool(
283
190
  gitNote = revertResult.note ?? null;
284
191
  }
285
192
 
286
- const previousState = cloneExperimentState(state);
287
- state.results.push(experiment);
288
- registerSecondaryMetrics(state, secondaryMetrics);
289
- state.bestMetric = findBaselineMetric(state.results, state.currentSegment);
290
- state.confidence = computeConfidence(state.results, state.currentSegment, state.bestDirection);
291
- experiment.confidence = state.confidence;
292
-
293
- const wallClockSeconds = runtime.lastRunDuration;
294
- try {
295
- persistRun(workDir, experiment);
296
- } catch (error) {
297
- runtime.state = previousState;
298
- options.dashboard.updateWidget(ctx, runtime);
299
- options.dashboard.requestRender();
300
- throw error;
301
- }
302
- try {
303
- await updateRunMetadata(runtime.lastRunArtifactDir ?? pendingRun.runDirectory, {
304
- commit: experiment.commit,
305
- confidence: experiment.confidence,
306
- description: experiment.description,
307
- gitNote,
308
- loggedAt: new Date(experiment.timestamp).toISOString(),
309
- loggedAsi: experiment.asi,
310
- loggedMetric: experiment.metric,
311
- loggedMetrics: experiment.metrics,
312
- runNumber: runtime.lastRunNumber ?? pendingRun.runNumber,
313
- status: experiment.status,
314
- wallClockSeconds,
315
- });
316
- } catch (error) {
317
- logger.warn("Failed to update autoresearch run metadata after persisting JSONL history", {
318
- error: error instanceof Error ? error.message : String(error),
319
- runDirectory: runtime.lastRunArtifactDir ?? pendingRun.runDirectory,
320
- runNumber: runtime.lastRunNumber ?? pendingRun.runNumber,
321
- });
193
+ const metric = params.metric;
194
+ const secondaryMetrics: NumericMetricMap = mergeMetrics(
195
+ pendingRun.parsedMetrics,
196
+ params.metrics,
197
+ session.primaryMetric,
198
+ );
199
+ const asi: ASIData | undefined = mergeAsi(pendingRun.parsedAsi, sanitizeAsi(params.asi));
200
+
201
+ if (pendingRun.parsedPrimary !== null && metric !== pendingRun.parsedPrimary) {
202
+ warnings.push(
203
+ `Logged metric ${metric} differs from parsed primary ${pendingRun.parsedPrimary}. Both values stored.`,
204
+ );
322
205
  }
323
206
 
207
+ const loggedAt = Date.now();
208
+ const tentativeRun = storage.markRunLogged({
209
+ runId: pendingRun.id,
210
+ status: params.status,
211
+ description: params.description,
212
+ metric,
213
+ metrics: secondaryMetrics,
214
+ asi: asi ?? null,
215
+ commitHash,
216
+ confidence: null,
217
+ modifiedPaths: allModified,
218
+ scopeDeviations,
219
+ justification,
220
+ loggedAt,
221
+ });
222
+
223
+ // Recompute confidence with this run included
224
+ const refreshedSession = storage.getSessionById(session.id) ?? session;
225
+ const loggedRuns = storage.listLoggedRuns(session.id);
226
+ const stateForConfidence = buildExperimentState(refreshedSession, loggedRuns);
227
+ const confidence = computeConfidence(
228
+ stateForConfidence.results,
229
+ stateForConfidence.currentSegment,
230
+ stateForConfidence.bestDirection,
231
+ );
232
+ storage.updateRunConfidence(tentativeRun.id, confidence);
233
+
234
+ const finalState = buildExperimentState(refreshedSession, storage.listLoggedRuns(session.id));
235
+ runtime.state = finalState;
324
236
  runtime.runningExperiment = null;
325
- runtime.lastRunChecks = null;
237
+ runtime.lastRunSummary = null;
326
238
  runtime.lastRunDuration = null;
327
239
  runtime.lastRunAsi = null;
328
240
  runtime.lastRunArtifactDir = null;
329
241
  runtime.lastRunNumber = null;
330
- runtime.lastRunSummary = null;
331
242
  runtime.autoResumeArmed = true;
332
243
  runtime.lastAutoResumePendingRunNumber = null;
333
244
 
334
- const currentSegmentRuns = currentResults(state.results, state.currentSegment).length;
335
- const text = logPreamble + buildLogText(state, experiment, currentSegmentRuns, wallClockSeconds, gitNote);
336
- if (state.maxExperiments !== null && currentSegmentRuns >= state.maxExperiments) {
245
+ const experiment: ExperimentResult = {
246
+ runNumber: tentativeRun.id,
247
+ commit: (commitHash ?? "").slice(0, 12),
248
+ metric,
249
+ metrics: secondaryMetrics,
250
+ status: params.status,
251
+ description: params.description,
252
+ timestamp: loggedAt,
253
+ segment: pendingRun.segment,
254
+ confidence,
255
+ asi,
256
+ modifiedPaths: allModified,
257
+ scopeDeviations,
258
+ justification,
259
+ flagged: false,
260
+ flaggedReason: null,
261
+ };
262
+
263
+ const segmentRunCount = currentResults(finalState.results, finalState.currentSegment).length;
264
+ if (finalState.maxExperiments !== null && segmentRunCount >= finalState.maxExperiments) {
337
265
  runtime.autoresearchMode = false;
338
266
  options.pi.appendEntry(
339
267
  "autoresearch-control",
@@ -343,19 +271,30 @@ export function createLogExperimentTool(
343
271
  options.pi.getActiveTools().filter(name => !EXPERIMENT_TOOL_NAMES.includes(name)),
344
272
  );
345
273
  }
274
+
346
275
  options.dashboard.updateWidget(ctx, runtime);
347
276
  options.dashboard.requestRender();
348
277
 
278
+ const wallClockSeconds = pendingRun.durationMs !== null ? pendingRun.durationMs / 1000 : null;
279
+ const text = buildLogText(
280
+ finalState,
281
+ experiment,
282
+ segmentRunCount,
283
+ wallClockSeconds,
284
+ gitNote,
285
+ warnings,
286
+ flaggedRuns,
287
+ );
288
+
349
289
  return {
350
290
  content: [{ type: "text", text }],
351
291
  details: {
352
- experiment: {
353
- ...experiment,
354
- metrics: { ...experiment.metrics },
355
- asi: experiment.asi ? structuredClone(experiment.asi) : undefined,
356
- },
357
- state: cloneExperimentState(state),
292
+ experiment,
293
+ state: finalState,
358
294
  wallClockSeconds,
295
+ scopeDeviations,
296
+ justification,
297
+ flaggedRuns,
359
298
  },
360
299
  };
361
300
  },
@@ -373,320 +312,163 @@ export function createLogExperimentTool(
373
312
  if (!details) {
374
313
  return new Text(replaceTabs(result.content.find(part => part.type === "text")?.text ?? ""), 0, 0);
375
314
  }
376
- const summary = renderSummary(details, theme);
377
- return new Text(summary, 0, 0);
315
+ return new Text(renderSummary(details, theme), 0, 0);
378
316
  },
379
317
  };
380
318
  }
381
319
 
382
- function cloneMetrics(value: NumericMetricMap | undefined): NumericMetricMap {
383
- return value ? { ...value } : {};
384
- }
385
-
386
- function buildSecondaryMetrics(
387
- overrides: NumericMetricMap | undefined,
388
- parsedMetrics: NumericMetricMap | null,
389
- primaryMetricName: string,
390
- ): NumericMetricMap {
391
- const merged: NumericMetricMap = {};
392
- for (const [name, value] of Object.entries(parsedMetrics ?? {})) {
393
- if (name === "__proto__" || name === "constructor" || name === "prototype") continue;
394
- if (name === primaryMetricName) continue;
395
- merged[name] = value;
396
- }
397
- for (const [name, value] of Object.entries(cloneMetrics(overrides))) {
398
- if (name === "__proto__" || name === "constructor" || name === "prototype") continue;
399
- merged[name] = value;
400
- }
401
- return merged;
402
- }
403
-
404
- function sanitizeAsi(value: { [key: string]: unknown } | undefined): ASIData | undefined {
405
- if (!value) return undefined;
406
- const result: ASIData = {};
407
- for (const [key, entryValue] of Object.entries(value)) {
408
- if (key === "__proto__" || key === "constructor" || key === "prototype") continue;
409
- const sanitized = sanitizeAsiValue(entryValue);
410
- if (sanitized !== undefined) {
411
- result[key] = sanitized;
412
- }
413
- }
414
- return Object.keys(result).length > 0 ? result : undefined;
415
- }
416
-
417
- function sanitizeAsiValue(value: unknown): ASIData[string] | undefined {
418
- if (value === null) return null;
419
- if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") return value;
420
- if (Array.isArray(value)) {
421
- const items = value
422
- .map(item => sanitizeAsiValue(item))
423
- .filter((item): item is NonNullable<typeof item> => item !== undefined);
424
- return items;
425
- }
426
- if (typeof value === "object") {
427
- const objectValue = value as { [key: string]: unknown };
428
- const result: ASIData = {};
429
- for (const [key, entryValue] of Object.entries(objectValue)) {
430
- if (key === "__proto__" || key === "constructor" || key === "prototype") continue;
431
- const sanitized = sanitizeAsiValue(entryValue);
432
- if (sanitized !== undefined) {
433
- result[key] = sanitized;
434
- }
435
- }
436
- return result;
437
- }
438
- return undefined;
439
- }
440
-
441
- export function validateAsiRequirements(asi: ASIData | undefined, status: ExperimentResult["status"]): string | null {
442
- if (!asi) {
443
- return "asi is required. Include at minimum a non-empty hypothesis.";
444
- }
445
- if (typeof asi.hypothesis !== "string" || asi.hypothesis.trim().length === 0) {
446
- return "asi.hypothesis is required and must be a non-empty string.";
447
- }
448
- if (status === "keep") return null;
449
- if (typeof asi.rollback_reason !== "string" || asi.rollback_reason.trim().length === 0) {
450
- return "asi.rollback_reason is required for discard, crash, and checks_failed results.";
451
- }
452
- if (typeof asi.next_action_hint !== "string" || asi.next_action_hint.trim().length === 0) {
453
- return "asi.next_action_hint is required for discard, crash, and checks_failed results.";
454
- }
455
- return null;
456
- }
457
-
458
- function registerSecondaryMetrics(state: ExperimentState, metrics: NumericMetricMap): void {
459
- for (const name of Object.keys(metrics)) {
460
- if (state.secondaryMetrics.some(metric => metric.name === name)) continue;
461
- state.secondaryMetrics.push({
462
- name,
463
- unit: inferMetricUnitFromName(name),
464
- });
465
- }
466
- }
467
-
468
- function persistRun(workDir: string, experiment: ExperimentResult): void {
469
- const entry = {
470
- run: experiment.runNumber,
471
- ...experiment,
472
- };
473
- const jsonlPath = path.join(workDir, "autoresearch.jsonl");
474
- fs.appendFileSync(jsonlPath, `${JSON.stringify(entry)}\n`);
475
- }
476
- function validateObservedStatus(
477
- status: ExperimentResult["status"],
478
- pendingRun: { checksPass: boolean | null; passed: boolean },
479
- ): string | null {
480
- if (pendingRun.checksPass === false) {
481
- return status === "checks_failed"
482
- ? null
483
- : "benchmark checks failed for the pending run. Log it as checks_failed.";
484
- }
485
- if (!pendingRun.passed) {
486
- return status === "crash" ? null : "the pending benchmark failed. Log it as crash.";
487
- }
488
- return status === "keep" || status === "discard" ? null : "the pending benchmark passed. Log it as keep or discard.";
320
+ interface KeepCommitResult {
321
+ error?: string;
322
+ note?: string;
489
323
  }
490
324
 
491
325
  async function commitKeptExperiment(
492
- _options: AutoresearchToolFactoryOptions,
493
- workDir: string,
494
- state: ExperimentState,
495
- experiment: ExperimentResult,
496
- scopeValidation: { committablePaths: string[] } | undefined,
326
+ cwd: string,
327
+ description: string,
328
+ status: ExperimentResult["status"],
329
+ metric: number,
330
+ metrics: NumericMetricMap,
331
+ files: string[],
332
+ primaryMetric: string,
497
333
  ): Promise<KeepCommitResult> {
498
- if (!scopeValidation || scopeValidation.committablePaths.length === 0) {
499
- return { note: "nothing to commit" };
500
- }
501
-
334
+ if (files.length === 0) return { note: "nothing to commit" };
502
335
  try {
503
- await git.stage.files(workDir, scopeValidation.committablePaths);
336
+ await git.stage.files(cwd, files);
504
337
  } catch (err) {
505
- return {
506
- error: `git add failed: ${err instanceof Error ? err.message : String(err)}`,
507
- };
338
+ return { error: `git add failed: ${err instanceof Error ? err.message : String(err)}` };
508
339
  }
509
-
510
- if (!(await git.diff.has(workDir, { cached: true, files: scopeValidation.committablePaths }))) {
340
+ if (!(await git.diff.has(cwd, { cached: true, files }))) {
511
341
  return { note: "nothing to commit" };
512
342
  }
513
-
514
343
  const payload: { [key: string]: string | number } = {
515
- status: experiment.status,
516
- [state.metricName]: experiment.metric,
344
+ status,
345
+ [primaryMetric]: metric,
517
346
  };
518
- for (const [name, value] of Object.entries(experiment.metrics)) {
347
+ for (const [name, value] of Object.entries(metrics)) {
519
348
  payload[name] = value;
520
349
  }
521
- const commitMessage = `${experiment.description}\n\nResult: ${JSON.stringify(payload)}`;
522
- let commitResultText = "";
350
+ const commitMessage = `${description}\n\nResult: ${JSON.stringify(payload)}`;
523
351
  try {
524
- const commitResult = await git.commit(workDir, commitMessage, {
525
- files: scopeValidation.committablePaths,
526
- });
527
- commitResultText = mergeStdoutStderr(commitResult);
352
+ const commitResult = await git.commit(cwd, commitMessage, { files });
353
+ const summary = `${commitResult.stdout}${commitResult.stderr}`.split("\n").find(line => line.trim().length > 0);
354
+ return { note: summary?.trim() ?? "committed" };
528
355
  } catch (err) {
529
- return {
530
- error: `git commit failed: ${err instanceof Error ? err.message : String(err)}`,
531
- };
532
- }
533
-
534
- const newCommit = (await git.head.short(workDir, 7)) ?? "";
535
- if (newCommit.length >= 7) {
536
- experiment.commit = newCommit;
356
+ return { error: `git commit failed: ${err instanceof Error ? err.message : String(err)}` };
537
357
  }
538
- const summaryLine = commitResultText.split("\n").find(line => line.trim().length > 0) ?? "committed";
539
- return { note: summaryLine.trim() };
540
358
  }
541
359
 
542
360
  async function revertFailedExperiment(
543
- options: AutoresearchToolFactoryOptions,
544
- workDir: string,
361
+ cwd: string,
545
362
  preRunDirtyPaths: string[],
363
+ onAutoresearchBranch: boolean,
546
364
  ): Promise<KeepCommitResult> {
547
- let statusText: string;
548
- try {
549
- statusText = await git.status(workDir, {
550
- pathspecs: ["."],
551
- porcelainV1: true,
552
- untrackedFiles: "all",
553
- z: true,
554
- });
555
- } catch (err) {
556
- return {
557
- error: `git status failed: ${err instanceof Error ? err.message : String(err)}`,
558
- };
365
+ if (onAutoresearchBranch) {
366
+ // Discard reverts only the current iteration's uncommitted changes — never
367
+ // rewinds prior `keep` commits. Reset to HEAD so any kept improvements
368
+ // already on the branch survive.
369
+ try {
370
+ await git.reset(cwd, { hard: true, target: "HEAD" });
371
+ await git.clean(cwd);
372
+ return { note: "worktree reset to HEAD" };
373
+ } catch (err) {
374
+ return { error: `git reset/clean failed: ${err instanceof Error ? err.message : String(err)}` };
375
+ }
559
376
  }
560
377
 
561
- const workDirPrefix = await readGitWorkDirPrefix(options, workDir);
378
+ const statusText = await tryGitStatus(cwd);
379
+ const workDirPrefix = await tryGitPrefix(cwd);
562
380
  const { tracked, untracked } = computeRunModifiedPaths(preRunDirtyPaths, statusText, workDirPrefix);
563
- const totalReverted = tracked.length + untracked.length;
564
- if (totalReverted === 0) {
565
- return { note: "nothing to revert" };
566
- }
567
-
381
+ const total = tracked.length + untracked.length;
382
+ if (total === 0) return { note: "nothing to revert" };
568
383
  if (tracked.length > 0) {
569
384
  try {
570
- await git.restore(workDir, { files: tracked, source: "HEAD", staged: true, worktree: true });
385
+ await git.restore(cwd, { files: tracked, source: "HEAD", staged: true, worktree: true });
571
386
  } catch (err) {
572
- return {
573
- error: `git restore failed: ${err instanceof Error ? err.message : String(err)}`,
574
- };
387
+ return { error: `git restore failed: ${err instanceof Error ? err.message : String(err)}` };
575
388
  }
576
389
  }
577
-
578
390
  for (const filePath of untracked) {
579
- const absolutePath = path.join(workDir, filePath);
580
391
  try {
581
- fs.rmSync(absolutePath, { force: true, recursive: true });
392
+ fs.rmSync(path.join(cwd, filePath), { force: true, recursive: true });
582
393
  } catch {
583
- // Best-effort removal of untracked files
394
+ // best effort
584
395
  }
585
396
  }
397
+ return { note: `reverted ${total} file${total === 1 ? "" : "s"}` };
398
+ }
586
399
 
587
- return { note: `reverted ${totalReverted} file${totalReverted === 1 ? "" : "s"}` };
400
+ async function detectModifiedPaths(
401
+ cwd: string,
402
+ preRunDirtyPaths: string[],
403
+ ): Promise<{ modifiedTracked: string[]; modifiedUntracked: string[] }> {
404
+ const statusText = await tryGitStatus(cwd);
405
+ const workDirPrefix = await tryGitPrefix(cwd);
406
+ const { tracked, untracked } = computeRunModifiedPaths(preRunDirtyPaths, statusText, workDirPrefix);
407
+ return { modifiedTracked: tracked, modifiedUntracked: untracked };
588
408
  }
589
409
 
590
- function mergeStdoutStderr(result: { stderr: string; stdout: string }): string {
591
- return `${result.stdout}${result.stderr}`;
410
+ function computeScopeDeviations(modifiedPaths: string[], session: SessionRow): string[] {
411
+ const deviations: string[] = [];
412
+ for (const filePath of modifiedPaths) {
413
+ if (session.offLimits.some(spec => pathMatchesSpec(filePath, spec))) {
414
+ deviations.push(filePath);
415
+ continue;
416
+ }
417
+ if (session.scopePaths.length > 0 && !session.scopePaths.some(spec => pathMatchesSpec(filePath, spec))) {
418
+ deviations.push(filePath);
419
+ }
420
+ }
421
+ return deviations;
592
422
  }
593
423
 
594
- async function validateKeepPaths(
595
- options: AutoresearchToolFactoryOptions,
596
- workDir: string,
597
- state: ExperimentState,
598
- ): Promise<{ committablePaths: string[] } | string> {
599
- if (state.scopePaths.length === 0) {
600
- return "Files in Scope is empty for the current segment. Re-run init_experiment after fixing autoresearch.md.";
424
+ function mergeMetrics(
425
+ parsed: NumericMetricMap | null,
426
+ overrides: NumericMetricMap | undefined,
427
+ primaryMetricName: string,
428
+ ): NumericMetricMap {
429
+ const merged: NumericMetricMap = {};
430
+ for (const [name, value] of Object.entries(parsed ?? {})) {
431
+ if (name === primaryMetricName) continue;
432
+ merged[name] = value;
601
433
  }
434
+ for (const [name, value] of Object.entries(ensureNumericMetricMap(overrides))) {
435
+ merged[name] = value;
436
+ }
437
+ return merged;
438
+ }
602
439
 
603
- let statusText: string;
440
+ async function tryReadHeadSha(cwd: string): Promise<string | null> {
604
441
  try {
605
- statusText = await git.status(workDir, {
606
- pathspecs: ["."],
607
- porcelainV1: true,
608
- untrackedFiles: "all",
609
- z: true,
610
- });
611
- } catch (err) {
612
- return `git status failed: ${err instanceof Error ? err.message : String(err)}`;
442
+ return (await git.head.sha(cwd)) ?? null;
443
+ } catch {
444
+ return null;
613
445
  }
446
+ }
614
447
 
615
- const workDirPrefix = await readGitWorkDirPrefix(options, workDir);
616
- const committablePaths: string[] = [];
617
- for (const entry of parseWorkDirDirtyPathsWithStatus(statusText, workDirPrefix)) {
618
- if (isAutoresearchLocalStatePath(entry.path)) {
619
- continue;
620
- }
621
- if (isAutoresearchCommittableFile(entry.path)) {
622
- committablePaths.push(entry.path);
623
- continue;
624
- }
625
- if (state.offLimits.some(spec => pathMatchesContractPath(entry.path, spec))) {
626
- return `cannot keep this run because ${entry.path} is listed under Off Limits in autoresearch.md`;
627
- }
628
- if (!state.scopePaths.some(spec => pathMatchesContractPath(entry.path, spec))) {
629
- return `cannot keep this run because ${entry.path} is outside Files in Scope`;
630
- }
631
- committablePaths.push(entry.path);
448
+ async function tryGitStatus(cwd: string): Promise<string> {
449
+ try {
450
+ return await git.status(cwd, { porcelainV1: true, untrackedFiles: "all", z: true });
451
+ } catch {
452
+ return "";
632
453
  }
633
-
634
- return { committablePaths };
635
454
  }
636
455
 
637
- async function updateRunMetadata(
638
- runDirectory: string | null,
639
- metadata: {
640
- commit: string;
641
- confidence: number | null;
642
- description: string;
643
- gitNote: string | null;
644
- loggedAt: string;
645
- loggedAsi: ASIData | undefined;
646
- loggedMetric: number;
647
- loggedMetrics: NumericMetricMap;
648
- runNumber: number | null;
649
- status: ExperimentResult["status"];
650
- wallClockSeconds: number | null;
651
- },
652
- ): Promise<void> {
653
- if (!runDirectory) return;
654
- const runJsonPath = path.join(runDirectory, "run.json");
655
- let existing: Record<string, unknown> = {};
456
+ async function tryGitPrefix(cwd: string): Promise<string> {
656
457
  try {
657
- existing = (await Bun.file(runJsonPath).json()) as Record<string, unknown>;
458
+ return await git.show.prefix(cwd);
658
459
  } catch {
659
- existing = {};
460
+ return "";
660
461
  }
661
- await Bun.write(
662
- runJsonPath,
663
- JSON.stringify(
664
- {
665
- ...existing,
666
- loggedRunNumber: metadata.runNumber,
667
- loggedAt: metadata.loggedAt,
668
- loggedAsi: metadata.loggedAsi,
669
- loggedMetric: metadata.loggedMetric,
670
- loggedMetrics: metadata.loggedMetrics,
671
- status: metadata.status,
672
- description: metadata.description,
673
- commit: metadata.commit,
674
- gitNote: metadata.gitNote,
675
- confidence: metadata.confidence,
676
- wallClockSeconds: metadata.wallClockSeconds,
677
- },
678
- null,
679
- 2,
680
- ),
681
- );
682
462
  }
683
463
 
684
464
  function buildLogText(
685
465
  state: ExperimentState,
686
466
  experiment: ExperimentResult,
687
- currentSegmentRuns: number,
467
+ segmentRunCount: number,
688
468
  wallClockSeconds: number | null,
689
469
  gitNote: string | null,
470
+ warnings: string[],
471
+ flaggedRuns: LogDetails["flaggedRuns"],
690
472
  ): string {
691
473
  const displayRunNumber = experiment.runNumber ?? state.results.length;
692
474
  const lines = [`Logged run #${displayRunNumber}: ${experiment.status} - ${experiment.description}`];
@@ -696,7 +478,7 @@ function buildLogText(
696
478
  if (state.bestMetric !== null) {
697
479
  lines.push(`Baseline ${state.metricName}: ${formatNum(state.bestMetric, state.metricUnit)}`);
698
480
  }
699
- if (currentSegmentRuns > 1 && state.bestMetric !== null && experiment.metric !== state.bestMetric) {
481
+ if (segmentRunCount > 1 && state.bestMetric !== null && experiment.metric !== state.bestMetric) {
700
482
  const delta = ((experiment.metric - state.bestMetric) / state.bestMetric) * 100;
701
483
  const sign = delta > 0 ? "+" : "";
702
484
  lines.push(`This run: ${formatNum(experiment.metric, state.metricUnit)} (${sign}${delta.toFixed(1)}%)`);
@@ -708,7 +490,7 @@ function buildLogText(
708
490
  const parts = Object.entries(experiment.metrics).map(([name, value]) => {
709
491
  const unit = state.secondaryMetrics.find(metric => metric.name === name)?.unit ?? "";
710
492
  const baseline = baselineSecondary[name];
711
- if (baseline === undefined || baseline === 0 || currentSegmentRuns === 1) {
493
+ if (baseline === undefined || baseline === 0 || segmentRunCount === 1) {
712
494
  return `${name}: ${formatNum(value, unit)}`;
713
495
  }
714
496
  const delta = ((value - baseline) / baseline) * 100;
@@ -717,6 +499,10 @@ function buildLogText(
717
499
  });
718
500
  lines.push(`Secondary metrics: ${parts.join(" ")}`);
719
501
  }
502
+ const bestKept = findBestKeptMetric(state.results, state.currentSegment, state.bestDirection);
503
+ if (bestKept !== null && state.bestMetric !== null && bestKept !== state.bestMetric) {
504
+ lines.push(`Best kept ${state.metricName}: ${formatNum(bestKept, state.metricUnit)}`);
505
+ }
720
506
  if (experiment.asi) {
721
507
  const asiSummary = Object.entries(experiment.asi)
722
508
  .map(([key, value]) => `${key}: ${truncateAsiValue(value)}`)
@@ -731,21 +517,19 @@ function buildLogText(
731
517
  lines.push(`Git: ${gitNote}`);
732
518
  }
733
519
  if (state.maxExperiments !== null) {
734
- lines.push(`Progress: ${currentSegmentRuns}/${state.maxExperiments} runs in current segment`);
735
- if (currentSegmentRuns >= state.maxExperiments) {
520
+ lines.push(`Progress: ${segmentRunCount}/${state.maxExperiments} runs in current segment`);
521
+ if (segmentRunCount >= state.maxExperiments) {
736
522
  lines.push(`Maximum experiments reached (${state.maxExperiments}). Autoresearch mode is now off.`);
737
523
  }
738
524
  }
739
- return lines.join("\n");
740
- }
741
-
742
- async function readGitWorkDirPrefix(options: AutoresearchToolFactoryOptions, workDir: string): Promise<string> {
743
- void options;
744
- try {
745
- return await git.show.prefix(workDir);
746
- } catch {
747
- return "";
525
+ if (flaggedRuns.length > 0) {
526
+ const formatted = flaggedRuns.map(({ runId, reason }) => `#${runId} (${reason})`).join(", ");
527
+ lines.push(`Flagged: ${formatted}`);
748
528
  }
529
+ for (const warning of warnings) {
530
+ lines.push(`Warning: ${warning}`);
531
+ }
532
+ return lines.join("\n");
749
533
  }
750
534
 
751
535
  function truncateAsiValue(value: ASIData[string]): string {
@@ -764,5 +548,8 @@ function renderSummary(details: LogDetails, theme: Theme): string {
764
548
  if (state.confidence !== null) {
765
549
  summary += ` ${theme.fg("dim", `conf ${state.confidence.toFixed(1)}x`)}`;
766
550
  }
551
+ if (details.scopeDeviations.length > 0) {
552
+ summary += ` ${theme.fg("warning", `deviations:${details.scopeDeviations.length}`)}`;
553
+ }
767
554
  return summary;
768
555
  }