@oh-my-pi/pi-coding-agent 13.18.0 → 14.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235)
  1. package/CHANGELOG.md +316 -1
  2. package/package.json +86 -24
  3. package/scripts/format-prompts.ts +2 -2
  4. package/src/autoresearch/apply-contract-to-state.ts +24 -0
  5. package/src/autoresearch/contract.ts +0 -44
  6. package/src/autoresearch/dashboard.ts +1 -2
  7. package/src/autoresearch/git.ts +116 -30
  8. package/src/autoresearch/helpers.ts +49 -0
  9. package/src/autoresearch/index.ts +28 -187
  10. package/src/autoresearch/prompt.md +26 -9
  11. package/src/autoresearch/state.ts +0 -6
  12. package/src/autoresearch/tools/init-experiment.ts +202 -117
  13. package/src/autoresearch/tools/log-experiment.ts +123 -178
  14. package/src/autoresearch/tools/run-experiment.ts +48 -10
  15. package/src/autoresearch/types.ts +2 -2
  16. package/src/capability/index.ts +4 -2
  17. package/src/cli/file-processor.ts +3 -3
  18. package/src/cli/grep-cli.ts +8 -8
  19. package/src/cli/grievances-cli.ts +78 -0
  20. package/src/cli/read-cli.ts +67 -0
  21. package/src/cli/setup-cli.ts +4 -4
  22. package/src/cli/update-cli.ts +3 -3
  23. package/src/cli.ts +2 -0
  24. package/src/commands/grep.ts +6 -1
  25. package/src/commands/grievances.ts +20 -0
  26. package/src/commands/read.ts +33 -0
  27. package/src/commit/agentic/agent.ts +5 -8
  28. package/src/commit/agentic/index.ts +22 -26
  29. package/src/commit/agentic/tools/analyze-file.ts +3 -3
  30. package/src/commit/agentic/tools/git-file-diff.ts +3 -6
  31. package/src/commit/agentic/tools/git-hunk.ts +3 -3
  32. package/src/commit/agentic/tools/git-overview.ts +6 -9
  33. package/src/commit/agentic/tools/index.ts +6 -8
  34. package/src/commit/agentic/tools/propose-commit.ts +4 -7
  35. package/src/commit/agentic/tools/recent-commits.ts +3 -3
  36. package/src/commit/agentic/tools/split-commit.ts +4 -4
  37. package/src/commit/agentic/validation.ts +1 -1
  38. package/src/commit/analysis/conventional.ts +4 -4
  39. package/src/commit/analysis/summary.ts +3 -3
  40. package/src/commit/changelog/generate.ts +4 -4
  41. package/src/commit/changelog/index.ts +5 -9
  42. package/src/commit/map-reduce/map-phase.ts +4 -4
  43. package/src/commit/map-reduce/reduce-phase.ts +4 -4
  44. package/src/commit/pipeline.ts +13 -16
  45. package/src/config/keybindings.ts +7 -6
  46. package/src/config/prompt-templates.ts +44 -226
  47. package/src/config/resolve-config-value.ts +4 -2
  48. package/src/config/settings-schema.ts +98 -2
  49. package/src/config/settings.ts +25 -26
  50. package/src/dap/client.ts +674 -0
  51. package/src/dap/config.ts +150 -0
  52. package/src/dap/defaults.json +211 -0
  53. package/src/dap/index.ts +4 -0
  54. package/src/dap/session.ts +1255 -0
  55. package/src/dap/types.ts +600 -0
  56. package/src/debug/log-viewer.ts +3 -2
  57. package/src/discovery/builtin.ts +1 -2
  58. package/src/discovery/codex.ts +2 -2
  59. package/src/discovery/github.ts +2 -1
  60. package/src/discovery/helpers.ts +2 -2
  61. package/src/discovery/opencode.ts +2 -2
  62. package/src/edit/diff.ts +818 -0
  63. package/src/edit/index.ts +309 -0
  64. package/src/edit/line-hash.ts +67 -0
  65. package/src/edit/modes/chunk.ts +454 -0
  66. package/src/{patch → edit/modes}/hashline.ts +741 -361
  67. package/src/{patch/applicator.ts → edit/modes/patch.ts} +420 -117
  68. package/src/{patch/fuzzy.ts → edit/modes/replace.ts} +519 -197
  69. package/src/{patch → edit}/normalize.ts +97 -76
  70. package/src/{patch/shared.ts → edit/renderer.ts} +181 -108
  71. package/src/exec/bash-executor.ts +4 -2
  72. package/src/exec/idle-timeout-watchdog.ts +126 -0
  73. package/src/exec/non-interactive-env.ts +5 -0
  74. package/src/extensibility/custom-commands/bundled/ci-green/index.ts +6 -18
  75. package/src/extensibility/custom-commands/bundled/review/index.ts +45 -43
  76. package/src/extensibility/custom-commands/loader.ts +1 -2
  77. package/src/extensibility/custom-tools/loader.ts +34 -11
  78. package/src/extensibility/custom-tools/types.ts +1 -1
  79. package/src/extensibility/extensions/loader.ts +9 -4
  80. package/src/extensibility/extensions/runner.ts +24 -1
  81. package/src/extensibility/extensions/types.ts +4 -2
  82. package/src/extensibility/hooks/loader.ts +5 -6
  83. package/src/extensibility/hooks/types.ts +2 -2
  84. package/src/extensibility/plugins/doctor.ts +2 -1
  85. package/src/extensibility/plugins/marketplace/fetcher.ts +2 -57
  86. package/src/extensibility/plugins/marketplace/source-resolver.ts +4 -4
  87. package/src/extensibility/slash-commands.ts +3 -7
  88. package/src/index.ts +3 -1
  89. package/src/internal-urls/docs-index.generated.ts +11 -11
  90. package/src/ipy/executor.ts +58 -17
  91. package/src/ipy/gateway-coordinator.ts +6 -4
  92. package/src/ipy/kernel.ts +45 -22
  93. package/src/ipy/runtime.ts +2 -2
  94. package/src/lsp/client.ts +7 -4
  95. package/src/lsp/clients/lsp-linter-client.ts +4 -4
  96. package/src/lsp/config.ts +2 -2
  97. package/src/lsp/defaults.json +688 -154
  98. package/src/lsp/index.ts +234 -45
  99. package/src/lsp/lspmux.ts +2 -2
  100. package/src/lsp/startup-events.ts +13 -0
  101. package/src/lsp/types.ts +12 -1
  102. package/src/lsp/utils.ts +8 -1
  103. package/src/main.ts +125 -47
  104. package/src/memories/index.ts +4 -5
  105. package/src/modes/acp/acp-agent.ts +563 -163
  106. package/src/modes/acp/acp-event-mapper.ts +9 -1
  107. package/src/modes/acp/acp-mode.ts +4 -2
  108. package/src/modes/components/agent-dashboard.ts +3 -4
  109. package/src/modes/components/diff.ts +6 -7
  110. package/src/modes/components/footer.ts +9 -29
  111. package/src/modes/components/hook-editor.ts +3 -3
  112. package/src/modes/components/hook-selector.ts +6 -1
  113. package/src/modes/components/read-tool-group.ts +6 -12
  114. package/src/modes/components/session-observer-overlay.ts +472 -0
  115. package/src/modes/components/settings-defs.ts +24 -0
  116. package/src/modes/components/status-line.ts +15 -61
  117. package/src/modes/components/tool-execution.ts +1 -1
  118. package/src/modes/components/welcome.ts +1 -1
  119. package/src/modes/controllers/btw-controller.ts +2 -2
  120. package/src/modes/controllers/command-controller.ts +4 -2
  121. package/src/modes/controllers/event-controller.ts +59 -2
  122. package/src/modes/controllers/extension-ui-controller.ts +1 -0
  123. package/src/modes/controllers/input-controller.ts +15 -8
  124. package/src/modes/controllers/selector-controller.ts +26 -0
  125. package/src/modes/index.ts +20 -2
  126. package/src/modes/interactive-mode.ts +278 -69
  127. package/src/modes/rpc/host-tools.ts +186 -0
  128. package/src/modes/rpc/rpc-client.ts +178 -13
  129. package/src/modes/rpc/rpc-mode.ts +73 -3
  130. package/src/modes/rpc/rpc-types.ts +53 -1
  131. package/src/modes/session-observer-registry.ts +146 -0
  132. package/src/modes/shared.ts +0 -42
  133. package/src/modes/theme/theme.ts +80 -8
  134. package/src/modes/types.ts +4 -2
  135. package/src/modes/utils/keybinding-matchers.ts +9 -0
  136. package/src/prompts/system/custom-system-prompt.md +5 -0
  137. package/src/prompts/system/system-prompt.md +8 -1
  138. package/src/prompts/tools/chunk-edit.md +219 -0
  139. package/src/prompts/tools/debug.md +43 -0
  140. package/src/prompts/tools/grep.md +3 -0
  141. package/src/prompts/tools/lsp.md +5 -5
  142. package/src/prompts/tools/read-chunk.md +17 -0
  143. package/src/prompts/tools/read.md +19 -5
  144. package/src/sdk.ts +216 -165
  145. package/src/secrets/index.ts +1 -1
  146. package/src/secrets/obfuscator.ts +25 -17
  147. package/src/session/agent-session.ts +381 -286
  148. package/src/session/agent-storage.ts +12 -12
  149. package/src/session/compaction/branch-summarization.ts +3 -3
  150. package/src/session/compaction/compaction.ts +5 -6
  151. package/src/session/compaction/utils.ts +3 -3
  152. package/src/session/history-storage.ts +62 -19
  153. package/src/session/messages.ts +3 -3
  154. package/src/session/session-dump-format.ts +203 -0
  155. package/src/session/session-manager.ts +15 -5
  156. package/src/session/session-storage.ts +4 -2
  157. package/src/session/streaming-output.ts +1 -1
  158. package/src/session/tool-choice-queue.ts +213 -0
  159. package/src/slash-commands/builtin-registry.ts +56 -8
  160. package/src/ssh/connection-manager.ts +2 -2
  161. package/src/ssh/sshfs-mount.ts +5 -5
  162. package/src/stt/downloader.ts +4 -4
  163. package/src/stt/recorder.ts +4 -4
  164. package/src/stt/transcriber.ts +2 -2
  165. package/src/system-prompt.ts +25 -13
  166. package/src/task/agents.ts +5 -6
  167. package/src/task/commands.ts +2 -5
  168. package/src/task/executor.ts +32 -4
  169. package/src/task/index.ts +91 -82
  170. package/src/task/template.ts +2 -2
  171. package/src/task/types.ts +25 -0
  172. package/src/task/worktree.ts +131 -149
  173. package/src/tools/ask.ts +2 -3
  174. package/src/tools/ast-edit.ts +7 -7
  175. package/src/tools/ast-grep.ts +7 -7
  176. package/src/tools/auto-generated-guard.ts +36 -41
  177. package/src/tools/await-tool.ts +2 -2
  178. package/src/tools/bash.ts +5 -23
  179. package/src/tools/browser.ts +4 -5
  180. package/src/tools/calculator.ts +2 -3
  181. package/src/tools/cancel-job.ts +2 -2
  182. package/src/tools/checkpoint.ts +3 -3
  183. package/src/tools/debug.ts +1007 -0
  184. package/src/tools/exit-plan-mode.ts +3 -3
  185. package/src/tools/fetch.ts +67 -3
  186. package/src/tools/find.ts +4 -5
  187. package/src/tools/fs-cache-invalidation.ts +5 -0
  188. package/src/tools/gemini-image.ts +13 -5
  189. package/src/tools/gh.ts +130 -308
  190. package/src/tools/grep.ts +57 -9
  191. package/src/tools/index.ts +44 -22
  192. package/src/tools/inspect-image.ts +4 -4
  193. package/src/tools/output-meta.ts +1 -1
  194. package/src/tools/python.ts +19 -6
  195. package/src/tools/read.ts +211 -146
  196. package/src/tools/render-mermaid.ts +2 -3
  197. package/src/tools/render-utils.ts +20 -6
  198. package/src/tools/renderers.ts +3 -1
  199. package/src/tools/report-tool-issue.ts +80 -0
  200. package/src/tools/resolve.ts +70 -39
  201. package/src/tools/search-tool-bm25.ts +2 -2
  202. package/src/tools/ssh.ts +2 -2
  203. package/src/tools/todo-write.ts +2 -2
  204. package/src/tools/tool-timeouts.ts +1 -0
  205. package/src/tools/write.ts +5 -6
  206. package/src/tui/tree-list.ts +3 -1
  207. package/src/utils/clipboard.ts +80 -0
  208. package/src/utils/commit-message-generator.ts +2 -3
  209. package/src/utils/edit-mode.ts +49 -0
  210. package/src/utils/external-editor.ts +11 -5
  211. package/src/utils/file-display-mode.ts +6 -5
  212. package/src/utils/file-mentions.ts +8 -7
  213. package/src/utils/git.ts +1400 -0
  214. package/src/utils/image-loading.ts +98 -0
  215. package/src/utils/title-generator.ts +2 -3
  216. package/src/utils/tools-manager.ts +6 -6
  217. package/src/web/scrapers/choosealicense.ts +1 -1
  218. package/src/web/search/index.ts +3 -3
  219. package/src/web/search/render.ts +6 -4
  220. package/src/autoresearch/command-initialize.md +0 -34
  221. package/src/commit/git/errors.ts +0 -9
  222. package/src/commit/git/index.ts +0 -210
  223. package/src/commit/git/operations.ts +0 -54
  224. package/src/patch/diff.ts +0 -433
  225. package/src/patch/index.ts +0 -888
  226. package/src/patch/parser.ts +0 -532
  227. package/src/patch/types.ts +0 -292
  228. package/src/prompts/agents/oracle.md +0 -77
  229. package/src/tools/gh-cli.ts +0 -125
  230. package/src/tools/pending-action.ts +0 -49
  231. package/src/utils/child-process.ts +0 -88
  232. package/src/utils/frontmatter.ts +0 -117
  233. package/src/utils/image-input.ts +0 -274
  234. package/src/utils/mime.ts +0 -53
  235. package/src/utils/prompt-format.ts +0 -170
@@ -8,7 +8,11 @@ Autoresearch mode is active.
8
8
  Primary goal:
9
9
  {{goal}}
10
10
  {{else}}
11
+ {{#if has_autoresearch_md}}
11
12
  Primary goal is documented in `autoresearch.md` for this session.
13
+ {{else}}
14
+ There is no `autoresearch.md` yet. Infer what to optimize from the latest user message and the conversation; after you create `autoresearch.md`, keep it as the durable source of truth for goal and benchmark contract.
15
+ {{/if}}
12
16
  {{/if}}
13
17
 
14
18
  Working directory:
@@ -63,7 +67,7 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
63
67
 
64
68
  - `init_experiment` — initialize or reset the experiment session for the current optimization target.
65
69
  - `run_experiment` — run a benchmark or experiment command with timing, output capture, structured metric parsing, and optional backpressure checks.
66
- - `log_experiment` — record the result, update the dashboard, persist JSONL history, auto-commit kept experiments, and auto-revert discarded or failed experiments.
70
+ - `log_experiment` — record the result, update the dashboard, persist JSONL history, auto-commit kept experiments, and revert only run-modified files for discarded or failed experiments (pre-existing uncommitted changes are preserved).
67
71
 
68
72
  ### Operating protocol
69
73
 
@@ -83,6 +87,8 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
83
87
  - Use the same workload every run unless you intentionally re-initialize with a new segment.
84
88
  - Keep the measurement harness, evaluator, and fixed benchmark inputs stable unless you intentionally start a new segment and document the change.
85
89
  4. Initialize the loop with `init_experiment` before the first logged run of a segment.
90
+ - Pass `from_autoresearch_md: true` with only `name` to load the benchmark contract from `autoresearch.md` without mirroring every field in the tool call.
91
+ - Use `abandon_unlogged_runs: true` only when you intentionally discard unlogged run artifacts and need a fresh segment (for example after a bad or obsolete benchmark directory).
86
92
  5. Run a baseline first.
87
93
  - Establish the baseline metric before attempting optimizations.
88
94
  - Track secondary metrics only when they matter to correctness, quality, or obvious regressions.
@@ -90,7 +96,9 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
90
96
  - Make one coherent experiment at a time.
91
97
  - Run `run_experiment`.
92
98
  - Interpret the result honestly.
93
- - Call `log_experiment` after every run.
99
+ - Call `log_experiment` after every run (it refreshes benchmark/scope fields from `autoresearch.md` before logging so keep validation matches the file on disk).
100
+ - Use `run_experiment` with `force: true` only when you must override the segment benchmark command or skip the direct-`autoresearch.sh` rule.
101
+ - On `log_experiment`, `force: true` relaxes ASI requirements and allows keeping a primary-metric regression; prefer normal logging when possible.
94
102
  7. Keep the primary metric as the decision maker.
95
103
  - `keep` when the primary metric improves.
96
104
  - `discard` when it regresses or stays flat.
@@ -137,7 +145,11 @@ Suggested structure:
137
145
  {{#if has_goal}}
138
146
  - {{goal}}
139
147
  {{else}}
148
+ {{#if has_autoresearch_md}}
140
149
  - document the active target here before the first benchmark
150
+ {{else}}
151
+ - (derive from the user's messages, then record here)
152
+ {{/if}}
141
153
  {{/if}}
142
154
 
143
155
  ## Benchmark
@@ -194,15 +206,20 @@ Resume from the existing notes:
194
206
  {{else}}
195
207
  ### Initial setup
196
208
 
197
- `autoresearch.md` does not exist yet.
209
+ `autoresearch.md` does not exist yet. You decide the benchmark contract, harness, and scope from the user's messages and the repository—do not ask the user to re-type benchmark commands or metric names in a separate UI prompt.
210
+
211
+ Before the first benchmark:
198
212
 
199
- Create the experiment workspace before the first benchmark:
213
+ - Write `autoresearch.md` with goal, benchmark command (must be a **direct** invocation of `autoresearch.sh`, e.g. `bash autoresearch.sh`), primary metric name and unit, direction (`lower` or `higher`), tradeoff metrics if relevant, files in scope, off limits, and constraints.
214
+ - Add a short preflight section: prerequisites, one-time setup, and the comparability invariant that must stay fixed across runs.
215
+ - Mark ground-truth evaluators, fixed datasets, and other measurement-critical files as off limits or hard constraints when they define the benchmark contract.
216
+ - Write or update `autoresearch.program.md` when you learn durable heuristics, failure patterns, or repo-specific strategy for later resume turns.
217
+ - Create `autoresearch.sh` as the canonical benchmark entrypoint; print the primary metric as `METRIC <name>=<number>` and optional secondary metrics as additional `METRIC` lines.
218
+ - Optionally add `autoresearch.checks.sh` if correctness or quality needs a hard gate.
219
+ - Call `init_experiment` with arguments that match `autoresearch.md` exactly (benchmark command, metric, unit, direction, scope paths, off limits, constraints).
220
+ - Run and log the baseline.
200
221
 
201
- - write `autoresearch.md`
202
- - write `autoresearch.sh`
203
- - optionally write `autoresearch.checks.sh`
204
- - run `init_experiment`
205
- - run and log the baseline
222
+ Until `init_experiment` succeeds, only autoresearch control files (`autoresearch.md`, `autoresearch.sh`, `autoresearch.program.md`, `autoresearch.ideas.md`, `autoresearch.checks.sh`) may be edited; after initialization, respect Files in Scope from the contract.
206
223
 
207
224
  {{/if}}
208
225
  {{#if has_checks}}
@@ -34,7 +34,6 @@ export function createExperimentState(): ExperimentState {
34
34
  scopePaths: [],
35
35
  offLimits: [],
36
36
  constraints: [],
37
- segmentFingerprint: null,
38
37
  };
39
38
  }
40
39
 
@@ -203,8 +202,6 @@ export function reconstructStateFromJsonl(workDir: string): ReconstructedExperim
203
202
  state.scopePaths = cloneStringArray(configEntry.scopePaths);
204
203
  state.offLimits = cloneStringArray(configEntry.offLimits);
205
204
  state.constraints = cloneStringArray(configEntry.constraints);
206
- state.segmentFingerprint =
207
- typeof configEntry.segmentFingerprint === "string" ? configEntry.segmentFingerprint : null;
208
205
  state.secondaryMetrics = hydrateMetricDefs(configEntry.secondaryMetrics);
209
206
  continue;
210
207
  }
@@ -322,9 +319,6 @@ function parseConfigEntry(value: unknown): AutoresearchJsonConfigEntry | null {
322
319
  candidate.constraints.filter((item): item is string => typeof item === "string"),
323
320
  );
324
321
  }
325
- if (typeof candidate.segmentFingerprint === "string" && candidate.segmentFingerprint.trim().length > 0) {
326
- config.segmentFingerprint = candidate.segmentFingerprint;
327
- }
328
322
  return config;
329
323
  }
330
324
 
@@ -6,15 +6,15 @@ import { Type } from "@sinclair/typebox";
6
6
  import type { ToolDefinition } from "../../extensibility/extensions";
7
7
  import type { Theme } from "../../modes/theme/theme";
8
8
  import { replaceTabs, truncateToWidth } from "../../tools/render-utils";
9
+ import { applyAutoresearchContractToExperimentState } from "../apply-contract-to-state";
9
10
  import {
10
- buildAutoresearchSegmentFingerprint,
11
11
  contractListsEqual,
12
12
  contractPathListsEqual,
13
13
  loadAutoresearchScriptSnapshot,
14
14
  readAutoresearchContract,
15
15
  } from "../contract";
16
16
  import {
17
- inferMetricUnitFromName,
17
+ abandonUnloggedAutoresearchRuns,
18
18
  isAutoresearchShCommand,
19
19
  readMaxExperiments,
20
20
  readPendingRunSummary,
@@ -28,9 +28,29 @@ const initExperimentSchema = Type.Object({
28
28
  name: Type.String({
29
29
  description: "Human-readable experiment name.",
30
30
  }),
31
- metric_name: Type.String({
32
- description: "Primary metric name shown in the dashboard.",
33
- }),
31
+ from_autoresearch_md: Type.Optional(
32
+ Type.Boolean({
33
+ description:
34
+ "When true, load benchmark command, metrics, scope, off-limits, and constraints from autoresearch.md instead of passing mirrored fields below.",
35
+ }),
36
+ ),
37
+ abandon_unlogged_runs: Type.Optional(
38
+ Type.Boolean({
39
+ description:
40
+ "When true, mark all completed but unlogged run artifacts as abandoned so initialization can proceed without logging them first.",
41
+ }),
42
+ ),
43
+ new_segment: Type.Optional(
44
+ Type.Boolean({
45
+ description:
46
+ "When true, force a new segment even when the contract fields have not changed. Without this, re-initialization with matching contract is a no-op.",
47
+ }),
48
+ ),
49
+ metric_name: Type.Optional(
50
+ Type.String({
51
+ description: "Primary metric name shown in the dashboard. Required when from_autoresearch_md is false.",
52
+ }),
53
+ ),
34
54
  metric_unit: Type.Optional(
35
55
  Type.String({
36
56
  description: "Unit for the primary metric, for example µs, ms, s, kb, or empty.",
@@ -41,13 +61,17 @@ const initExperimentSchema = Type.Object({
41
61
  description: "Whether lower or higher values are better. Defaults to lower.",
42
62
  }),
43
63
  ),
44
- benchmark_command: Type.String({
45
- description: "Benchmark command recorded in autoresearch.md.",
46
- }),
47
- scope_paths: Type.Array(Type.String(), {
48
- description: "Files in Scope from autoresearch.md. Must be non-empty.",
49
- minItems: 1,
50
- }),
64
+ benchmark_command: Type.Optional(
65
+ Type.String({
66
+ description: "Benchmark command recorded in autoresearch.md. Required when from_autoresearch_md is false.",
67
+ }),
68
+ ),
69
+ scope_paths: Type.Optional(
70
+ Type.Array(Type.String(), {
71
+ description: "Files in Scope from autoresearch.md. Required when from_autoresearch_md is false.",
72
+ minItems: 1,
73
+ }),
74
+ ),
51
75
  off_limits: Type.Optional(
52
76
  Type.Array(Type.String(), {
53
77
  description: "Off Limits paths from autoresearch.md.",
@@ -86,25 +110,43 @@ export function createInitExperimentTool(
86
110
  const state = runtime.state;
87
111
  const isReinitializing = state.results.length > 0;
88
112
  const workDir = resolveWorkDir(ctx.cwd);
89
- const pendingRun = await readPendingRunSummary(workDir, collectLoggedRunNumbers(state.results));
113
+ const loggedRunNumbers = collectLoggedRunNumbers(state.results);
114
+
115
+ let abandonSummary = "";
116
+ if (params.abandon_unlogged_runs === true) {
117
+ const abandoned = await abandonUnloggedAutoresearchRuns(workDir, loggedRunNumbers);
118
+ if (abandoned > 0) {
119
+ abandonSummary =
120
+ abandoned === 1
121
+ ? "Abandoned 1 unlogged run artifact.\n"
122
+ : `Abandoned ${abandoned} unlogged run artifacts.\n`;
123
+ }
124
+ }
125
+
126
+ const pendingRun = await readPendingRunSummary(workDir, loggedRunNumbers);
90
127
  if (pendingRun) {
128
+ const metricInfo = pendingRun.parsedPrimary !== null ? `, metric=${pendingRun.parsedPrimary}` : "";
129
+ const passedInfo = pendingRun.passed ? "passed" : "failed";
91
130
  return {
92
131
  content: [
93
132
  {
94
133
  type: "text",
95
134
  text:
96
- `Error: run #${pendingRun.runNumber} has not been logged yet. ` +
97
- "Call log_experiment before re-initializing the current segment.",
135
+ abandonSummary +
136
+ `Error: run #${pendingRun.runNumber} has not been logged yet.\n` +
137
+ `Pending: command="${pendingRun.command}"${metricInfo}, ${passedInfo}\n` +
138
+ "Call log_experiment before re-initializing, or pass abandon_unlogged_runs=true.",
98
139
  },
99
140
  ],
100
141
  };
101
142
  }
143
+
102
144
  const contractResult = readAutoresearchContract(workDir);
103
145
  const scriptSnapshot = loadAutoresearchScriptSnapshot(workDir);
104
146
  const errors = [...contractResult.errors, ...scriptSnapshot.errors];
105
147
  if (errors.length > 0) {
106
148
  return {
107
- content: [{ type: "text", text: `Error: ${errors.join(" ")}` }],
149
+ content: [{ type: "text", text: `${abandonSummary}Error: ${errors.join(" ")}` }],
108
150
  };
109
151
  }
110
152
 
@@ -117,118 +159,161 @@ export function createInitExperimentTool(
117
159
  {
118
160
  type: "text",
119
161
  text:
162
+ abandonSummary +
120
163
  "Error: Benchmark.command in autoresearch.md must invoke `autoresearch.sh` directly. " +
121
164
  "Move the real workload into `autoresearch.sh` and re-run init_experiment.",
122
165
  },
123
166
  ],
124
167
  };
125
168
  }
126
- if (benchmarkContract.command !== params.benchmark_command.trim()) {
127
- return {
128
- content: [
129
- {
130
- type: "text",
131
- text:
132
- "Error: benchmark_command does not match autoresearch.md. " +
133
- `Expected: ${benchmarkContract.command ?? "(missing)"}\nReceived: ${params.benchmark_command}`,
134
- },
135
- ],
136
- };
137
- }
138
- if (benchmarkContract.primaryMetric !== params.metric_name.trim()) {
139
- return {
140
- content: [
141
- {
142
- type: "text",
143
- text:
144
- "Error: metric_name does not match autoresearch.md. " +
145
- `Expected: ${benchmarkContract.primaryMetric ?? "(missing)"}\nReceived: ${params.metric_name}`,
146
- },
147
- ],
148
- };
149
- }
150
- if ((params.metric_unit ?? "") !== expectedMetricUnit) {
151
- return {
152
- content: [
153
- {
154
- type: "text",
155
- text:
156
- "Error: metric_unit does not match autoresearch.md. " +
157
- `Expected: ${expectedMetricUnit || "(empty)"}\nReceived: ${params.metric_unit ?? "(empty)"}`,
158
- },
159
- ],
160
- };
161
- }
162
- if ((params.direction ?? "lower") !== expectedDirection) {
163
- return {
164
- content: [
165
- {
166
- type: "text",
167
- text:
168
- "Error: direction does not match autoresearch.md. " +
169
- `Expected: ${expectedDirection}\nReceived: ${params.direction ?? "lower"}`,
170
- },
171
- ],
172
- };
173
- }
174
- if (!contractPathListsEqual(params.scope_paths, contractResult.contract.scopePaths)) {
175
- return {
176
- content: [
177
- {
178
- type: "text",
179
- text:
180
- "Error: scope_paths do not match autoresearch.md. " +
181
- `Expected: ${contractResult.contract.scopePaths.join(", ")}`,
182
- },
183
- ],
184
- };
185
- }
186
- if (!contractPathListsEqual(params.off_limits ?? [], contractResult.contract.offLimits)) {
187
- return {
188
- content: [
189
- {
190
- type: "text",
191
- text:
192
- "Error: off_limits do not match autoresearch.md. " +
193
- `Expected: ${contractResult.contract.offLimits.join(", ") || "(empty)"}`,
194
- },
195
- ],
196
- };
197
- }
198
- if (!contractListsEqual(params.constraints ?? [], contractResult.contract.constraints)) {
199
- return {
200
- content: [
201
- {
202
- type: "text",
203
- text:
204
- "Error: constraints do not match autoresearch.md. " +
205
- `Expected: ${contractResult.contract.constraints.join(", ") || "(empty)"}`,
206
- },
207
- ],
208
- };
169
+
170
+ const fromMd = params.from_autoresearch_md === true;
171
+ if (!fromMd) {
172
+ const metricName = params.metric_name?.trim();
173
+ const benchmarkCommand = params.benchmark_command?.trim();
174
+ const scopePaths = params.scope_paths;
175
+ if (!metricName || !benchmarkCommand || !scopePaths || scopePaths.length === 0) {
176
+ return {
177
+ content: [
178
+ {
179
+ type: "text",
180
+ text:
181
+ abandonSummary +
182
+ "Error: when from_autoresearch_md is false or omitted, metric_name, benchmark_command, and scope_paths are required and must match autoresearch.md. " +
183
+ "Alternatively pass from_autoresearch_md=true with only name (plus optional flags).",
184
+ },
185
+ ],
186
+ };
187
+ }
188
+ if (benchmarkContract.command !== benchmarkCommand) {
189
+ return {
190
+ content: [
191
+ {
192
+ type: "text",
193
+ text:
194
+ abandonSummary +
195
+ "Error: benchmark_command does not match autoresearch.md. " +
196
+ `Expected: ${benchmarkContract.command ?? "(missing)"}\nReceived: ${params.benchmark_command}`,
197
+ },
198
+ ],
199
+ };
200
+ }
201
+ if (benchmarkContract.primaryMetric !== metricName) {
202
+ return {
203
+ content: [
204
+ {
205
+ type: "text",
206
+ text:
207
+ abandonSummary +
208
+ "Error: metric_name does not match autoresearch.md. " +
209
+ `Expected: ${benchmarkContract.primaryMetric ?? "(missing)"}\nReceived: ${params.metric_name}`,
210
+ },
211
+ ],
212
+ };
213
+ }
214
+ if ((params.metric_unit ?? "") !== expectedMetricUnit) {
215
+ return {
216
+ content: [
217
+ {
218
+ type: "text",
219
+ text:
220
+ abandonSummary +
221
+ "Error: metric_unit does not match autoresearch.md. " +
222
+ `Expected: ${expectedMetricUnit || "(empty)"}\nReceived: ${params.metric_unit ?? "(empty)"}`,
223
+ },
224
+ ],
225
+ };
226
+ }
227
+ if ((params.direction ?? "lower") !== expectedDirection) {
228
+ return {
229
+ content: [
230
+ {
231
+ type: "text",
232
+ text:
233
+ abandonSummary +
234
+ "Error: direction does not match autoresearch.md. " +
235
+ `Expected: ${expectedDirection}\nReceived: ${params.direction ?? "lower"}`,
236
+ },
237
+ ],
238
+ };
239
+ }
240
+ if (!contractPathListsEqual(scopePaths, contractResult.contract.scopePaths)) {
241
+ return {
242
+ content: [
243
+ {
244
+ type: "text",
245
+ text:
246
+ abandonSummary +
247
+ "Error: scope_paths do not match autoresearch.md. " +
248
+ `Expected: ${contractResult.contract.scopePaths.join(", ")}`,
249
+ },
250
+ ],
251
+ };
252
+ }
253
+ if (!contractPathListsEqual(params.off_limits ?? [], contractResult.contract.offLimits)) {
254
+ return {
255
+ content: [
256
+ {
257
+ type: "text",
258
+ text:
259
+ abandonSummary +
260
+ "Error: off_limits do not match autoresearch.md. " +
261
+ `Expected: ${contractResult.contract.offLimits.join(", ") || "(empty)"}`,
262
+ },
263
+ ],
264
+ };
265
+ }
266
+ if (!contractListsEqual(params.constraints ?? [], contractResult.contract.constraints)) {
267
+ return {
268
+ content: [
269
+ {
270
+ type: "text",
271
+ text:
272
+ abandonSummary +
273
+ "Error: constraints do not match autoresearch.md. " +
274
+ `Expected: ${contractResult.contract.constraints.join(", ") || "(empty)"}`,
275
+ },
276
+ ],
277
+ };
278
+ }
209
279
  }
210
280
 
211
- const segmentFingerprint = buildAutoresearchSegmentFingerprint(contractResult.contract, {
212
- benchmarkScript: scriptSnapshot.benchmarkScript,
213
- checksScript: scriptSnapshot.checksScript,
214
- });
281
+ // Check if contract matches current state — if so, re-init is a no-op
282
+ if (isReinitializing && params.new_segment !== true) {
283
+ const contract = contractResult.contract;
284
+ const bm = contract.benchmark;
285
+ const contractMatches =
286
+ (bm.primaryMetric ?? "metric") === state.metricName &&
287
+ bm.metricUnit === state.metricUnit &&
288
+ (bm.direction ?? "lower") === state.bestDirection &&
289
+ (bm.command ?? null) === state.benchmarkCommand &&
290
+ contractPathListsEqual(contract.scopePaths, state.scopePaths) &&
291
+ contractPathListsEqual(contract.offLimits, state.offLimits) &&
292
+ contractListsEqual(contract.constraints, state.constraints);
293
+ if (contractMatches) {
294
+ runtime.autoresearchMode = true;
295
+ runtime.autoResumeArmed = true;
296
+ options.dashboard.updateWidget(ctx, runtime);
297
+ options.dashboard.requestRender();
298
+ return {
299
+ content: [
300
+ {
301
+ type: "text",
302
+ text:
303
+ abandonSummary +
304
+ `Experiment session already initialized with matching contract. Continuing segment ${state.currentSegment}.`,
305
+ },
306
+ ],
307
+ details: { state: cloneExperimentState(state) },
308
+ };
309
+ }
310
+ }
215
311
 
312
+ applyAutoresearchContractToExperimentState(contractResult.contract, state);
216
313
  state.name = params.name;
217
- state.metricName = params.metric_name;
218
- state.metricUnit = params.metric_unit ?? "";
219
- state.bestDirection = params.direction ?? "lower";
220
314
  state.maxExperiments = readMaxExperiments(ctx.cwd);
221
315
  state.bestMetric = null;
222
316
  state.confidence = null;
223
- state.secondaryMetrics = benchmarkContract.secondaryMetrics.map(name => ({
224
- name,
225
- unit: inferMetricUnitFromName(name),
226
- }));
227
- state.benchmarkCommand = params.benchmark_command.trim();
228
- state.scopePaths = [...contractResult.contract.scopePaths];
229
- state.offLimits = [...contractResult.contract.offLimits];
230
- state.constraints = [...contractResult.contract.constraints];
231
- state.segmentFingerprint = segmentFingerprint;
232
317
  if (isReinitializing) {
233
318
  state.currentSegment += 1;
234
319
  }
@@ -245,7 +330,6 @@ export function createInitExperimentTool(
245
330
  scopePaths: state.scopePaths,
246
331
  offLimits: state.offLimits,
247
332
  constraints: state.constraints,
248
- segmentFingerprint,
249
333
  });
250
334
 
251
335
  if (isReinitializing) {
@@ -267,6 +351,7 @@ export function createInitExperimentTool(
267
351
  options.dashboard.requestRender();
268
352
 
269
353
  const lines = [
354
+ abandonSummary.trimEnd(),
270
355
  `Experiment initialized: ${state.name}`,
271
356
  `Metric: ${state.metricName} (${state.metricUnit || "unitless"}, ${state.bestDirection} is better)`,
272
357
  `Benchmark command: ${state.benchmarkCommand}`,
@@ -275,7 +360,7 @@ export function createInitExperimentTool(
275
360
  isReinitializing
276
361
  ? "Previous results remain in history. This starts a new segment and requires a fresh baseline."
277
362
  : "Now run the baseline experiment and log it.",
278
- ];
363
+ ].filter(line => line.length > 0);
279
364
  if (state.maxExperiments !== null) {
280
365
  lines.push(`Max iterations: ${state.maxExperiments}`);
281
366
  }