@oh-my-pi/pi-coding-agent 13.18.0 → 14.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. package/CHANGELOG.md +316 -1
  2. package/package.json +86 -24
  3. package/scripts/format-prompts.ts +2 -2
  4. package/src/autoresearch/apply-contract-to-state.ts +24 -0
  5. package/src/autoresearch/contract.ts +0 -44
  6. package/src/autoresearch/dashboard.ts +1 -2
  7. package/src/autoresearch/git.ts +116 -30
  8. package/src/autoresearch/helpers.ts +49 -0
  9. package/src/autoresearch/index.ts +28 -187
  10. package/src/autoresearch/prompt.md +26 -9
  11. package/src/autoresearch/state.ts +0 -6
  12. package/src/autoresearch/tools/init-experiment.ts +202 -117
  13. package/src/autoresearch/tools/log-experiment.ts +123 -178
  14. package/src/autoresearch/tools/run-experiment.ts +48 -10
  15. package/src/autoresearch/types.ts +2 -2
  16. package/src/capability/index.ts +4 -2
  17. package/src/cli/file-processor.ts +3 -3
  18. package/src/cli/grep-cli.ts +8 -8
  19. package/src/cli/grievances-cli.ts +78 -0
  20. package/src/cli/read-cli.ts +67 -0
  21. package/src/cli/setup-cli.ts +4 -4
  22. package/src/cli/update-cli.ts +3 -3
  23. package/src/cli.ts +2 -0
  24. package/src/commands/grep.ts +6 -1
  25. package/src/commands/grievances.ts +20 -0
  26. package/src/commands/read.ts +33 -0
  27. package/src/commit/agentic/agent.ts +5 -8
  28. package/src/commit/agentic/index.ts +22 -26
  29. package/src/commit/agentic/tools/analyze-file.ts +3 -3
  30. package/src/commit/agentic/tools/git-file-diff.ts +3 -6
  31. package/src/commit/agentic/tools/git-hunk.ts +3 -3
  32. package/src/commit/agentic/tools/git-overview.ts +6 -9
  33. package/src/commit/agentic/tools/index.ts +6 -8
  34. package/src/commit/agentic/tools/propose-commit.ts +4 -7
  35. package/src/commit/agentic/tools/recent-commits.ts +3 -3
  36. package/src/commit/agentic/tools/split-commit.ts +4 -4
  37. package/src/commit/agentic/validation.ts +1 -1
  38. package/src/commit/analysis/conventional.ts +4 -4
  39. package/src/commit/analysis/summary.ts +3 -3
  40. package/src/commit/changelog/generate.ts +4 -4
  41. package/src/commit/changelog/index.ts +5 -9
  42. package/src/commit/map-reduce/map-phase.ts +4 -4
  43. package/src/commit/map-reduce/reduce-phase.ts +4 -4
  44. package/src/commit/pipeline.ts +13 -16
  45. package/src/config/keybindings.ts +7 -6
  46. package/src/config/prompt-templates.ts +44 -226
  47. package/src/config/resolve-config-value.ts +4 -2
  48. package/src/config/settings-schema.ts +98 -2
  49. package/src/config/settings.ts +25 -26
  50. package/src/dap/client.ts +674 -0
  51. package/src/dap/config.ts +150 -0
  52. package/src/dap/defaults.json +211 -0
  53. package/src/dap/index.ts +4 -0
  54. package/src/dap/session.ts +1255 -0
  55. package/src/dap/types.ts +600 -0
  56. package/src/debug/log-viewer.ts +3 -2
  57. package/src/discovery/builtin.ts +1 -2
  58. package/src/discovery/codex.ts +2 -2
  59. package/src/discovery/github.ts +2 -1
  60. package/src/discovery/helpers.ts +2 -2
  61. package/src/discovery/opencode.ts +2 -2
  62. package/src/edit/diff.ts +818 -0
  63. package/src/edit/index.ts +309 -0
  64. package/src/edit/line-hash.ts +67 -0
  65. package/src/edit/modes/chunk.ts +454 -0
  66. package/src/{patch → edit/modes}/hashline.ts +741 -361
  67. package/src/{patch/applicator.ts → edit/modes/patch.ts} +420 -117
  68. package/src/{patch/fuzzy.ts → edit/modes/replace.ts} +519 -197
  69. package/src/{patch → edit}/normalize.ts +97 -76
  70. package/src/{patch/shared.ts → edit/renderer.ts} +181 -108
  71. package/src/exec/bash-executor.ts +4 -2
  72. package/src/exec/idle-timeout-watchdog.ts +126 -0
  73. package/src/exec/non-interactive-env.ts +5 -0
  74. package/src/extensibility/custom-commands/bundled/ci-green/index.ts +6 -18
  75. package/src/extensibility/custom-commands/bundled/review/index.ts +45 -43
  76. package/src/extensibility/custom-commands/loader.ts +1 -2
  77. package/src/extensibility/custom-tools/loader.ts +34 -11
  78. package/src/extensibility/custom-tools/types.ts +1 -1
  79. package/src/extensibility/extensions/loader.ts +9 -4
  80. package/src/extensibility/extensions/runner.ts +24 -1
  81. package/src/extensibility/extensions/types.ts +4 -2
  82. package/src/extensibility/hooks/loader.ts +5 -6
  83. package/src/extensibility/hooks/types.ts +2 -2
  84. package/src/extensibility/plugins/doctor.ts +2 -1
  85. package/src/extensibility/plugins/marketplace/fetcher.ts +2 -57
  86. package/src/extensibility/plugins/marketplace/source-resolver.ts +4 -4
  87. package/src/extensibility/slash-commands.ts +3 -7
  88. package/src/index.ts +3 -1
  89. package/src/internal-urls/docs-index.generated.ts +11 -11
  90. package/src/ipy/executor.ts +58 -17
  91. package/src/ipy/gateway-coordinator.ts +6 -4
  92. package/src/ipy/kernel.ts +45 -22
  93. package/src/ipy/runtime.ts +2 -2
  94. package/src/lsp/client.ts +7 -4
  95. package/src/lsp/clients/lsp-linter-client.ts +4 -4
  96. package/src/lsp/config.ts +2 -2
  97. package/src/lsp/defaults.json +688 -154
  98. package/src/lsp/index.ts +234 -45
  99. package/src/lsp/lspmux.ts +2 -2
  100. package/src/lsp/startup-events.ts +13 -0
  101. package/src/lsp/types.ts +12 -1
  102. package/src/lsp/utils.ts +8 -1
  103. package/src/main.ts +125 -47
  104. package/src/memories/index.ts +4 -5
  105. package/src/modes/acp/acp-agent.ts +563 -163
  106. package/src/modes/acp/acp-event-mapper.ts +9 -1
  107. package/src/modes/acp/acp-mode.ts +4 -2
  108. package/src/modes/components/agent-dashboard.ts +3 -4
  109. package/src/modes/components/diff.ts +6 -7
  110. package/src/modes/components/footer.ts +9 -29
  111. package/src/modes/components/hook-editor.ts +3 -3
  112. package/src/modes/components/hook-selector.ts +6 -1
  113. package/src/modes/components/read-tool-group.ts +6 -12
  114. package/src/modes/components/session-observer-overlay.ts +472 -0
  115. package/src/modes/components/settings-defs.ts +24 -0
  116. package/src/modes/components/status-line.ts +15 -61
  117. package/src/modes/components/tool-execution.ts +1 -1
  118. package/src/modes/components/welcome.ts +1 -1
  119. package/src/modes/controllers/btw-controller.ts +2 -2
  120. package/src/modes/controllers/command-controller.ts +4 -2
  121. package/src/modes/controllers/event-controller.ts +59 -2
  122. package/src/modes/controllers/extension-ui-controller.ts +1 -0
  123. package/src/modes/controllers/input-controller.ts +15 -8
  124. package/src/modes/controllers/selector-controller.ts +26 -0
  125. package/src/modes/index.ts +20 -2
  126. package/src/modes/interactive-mode.ts +278 -69
  127. package/src/modes/rpc/host-tools.ts +186 -0
  128. package/src/modes/rpc/rpc-client.ts +178 -13
  129. package/src/modes/rpc/rpc-mode.ts +73 -3
  130. package/src/modes/rpc/rpc-types.ts +53 -1
  131. package/src/modes/session-observer-registry.ts +146 -0
  132. package/src/modes/shared.ts +0 -42
  133. package/src/modes/theme/theme.ts +80 -8
  134. package/src/modes/types.ts +4 -2
  135. package/src/modes/utils/keybinding-matchers.ts +9 -0
  136. package/src/prompts/system/custom-system-prompt.md +5 -0
  137. package/src/prompts/system/system-prompt.md +8 -1
  138. package/src/prompts/tools/chunk-edit.md +219 -0
  139. package/src/prompts/tools/debug.md +43 -0
  140. package/src/prompts/tools/grep.md +3 -0
  141. package/src/prompts/tools/lsp.md +5 -5
  142. package/src/prompts/tools/read-chunk.md +17 -0
  143. package/src/prompts/tools/read.md +19 -5
  144. package/src/sdk.ts +216 -165
  145. package/src/secrets/index.ts +1 -1
  146. package/src/secrets/obfuscator.ts +25 -17
  147. package/src/session/agent-session.ts +381 -286
  148. package/src/session/agent-storage.ts +12 -12
  149. package/src/session/compaction/branch-summarization.ts +3 -3
  150. package/src/session/compaction/compaction.ts +5 -6
  151. package/src/session/compaction/utils.ts +3 -3
  152. package/src/session/history-storage.ts +62 -19
  153. package/src/session/messages.ts +3 -3
  154. package/src/session/session-dump-format.ts +203 -0
  155. package/src/session/session-manager.ts +15 -5
  156. package/src/session/session-storage.ts +4 -2
  157. package/src/session/streaming-output.ts +1 -1
  158. package/src/session/tool-choice-queue.ts +213 -0
  159. package/src/slash-commands/builtin-registry.ts +56 -8
  160. package/src/ssh/connection-manager.ts +2 -2
  161. package/src/ssh/sshfs-mount.ts +5 -5
  162. package/src/stt/downloader.ts +4 -4
  163. package/src/stt/recorder.ts +4 -4
  164. package/src/stt/transcriber.ts +2 -2
  165. package/src/system-prompt.ts +25 -13
  166. package/src/task/agents.ts +5 -6
  167. package/src/task/commands.ts +2 -5
  168. package/src/task/executor.ts +32 -4
  169. package/src/task/index.ts +91 -82
  170. package/src/task/template.ts +2 -2
  171. package/src/task/types.ts +25 -0
  172. package/src/task/worktree.ts +131 -149
  173. package/src/tools/ask.ts +2 -3
  174. package/src/tools/ast-edit.ts +7 -7
  175. package/src/tools/ast-grep.ts +7 -7
  176. package/src/tools/auto-generated-guard.ts +36 -41
  177. package/src/tools/await-tool.ts +2 -2
  178. package/src/tools/bash.ts +5 -23
  179. package/src/tools/browser.ts +4 -5
  180. package/src/tools/calculator.ts +2 -3
  181. package/src/tools/cancel-job.ts +2 -2
  182. package/src/tools/checkpoint.ts +3 -3
  183. package/src/tools/debug.ts +1007 -0
  184. package/src/tools/exit-plan-mode.ts +3 -3
  185. package/src/tools/fetch.ts +67 -3
  186. package/src/tools/find.ts +4 -5
  187. package/src/tools/fs-cache-invalidation.ts +5 -0
  188. package/src/tools/gemini-image.ts +13 -5
  189. package/src/tools/gh.ts +130 -308
  190. package/src/tools/grep.ts +57 -9
  191. package/src/tools/index.ts +44 -22
  192. package/src/tools/inspect-image.ts +4 -4
  193. package/src/tools/output-meta.ts +1 -1
  194. package/src/tools/python.ts +19 -6
  195. package/src/tools/read.ts +211 -146
  196. package/src/tools/render-mermaid.ts +2 -3
  197. package/src/tools/render-utils.ts +20 -6
  198. package/src/tools/renderers.ts +3 -1
  199. package/src/tools/report-tool-issue.ts +80 -0
  200. package/src/tools/resolve.ts +70 -39
  201. package/src/tools/search-tool-bm25.ts +2 -2
  202. package/src/tools/ssh.ts +2 -2
  203. package/src/tools/todo-write.ts +2 -2
  204. package/src/tools/tool-timeouts.ts +1 -0
  205. package/src/tools/write.ts +5 -6
  206. package/src/tui/tree-list.ts +3 -1
  207. package/src/utils/clipboard.ts +80 -0
  208. package/src/utils/commit-message-generator.ts +2 -3
  209. package/src/utils/edit-mode.ts +49 -0
  210. package/src/utils/external-editor.ts +11 -5
  211. package/src/utils/file-display-mode.ts +6 -5
  212. package/src/utils/file-mentions.ts +8 -7
  213. package/src/utils/git.ts +1400 -0
  214. package/src/utils/image-loading.ts +98 -0
  215. package/src/utils/title-generator.ts +2 -3
  216. package/src/utils/tools-manager.ts +6 -6
  217. package/src/web/scrapers/choosealicense.ts +1 -1
  218. package/src/web/search/index.ts +3 -3
  219. package/src/web/search/render.ts +6 -4
  220. package/src/autoresearch/command-initialize.md +0 -34
  221. package/src/commit/git/errors.ts +0 -9
  222. package/src/commit/git/index.ts +0 -210
  223. package/src/commit/git/operations.ts +0 -54
  224. package/src/patch/diff.ts +0 -433
  225. package/src/patch/index.ts +0 -888
  226. package/src/patch/parser.ts +0 -532
  227. package/src/patch/types.ts +0 -292
  228. package/src/prompts/agents/oracle.md +0 -77
  229. package/src/tools/gh-cli.ts +0 -125
  230. package/src/tools/pending-action.ts +0 -49
  231. package/src/utils/child-process.ts +0 -88
  232. package/src/utils/frontmatter.ts +0 -117
  233. package/src/utils/image-input.ts +0 -274
  234. package/src/utils/mime.ts +0 -53
  235. package/src/utils/prompt-format.ts +0 -170
@@ -7,14 +7,16 @@ import { Type } from "@sinclair/typebox";
7
7
  import type { ToolDefinition } from "../../extensibility/extensions";
8
8
  import type { Theme } from "../../modes/theme/theme";
9
9
  import { replaceTabs, truncateToWidth } from "../../tools/render-utils";
10
- import { getAutoresearchFingerprintMismatchError, pathMatchesContractPath } from "../contract";
11
- import { getCurrentAutoresearchBranch, parseWorkDirDirtyPaths } from "../git";
10
+ import * as git from "../../utils/git";
11
+ import { applyAutoresearchContractToExperimentState } from "../apply-contract-to-state";
12
+ import { loadAutoresearchScriptSnapshot, pathMatchesContractPath, readAutoresearchContract } from "../contract";
13
+ import { computeRunModifiedPaths, getCurrentAutoresearchBranch, parseWorkDirDirtyPathsWithStatus } from "../git";
12
14
  import {
13
- AUTORESEARCH_COMMITTABLE_FILES,
14
15
  formatNum,
15
16
  inferMetricUnitFromName,
16
17
  isAutoresearchCommittableFile,
17
18
  isAutoresearchLocalStatePath,
19
+ isAutoresearchShCommand,
18
20
  isBetter,
19
21
  mergeAsi,
20
22
  readPendingRunSummary,
@@ -60,7 +62,14 @@ const logExperimentSchema = Type.Object({
60
62
  ),
61
63
  force: Type.Optional(
62
64
  Type.Boolean({
63
- description: "Allow introducing new secondary metrics.",
65
+ description:
66
+ "When true: skip ASI field requirements and allow keeping a run whose primary metric regressed versus the best kept run.",
67
+ }),
68
+ ),
69
+ skip_restore: Type.Optional(
70
+ Type.Boolean({
71
+ description:
72
+ "When true and status is discard/crash/checks_failed: skip reverting the working tree to HEAD. Useful when the experiment did not modify tracked files or you want to preserve the current state.",
64
73
  }),
65
74
  ),
66
75
  asi: Type.Optional(
@@ -70,11 +79,6 @@ const logExperimentSchema = Type.Object({
70
79
  ),
71
80
  });
72
81
 
73
- interface PreservedFile {
74
- content: Buffer;
75
- path: string;
76
- }
77
-
78
82
  interface KeepCommitResult {
79
83
  error?: string;
80
84
  note?: string;
@@ -101,10 +105,26 @@ export function createLogExperimentTool(
101
105
  const runtime = options.getRuntime(ctx);
102
106
  const state = runtime.state;
103
107
  const workDir = resolveWorkDir(ctx.cwd);
104
- const fingerprintError = getAutoresearchFingerprintMismatchError(state.segmentFingerprint, workDir);
105
- if (fingerprintError) {
108
+
109
+ const contractResult = readAutoresearchContract(workDir);
110
+ const scriptSnapshot = loadAutoresearchScriptSnapshot(workDir);
111
+ const contractErrors = [...contractResult.errors, ...scriptSnapshot.errors];
112
+ if (contractErrors.length > 0) {
106
113
  return {
107
- content: [{ type: "text", text: `Error: ${fingerprintError}` }],
114
+ content: [{ type: "text", text: `Error: ${contractErrors.join(" ")}` }],
115
+ };
116
+ }
117
+ const benchmarkForSync = contractResult.contract.benchmark;
118
+ if (benchmarkForSync.command && !isAutoresearchShCommand(benchmarkForSync.command)) {
119
+ return {
120
+ content: [
121
+ {
122
+ type: "text",
123
+ text:
124
+ "Error: Benchmark.command in autoresearch.md must invoke `autoresearch.sh` directly before logging. " +
125
+ "Fix autoresearch.md or move the workload into autoresearch.sh.",
126
+ },
127
+ ],
108
128
  };
109
129
  }
110
130
 
@@ -115,6 +135,10 @@ export function createLogExperimentTool(
115
135
  content: [{ type: "text", text: "Error: no unlogged run is available. Run run_experiment first." }],
116
136
  };
117
137
  }
138
+
139
+ applyAutoresearchContractToExperimentState(contractResult.contract, state);
140
+ const logPreamble =
141
+ "Refreshed session fields from autoresearch.md before logging (benchmark, scope, constraints).\n\n";
118
142
  runtime.lastRunSummary = pendingRun;
119
143
  runtime.lastRunAsi = pendingRun.parsedAsi;
120
144
  runtime.lastRunChecks =
@@ -169,22 +193,20 @@ export function createLogExperimentTool(
169
193
  };
170
194
  }
171
195
 
196
+ const forceLoose = params.force === true;
172
197
  const secondaryMetrics = buildSecondaryMetrics(params.metrics, pendingRun.parsedMetrics, state.metricName);
173
- const validationError = validateSecondaryMetrics(state, secondaryMetrics, params.force ?? false);
174
- if (validationError) {
175
- return {
176
- content: [{ type: "text", text: `Error: ${validationError}` }],
177
- };
178
- }
179
198
 
180
199
  const mergedAsi = mergeAsi(runtime.lastRunAsi, sanitizeAsi(params.asi));
181
- const asiValidationError = validateAsiRequirements(mergedAsi, params.status);
182
- if (asiValidationError) {
183
- return {
184
- content: [{ type: "text", text: `Error: ${asiValidationError}` }],
185
- };
200
+ if (!forceLoose) {
201
+ const asiValidationError = validateAsiRequirements(mergedAsi, params.status);
202
+ if (asiValidationError) {
203
+ return {
204
+ content: [{ type: "text", text: `Error: ${asiValidationError}` }],
205
+ };
206
+ }
186
207
  }
187
208
 
209
+ const preRunDirtyPaths = pendingRun.preRunDirtyPaths;
188
210
  let keepScopeValidation: { committablePaths: string[] } | undefined;
189
211
  if (params.status === "keep") {
190
212
  const scopeValidation = await validateKeepPaths(options, workDir, state);
@@ -195,6 +217,7 @@ export function createLogExperimentTool(
195
217
  }
196
218
  const currentBestMetric = findBestKeptMetric(state.results, state.currentSegment, state.bestDirection);
197
219
  if (
220
+ !forceLoose &&
198
221
  currentBestMetric !== null &&
199
222
  params.metric !== currentBestMetric &&
200
223
  !isBetter(params.metric, currentBestMetric, state.bestDirection)
@@ -249,8 +272,8 @@ export function createLogExperimentTool(
249
272
  };
250
273
  }
251
274
  gitNote = commitResult.note ?? null;
252
- } else {
253
- const revertResult = await revertFailedExperiment(options, workDir);
275
+ } else if (!params.skip_restore) {
276
+ const revertResult = await revertFailedExperiment(options, workDir, preRunDirtyPaths);
254
277
  if (revertResult.error) {
255
278
  return {
256
279
  content: [{ type: "text", text: `Error: ${revertResult.error}` }],
@@ -308,7 +331,7 @@ export function createLogExperimentTool(
308
331
  runtime.lastAutoResumePendingRunNumber = null;
309
332
 
310
333
  const currentSegmentRuns = currentResults(state.results, state.currentSegment).length;
311
- const text = buildLogText(state, experiment, currentSegmentRuns, wallClockSeconds, gitNote);
334
+ const text = logPreamble + buildLogText(state, experiment, currentSegmentRuns, wallClockSeconds, gitNote);
312
335
  if (state.maxExperiments !== null && currentSegmentRuns >= state.maxExperiments) {
313
336
  runtime.autoresearchMode = false;
314
337
  options.pi.appendEntry(
@@ -431,23 +454,6 @@ export function validateAsiRequirements(asi: ASIData | undefined, status: Experi
431
454
  return null;
432
455
  }
433
456
 
434
- function validateSecondaryMetrics(state: ExperimentState, metrics: NumericMetricMap, force: boolean): string | null {
435
- if (state.secondaryMetrics.length === 0) return null;
436
- const knownNames = new Set(state.secondaryMetrics.map(metric => metric.name));
437
- const providedNames = new Set(Object.keys(metrics));
438
-
439
- const missing = [...knownNames].filter(name => !providedNames.has(name));
440
- if (missing.length > 0) {
441
- return `missing secondary metrics: ${missing.join(", ")}`;
442
- }
443
-
444
- const newMetrics = [...providedNames].filter(name => !knownNames.has(name));
445
- if (newMetrics.length > 0 && !force) {
446
- return `new secondary metrics require force=true: ${newMetrics.join(", ")}`;
447
- }
448
- return null;
449
- }
450
-
451
457
  function registerSecondaryMetrics(state: ExperimentState, metrics: NumericMetricMap): void {
452
458
  for (const name of Object.keys(metrics)) {
453
459
  if (state.secondaryMetrics.some(metric => metric.name === name)) continue;
@@ -493,7 +499,7 @@ function validateObservedStatus(
493
499
  }
494
500
 
495
501
  async function commitKeptExperiment(
496
- options: AutoresearchToolFactoryOptions,
502
+ _options: AutoresearchToolFactoryOptions,
497
503
  workDir: string,
498
504
  state: ExperimentState,
499
505
  experiment: ExperimentResult,
@@ -503,25 +509,15 @@ async function commitKeptExperiment(
503
509
  return { note: "nothing to commit" };
504
510
  }
505
511
 
506
- const addResult = await options.pi.exec("git", ["add", "--all", "--", ...scopeValidation.committablePaths], {
507
- cwd: workDir,
508
- timeout: 10_000,
509
- });
510
- if (addResult.code !== 0) {
512
+ try {
513
+ await git.stage.files(workDir, scopeValidation.committablePaths);
514
+ } catch (err) {
511
515
  return {
512
- error: `git add failed: ${mergeStdoutStderr(addResult).trim() || `exit ${addResult.code}`}`,
516
+ error: `git add failed: ${err instanceof Error ? err.message : String(err)}`,
513
517
  };
514
518
  }
515
519
 
516
- const diffResult = await options.pi.exec(
517
- "git",
518
- ["diff", "--cached", "--quiet", "--", ...scopeValidation.committablePaths],
519
- {
520
- cwd: workDir,
521
- timeout: 10_000,
522
- },
523
- );
524
- if (diffResult.code === 0) {
520
+ if (!(await git.diff.has(workDir, { cached: true, files: scopeValidation.committablePaths }))) {
525
521
  return { note: "nothing to commit" };
526
522
  }
527
523
 
@@ -533,112 +529,72 @@ async function commitKeptExperiment(
533
529
  payload[name] = value;
534
530
  }
535
531
  const commitMessage = `${experiment.description}\n\nResult: ${JSON.stringify(payload)}`;
536
- const commitResult = await options.pi.exec(
537
- "git",
538
- ["commit", "-m", commitMessage, "--", ...scopeValidation.committablePaths],
539
- {
540
- cwd: workDir,
541
- timeout: 10_000,
542
- },
543
- );
544
- if (commitResult.code !== 0) {
532
+ let commitResultText = "";
533
+ try {
534
+ const commitResult = await git.commit(workDir, commitMessage, {
535
+ files: scopeValidation.committablePaths,
536
+ });
537
+ commitResultText = mergeStdoutStderr(commitResult);
538
+ } catch (err) {
545
539
  return {
546
- error: `git commit failed: ${mergeStdoutStderr(commitResult).trim() || `exit ${commitResult.code}`}`,
540
+ error: `git commit failed: ${err instanceof Error ? err.message : String(err)}`,
547
541
  };
548
542
  }
549
543
 
550
- const revParseResult = await options.pi.exec("git", ["rev-parse", "--short=7", "HEAD"], {
551
- cwd: workDir,
552
- timeout: 5_000,
553
- });
554
- const newCommit = revParseResult.stdout.trim();
544
+ const newCommit = (await git.head.short(workDir, 7)) ?? "";
555
545
  if (newCommit.length >= 7) {
556
546
  experiment.commit = newCommit;
557
547
  }
558
- const summaryLine =
559
- mergeStdoutStderr(commitResult)
560
- .split("\n")
561
- .find(line => line.trim().length > 0) ?? "committed";
548
+ const summaryLine = commitResultText.split("\n").find(line => line.trim().length > 0) ?? "committed";
562
549
  return { note: summaryLine.trim() };
563
550
  }
564
551
 
565
552
  async function revertFailedExperiment(
566
553
  options: AutoresearchToolFactoryOptions,
567
554
  workDir: string,
555
+ preRunDirtyPaths: string[],
568
556
  ): Promise<KeepCommitResult> {
569
- const preservedFiles = preserveAutoresearchFiles(workDir);
570
- const restoreResult = await options.pi.exec(
571
- "git",
572
- ["restore", "--source=HEAD", "--staged", "--worktree", "--", "."],
573
- { cwd: workDir, timeout: 10_000 },
574
- );
575
- const cleanResult = await options.pi.exec("git", ["clean", "-fd", "--", "."], { cwd: workDir, timeout: 10_000 });
576
- const cleanIgnoredResult = await options.pi.exec("git", ["clean", "-fdX", "--", "."], {
577
- cwd: workDir,
578
- timeout: 10_000,
579
- });
580
- restoreAutoresearchFiles(preservedFiles);
581
- if (restoreResult.code !== 0) {
582
- return {
583
- error: `git restore failed: ${mergeStdoutStderr(restoreResult).trim() || `exit ${restoreResult.code}`}`,
584
- };
585
- }
586
- if (cleanResult.code !== 0) {
587
- return {
588
- error: `git clean failed: ${mergeStdoutStderr(cleanResult).trim() || `exit ${cleanResult.code}`}`,
589
- };
590
- }
591
- if (cleanIgnoredResult.code !== 0) {
592
- return {
593
- error: `git clean -X failed: ${mergeStdoutStderr(cleanIgnoredResult).trim() || `exit ${cleanIgnoredResult.code}`}`,
594
- };
595
- }
596
- const dirtyCheckResult = await options.pi.exec(
597
- "git",
598
- ["status", "--porcelain=v1", "-z", "--untracked-files=all", "--", "."],
599
- { cwd: workDir, timeout: 10_000 },
600
- );
601
- if (dirtyCheckResult.code !== 0) {
557
+ let statusText: string;
558
+ try {
559
+ statusText = await git.status(workDir, {
560
+ pathspecs: ["."],
561
+ porcelainV1: true,
562
+ untrackedFiles: "all",
563
+ z: true,
564
+ });
565
+ } catch (err) {
602
566
  return {
603
- error: `git status failed after cleanup: ${mergeStdoutStderr(dirtyCheckResult).trim() || `exit ${dirtyCheckResult.code}`}`,
567
+ error: `git status failed: ${err instanceof Error ? err.message : String(err)}`,
604
568
  };
605
569
  }
570
+
606
571
  const workDirPrefix = await readGitWorkDirPrefix(options, workDir);
607
- const remainingDirtyPaths = parseWorkDirDirtyPaths(dirtyCheckResult.stdout, workDirPrefix).filter(
608
- relativePath => !isAutoresearchLocalStatePath(relativePath),
609
- );
610
- if (remainingDirtyPaths.length > 0) {
611
- return {
612
- error:
613
- "Autoresearch cleanup left the worktree dirty. Resolve these paths before continuing: " +
614
- remainingDirtyPaths.join(", "),
615
- };
572
+ const { tracked, untracked } = computeRunModifiedPaths(preRunDirtyPaths, statusText, workDirPrefix);
573
+ const totalReverted = tracked.length + untracked.length;
574
+ if (totalReverted === 0) {
575
+ return { note: "nothing to revert" };
616
576
  }
617
- return { note: "reverted changes" };
618
- }
619
577
 
620
- function preserveAutoresearchFiles(workDir: string): PreservedFile[] {
621
- const files: PreservedFile[] = [];
622
- for (const relativePath of [...AUTORESEARCH_COMMITTABLE_FILES, "autoresearch.jsonl"]) {
623
- const absolutePath = path.join(workDir, relativePath);
624
- if (!fs.existsSync(absolutePath)) continue;
625
- files.push({
626
- content: fs.readFileSync(absolutePath),
627
- path: absolutePath,
628
- });
629
- }
630
- const localStateDir = path.join(workDir, ".autoresearch");
631
- if (fs.existsSync(localStateDir)) {
632
- collectDirectoryFiles(localStateDir, files);
578
+ if (tracked.length > 0) {
579
+ try {
580
+ await git.restore(workDir, { files: tracked, source: "HEAD", staged: true, worktree: true });
581
+ } catch (err) {
582
+ return {
583
+ error: `git restore failed: ${err instanceof Error ? err.message : String(err)}`,
584
+ };
585
+ }
633
586
  }
634
- return files;
635
- }
636
587
 
637
- function restoreAutoresearchFiles(files: PreservedFile[]): void {
638
- for (const file of files) {
639
- fs.mkdirSync(path.dirname(file.path), { recursive: true });
640
- fs.writeFileSync(file.path, file.content);
588
+ for (const filePath of untracked) {
589
+ const absolutePath = path.join(workDir, filePath);
590
+ try {
591
+ fs.rmSync(absolutePath, { force: true, recursive: true });
592
+ } catch {
593
+ // Best-effort removal of untracked files
594
+ }
641
595
  }
596
+
597
+ return { note: `reverted ${totalReverted} file${totalReverted === 1 ? "" : "s"}` };
642
598
  }
643
599
 
644
600
  function mergeStdoutStderr(result: { stderr: string; stdout: string }): string {
@@ -654,54 +610,40 @@ async function validateKeepPaths(
654
610
  return "Files in Scope is empty for the current segment. Re-run init_experiment after fixing autoresearch.md.";
655
611
  }
656
612
 
657
- const statusResult = await options.pi.exec(
658
- "git",
659
- ["status", "--porcelain=v1", "-z", "--untracked-files=all", "--", "."],
660
- {
661
- cwd: workDir,
662
- timeout: 10_000,
663
- },
664
- );
665
- if (statusResult.code !== 0) {
666
- return `git status failed: ${mergeStdoutStderr(statusResult).trim() || `exit ${statusResult.code}`}`;
613
+ let statusText: string;
614
+ try {
615
+ statusText = await git.status(workDir, {
616
+ pathspecs: ["."],
617
+ porcelainV1: true,
618
+ untrackedFiles: "all",
619
+ z: true,
620
+ });
621
+ } catch (err) {
622
+ return `git status failed: ${err instanceof Error ? err.message : String(err)}`;
667
623
  }
668
624
 
669
625
  const workDirPrefix = await readGitWorkDirPrefix(options, workDir);
670
626
  const committablePaths: string[] = [];
671
- for (const normalizedPath of parseWorkDirDirtyPaths(statusResult.stdout, workDirPrefix)) {
672
- if (isAutoresearchLocalStatePath(normalizedPath)) {
627
+ for (const entry of parseWorkDirDirtyPathsWithStatus(statusText, workDirPrefix)) {
628
+ if (isAutoresearchLocalStatePath(entry.path)) {
673
629
  continue;
674
630
  }
675
- if (isAutoresearchCommittableFile(normalizedPath)) {
676
- committablePaths.push(normalizedPath);
631
+ if (isAutoresearchCommittableFile(entry.path)) {
632
+ committablePaths.push(entry.path);
677
633
  continue;
678
634
  }
679
- if (state.offLimits.some(spec => pathMatchesContractPath(normalizedPath, spec))) {
680
- return `cannot keep this run because ${normalizedPath} is listed under Off Limits in autoresearch.md`;
635
+ if (state.offLimits.some(spec => pathMatchesContractPath(entry.path, spec))) {
636
+ return `cannot keep this run because ${entry.path} is listed under Off Limits in autoresearch.md`;
681
637
  }
682
- if (!state.scopePaths.some(spec => pathMatchesContractPath(normalizedPath, spec))) {
683
- return `cannot keep this run because ${normalizedPath} is outside Files in Scope`;
638
+ if (!state.scopePaths.some(spec => pathMatchesContractPath(entry.path, spec))) {
639
+ return `cannot keep this run because ${entry.path} is outside Files in Scope`;
684
640
  }
685
- committablePaths.push(normalizedPath);
641
+ committablePaths.push(entry.path);
686
642
  }
687
643
 
688
644
  return { committablePaths };
689
645
  }
690
646
 
691
- function collectDirectoryFiles(directory: string, files: PreservedFile[]): void {
692
- for (const entry of fs.readdirSync(directory, { withFileTypes: true })) {
693
- const absolutePath = path.join(directory, entry.name);
694
- if (entry.isDirectory()) {
695
- collectDirectoryFiles(absolutePath, files);
696
- continue;
697
- }
698
- files.push({
699
- content: fs.readFileSync(absolutePath),
700
- path: absolutePath,
701
- });
702
- }
703
- }
704
-
705
647
  async function updateRunMetadata(
706
648
  runDirectory: string | null,
707
649
  metadata: {
@@ -808,9 +750,12 @@ function buildLogText(
808
750
  }
809
751
 
810
752
  async function readGitWorkDirPrefix(options: AutoresearchToolFactoryOptions, workDir: string): Promise<string> {
811
- const prefixResult = await options.pi.exec("git", ["rev-parse", "--show-prefix"], { cwd: workDir, timeout: 5_000 });
812
- if (prefixResult.code !== 0) return "";
813
- return prefixResult.stdout.trim();
753
+ void options;
754
+ try {
755
+ return await git.show.prefix(workDir);
756
+ } catch {
757
+ return "";
758
+ }
814
759
  }
815
760
 
816
761
  function truncateAsiValue(value: ASIData[string]): string {
@@ -8,7 +8,8 @@ import type { ToolDefinition } from "../../extensibility/extensions";
8
8
  import type { Theme } from "../../modes/theme/theme";
9
9
  import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, truncateTail } from "../../session/streaming-output";
10
10
  import { replaceTabs, shortenPath, truncateToWidth } from "../../tools/render-utils";
11
- import { getAutoresearchFingerprintMismatchError } from "../contract";
11
+ import * as git from "../../utils/git";
12
+ import { parseWorkDirDirtyPaths } from "../git";
12
13
  import {
13
14
  EXPERIMENT_MAX_BYTES,
14
15
  EXPERIMENT_MAX_LINES,
@@ -16,6 +17,7 @@ import {
16
17
  formatNum,
17
18
  getAutoresearchRunDirectory,
18
19
  getNextAutoresearchRunNumber,
20
+ isAutoresearchLocalStatePath,
19
21
  isAutoresearchShCommand,
20
22
  killTree,
21
23
  parseAsiLines,
@@ -40,6 +42,12 @@ const runExperimentSchema = Type.Object({
40
42
  description: "Timeout in seconds for autoresearch.checks.sh. Defaults to 300.",
41
43
  }),
42
44
  ),
45
+ force: Type.Optional(
46
+ Type.Boolean({
47
+ description:
48
+ "When true, allow a command that differs from the segment benchmark command and skip the rule that autoresearch.sh must be invoked directly when that script exists.",
49
+ }),
50
+ ),
43
51
  });
44
52
 
45
53
  interface ProcessExecutionResult {
@@ -87,14 +95,9 @@ export function createRunExperimentTool(
87
95
  const workDir = resolveWorkDir(ctx.cwd);
88
96
  const checksPath = path.join(workDir, "autoresearch.checks.sh");
89
97
  const autoresearchScriptPath = path.join(workDir, "autoresearch.sh");
90
- const fingerprintError = getAutoresearchFingerprintMismatchError(state.segmentFingerprint, workDir);
91
- if (fingerprintError) {
92
- return {
93
- content: [{ type: "text", text: `Error: ${fingerprintError}` }],
94
- };
95
- }
96
98
 
97
- if (state.benchmarkCommand && params.command.trim() !== state.benchmarkCommand) {
99
+ const forceCommand = params.force === true;
100
+ if (!forceCommand && state.benchmarkCommand && params.command.trim() !== state.benchmarkCommand) {
98
101
  return {
99
102
  content: [
100
103
  {
@@ -107,7 +110,7 @@ export function createRunExperimentTool(
107
110
  };
108
111
  }
109
112
 
110
- if (fs.existsSync(autoresearchScriptPath) && !isAutoresearchShCommand(params.command)) {
113
+ if (!forceCommand && fs.existsSync(autoresearchScriptPath) && !isAutoresearchShCommand(params.command)) {
111
114
  return {
112
115
  content: [
113
116
  {
@@ -156,6 +159,17 @@ export function createRunExperimentTool(
156
159
  const checksLogPath = path.join(runDirectory, "checks.log");
157
160
  const runJsonPath = path.join(runDirectory, "run.json");
158
161
  await fs.promises.mkdir(runDirectory, { recursive: true });
162
+
163
+ const preRunStatus = await git.status(workDir, {
164
+ porcelainV1: true,
165
+ untrackedFiles: "all",
166
+ z: true,
167
+ });
168
+ const workDirPrefix = await git.show.prefix(workDir);
169
+ const preRunDirtyPaths = parseWorkDirDirtyPaths(preRunStatus, workDirPrefix).filter(
170
+ p => !isAutoresearchLocalStatePath(p),
171
+ );
172
+
159
173
  runtime.lastRunChecks = null;
160
174
  runtime.lastRunDuration = null;
161
175
  runtime.lastRunAsi = null;
@@ -171,6 +185,7 @@ export function createRunExperimentTool(
171
185
  benchmarkLogPath,
172
186
  checksLogPath,
173
187
  command: params.command,
188
+ preRunDirtyPaths,
174
189
  startedAt: new Date().toISOString(),
175
190
  },
176
191
  null,
@@ -287,6 +302,7 @@ export function createRunExperimentTool(
287
302
  parsedAsi,
288
303
  metricName: state.metricName,
289
304
  metricUnit: state.metricUnit,
305
+ preRunDirtyPaths,
290
306
  truncation: llmTruncation.truncated ? llmTruncation : undefined,
291
307
  fullOutputPath: execution.logPath,
292
308
  };
@@ -300,6 +316,7 @@ export function createRunExperimentTool(
300
316
  parsedMetrics,
301
317
  parsedPrimary,
302
318
  passed: resultDetails.passed,
319
+ preRunDirtyPaths,
303
320
  runDirectory,
304
321
  runNumber,
305
322
  };
@@ -329,6 +346,7 @@ export function createRunExperimentTool(
329
346
  parsedMetrics,
330
347
  parsedPrimary,
331
348
  parsedAsi,
349
+ preRunDirtyPaths,
332
350
  truncation: resultDetails.truncation,
333
351
  fullOutputPath: resultDetails.fullOutputPath,
334
352
  },
@@ -337,8 +355,28 @@ export function createRunExperimentTool(
337
355
  ),
338
356
  );
339
357
 
358
+ const commandWarnings: string[] = [];
359
+ if (forceCommand) {
360
+ if (state.benchmarkCommand && params.command.trim() !== state.benchmarkCommand) {
361
+ commandWarnings.push(
362
+ `Warning: command override (force=true). Segment benchmark is ${state.benchmarkCommand}; ran ${params.command}.`,
363
+ );
364
+ }
365
+ if (fs.existsSync(autoresearchScriptPath) && !isAutoresearchShCommand(params.command)) {
366
+ commandWarnings.push(
367
+ "Warning: autoresearch.sh exists but the command was not a direct autoresearch.sh invocation (force=true).",
368
+ );
369
+ }
370
+ }
371
+ const warningPrefix = commandWarnings.length > 0 ? `${commandWarnings.join("\n")}\n\n` : "";
372
+
340
373
  return {
341
- content: [{ type: "text", text: buildRunText(resultDetails, llmTruncation.content, state.bestMetric) }],
374
+ content: [
375
+ {
376
+ type: "text",
377
+ text: warningPrefix + buildRunText(resultDetails, llmTruncation.content, state.bestMetric),
378
+ },
379
+ ],
342
380
  details: resultDetails,
343
381
  };
344
382
  },
@@ -64,7 +64,6 @@ export interface ExperimentState {
64
64
  scopePaths: string[];
65
65
  offLimits: string[];
66
66
  constraints: string[];
67
- segmentFingerprint: string | null;
68
67
  }
69
68
 
70
69
  export interface RunExperimentProgressDetails {
@@ -96,6 +95,7 @@ export interface RunDetails {
96
95
  parsedAsi: ASIData | null;
97
96
  metricName: string;
98
97
  metricUnit: string;
98
+ preRunDirtyPaths: string[];
99
99
  truncation?: TruncationResult;
100
100
  fullOutputPath?: string;
101
101
  }
@@ -122,6 +122,7 @@ export interface PendingRunSummary {
122
122
  parsedMetrics: NumericMetricMap | null;
123
123
  parsedPrimary: number | null;
124
124
  passed: boolean;
125
+ preRunDirtyPaths: string[];
125
126
  runDirectory: string;
126
127
  runNumber: number;
127
128
  }
@@ -165,7 +166,6 @@ export interface AutoresearchJsonConfigEntry {
165
166
  scopePaths?: string[];
166
167
  offLimits?: string[];
167
168
  constraints?: string[];
168
- segmentFingerprint?: string;
169
169
  }
170
170
 
171
171
  export interface AutoresearchJsonRunEntry {
@@ -114,8 +114,10 @@ async function loadImpl<T>(
114
114
  const results = await Promise.all(
115
115
  providers.map(async provider => {
116
116
  try {
117
- const result = await logger.timeAsync(`capability:${capability.id}:${provider.id}`, () =>
118
- provider.load(ctx),
117
+ const result = await logger.time(
118
+ `capability:${capability.id}:${provider.id}`,
119
+ provider.load.bind(provider),
120
+ ctx,
119
121
  );
120
122
  return { provider, result };
121
123
  } catch (error) {
@@ -4,12 +4,11 @@
4
4
  import * as fs from "node:fs";
5
5
  import * as path from "node:path";
6
6
  import type { ImageContent } from "@oh-my-pi/pi-ai";
7
- import { getProjectDir, isEnoent } from "@oh-my-pi/pi-utils";
7
+ import { getProjectDir, isEnoent, readImageMetadata } from "@oh-my-pi/pi-utils";
8
8
  import chalk from "chalk";
9
9
  import { resolveReadPath } from "../tools/path-utils";
10
10
  import { formatBytes } from "../tools/render-utils";
11
11
  import { formatDimensionNote, resizeImage } from "../utils/image-resize";
12
- import { detectSupportedImageMimeTypeFromFile } from "../utils/mime";
13
12
 
14
13
  // Keep CLI startup responsive and avoid OOM when users pass huge files.
15
14
  // If a file exceeds these limits, we include it as a path-only <file/> block.
@@ -42,7 +41,8 @@ export async function processFileArguments(fileArgs: string[], options?: Process
42
41
  process.exit(1);
43
42
  }
44
43
 
45
- const mimeType = await detectSupportedImageMimeTypeFromFile(absolutePath);
44
+ const imageMetadata = await readImageMetadata(absolutePath);
45
+ const mimeType = imageMetadata?.mimeType;
46
46
  const maxBytes = mimeType ? MAX_CLI_IMAGE_BYTES : MAX_CLI_TEXT_BYTES;
47
47
  if (stat.size > maxBytes) {
48
48
  console.error(