@oh-my-pi/pi-coding-agent 13.19.0 → 14.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. package/CHANGELOG.md +266 -1
  2. package/package.json +86 -20
  3. package/scripts/format-prompts.ts +2 -2
  4. package/src/autoresearch/apply-contract-to-state.ts +24 -0
  5. package/src/autoresearch/contract.ts +0 -44
  6. package/src/autoresearch/dashboard.ts +1 -2
  7. package/src/autoresearch/git.ts +91 -0
  8. package/src/autoresearch/helpers.ts +49 -0
  9. package/src/autoresearch/index.ts +28 -187
  10. package/src/autoresearch/prompt.md +26 -9
  11. package/src/autoresearch/state.ts +0 -6
  12. package/src/autoresearch/tools/init-experiment.ts +202 -117
  13. package/src/autoresearch/tools/log-experiment.ts +83 -125
  14. package/src/autoresearch/tools/run-experiment.ts +48 -10
  15. package/src/autoresearch/types.ts +2 -2
  16. package/src/capability/index.ts +4 -2
  17. package/src/cli/file-processor.ts +3 -3
  18. package/src/cli/grep-cli.ts +8 -8
  19. package/src/cli/grievances-cli.ts +78 -0
  20. package/src/cli/read-cli.ts +67 -0
  21. package/src/cli/setup-cli.ts +4 -4
  22. package/src/cli/update-cli.ts +3 -3
  23. package/src/cli.ts +2 -0
  24. package/src/commands/grep.ts +6 -1
  25. package/src/commands/grievances.ts +20 -0
  26. package/src/commands/read.ts +33 -0
  27. package/src/commit/agentic/agent.ts +5 -5
  28. package/src/commit/agentic/index.ts +3 -4
  29. package/src/commit/agentic/tools/analyze-file.ts +3 -3
  30. package/src/commit/agentic/validation.ts +1 -1
  31. package/src/commit/analysis/conventional.ts +4 -4
  32. package/src/commit/analysis/summary.ts +3 -3
  33. package/src/commit/changelog/generate.ts +4 -4
  34. package/src/commit/map-reduce/map-phase.ts +4 -4
  35. package/src/commit/map-reduce/reduce-phase.ts +4 -4
  36. package/src/commit/pipeline.ts +3 -4
  37. package/src/config/prompt-templates.ts +44 -226
  38. package/src/config/resolve-config-value.ts +4 -2
  39. package/src/config/settings-schema.ts +54 -2
  40. package/src/config/settings.ts +25 -26
  41. package/src/dap/client.ts +674 -0
  42. package/src/dap/config.ts +150 -0
  43. package/src/dap/defaults.json +211 -0
  44. package/src/dap/index.ts +4 -0
  45. package/src/dap/session.ts +1255 -0
  46. package/src/dap/types.ts +600 -0
  47. package/src/debug/log-viewer.ts +3 -2
  48. package/src/discovery/builtin.ts +1 -2
  49. package/src/discovery/codex.ts +2 -2
  50. package/src/discovery/github.ts +2 -1
  51. package/src/discovery/helpers.ts +2 -2
  52. package/src/discovery/opencode.ts +2 -2
  53. package/src/edit/diff.ts +818 -0
  54. package/src/edit/index.ts +309 -0
  55. package/src/edit/line-hash.ts +67 -0
  56. package/src/edit/modes/chunk.ts +454 -0
  57. package/src/{patch → edit/modes}/hashline.ts +741 -361
  58. package/src/{patch/applicator.ts → edit/modes/patch.ts} +420 -117
  59. package/src/{patch/fuzzy.ts → edit/modes/replace.ts} +519 -197
  60. package/src/{patch → edit}/normalize.ts +97 -76
  61. package/src/{patch/shared.ts → edit/renderer.ts} +181 -108
  62. package/src/exec/bash-executor.ts +4 -2
  63. package/src/exec/idle-timeout-watchdog.ts +126 -0
  64. package/src/exec/non-interactive-env.ts +5 -0
  65. package/src/extensibility/custom-commands/bundled/ci-green/index.ts +2 -2
  66. package/src/extensibility/custom-commands/bundled/review/index.ts +2 -2
  67. package/src/extensibility/custom-commands/loader.ts +1 -2
  68. package/src/extensibility/custom-tools/loader.ts +34 -11
  69. package/src/extensibility/extensions/loader.ts +9 -4
  70. package/src/extensibility/extensions/runner.ts +24 -1
  71. package/src/extensibility/extensions/types.ts +1 -1
  72. package/src/extensibility/hooks/loader.ts +5 -6
  73. package/src/extensibility/hooks/types.ts +1 -1
  74. package/src/extensibility/plugins/doctor.ts +2 -1
  75. package/src/extensibility/slash-commands.ts +3 -7
  76. package/src/index.ts +2 -1
  77. package/src/internal-urls/docs-index.generated.ts +11 -11
  78. package/src/ipy/executor.ts +58 -17
  79. package/src/ipy/gateway-coordinator.ts +6 -4
  80. package/src/ipy/kernel.ts +45 -22
  81. package/src/ipy/runtime.ts +2 -2
  82. package/src/lsp/client.ts +7 -4
  83. package/src/lsp/clients/lsp-linter-client.ts +4 -4
  84. package/src/lsp/config.ts +2 -2
  85. package/src/lsp/defaults.json +688 -154
  86. package/src/lsp/index.ts +234 -45
  87. package/src/lsp/lspmux.ts +2 -2
  88. package/src/lsp/startup-events.ts +13 -0
  89. package/src/lsp/types.ts +12 -1
  90. package/src/lsp/utils.ts +8 -1
  91. package/src/main.ts +102 -46
  92. package/src/memories/index.ts +4 -5
  93. package/src/modes/acp/acp-agent.ts +563 -163
  94. package/src/modes/acp/acp-event-mapper.ts +9 -1
  95. package/src/modes/acp/acp-mode.ts +4 -2
  96. package/src/modes/components/agent-dashboard.ts +3 -4
  97. package/src/modes/components/diff.ts +6 -7
  98. package/src/modes/components/read-tool-group.ts +6 -12
  99. package/src/modes/components/settings-defs.ts +5 -0
  100. package/src/modes/components/tool-execution.ts +1 -1
  101. package/src/modes/components/welcome.ts +1 -1
  102. package/src/modes/controllers/btw-controller.ts +2 -2
  103. package/src/modes/controllers/command-controller.ts +3 -2
  104. package/src/modes/controllers/input-controller.ts +12 -8
  105. package/src/modes/index.ts +20 -2
  106. package/src/modes/interactive-mode.ts +94 -37
  107. package/src/modes/rpc/host-tools.ts +186 -0
  108. package/src/modes/rpc/rpc-client.ts +178 -13
  109. package/src/modes/rpc/rpc-mode.ts +73 -3
  110. package/src/modes/rpc/rpc-types.ts +53 -1
  111. package/src/modes/theme/theme.ts +80 -8
  112. package/src/modes/types.ts +2 -2
  113. package/src/prompts/system/system-prompt.md +2 -1
  114. package/src/prompts/tools/chunk-edit.md +219 -0
  115. package/src/prompts/tools/debug.md +43 -0
  116. package/src/prompts/tools/grep.md +3 -0
  117. package/src/prompts/tools/lsp.md +5 -5
  118. package/src/prompts/tools/read-chunk.md +17 -0
  119. package/src/prompts/tools/read.md +19 -5
  120. package/src/sdk.ts +190 -154
  121. package/src/secrets/obfuscator.ts +1 -1
  122. package/src/session/agent-session.ts +306 -256
  123. package/src/session/agent-storage.ts +12 -12
  124. package/src/session/compaction/branch-summarization.ts +3 -3
  125. package/src/session/compaction/compaction.ts +5 -6
  126. package/src/session/compaction/utils.ts +3 -3
  127. package/src/session/history-storage.ts +62 -19
  128. package/src/session/messages.ts +3 -3
  129. package/src/session/session-dump-format.ts +203 -0
  130. package/src/session/session-storage.ts +4 -2
  131. package/src/session/streaming-output.ts +1 -1
  132. package/src/session/tool-choice-queue.ts +213 -0
  133. package/src/slash-commands/builtin-registry.ts +56 -8
  134. package/src/ssh/connection-manager.ts +2 -2
  135. package/src/ssh/sshfs-mount.ts +5 -5
  136. package/src/stt/downloader.ts +4 -4
  137. package/src/stt/recorder.ts +4 -4
  138. package/src/stt/transcriber.ts +2 -2
  139. package/src/system-prompt.ts +21 -13
  140. package/src/task/agents.ts +5 -6
  141. package/src/task/commands.ts +2 -5
  142. package/src/task/executor.ts +4 -4
  143. package/src/task/index.ts +3 -4
  144. package/src/task/template.ts +2 -2
  145. package/src/task/worktree.ts +4 -4
  146. package/src/tools/ask.ts +2 -3
  147. package/src/tools/ast-edit.ts +7 -7
  148. package/src/tools/ast-grep.ts +7 -7
  149. package/src/tools/auto-generated-guard.ts +36 -41
  150. package/src/tools/await-tool.ts +2 -2
  151. package/src/tools/bash.ts +5 -23
  152. package/src/tools/browser.ts +4 -5
  153. package/src/tools/calculator.ts +2 -3
  154. package/src/tools/cancel-job.ts +2 -2
  155. package/src/tools/checkpoint.ts +3 -3
  156. package/src/tools/debug.ts +1007 -0
  157. package/src/tools/exit-plan-mode.ts +2 -3
  158. package/src/tools/fetch.ts +67 -3
  159. package/src/tools/find.ts +4 -5
  160. package/src/tools/fs-cache-invalidation.ts +5 -0
  161. package/src/tools/gemini-image.ts +13 -5
  162. package/src/tools/gh.ts +10 -11
  163. package/src/tools/grep.ts +57 -9
  164. package/src/tools/index.ts +44 -22
  165. package/src/tools/inspect-image.ts +4 -4
  166. package/src/tools/output-meta.ts +1 -1
  167. package/src/tools/python.ts +19 -6
  168. package/src/tools/read.ts +198 -67
  169. package/src/tools/render-mermaid.ts +2 -3
  170. package/src/tools/render-utils.ts +20 -6
  171. package/src/tools/renderers.ts +3 -1
  172. package/src/tools/report-tool-issue.ts +80 -0
  173. package/src/tools/resolve.ts +70 -39
  174. package/src/tools/search-tool-bm25.ts +2 -2
  175. package/src/tools/ssh.ts +2 -2
  176. package/src/tools/todo-write.ts +2 -2
  177. package/src/tools/tool-timeouts.ts +1 -0
  178. package/src/tools/write.ts +5 -6
  179. package/src/tui/tree-list.ts +3 -1
  180. package/src/utils/clipboard.ts +80 -0
  181. package/src/utils/commit-message-generator.ts +2 -3
  182. package/src/utils/edit-mode.ts +49 -0
  183. package/src/utils/file-display-mode.ts +6 -5
  184. package/src/utils/file-mentions.ts +8 -7
  185. package/src/utils/git.ts +4 -4
  186. package/src/utils/image-loading.ts +98 -0
  187. package/src/utils/title-generator.ts +2 -3
  188. package/src/utils/tools-manager.ts +6 -6
  189. package/src/web/scrapers/choosealicense.ts +1 -1
  190. package/src/web/search/index.ts +3 -3
  191. package/src/autoresearch/command-initialize.md +0 -34
  192. package/src/patch/diff.ts +0 -433
  193. package/src/patch/index.ts +0 -888
  194. package/src/patch/parser.ts +0 -532
  195. package/src/patch/types.ts +0 -292
  196. package/src/prompts/agents/oracle.md +0 -77
  197. package/src/tools/pending-action.ts +0 -49
  198. package/src/utils/child-process.ts +0 -88
  199. package/src/utils/frontmatter.ts +0 -117
  200. package/src/utils/image-input.ts +0 -274
  201. package/src/utils/mime.ts +0 -53
  202. package/src/utils/prompt-format.ts +0 -170
@@ -1,6 +1,5 @@
1
- import { matchesKey, Text, truncateToWidth, visibleWidth } from "@oh-my-pi/pi-tui";
1
+ import { matchesKey, replaceTabs, Text, truncateToWidth, visibleWidth } from "@oh-my-pi/pi-tui";
2
2
  import type { Theme } from "../modes/theme/theme";
3
- import { replaceTabs } from "../tools/render-utils";
4
3
  import { formatElapsed, formatNum, isBetter } from "./helpers";
5
4
  import { currentResults, findBaselineMetric, findBaselineRunNumber, findBaselineSecondary } from "./state";
6
5
  import type { AutoresearchRuntime, DashboardController, ExperimentResult, ExperimentState } from "./types";
@@ -236,3 +236,94 @@ function collectUnsafeDirtyPaths(statusOutput: string, workDirPrefix: string): s
236
236
  }
237
237
  return unsafeDirtyPaths;
238
238
  }
239
+
240
+ export interface DirtyPathEntry {
241
+ path: string;
242
+ untracked: boolean;
243
+ }
244
+
245
+ export function parseDirtyPathsWithStatus(statusOutput: string): DirtyPathEntry[] {
246
+ if (statusOutput.includes("\0")) {
247
+ return parseDirtyPathsNulWithStatus(statusOutput);
248
+ }
249
+ return parseDirtyPathsLinesWithStatus(statusOutput);
250
+ }
251
+
252
+ function parseDirtyPathsNulWithStatus(statusOutput: string): DirtyPathEntry[] {
253
+ const seen = new Set<string>();
254
+ const results: DirtyPathEntry[] = [];
255
+ let index = 0;
256
+ while (index + 3 <= statusOutput.length) {
257
+ const statusToken = statusOutput.slice(index, index + 3);
258
+ index += 3;
259
+ const pathEnd = statusOutput.indexOf("\0", index);
260
+ if (pathEnd < 0) break;
261
+ const firstPath = statusOutput.slice(index, pathEnd);
262
+ index = pathEnd + 1;
263
+ const untracked = statusToken.trim().startsWith("??");
264
+ addDirtyPathEntry(seen, results, firstPath, untracked);
265
+ if (isRenameOrCopy(statusToken)) {
266
+ const secondPathEnd = statusOutput.indexOf("\0", index);
267
+ if (secondPathEnd < 0) break;
268
+ const secondPath = statusOutput.slice(index, secondPathEnd);
269
+ index = secondPathEnd + 1;
270
+ addDirtyPathEntry(seen, results, secondPath, false);
271
+ }
272
+ }
273
+ return results;
274
+ }
275
+
276
+ function parseDirtyPathsLinesWithStatus(statusOutput: string): DirtyPathEntry[] {
277
+ const seen = new Set<string>();
278
+ const results: DirtyPathEntry[] = [];
279
+ for (const line of statusOutput.split("\n")) {
280
+ const trimmedLine = line.trimEnd();
281
+ if (trimmedLine.length < 4) continue;
282
+ const statusToken = trimmedLine.slice(0, 3);
283
+ const rawPath = trimmedLine.slice(3).trim();
284
+ if (rawPath.length === 0) continue;
285
+ const untracked = statusToken.trim().startsWith("??");
286
+ const renameParts = rawPath.split(" -> ");
287
+ for (const renamePart of renameParts) {
288
+ addDirtyPathEntry(seen, results, renamePart, untracked);
289
+ }
290
+ }
291
+ return results;
292
+ }
293
+
294
+ function addDirtyPathEntry(seen: Set<string>, results: DirtyPathEntry[], rawPath: string, untracked: boolean): void {
295
+ const normalizedPath = normalizeStatusPath(rawPath);
296
+ if (normalizedPath.length === 0 || seen.has(normalizedPath)) return;
297
+ seen.add(normalizedPath);
298
+ results.push({ path: normalizedPath, untracked });
299
+ }
300
+
301
+ export function parseWorkDirDirtyPathsWithStatus(statusOutput: string, workDirPrefix: string): DirtyPathEntry[] {
302
+ const results: DirtyPathEntry[] = [];
303
+ for (const entry of parseDirtyPathsWithStatus(statusOutput)) {
304
+ const relativePath = relativizeGitPathToWorkDir(entry.path, workDirPrefix);
305
+ if (relativePath === null) continue;
306
+ results.push({ path: relativePath, untracked: entry.untracked });
307
+ }
308
+ return results;
309
+ }
310
+
311
+ export function computeRunModifiedPaths(
312
+ preRunDirtyPaths: string[],
313
+ currentStatusOutput: string,
314
+ workDirPrefix: string,
315
+ ): { tracked: string[]; untracked: string[] } {
316
+ const preRunSet = new Set(preRunDirtyPaths);
317
+ const tracked: string[] = [];
318
+ const untracked: string[] = [];
319
+ for (const entry of parseWorkDirDirtyPathsWithStatus(currentStatusOutput, workDirPrefix)) {
320
+ if (preRunSet.has(entry.path)) continue;
321
+ if (isAutoresearchLocalStatePath(entry.path)) continue;
322
+ if (entry.untracked) {
323
+ untracked.push(entry.path);
324
+ } else {
325
+ tracked.push(entry.path);
326
+ }
327
+ }
328
+ return { tracked, untracked };
329
+ }
@@ -269,6 +269,45 @@ export async function readPendingRunSummary(
269
269
  return null;
270
270
  }
271
271
 
272
+ export async function abandonUnloggedAutoresearchRuns(
273
+ workDir: string,
274
+ loggedRunNumbers: ReadonlySet<number>,
275
+ ): Promise<number> {
276
+ const runsDir = path.join(workDir, ".autoresearch", "runs");
277
+ let entries: fs.Dirent[];
278
+ try {
279
+ entries = await fs.promises.readdir(runsDir, { withFileTypes: true });
280
+ } catch (error) {
281
+ if (isEnoent(error)) return 0;
282
+ throw error;
283
+ }
284
+
285
+ let abandoned = 0;
286
+ const stamp = new Date().toISOString();
287
+ for (const entry of entries) {
288
+ if (!entry.isDirectory()) continue;
289
+ const directoryName = entry.name;
290
+ const runDirectory = path.join(runsDir, directoryName);
291
+ const runJsonPath = path.join(runDirectory, "run.json");
292
+ let parsed: unknown;
293
+ try {
294
+ parsed = await Bun.file(runJsonPath).json();
295
+ } catch (error) {
296
+ if (isEnoent(error)) continue;
297
+ throw error;
298
+ }
299
+
300
+ const pending = parsePendingRunSummary(parsed, runDirectory, directoryName, loggedRunNumbers);
301
+ if (!pending) continue;
302
+
303
+ const existing = typeof parsed === "object" && parsed !== null ? (parsed as Record<string, unknown>) : {};
304
+ await Bun.write(runJsonPath, JSON.stringify({ ...existing, abandonedAt: stamp }, null, 2));
305
+ abandoned += 1;
306
+ }
307
+
308
+ return abandoned;
309
+ }
310
+
272
311
  export function readConfig(cwd: string): AutoresearchConfig {
273
312
  const configPath = path.join(cwd, "autoresearch.config.json");
274
313
  try {
@@ -326,6 +365,7 @@ function parsePendingRunSummary(
326
365
  ): PendingRunSummary | null {
327
366
  if (typeof value !== "object" || value === null) return null;
328
367
  const candidate = value as {
368
+ abandonedAt?: unknown;
329
369
  checks?: { durationSeconds?: unknown; passed?: unknown; timedOut?: unknown };
330
370
  completedAt?: unknown;
331
371
  command?: unknown;
@@ -335,6 +375,7 @@ function parsePendingRunSummary(
335
375
  parsedAsi?: unknown;
336
376
  parsedMetrics?: unknown;
337
377
  parsedPrimary?: unknown;
378
+ preRunDirtyPaths?: unknown;
338
379
  runNumber?: unknown;
339
380
  status?: unknown;
340
381
  timedOut?: unknown;
@@ -342,6 +383,9 @@ function parsePendingRunSummary(
342
383
  if (candidate.loggedAt !== undefined || candidate.status !== undefined) {
343
384
  return null;
344
385
  }
386
+ if (typeof candidate.abandonedAt === "string" && candidate.abandonedAt.trim().length > 0) {
387
+ return null;
388
+ }
345
389
 
346
390
  const command = typeof candidate.command === "string" ? candidate.command : "";
347
391
  const runNumber =
@@ -389,6 +433,10 @@ function parsePendingRunSummary(
389
433
  : null;
390
434
  const checksTimedOut = candidate.checks?.timedOut === true;
391
435
 
436
+ const preRunDirtyPaths = Array.isArray(candidate.preRunDirtyPaths)
437
+ ? candidate.preRunDirtyPaths.filter((item): item is string => typeof item === "string")
438
+ : [];
439
+
392
440
  return {
393
441
  checksDurationSeconds,
394
442
  checksPass,
@@ -399,6 +447,7 @@ function parsePendingRunSummary(
399
447
  parsedMetrics,
400
448
  parsedPrimary,
401
449
  passed: exitCode === 0 && !timedOut && checksPass !== false,
450
+ preRunDirtyPaths,
402
451
  runDirectory,
403
452
  runNumber,
404
453
  };
@@ -1,9 +1,8 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
3
  import type { AutocompleteItem } from "@oh-my-pi/pi-tui";
4
- import { renderPromptTemplate } from "../config/prompt-templates";
4
+ import { prompt } from "@oh-my-pi/pi-utils";
5
5
  import type { ExtensionContext, ExtensionFactory } from "../extensibility/extensions";
6
- import commandInitializeTemplate from "./command-initialize.md" with { type: "text" };
7
6
  import commandResumeTemplate from "./command-resume.md" with { type: "text" };
8
7
  import { pathMatchesContractPath } from "./contract";
9
8
  import { createDashboardController } from "./dashboard";
@@ -12,7 +11,6 @@ import {
12
11
  formatNum,
13
12
  isAutoresearchCommittableFile,
14
13
  isAutoresearchLocalStatePath,
15
- isAutoresearchShCommand,
16
14
  normalizeAutoresearchPath,
17
15
  readMaxExperiments,
18
16
  readPendingRunSummary,
@@ -37,18 +35,6 @@ import type { AutoresearchRuntime, ChecksResult, ExperimentResult, PendingRunSum
37
35
 
38
36
  const EXPERIMENT_TOOL_NAMES = ["init_experiment", "run_experiment", "log_experiment"];
39
37
 
40
- interface AutoresearchSetupInput {
41
- intent: string;
42
- benchmarkCommand: string;
43
- metricName: string;
44
- metricUnit: string;
45
- direction: "lower" | "higher";
46
- secondaryMetrics: string[];
47
- scopePaths: string[];
48
- offLimits: string[];
49
- constraints: string[];
50
- }
51
-
52
38
  export const createAutoresearchExtension: ExtensionFactory = api => {
53
39
  const runtimeStore = createRuntimeStore();
54
40
  const dashboard = createDashboardController();
@@ -109,17 +95,6 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
109
95
  api.on("tool_call", (event, ctx) => {
110
96
  const runtime = getRuntime(ctx);
111
97
  if (!runtime.autoresearchMode) return;
112
- if (event.toolName === "bash") {
113
- const command = typeof event.input.command === "string" ? event.input.command : "";
114
- const validationError = validateAutoresearchBashCommand(command);
115
- if (validationError) {
116
- return {
117
- block: true,
118
- reason: validationError,
119
- };
120
- }
121
- return;
122
- }
123
98
  if (event.toolName !== "write" && event.toolName !== "edit" && event.toolName !== "ast_edit") return;
124
99
 
125
100
  const rawPaths = getGuardedToolPaths(event.toolName, event.input);
@@ -151,14 +126,17 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
151
126
  });
152
127
 
153
128
  api.registerCommand("autoresearch", {
154
- description: "Start, stop, or clear builtin autoresearch mode.",
129
+ description: "Toggle builtin autoresearch mode, or pass off / clear, or a goal message.",
155
130
  getArgumentCompletions(argumentPrefix: string): AutocompleteItem[] | null {
156
131
  if (argumentPrefix.includes(" ")) return null;
132
+ const normalized = argumentPrefix.trim().toLowerCase();
133
+ // No suggestions for an empty argument prefix so Tab after "/autoresearch " does not
134
+ // force-complete into off/clear; bare command submit toggles like /plan.
135
+ if (normalized.length === 0) return null;
157
136
  const completions: AutocompleteItem[] = [
158
137
  { label: "off", value: "off", description: "Leave autoresearch mode" },
159
138
  { label: "clear", value: "clear", description: "Delete autoresearch.jsonl and leave autoresearch mode" },
160
139
  ];
161
- const normalized = argumentPrefix.trim().toLowerCase();
162
140
  const filtered = completions.filter(item => item.label.startsWith(normalized));
163
141
  return filtered.length > 0 ? filtered : null;
164
142
  },
@@ -171,6 +149,15 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
171
149
  return;
172
150
  }
173
151
 
152
+ if (trimmed === "" && runtime.autoresearchMode) {
153
+ setMode(ctx, false, runtime.goal, "off");
154
+ dashboard.updateWidget(ctx, runtime);
155
+ const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
156
+ await api.setActiveTools(api.getActiveTools().filter(name => !experimentTools.has(name)));
157
+ ctx.ui.notify("Autoresearch mode disabled", "info");
158
+ return;
159
+ }
160
+
174
161
  if (trimmed === "off") {
175
162
  setMode(ctx, false, runtime.goal, "off");
176
163
  dashboard.updateWidget(ctx, runtime);
@@ -227,7 +214,7 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
227
214
  dashboard.updateWidget(ctx, runtime);
228
215
  await api.setActiveTools([...new Set([...api.getActiveTools(), ...EXPERIMENT_TOOL_NAMES])]);
229
216
  api.sendUserMessage(
230
- renderPromptTemplate(commandResumeTemplate, {
217
+ prompt.render(commandResumeTemplate, {
231
218
  autoresearch_md_path: autoresearchMdPath,
232
219
  branch_status_line: branchResult.created
233
220
  ? `Created and checked out dedicated git branch \`${branchResult.branchName}\` before resuming.`
@@ -239,57 +226,21 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
239
226
  return;
240
227
  }
241
228
 
242
- const setup = await promptForAutoresearchSetup(
243
- ctx,
244
- trimmed || runtime.goal || "what should autoresearch improve?",
245
- );
246
- if (!setup) return;
247
-
248
- const branchResult = await ensureAutoresearchBranch(api, workDir, setup.intent);
229
+ const branchGoal = trimmed.length > 0 ? trimmed : null;
230
+ const branchResult = await ensureAutoresearchBranch(api, workDir, branchGoal);
249
231
  if (!branchResult.ok) {
250
232
  ctx.ui.notify(branchResult.error, "error");
251
233
  return;
252
234
  }
253
235
 
254
- setMode(ctx, true, setup.intent, "on");
255
- runtime.state.name = setup.intent;
256
- runtime.state.metricName = setup.metricName;
257
- runtime.state.metricUnit = setup.metricUnit;
258
- runtime.state.bestDirection = setup.direction;
259
- runtime.state.secondaryMetrics = setup.secondaryMetrics.map(name => ({ name, unit: "" }));
260
- runtime.state.benchmarkCommand = setup.benchmarkCommand;
261
- runtime.state.scopePaths = [...setup.scopePaths];
262
- runtime.state.offLimits = [...setup.offLimits];
263
- runtime.state.constraints = [...setup.constraints];
236
+ setMode(ctx, true, branchGoal, "on");
264
237
  dashboard.updateWidget(ctx, runtime);
265
238
  await api.setActiveTools([...new Set([...api.getActiveTools(), ...EXPERIMENT_TOOL_NAMES])]);
266
- api.sendUserMessage(
267
- renderPromptTemplate(commandInitializeTemplate, {
268
- branch_status_line: branchResult.created
269
- ? `Created and checked out dedicated git branch \`${branchResult.branchName}\`.`
270
- : `Using dedicated git branch \`${branchResult.branchName}\`.`,
271
- intent: setup.intent,
272
- benchmark_command: setup.benchmarkCommand,
273
- metric_name: setup.metricName,
274
- metric_unit: setup.metricUnit,
275
- direction: setup.direction,
276
- has_secondary_metrics: setup.secondaryMetrics.length > 0,
277
- secondary_metrics: setup.secondaryMetrics,
278
- secondary_metrics_block: formatBulletBlock(
279
- setup.secondaryMetrics,
280
- value => ` - \`${value}\``,
281
- " - `(none)`",
282
- ),
283
- scope_paths: setup.scopePaths,
284
- scope_paths_block: formatBulletBlock(setup.scopePaths, value => ` - \`${value}\``),
285
- has_off_limits: setup.offLimits.length > 0,
286
- off_limits: setup.offLimits,
287
- off_limits_block: formatBulletBlock(setup.offLimits, value => ` - \`${value}\``, " - `(none)`"),
288
- has_constraints: setup.constraints.length > 0,
289
- constraints: setup.constraints,
290
- constraints_block: formatBulletBlock(setup.constraints, value => ` - ${value}`, " - `(none)`"),
291
- }),
292
- );
239
+ if (trimmed.length > 0) {
240
+ api.sendUserMessage(trimmed);
241
+ } else {
242
+ ctx.ui.notify("Autoresearch enabled—describe what to optimize in your next message.", "info");
243
+ }
293
244
  },
294
245
  });
295
246
 
@@ -352,7 +303,7 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
352
303
  api.sendMessage(
353
304
  {
354
305
  customType: "autoresearch-resume",
355
- content: renderPromptTemplate(resumeMessageTemplate, {
306
+ content: prompt.render(resumeMessageTemplate, {
356
307
  autoresearch_md_path: autoresearchMdPath,
357
308
  has_ideas: fs.existsSync(ideasPath),
358
309
  has_pending_run: Boolean(pendingRun),
@@ -394,15 +345,16 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
394
345
  status: result.status,
395
346
  };
396
347
  });
348
+ const hasAutoresearchMd = fs.existsSync(autoresearchMdPath);
397
349
  return {
398
- systemPrompt: renderPromptTemplate(promptTemplate, {
350
+ systemPrompt: prompt.render(promptTemplate, {
399
351
  base_system_prompt: event.systemPrompt,
400
352
  has_goal: goal.trim().length > 0,
401
353
  goal,
354
+ has_autoresearch_md: hasAutoresearchMd,
402
355
  working_dir: workDir,
403
356
  default_metric_name: runtime.state.metricName,
404
357
  metric_name: runtime.state.metricName,
405
- has_autoresearch_md: fs.existsSync(autoresearchMdPath),
406
358
  autoresearch_md_path: autoresearchMdPath,
407
359
  has_checks: fs.existsSync(checksPath),
408
360
  checks_path: checksPath,
@@ -438,93 +390,6 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
438
390
  });
439
391
  };
440
392
 
441
- async function promptForAutoresearchSetup(
442
- ctx: ExtensionContext,
443
- defaultIntent: string,
444
- ): Promise<AutoresearchSetupInput | undefined> {
445
- const intentInput = await ctx.ui.input("Autoresearch Intent", defaultIntent);
446
- if (intentInput === undefined) return undefined;
447
- const intent = intentInput.trim();
448
- if (intent.length === 0) {
449
- ctx.ui.notify("Autoresearch intent is required", "info");
450
- return undefined;
451
- }
452
-
453
- const benchmarkCommandInput = await ctx.ui.input("Benchmark Command", "bash autoresearch.sh");
454
- if (benchmarkCommandInput === undefined) return undefined;
455
- const benchmarkCommand = benchmarkCommandInput.trim();
456
- if (benchmarkCommand.length === 0) {
457
- ctx.ui.notify("Benchmark command is required", "info");
458
- return undefined;
459
- }
460
- if (!isAutoresearchShCommand(benchmarkCommand)) {
461
- ctx.ui.notify("Benchmark command must invoke `autoresearch.sh` directly", "info");
462
- return undefined;
463
- }
464
-
465
- const metricNameInput = await ctx.ui.input("Primary Metric Name", "runtime_ms");
466
- if (metricNameInput === undefined) return undefined;
467
- const metricName = metricNameInput.trim();
468
- if (metricName.length === 0) {
469
- ctx.ui.notify("Primary metric name is required", "info");
470
- return undefined;
471
- }
472
-
473
- const metricUnitInput = await ctx.ui.input("Metric Unit", "ms");
474
- if (metricUnitInput === undefined) return undefined;
475
- const metricUnit = metricUnitInput.trim();
476
-
477
- const directionInput = await ctx.ui.input("Metric Direction", "lower");
478
- if (directionInput === undefined) return undefined;
479
- const normalizedDirection = directionInput.trim().toLowerCase();
480
- if (normalizedDirection !== "lower" && normalizedDirection !== "higher") {
481
- ctx.ui.notify("Metric direction must be `lower` or `higher`", "info");
482
- return undefined;
483
- }
484
-
485
- const secondaryMetricsInput = await ctx.ui.input("Tradeoff Metrics", "");
486
- if (secondaryMetricsInput === undefined) return undefined;
487
-
488
- const scopePathsInput = await ctx.ui.input("Files in Scope", "packages/coding-agent/src/autoresearch");
489
- if (scopePathsInput === undefined) return undefined;
490
- const scopePaths = splitSetupList(scopePathsInput);
491
- if (scopePaths.length === 0) {
492
- ctx.ui.notify("Files in Scope must include at least one path", "info");
493
- return undefined;
494
- }
495
-
496
- const offLimitsInput = await ctx.ui.input("Off Limits", "");
497
- if (offLimitsInput === undefined) return undefined;
498
- const constraintsInput = await ctx.ui.input("Constraints", "");
499
- if (constraintsInput === undefined) return undefined;
500
-
501
- return {
502
- intent,
503
- benchmarkCommand,
504
- metricName,
505
- metricUnit,
506
- direction: normalizedDirection,
507
- secondaryMetrics: splitSetupList(secondaryMetricsInput),
508
- scopePaths,
509
- offLimits: splitSetupList(offLimitsInput),
510
- constraints: splitSetupList(constraintsInput),
511
- };
512
- }
513
-
514
- function splitSetupList(value: string): string[] {
515
- return value
516
- .split(/\r?\n|,/)
517
- .map(entry => entry.trim())
518
- .filter((entry, index, values) => entry.length > 0 && values.indexOf(entry) === index);
519
- }
520
-
521
- function formatBulletBlock(values: string[], renderValue: (value: string) => string, emptyValue = ""): string {
522
- if (values.length === 0) {
523
- return emptyValue;
524
- }
525
- return values.map(renderValue).join("\n");
526
- }
527
-
528
393
  function hasLocalAutoresearchState(workDir: string): boolean {
529
394
  return fs.existsSync(path.join(workDir, "autoresearch.jsonl")) || fs.existsSync(path.join(workDir, ".autoresearch"));
530
395
  }
@@ -667,27 +532,3 @@ function canonicalizeTargetPath(targetPath: string): string {
667
532
  }
668
533
  return path.resolve(canonicalizeExistingPath(currentPath), ...pendingSegments);
669
534
  }
670
-
671
- function validateAutoresearchBashCommand(command: string): string | null {
672
- const trimmed = command.trim();
673
- if (trimmed.length === 0) {
674
- return null;
675
- }
676
- const mutationPatterns = [
677
- /(^|[;&|()]\s*)(?:bash|sh)\b/,
678
- /(^|[;&|()]\s*)(?:python|python3|node|perl|ruby|php)\b/,
679
- /(^|[;&|()]\s*)(?:mv|cp|rm|mkdir|touch|chmod|chown|ln|install|patch)\b/,
680
- /(^|[;&|()]\s*)sed\s+-i\b/,
681
- /(^|[;&|()]\s*)git\s+(?:add|apply|checkout|clean|commit|merge|rebase|reset|restore|revert|stash|switch|worktree)\b/,
682
- /(^|[^<])>>?/,
683
- /\|\s*tee\b/,
684
- /<<<?/,
685
- ];
686
- if (mutationPatterns.some(pattern => pattern.test(trimmed))) {
687
- return (
688
- "Autoresearch only allows read-only shell inspection. " +
689
- "Use write/edit/ast_edit for file changes and run_experiment for benchmark execution."
690
- );
691
- }
692
- return null;
693
- }
@@ -8,7 +8,11 @@ Autoresearch mode is active.
8
8
  Primary goal:
9
9
  {{goal}}
10
10
  {{else}}
11
+ {{#if has_autoresearch_md}}
11
12
  Primary goal is documented in `autoresearch.md` for this session.
13
+ {{else}}
14
+ There is no `autoresearch.md` yet. Infer what to optimize from the latest user message and the conversation; after you create `autoresearch.md`, keep it as the durable source of truth for goal and benchmark contract.
15
+ {{/if}}
12
16
  {{/if}}
13
17
 
14
18
  Working directory:
@@ -63,7 +67,7 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
63
67
 
64
68
  - `init_experiment` — initialize or reset the experiment session for the current optimization target.
65
69
  - `run_experiment` — run a benchmark or experiment command with timing, output capture, structured metric parsing, and optional backpressure checks.
66
- - `log_experiment` — record the result, update the dashboard, persist JSONL history, auto-commit kept experiments, and auto-revert discarded or failed experiments.
70
+ - `log_experiment` — record the result, update the dashboard, persist JSONL history, auto-commit kept experiments, and revert only run-modified files for discarded or failed experiments (pre-existing uncommitted changes are preserved).
67
71
 
68
72
  ### Operating protocol
69
73
 
@@ -83,6 +87,8 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
83
87
  - Use the same workload every run unless you intentionally re-initialize with a new segment.
84
88
  - Keep the measurement harness, evaluator, and fixed benchmark inputs stable unless you intentionally start a new segment and document the change.
85
89
  4. Initialize the loop with `init_experiment` before the first logged run of a segment.
90
+ - Pass `from_autoresearch_md: true` with only `name` to load the benchmark contract from `autoresearch.md` without mirroring every field in the tool call.
91
+ - Use `abandon_unlogged_runs: true` only when you intentionally discard unlogged run artifacts and need a fresh segment (for example after a bad or obsolete benchmark directory).
86
92
  5. Run a baseline first.
87
93
  - Establish the baseline metric before attempting optimizations.
88
94
  - Track secondary metrics only when they matter to correctness, quality, or obvious regressions.
@@ -90,7 +96,9 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
90
96
  - Make one coherent experiment at a time.
91
97
  - Run `run_experiment`.
92
98
  - Interpret the result honestly.
93
- - Call `log_experiment` after every run.
99
+ - Call `log_experiment` after every run (it refreshes benchmark/scope fields from `autoresearch.md` before logging so keep validation matches the file on disk).
100
+ - Use `run_experiment` with `force: true` only when you must override the segment benchmark command or skip the direct-`autoresearch.sh` rule.
101
+ - On `log_experiment`, `force: true` relaxes ASI requirements and allows keeping a primary-metric regression; prefer normal logging when possible.
94
102
  7. Keep the primary metric as the decision maker.
95
103
  - `keep` when the primary metric improves.
96
104
  - `discard` when it regresses or stays flat.
@@ -137,7 +145,11 @@ Suggested structure:
137
145
  {{#if has_goal}}
138
146
  - {{goal}}
139
147
  {{else}}
148
+ {{#if has_autoresearch_md}}
140
149
  - document the active target here before the first benchmark
150
+ {{else}}
151
+ - (derive from the user's messages, then record here)
152
+ {{/if}}
141
153
  {{/if}}
142
154
 
143
155
  ## Benchmark
@@ -194,15 +206,20 @@ Resume from the existing notes:
194
206
  {{else}}
195
207
  ### Initial setup
196
208
 
197
- `autoresearch.md` does not exist yet.
209
+ `autoresearch.md` does not exist yet. You decide the benchmark contract, harness, and scope from the user's messages and the repository—do not ask the user to re-type benchmark commands or metric names in a separate UI prompt.
210
+
211
+ Before the first benchmark:
198
212
 
199
- Create the experiment workspace before the first benchmark:
213
+ - Write `autoresearch.md` with goal, benchmark command (must be a **direct** invocation of `autoresearch.sh`, e.g. `bash autoresearch.sh`), primary metric name and unit, direction (`lower` or `higher`), tradeoff metrics if relevant, files in scope, off limits, and constraints.
214
+ - Add a short preflight section: prerequisites, one-time setup, and the comparability invariant that must stay fixed across runs.
215
+ - Mark ground-truth evaluators, fixed datasets, and other measurement-critical files as off limits or hard constraints when they define the benchmark contract.
216
+ - Write or update `autoresearch.program.md` when you learn durable heuristics, failure patterns, or repo-specific strategy for later resume turns.
217
+ - Create `autoresearch.sh` as the canonical benchmark entrypoint; print the primary metric as `METRIC <name>=<number>` and optional secondary metrics as additional `METRIC` lines.
218
+ - Optionally add `autoresearch.checks.sh` if correctness or quality needs a hard gate.
219
+ - Call `init_experiment` with arguments that match `autoresearch.md` exactly (benchmark command, metric, unit, direction, scope paths, off limits, constraints).
220
+ - Run and log the baseline.
200
221
 
201
- - write `autoresearch.md`
202
- - write `autoresearch.sh`
203
- - optionally write `autoresearch.checks.sh`
204
- - run `init_experiment`
205
- - run and log the baseline
222
+ Until `init_experiment` succeeds, only autoresearch control files (`autoresearch.md`, `autoresearch.sh`, `autoresearch.program.md`, `autoresearch.ideas.md`, `autoresearch.checks.sh`) may be edited; after initialization, respect Files in Scope from the contract.
206
223
 
207
224
  {{/if}}
208
225
  {{#if has_checks}}
@@ -34,7 +34,6 @@ export function createExperimentState(): ExperimentState {
34
34
  scopePaths: [],
35
35
  offLimits: [],
36
36
  constraints: [],
37
- segmentFingerprint: null,
38
37
  };
39
38
  }
40
39
 
@@ -203,8 +202,6 @@ export function reconstructStateFromJsonl(workDir: string): ReconstructedExperim
203
202
  state.scopePaths = cloneStringArray(configEntry.scopePaths);
204
203
  state.offLimits = cloneStringArray(configEntry.offLimits);
205
204
  state.constraints = cloneStringArray(configEntry.constraints);
206
- state.segmentFingerprint =
207
- typeof configEntry.segmentFingerprint === "string" ? configEntry.segmentFingerprint : null;
208
205
  state.secondaryMetrics = hydrateMetricDefs(configEntry.secondaryMetrics);
209
206
  continue;
210
207
  }
@@ -322,9 +319,6 @@ function parseConfigEntry(value: unknown): AutoresearchJsonConfigEntry | null {
322
319
  candidate.constraints.filter((item): item is string => typeof item === "string"),
323
320
  );
324
321
  }
325
- if (typeof candidate.segmentFingerprint === "string" && candidate.segmentFingerprint.trim().length > 0) {
326
- config.segmentFingerprint = candidate.segmentFingerprint;
327
- }
328
322
  return config;
329
323
  }
330
324