@oh-my-pi/pi-coding-agent 14.5.14 → 14.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/CHANGELOG.md +39 -0
  2. package/package.json +7 -7
  3. package/src/autoresearch/command-resume.md +5 -8
  4. package/src/autoresearch/git.ts +41 -51
  5. package/src/autoresearch/helpers.ts +43 -359
  6. package/src/autoresearch/index.ts +281 -273
  7. package/src/autoresearch/prompt-setup.md +43 -0
  8. package/src/autoresearch/prompt.md +52 -193
  9. package/src/autoresearch/resume-message.md +2 -8
  10. package/src/autoresearch/state.ts +59 -166
  11. package/src/autoresearch/storage.ts +687 -0
  12. package/src/autoresearch/tools/init-experiment.ts +201 -290
  13. package/src/autoresearch/tools/log-experiment.ts +304 -517
  14. package/src/autoresearch/tools/run-experiment.ts +117 -296
  15. package/src/autoresearch/tools/update-notes.ts +116 -0
  16. package/src/autoresearch/types.ts +16 -66
  17. package/src/config/settings-schema.ts +1 -1
  18. package/src/config/settings.ts +20 -1
  19. package/src/cursor.ts +1 -1
  20. package/src/edit/index.ts +9 -31
  21. package/src/edit/line-hash.ts +70 -43
  22. package/src/edit/modes/hashline.lark +26 -0
  23. package/src/edit/modes/hashline.ts +898 -1099
  24. package/src/edit/modes/patch.ts +0 -7
  25. package/src/edit/modes/replace.ts +0 -4
  26. package/src/edit/renderer.ts +22 -20
  27. package/src/edit/streaming.ts +8 -28
  28. package/src/eval/eval.lark +24 -30
  29. package/src/eval/js/context-manager.ts +5 -162
  30. package/src/eval/js/prelude.txt +0 -12
  31. package/src/eval/parse.ts +129 -129
  32. package/src/eval/py/prelude.py +1 -219
  33. package/src/export/html/template.generated.ts +1 -1
  34. package/src/export/html/template.js +2 -2
  35. package/src/internal-urls/docs-index.generated.ts +1 -1
  36. package/src/modes/components/session-observer-overlay.ts +5 -2
  37. package/src/modes/components/status-line/segments.ts +1 -1
  38. package/src/modes/components/status-line.ts +3 -5
  39. package/src/modes/components/tree-selector.ts +4 -5
  40. package/src/modes/components/welcome.ts +11 -1
  41. package/src/modes/controllers/command-controller.ts +2 -6
  42. package/src/modes/controllers/event-controller.ts +1 -2
  43. package/src/modes/controllers/extension-ui-controller.ts +3 -15
  44. package/src/modes/controllers/input-controller.ts +0 -1
  45. package/src/modes/controllers/selector-controller.ts +1 -1
  46. package/src/modes/interactive-mode.ts +5 -7
  47. package/src/prompts/system/system-prompt.md +14 -38
  48. package/src/prompts/tools/ast-edit.md +8 -8
  49. package/src/prompts/tools/ast-grep.md +10 -10
  50. package/src/prompts/tools/eval.md +13 -31
  51. package/src/prompts/tools/find.md +2 -1
  52. package/src/prompts/tools/hashline.md +66 -57
  53. package/src/prompts/tools/search.md +2 -2
  54. package/src/session/session-manager.ts +17 -13
  55. package/src/tools/ast-edit.ts +141 -44
  56. package/src/tools/ast-grep.ts +112 -36
  57. package/src/tools/eval.ts +2 -53
  58. package/src/tools/find.ts +16 -15
  59. package/src/tools/path-utils.ts +36 -196
  60. package/src/tools/search.ts +56 -35
  61. package/src/utils/edit-mode.ts +2 -11
  62. package/src/utils/file-display-mode.ts +1 -1
  63. package/src/utils/git.ts +17 -0
  64. package/src/utils/session-color.ts +0 -12
  65. package/src/utils/title-generator.ts +22 -38
  66. package/src/autoresearch/apply-contract-to-state.ts +0 -24
  67. package/src/autoresearch/contract.ts +0 -288
  68. package/src/edit/modes/atom.lark +0 -29
  69. package/src/edit/modes/atom.ts +0 -1773
  70. package/src/prompts/tools/atom.md +0 -150
@@ -7,48 +7,25 @@ import { Type } from "@sinclair/typebox";
7
7
  import type { ToolDefinition } from "../../extensibility/extensions";
8
8
  import type { Theme } from "../../modes/theme/theme";
9
9
  import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, truncateTail } from "../../session/streaming-output";
10
- import { replaceTabs, shortenPath, truncateToWidth } from "../../tools/render-utils";
10
+ import { replaceTabs, shortenPath } from "../../tools/render-utils";
11
11
  import * as git from "../../utils/git";
12
12
  import { parseWorkDirDirtyPaths } from "../git";
13
13
  import {
14
- collectLoggedRunNumbers,
15
14
  EXPERIMENT_MAX_BYTES,
16
15
  EXPERIMENT_MAX_LINES,
17
16
  formatElapsed,
18
17
  formatNum,
19
- getAutoresearchRunDirectory,
20
- getNextAutoresearchRunNumber,
21
- isAutoresearchLocalStatePath,
22
- isAutoresearchShCommand,
23
18
  killTree,
24
19
  parseAsiLines,
25
20
  parseMetricLines,
26
- readPendingRunSummary,
27
- resolveWorkDir,
28
- validateWorkDir,
29
21
  } from "../helpers";
22
+ import { buildExperimentState } from "../state";
23
+ import { openAutoresearchStorageIfExists } from "../storage";
30
24
  import type { AutoresearchToolFactoryOptions, RunDetails, RunExperimentProgressDetails } from "../types";
25
+ import { DEFAULT_HARNESS_COMMAND } from "./init-experiment";
31
26
 
32
27
  const runExperimentSchema = Type.Object({
33
- command: Type.String({
34
- description: "Shell command to run for this experiment.",
35
- }),
36
- timeout_seconds: Type.Optional(
37
- Type.Number({
38
- description: "Timeout in seconds. Defaults to 600.",
39
- }),
40
- ),
41
- checks_timeout_seconds: Type.Optional(
42
- Type.Number({
43
- description: "Timeout in seconds for autoresearch.checks.sh. Defaults to 300.",
44
- }),
45
- ),
46
- force: Type.Optional(
47
- Type.Boolean({
48
- description:
49
- "When true, allow a command that differs from the segment benchmark command and skip the rule that autoresearch.sh must be invoked directly when that script exists.",
50
- }),
51
- ),
28
+ timeout_seconds: Type.Optional(Type.Number({ description: "Timeout in seconds. Defaults to 600." })),
52
29
  });
53
30
 
54
31
  interface ProcessExecutionResult {
@@ -58,13 +35,6 @@ interface ProcessExecutionResult {
58
35
  output: string;
59
36
  }
60
37
 
61
- interface ChecksExecutionResult {
62
- code: number | null;
63
- killed: boolean;
64
- logPath: string;
65
- output: string;
66
- }
67
-
68
38
  interface ProgressSnapshot {
69
39
  elapsed: string;
70
40
  runDirectory: string;
@@ -80,136 +50,73 @@ export function createRunExperimentTool(
80
50
  name: "run_experiment",
81
51
  label: "Run Experiment",
82
52
  description:
83
- "Run an experiment command with timing, output capture, structured metric parsing, durable run artifacts, and optional autoresearch.checks.sh validation.",
53
+ "Run any benchmark command. Output is captured automatically; `METRIC name=value` and `ASI key=value` lines printed by the command are parsed.",
84
54
  parameters: runExperimentSchema,
85
55
  defaultInactive: true,
86
56
  async execute(_toolCallId, params, signal, onUpdate, ctx) {
87
- const workDirError = validateWorkDir(ctx.cwd);
88
- if (workDirError) {
89
- return {
90
- content: [{ type: "text", text: `Error: ${workDirError}` }],
91
- };
92
- }
93
-
94
- const runtime = options.getRuntime(ctx);
95
- const state = runtime.state;
96
- const workDir = resolveWorkDir(ctx.cwd);
97
- const checksPath = path.join(workDir, "autoresearch.checks.sh");
98
- const autoresearchScriptPath = path.join(workDir, "autoresearch.sh");
99
-
100
- const forceCommand = params.force === true;
101
- if (!forceCommand && state.benchmarkCommand && params.command.trim() !== state.benchmarkCommand) {
57
+ const storage = await openAutoresearchStorageIfExists(ctx.cwd);
58
+ const currentBranch = (await git.branch.current(ctx.cwd)) ?? null;
59
+ const session = storage?.getActiveSessionForBranch(currentBranch) ?? null;
60
+ if (!storage || !session) {
102
61
  return {
103
62
  content: [
104
63
  {
105
64
  type: "text",
106
- text:
107
- "Error: command does not match the benchmark command recorded for this segment.\n" +
108
- `Expected: ${state.benchmarkCommand}\nReceived: ${params.command}`,
65
+ text: "Error: no active autoresearch session for the current branch. Call init_experiment first.",
109
66
  },
110
67
  ],
111
68
  };
112
69
  }
113
70
 
114
- if (!forceCommand && fs.existsSync(autoresearchScriptPath) && !isAutoresearchShCommand(params.command)) {
115
- return {
116
- content: [
117
- {
118
- type: "text",
119
- text:
120
- `Error: autoresearch.sh exists. Run it directly instead of using a different command.\n` +
121
- `Expected something like: bash autoresearch.sh\n` +
122
- `Received: ${params.command}`,
123
- },
124
- ],
125
- };
126
- }
71
+ const runtime = options.getRuntime(ctx);
127
72
 
128
- if (state.maxExperiments !== null) {
129
- const segmentRuns = state.results.filter(result => result.segment === state.currentSegment).length;
130
- if (segmentRuns >= state.maxExperiments) {
131
- return {
132
- content: [
133
- {
134
- type: "text",
135
- text: `Maximum experiments reached (${state.maxExperiments}). Re-initialize to start a new segment.`,
136
- },
137
- ],
138
- };
139
- }
140
- }
73
+ const abandonedPriorRun = (() => {
74
+ const pending = storage.getPendingRun(session.id);
75
+ if (!pending) return null;
76
+ storage.abandonPendingRuns(session.id);
77
+ return pending.id;
78
+ })();
141
79
 
142
- const pendingRun =
143
- runtime.lastRunSummary ?? (await readPendingRunSummary(workDir, collectLoggedRunNumbers(state.results)));
144
- if (pendingRun) {
145
- return {
146
- content: [
147
- {
148
- type: "text",
149
- text:
150
- `Error: run #${pendingRun.runNumber} has not been logged yet. ` +
151
- "Call log_experiment before starting another benchmark run.",
152
- },
153
- ],
154
- };
155
- }
80
+ const resolvedCommand = DEFAULT_HARNESS_COMMAND;
81
+ const preRunStatus = await tryGitStatus(ctx.cwd);
82
+ const workDirPrefix = await tryGitPrefix(ctx.cwd);
83
+ const preRunDirtyPaths = parseWorkDirDirtyPaths(preRunStatus, workDirPrefix);
156
84
 
157
- const runNumber = getNextAutoresearchRunNumber(workDir, runtime.lastRunNumber);
158
- const runDirectory = getAutoresearchRunDirectory(workDir, runNumber);
159
- const benchmarkLogPath = path.join(runDirectory, "benchmark.log");
160
- const checksLogPath = path.join(runDirectory, "checks.log");
161
- const runJsonPath = path.join(runDirectory, "run.json");
162
- await fs.promises.mkdir(runDirectory, { recursive: true });
163
-
164
- const preRunStatus = await git.status(workDir, {
165
- porcelainV1: true,
166
- untrackedFiles: "all",
167
- z: true,
85
+ const startedAt = Date.now();
86
+ const insertedRun = storage.insertRun({
87
+ sessionId: session.id,
88
+ segment: session.currentSegment,
89
+ command: resolvedCommand,
90
+ logPath: "", // patched after we know the run id
91
+ preRunDirtyPaths,
92
+ startedAt,
168
93
  });
169
- const workDirPrefix = await git.show.prefix(workDir);
170
- const preRunDirtyPaths = parseWorkDirDirtyPaths(preRunStatus, workDirPrefix).filter(
171
- p => !isAutoresearchLocalStatePath(p),
172
- );
173
94
 
174
- runtime.lastRunChecks = null;
95
+ const runDirectory = path.join(storage.projectDir, "runs", String(insertedRun.id).padStart(4, "0"));
96
+ const benchmarkLogPath = path.join(runDirectory, "benchmark.log");
97
+ fs.mkdirSync(runDirectory, { recursive: true });
98
+ storage.updateRunLogPath(insertedRun.id, benchmarkLogPath);
99
+
175
100
  runtime.lastRunDuration = null;
176
101
  runtime.lastRunAsi = null;
177
102
  runtime.lastRunArtifactDir = runDirectory;
178
- runtime.lastRunNumber = runNumber;
103
+ runtime.lastRunNumber = insertedRun.id;
179
104
  runtime.lastRunSummary = null;
180
- await Bun.write(
181
- runJsonPath,
182
- JSON.stringify(
183
- {
184
- runNumber,
185
- runDirectory,
186
- benchmarkLogPath,
187
- checksLogPath,
188
- command: params.command,
189
- preRunDirtyPaths,
190
- startedAt: new Date().toISOString(),
191
- },
192
- null,
193
- 2,
194
- ),
195
- );
196
-
197
105
  runtime.runningExperiment = {
198
- startedAt: Date.now(),
199
- command: params.command,
106
+ startedAt,
107
+ command: resolvedCommand,
200
108
  runDirectory,
201
- runNumber,
109
+ runNumber: insertedRun.id,
202
110
  };
203
111
  options.dashboard.updateWidget(ctx, runtime);
204
112
  options.dashboard.requestRender();
205
113
 
206
114
  const timeoutMs = Math.max(0, Math.floor((params.timeout_seconds ?? 600) * 1000));
207
- const startedAt = Date.now();
208
115
  let execution: ProcessExecutionResult;
209
116
  try {
210
117
  execution = await executeProcess({
211
- command: ["bash", "-lc", params.command],
212
- cwd: workDir,
118
+ command: ["bash", "-lc", resolvedCommand],
119
+ cwd: ctx.cwd,
213
120
  logPath: benchmarkLogPath,
214
121
  timeoutMs,
215
122
  signal,
@@ -232,41 +139,11 @@ export function createRunExperimentTool(
232
139
  options.dashboard.requestRender();
233
140
  }
234
141
 
235
- const durationSeconds = (Date.now() - startedAt) / 1000;
142
+ const completedAt = Date.now();
143
+ const durationMs = completedAt - startedAt;
144
+ const durationSeconds = durationMs / 1000;
236
145
  runtime.lastRunDuration = durationSeconds;
237
146
 
238
- const benchmarkPassed = execution.exitCode === 0 && !execution.killed;
239
- let checksPass: boolean | null = null;
240
- let checksTimedOut = false;
241
- let checksOutput = "";
242
- let checksDuration = 0;
243
- let checksLogPathValue: string | undefined;
244
-
245
- if (benchmarkPassed && fs.existsSync(checksPath)) {
246
- const checksStartedAt = Date.now();
247
- const checksResult = await runChecks({
248
- cwd: workDir,
249
- pathToChecks: checksPath,
250
- logPath: checksLogPath,
251
- timeoutMs: Math.max(0, Math.floor((params.checks_timeout_seconds ?? 300) * 1000)),
252
- signal,
253
- });
254
- checksDuration = (Date.now() - checksStartedAt) / 1000;
255
- checksTimedOut = checksResult.killed;
256
- checksPass = checksResult.code === 0 && !checksResult.killed;
257
- checksOutput = checksResult.output;
258
- checksLogPathValue = checksResult.logPath;
259
- }
260
-
261
- runtime.lastRunChecks =
262
- checksPass === null
263
- ? null
264
- : {
265
- pass: checksPass,
266
- output: checksOutput,
267
- duration: checksDuration,
268
- };
269
-
270
147
  const llmTruncation = truncateTail(execution.output, {
271
148
  maxBytes: EXPERIMENT_MAX_BYTES,
272
149
  maxLines: EXPERIMENT_MAX_LINES,
@@ -278,113 +155,87 @@ export function createRunExperimentTool(
278
155
 
279
156
  const parsedMetricsMap = parseMetricLines(execution.output);
280
157
  const parsedMetrics = parsedMetricsMap.size > 0 ? Object.fromEntries(parsedMetricsMap.entries()) : null;
281
- const parsedPrimary = parsedMetricsMap.get(state.metricName) ?? null;
158
+ const parsedPrimary = parsedMetricsMap.get(session.primaryMetric) ?? null;
282
159
  const parsedAsi = parseAsiLines(execution.output);
283
160
  runtime.lastRunAsi = parsedAsi;
284
161
 
162
+ storage.markRunCompleted({
163
+ runId: insertedRun.id,
164
+ completedAt,
165
+ durationMs,
166
+ exitCode: execution.exitCode,
167
+ timedOut: execution.killed,
168
+ parsedPrimary,
169
+ parsedMetrics,
170
+ parsedAsi,
171
+ });
172
+
173
+ const passed = execution.exitCode === 0 && !execution.killed;
285
174
  const resultDetails: RunDetails = {
286
- runNumber,
175
+ runNumber: insertedRun.id,
287
176
  runDirectory,
288
177
  benchmarkLogPath,
289
- checksLogPath: checksLogPathValue,
290
- command: params.command,
178
+ command: resolvedCommand,
291
179
  exitCode: execution.exitCode,
292
180
  durationSeconds,
293
- passed: benchmarkPassed && (checksPass === null || checksPass),
294
- crashed: execution.exitCode !== 0 || execution.killed || checksPass === false,
181
+ passed,
182
+ crashed: execution.exitCode !== 0 || execution.killed,
295
183
  timedOut: execution.killed,
296
184
  tailOutput: displayTruncation.content,
297
- checksPass,
298
- checksTimedOut,
299
- checksOutput: checksOutput.split("\n").slice(-80).join("\n"),
300
- checksDuration,
301
185
  parsedMetrics,
302
186
  parsedPrimary,
303
187
  parsedAsi,
304
- metricName: state.metricName,
305
- metricUnit: state.metricUnit,
188
+ metricName: session.primaryMetric,
189
+ metricUnit: session.metricUnit,
306
190
  preRunDirtyPaths,
191
+ abandonedPriorRun,
307
192
  truncation: llmTruncation.truncated ? llmTruncation : undefined,
308
193
  fullOutputPath: execution.logPath,
309
194
  };
195
+
310
196
  runtime.lastRunSummary = {
311
- checksDurationSeconds: checksDuration,
312
- checksPass,
313
- checksTimedOut,
314
- command: params.command,
197
+ command: resolvedCommand,
315
198
  durationSeconds,
316
199
  parsedAsi,
317
200
  parsedMetrics,
318
201
  parsedPrimary,
319
- passed: resultDetails.passed,
202
+ passed,
320
203
  preRunDirtyPaths,
321
204
  runDirectory,
322
- runNumber,
205
+ runNumber: insertedRun.id,
206
+ exitCode: execution.exitCode,
207
+ timedOut: execution.killed,
323
208
  };
324
209
  runtime.autoResumeArmed = true;
325
210
  runtime.lastAutoResumePendingRunNumber = null;
211
+
212
+ // Refresh state to reflect any prior abandonment changes (logged set unchanged).
213
+ const refreshedSession = storage.getSessionById(session.id);
214
+ if (refreshedSession) {
215
+ runtime.state = buildExperimentState(refreshedSession, storage.listLoggedRuns(session.id));
216
+ }
326
217
  options.dashboard.updateWidget(ctx, runtime);
327
218
  options.dashboard.requestRender();
328
219
 
329
- await Bun.write(
330
- runJsonPath,
331
- JSON.stringify(
332
- {
333
- runNumber,
334
- runDirectory,
335
- benchmarkLogPath,
336
- checksLogPath: checksLogPathValue,
337
- command: params.command,
338
- completedAt: new Date().toISOString(),
339
- durationSeconds,
340
- exitCode: execution.exitCode,
341
- timedOut: execution.killed,
342
- checks: {
343
- durationSeconds: checksDuration,
344
- passed: checksPass,
345
- timedOut: checksTimedOut,
346
- },
347
- parsedMetrics,
348
- parsedPrimary,
349
- parsedAsi,
350
- preRunDirtyPaths,
351
- truncation: resultDetails.truncation,
352
- fullOutputPath: resultDetails.fullOutputPath,
353
- },
354
- null,
355
- 2,
356
- ),
357
- );
358
-
359
- const commandWarnings: string[] = [];
360
- if (forceCommand) {
361
- if (state.benchmarkCommand && params.command.trim() !== state.benchmarkCommand) {
362
- commandWarnings.push(
363
- `Warning: command override (force=true). Segment benchmark is ${state.benchmarkCommand}; ran ${params.command}.`,
364
- );
365
- }
366
- if (fs.existsSync(autoresearchScriptPath) && !isAutoresearchShCommand(params.command)) {
367
- commandWarnings.push(
368
- "Warning: autoresearch.sh exists but the command was not a direct autoresearch.sh invocation (force=true).",
369
- );
370
- }
220
+ const headerLines: string[] = [];
221
+ if (abandonedPriorRun !== null) {
222
+ headerLines.push(`Note: abandoned prior pending run #${abandonedPriorRun} before starting this run.`);
371
223
  }
372
- const warningPrefix = commandWarnings.length > 0 ? `${commandWarnings.join("\n")}\n\n` : "";
224
+ const warningPrefix = headerLines.length > 0 ? `${headerLines.join("\n")}\n\n` : "";
373
225
 
374
226
  return {
375
227
  content: [
376
228
  {
377
229
  type: "text",
378
- text: warningPrefix + buildRunText(resultDetails, llmTruncation.content, state.bestMetric),
230
+ text: warningPrefix + buildRunText(resultDetails, llmTruncation.content, runtime.state.bestMetric),
379
231
  },
380
232
  ],
381
233
  details: resultDetails,
382
234
  };
383
235
  },
384
- renderCall(args, _options, theme): Text {
385
- const commandPreview = truncateToWidth(replaceTabs(args.command), 100);
236
+ renderCall(_args, _options, theme): Text {
386
237
  return new Text(
387
- `${theme.fg("toolTitle", theme.bold("run_experiment"))} ${theme.fg("muted", commandPreview)}`,
238
+ `${theme.fg("toolTitle", theme.bold("run_experiment"))} ${theme.fg("muted", DEFAULT_HARNESS_COMMAND)}`,
388
239
  0,
389
240
  0,
390
241
  );
@@ -395,17 +246,14 @@ export function createRunExperimentTool(
395
246
  const preview = replaceTabs(result.content.find(part => part.type === "text")?.text ?? "");
396
247
  return new Text(preview ? `${header}\n${theme.fg("dim", preview)}` : header, 0, 0);
397
248
  }
398
-
399
249
  const details = result.details;
400
250
  if (!details || !isRunDetails(details)) {
401
251
  return new Text(replaceTabs(result.content.find(part => part.type === "text")?.text ?? ""), 0, 0);
402
252
  }
403
-
404
253
  const statusText = renderStatus(details, theme);
405
254
  if (!options.expanded && details.tailOutput.trim().length === 0) {
406
255
  return new Text(statusText, 0, 0);
407
256
  }
408
-
409
257
  const preview = replaceTabs(
410
258
  options.expanded ? details.tailOutput : details.tailOutput.split("\n").slice(-5).join("\n"),
411
259
  );
@@ -418,7 +266,23 @@ export function createRunExperimentTool(
418
266
  };
419
267
  }
420
268
 
421
- async function executeProcess(options: {
269
+ async function tryGitStatus(cwd: string): Promise<string> {
270
+ try {
271
+ return await git.status(cwd, { porcelainV1: true, untrackedFiles: "all", z: true });
272
+ } catch {
273
+ return "";
274
+ }
275
+ }
276
+
277
+ async function tryGitPrefix(cwd: string): Promise<string> {
278
+ try {
279
+ return await git.show.prefix(cwd);
280
+ } catch {
281
+ return "";
282
+ }
283
+ }
284
+
285
+ async function executeProcess(opts: {
422
286
  command: string[];
423
287
  cwd: string;
424
288
  logPath: string;
@@ -427,8 +291,8 @@ async function executeProcess(options: {
427
291
  onProgress?(details: ProgressSnapshot): void;
428
292
  }): Promise<ProcessExecutionResult> {
429
293
  const { promise, resolve, reject } = Promise.withResolvers<ProcessExecutionResult>();
430
- const child = childProcess.spawn(options.command[0] ?? "bash", options.command.slice(1), {
431
- cwd: options.cwd,
294
+ const child = childProcess.spawn(opts.command[0] ?? "bash", opts.command.slice(1), {
295
+ cwd: opts.cwd,
432
296
  detached: true,
433
297
  stdio: ["ignore", "pipe", "pipe"],
434
298
  });
@@ -437,7 +301,7 @@ async function executeProcess(options: {
437
301
  let chunksBytes = 0;
438
302
  let killedByTimeout = false;
439
303
  let resolved = false;
440
- let writeStream: fs.WriteStream | undefined = fs.createWriteStream(options.logPath);
304
+ let writeStream: fs.WriteStream | undefined = fs.createWriteStream(opts.logPath);
441
305
  let forceKillTimeout: NodeJS.Timeout | undefined;
442
306
 
443
307
  const closeWriteStream = (): Promise<void> => {
@@ -459,7 +323,7 @@ async function executeProcess(options: {
459
323
  if (progressTimer) clearInterval(progressTimer);
460
324
  if (timeoutHandle) clearTimeout(timeoutHandle);
461
325
  if (forceKillTimeout) clearTimeout(forceKillTimeout);
462
- options.signal?.removeEventListener("abort", abortHandler);
326
+ opts.signal?.removeEventListener("abort", abortHandler);
463
327
  };
464
328
 
465
329
  const finish = (callback: () => void): void => {
@@ -486,8 +350,8 @@ async function executeProcess(options: {
486
350
  });
487
351
  return {
488
352
  elapsed: formatElapsed(Date.now() - startedAt),
489
- runDirectory: path.dirname(options.logPath),
490
- fullOutputPath: options.logPath,
353
+ runDirectory: path.dirname(opts.logPath),
354
+ fullOutputPath: opts.logPath,
491
355
  tailOutput: tail.content,
492
356
  truncation: tail.truncated ? tail : undefined,
493
357
  };
@@ -503,26 +367,26 @@ async function executeProcess(options: {
503
367
  };
504
368
 
505
369
  const startedAt = Date.now();
506
- const progressTimer = options.onProgress
370
+ const progressTimer = opts.onProgress
507
371
  ? setInterval(() => {
508
- options.onProgress?.(snapshot());
372
+ opts.onProgress?.(snapshot());
509
373
  }, 1000)
510
374
  : undefined;
511
375
  const timeoutHandle =
512
- options.timeoutMs > 0
376
+ opts.timeoutMs > 0
513
377
  ? setTimeout(() => {
514
378
  killedByTimeout = true;
515
379
  killTreeWithEscalation();
516
- }, options.timeoutMs)
380
+ }, opts.timeoutMs)
517
381
  : undefined;
518
382
 
519
383
  const abortHandler = (): void => {
520
384
  killTreeWithEscalation();
521
385
  };
522
- if (options.signal?.aborted) {
386
+ if (opts.signal?.aborted) {
523
387
  abortHandler();
524
388
  } else {
525
- options.signal?.addEventListener("abort", abortHandler, { once: true });
389
+ opts.signal?.addEventListener("abort", abortHandler, { once: true });
526
390
  }
527
391
 
528
392
  child.stdout?.on("data", data => {
@@ -539,16 +403,16 @@ async function executeProcess(options: {
539
403
  child.on("close", async code => {
540
404
  try {
541
405
  await closeWriteStream();
542
- if (options.signal?.aborted) {
406
+ if (opts.signal?.aborted) {
543
407
  finish(() => reject(new Error("aborted")));
544
408
  return;
545
409
  }
546
- const output = await fs.promises.readFile(options.logPath, "utf8");
410
+ const output = await fs.promises.readFile(opts.logPath, "utf8");
547
411
  finish(() =>
548
412
  resolve({
549
413
  exitCode: code,
550
414
  killed: killedByTimeout,
551
- logPath: options.logPath,
415
+ logPath: opts.logPath,
552
416
  output,
553
417
  }),
554
418
  );
@@ -560,31 +424,9 @@ async function executeProcess(options: {
560
424
  return promise;
561
425
  }
562
426
 
563
- async function runChecks(options: {
564
- cwd: string;
565
- pathToChecks: string;
566
- logPath: string;
567
- timeoutMs: number;
568
- signal?: AbortSignal;
569
- }): Promise<ChecksExecutionResult> {
570
- const result = await executeProcess({
571
- command: ["bash", options.pathToChecks],
572
- cwd: options.cwd,
573
- logPath: options.logPath,
574
- timeoutMs: options.timeoutMs,
575
- signal: options.signal,
576
- });
577
- return {
578
- code: result.exitCode,
579
- killed: result.killed,
580
- logPath: result.logPath,
581
- output: result.output.trim(),
582
- };
583
- }
584
-
585
427
  function buildRunText(details: RunDetails, outputPreview: string, bestMetric: number | null): string {
586
428
  const lines: string[] = [];
587
- lines.push(`Run directory: ${details.runDirectory}`);
429
+ lines.push(`Run #${details.runNumber} directory: ${details.runDirectory}`);
588
430
  if (details.timedOut) {
589
431
  lines.push(`TIMEOUT after ${details.durationSeconds.toFixed(1)}s`);
590
432
  } else if (details.exitCode !== 0) {
@@ -592,13 +434,6 @@ function buildRunText(details: RunDetails, outputPreview: string, bestMetric: nu
592
434
  } else {
593
435
  lines.push(`PASSED in ${details.durationSeconds.toFixed(1)}s`);
594
436
  }
595
- if (details.checksTimedOut) {
596
- lines.push(`Checks timed out after ${details.checksDuration.toFixed(1)}s`);
597
- } else if (details.checksPass === false) {
598
- lines.push(`Checks failed in ${details.checksDuration.toFixed(1)}s`);
599
- } else if (details.checksPass === true) {
600
- lines.push(`Checks passed in ${details.checksDuration.toFixed(1)}s`);
601
- }
602
437
  if (bestMetric !== null) {
603
438
  lines.push(`Current baseline ${details.metricName}: ${formatNum(bestMetric, details.metricUnit)}`);
604
439
  }
@@ -627,14 +462,6 @@ function buildRunText(details: RunDetails, outputPreview: string, bestMetric: nu
627
462
  `Output truncated (${formatBytes(EXPERIMENT_MAX_BYTES)} limit). Full output: ${details.fullOutputPath}`,
628
463
  );
629
464
  }
630
- if (details.checksLogPath) {
631
- lines.push(`Checks log: ${details.checksLogPath}`);
632
- }
633
- if (details.checksPass === false && details.checksOutput.length > 0) {
634
- lines.push("");
635
- lines.push("Checks output:");
636
- lines.push(details.checksOutput);
637
- }
638
465
  return lines.join("\n").trimEnd();
639
466
  }
640
467
 
@@ -642,12 +469,6 @@ function renderStatus(details: RunDetails, theme: Theme): string {
642
469
  if (details.timedOut) {
643
470
  return theme.fg("error", `TIMEOUT ${details.durationSeconds.toFixed(1)}s`);
644
471
  }
645
- if (details.checksTimedOut) {
646
- return theme.fg("warning", `Checks timeout ${details.checksDuration.toFixed(1)}s`);
647
- }
648
- if (details.checksPass === false) {
649
- return theme.fg("error", `Checks failed ${details.checksDuration.toFixed(1)}s`);
650
- }
651
472
  if (details.exitCode !== 0) {
652
473
  return theme.fg("error", `FAIL exit=${details.exitCode} ${details.durationSeconds.toFixed(1)}s`);
653
474
  }
@@ -665,5 +486,5 @@ function isRunDetails(value: unknown): value is RunDetails {
665
486
 
666
487
  function isProgressDetails(value: unknown): value is RunExperimentProgressDetails {
667
488
  if (typeof value !== "object" || value === null) return false;
668
- return "phase" in value && value.phase === "running";
489
+ return "phase" in value && (value as { phase: unknown }).phase === "running";
669
490
  }